diff --git a/sys/dev/axgbe/if_axgbe.c b/sys/dev/axgbe/if_axgbe.c index 342041c9dec4..ef572bf62aea 100644 --- a/sys/dev/axgbe/if_axgbe.c +++ b/sys/dev/axgbe/if_axgbe.c @@ -1,618 +1,618 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016,2017 SoftIron Inc. * Copyright (c) 2020 Advanced Micro Devices, Inc. * * This software was developed by Andrew Turner under * the sponsorship of SoftIron Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miibus_if.h" #include "xgbe.h" #include "xgbe-common.h" static device_probe_t axgbe_probe; static device_attach_t axgbe_attach; struct axgbe_softc { /* Must be first */ struct xgbe_prv_data prv; uint8_t mac_addr[ETHER_ADDR_LEN]; struct ifmedia media; }; static struct ofw_compat_data compat_data[] = { { "amd,xgbe-seattle-v1a", true }, { NULL, false } }; static struct resource_spec old_phy_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* Rx/Tx regs */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* Integration regs */ { SYS_RES_MEMORY, 2, RF_ACTIVE }, /* Integration regs */ { SYS_RES_IRQ, 0, RF_ACTIVE }, /* Interrupt */ { -1, 0 } }; static struct resource_spec old_mac_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* MAC regs */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* PCS regs */ { SYS_RES_IRQ, 0, RF_ACTIVE }, /* Device interrupt */ /* Per-channel interrupts */ { SYS_RES_IRQ, 1, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 2, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 3, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 4, RF_ACTIVE | RF_OPTIONAL }, { -1, 0 } }; static struct resource_spec mac_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* MAC regs */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* PCS regs */ { SYS_RES_MEMORY, 2, RF_ACTIVE }, /* Rx/Tx regs */ { SYS_RES_MEMORY, 3, RF_ACTIVE }, /* Integration regs */ { SYS_RES_MEMORY, 4, RF_ACTIVE }, /* Integration regs */ { SYS_RES_IRQ, 0, RF_ACTIVE }, /* Device interrupt */ /* Per-channel and auto-negotiation interrupts */ { SYS_RES_IRQ, 1, RF_ACTIVE }, { SYS_RES_IRQ, 2, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 3, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 4, RF_ACTIVE | RF_OPTIONAL }, { 
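/* A note on the RF_OPTIONAL IRQ entries above and below: bus_alloc_resources() still succeeds when the FDT supplies fewer per-channel interrupts, it simply leaves those array slots NULL, so axgbe_attach() below checks each returned entry against NULL before wiring it to a channel. */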
SYS_RES_IRQ, 5, RF_ACTIVE | RF_OPTIONAL }, { -1, 0 } }; static struct xgbe_version_data xgbe_v1 = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v1, .xpcs_access = XGBE_XPCS_ACCESS_V1, .tx_max_fifo_size = 81920, .rx_max_fifo_size = 81920, .tx_tstamp_workaround = 1, }; MALLOC_DEFINE(M_AXGBE, "axgbe", "axgbe data"); static void axgbe_init(void *p) { struct axgbe_softc *sc; - struct ifnet *ifp; + if_t ifp; sc = p; ifp = sc->prv.netdev; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) return; - ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); } static int -axgbe_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) +axgbe_ioctl(if_t ifp, unsigned long command, caddr_t data) { - struct axgbe_softc *sc = ifp->if_softc; + struct axgbe_softc *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch(command) { case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > ETHERMTU_JUMBO) error = EINVAL; /* TODO - change it to iflib way */ break; case SIOCSIFFLAGS: error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void -axgbe_qflush(struct ifnet *ifp) +axgbe_qflush(if_t ifp) { if_qflush(ifp); } static int -axgbe_media_change(struct ifnet *ifp) +axgbe_media_change(if_t ifp) { struct axgbe_softc *sc; int cur_media; - sc = ifp->if_softc; + sc = if_getsoftc(ifp); sx_xlock(&sc->prv.an_mutex); cur_media = sc->media.ifm_cur->ifm_media; switch (IFM_SUBTYPE(cur_media)) { case IFM_10G_KR: sc->prv.phy.speed = SPEED_10000; sc->prv.phy.autoneg = AUTONEG_DISABLE; break; case IFM_2500_KX: sc->prv.phy.speed = SPEED_2500; sc->prv.phy.autoneg = AUTONEG_DISABLE; break; case IFM_1000_KX: sc->prv.phy.speed = SPEED_1000; sc->prv.phy.autoneg = AUTONEG_DISABLE; break; case IFM_AUTO: sc->prv.phy.autoneg = AUTONEG_ENABLE; break; } sx_xunlock(&sc->prv.an_mutex); return (-sc->prv.phy_if.phy_config_aneg(&sc->prv)); } static void -axgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) +axgbe_media_status(if_t ifp, struct ifmediareq *ifmr) { struct axgbe_softc *sc; - sc = ifp->if_softc; + sc = if_getsoftc(ifp); ifmr->ifm_status = IFM_AVALID; if (!sc->prv.phy.link) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER; if (sc->prv.phy.duplex == DUPLEX_FULL) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; switch (sc->prv.phy.speed) { case SPEED_10000: ifmr->ifm_active |= IFM_10G_KR; break; case SPEED_2500: ifmr->ifm_active |= IFM_2500_KX; break; case SPEED_1000: ifmr->ifm_active |= IFM_1000_KX; break; } } static uint64_t -axgbe_get_counter(struct ifnet *ifp, ift_counter c) +axgbe_get_counter(if_t ifp, ift_counter c) { - struct xgbe_prv_data *pdata = ifp->if_softc; + struct xgbe_prv_data *pdata = if_getsoftc(ifp); struct xgbe_mmc_stats *pstats = &pdata->mmc_stats; DBGPR("-->%s\n", __func__); pdata->hw_if.read_mmc_stats(pdata); switch(c) { case IFCOUNTER_IPACKETS: return (pstats->rxframecount_gb); case IFCOUNTER_IERRORS: return (pstats->rxframecount_gb - pstats->rxbroadcastframes_g - pstats->rxmulticastframes_g - pstats->rxunicastframes_g); case IFCOUNTER_OPACKETS: return (pstats->txframecount_gb); case IFCOUNTER_OERRORS: return (pstats->txframecount_gb - pstats->txframecount_g); case IFCOUNTER_IBYTES: return (pstats->rxoctetcount_gb); case IFCOUNTER_OBYTES: return (pstats->txoctetcount_gb); default: return 
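/* Counters the MMC block does not track fall through to if_get_counter_default(). The accessors used above are the point of this change: every direct ifnet dereference, e.g. ifp->if_softc or ifp->if_drv_flags |= IFF_DRV_RUNNING, is replaced by its if_t KPI equivalent, if_getsoftc(ifp) and if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0). */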
(if_get_counter_default(ifp, c)); } } static int axgbe_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) return (ENXIO); device_set_desc(dev, "AMD 10 Gigabit Ethernet"); return (BUS_PROBE_DEFAULT); } static int axgbe_get_optional_prop(device_t dev, phandle_t node, const char *name, int *data, size_t len) { if (!OF_hasprop(node, name)) return (-1); if (OF_getencprop(node, name, data, len) <= 0) { device_printf(dev,"%s property is invalid\n", name); return (ENXIO); } return (0); } static int axgbe_attach(device_t dev) { struct axgbe_softc *sc; - struct ifnet *ifp; + if_t ifp; pcell_t phy_handle; device_t phydev; phandle_t node, phy_node; struct resource *mac_res[11]; struct resource *phy_res[4]; ssize_t len; int error, i, j; sc = device_get_softc(dev); sc->prv.vdata = &xgbe_v1; node = ofw_bus_get_node(dev); if (OF_getencprop(node, "phy-handle", &phy_handle, sizeof(phy_handle)) <= 0) { phy_node = node; if (bus_alloc_resources(dev, mac_spec, mac_res)) { device_printf(dev, "could not allocate phy resources\n"); return (ENXIO); } sc->prv.xgmac_res = mac_res[0]; sc->prv.xpcs_res = mac_res[1]; sc->prv.rxtx_res = mac_res[2]; sc->prv.sir0_res = mac_res[3]; sc->prv.sir1_res = mac_res[4]; sc->prv.dev_irq_res = mac_res[5]; sc->prv.per_channel_irq = OF_hasprop(node, XGBE_DMA_IRQS_PROPERTY); for (i = 0, j = 6; j < nitems(mac_res) - 1 && mac_res[j + 1] != NULL; i++, j++) { if (sc->prv.per_channel_irq) { sc->prv.chan_irq_res[i] = mac_res[j]; } } /* The last entry is the auto-negotiation interrupt */ sc->prv.an_irq_res = mac_res[j]; } else { phydev = OF_device_from_xref(phy_handle); phy_node = ofw_bus_get_node(phydev); if (bus_alloc_resources(phydev, old_phy_spec, phy_res)) { device_printf(dev, "could not allocate phy resources\n"); return (ENXIO); } if (bus_alloc_resources(dev, old_mac_spec, mac_res)) { device_printf(dev, "could not allocate mac resources\n"); return (ENXIO); } sc->prv.rxtx_res = phy_res[0]; sc->prv.sir0_res = phy_res[1]; sc->prv.sir1_res = phy_res[2]; sc->prv.an_irq_res = phy_res[3]; sc->prv.xgmac_res = mac_res[0]; sc->prv.xpcs_res = mac_res[1]; sc->prv.dev_irq_res = mac_res[2]; sc->prv.per_channel_irq = OF_hasprop(node, XGBE_DMA_IRQS_PROPERTY); if (sc->prv.per_channel_irq) { for (i = 0, j = 3; i < nitems(sc->prv.chan_irq_res) && mac_res[j] != NULL; i++, j++) { sc->prv.chan_irq_res[i] = mac_res[j]; } } } if ((len = OF_getproplen(node, "mac-address")) < 0) { device_printf(dev, "No mac-address property\n"); return (EINVAL); } if (len != ETHER_ADDR_LEN) return (EINVAL); OF_getprop(node, "mac-address", sc->mac_addr, ETHER_ADDR_LEN); sc->prv.netdev = ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot alloc ifnet\n"); return (ENXIO); } sc->prv.dev = dev; sc->prv.dmat = bus_get_dma_tag(dev); sc->prv.phy.advertising = ADVERTISED_10000baseKR_Full | ADVERTISED_1000baseKX_Full; /* * Read the needed properties from the phy node. 
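* Note the convention used by axgbe_get_optional_prop() below: 0 means the property was read, a negative return means it is absent (fall back to the XGBE_SPEED_* compile-time defaults), and a positive errno means the property exists but is malformed and attach should fail.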
*/ /* This is documented as optional, but Linux requires it */ if (OF_getencprop(phy_node, XGBE_SPEEDSET_PROPERTY, &sc->prv.speed_set, sizeof(sc->prv.speed_set)) <= 0) { device_printf(dev, "%s property is missing\n", XGBE_SPEEDSET_PROPERTY); return (EINVAL); } error = axgbe_get_optional_prop(dev, phy_node, XGBE_BLWC_PROPERTY, sc->prv.serdes_blwc, sizeof(sc->prv.serdes_blwc)); if (error > 0) { return (error); } else if (error < 0) { sc->prv.serdes_blwc[0] = XGBE_SPEED_1000_BLWC; sc->prv.serdes_blwc[1] = XGBE_SPEED_2500_BLWC; sc->prv.serdes_blwc[2] = XGBE_SPEED_10000_BLWC; } error = axgbe_get_optional_prop(dev, phy_node, XGBE_CDR_RATE_PROPERTY, sc->prv.serdes_cdr_rate, sizeof(sc->prv.serdes_cdr_rate)); if (error > 0) { return (error); } else if (error < 0) { sc->prv.serdes_cdr_rate[0] = XGBE_SPEED_1000_CDR; sc->prv.serdes_cdr_rate[1] = XGBE_SPEED_2500_CDR; sc->prv.serdes_cdr_rate[2] = XGBE_SPEED_10000_CDR; } error = axgbe_get_optional_prop(dev, phy_node, XGBE_PQ_SKEW_PROPERTY, sc->prv.serdes_pq_skew, sizeof(sc->prv.serdes_pq_skew)); if (error > 0) { return (error); } else if (error < 0) { sc->prv.serdes_pq_skew[0] = XGBE_SPEED_1000_PQ; sc->prv.serdes_pq_skew[1] = XGBE_SPEED_2500_PQ; sc->prv.serdes_pq_skew[2] = XGBE_SPEED_10000_PQ; } error = axgbe_get_optional_prop(dev, phy_node, XGBE_TX_AMP_PROPERTY, sc->prv.serdes_tx_amp, sizeof(sc->prv.serdes_tx_amp)); if (error > 0) { return (error); } else if (error < 0) { sc->prv.serdes_tx_amp[0] = XGBE_SPEED_1000_TXAMP; sc->prv.serdes_tx_amp[1] = XGBE_SPEED_2500_TXAMP; sc->prv.serdes_tx_amp[2] = XGBE_SPEED_10000_TXAMP; } error = axgbe_get_optional_prop(dev, phy_node, XGBE_DFE_CFG_PROPERTY, sc->prv.serdes_dfe_tap_cfg, sizeof(sc->prv.serdes_dfe_tap_cfg)); if (error > 0) { return (error); } else if (error < 0) { sc->prv.serdes_dfe_tap_cfg[0] = XGBE_SPEED_1000_DFE_TAP_CONFIG; sc->prv.serdes_dfe_tap_cfg[1] = XGBE_SPEED_2500_DFE_TAP_CONFIG; sc->prv.serdes_dfe_tap_cfg[2] = XGBE_SPEED_10000_DFE_TAP_CONFIG; } error = axgbe_get_optional_prop(dev, phy_node, XGBE_DFE_ENA_PROPERTY, sc->prv.serdes_dfe_tap_ena, sizeof(sc->prv.serdes_dfe_tap_ena)); if (error > 0) { return (error); } else if (error < 0) { sc->prv.serdes_dfe_tap_ena[0] = XGBE_SPEED_1000_DFE_TAP_ENABLE; sc->prv.serdes_dfe_tap_ena[1] = XGBE_SPEED_2500_DFE_TAP_ENABLE; sc->prv.serdes_dfe_tap_ena[2] = XGBE_SPEED_10000_DFE_TAP_ENABLE; } /* Check if the NIC is DMA coherent */ sc->prv.coherent = OF_hasprop(node, "dma-coherent"); if (sc->prv.coherent) { sc->prv.arcr = XGBE_DMA_OS_ARCR; sc->prv.awcr = XGBE_DMA_OS_AWCR; } else { sc->prv.arcr = XGBE_DMA_SYS_ARCR; sc->prv.awcr = XGBE_DMA_SYS_AWCR; } /* Create the lock & workqueues */ spin_lock_init(&sc->prv.xpcs_lock); sc->prv.dev_workqueue = taskqueue_create("axgbe", M_WAITOK, taskqueue_thread_enqueue, &sc->prv.dev_workqueue); taskqueue_start_threads(&sc->prv.dev_workqueue, 1, PI_NET, "axgbe taskq"); /* Set the needed pointers */ xgbe_init_function_ptrs_phy(&sc->prv.phy_if); xgbe_init_function_ptrs_dev(&sc->prv.hw_if); xgbe_init_function_ptrs_desc(&sc->prv.desc_if); sc->prv.vdata->init_function_ptrs_phy_impl(&sc->prv.phy_if); /* Reset the hardware */ sc->prv.hw_if.exit(&sc->prv); /* Read the hardware features */ xgbe_get_all_hw_features(&sc->prv); /* Set default values */ sc->prv.tx_desc_count = XGBE_TX_DESC_CNT; sc->prv.tx_sf_mode = MTL_TSF_ENABLE; sc->prv.tx_threshold = MTL_TX_THRESHOLD_64; sc->prv.tx_osp_mode = DMA_OSP_ENABLE; sc->prv.rx_desc_count = XGBE_RX_DESC_CNT; sc->prv.rx_sf_mode = MTL_RSF_DISABLE; sc->prv.rx_threshold = MTL_RX_THRESHOLD_64; sc->prv.pbl = 
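/* Default DMA/MTL tuning; this mirrors xgbe_default_config() in if_axgbe_pci.c: store-and-forward Tx, 64-byte Tx/Rx thresholds, a 128-beat programmable burst length, and pause frames autonegotiated in both directions. */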
DMA_PBL_128; sc->prv.pause_autoneg = 1; sc->prv.tx_pause = 1; sc->prv.rx_pause = 1; sc->prv.phy_speed = SPEED_UNKNOWN; sc->prv.power_down = 0; /* TODO: Limit to min(ncpus, hw rings) */ sc->prv.tx_ring_count = 1; sc->prv.tx_q_count = 1; sc->prv.rx_ring_count = 1; sc->prv.rx_q_count = sc->prv.hw_feat.rx_q_cnt; /* Init the PHY */ sc->prv.phy_if.phy_init(&sc->prv); /* Set the coalescing */ xgbe_init_rx_coalesce(&sc->prv); xgbe_init_tx_coalesce(&sc->prv); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_init = axgbe_init; - ifp->if_softc = sc; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = axgbe_ioctl; + if_setinitfn(ifp, axgbe_init); + if_setsoftc(ifp, sc); + if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); + if_setioctlfn(ifp, axgbe_ioctl); /* TODO - change it to iflib way */ - ifp->if_qflush = axgbe_qflush; - ifp->if_get_counter = axgbe_get_counter; + if_setqflushfn(ifp, axgbe_qflush); + if_setgetcounterfn(ifp, axgbe_get_counter); /* TODO: Support HW offload */ - ifp->if_capabilities = 0; - ifp->if_capenable = 0; - ifp->if_hwassist = 0; + if_setcapabilities(ifp, 0); + if_setcapenable(ifp, 0); + if_sethwassist(ifp, 0); ether_ifattach(ifp, sc->mac_addr); ifmedia_init(&sc->media, IFM_IMASK, axgbe_media_change, axgbe_media_status); #ifdef notyet ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_KR, 0, NULL); #endif ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_KX, 0, NULL); ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); set_bit(XGBE_DOWN, &sc->prv.dev_state); /* TODO - change it to iflib way */ return (0); } static device_method_t axgbe_methods[] = { /* Device interface */ DEVMETHOD(device_probe, axgbe_probe), DEVMETHOD(device_attach, axgbe_attach), { 0, 0 } }; DEFINE_CLASS_0(axgbe, axgbe_driver, axgbe_methods, sizeof(struct axgbe_softc)); DRIVER_MODULE(axgbe, simplebus, axgbe_driver, 0, 0); static struct ofw_compat_data phy_compat_data[] = { { "amd,xgbe-phy-seattle-v1a", true }, { NULL, false } }; static int axgbephy_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_search_compatible(dev, phy_compat_data)->ocd_data) return (ENXIO); device_set_desc(dev, "AMD 10 Gigabit Ethernet"); return (BUS_PROBE_DEFAULT); } static int axgbephy_attach(device_t dev) { phandle_t node; node = ofw_bus_get_node(dev); OF_device_register_xref(OF_xref_from_node(node), dev); return (0); } static device_method_t axgbephy_methods[] = { /* Device interface */ DEVMETHOD(device_probe, axgbephy_probe), DEVMETHOD(device_attach, axgbephy_attach), { 0, 0 } }; DEFINE_CLASS_0(axgbephy, axgbephy_driver, axgbephy_methods, 0); EARLY_DRIVER_MODULE(axgbephy, simplebus, axgbephy_driver, 0, 0, BUS_PASS_RESOURCE + BUS_PASS_ORDER_MIDDLE); diff --git a/sys/dev/axgbe/if_axgbe_pci.c b/sys/dev/axgbe/if_axgbe_pci.c index 4b0bdaa030a6..1bc716d32aea 100644 --- a/sys/dev/axgbe/if_axgbe_pci.c +++ b/sys/dev/axgbe/if_axgbe_pci.c @@ -1,2437 +1,2438 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Advanced Micro Devices, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Contact Information : * Rajesh Kumar * Shreyank Amartya */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xgbe.h" #include "xgbe-common.h" #include "miibus_if.h" #include "ifdi_if.h" #include "opt_inet.h" #include "opt_inet6.h" MALLOC_DEFINE(M_AXGBE, "axgbe", "axgbe data"); extern struct if_txrx axgbe_txrx; static int axgbe_sph_enable; /* Function prototypes */ static void *axgbe_register(device_t); static int axgbe_if_attach_pre(if_ctx_t); static int axgbe_if_attach_post(if_ctx_t); static int axgbe_if_detach(if_ctx_t); static void axgbe_if_stop(if_ctx_t); static void axgbe_if_init(if_ctx_t); /* Queue related routines */ static int axgbe_if_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static int axgbe_if_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static int axgbe_alloc_channels(if_ctx_t); static void axgbe_free_channels(struct axgbe_if_softc *); static void axgbe_if_queues_free(if_ctx_t); static int axgbe_if_tx_queue_intr_enable(if_ctx_t, uint16_t); static int axgbe_if_rx_queue_intr_enable(if_ctx_t, uint16_t); /* Interrupt related routines */ static void axgbe_if_disable_intr(if_ctx_t); static void axgbe_if_enable_intr(if_ctx_t); static int axgbe_if_msix_intr_assign(if_ctx_t, int); static void xgbe_free_intr(struct xgbe_prv_data *, struct resource *, void *, int); /* Init and Iflib routines */ static void axgbe_pci_init(struct xgbe_prv_data *); static void axgbe_pci_stop(if_ctx_t); static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *, struct xgbe_channel *); static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *); static int axgbe_if_mtu_set(if_ctx_t, uint32_t); static void axgbe_if_update_admin_status(if_ctx_t); static void axgbe_if_media_status(if_ctx_t, struct ifmediareq *); static int axgbe_if_media_change(if_ctx_t); static int axgbe_if_promisc_set(if_ctx_t, int); static uint64_t axgbe_if_get_counter(if_ctx_t, ift_counter); static void axgbe_if_vlan_register(if_ctx_t, uint16_t); static void axgbe_if_vlan_unregister(if_ctx_t, uint16_t); #if __FreeBSD_version >= 1300000 static bool axgbe_if_needs_restart(if_ctx_t, enum iflib_restart_event); #endif static void axgbe_set_counts(if_ctx_t); static void axgbe_init_iflib_softc_ctx(struct axgbe_if_softc *); /* MII interface registered functions */ static int axgbe_miibus_readreg(device_t, int, int); static int axgbe_miibus_writereg(device_t, int, int, int); static void axgbe_miibus_statchg(device_t); /* ISR 
routines */ static int axgbe_dev_isr(void *); static void axgbe_ecc_isr(void *); static void axgbe_i2c_isr(void *); static void axgbe_an_isr(void *); static int axgbe_msix_que(void *); /* Timer routines */ static void xgbe_service(void *, int); static void xgbe_service_timer(void *); static void xgbe_init_timers(struct xgbe_prv_data *); static void xgbe_stop_timers(struct xgbe_prv_data *); /* Dump routines */ static void xgbe_dump_prop_registers(struct xgbe_prv_data *); /* * Allocate only for MAC (BAR0) and PCS (BAR1) registers, and just point the * MSI-X table bar (BAR5) to iflib. iflib will do the allocation for MSI-X * table. */ static struct resource_spec axgbe_pci_mac_spec[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, /* MAC regs */ { SYS_RES_MEMORY, PCIR_BAR(1), RF_ACTIVE }, /* PCS regs */ { -1, 0 } }; static pci_vendor_info_t axgbe_vendor_info_array[] = { PVID(0x1022, 0x1458, "AMD 10 Gigabit Ethernet Driver"), PVID(0x1022, 0x1459, "AMD 10 Gigabit Ethernet Driver"), PVID_END }; static struct xgbe_version_data xgbe_v2a = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, .xpcs_access = XGBE_XPCS_ACCESS_V2, .mmc_64bit = 1, .tx_max_fifo_size = 229376, .rx_max_fifo_size = 229376, .tx_tstamp_workaround = 1, .ecc_support = 1, .i2c_support = 1, .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, .an_cdr_workaround = 1, }; static struct xgbe_version_data xgbe_v2b = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, .xpcs_access = XGBE_XPCS_ACCESS_V2, .mmc_64bit = 1, .tx_max_fifo_size = 65536, .rx_max_fifo_size = 65536, .tx_tstamp_workaround = 1, .ecc_support = 1, .i2c_support = 1, .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, .an_cdr_workaround = 1, }; /* Device Interface */ static device_method_t ax_methods[] = { DEVMETHOD(device_register, axgbe_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), /* MII interface */ DEVMETHOD(miibus_readreg, axgbe_miibus_readreg), DEVMETHOD(miibus_writereg, axgbe_miibus_writereg), DEVMETHOD(miibus_statchg, axgbe_miibus_statchg), DEVMETHOD_END }; static driver_t ax_driver = { "ax", ax_methods, sizeof(struct axgbe_if_softc), }; DRIVER_MODULE(ax, pci, ax_driver, 0, 0); DRIVER_MODULE(miibus, ax, miibus_driver, 0, 0); IFLIB_PNP_INFO(pci, ax_driver, axgbe_vendor_info_array); MODULE_DEPEND(ax, pci, 1, 1, 1); MODULE_DEPEND(ax, ether, 1, 1, 1); MODULE_DEPEND(ax, iflib, 1, 1, 1); MODULE_DEPEND(ax, miibus, 1, 1, 1); /* Iflib Interface */ static device_method_t axgbe_if_methods[] = { DEVMETHOD(ifdi_attach_pre, axgbe_if_attach_pre), DEVMETHOD(ifdi_attach_post, axgbe_if_attach_post), DEVMETHOD(ifdi_detach, axgbe_if_detach), DEVMETHOD(ifdi_init, axgbe_if_init), DEVMETHOD(ifdi_stop, axgbe_if_stop), DEVMETHOD(ifdi_msix_intr_assign, axgbe_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, axgbe_if_enable_intr), DEVMETHOD(ifdi_intr_disable, axgbe_if_disable_intr), DEVMETHOD(ifdi_tx_queue_intr_enable, axgbe_if_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, axgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, axgbe_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, axgbe_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, axgbe_if_queues_free), DEVMETHOD(ifdi_update_admin_status, axgbe_if_update_admin_status), DEVMETHOD(ifdi_mtu_set, axgbe_if_mtu_set), DEVMETHOD(ifdi_media_status, axgbe_if_media_status), DEVMETHOD(ifdi_media_change, axgbe_if_media_change),
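/* iflib calls this table in place of the legacy ifnet callbacks used by the FDT driver above: ifdi_attach_pre allocates hardware resources and fills the softc context, then ifdi_msix_intr_assign and the queue-alloc methods run, and ifdi_attach_post completes bring-up; ifdi_init/ifdi_stop take over the old if_init path. */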
DEVMETHOD(ifdi_promisc_set, axgbe_if_promisc_set), DEVMETHOD(ifdi_get_counter, axgbe_if_get_counter), DEVMETHOD(ifdi_vlan_register, axgbe_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, axgbe_if_vlan_unregister), #if __FreeBSD_version >= 1300000 DEVMETHOD(ifdi_needs_restart, axgbe_if_needs_restart), #endif DEVMETHOD_END }; static driver_t axgbe_if_driver = { "axgbe_if", axgbe_if_methods, sizeof(struct axgbe_if_softc) }; /* Iflib Shared Context */ static struct if_shared_ctx axgbe_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_driver = &axgbe_if_driver, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = XGBE_TSO_MAX_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = XGBE_TSO_MAX_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_maxsegsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_admin_intrcnt = 4, .isc_vendor_info = axgbe_vendor_info_array, .isc_driver_version = XGBE_DRV_VERSION, .isc_ntxd_min = {XGBE_TX_DESC_CNT_MIN}, .isc_ntxd_default = {XGBE_TX_DESC_CNT_DEFAULT}, .isc_ntxd_max = {XGBE_TX_DESC_CNT_MAX}, .isc_ntxqs = 1, .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_NEED_ETHER_PAD, }; static void * axgbe_register(device_t dev) { int axgbe_nfl; int axgbe_nrxqs; int error, i; char *value = NULL; value = kern_getenv("dev.ax.sph_enable"); if (value) { axgbe_sph_enable = strtol(value, NULL, 10); freeenv(value); } else { /* * No tunable found, generate one with default values * Note: only a reboot will reveal the new kenv */ error = kern_setenv("dev.ax.sph_enable", "1"); if (error) { printf("Error setting tunable, using default driver values\n"); } axgbe_sph_enable = 1; } if (!axgbe_sph_enable) { axgbe_nfl = 1; axgbe_nrxqs = 1; } else { axgbe_nfl = 2; axgbe_nrxqs = 2; } axgbe_sctx_init.isc_nfl = axgbe_nfl; axgbe_sctx_init.isc_nrxqs = axgbe_nrxqs; for (i = 0 ; i < axgbe_nrxqs ; i++) { axgbe_sctx_init.isc_nrxd_min[i] = XGBE_RX_DESC_CNT_MIN; axgbe_sctx_init.isc_nrxd_default[i] = XGBE_RX_DESC_CNT_DEFAULT; axgbe_sctx_init.isc_nrxd_max[i] = XGBE_RX_DESC_CNT_MAX; } return (&axgbe_sctx_init); } /* MII Interface Functions */ static int axgbe_miibus_readreg(device_t dev, int phy, int reg) { struct axgbe_if_softc *sc = iflib_get_softc(device_get_softc(dev)); struct xgbe_prv_data *pdata = &sc->pdata; int val; axgbe_printf(3, "%s: phy %d reg %d\n", __func__, phy, reg); val = xgbe_phy_mii_read(pdata, phy, reg); axgbe_printf(2, "%s: val 0x%x\n", __func__, val); return (val & 0xFFFF); } static int axgbe_miibus_writereg(device_t dev, int phy, int reg, int val) { struct axgbe_if_softc *sc = iflib_get_softc(device_get_softc(dev)); struct xgbe_prv_data *pdata = &sc->pdata; axgbe_printf(3, "%s: phy %d reg %d val 0x%x\n", __func__, phy, reg, val); xgbe_phy_mii_write(pdata, phy, reg, val); return(0); } static void axgbe_miibus_statchg(device_t dev) { struct axgbe_if_softc *sc = iflib_get_softc(device_get_softc(dev)); struct xgbe_prv_data *pdata = &sc->pdata; struct mii_data *mii = device_get_softc(pdata->axgbe_miibus); - struct ifnet *ifp = pdata->netdev; + if_t ifp = pdata->netdev; int bmsr; axgbe_printf(2, "%s: Link %d/%d\n", __func__, pdata->phy.link, pdata->phy_link); if (mii == NULL || ifp == NULL || - (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + (if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) return; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: pdata->phy.link = 
1; break; case IFM_1000_T: case IFM_1000_SX: case IFM_2500_SX: pdata->phy.link = 1; break; default: pdata->phy.link = 0; break; } } else pdata->phy_link = 0; bmsr = axgbe_miibus_readreg(pdata->dev, pdata->mdio_addr, MII_BMSR); if (bmsr & BMSR_ANEG) { axgbe_printf(2, "%s: Autoneg Done\n", __func__); /* Raise AN Interrupt */ XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_CL73_INT_MASK); } } static int axgbe_if_attach_pre(if_ctx_t ctx) { struct axgbe_if_softc *sc; struct xgbe_prv_data *pdata; struct resource *mac_res[2]; if_softc_ctx_t scctx; if_shared_ctx_t sctx; device_t dev; unsigned int ma_lo, ma_hi; unsigned int reg; int ret; sc = iflib_get_softc(ctx); sc->pdata.dev = dev = iflib_get_dev(ctx); sc->sctx = sctx = iflib_get_sctx(ctx); sc->scctx = scctx = iflib_get_softc_ctx(ctx); sc->media = iflib_get_media(ctx); sc->ctx = ctx; sc->link_status = LINK_STATE_DOWN; pdata = &sc->pdata; pdata->netdev = iflib_get_ifp(ctx); spin_lock_init(&pdata->xpcs_lock); /* Initialize locks */ mtx_init(&pdata->rss_mutex, "xgbe rss mutex lock", NULL, MTX_DEF); mtx_init(&pdata->mdio_mutex, "xgbe MDIO mutex lock", NULL, MTX_SPIN); /* Allocate VLAN bitmap */ pdata->active_vlans = bit_alloc(VLAN_NVID, M_AXGBE, M_WAITOK|M_ZERO); pdata->num_active_vlans = 0; /* Get the version data */ DBGPR("%s: Device ID: 0x%x\n", __func__, pci_get_device(dev)); if (pci_get_device(dev) == 0x1458) sc->pdata.vdata = &xgbe_v2a; else if (pci_get_device(dev) == 0x1459) sc->pdata.vdata = &xgbe_v2b; /* PCI setup */ if (bus_alloc_resources(dev, axgbe_pci_mac_spec, mac_res)) { axgbe_error("Unable to allocate bus resources\n"); ret = ENXIO; goto free_vlans; } sc->pdata.xgmac_res = mac_res[0]; sc->pdata.xpcs_res = mac_res[1]; /* Set the PCS indirect addressing definition registers*/ pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF; pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT; /* Configure the PCS indirect addressing support */ reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg); pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET); pdata->xpcs_window <<= 6; pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE); pdata->xpcs_window_size = 1 << (pdata->xpcs_window_size + 7); pdata->xpcs_window_mask = pdata->xpcs_window_size - 1; DBGPR("xpcs window def : %#010x\n", pdata->xpcs_window_def_reg); DBGPR("xpcs window sel : %#010x\n", pdata->xpcs_window_sel_reg); DBGPR("xpcs window : %#010x\n", pdata->xpcs_window); DBGPR("xpcs window size : %#010x\n", pdata->xpcs_window_size); DBGPR("xpcs window mask : %#010x\n", pdata->xpcs_window_mask); /* Enable all interrupts in the hardware */ XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); /* Retrieve the MAC address */ ma_lo = XP_IOREAD(pdata, XP_MAC_ADDR_LO); ma_hi = XP_IOREAD(pdata, XP_MAC_ADDR_HI); pdata->mac_addr[0] = ma_lo & 0xff; pdata->mac_addr[1] = (ma_lo >> 8) & 0xff; pdata->mac_addr[2] = (ma_lo >>16) & 0xff; pdata->mac_addr[3] = (ma_lo >> 24) & 0xff; pdata->mac_addr[4] = ma_hi & 0xff; pdata->mac_addr[5] = (ma_hi >> 8) & 0xff; if (!XP_GET_BITS(ma_hi, XP_MAC_ADDR_HI, VALID)) { axgbe_error("Invalid mac address\n"); ret = EINVAL; goto release_bus_resource; } iflib_set_mac(ctx, pdata->mac_addr); /* Clock settings */ pdata->sysclk_rate = XGBE_V2_DMA_CLOCK_FREQ; pdata->ptpclk_rate = XGBE_V2_PTP_CLOCK_FREQ; /* Set the DMA coherency values */ pdata->coherent = 1; pdata->arcr = XGBE_DMA_PCI_ARCR; pdata->awcr = XGBE_DMA_PCI_AWCR; pdata->awarcr = XGBE_DMA_PCI_AWARCR; /* Read the port property registers */ pdata->pp0 = XP_IOREAD(pdata, XP_PROP_0); pdata->pp1 = XP_IOREAD(pdata, 
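/* XP_PROP_0 through XP_PROP_4 are read-only port-property registers; PROP_1 carries the MAX_TX/RX_DMA and MAX_TX/RX_QUEUES limits and PROP_2 the FIFO sizes that are decoded just below. */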
XP_PROP_1); pdata->pp2 = XP_IOREAD(pdata, XP_PROP_2); pdata->pp3 = XP_IOREAD(pdata, XP_PROP_3); pdata->pp4 = XP_IOREAD(pdata, XP_PROP_4); DBGPR("port property 0 = %#010x\n", pdata->pp0); DBGPR("port property 1 = %#010x\n", pdata->pp1); DBGPR("port property 2 = %#010x\n", pdata->pp2); DBGPR("port property 3 = %#010x\n", pdata->pp3); DBGPR("port property 4 = %#010x\n", pdata->pp4); /* Set the maximum channels and queues */ pdata->tx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_TX_DMA); pdata->rx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_RX_DMA); pdata->tx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_TX_QUEUES); pdata->rx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_RX_QUEUES); DBGPR("max tx/rx channel count = %u/%u\n", pdata->tx_max_channel_count, pdata->rx_max_channel_count); DBGPR("max tx/rx hw queue count = %u/%u\n", pdata->tx_max_q_count, pdata->rx_max_q_count); axgbe_set_counts(ctx); /* Set the maximum fifo amounts */ pdata->tx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2, TX_FIFO_SIZE); pdata->tx_max_fifo_size *= 16384; pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size, pdata->vdata->tx_max_fifo_size); pdata->rx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2, RX_FIFO_SIZE); pdata->rx_max_fifo_size *= 16384; pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size, pdata->vdata->rx_max_fifo_size); DBGPR("max tx/rx max fifo size = %u/%u\n", pdata->tx_max_fifo_size, pdata->rx_max_fifo_size); /* Initialize IFLIB if_softc_ctx_t */ axgbe_init_iflib_softc_ctx(sc); /* Alloc channels */ if (axgbe_alloc_channels(ctx)) { axgbe_error("Unable to allocate channel memory\n"); ret = ENOMEM; goto release_bus_resource; } TASK_INIT(&pdata->service_work, 0, xgbe_service, pdata); /* create the workqueue */ pdata->dev_workqueue = taskqueue_create("axgbe", M_WAITOK, taskqueue_thread_enqueue, &pdata->dev_workqueue); if (pdata->dev_workqueue == NULL) { axgbe_error("Unable to allocate workqueue\n"); ret = ENOMEM; goto free_channels; } ret = taskqueue_start_threads(&pdata->dev_workqueue, 1, PI_NET, "axgbe dev taskq"); if (ret) { axgbe_error("Unable to start taskqueue\n"); ret = ENOMEM; goto free_task_queue; } /* Init timers */ xgbe_init_timers(pdata); return (0); free_task_queue: taskqueue_free(pdata->dev_workqueue); free_channels: axgbe_free_channels(sc); release_bus_resource: bus_release_resources(dev, axgbe_pci_mac_spec, mac_res); free_vlans: free(pdata->active_vlans, M_AXGBE); return (ret); } /* axgbe_if_attach_pre */ static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata) { xgbe_init_function_ptrs_dev(&pdata->hw_if); xgbe_init_function_ptrs_phy(&pdata->phy_if); xgbe_init_function_ptrs_i2c(&pdata->i2c_if); xgbe_init_function_ptrs_desc(&pdata->desc_if); pdata->vdata->init_function_ptrs_phy_impl(&pdata->phy_if); } static void axgbe_set_counts(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; cpuset_t lcpus; int cpu_count, err; size_t len; /* Set all function pointers */ xgbe_init_all_fptrs(pdata); /* Populate the hardware features */ xgbe_get_all_hw_features(pdata); if (!pdata->tx_max_channel_count) pdata->tx_max_channel_count = pdata->hw_feat.tx_ch_cnt; if (!pdata->rx_max_channel_count) pdata->rx_max_channel_count = pdata->hw_feat.rx_ch_cnt; if (!pdata->tx_max_q_count) pdata->tx_max_q_count = pdata->hw_feat.tx_q_cnt; if (!pdata->rx_max_q_count) pdata->rx_max_q_count = pdata->hw_feat.rx_q_cnt; /* * Calculate the number of Tx and Rx rings to be created * -Tx (DMA) Channels map 1-to-1 to Tx 
Queues so set * the number of Tx queues to the number of Tx channels * enabled * -Rx (DMA) Channels do not map 1-to-1 so use the actual * number of Rx queues or maximum allowed */ /* Get cpu count from sysctl */ len = sizeof(cpu_count); err = kernel_sysctlbyname(curthread, "hw.ncpu", &cpu_count, &len, NULL, 0, NULL, 0); if (err) { axgbe_error("Unable to fetch number of cpus\n"); cpu_count = 1; } if (bus_get_cpus(pdata->dev, INTR_CPUS, sizeof(lcpus), &lcpus) != 0) { axgbe_error("Unable to fetch CPU list\n"); /* TODO - handle CPU_COPY(&all_cpus, &lcpus); */ } DBGPR("ncpu %d intrcpu %d\n", cpu_count, CPU_COUNT(&lcpus)); pdata->tx_ring_count = min(CPU_COUNT(&lcpus), pdata->hw_feat.tx_ch_cnt); pdata->tx_ring_count = min(pdata->tx_ring_count, pdata->tx_max_channel_count); pdata->tx_ring_count = min(pdata->tx_ring_count, pdata->tx_max_q_count); pdata->tx_q_count = pdata->tx_ring_count; pdata->rx_ring_count = min(CPU_COUNT(&lcpus), pdata->hw_feat.rx_ch_cnt); pdata->rx_ring_count = min(pdata->rx_ring_count, pdata->rx_max_channel_count); pdata->rx_q_count = min(pdata->hw_feat.rx_q_cnt, pdata->rx_max_q_count); DBGPR("TX/RX max channel count = %u/%u\n", pdata->tx_max_channel_count, pdata->rx_max_channel_count); DBGPR("TX/RX max queue count = %u/%u\n", pdata->tx_max_q_count, pdata->rx_max_q_count); DBGPR("TX/RX DMA ring count = %u/%u\n", pdata->tx_ring_count, pdata->rx_ring_count); DBGPR("TX/RX hardware queue count = %u/%u\n", pdata->tx_q_count, pdata->rx_q_count); } /* axgbe_set_counts */ static void axgbe_init_iflib_softc_ctx(struct axgbe_if_softc *sc) { struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; if_shared_ctx_t sctx = sc->sctx; int i; scctx->isc_nrxqsets = pdata->rx_q_count; scctx->isc_ntxqsets = pdata->tx_q_count; scctx->isc_msix_bar = pci_msix_table_bar(pdata->dev); scctx->isc_tx_nsegments = 32; for (i = 0; i < sctx->isc_ntxqs; i++) { scctx->isc_txqsizes[i] = roundup2(scctx->isc_ntxd[i] * sizeof(struct xgbe_ring_desc), 128); scctx->isc_txd_size[i] = sizeof(struct xgbe_ring_desc); } for (i = 0; i < sctx->isc_nrxqs; i++) { scctx->isc_rxqsizes[i] = roundup2(scctx->isc_nrxd[i] * sizeof(struct xgbe_ring_desc), 128); scctx->isc_rxd_size[i] = sizeof(struct xgbe_ring_desc); } scctx->isc_tx_tso_segments_max = 32; scctx->isc_tx_tso_size_max = XGBE_TSO_MAX_SIZE; scctx->isc_tx_tso_segsize_max = PAGE_SIZE; /* * Set capabilities * 1) IFLIB automatically adds IFCAP_HWSTATS, so need to set explicitly * 2) isc_tx_csum_flags is mandatory if IFCAP_TXCSUM (included in * IFCAP_HWCSUM) is set */ scctx->isc_tx_csum_flags = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6 | CSUM_TSO); scctx->isc_capenable = (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTSO); scctx->isc_capabilities = scctx->isc_capenable; /* * Set rss_table_size alone when adding RSS support. 
rss_table_mask * will be set by IFLIB based on rss_table_size */ scctx->isc_rss_table_size = XGBE_RSS_MAX_TABLE_SIZE; scctx->isc_ntxqsets_max = XGBE_MAX_QUEUES; scctx->isc_nrxqsets_max = XGBE_MAX_QUEUES; scctx->isc_txrx = &axgbe_txrx; } static int axgbe_alloc_channels(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_channel *channel; int i, j, count; DBGPR("%s: txqs %d rxqs %d\n", __func__, pdata->tx_ring_count, pdata->rx_ring_count); /* Iflibe sets based on isc_ntxqsets/nrxqsets */ count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count); /* Allocate channel memory */ for (i = 0; i < count ; i++) { channel = (struct xgbe_channel*)malloc(sizeof(struct xgbe_channel), M_AXGBE, M_NOWAIT | M_ZERO); if (channel == NULL) { for (j = 0; j < i; j++) { free(pdata->channel[j], M_AXGBE); pdata->channel[j] = NULL; } return (ENOMEM); } pdata->channel[i] = channel; } pdata->total_channel_count = count; DBGPR("Channel count set to: %u\n", pdata->total_channel_count); for (i = 0; i < count; i++) { channel = pdata->channel[i]; snprintf(channel->name, sizeof(channel->name), "channel-%d",i); channel->pdata = pdata; channel->queue_index = i; channel->dma_tag = rman_get_bustag(pdata->xgmac_res); bus_space_subregion(channel->dma_tag, rman_get_bushandle(pdata->xgmac_res), DMA_CH_BASE + (DMA_CH_INC * i), DMA_CH_INC, &channel->dma_handle); channel->tx_ring = NULL; channel->rx_ring = NULL; } return (0); } /* axgbe_alloc_channels */ static void axgbe_free_channels(struct axgbe_if_softc *sc) { struct xgbe_prv_data *pdata = &sc->pdata; int i; for (i = 0; i < pdata->total_channel_count ; i++) { free(pdata->channel[i], M_AXGBE); pdata->channel[i] = NULL; } pdata->total_channel_count = 0; pdata->channel_count = 0; } static void xgbe_service(void *ctx, int pending) { struct xgbe_prv_data *pdata = ctx; struct axgbe_if_softc *sc = (struct axgbe_if_softc *)pdata; bool prev_state = false; /* Get previous link status */ prev_state = pdata->phy.link; pdata->phy_if.phy_status(pdata); if (prev_state != pdata->phy.link) { pdata->phy_link = pdata->phy.link; axgbe_if_update_admin_status(sc->ctx); } callout_reset(&pdata->service_timer, 1*hz, xgbe_service_timer, pdata); } static void xgbe_service_timer(void *data) { struct xgbe_prv_data *pdata = data; taskqueue_enqueue(pdata->dev_workqueue, &pdata->service_work); } static void xgbe_init_timers(struct xgbe_prv_data *pdata) { callout_init(&pdata->service_timer, 1); } static void xgbe_start_timers(struct xgbe_prv_data *pdata) { callout_reset(&pdata->service_timer, 1*hz, xgbe_service_timer, pdata); } static void xgbe_stop_timers(struct xgbe_prv_data *pdata) { callout_drain(&pdata->service_timer); callout_stop(&pdata->service_timer); } static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata) { axgbe_printf(1, "\n************* PHY Reg dump *********************\n"); axgbe_printf(1, "PCS Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1)); axgbe_printf(1, "PCS Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1)); axgbe_printf(1, "Phy Id (PHYS ID 1 %#06x)= %#06x\n", MDIO_DEVID1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1)); axgbe_printf(1, "Phy Id (PHYS ID 2 %#06x)= %#06x\n", MDIO_DEVID2, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2)); axgbe_printf(1, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1)); axgbe_printf(1, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS2, XMDIO_READ(pdata, 
MDIO_MMD_PCS, MDIO_DEVS2)); axgbe_printf(1, "Auto-Neg Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1)); axgbe_printf(1, "Auto-Neg Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1)); axgbe_printf(1, "Auto-Neg Ad Reg 1 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE)); axgbe_printf(1, "Auto-Neg Ad Reg 2 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1)); axgbe_printf(1, "Auto-Neg Ad Reg 3 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 2, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2)); axgbe_printf(1, "Auto-Neg Completion Reg (%#06x) = %#06x\n", MDIO_AN_COMP_STAT, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT)); axgbe_printf(1, "\n************************************************\n"); } static void xgbe_dump_prop_registers(struct xgbe_prv_data *pdata) { int i; axgbe_printf(1, "\n************* PROP Reg dump ********************\n"); for (i = 0 ; i < 38 ; i++) { axgbe_printf(1, "PROP Offset 0x%08x = %08x\n", (XP_PROP_0 + (i * 4)), XP_IOREAD(pdata, (XP_PROP_0 + (i * 4)))); } } static void xgbe_dump_dma_registers(struct xgbe_prv_data *pdata, int ch) { struct xgbe_channel *channel; int i; axgbe_printf(1, "\n************* DMA Reg dump *********************\n"); axgbe_printf(1, "DMA MR Reg (%08x) = %08x\n", DMA_MR, XGMAC_IOREAD(pdata, DMA_MR)); axgbe_printf(1, "DMA SBMR Reg (%08x) = %08x\n", DMA_SBMR, XGMAC_IOREAD(pdata, DMA_SBMR)); axgbe_printf(1, "DMA ISR Reg (%08x) = %08x\n", DMA_ISR, XGMAC_IOREAD(pdata, DMA_ISR)); axgbe_printf(1, "DMA AXIARCR Reg (%08x) = %08x\n", DMA_AXIARCR, XGMAC_IOREAD(pdata, DMA_AXIARCR)); axgbe_printf(1, "DMA AXIAWCR Reg (%08x) = %08x\n", DMA_AXIAWCR, XGMAC_IOREAD(pdata, DMA_AXIAWCR)); axgbe_printf(1, "DMA AXIAWARCR Reg (%08x) = %08x\n", DMA_AXIAWARCR, XGMAC_IOREAD(pdata, DMA_AXIAWARCR)); axgbe_printf(1, "DMA DSR0 Reg (%08x) = %08x\n", DMA_DSR0, XGMAC_IOREAD(pdata, DMA_DSR0)); axgbe_printf(1, "DMA DSR1 Reg (%08x) = %08x\n", DMA_DSR1, XGMAC_IOREAD(pdata, DMA_DSR1)); axgbe_printf(1, "DMA DSR2 Reg (%08x) = %08x\n", DMA_DSR2, XGMAC_IOREAD(pdata, DMA_DSR2)); axgbe_printf(1, "DMA DSR3 Reg (%08x) = %08x\n", DMA_DSR3, XGMAC_IOREAD(pdata, DMA_DSR3)); axgbe_printf(1, "DMA DSR4 Reg (%08x) = %08x\n", DMA_DSR4, XGMAC_IOREAD(pdata, DMA_DSR4)); axgbe_printf(1, "DMA TXEDMACR Reg (%08x) = %08x\n", DMA_TXEDMACR, XGMAC_IOREAD(pdata, DMA_TXEDMACR)); axgbe_printf(1, "DMA RXEDMACR Reg (%08x) = %08x\n", DMA_RXEDMACR, XGMAC_IOREAD(pdata, DMA_RXEDMACR)); for (i = 0 ; i < 8 ; i++ ) { if (ch >= 0) { if (i != ch) continue; } channel = pdata->channel[i]; axgbe_printf(1, "\n************* DMA CH %d dump ****************\n", i); axgbe_printf(1, "DMA_CH_CR Reg (%08x) = %08x\n", DMA_CH_CR, XGMAC_DMA_IOREAD(channel, DMA_CH_CR)); axgbe_printf(1, "DMA_CH_TCR Reg (%08x) = %08x\n", DMA_CH_TCR, XGMAC_DMA_IOREAD(channel, DMA_CH_TCR)); axgbe_printf(1, "DMA_CH_RCR Reg (%08x) = %08x\n", DMA_CH_RCR, XGMAC_DMA_IOREAD(channel, DMA_CH_RCR)); axgbe_printf(1, "DMA_CH_TDLR_HI Reg (%08x) = %08x\n", DMA_CH_TDLR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_TDLR_HI)); axgbe_printf(1, "DMA_CH_TDLR_LO Reg (%08x) = %08x\n", DMA_CH_TDLR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_TDLR_LO)); axgbe_printf(1, "DMA_CH_RDLR_HI Reg (%08x) = %08x\n", DMA_CH_RDLR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_RDLR_HI)); axgbe_printf(1, "DMA_CH_RDLR_LO Reg (%08x) = %08x\n", DMA_CH_RDLR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDLR_LO)); axgbe_printf(1, "DMA_CH_TDTR_LO Reg (%08x) = %08x\n", DMA_CH_TDTR_LO, 
XGMAC_DMA_IOREAD(channel, DMA_CH_TDTR_LO)); axgbe_printf(1, "DMA_CH_RDTR_LO Reg (%08x) = %08x\n", DMA_CH_RDTR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDTR_LO)); axgbe_printf(1, "DMA_CH_TDRLR Reg (%08x) = %08x\n", DMA_CH_TDRLR, XGMAC_DMA_IOREAD(channel, DMA_CH_TDRLR)); axgbe_printf(1, "DMA_CH_RDRLR Reg (%08x) = %08x\n", DMA_CH_RDRLR, XGMAC_DMA_IOREAD(channel, DMA_CH_RDRLR)); axgbe_printf(1, "DMA_CH_IER Reg (%08x) = %08x\n", DMA_CH_IER, XGMAC_DMA_IOREAD(channel, DMA_CH_IER)); axgbe_printf(1, "DMA_CH_RIWT Reg (%08x) = %08x\n", DMA_CH_RIWT, XGMAC_DMA_IOREAD(channel, DMA_CH_RIWT)); axgbe_printf(1, "DMA_CH_CATDR_LO Reg (%08x) = %08x\n", DMA_CH_CATDR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CATDR_LO)); axgbe_printf(1, "DMA_CH_CARDR_LO Reg (%08x) = %08x\n", DMA_CH_CARDR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CARDR_LO)); axgbe_printf(1, "DMA_CH_CATBR_HI Reg (%08x) = %08x\n", DMA_CH_CATBR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_CATBR_HI)); axgbe_printf(1, "DMA_CH_CATBR_LO Reg (%08x) = %08x\n", DMA_CH_CATBR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CATBR_LO)); axgbe_printf(1, "DMA_CH_CARBR_HI Reg (%08x) = %08x\n", DMA_CH_CARBR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_CARBR_HI)); axgbe_printf(1, "DMA_CH_CARBR_LO Reg (%08x) = %08x\n", DMA_CH_CARBR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CARBR_LO)); axgbe_printf(1, "DMA_CH_SR Reg (%08x) = %08x\n", DMA_CH_SR, XGMAC_DMA_IOREAD(channel, DMA_CH_SR)); axgbe_printf(1, "DMA_CH_DSR Reg (%08x) = %08x\n", DMA_CH_DSR, XGMAC_DMA_IOREAD(channel, DMA_CH_DSR)); axgbe_printf(1, "DMA_CH_DCFL Reg (%08x) = %08x\n", DMA_CH_DCFL, XGMAC_DMA_IOREAD(channel, DMA_CH_DCFL)); axgbe_printf(1, "DMA_CH_MFC Reg (%08x) = %08x\n", DMA_CH_MFC, XGMAC_DMA_IOREAD(channel, DMA_CH_MFC)); axgbe_printf(1, "DMA_CH_TDTRO Reg (%08x) = %08x\n", DMA_CH_TDTRO, XGMAC_DMA_IOREAD(channel, DMA_CH_TDTRO)); axgbe_printf(1, "DMA_CH_RDTRO Reg (%08x) = %08x\n", DMA_CH_RDTRO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDTRO)); axgbe_printf(1, "DMA_CH_TDWRO Reg (%08x) = %08x\n", DMA_CH_TDWRO, XGMAC_DMA_IOREAD(channel, DMA_CH_TDWRO)); axgbe_printf(1, "DMA_CH_RDWRO Reg (%08x) = %08x\n", DMA_CH_RDWRO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDWRO)); } } static void xgbe_dump_mtl_registers(struct xgbe_prv_data *pdata) { int i; axgbe_printf(1, "\n************* MTL Reg dump *********************\n"); axgbe_printf(1, "MTL OMR Reg (%08x) = %08x\n", MTL_OMR, XGMAC_IOREAD(pdata, MTL_OMR)); axgbe_printf(1, "MTL FDCR Reg (%08x) = %08x\n", MTL_FDCR, XGMAC_IOREAD(pdata, MTL_FDCR)); axgbe_printf(1, "MTL FDSR Reg (%08x) = %08x\n", MTL_FDSR, XGMAC_IOREAD(pdata, MTL_FDSR)); axgbe_printf(1, "MTL FDDR Reg (%08x) = %08x\n", MTL_FDDR, XGMAC_IOREAD(pdata, MTL_FDDR)); axgbe_printf(1, "MTL ISR Reg (%08x) = %08x\n", MTL_ISR, XGMAC_IOREAD(pdata, MTL_ISR)); axgbe_printf(1, "MTL RQDCM0R Reg (%08x) = %08x\n", MTL_RQDCM0R, XGMAC_IOREAD(pdata, MTL_RQDCM0R)); axgbe_printf(1, "MTL RQDCM1R Reg (%08x) = %08x\n", MTL_RQDCM1R, XGMAC_IOREAD(pdata, MTL_RQDCM1R)); axgbe_printf(1, "MTL RQDCM2R Reg (%08x) = %08x\n", MTL_RQDCM2R, XGMAC_IOREAD(pdata, MTL_RQDCM2R)); axgbe_printf(1, "MTL TCPM0R Reg (%08x) = %08x\n", MTL_TCPM0R, XGMAC_IOREAD(pdata, MTL_TCPM0R)); axgbe_printf(1, "MTL TCPM1R Reg (%08x) = %08x\n", MTL_TCPM1R, XGMAC_IOREAD(pdata, MTL_TCPM1R)); for (i = 0 ; i < 8 ; i++ ) { axgbe_printf(1, "\n************* MTL CH %d dump ****************\n", i); axgbe_printf(1, "MTL_Q_TQOMR Reg (%08x) = %08x\n", MTL_Q_TQOMR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TQOMR)); axgbe_printf(1, "MTL_Q_TQUR Reg (%08x) = %08x\n", MTL_Q_TQUR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TQUR)); axgbe_printf(1, "MTL_Q_TQDR Reg 
(%08x) = %08x\n", MTL_Q_TQDR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TQDR)); axgbe_printf(1, "MTL_Q_TC0ETSCR Reg (%08x) = %08x\n", MTL_Q_TC0ETSCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TC0ETSCR)); axgbe_printf(1, "MTL_Q_TC0ETSSR Reg (%08x) = %08x\n", MTL_Q_TC0ETSSR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TC0ETSSR)); axgbe_printf(1, "MTL_Q_TC0QWR Reg (%08x) = %08x\n", MTL_Q_TC0QWR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TC0QWR)); axgbe_printf(1, "MTL_Q_RQOMR Reg (%08x) = %08x\n", MTL_Q_RQOMR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQOMR)); axgbe_printf(1, "MTL_Q_RQMPOCR Reg (%08x) = %08x\n", MTL_Q_RQMPOCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQMPOCR)); axgbe_printf(1, "MTL_Q_RQDR Reg (%08x) = %08x\n", MTL_Q_RQDR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQDR)); axgbe_printf(1, "MTL_Q_RQCR Reg (%08x) = %08x\n", MTL_Q_RQCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQCR)); axgbe_printf(1, "MTL_Q_RQFCR Reg (%08x) = %08x\n", MTL_Q_RQFCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQFCR)); axgbe_printf(1, "MTL_Q_IER Reg (%08x) = %08x\n", MTL_Q_IER, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_IER)); axgbe_printf(1, "MTL_Q_ISR Reg (%08x) = %08x\n", MTL_Q_ISR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_ISR)); } } static void xgbe_dump_mac_registers(struct xgbe_prv_data *pdata) { axgbe_printf(1, "\n************* MAC Reg dump **********************\n"); axgbe_printf(1, "MAC TCR Reg (%08x) = %08x\n", MAC_TCR, XGMAC_IOREAD(pdata, MAC_TCR)); axgbe_printf(1, "MAC RCR Reg (%08x) = %08x\n", MAC_RCR, XGMAC_IOREAD(pdata, MAC_RCR)); axgbe_printf(1, "MAC PFR Reg (%08x) = %08x\n", MAC_PFR, XGMAC_IOREAD(pdata, MAC_PFR)); axgbe_printf(1, "MAC WTR Reg (%08x) = %08x\n", MAC_WTR, XGMAC_IOREAD(pdata, MAC_WTR)); axgbe_printf(1, "MAC HTR0 Reg (%08x) = %08x\n", MAC_HTR0, XGMAC_IOREAD(pdata, MAC_HTR0)); axgbe_printf(1, "MAC HTR1 Reg (%08x) = %08x\n", MAC_HTR1, XGMAC_IOREAD(pdata, MAC_HTR1)); axgbe_printf(1, "MAC HTR2 Reg (%08x) = %08x\n", MAC_HTR2, XGMAC_IOREAD(pdata, MAC_HTR2)); axgbe_printf(1, "MAC HTR3 Reg (%08x) = %08x\n", MAC_HTR3, XGMAC_IOREAD(pdata, MAC_HTR3)); axgbe_printf(1, "MAC HTR4 Reg (%08x) = %08x\n", MAC_HTR4, XGMAC_IOREAD(pdata, MAC_HTR4)); axgbe_printf(1, "MAC HTR5 Reg (%08x) = %08x\n", MAC_HTR5, XGMAC_IOREAD(pdata, MAC_HTR5)); axgbe_printf(1, "MAC HTR6 Reg (%08x) = %08x\n", MAC_HTR6, XGMAC_IOREAD(pdata, MAC_HTR6)); axgbe_printf(1, "MAC HTR7 Reg (%08x) = %08x\n", MAC_HTR7, XGMAC_IOREAD(pdata, MAC_HTR7)); axgbe_printf(1, "MAC VLANTR Reg (%08x) = %08x\n", MAC_VLANTR, XGMAC_IOREAD(pdata, MAC_VLANTR)); axgbe_printf(1, "MAC VLANHTR Reg (%08x) = %08x\n", MAC_VLANHTR, XGMAC_IOREAD(pdata, MAC_VLANHTR)); axgbe_printf(1, "MAC VLANIR Reg (%08x) = %08x\n", MAC_VLANIR, XGMAC_IOREAD(pdata, MAC_VLANIR)); axgbe_printf(1, "MAC IVLANIR Reg (%08x) = %08x\n", MAC_IVLANIR, XGMAC_IOREAD(pdata, MAC_IVLANIR)); axgbe_printf(1, "MAC RETMR Reg (%08x) = %08x\n", MAC_RETMR, XGMAC_IOREAD(pdata, MAC_RETMR)); axgbe_printf(1, "MAC Q0TFCR Reg (%08x) = %08x\n", MAC_Q0TFCR, XGMAC_IOREAD(pdata, MAC_Q0TFCR)); axgbe_printf(1, "MAC Q1TFCR Reg (%08x) = %08x\n", MAC_Q1TFCR, XGMAC_IOREAD(pdata, MAC_Q1TFCR)); axgbe_printf(1, "MAC Q2TFCR Reg (%08x) = %08x\n", MAC_Q2TFCR, XGMAC_IOREAD(pdata, MAC_Q2TFCR)); axgbe_printf(1, "MAC Q3TFCR Reg (%08x) = %08x\n", MAC_Q3TFCR, XGMAC_IOREAD(pdata, MAC_Q3TFCR)); axgbe_printf(1, "MAC Q4TFCR Reg (%08x) = %08x\n", MAC_Q4TFCR, XGMAC_IOREAD(pdata, MAC_Q4TFCR)); axgbe_printf(1, "MAC Q5TFCR Reg (%08x) = %08x\n", MAC_Q5TFCR, XGMAC_IOREAD(pdata, MAC_Q5TFCR)); axgbe_printf(1, "MAC Q6TFCR Reg (%08x) = %08x\n", MAC_Q6TFCR, XGMAC_IOREAD(pdata, MAC_Q6TFCR)); axgbe_printf(1, "MAC Q7TFCR Reg (%08x) = 
%08x\n", MAC_Q7TFCR, XGMAC_IOREAD(pdata, MAC_Q7TFCR)); axgbe_printf(1, "MAC RFCR Reg (%08x) = %08x\n", MAC_RFCR, XGMAC_IOREAD(pdata, MAC_RFCR)); axgbe_printf(1, "MAC RQC0R Reg (%08x) = %08x\n", MAC_RQC0R, XGMAC_IOREAD(pdata, MAC_RQC0R)); axgbe_printf(1, "MAC RQC1R Reg (%08x) = %08x\n", MAC_RQC1R, XGMAC_IOREAD(pdata, MAC_RQC1R)); axgbe_printf(1, "MAC RQC2R Reg (%08x) = %08x\n", MAC_RQC2R, XGMAC_IOREAD(pdata, MAC_RQC2R)); axgbe_printf(1, "MAC RQC3R Reg (%08x) = %08x\n", MAC_RQC3R, XGMAC_IOREAD(pdata, MAC_RQC3R)); axgbe_printf(1, "MAC ISR Reg (%08x) = %08x\n", MAC_ISR, XGMAC_IOREAD(pdata, MAC_ISR)); axgbe_printf(1, "MAC IER Reg (%08x) = %08x\n", MAC_IER, XGMAC_IOREAD(pdata, MAC_IER)); axgbe_printf(1, "MAC RTSR Reg (%08x) = %08x\n", MAC_RTSR, XGMAC_IOREAD(pdata, MAC_RTSR)); axgbe_printf(1, "MAC PMTCSR Reg (%08x) = %08x\n", MAC_PMTCSR, XGMAC_IOREAD(pdata, MAC_PMTCSR)); axgbe_printf(1, "MAC RWKPFR Reg (%08x) = %08x\n", MAC_RWKPFR, XGMAC_IOREAD(pdata, MAC_RWKPFR)); axgbe_printf(1, "MAC LPICSR Reg (%08x) = %08x\n", MAC_LPICSR, XGMAC_IOREAD(pdata, MAC_LPICSR)); axgbe_printf(1, "MAC LPITCR Reg (%08x) = %08x\n", MAC_LPITCR, XGMAC_IOREAD(pdata, MAC_LPITCR)); axgbe_printf(1, "MAC TIR Reg (%08x) = %08x\n", MAC_TIR, XGMAC_IOREAD(pdata, MAC_TIR)); axgbe_printf(1, "MAC VR Reg (%08x) = %08x\n", MAC_VR, XGMAC_IOREAD(pdata, MAC_VR)); axgbe_printf(1, "MAC DR Reg (%08x) = %08x\n", MAC_DR, XGMAC_IOREAD(pdata, MAC_DR)); axgbe_printf(1, "MAC HWF0R Reg (%08x) = %08x\n", MAC_HWF0R, XGMAC_IOREAD(pdata, MAC_HWF0R)); axgbe_printf(1, "MAC HWF1R Reg (%08x) = %08x\n", MAC_HWF1R, XGMAC_IOREAD(pdata, MAC_HWF1R)); axgbe_printf(1, "MAC HWF2R Reg (%08x) = %08x\n", MAC_HWF2R, XGMAC_IOREAD(pdata, MAC_HWF2R)); axgbe_printf(1, "MAC MDIOSCAR Reg (%08x) = %08x\n", MAC_MDIOSCAR, XGMAC_IOREAD(pdata, MAC_MDIOSCAR)); axgbe_printf(1, "MAC MDIOSCCDR Reg (%08x) = %08x\n", MAC_MDIOSCCDR, XGMAC_IOREAD(pdata, MAC_MDIOSCCDR)); axgbe_printf(1, "MAC MDIOISR Reg (%08x) = %08x\n", MAC_MDIOISR, XGMAC_IOREAD(pdata, MAC_MDIOISR)); axgbe_printf(1, "MAC MDIOIER Reg (%08x) = %08x\n", MAC_MDIOIER, XGMAC_IOREAD(pdata, MAC_MDIOIER)); axgbe_printf(1, "MAC MDIOCL22R Reg (%08x) = %08x\n", MAC_MDIOCL22R, XGMAC_IOREAD(pdata, MAC_MDIOCL22R)); axgbe_printf(1, "MAC GPIOCR Reg (%08x) = %08x\n", MAC_GPIOCR, XGMAC_IOREAD(pdata, MAC_GPIOCR)); axgbe_printf(1, "MAC GPIOSR Reg (%08x) = %08x\n", MAC_GPIOSR, XGMAC_IOREAD(pdata, MAC_GPIOSR)); axgbe_printf(1, "MAC MACA0HR Reg (%08x) = %08x\n", MAC_MACA0HR, XGMAC_IOREAD(pdata, MAC_MACA0HR)); axgbe_printf(1, "MAC MACA0LR Reg (%08x) = %08x\n", MAC_TCR, XGMAC_IOREAD(pdata, MAC_MACA0LR)); axgbe_printf(1, "MAC MACA1HR Reg (%08x) = %08x\n", MAC_MACA1HR, XGMAC_IOREAD(pdata, MAC_MACA1HR)); axgbe_printf(1, "MAC MACA1LR Reg (%08x) = %08x\n", MAC_MACA1LR, XGMAC_IOREAD(pdata, MAC_MACA1LR)); axgbe_printf(1, "MAC RSSCR Reg (%08x) = %08x\n", MAC_RSSCR, XGMAC_IOREAD(pdata, MAC_RSSCR)); axgbe_printf(1, "MAC RSSDR Reg (%08x) = %08x\n", MAC_RSSDR, XGMAC_IOREAD(pdata, MAC_RSSDR)); axgbe_printf(1, "MAC RSSAR Reg (%08x) = %08x\n", MAC_RSSAR, XGMAC_IOREAD(pdata, MAC_RSSAR)); axgbe_printf(1, "MAC TSCR Reg (%08x) = %08x\n", MAC_TSCR, XGMAC_IOREAD(pdata, MAC_TSCR)); axgbe_printf(1, "MAC SSIR Reg (%08x) = %08x\n", MAC_SSIR, XGMAC_IOREAD(pdata, MAC_SSIR)); axgbe_printf(1, "MAC STSR Reg (%08x) = %08x\n", MAC_STSR, XGMAC_IOREAD(pdata, MAC_STSR)); axgbe_printf(1, "MAC STNR Reg (%08x) = %08x\n", MAC_STNR, XGMAC_IOREAD(pdata, MAC_STNR)); axgbe_printf(1, "MAC STSUR Reg (%08x) = %08x\n", MAC_STSUR, XGMAC_IOREAD(pdata, MAC_STSUR)); axgbe_printf(1, "MAC STNUR 
Reg (%08x) = %08x\n", MAC_STNUR, XGMAC_IOREAD(pdata, MAC_STNUR)); axgbe_printf(1, "MAC TSAR Reg (%08x) = %08x\n", MAC_TSAR, XGMAC_IOREAD(pdata, MAC_TSAR)); axgbe_printf(1, "MAC TSSR Reg (%08x) = %08x\n", MAC_TSSR, XGMAC_IOREAD(pdata, MAC_TSSR)); axgbe_printf(1, "MAC TXSNR Reg (%08x) = %08x\n", MAC_TXSNR, XGMAC_IOREAD(pdata, MAC_TXSNR)); axgbe_printf(1, "MAC TXSSR Reg (%08x) = %08x\n", MAC_TXSSR, XGMAC_IOREAD(pdata, MAC_TXSSR)); } static void xgbe_dump_rmon_counters(struct xgbe_prv_data *pdata) { struct xgbe_mmc_stats *stats = &pdata->mmc_stats; axgbe_printf(1, "\n************* RMON counters dump ***************\n"); pdata->hw_if.read_mmc_stats(pdata); axgbe_printf(1, "rmon txoctetcount_gb (%08x) = %08lx\n", MMC_TXOCTETCOUNT_GB_LO, stats->txoctetcount_gb); axgbe_printf(1, "rmon txframecount_gb (%08x) = %08lx\n", MMC_TXFRAMECOUNT_GB_LO, stats->txframecount_gb); axgbe_printf(1, "rmon txbroadcastframes_g (%08x) = %08lx\n", MMC_TXBROADCASTFRAMES_G_LO, stats->txbroadcastframes_g); axgbe_printf(1, "rmon txmulticastframes_g (%08x) = %08lx\n", MMC_TXMULTICASTFRAMES_G_LO, stats->txmulticastframes_g); axgbe_printf(1, "rmon tx64octets_gb (%08x) = %08lx\n", MMC_TX64OCTETS_GB_LO, stats->tx64octets_gb); axgbe_printf(1, "rmon tx65to127octets_gb (%08x) = %08lx\n", MMC_TX65TO127OCTETS_GB_LO, stats->tx65to127octets_gb); axgbe_printf(1, "rmon tx128to255octets_gb (%08x) = %08lx\n", MMC_TX128TO255OCTETS_GB_LO, stats->tx128to255octets_gb); axgbe_printf(1, "rmon tx256to511octets_gb (%08x) = %08lx\n", MMC_TX256TO511OCTETS_GB_LO, stats->tx256to511octets_gb); axgbe_printf(1, "rmon tx512to1023octets_gb (%08x) = %08lx\n", MMC_TX512TO1023OCTETS_GB_LO, stats->tx512to1023octets_gb); axgbe_printf(1, "rmon tx1024tomaxoctets_gb (%08x) = %08lx\n", MMC_TX1024TOMAXOCTETS_GB_LO, stats->tx1024tomaxoctets_gb); axgbe_printf(1, "rmon txunicastframes_gb (%08x) = %08lx\n", MMC_TXUNICASTFRAMES_GB_LO, stats->txunicastframes_gb); axgbe_printf(1, "rmon txmulticastframes_gb (%08x) = %08lx\n", MMC_TXMULTICASTFRAMES_GB_LO, stats->txmulticastframes_gb); axgbe_printf(1, "rmon txbroadcastframes_gb (%08x) = %08lx\n", MMC_TXBROADCASTFRAMES_GB_LO, stats->txbroadcastframes_gb); axgbe_printf(1, "rmon txunderflowerror (%08x) = %08lx\n", MMC_TXUNDERFLOWERROR_LO, stats->txunderflowerror); axgbe_printf(1, "rmon txoctetcount_g (%08x) = %08lx\n", MMC_TXOCTETCOUNT_G_LO, stats->txoctetcount_g); axgbe_printf(1, "rmon txframecount_g (%08x) = %08lx\n", MMC_TXFRAMECOUNT_G_LO, stats->txframecount_g); axgbe_printf(1, "rmon txpauseframes (%08x) = %08lx\n", MMC_TXPAUSEFRAMES_LO, stats->txpauseframes); axgbe_printf(1, "rmon txvlanframes_g (%08x) = %08lx\n", MMC_TXVLANFRAMES_G_LO, stats->txvlanframes_g); axgbe_printf(1, "rmon rxframecount_gb (%08x) = %08lx\n", MMC_RXFRAMECOUNT_GB_LO, stats->rxframecount_gb); axgbe_printf(1, "rmon rxoctetcount_gb (%08x) = %08lx\n", MMC_RXOCTETCOUNT_GB_LO, stats->rxoctetcount_gb); axgbe_printf(1, "rmon rxoctetcount_g (%08x) = %08lx\n", MMC_RXOCTETCOUNT_G_LO, stats->rxoctetcount_g); axgbe_printf(1, "rmon rxbroadcastframes_g (%08x) = %08lx\n", MMC_RXBROADCASTFRAMES_G_LO, stats->rxbroadcastframes_g); axgbe_printf(1, "rmon rxmulticastframes_g (%08x) = %08lx\n", MMC_RXMULTICASTFRAMES_G_LO, stats->rxmulticastframes_g); axgbe_printf(1, "rmon rxcrcerror (%08x) = %08lx\n", MMC_RXCRCERROR_LO, stats->rxcrcerror); axgbe_printf(1, "rmon rxrunterror (%08x) = %08lx\n", MMC_RXRUNTERROR, stats->rxrunterror); axgbe_printf(1, "rmon rxjabbererror (%08x) = %08lx\n", MMC_RXJABBERERROR, stats->rxjabbererror); axgbe_printf(1, "rmon rxundersize_g (%08x) = 
%08lx\n", MMC_RXUNDERSIZE_G, stats->rxundersize_g); axgbe_printf(1, "rmon rxoversize_g (%08x) = %08lx\n", MMC_RXOVERSIZE_G, stats->rxoversize_g); axgbe_printf(1, "rmon rx64octets_gb (%08x) = %08lx\n", MMC_RX64OCTETS_GB_LO, stats->rx64octets_gb); axgbe_printf(1, "rmon rx65to127octets_gb (%08x) = %08lx\n", MMC_RX65TO127OCTETS_GB_LO, stats->rx65to127octets_gb); axgbe_printf(1, "rmon rx128to255octets_gb (%08x) = %08lx\n", MMC_RX128TO255OCTETS_GB_LO, stats->rx128to255octets_gb); axgbe_printf(1, "rmon rx256to511octets_gb (%08x) = %08lx\n", MMC_RX256TO511OCTETS_GB_LO, stats->rx256to511octets_gb); axgbe_printf(1, "rmon rx512to1023octets_gb (%08x) = %08lx\n", MMC_RX512TO1023OCTETS_GB_LO, stats->rx512to1023octets_gb); axgbe_printf(1, "rmon rx1024tomaxoctets_gb (%08x) = %08lx\n", MMC_RX1024TOMAXOCTETS_GB_LO, stats->rx1024tomaxoctets_gb); axgbe_printf(1, "rmon rxunicastframes_g (%08x) = %08lx\n", MMC_RXUNICASTFRAMES_G_LO, stats->rxunicastframes_g); axgbe_printf(1, "rmon rxlengtherror (%08x) = %08lx\n", MMC_RXLENGTHERROR_LO, stats->rxlengtherror); axgbe_printf(1, "rmon rxoutofrangetype (%08x) = %08lx\n", MMC_RXOUTOFRANGETYPE_LO, stats->rxoutofrangetype); axgbe_printf(1, "rmon rxpauseframes (%08x) = %08lx\n", MMC_RXPAUSEFRAMES_LO, stats->rxpauseframes); axgbe_printf(1, "rmon rxfifooverflow (%08x) = %08lx\n", MMC_RXFIFOOVERFLOW_LO, stats->rxfifooverflow); axgbe_printf(1, "rmon rxvlanframes_gb (%08x) = %08lx\n", MMC_RXVLANFRAMES_GB_LO, stats->rxvlanframes_gb); axgbe_printf(1, "rmon rxwatchdogerror (%08x) = %08lx\n", MMC_RXWATCHDOGERROR, stats->rxwatchdogerror); } void xgbe_dump_i2c_registers(struct xgbe_prv_data *pdata) { axgbe_printf(1, "*************** I2C Registers **************\n"); axgbe_printf(1, " IC_CON : %010x\n", XI2C_IOREAD(pdata, 0x00)); axgbe_printf(1, " IC_TAR : %010x\n", XI2C_IOREAD(pdata, 0x04)); axgbe_printf(1, " IC_HS_MADDR : %010x\n", XI2C_IOREAD(pdata, 0x0c)); axgbe_printf(1, " IC_INTR_STAT : %010x\n", XI2C_IOREAD(pdata, 0x2c)); axgbe_printf(1, " IC_INTR_MASK : %010x\n", XI2C_IOREAD(pdata, 0x30)); axgbe_printf(1, " IC_RAW_INTR_STAT : %010x\n", XI2C_IOREAD(pdata, 0x34)); axgbe_printf(1, " IC_RX_TL : %010x\n", XI2C_IOREAD(pdata, 0x38)); axgbe_printf(1, " IC_TX_TL : %010x\n", XI2C_IOREAD(pdata, 0x3c)); axgbe_printf(1, " IC_ENABLE : %010x\n", XI2C_IOREAD(pdata, 0x6c)); axgbe_printf(1, " IC_STATUS : %010x\n", XI2C_IOREAD(pdata, 0x70)); axgbe_printf(1, " IC_TXFLR : %010x\n", XI2C_IOREAD(pdata, 0x74)); axgbe_printf(1, " IC_RXFLR : %010x\n", XI2C_IOREAD(pdata, 0x78)); axgbe_printf(1, " IC_ENABLE_STATUS : %010x\n", XI2C_IOREAD(pdata, 0x9c)); axgbe_printf(1, " IC_COMP_PARAM1 : %010x\n", XI2C_IOREAD(pdata, 0xf4)); } static void xgbe_dump_active_vlans(struct xgbe_prv_data *pdata) { int i; for(i=0 ; iactive_vlans[i]); } axgbe_printf(1, "\n"); } static void xgbe_default_config(struct xgbe_prv_data *pdata) { pdata->blen = DMA_SBMR_BLEN_64; pdata->pbl = DMA_PBL_128; pdata->aal = 1; pdata->rd_osr_limit = 8; pdata->wr_osr_limit = 8; pdata->tx_sf_mode = MTL_TSF_ENABLE; pdata->tx_threshold = MTL_TX_THRESHOLD_64; pdata->tx_osp_mode = DMA_OSP_ENABLE; pdata->rx_sf_mode = MTL_RSF_DISABLE; pdata->rx_threshold = MTL_RX_THRESHOLD_64; pdata->pause_autoneg = 1; pdata->tx_pause = 1; pdata->rx_pause = 1; pdata->phy_speed = SPEED_UNKNOWN; pdata->power_down = 0; pdata->enable_rss = 1; } static int axgbe_if_attach_post(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; - struct ifnet *ifp = pdata->netdev; + if_t ifp = pdata->netdev; struct xgbe_phy_if *phy_if = 
&pdata->phy_if; struct xgbe_hw_if *hw_if = &pdata->hw_if; if_softc_ctx_t scctx = sc->scctx; int i, ret; /* set split header support based on tunable */ pdata->sph_enable = axgbe_sph_enable; /* Initialize ECC timestamps */ pdata->tx_sec_period = ticks; pdata->tx_ded_period = ticks; pdata->rx_sec_period = ticks; pdata->rx_ded_period = ticks; pdata->desc_sec_period = ticks; pdata->desc_ded_period = ticks; /* Reset the hardware */ ret = hw_if->exit(&sc->pdata); if (ret) axgbe_error("%s: exit error %d\n", __func__, ret); /* Configure the defaults */ xgbe_default_config(pdata); /* Set default max values if not provided */ if (!pdata->tx_max_fifo_size) pdata->tx_max_fifo_size = pdata->hw_feat.tx_fifo_size; if (!pdata->rx_max_fifo_size) pdata->rx_max_fifo_size = pdata->hw_feat.rx_fifo_size; DBGPR("%s: tx fifo 0x%x rx fifo 0x%x\n", __func__, pdata->tx_max_fifo_size, pdata->rx_max_fifo_size); /* Set and validate the number of descriptors for a ring */ MPASS(powerof2(XGBE_TX_DESC_CNT)); pdata->tx_desc_count = XGBE_TX_DESC_CNT; MPASS(powerof2(XGBE_RX_DESC_CNT)); pdata->rx_desc_count = XGBE_RX_DESC_CNT; /* Adjust the number of queues based on interrupts assigned */ if (pdata->channel_irq_count) { pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, pdata->channel_irq_count); pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count, pdata->channel_irq_count); DBGPR("adjusted TX %u/%u RX %u/%u\n", pdata->tx_ring_count, pdata->tx_q_count, pdata->rx_ring_count, pdata->rx_q_count); } /* Set channel count based on interrupts assigned */ pdata->channel_count = max_t(unsigned int, scctx->isc_ntxqsets, scctx->isc_nrxqsets); DBGPR("Channel count set to: %u\n", pdata->channel_count); /* Get RSS key */ #ifdef RSS rss_getkey((uint8_t *)pdata->rss_key); #else arc4rand(&pdata->rss_key, ARRAY_SIZE(pdata->rss_key), 0); #endif XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, IP2TE, 1); XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, TCP4TE, 1); XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); /* Initialize the PHY device */ pdata->sysctl_an_cdr_workaround = pdata->vdata->an_cdr_workaround; phy_if->phy_init(pdata); /* Set the coalescing */ xgbe_init_rx_coalesce(&sc->pdata); xgbe_init_tx_coalesce(&sc->pdata); ifmedia_add(sc->media, IFM_ETHER | IFM_10G_KR, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_KX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_CX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_LX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_T, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_SGMII, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_100_SGMII, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(sc->media, IFM_ETHER | IFM_AUTO); /* Initialize the phy */ pdata->phy_link = -1; pdata->phy_speed = SPEED_UNKNOWN; ret = phy_if->phy_reset(pdata); if (ret) return (ret); /* Calculate the Rx buffer size before allocating rings */ ret = xgbe_calc_rx_buf_size(pdata->netdev, if_getmtu(pdata->netdev)); pdata->rx_buf_size = ret; DBGPR("%s: rx_buf_size %d\n", __func__, ret); /* Setup RSS lookup table */ for (i = 0; i < XGBE_RSS_MAX_TABLE_SIZE; i++) XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH, i % pdata->rx_ring_count); /* * Mark the device down until it is initialized, which happens * when the device 
is accessed first (for configuring the iface, * eg: setting IP) */ set_bit(XGBE_DOWN, &pdata->dev_state); - DBGPR("mtu %d\n", ifp->if_mtu); - scctx->isc_max_frame_size = ifp->if_mtu + 18; + DBGPR("mtu %d\n", if_getmtu(ifp)); + scctx->isc_max_frame_size = if_getmtu(ifp) + 18; scctx->isc_min_frame_size = XGMAC_MIN_PACKET; axgbe_sysctl_init(pdata); axgbe_pci_init(pdata); return (0); } /* axgbe_if_attach_post */ static void xgbe_free_intr(struct xgbe_prv_data *pdata, struct resource *res, void *tag, int rid) { if (tag) bus_teardown_intr(pdata->dev, res, tag); if (res) bus_release_resource(pdata->dev, SYS_RES_IRQ, rid, res); } static void axgbe_interrupts_free(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct if_irq irq; int i; axgbe_printf(2, "%s: mode %d\n", __func__, scctx->isc_intr); /* Free dev_irq */ iflib_irq_free(ctx, &pdata->dev_irq); /* Free ecc_irq */ xgbe_free_intr(pdata, pdata->ecc_irq_res, pdata->ecc_irq_tag, pdata->ecc_rid); /* Free i2c_irq */ xgbe_free_intr(pdata, pdata->i2c_irq_res, pdata->i2c_irq_tag, pdata->i2c_rid); /* Free an_irq */ xgbe_free_intr(pdata, pdata->an_irq_res, pdata->an_irq_tag, pdata->an_rid); for (i = 0; i < scctx->isc_nrxqsets; i++) { channel = pdata->channel[i]; axgbe_printf(2, "%s: rid %d\n", __func__, channel->dma_irq_rid); irq.ii_res = channel->dma_irq_res; irq.ii_tag = channel->dma_irq_tag; iflib_irq_free(ctx, &irq); } } static int axgbe_if_detach(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_phy_if *phy_if = &pdata->phy_if; struct resource *mac_res[2]; mac_res[0] = pdata->xgmac_res; mac_res[1] = pdata->xpcs_res; phy_if->phy_exit(pdata); /* Free Interrupts */ axgbe_interrupts_free(ctx); /* Free workqueues */ taskqueue_free(pdata->dev_workqueue); /* Release bus resources */ bus_release_resources(iflib_get_dev(ctx), axgbe_pci_mac_spec, mac_res); /* Free VLAN bitmap */ free(pdata->active_vlans, M_AXGBE); axgbe_sysctl_exit(pdata); return (0); } /* axgbe_if_detach */ static void axgbe_pci_init(struct xgbe_prv_data *pdata) { struct xgbe_phy_if *phy_if = &pdata->phy_if; struct xgbe_hw_if *hw_if = &pdata->hw_if; int ret = 0; if (!__predict_false((test_bit(XGBE_DOWN, &pdata->dev_state)))) { axgbe_printf(1, "%s: Starting when XGBE_UP\n", __func__); return; } hw_if->init(pdata); ret = phy_if->phy_start(pdata); if (ret) { axgbe_error("%s: phy start %d\n", __func__, ret); ret = hw_if->exit(pdata); if (ret) axgbe_error("%s: exit error %d\n", __func__, ret); return; } hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); xgbe_start_timers(pdata); clear_bit(XGBE_DOWN, &pdata->dev_state); xgbe_dump_phy_registers(pdata); xgbe_dump_prop_registers(pdata); xgbe_dump_dma_registers(pdata, -1); xgbe_dump_mtl_registers(pdata); xgbe_dump_mac_registers(pdata); xgbe_dump_rmon_counters(pdata); } static void axgbe_if_init(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; axgbe_pci_init(pdata); } static void axgbe_pci_stop(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_phy_if *phy_if = &pdata->phy_if; struct xgbe_hw_if *hw_if = &pdata->hw_if; int ret; if (__predict_false(test_bit(XGBE_DOWN, &pdata->dev_state))) { axgbe_printf(1, "%s: Stopping when XGBE_DOWN\n", __func__); return; } xgbe_stop_timers(pdata); taskqueue_drain_all(pdata->dev_workqueue); 
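/* Quiesce in order: with the tick timers stopped and all deferred taskqueue work drained, nothing can touch the rings while MAC Tx/Rx are disabled; the PHY is stopped and the hardware reset (hw_if->exit) only after traffic has ceased. */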
hw_if->disable_tx(pdata); hw_if->disable_rx(pdata); phy_if->phy_stop(pdata); ret = hw_if->exit(pdata); if (ret) axgbe_error("%s: exit error %d\n", __func__, ret); set_bit(XGBE_DOWN, &pdata->dev_state); } static void axgbe_if_stop(if_ctx_t ctx) { axgbe_pci_stop(ctx); } static void axgbe_if_disable_intr(if_ctx_t ctx) { /* TODO - implement */ } static void axgbe_if_enable_intr(if_ctx_t ctx) { /* TODO - implement */ } static int axgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *va, uint64_t *pa, int ntxqs, int ntxqsets) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct xgbe_ring *tx_ring; int i, j, k; MPASS(scctx->isc_ntxqsets > 0); MPASS(scctx->isc_ntxqsets == ntxqsets); MPASS(ntxqs == 1); axgbe_printf(1, "%s: txqsets %d/%d txqs %d\n", __func__, scctx->isc_ntxqsets, ntxqsets, ntxqs); for (i = 0 ; i < ntxqsets; i++) { channel = pdata->channel[i]; tx_ring = (struct xgbe_ring*)malloc(ntxqs * sizeof(struct xgbe_ring), M_AXGBE, M_NOWAIT | M_ZERO); if (tx_ring == NULL) { axgbe_error("Unable to allocate TX ring memory\n"); goto tx_ring_fail; } channel->tx_ring = tx_ring; for (j = 0; j < ntxqs; j++, tx_ring++) { tx_ring->rdata = (struct xgbe_ring_data*)malloc(scctx->isc_ntxd[j] * sizeof(struct xgbe_ring_data), M_AXGBE, M_NOWAIT); /* Get the virtual & physical address of hw queues */ tx_ring->rdesc = (struct xgbe_ring_desc *)va[i*ntxqs + j]; tx_ring->rdesc_paddr = pa[i*ntxqs + j]; tx_ring->rdesc_count = scctx->isc_ntxd[j]; spin_lock_init(&tx_ring->lock); } } axgbe_printf(1, "allocated for %d tx queues\n", scctx->isc_ntxqsets); return (0); tx_ring_fail: for (j = 0; j < i ; j++) { channel = pdata->channel[j]; tx_ring = channel->tx_ring; for (k = 0; k < ntxqs ; k++, tx_ring++) { if (tx_ring && tx_ring->rdata) free(tx_ring->rdata, M_AXGBE); } free(channel->tx_ring, M_AXGBE); channel->tx_ring = NULL; } return (ENOMEM); } /* axgbe_if_tx_queues_alloc */ static int axgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *va, uint64_t *pa, int nrxqs, int nrxqsets) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct xgbe_ring *rx_ring; int i, j, k; MPASS(scctx->isc_nrxqsets > 0); MPASS(scctx->isc_nrxqsets == nrxqsets); if (!pdata->sph_enable) { MPASS(nrxqs == 1); } else { MPASS(nrxqs == 2); } axgbe_printf(1, "%s: rxqsets %d/%d rxqs %d\n", __func__, scctx->isc_nrxqsets, nrxqsets, nrxqs); for (i = 0 ; i < nrxqsets; i++) { channel = pdata->channel[i]; rx_ring = (struct xgbe_ring*)malloc(nrxqs * sizeof(struct xgbe_ring), M_AXGBE, M_NOWAIT | M_ZERO); if (rx_ring == NULL) { axgbe_error("Unable to allocate RX ring memory\n"); goto rx_ring_fail; } channel->rx_ring = rx_ring; for (j = 0; j < nrxqs; j++, rx_ring++) { rx_ring->rdata = (struct xgbe_ring_data*)malloc(scctx->isc_nrxd[j] * sizeof(struct xgbe_ring_data), M_AXGBE, M_NOWAIT); /* Get the virtual and physical address of the hw queues */ rx_ring->rdesc = (struct xgbe_ring_desc *)va[i*nrxqs + j]; rx_ring->rdesc_paddr = pa[i*nrxqs + j]; rx_ring->rdesc_count = scctx->isc_nrxd[j]; spin_lock_init(&rx_ring->lock); } } axgbe_printf(2, "allocated for %d rx queues\n", scctx->isc_nrxqsets); return (0); rx_ring_fail: for (j = 0 ; j < i ; j++) { channel = pdata->channel[j]; rx_ring = channel->rx_ring; for (k = 0; k < nrxqs ; k++, rx_ring++) { if (rx_ring && rx_ring->rdata) free(rx_ring->rdata, M_AXGBE); } free(channel->rx_ring, M_AXGBE); channel->rx_ring = NULL; } 
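/* Unwind path: every ring allocated by earlier iterations has been freed above, so iflib sees a clean ENOMEM with no partially initialized queues left behind. */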
return (ENOMEM); } /* axgbe_if_rx_queues_alloc */ static void axgbe_if_queues_free(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; if_shared_ctx_t sctx = sc->sctx; struct xgbe_channel *channel; struct xgbe_ring *tx_ring; struct xgbe_ring *rx_ring; int i, j; for (i = 0 ; i < scctx->isc_ntxqsets; i++) { channel = pdata->channel[i]; tx_ring = channel->tx_ring; for (j = 0; j < sctx->isc_ntxqs ; j++, tx_ring++) { if (tx_ring && tx_ring->rdata) free(tx_ring->rdata, M_AXGBE); } free(channel->tx_ring, M_AXGBE); channel->tx_ring = NULL; } for (i = 0 ; i < scctx->isc_nrxqsets; i++) { channel = pdata->channel[i]; rx_ring = channel->rx_ring; for (j = 0; j < sctx->isc_nrxqs ; j++, rx_ring++) { if (rx_ring && rx_ring->rdata) free(rx_ring->rdata, M_AXGBE); } free(channel->rx_ring, M_AXGBE); channel->rx_ring = NULL; } axgbe_free_channels(sc); } /* axgbe_if_queues_free */ static void axgbe_if_vlan_register(if_ctx_t ctx, uint16_t vtag) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_hw_if *hw_if = &pdata->hw_if; if (!bit_test(pdata->active_vlans, vtag)) { axgbe_printf(0, "Registering VLAN %d\n", vtag); bit_set(pdata->active_vlans, vtag); hw_if->update_vlan_hash_table(pdata); pdata->num_active_vlans++; axgbe_printf(1, "Total active vlans: %d\n", pdata->num_active_vlans); } else axgbe_printf(0, "VLAN %d already registered\n", vtag); xgbe_dump_active_vlans(pdata); } static void axgbe_if_vlan_unregister(if_ctx_t ctx, uint16_t vtag) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_hw_if *hw_if = &pdata->hw_if; if (pdata->num_active_vlans == 0) { axgbe_printf(1, "No active VLANs to unregister\n"); return; } if (bit_test(pdata->active_vlans, vtag)){ axgbe_printf(0, "Un-Registering VLAN %d\n", vtag); bit_clear(pdata->active_vlans, vtag); hw_if->update_vlan_hash_table(pdata); pdata->num_active_vlans--; axgbe_printf(1, "Total active vlans: %d\n", pdata->num_active_vlans); } else axgbe_printf(0, "VLAN %d already unregistered\n", vtag); xgbe_dump_active_vlans(pdata); } #if __FreeBSD_version >= 1300000 static bool axgbe_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: default: return (true); } } #endif static int axgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct if_irq irq; int i, error, rid = 0, flags; char buf[16]; MPASS(scctx->isc_intr != IFLIB_INTR_LEGACY); pdata->isr_as_tasklet = 1; if (scctx->isc_intr == IFLIB_INTR_MSI) { pdata->irq_count = 1; pdata->channel_irq_count = 1; return (0); } axgbe_printf(1, "%s: msix %d txqsets %d rxqsets %d\n", __func__, msix, scctx->isc_ntxqsets, scctx->isc_nrxqsets); flags = RF_ACTIVE; /* DEV INTR SETUP */ rid++; error = iflib_irq_alloc_generic(ctx, &pdata->dev_irq, rid, IFLIB_INTR_ADMIN, axgbe_dev_isr, sc, 0, "dev_irq"); if (error) { axgbe_error("Failed to register device interrupt rid %d name %s\n", rid, "dev_irq"); return (error); } /* ECC INTR SETUP */ rid++; pdata->ecc_rid = rid; pdata->ecc_irq_res = bus_alloc_resource_any(pdata->dev, SYS_RES_IRQ, &rid, flags); if (!pdata->ecc_irq_res) { axgbe_error("failed to allocate IRQ for rid %d, name %s.\n", rid, "ecc_irq"); return (ENOMEM); } error = bus_setup_intr(pdata->dev, pdata->ecc_irq_res, 
INTR_MPSAFE | INTR_TYPE_NET, NULL, axgbe_ecc_isr, sc, &pdata->ecc_irq_tag); if (error) { axgbe_error("failed to setup interrupt for rid %d, name %s: %d\n", rid, "ecc_irq", error); return (error); } /* I2C INTR SETUP */ rid++; pdata->i2c_rid = rid; pdata->i2c_irq_res = bus_alloc_resource_any(pdata->dev, SYS_RES_IRQ, &rid, flags); if (!pdata->i2c_irq_res) { axgbe_error("failed to allocate IRQ for rid %d, name %s.\n", rid, "i2c_irq"); return (ENOMEM); } error = bus_setup_intr(pdata->dev, pdata->i2c_irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, axgbe_i2c_isr, sc, &pdata->i2c_irq_tag); if (error) { axgbe_error("failed to setup interrupt for rid %d, name %s: %d\n", rid, "i2c_irq", error); return (error); } /* AN INTR SETUP */ rid++; pdata->an_rid = rid; pdata->an_irq_res = bus_alloc_resource_any(pdata->dev, SYS_RES_IRQ, &rid, flags); if (!pdata->an_irq_res) { axgbe_error("failed to allocate IRQ for rid %d, name %s.\n", rid, "an_irq"); return (ENOMEM); } error = bus_setup_intr(pdata->dev, pdata->an_irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, axgbe_an_isr, sc, &pdata->an_irq_tag); if (error) { axgbe_error("failed to setup interrupt for rid %d, name %s: %d\n", rid, "an_irq", error); return (error); } pdata->per_channel_irq = 1; pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL; rid++; for (i = 0; i < scctx->isc_nrxqsets; i++, rid++) { channel = pdata->channel[i]; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &irq, rid, IFLIB_INTR_RXTX, axgbe_msix_que, channel, channel->queue_index, buf); if (error) { axgbe_error("Failed to allocated que int %d err: %d\n", i, error); return (error); } channel->dma_irq_rid = rid; channel->dma_irq_res = irq.ii_res; channel->dma_irq_tag = irq.ii_tag; axgbe_printf(1, "%s: channel count %d idx %d irq %d\n", __func__, scctx->isc_nrxqsets, i, rid); } pdata->irq_count = msix; pdata->channel_irq_count = scctx->isc_nrxqsets; for (i = 0; i < scctx->isc_ntxqsets; i++) { channel = pdata->channel[i]; snprintf(buf, sizeof(buf), "txq%d", i); irq.ii_res = channel->dma_irq_res; iflib_softirq_alloc_generic(ctx, &irq, IFLIB_INTR_TX, channel, channel->queue_index, buf); } return (0); } /* axgbe_if_msix_intr_assign */ static int xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata, struct xgbe_channel *channel) { struct xgbe_hw_if *hw_if = &pdata->hw_if; enum xgbe_int int_id; if (channel->tx_ring && channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI_RI; else if (channel->tx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI; else if (channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_RI; else return (-1); axgbe_printf(1, "%s channel: %d rx_tx interrupt enabled %d\n", __func__, channel->queue_index, int_id); return (hw_if->enable_int(channel, int_id)); } static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata, struct xgbe_channel *channel) { struct xgbe_hw_if *hw_if = &pdata->hw_if; enum xgbe_int int_id; if (channel->tx_ring && channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI_RI; else if (channel->tx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI; else if (channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_RI; else return; axgbe_printf(1, "%s channel: %d rx_tx interrupt disabled %d\n", __func__, channel->queue_index, int_id); hw_if->disable_int(channel, int_id); } static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata) { unsigned int i; for (i = 0; i < pdata->channel_count; i++) xgbe_disable_rx_tx_int(pdata, pdata->channel[i]); } static int axgbe_msix_que(void *arg) { struct xgbe_channel *channel = (struct xgbe_channel *)arg; struct xgbe_prv_data *pdata = channel->pdata; unsigned 
int dma_status; axgbe_printf(1, "%s: Channel: %d SR 0x%04x DSR 0x%04x IER:0x%04x D_ISR:0x%04x M_ISR:0x%04x\n", __func__, channel->queue_index, XGMAC_DMA_IOREAD(channel, DMA_CH_SR), XGMAC_DMA_IOREAD(channel, DMA_CH_DSR), XGMAC_DMA_IOREAD(channel, DMA_CH_IER), XGMAC_IOREAD(pdata, DMA_ISR), XGMAC_IOREAD(pdata, MAC_ISR)); (void)XGMAC_DMA_IOREAD(channel, DMA_CH_SR); /* Disable Tx and Rx channel interrupts */ xgbe_disable_rx_tx_int(pdata, channel); /* Clear the interrupts */ dma_status = 0; XGMAC_SET_BITS(dma_status, DMA_CH_SR, TI, 1); XGMAC_SET_BITS(dma_status, DMA_CH_SR, RI, 1); XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_status); return (FILTER_SCHEDULE_THREAD); } static int axgbe_dev_isr(void *arg) { struct axgbe_if_softc *sc = (struct axgbe_if_softc *)arg; struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_channel *channel; struct xgbe_hw_if *hw_if = &pdata->hw_if; unsigned int i, dma_isr, dma_ch_isr; unsigned int mac_isr, mac_mdioisr; int ret = FILTER_HANDLED; dma_isr = XGMAC_IOREAD(pdata, DMA_ISR); axgbe_printf(2, "%s DMA ISR: 0x%x\n", __func__, dma_isr); if (!dma_isr) return (FILTER_HANDLED); for (i = 0; i < pdata->channel_count; i++) { if (!(dma_isr & (1 << i))) continue; channel = pdata->channel[i]; dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR); axgbe_printf(2, "%s: channel %d SR 0x%x DSR 0x%x\n", __func__, channel->queue_index, dma_ch_isr, XGMAC_DMA_IOREAD(channel, DMA_CH_DSR)); /* * The TI or RI interrupt bits may still be set even if using * per channel DMA interrupts. Check to be sure those are not * enabled before using the private data napi structure. */ if (!pdata->per_channel_irq && (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, TI) || XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RI))) { /* Disable Tx and Rx interrupts */ xgbe_disable_rx_tx_ints(pdata); } else { /* * Don't clear Rx/Tx status if doing per channel DMA * interrupts, these will be cleared by the ISR for * per channel DMA interrupts */ XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, TI, 0); XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, RI, 0); } if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU)) pdata->ext_stats.rx_buffer_unavailable++; /* Restart the device on a Fatal Bus Error */ if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE)) axgbe_error("%s: Fatal bus error reported 0x%x\n", __func__, dma_ch_isr); /* Clear all interrupt signals */ XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr); ret = FILTER_SCHEDULE_THREAD; } if (XGMAC_GET_BITS(dma_isr, DMA_ISR, MACIS)) { mac_isr = XGMAC_IOREAD(pdata, MAC_ISR); axgbe_printf(2, "%s MAC ISR: 0x%x\n", __func__, mac_isr); if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCTXIS)) hw_if->tx_mmc_int(pdata); if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCRXIS)) hw_if->rx_mmc_int(pdata); if (XGMAC_GET_BITS(mac_isr, MAC_ISR, SMI)) { mac_mdioisr = XGMAC_IOREAD(pdata, MAC_MDIOISR); if (XGMAC_GET_BITS(mac_mdioisr, MAC_MDIOISR, SNGLCOMPINT)) wakeup_one(pdata); } } return (ret); } /* axgbe_dev_isr */ static void axgbe_i2c_isr(void *arg) { struct axgbe_if_softc *sc = (struct axgbe_if_softc *)arg; sc->pdata.i2c_if.i2c_isr(&sc->pdata); } static void axgbe_ecc_isr(void *arg) { /* TODO - implement */ } static void axgbe_an_isr(void *arg) { struct axgbe_if_softc *sc = (struct axgbe_if_softc *)arg; sc->pdata.phy_if.an_isr(&sc->pdata); } static int axgbe_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; int ret; if (qid < pdata->tx_q_count) { ret = xgbe_enable_rx_tx_int(pdata, pdata->channel[qid]); if (ret) { axgbe_error("Enable TX INT failed\n"); 
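/* Propagate the failure so iflib knows the Tx interrupt was not re-armed. */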
return (ret); } } else axgbe_error("Queue ID exceeds channel count\n"); return (0); } static int axgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; int ret; if (qid < pdata->rx_q_count) { ret = xgbe_enable_rx_tx_int(pdata, pdata->channel[qid]); if (ret) { axgbe_error("Enable RX INT failed\n"); return (ret); } } else axgbe_error("Queue ID exceeds channel count\n"); return (0); } static void axgbe_if_update_admin_status(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; axgbe_printf(1, "%s: phy_link %d status %d speed %d\n", __func__, pdata->phy_link, sc->link_status, pdata->phy.speed); if (pdata->phy_link < 0) return; if (pdata->phy_link) { if (sc->link_status == LINK_STATE_DOWN) { sc->link_status = LINK_STATE_UP; if (pdata->phy.speed & SPEED_10000) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); else if (pdata->phy.speed & SPEED_2500) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(2.5)); else if (pdata->phy.speed & SPEED_1000) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(1)); else if (pdata->phy.speed & SPEED_100) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(100)); else if (pdata->phy.speed & SPEED_10) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(10)); } } else { if (sc->link_status == LINK_STATE_UP) { sc->link_status = LINK_STATE_DOWN; iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); } } } static int axgbe_if_media_change(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); /* Validate the media type before taking the AN lock so the early return cannot leak it */ if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); sx_xlock(&sc->pdata.an_mutex); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_10G_KR: sc->pdata.phy.speed = SPEED_10000; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_2500_KX: sc->pdata.phy.speed = SPEED_2500; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_1000_KX: sc->pdata.phy.speed = SPEED_1000; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_100_TX: sc->pdata.phy.speed = SPEED_100; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_AUTO: sc->pdata.phy.autoneg = AUTONEG_ENABLE; break; } sx_xunlock(&sc->pdata.an_mutex); return (-sc->pdata.phy_if.phy_config_aneg(&sc->pdata)); } static int axgbe_if_promisc_set(if_ctx_t ctx, int flags) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; - struct ifnet *ifp = pdata->netdev; + if_t ifp = pdata->netdev; axgbe_printf(1, "%s: MAC_PFR 0x%x drv_flags 0x%x if_flags 0x%x\n", - __func__, XGMAC_IOREAD(pdata, MAC_PFR), ifp->if_drv_flags, ifp->if_flags); + __func__, XGMAC_IOREAD(pdata, MAC_PFR), if_getdrvflags(ifp), + if_getflags(ifp)); - if (ifp->if_flags & IFF_PPROMISC) { + if (if_getflags(ifp) & IFF_PPROMISC) { axgbe_printf(1, "User requested to enter promisc mode\n"); if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == 1) { axgbe_printf(1, "Already in promisc mode\n"); return (0); } axgbe_printf(1, "Entering promisc mode\n"); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 1); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 0); } else { axgbe_printf(1, "User requested to leave promisc mode\n"); if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == 0) { axgbe_printf(1, "Already not in promisc mode\n"); return (0); } axgbe_printf(1, "Leaving promisc mode\n"); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 0); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 1); } return (0); } static uint64_t axgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) { 
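/* Counters are derived from the hardware MMC statistics block: input errors are total Rx frames minus the good broadcast, multicast and unicast counts, and output errors are total Tx frames minus good Tx frames. */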
struct axgbe_if_softc *sc = iflib_get_softc(ctx); - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_mmc_stats *pstats = &pdata->mmc_stats; pdata->hw_if.read_mmc_stats(pdata); switch(cnt) { case IFCOUNTER_IPACKETS: return (pstats->rxframecount_gb); case IFCOUNTER_IERRORS: return (pstats->rxframecount_gb - pstats->rxbroadcastframes_g - pstats->rxmulticastframes_g - pstats->rxunicastframes_g); case IFCOUNTER_OPACKETS: return (pstats->txframecount_gb); case IFCOUNTER_OERRORS: return (pstats->txframecount_gb - pstats->txframecount_g); case IFCOUNTER_IBYTES: return (pstats->rxoctetcount_gb); case IFCOUNTER_OBYTES: return (pstats->txoctetcount_gb); default: return (if_get_counter_default(ifp, cnt)); } } static int axgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; int ret; if (mtu > XGMAC_JUMBO_PACKET_MTU) return (EINVAL); ret = xgbe_calc_rx_buf_size(pdata->netdev, mtu); pdata->rx_buf_size = ret; axgbe_printf(1, "%s: rx_buf_size %d\n", __func__, ret); sc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } static void axgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; ifmr->ifm_status = IFM_AVALID; if (!sc->pdata.phy.link) return; ifmr->ifm_active = IFM_ETHER; ifmr->ifm_status |= IFM_ACTIVE; axgbe_printf(1, "Speed 0x%x Mode %d\n", sc->pdata.phy.speed, pdata->phy_if.phy_impl.cur_mode(pdata)); pdata->phy_if.phy_impl.get_type(pdata, ifmr); ifmr->ifm_active |= IFM_FDX; ifmr->ifm_active |= IFM_ETH_TXPAUSE; ifmr->ifm_active |= IFM_ETH_RXPAUSE; } diff --git a/sys/dev/axgbe/xgbe-dev.c b/sys/dev/axgbe/xgbe-dev.c index 95161802ed8e..5787db4f0441 100644 --- a/sys/dev/axgbe/xgbe-dev.c +++ b/sys/dev/axgbe/xgbe-dev.c @@ -1,2864 +1,2864 @@ /* * AMD 10Gb Ethernet driver * * Copyright (c) 2014-2016,2020 Advanced Micro Devices, Inc. * * This file is available to you under your choice of the following two * licenses: * * License 1: GPLv2 * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or (at * your option) any later version. * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. 
Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * * License 2: Modified BSD * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Advanced Micro Devices, Inc. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. 
Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include "xgbe.h" #include "xgbe-common.h" #include <net/if_dl.h> static inline unsigned int xgbe_get_max_frame(struct xgbe_prv_data *pdata) { return (if_getmtu(pdata->netdev) + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); } static unsigned int xgbe_usec_to_riwt(struct xgbe_prv_data *pdata, unsigned int usec) { unsigned long rate; unsigned int ret; rate = pdata->sysclk_rate; /* * Convert the input usec value to the watchdog timer value. Each * watchdog timer value is equivalent to 256 clock cycles. * Calculate the required value as: * ( usec * ( system_clock_mhz / 10^6 ) ) / 256 */ ret = (usec * (rate / 1000000)) / 256; return (ret); } static unsigned int xgbe_riwt_to_usec(struct xgbe_prv_data *pdata, unsigned int riwt) { unsigned long rate; unsigned int ret; rate = pdata->sysclk_rate; /* * Convert the input watchdog timer value to the usec value. Each * watchdog timer value is equivalent to 256 clock cycles. 
* Calculate the required value as: * ( riwt * 256 ) / ( system_clock_mhz / 10^6 ) */ ret = (riwt * 256) / (rate / 1000000); return (ret); } static int xgbe_config_pbl_val(struct xgbe_prv_data *pdata) { unsigned int pblx8, pbl; unsigned int i; pblx8 = DMA_PBL_X8_DISABLE; pbl = pdata->pbl; if (pdata->pbl > 32) { pblx8 = DMA_PBL_X8_ENABLE; pbl >>= 3; } for (i = 0; i < pdata->channel_count; i++) { XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, PBLX8, pblx8); if (pdata->channel[i]->tx_ring) XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, PBL, pbl); if (pdata->channel[i]->rx_ring) XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, PBL, pbl); } return (0); } static int xgbe_config_osp_mode(struct xgbe_prv_data *pdata) { unsigned int i; for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, OSP, pdata->tx_osp_mode); } return (0); } static int xgbe_config_rsf_mode(struct xgbe_prv_data *pdata, unsigned int val) { unsigned int i; for (i = 0; i < pdata->rx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RSF, val); return (0); } static int xgbe_config_tsf_mode(struct xgbe_prv_data *pdata, unsigned int val) { unsigned int i; for (i = 0; i < pdata->tx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TSF, val); return (0); } static int xgbe_config_rx_threshold(struct xgbe_prv_data *pdata, unsigned int val) { unsigned int i; for (i = 0; i < pdata->rx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RTC, val); return (0); } static int xgbe_config_tx_threshold(struct xgbe_prv_data *pdata, unsigned int val) { unsigned int i; for (i = 0; i < pdata->tx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TTC, val); return (0); } static int xgbe_config_rx_coalesce(struct xgbe_prv_data *pdata) { unsigned int i; for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RIWT, RWT, pdata->rx_riwt); } return (0); } static int xgbe_config_tx_coalesce(struct xgbe_prv_data *pdata) { return (0); } static void xgbe_config_rx_buffer_size(struct xgbe_prv_data *pdata) { unsigned int i; for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, RBSZ, pdata->rx_buf_size); } } static void xgbe_config_tso_mode(struct xgbe_prv_data *pdata) { unsigned int i; int tso_enabled = (if_getcapenable(pdata->netdev) & IFCAP_TSO); for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; axgbe_printf(1, "TSO in channel %d %s\n", i, tso_enabled ? "enabled" : "disabled"); XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, tso_enabled ? 
1 : 0); } } static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) { unsigned int i; int sph_enable_flag = XGMAC_IOREAD_BITS(pdata, MAC_HWF1R, SPHEN); axgbe_printf(1, "sph_enable %d sph feature enabled?: %d\n", pdata->sph_enable, sph_enable_flag); if (pdata->sph_enable && sph_enable_flag) axgbe_printf(0, "SPH Enabled\n"); for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; if (pdata->sph_enable && sph_enable_flag) { /* Enable split header feature */ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1); } else { /* Disable split header feature */ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); } /* per-channel confirmation of SPH being disabled/enabled */ int val = XGMAC_DMA_IOREAD_BITS(pdata->channel[i], DMA_CH_CR, SPH); axgbe_printf(0, "%s: SPH %s in channel %d\n", __func__, (val ? "enabled" : "disabled"), i); } if (pdata->sph_enable && sph_enable_flag) XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); } static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type, unsigned int index, unsigned int val) { unsigned int wait; int ret = 0; mtx_lock(&pdata->rss_mutex); if (XGMAC_IOREAD_BITS(pdata, MAC_RSSAR, OB)) { ret = -EBUSY; goto unlock; } XGMAC_IOWRITE(pdata, MAC_RSSDR, val); XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, RSSIA, index); XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, ADDRT, type); XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, CT, 0); XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, OB, 1); wait = 1000; while (wait--) { if (!XGMAC_IOREAD_BITS(pdata, MAC_RSSAR, OB)) goto unlock; DELAY(1000); } ret = -EBUSY; unlock: mtx_unlock(&pdata->rss_mutex); return (ret); } static int xgbe_write_rss_hash_key(struct xgbe_prv_data *pdata) { unsigned int key_regs = sizeof(pdata->rss_key) / sizeof(uint32_t); unsigned int *key = (unsigned int *)&pdata->rss_key; int ret; while (key_regs--) { ret = xgbe_write_rss_reg(pdata, XGBE_RSS_HASH_KEY_TYPE, key_regs, *key++); if (ret) return (ret); } return (0); } static int xgbe_write_rss_lookup_table(struct xgbe_prv_data *pdata) { unsigned int i; int ret; for (i = 0; i < ARRAY_SIZE(pdata->rss_table); i++) { ret = xgbe_write_rss_reg(pdata, XGBE_RSS_LOOKUP_TABLE_TYPE, i, pdata->rss_table[i]); if (ret) return (ret); } return (0); } static int xgbe_set_rss_hash_key(struct xgbe_prv_data *pdata, const uint8_t *key) { memcpy(pdata->rss_key, key, sizeof(pdata->rss_key)); return (xgbe_write_rss_hash_key(pdata)); } static int xgbe_set_rss_lookup_table(struct xgbe_prv_data *pdata, const uint32_t *table) { unsigned int i; for (i = 0; i < ARRAY_SIZE(pdata->rss_table); i++) XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH, table[i]); return (xgbe_write_rss_lookup_table(pdata)); } static int xgbe_enable_rss(struct xgbe_prv_data *pdata) { int ret; if (!pdata->hw_feat.rss) return (-EOPNOTSUPP); /* Program the hash key */ ret = xgbe_write_rss_hash_key(pdata); if (ret) return (ret); /* Program the lookup table */ ret = xgbe_write_rss_lookup_table(pdata); if (ret) return (ret); /* Set the RSS options */ XGMAC_IOWRITE(pdata, MAC_RSSCR, pdata->rss_options); /* Enable RSS */ XGMAC_IOWRITE_BITS(pdata, MAC_RSSCR, RSSE, 1); axgbe_printf(0, "RSS Enabled\n"); return (0); } static int xgbe_disable_rss(struct xgbe_prv_data *pdata) { if (!pdata->hw_feat.rss) return (-EOPNOTSUPP); XGMAC_IOWRITE_BITS(pdata, MAC_RSSCR, RSSE, 0); axgbe_printf(0, "RSS Disabled\n"); return (0); } static void xgbe_config_rss(struct xgbe_prv_data *pdata) { int ret; if (!pdata->hw_feat.rss) return; /* Check if the interface has RSS capability */ if 
(pdata->enable_rss) ret = xgbe_enable_rss(pdata); else ret = xgbe_disable_rss(pdata); if (ret) axgbe_error("error configuring RSS, RSS disabled\n"); } static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) { unsigned int max_q_count, q_count; unsigned int reg, reg_val; unsigned int i; /* Clear MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0); /* Clear MAC flow control */ max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); XGMAC_SET_BITS(reg_val, MAC_Q0TFCR, TFE, 0); XGMAC_IOWRITE(pdata, reg, reg_val); reg += MAC_QTFCR_INC; } return (0); } static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) { unsigned int max_q_count, q_count; unsigned int reg, reg_val; unsigned int i; /* Set MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) { unsigned int ehfc = 0; if (pdata->rx_rfd[i]) { /* Flow control thresholds are established */ /* TODO - enable pfc/ets support */ ehfc = 1; } XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, ehfc); axgbe_printf(1, "flow control %s for RXq%u\n", ehfc ? "enabled" : "disabled", i); } /* Set MAC flow control */ max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); /* Enable transmit flow control */ XGMAC_SET_BITS(reg_val, MAC_Q0TFCR, TFE, 1); /* Set pause time */ XGMAC_SET_BITS(reg_val, MAC_Q0TFCR, PT, 0xffff); XGMAC_IOWRITE(pdata, reg, reg_val); reg += MAC_QTFCR_INC; } return (0); } static int xgbe_disable_rx_flow_control(struct xgbe_prv_data *pdata) { XGMAC_IOWRITE_BITS(pdata, MAC_RFCR, RFE, 0); return (0); } static int xgbe_enable_rx_flow_control(struct xgbe_prv_data *pdata) { XGMAC_IOWRITE_BITS(pdata, MAC_RFCR, RFE, 1); return (0); } static int xgbe_config_tx_flow_control(struct xgbe_prv_data *pdata) { if (pdata->tx_pause) xgbe_enable_tx_flow_control(pdata); else xgbe_disable_tx_flow_control(pdata); return (0); } static int xgbe_config_rx_flow_control(struct xgbe_prv_data *pdata) { if (pdata->rx_pause) xgbe_enable_rx_flow_control(pdata); else xgbe_disable_rx_flow_control(pdata); return (0); } static void xgbe_config_flow_control(struct xgbe_prv_data *pdata) { xgbe_config_tx_flow_control(pdata); xgbe_config_rx_flow_control(pdata); XGMAC_IOWRITE_BITS(pdata, MAC_RFCR, PFCE, 0); } static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata) { struct xgbe_channel *channel; unsigned int i, ver; /* Set the interrupt mode if supported */ if (pdata->channel_irq_mode) XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM, pdata->channel_irq_mode); ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); for (i = 0; i < pdata->channel_count; i++) { channel = pdata->channel[i]; /* Clear all the interrupts which are set */ XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, XGMAC_DMA_IOREAD(channel, DMA_CH_SR)); /* Clear all interrupt enable bits */ channel->curr_ier = 0; /* Enable following interrupts * NIE - Normal Interrupt Summary Enable * AIE - Abnormal Interrupt Summary Enable * FBEE - Fatal Bus Error Enable */ if (ver < 0x21) { XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, NIE20, 1); XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, AIE20, 1); } else { XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, NIE, 1); XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, AIE, 1); } XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, 
FBEE, 1); if (channel->tx_ring) { /* Enable the following Tx interrupts * TIE - Transmit Interrupt Enable (unless using * per channel interrupts in edge triggered * mode) */ if (!pdata->per_channel_irq || pdata->channel_irq_mode) XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 1); } if (channel->rx_ring) { /* Enable following Rx interrupts * RBUE - Receive Buffer Unavailable Enable * RIE - Receive Interrupt Enable (unless using * per channel interrupts in edge triggered * mode) */ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RBUE, 1); if (!pdata->per_channel_irq || pdata->channel_irq_mode) XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 1); } XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, channel->curr_ier); } } static void xgbe_enable_mtl_interrupts(struct xgbe_prv_data *pdata) { unsigned int mtl_q_isr; unsigned int q_count, i; q_count = max(pdata->hw_feat.tx_q_cnt, pdata->hw_feat.rx_q_cnt); for (i = 0; i < q_count; i++) { /* Clear all the interrupts which are set */ mtl_q_isr = XGMAC_MTL_IOREAD(pdata, i, MTL_Q_ISR); XGMAC_MTL_IOWRITE(pdata, i, MTL_Q_ISR, mtl_q_isr); /* No MTL interrupts to be enabled */ XGMAC_MTL_IOWRITE(pdata, i, MTL_Q_IER, 0); } } static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata) { unsigned int mac_ier = 0; /* Enable Timestamp interrupt */ XGMAC_SET_BITS(mac_ier, MAC_IER, TSIE, 1); XGMAC_IOWRITE(pdata, MAC_IER, mac_ier); /* Enable all counter interrupts */ XGMAC_IOWRITE_BITS(pdata, MMC_RIER, ALL_INTERRUPTS, 0xffffffff); XGMAC_IOWRITE_BITS(pdata, MMC_TIER, ALL_INTERRUPTS, 0xffffffff); /* Enable MDIO single command completion interrupt */ XGMAC_IOWRITE_BITS(pdata, MAC_MDIOIER, SNGLCOMPIE, 1); } static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed) { unsigned int ss; switch (speed) { case SPEED_1000: ss = 0x03; break; case SPEED_2500: ss = 0x02; break; case SPEED_10000: ss = 0x00; break; default: return (-EINVAL); } if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) != ss) XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss); return (0); } static int xgbe_enable_rx_vlan_stripping(struct xgbe_prv_data *pdata) { /* Put the VLAN tag in the Rx descriptor */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLRXS, 1); /* Don't check the VLAN type */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, DOVLTC, 1); /* Check only C-TAG (0x8100) packets */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ERSVLM, 0); /* Don't consider an S-TAG (0x88A8) packet as a VLAN packet */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ESVL, 0); /* Enable VLAN tag stripping */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, 0x3); axgbe_printf(0, "VLAN Stripping Enabled\n"); return (0); } static int xgbe_disable_rx_vlan_stripping(struct xgbe_prv_data *pdata) { XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, EVLS, 0); axgbe_printf(0, "VLAN Stripping Disabled\n"); return (0); } static int xgbe_enable_rx_vlan_filtering(struct xgbe_prv_data *pdata) { /* Enable VLAN filtering */ XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 1); /* Enable VLAN Hash Table filtering */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTHM, 1); /* Disable VLAN tag inverse matching */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTIM, 0); /* Only filter on the lower 12-bits of the VLAN tag */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ETV, 1); /* In order for the VLAN Hash Table filtering to be effective, * the VLAN tag identifier in the VLAN Tag Register must not * be zero. Set the VLAN tag identifier to "1" to enable the * VLAN Hash Table filtering. This implies that a VLAN tag of * 1 will always pass filtering. 
*/ XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VL, 1); axgbe_printf(0, "VLAN filtering Enabled\n"); return (0); } static int xgbe_disable_rx_vlan_filtering(struct xgbe_prv_data *pdata) { /* Disable VLAN filtering */ XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 0); axgbe_printf(0, "VLAN filtering Disabled\n"); return (0); } static uint32_t xgbe_vid_crc32_le(__le16 vid_le) { uint32_t crc = ~0; uint32_t temp = 0; unsigned char *data = (unsigned char *)&vid_le; unsigned char data_byte = 0; int i, bits; bits = get_bitmask_order(VLAN_VID_MASK); for (i = 0; i < bits; i++) { if ((i % 8) == 0) data_byte = data[i / 8]; temp = ((crc & 1) ^ data_byte) & 1; crc >>= 1; data_byte >>= 1; if (temp) crc ^= CRC32_POLY_LE; } return (crc); } static int xgbe_update_vlan_hash_table(struct xgbe_prv_data *pdata) { uint32_t crc; uint16_t vid; uint16_t vlan_hash_table = 0; __le16 vid_le = 0; axgbe_printf(1, "%s: Before updating VLANHTR 0x%x\n", __func__, XGMAC_IOREAD(pdata, MAC_VLANHTR)); /* Generate the VLAN Hash Table value */ for_each_set_bit(vid, pdata->active_vlans, VLAN_NVID) { /* Get the CRC32 value of the VLAN ID */ vid_le = cpu_to_le16(vid); crc = bitrev32(~xgbe_vid_crc32_le(vid_le)) >> 28; vlan_hash_table |= (1 << crc); axgbe_printf(1, "%s: vid 0x%x vid_le 0x%x crc 0x%x " "vlan_hash_table 0x%x\n", __func__, vid, vid_le, crc, vlan_hash_table); } /* Set the VLAN Hash Table filtering register */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANHTR, VLHT, vlan_hash_table); axgbe_printf(1, "%s: After updating VLANHTR 0x%x\n", __func__, XGMAC_IOREAD(pdata, MAC_VLANHTR)); return (0); } static int xgbe_set_promiscuous_mode(struct xgbe_prv_data *pdata, unsigned int enable) { unsigned int val = enable ? 1 : 0; if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == val) return (0); axgbe_printf(1, "%s promiscous mode\n", enable? "entering" : "leaving"); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, val); /* Hardware will still perform VLAN filtering in promiscuous mode */ if (enable) { axgbe_printf(1, "Disabling rx vlan filtering\n"); xgbe_disable_rx_vlan_filtering(pdata); } else { if ((if_getcapenable(pdata->netdev) & IFCAP_VLAN_HWFILTER)) { axgbe_printf(1, "Enabling rx vlan filtering\n"); xgbe_enable_rx_vlan_filtering(pdata); } } return (0); } static int xgbe_set_all_multicast_mode(struct xgbe_prv_data *pdata, unsigned int enable) { unsigned int val = enable ? 1 : 0; if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PM) == val) return (0); axgbe_printf(1,"%s allmulti mode\n", enable ? 
"entering" : "leaving"); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PM, val); return (0); } static void xgbe_set_mac_reg(struct xgbe_prv_data *pdata, char *addr, unsigned int *mac_reg) { unsigned int mac_addr_hi, mac_addr_lo; uint8_t *mac_addr; mac_addr_lo = 0; mac_addr_hi = 0; if (addr) { mac_addr = (uint8_t *)&mac_addr_lo; mac_addr[0] = addr[0]; mac_addr[1] = addr[1]; mac_addr[2] = addr[2]; mac_addr[3] = addr[3]; mac_addr = (uint8_t *)&mac_addr_hi; mac_addr[0] = addr[4]; mac_addr[1] = addr[5]; axgbe_printf(1, "adding mac address %pM at %#x\n", addr, *mac_reg); XGMAC_SET_BITS(mac_addr_hi, MAC_MACA1HR, AE, 1); } XGMAC_IOWRITE(pdata, *mac_reg, mac_addr_hi); *mac_reg += MAC_MACA_INC; XGMAC_IOWRITE(pdata, *mac_reg, mac_addr_lo); *mac_reg += MAC_MACA_INC; } static void xgbe_set_mac_addn_addrs(struct xgbe_prv_data *pdata) { unsigned int mac_reg; unsigned int addn_macs; mac_reg = MAC_MACA1HR; addn_macs = pdata->hw_feat.addn_mac; xgbe_set_mac_reg(pdata, pdata->mac_addr, &mac_reg); addn_macs--; /* Clear remaining additional MAC address entries */ while (addn_macs--) xgbe_set_mac_reg(pdata, NULL, &mac_reg); } static int xgbe_add_mac_addresses(struct xgbe_prv_data *pdata) { /* TODO - add support to set mac hash table */ xgbe_set_mac_addn_addrs(pdata); return (0); } static int xgbe_set_mac_address(struct xgbe_prv_data *pdata, uint8_t *addr) { unsigned int mac_addr_hi, mac_addr_lo; mac_addr_hi = (addr[5] << 8) | (addr[4] << 0); mac_addr_lo = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | (addr[0] << 0); XGMAC_IOWRITE(pdata, MAC_MACA0HR, mac_addr_hi); XGMAC_IOWRITE(pdata, MAC_MACA0LR, mac_addr_lo); return (0); } static int xgbe_config_rx_mode(struct xgbe_prv_data *pdata) { unsigned int pr_mode, am_mode; pr_mode = ((pdata->netdev->if_flags & IFF_PPROMISC) != 0); am_mode = ((pdata->netdev->if_flags & IFF_ALLMULTI) != 0); xgbe_set_promiscuous_mode(pdata, pr_mode); xgbe_set_all_multicast_mode(pdata, am_mode); xgbe_add_mac_addresses(pdata); return (0); } static int xgbe_clr_gpio(struct xgbe_prv_data *pdata, unsigned int gpio) { unsigned int reg; if (gpio > 15) return (-EINVAL); reg = XGMAC_IOREAD(pdata, MAC_GPIOSR); reg &= ~(1 << (gpio + 16)); XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg); return (0); } static int xgbe_set_gpio(struct xgbe_prv_data *pdata, unsigned int gpio) { unsigned int reg; if (gpio > 15) return (-EINVAL); reg = XGMAC_IOREAD(pdata, MAC_GPIOSR); reg |= (1 << (gpio + 16)); XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg); return (0); } static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, int mmd_reg) { unsigned long flags; unsigned int mmd_address, index, offset; int mmd_data; if (mmd_reg & MII_ADDR_C45) mmd_address = mmd_reg & ~MII_ADDR_C45; else mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); /* The PCS registers are accessed using mmio. The underlying * management interface uses indirect addressing to access the MMD * register sets. This requires accessing of the PCS register in two * phases, an address phase and a data phase. * * The mmio interface is based on 16-bit offsets and values. All * register offsets must therefore be adjusted by left shifting the * offset 1 bit and reading 16 bits of data. 
*/ mmd_address <<= 1; index = mmd_address & ~pdata->xpcs_window_mask; offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS32_IOWRITE(pdata, pdata->xpcs_window_sel_reg, index); mmd_data = XPCS16_IOREAD(pdata, offset); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); return (mmd_data); } static void xgbe_write_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, int mmd_reg, int mmd_data) { unsigned long flags; unsigned int mmd_address, index, offset; if (mmd_reg & MII_ADDR_C45) mmd_address = mmd_reg & ~MII_ADDR_C45; else mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); /* The PCS registers are accessed using mmio. The underlying * management interface uses indirect addressing to access the MMD * register sets. This requires accessing of the PCS register in two * phases, an address phase and a data phase. * * The mmio interface is based on 16-bit offsets and values. All * register offsets must therefore be adjusted by left shifting the * offset 1 bit and writing 16 bits of data. */ mmd_address <<= 1; index = mmd_address & ~pdata->xpcs_window_mask; offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS32_IOWRITE(pdata, pdata->xpcs_window_sel_reg, index); XPCS16_IOWRITE(pdata, offset, mmd_data); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); } static int xgbe_read_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, int mmd_reg) { unsigned long flags; unsigned int mmd_address; int mmd_data; if (mmd_reg & MII_ADDR_C45) mmd_address = mmd_reg & ~MII_ADDR_C45; else mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); /* The PCS registers are accessed using mmio. The underlying APB3 * management interface uses indirect addressing to access the MMD * register sets. This requires accessing of the PCS register in two * phases, an address phase and a data phase. * * The mmio interface is based on 32-bit offsets and values. All * register offsets must therefore be adjusted by left shifting the * offset 2 bits and reading 32 bits of data. */ spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8); mmd_data = XPCS32_IOREAD(pdata, (mmd_address & 0xff) << 2); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); return (mmd_data); } static void xgbe_write_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, int mmd_reg, int mmd_data) { unsigned int mmd_address; unsigned long flags; if (mmd_reg & MII_ADDR_C45) mmd_address = mmd_reg & ~MII_ADDR_C45; else mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); /* The PCS registers are accessed using mmio. The underlying APB3 * management interface uses indirect addressing to access the MMD * register sets. This requires accessing of the PCS register in two * phases, an address phase and a data phase. * * The mmio interface is based on 32-bit offsets and values. All * register offsets must therefore be adjusted by left shifting the * offset 2 bits and writing 32 bits of data. 
*/ spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8); XPCS32_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); } static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, int mmd_reg) { switch (pdata->vdata->xpcs_access) { case XGBE_XPCS_ACCESS_V1: return (xgbe_read_mmd_regs_v1(pdata, prtad, mmd_reg)); case XGBE_XPCS_ACCESS_V2: default: return (xgbe_read_mmd_regs_v2(pdata, prtad, mmd_reg)); } } static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, int mmd_reg, int mmd_data) { switch (pdata->vdata->xpcs_access) { case XGBE_XPCS_ACCESS_V1: return (xgbe_write_mmd_regs_v1(pdata, prtad, mmd_reg, mmd_data)); case XGBE_XPCS_ACCESS_V2: default: return (xgbe_write_mmd_regs_v2(pdata, prtad, mmd_reg, mmd_data)); } } static unsigned int xgbe_create_mdio_sca(int port, int reg) { unsigned int mdio_sca, da; da = (reg & MII_ADDR_C45) ? reg >> 16 : 0; mdio_sca = 0; XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg); XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port); XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da); return (mdio_sca); } static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, int reg, uint16_t val) { unsigned int mdio_sca, mdio_sccd; mtx_lock_spin(&pdata->mdio_mutex); mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, DATA, val); XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 1); XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1); XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd); if (msleep_spin(pdata, &pdata->mdio_mutex, "mdio_xfer", hz / 8) == EWOULDBLOCK) { axgbe_error("%s: MDIO write error\n", __func__); mtx_unlock_spin(&pdata->mdio_mutex); return (-ETIMEDOUT); } mtx_unlock_spin(&pdata->mdio_mutex); return (0); } static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, int reg) { unsigned int mdio_sca, mdio_sccd; mtx_lock_spin(&pdata->mdio_mutex); mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 3); XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1); XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd); if (msleep_spin(pdata, &pdata->mdio_mutex, "mdio_xfer", hz / 8) == EWOULDBLOCK) { axgbe_error("%s: MDIO read error\n", __func__); mtx_unlock_spin(&pdata->mdio_mutex); return (-ETIMEDOUT); } mtx_unlock_spin(&pdata->mdio_mutex); return (XGMAC_IOREAD_BITS(pdata, MAC_MDIOSCCDR, DATA)); } static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port, enum xgbe_mdio_mode mode) { unsigned int reg_val = XGMAC_IOREAD(pdata, MAC_MDIOCL22R); switch (mode) { case XGBE_MDIO_MODE_CL22: if (port > XGMAC_MAX_C22_PORT) return (-EINVAL); reg_val |= (1 << port); break; case XGBE_MDIO_MODE_CL45: break; default: return (-EINVAL); } XGMAC_IOWRITE(pdata, MAC_MDIOCL22R, reg_val); return (0); } static int xgbe_tx_complete(struct xgbe_ring_desc *rdesc) { return (!XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN)); } static int xgbe_disable_rx_csum(struct xgbe_prv_data *pdata) { XGMAC_IOWRITE_BITS(pdata, MAC_RCR, IPC, 0); axgbe_printf(0, "Receive checksum offload Disabled\n"); return (0); } static int xgbe_enable_rx_csum(struct xgbe_prv_data *pdata) { XGMAC_IOWRITE_BITS(pdata, MAC_RCR, IPC, 1); axgbe_printf(0, "Receive checksum offload Enabled\n"); return (0); } static void xgbe_tx_desc_reset(struct xgbe_ring_data *rdata) { struct xgbe_ring_desc 
*rdesc = rdata->rdesc; /* Reset the Tx descriptor * Set buffer 1 (lo) address to zero * Set buffer 1 (hi) address to zero * Reset all other control bits (IC, TTSE, B2L & B1L) * Reset all other control bits (OWN, CTXT, FD, LD, CPC, CIC, etc) */ rdesc->desc0 = 0; rdesc->desc1 = 0; rdesc->desc2 = 0; rdesc->desc3 = 0; wmb(); } static void xgbe_tx_desc_init(struct xgbe_channel *channel) { struct xgbe_ring *ring = channel->tx_ring; struct xgbe_ring_data *rdata; int i; int start_index = ring->cur; /* Initialize all descriptors */ for (i = 0; i < ring->rdesc_count; i++) { rdata = XGBE_GET_DESC_DATA(ring, i); /* Initialize Tx descriptor */ xgbe_tx_desc_reset(rdata); } /* Update the total number of Tx descriptors */ XGMAC_DMA_IOWRITE(channel, DMA_CH_TDRLR, ring->rdesc_count - 1); /* Update the starting address of descriptor ring */ rdata = XGBE_GET_DESC_DATA(ring, start_index); XGMAC_DMA_IOWRITE(channel, DMA_CH_TDLR_HI, upper_32_bits(rdata->rdata_paddr)); XGMAC_DMA_IOWRITE(channel, DMA_CH_TDLR_LO, lower_32_bits(rdata->rdata_paddr)); } static void xgbe_rx_desc_init(struct xgbe_channel *channel) { struct xgbe_ring *ring = channel->rx_ring; struct xgbe_ring_data *rdata; unsigned int start_index = ring->cur; /* * Just set desc_count and the starting address of the desc list * here. Rest will be done as part of the txrx path. */ /* Update the total number of Rx descriptors */ XGMAC_DMA_IOWRITE(channel, DMA_CH_RDRLR, ring->rdesc_count - 1); /* Update the starting address of descriptor ring */ rdata = XGBE_GET_DESC_DATA(ring, start_index); XGMAC_DMA_IOWRITE(channel, DMA_CH_RDLR_HI, upper_32_bits(rdata->rdata_paddr)); XGMAC_DMA_IOWRITE(channel, DMA_CH_RDLR_LO, lower_32_bits(rdata->rdata_paddr)); } static int xgbe_dev_read(struct xgbe_channel *channel) { struct xgbe_prv_data *pdata = channel->pdata; struct xgbe_ring *ring = channel->rx_ring; struct xgbe_ring_data *rdata; struct xgbe_ring_desc *rdesc; struct xgbe_packet_data *packet = &ring->packet_data; unsigned int err, etlt, l34t = 0; axgbe_printf(1, "-->xgbe_dev_read: cur = %d\n", ring->cur); rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdesc = rdata->rdesc; /* Check for data availability */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, OWN)) return (1); rmb(); if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, CTXT)) { /* TODO - Timestamp Context Descriptor */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT, 1); XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT_NEXT, 0); return (0); } /* Normal Descriptor, be sure Context Descriptor bit is off */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT, 0); /* Indicate if a Context Descriptor is next */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, CDA)) XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT_NEXT, 1); /* Get the header length */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) { XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST, 1); rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2, RX_NORMAL_DESC2, HL); if (rdata->rx.hdr_len) pdata->ext_stats.rx_split_header_packets++; } else XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST, 0); /* Get the RSS hash */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, RSV)) { XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, RSS_HASH, 1); packet->rss_hash = le32_to_cpu(rdesc->desc1); l34t = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, L34T); switch (l34t) { case RX_DESC3_L34T_IPV4_TCP: packet->rss_hash_type = M_HASHTYPE_RSS_TCP_IPV4; break;
case RX_DESC3_L34T_IPV4_UDP: packet->rss_hash_type = M_HASHTYPE_RSS_UDP_IPV4; break; case RX_DESC3_L34T_IPV6_TCP: packet->rss_hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case RX_DESC3_L34T_IPV6_UDP: packet->rss_hash_type = M_HASHTYPE_RSS_UDP_IPV6; break; default: packet->rss_hash_type = M_HASHTYPE_OPAQUE; break; } } /* Not all the data has been transferred for this packet */ if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) { /* This is not the last of the data for this packet */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST, 0); return (0); } /* This is the last of the data for this packet */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST, 1); /* Get the packet length */ rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); /* Set checksum done indicator as appropriate */ /* TODO - add tunneling support */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CSUM_DONE, 1); /* Check for errors (only valid in last descriptor) */ err = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ES); etlt = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ETLT); axgbe_printf(1, "%s: err=%u, etlt=%#x\n", __func__, err, etlt); if (!err || !etlt) { /* No error if err is 0 or etlt is 0 */ if (etlt == 0x09) { XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, VLAN_CTAG, 1); packet->vlan_ctag = XGMAC_GET_BITS_LE(rdesc->desc0, RX_NORMAL_DESC0, OVT); axgbe_printf(1, "vlan-ctag=%#06x\n", packet->vlan_ctag); } } else { unsigned int tnp = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, TNP); if ((etlt == 0x05) || (etlt == 0x06)) { axgbe_printf(1, "%s: err1 l34t %d err 0x%x etlt 0x%x\n", __func__, l34t, err, etlt); XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CSUM_DONE, 0); XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, TNPCSUM_DONE, 0); pdata->ext_stats.rx_csum_errors++; } else if (tnp && ((etlt == 0x09) || (etlt == 0x0a))) { axgbe_printf(1, "%s: err2 l34t %d err 0x%x etlt 0x%x\n", __func__, l34t, err, etlt); XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CSUM_DONE, 0); XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, TNPCSUM_DONE, 0); pdata->ext_stats.rx_vxlan_csum_errors++; } else { axgbe_printf(1, "%s: tnp %d l34t %d err 0x%x etlt 0x%x\n", __func__, tnp, l34t, err, etlt); axgbe_printf(1, "%s: Channel: %d SR 0x%x DSR 0x%x \n", __func__, channel->queue_index, XGMAC_DMA_IOREAD(channel, DMA_CH_SR), XGMAC_DMA_IOREAD(channel, DMA_CH_DSR)); axgbe_printf(1, "%s: ring cur %d dirty %d\n", __func__, ring->cur, ring->dirty); axgbe_printf(1, "%s: Desc 0x%08x-0x%08x-0x%08x-0x%08x\n", __func__, rdesc->desc0, rdesc->desc1, rdesc->desc2, rdesc->desc3); XGMAC_SET_BITS(packet->errors, RX_PACKET_ERRORS, FRAME, 1); } } axgbe_printf(1, "<--xgbe_dev_read: %s - descriptor=%u (cur=%d)\n", channel->name, ring->cur & (ring->rdesc_count - 1), ring->cur); return (0); } static int xgbe_is_context_desc(struct xgbe_ring_desc *rdesc) { /* Rx and Tx share CTXT bit, so check TDES3.CTXT bit */ return (XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, CTXT)); } static int xgbe_is_last_desc(struct xgbe_ring_desc *rdesc) { /* Rx and Tx share LD bit, so check TDES3.LD bit */ return (XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, LD)); } static int xgbe_enable_int(struct xgbe_channel *channel, enum xgbe_int int_id) { struct xgbe_prv_data *pdata = channel->pdata; axgbe_printf(1, "enable_int: DMA_CH_IER read - 0x%x\n", channel->curr_ier); switch (int_id) { case XGMAC_INT_DMA_CH_SR_TI: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 1); 
break; case XGMAC_INT_DMA_CH_SR_TPS: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TXSE, 1); break; case XGMAC_INT_DMA_CH_SR_TBU: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TBUE, 1); break; case XGMAC_INT_DMA_CH_SR_RI: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 1); break; case XGMAC_INT_DMA_CH_SR_RBU: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RBUE, 1); break; case XGMAC_INT_DMA_CH_SR_RPS: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RSE, 1); break; case XGMAC_INT_DMA_CH_SR_TI_RI: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 1); XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 1); break; case XGMAC_INT_DMA_CH_SR_FBE: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, FBEE, 1); break; case XGMAC_INT_DMA_ALL: channel->curr_ier |= channel->saved_ier; break; default: return (-1); } XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, channel->curr_ier); axgbe_printf(1, "enable_int: DMA_CH_IER write - 0x%x\n", channel->curr_ier); return (0); } static int xgbe_disable_int(struct xgbe_channel *channel, enum xgbe_int int_id) { struct xgbe_prv_data *pdata = channel->pdata; axgbe_printf(1, "disable_int: DMA_CH_IER read - 0x%x\n", channel->curr_ier); switch (int_id) { case XGMAC_INT_DMA_CH_SR_TI: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 0); break; case XGMAC_INT_DMA_CH_SR_TPS: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TXSE, 0); break; case XGMAC_INT_DMA_CH_SR_TBU: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TBUE, 0); break; case XGMAC_INT_DMA_CH_SR_RI: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 0); break; case XGMAC_INT_DMA_CH_SR_RBU: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RBUE, 0); break; case XGMAC_INT_DMA_CH_SR_RPS: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RSE, 0); break; case XGMAC_INT_DMA_CH_SR_TI_RI: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 0); XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 0); break; case XGMAC_INT_DMA_CH_SR_FBE: XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, FBEE, 0); break; case XGMAC_INT_DMA_ALL: channel->saved_ier = channel->curr_ier; channel->curr_ier = 0; break; default: return (-1); } XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, channel->curr_ier); axgbe_printf(1, "disable_int: DMA_CH_IER write - 0x%x\n", channel->curr_ier); return (0); } static int __xgbe_exit(struct xgbe_prv_data *pdata) { unsigned int count = 2000; /* Issue a software reset */ XGMAC_IOWRITE_BITS(pdata, DMA_MR, SWR, 1); DELAY(10); /* Poll Until Poll Condition */ while (--count && XGMAC_IOREAD_BITS(pdata, DMA_MR, SWR)) DELAY(500); if (!count) return (-EBUSY); return (0); } static int xgbe_exit(struct xgbe_prv_data *pdata) { int ret; /* To guard against possible incorrectly generated interrupts, * issue the software reset twice. 
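 *
 * Each __xgbe_exit() attempt polls DMA_MR.SWR for up to roughly one
 * second (2000 iterations of a 500 usec delay) and returns -EBUSY if
 * the bit never self-clears, so the double reset bounds the worst
 * case at about two seconds.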
*/ ret = __xgbe_exit(pdata); if (ret) { axgbe_error("%s: exit error %d\n", __func__, ret); return (ret); } return (__xgbe_exit(pdata)); } static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata) { unsigned int i, count; if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) < 0x21) return (0); for (i = 0; i < pdata->tx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, FTQ, 1); /* Poll Until Poll Condition */ for (i = 0; i < pdata->tx_q_count; i++) { count = 2000; while (--count && XGMAC_MTL_IOREAD_BITS(pdata, i, MTL_Q_TQOMR, FTQ)) DELAY(500); if (!count) return (-EBUSY); } return (0); } static void xgbe_config_dma_bus(struct xgbe_prv_data *pdata) { unsigned int sbmr; sbmr = XGMAC_IOREAD(pdata, DMA_SBMR); /* Set enhanced addressing mode */ XGMAC_SET_BITS(sbmr, DMA_SBMR, EAME, 1); /* Set the System Bus mode */ XGMAC_SET_BITS(sbmr, DMA_SBMR, UNDEF, 1); XGMAC_SET_BITS(sbmr, DMA_SBMR, BLEN, pdata->blen >> 2); XGMAC_SET_BITS(sbmr, DMA_SBMR, AAL, pdata->aal); XGMAC_SET_BITS(sbmr, DMA_SBMR, RD_OSR_LMT, pdata->rd_osr_limit - 1); XGMAC_SET_BITS(sbmr, DMA_SBMR, WR_OSR_LMT, pdata->wr_osr_limit - 1); XGMAC_IOWRITE(pdata, DMA_SBMR, sbmr); /* Set descriptor fetching threshold */ if (pdata->vdata->tx_desc_prefetch) XGMAC_IOWRITE_BITS(pdata, DMA_TXEDMACR, TDPS, pdata->vdata->tx_desc_prefetch); if (pdata->vdata->rx_desc_prefetch) XGMAC_IOWRITE_BITS(pdata, DMA_RXEDMACR, RDPS, pdata->vdata->rx_desc_prefetch); } static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata) { XGMAC_IOWRITE(pdata, DMA_AXIARCR, pdata->arcr); XGMAC_IOWRITE(pdata, DMA_AXIAWCR, pdata->awcr); if (pdata->awarcr) XGMAC_IOWRITE(pdata, DMA_AXIAWARCR, pdata->awarcr); } static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata) { unsigned int i; /* Set Tx to weighted round robin scheduling algorithm */ XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_WRR); /* Set Tx traffic classes to use WRR algorithm with equal weights */ for (i = 0; i < pdata->hw_feat.tc_cnt; i++) { XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, MTL_TSA_ETS); XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW, 1); } /* Set Rx to strict priority algorithm */ XGMAC_IOWRITE_BITS(pdata, MTL_OMR, RAA, MTL_RAA_SP); } static void xgbe_queue_flow_control_threshold(struct xgbe_prv_data *pdata, unsigned int queue, unsigned int q_fifo_size) { unsigned int frame_fifo_size; unsigned int rfa, rfd; frame_fifo_size = XGMAC_FLOW_CONTROL_ALIGN(xgbe_get_max_frame(pdata)); axgbe_printf(1, "%s: queue %d q_fifo_size %d frame_fifo_size 0x%x\n", __func__, queue, q_fifo_size, frame_fifo_size); /* TODO - add pfc/ets related support */ /* This path deals with just maximum frame sizes which are * limited to a jumbo frame of 9,000 (plus headers, etc.) * so we can never exceed the maximum allowable RFA/RFD * values. */ if (q_fifo_size <= 2048) { /* rx_rfd to zero to signal no flow control */ pdata->rx_rfa[queue] = 0; pdata->rx_rfd[queue] = 0; return; } if (q_fifo_size <= 4096) { /* Between 2048 and 4096 */ pdata->rx_rfa[queue] = 0; /* Full - 1024 bytes */ pdata->rx_rfd[queue] = 1; /* Full - 1536 bytes */ return; } if (q_fifo_size <= frame_fifo_size) { /* Between 4096 and max-frame */ pdata->rx_rfa[queue] = 2; /* Full - 2048 bytes */ pdata->rx_rfd[queue] = 5; /* Full - 3584 bytes */ return; } if (q_fifo_size <= (frame_fifo_size * 3)) { /* Between max-frame and 3 max-frames, * trigger if we get just over a frame of data and * resume when we have just under half a frame left. 
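 *
 * Worked example for this tier (sizes hypothetical): with
 * q_fifo_size = 16384 and frame_fifo_size = 10240,
 * rfa = 16384 - 10240 = 6144 and rfd = 6144 + (10240 / 2) = 11264,
 * which XGMAC_FLOW_CONTROL_VALUE() then scales down to the units
 * expected by the RFA/RFD register fields.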
*/ rfa = q_fifo_size - frame_fifo_size; rfd = rfa + (frame_fifo_size / 2); } else { /* Above 3 max-frames - trigger when just over * 2 frames of space available */ rfa = frame_fifo_size * 2; rfa += XGMAC_FLOW_CONTROL_UNIT; rfd = rfa + frame_fifo_size; } pdata->rx_rfa[queue] = XGMAC_FLOW_CONTROL_VALUE(rfa); pdata->rx_rfd[queue] = XGMAC_FLOW_CONTROL_VALUE(rfd); axgbe_printf(1, "%s: forced queue %d rfa 0x%x rfd 0x%x\n", __func__, queue, pdata->rx_rfa[queue], pdata->rx_rfd[queue]); } static void xgbe_calculate_flow_control_threshold(struct xgbe_prv_data *pdata, unsigned int *fifo) { unsigned int q_fifo_size; unsigned int i; for (i = 0; i < pdata->rx_q_count; i++) { q_fifo_size = (fifo[i] + 1) * XGMAC_FIFO_UNIT; axgbe_printf(1, "%s: fifo[%d] - 0x%x q_fifo_size 0x%x\n", __func__, i, fifo[i], q_fifo_size); xgbe_queue_flow_control_threshold(pdata, i, q_fifo_size); } } static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata) { unsigned int i; for (i = 0; i < pdata->rx_q_count; i++) { axgbe_printf(1, "%s: queue %d rfa %d rfd %d\n", __func__, i, pdata->rx_rfa[i], pdata->rx_rfd[i]); XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, pdata->rx_rfa[i]); XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, pdata->rx_rfd[i]); axgbe_printf(1, "%s: MTL_Q_RQFCR 0x%x\n", __func__, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQFCR)); } } static unsigned int xgbe_get_tx_fifo_size(struct xgbe_prv_data *pdata) { /* The configured value may not be the actual amount of fifo RAM */ return (min_t(unsigned int, pdata->tx_max_fifo_size, pdata->hw_feat.tx_fifo_size)); } static unsigned int xgbe_get_rx_fifo_size(struct xgbe_prv_data *pdata) { /* The configured value may not be the actual amount of fifo RAM */ return (min_t(unsigned int, pdata->rx_max_fifo_size, pdata->hw_feat.rx_fifo_size)); } static void xgbe_calculate_equal_fifo(unsigned int fifo_size, unsigned int queue_count, unsigned int *fifo) { unsigned int q_fifo_size; unsigned int p_fifo; unsigned int i; q_fifo_size = fifo_size / queue_count; /* Calculate the fifo setting by dividing the queue's fifo size * by the fifo allocation increment (with 0 representing the * base allocation increment so decrement the result by 1). */ p_fifo = q_fifo_size / XGMAC_FIFO_UNIT; if (p_fifo) p_fifo--; /* Distribute the fifo equally amongst the queues */ for (i = 0; i < queue_count; i++) fifo[i] = p_fifo; } static unsigned int xgbe_set_nonprio_fifos(unsigned int fifo_size, unsigned int queue_count, unsigned int *fifo) { unsigned int i; MPASS(powerof2(XGMAC_FIFO_MIN_ALLOC)); if (queue_count <= IEEE_8021QAZ_MAX_TCS) return (fifo_size); /* Rx queues 9 and up are for specialized packets, * such as PTP or DCB control packets, etc. 
and * don't require a large fifo */ for (i = IEEE_8021QAZ_MAX_TCS; i < queue_count; i++) { fifo[i] = (XGMAC_FIFO_MIN_ALLOC / XGMAC_FIFO_UNIT) - 1; fifo_size -= XGMAC_FIFO_MIN_ALLOC; } return (fifo_size); } static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata) { unsigned int fifo_size; unsigned int fifo[XGBE_MAX_QUEUES]; unsigned int i; fifo_size = xgbe_get_tx_fifo_size(pdata); axgbe_printf(1, "%s: fifo_size 0x%x\n", __func__, fifo_size); xgbe_calculate_equal_fifo(fifo_size, pdata->tx_q_count, fifo); for (i = 0; i < pdata->tx_q_count; i++) { XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo[i]); axgbe_printf(1, "Tx q %d FIFO Size 0x%x\n", i, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TQOMR)); } axgbe_printf(1, "%d Tx hardware queues, %d byte fifo per queue\n", pdata->tx_q_count, ((fifo[0] + 1) * XGMAC_FIFO_UNIT)); } static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata) { unsigned int fifo_size; unsigned int fifo[XGBE_MAX_QUEUES]; unsigned int prio_queues; unsigned int i; /* TODO - add pfc/ets related support */ /* Clear any DCB related fifo/queue information */ fifo_size = xgbe_get_rx_fifo_size(pdata); prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); axgbe_printf(1, "%s: fifo_size 0x%x rx_q_cnt %d prio %d\n", __func__, fifo_size, pdata->rx_q_count, prio_queues); /* Assign a minimum fifo to the non-VLAN priority queues */ fifo_size = xgbe_set_nonprio_fifos(fifo_size, pdata->rx_q_count, fifo); xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo); for (i = 0; i < pdata->rx_q_count; i++) { XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo[i]); axgbe_printf(1, "Rx q %d FIFO Size 0x%x\n", i, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQOMR)); } xgbe_calculate_flow_control_threshold(pdata, fifo); xgbe_config_flow_control_threshold(pdata); axgbe_printf(1, "%u Rx hardware queues, %u byte fifo/queue\n", pdata->rx_q_count, ((fifo[0] + 1) * XGMAC_FIFO_UNIT)); } static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) { unsigned int qptc, qptc_extra, queue; unsigned int prio_queues; unsigned int ppq, ppq_extra, prio; unsigned int mask; unsigned int i, j, reg, reg_val; /* Map the MTL Tx Queues to Traffic Classes * Note: Tx Queues >= Traffic Classes */ qptc = pdata->tx_q_count / pdata->hw_feat.tc_cnt; qptc_extra = pdata->tx_q_count % pdata->hw_feat.tc_cnt; for (i = 0, queue = 0; i < pdata->hw_feat.tc_cnt; i++) { for (j = 0; j < qptc; j++) { axgbe_printf(1, "TXq%u mapped to TC%u\n", queue, i); XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_TQOMR, Q2TCMAP, i); pdata->q2tc_map[queue++] = i; } if (i < qptc_extra) { axgbe_printf(1, "TXq%u mapped to TC%u\n", queue, i); XGMAC_MTL_IOWRITE_BITS(pdata, queue, MTL_Q_TQOMR, Q2TCMAP, i); pdata->q2tc_map[queue++] = i; } } /* Map the 8 VLAN priority values to available MTL Rx queues */ prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); ppq = IEEE_8021QAZ_MAX_TCS / prio_queues; ppq_extra = IEEE_8021QAZ_MAX_TCS % prio_queues; reg = MAC_RQC2R; reg_val = 0; for (i = 0, prio = 0; i < prio_queues;) { mask = 0; for (j = 0; j < ppq; j++) { axgbe_printf(1, "PRIO%u mapped to RXq%u\n", prio, i); mask |= (1 << prio); pdata->prio2q_map[prio++] = i; } if (i < ppq_extra) { axgbe_printf(1, "PRIO%u mapped to RXq%u\n", prio, i); mask |= (1 << prio); pdata->prio2q_map[prio++] = i; } reg_val |= (mask << ((i++ % MAC_RQC2_Q_PER_REG) << 3)); if ((i % MAC_RQC2_Q_PER_REG) && (i != prio_queues)) continue; XGMAC_IOWRITE(pdata, reg, reg_val); reg += MAC_RQC2_INC; reg_val = 0; } /* Select dynamic mapping of MTL Rx queue to DMA Rx channel */ reg = MTL_RQDCM0R; reg_val = 
0; for (i = 0; i < pdata->rx_q_count;) { reg_val |= (0x80 << ((i++ % MTL_RQDCM_Q_PER_REG) << 3)); if ((i % MTL_RQDCM_Q_PER_REG) && (i != pdata->rx_q_count)) continue; XGMAC_IOWRITE(pdata, reg, reg_val); reg += MTL_RQDCM_INC; reg_val = 0; } } static void xgbe_config_mac_address(struct xgbe_prv_data *pdata) { - xgbe_set_mac_address(pdata, IF_LLADDR(pdata->netdev)); + xgbe_set_mac_address(pdata, if_getlladdr(pdata->netdev)); /* Filtering is done using perfect filtering and hash filtering */ if (pdata->hw_feat.hash_table_size) { XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HPF, 1); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HUC, 1); XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HMC, 1); } } static void xgbe_config_jumbo_enable(struct xgbe_prv_data *pdata) { unsigned int val; val = (if_getmtu(pdata->netdev) > XGMAC_STD_PACKET_MTU) ? 1 : 0; XGMAC_IOWRITE_BITS(pdata, MAC_RCR, JE, val); } static void xgbe_config_mac_speed(struct xgbe_prv_data *pdata) { xgbe_set_speed(pdata, pdata->phy_speed); } static void xgbe_config_checksum_offload(struct xgbe_prv_data *pdata) { if ((if_getcapenable(pdata->netdev) & IFCAP_RXCSUM)) xgbe_enable_rx_csum(pdata); else xgbe_disable_rx_csum(pdata); } static void xgbe_config_vlan_support(struct xgbe_prv_data *pdata) { /* Indicate that VLAN Tx CTAGs come from context descriptors */ XGMAC_IOWRITE_BITS(pdata, MAC_VLANIR, CSVL, 0); XGMAC_IOWRITE_BITS(pdata, MAC_VLANIR, VLTI, 1); /* Set the current VLAN Hash Table register value */ xgbe_update_vlan_hash_table(pdata); if ((if_getcapenable(pdata->netdev) & IFCAP_VLAN_HWFILTER)) { axgbe_printf(1, "Enabling rx vlan filtering\n"); xgbe_enable_rx_vlan_filtering(pdata); } else { axgbe_printf(1, "Disabling rx vlan filtering\n"); xgbe_disable_rx_vlan_filtering(pdata); } if ((if_getcapenable(pdata->netdev) & IFCAP_VLAN_HWTAGGING)) { axgbe_printf(1, "Enabling rx vlan stripping\n"); xgbe_enable_rx_vlan_stripping(pdata); } else { axgbe_printf(1, "Disabling rx vlan stripping\n"); xgbe_disable_rx_vlan_stripping(pdata); } } static uint64_t xgbe_mmc_read(struct xgbe_prv_data *pdata, unsigned int reg_lo) { bool read_hi; uint64_t val; if (pdata->vdata->mmc_64bit) { switch (reg_lo) { /* These registers are always 32 bit */ case MMC_RXRUNTERROR: case MMC_RXJABBERERROR: case MMC_RXUNDERSIZE_G: case MMC_RXOVERSIZE_G: case MMC_RXWATCHDOGERROR: read_hi = false; break; default: read_hi = true; } } else { switch (reg_lo) { /* These registers are always 64 bit */ case MMC_TXOCTETCOUNT_GB_LO: case MMC_TXOCTETCOUNT_G_LO: case MMC_RXOCTETCOUNT_GB_LO: case MMC_RXOCTETCOUNT_G_LO: read_hi = true; break; default: read_hi = false; } } val = XGMAC_IOREAD(pdata, reg_lo); if (read_hi) val |= ((uint64_t)XGMAC_IOREAD(pdata, reg_lo + 4) << 32); return (val); } static void xgbe_tx_mmc_int(struct xgbe_prv_data *pdata) { struct xgbe_mmc_stats *stats = &pdata->mmc_stats; unsigned int mmc_isr = XGMAC_IOREAD(pdata, MMC_TISR); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXOCTETCOUNT_GB)) stats->txoctetcount_gb += xgbe_mmc_read(pdata, MMC_TXOCTETCOUNT_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXFRAMECOUNT_GB)) stats->txframecount_gb += xgbe_mmc_read(pdata, MMC_TXFRAMECOUNT_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXBROADCASTFRAMES_G)) stats->txbroadcastframes_g += xgbe_mmc_read(pdata, MMC_TXBROADCASTFRAMES_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXMULTICASTFRAMES_G)) stats->txmulticastframes_g += xgbe_mmc_read(pdata, MMC_TXMULTICASTFRAMES_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX64OCTETS_GB)) stats->tx64octets_gb += xgbe_mmc_read(pdata, MMC_TX64OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, 
MMC_TISR, TX65TO127OCTETS_GB)) stats->tx65to127octets_gb += xgbe_mmc_read(pdata, MMC_TX65TO127OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX128TO255OCTETS_GB)) stats->tx128to255octets_gb += xgbe_mmc_read(pdata, MMC_TX128TO255OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX256TO511OCTETS_GB)) stats->tx256to511octets_gb += xgbe_mmc_read(pdata, MMC_TX256TO511OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX512TO1023OCTETS_GB)) stats->tx512to1023octets_gb += xgbe_mmc_read(pdata, MMC_TX512TO1023OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TX1024TOMAXOCTETS_GB)) stats->tx1024tomaxoctets_gb += xgbe_mmc_read(pdata, MMC_TX1024TOMAXOCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXUNICASTFRAMES_GB)) stats->txunicastframes_gb += xgbe_mmc_read(pdata, MMC_TXUNICASTFRAMES_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXMULTICASTFRAMES_GB)) stats->txmulticastframes_gb += xgbe_mmc_read(pdata, MMC_TXMULTICASTFRAMES_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXBROADCASTFRAMES_GB)) stats->txbroadcastframes_gb += xgbe_mmc_read(pdata, MMC_TXBROADCASTFRAMES_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXUNDERFLOWERROR)) stats->txunderflowerror += xgbe_mmc_read(pdata, MMC_TXUNDERFLOWERROR_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXOCTETCOUNT_G)) stats->txoctetcount_g += xgbe_mmc_read(pdata, MMC_TXOCTETCOUNT_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXFRAMECOUNT_G)) stats->txframecount_g += xgbe_mmc_read(pdata, MMC_TXFRAMECOUNT_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXPAUSEFRAMES)) stats->txpauseframes += xgbe_mmc_read(pdata, MMC_TXPAUSEFRAMES_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_TISR, TXVLANFRAMES_G)) stats->txvlanframes_g += xgbe_mmc_read(pdata, MMC_TXVLANFRAMES_G_LO); } static void xgbe_rx_mmc_int(struct xgbe_prv_data *pdata) { struct xgbe_mmc_stats *stats = &pdata->mmc_stats; unsigned int mmc_isr = XGMAC_IOREAD(pdata, MMC_RISR); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXFRAMECOUNT_GB)) stats->rxframecount_gb += xgbe_mmc_read(pdata, MMC_RXFRAMECOUNT_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOCTETCOUNT_GB)) stats->rxoctetcount_gb += xgbe_mmc_read(pdata, MMC_RXOCTETCOUNT_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOCTETCOUNT_G)) stats->rxoctetcount_g += xgbe_mmc_read(pdata, MMC_RXOCTETCOUNT_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXBROADCASTFRAMES_G)) stats->rxbroadcastframes_g += xgbe_mmc_read(pdata, MMC_RXBROADCASTFRAMES_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXMULTICASTFRAMES_G)) stats->rxmulticastframes_g += xgbe_mmc_read(pdata, MMC_RXMULTICASTFRAMES_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXCRCERROR)) stats->rxcrcerror += xgbe_mmc_read(pdata, MMC_RXCRCERROR_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXRUNTERROR)) stats->rxrunterror += xgbe_mmc_read(pdata, MMC_RXRUNTERROR); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXJABBERERROR)) stats->rxjabbererror += xgbe_mmc_read(pdata, MMC_RXJABBERERROR); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXUNDERSIZE_G)) stats->rxundersize_g += xgbe_mmc_read(pdata, MMC_RXUNDERSIZE_G); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOVERSIZE_G)) stats->rxoversize_g += xgbe_mmc_read(pdata, MMC_RXOVERSIZE_G); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX64OCTETS_GB)) stats->rx64octets_gb += xgbe_mmc_read(pdata, MMC_RX64OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX65TO127OCTETS_GB)) stats->rx65to127octets_gb += xgbe_mmc_read(pdata, MMC_RX65TO127OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX128TO255OCTETS_GB)) stats->rx128to255octets_gb += xgbe_mmc_read(pdata, MMC_RX128TO255OCTETS_GB_LO); if
(XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX256TO511OCTETS_GB)) stats->rx256to511octets_gb += xgbe_mmc_read(pdata, MMC_RX256TO511OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX512TO1023OCTETS_GB)) stats->rx512to1023octets_gb += xgbe_mmc_read(pdata, MMC_RX512TO1023OCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RX1024TOMAXOCTETS_GB)) stats->rx1024tomaxoctets_gb += xgbe_mmc_read(pdata, MMC_RX1024TOMAXOCTETS_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXUNICASTFRAMES_G)) stats->rxunicastframes_g += xgbe_mmc_read(pdata, MMC_RXUNICASTFRAMES_G_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXLENGTHERROR)) stats->rxlengtherror += xgbe_mmc_read(pdata, MMC_RXLENGTHERROR_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXOUTOFRANGETYPE)) stats->rxoutofrangetype += xgbe_mmc_read(pdata, MMC_RXOUTOFRANGETYPE_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXPAUSEFRAMES)) stats->rxpauseframes += xgbe_mmc_read(pdata, MMC_RXPAUSEFRAMES_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXFIFOOVERFLOW)) stats->rxfifooverflow += xgbe_mmc_read(pdata, MMC_RXFIFOOVERFLOW_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXVLANFRAMES_GB)) stats->rxvlanframes_gb += xgbe_mmc_read(pdata, MMC_RXVLANFRAMES_GB_LO); if (XGMAC_GET_BITS(mmc_isr, MMC_RISR, RXWATCHDOGERROR)) stats->rxwatchdogerror += xgbe_mmc_read(pdata, MMC_RXWATCHDOGERROR); } static void xgbe_read_mmc_stats(struct xgbe_prv_data *pdata) { struct xgbe_mmc_stats *stats = &pdata->mmc_stats; /* Freeze counters */ XGMAC_IOWRITE_BITS(pdata, MMC_CR, MCF, 1); stats->txoctetcount_gb += xgbe_mmc_read(pdata, MMC_TXOCTETCOUNT_GB_LO); stats->txframecount_gb += xgbe_mmc_read(pdata, MMC_TXFRAMECOUNT_GB_LO); stats->txbroadcastframes_g += xgbe_mmc_read(pdata, MMC_TXBROADCASTFRAMES_G_LO); stats->txmulticastframes_g += xgbe_mmc_read(pdata, MMC_TXMULTICASTFRAMES_G_LO); stats->tx64octets_gb += xgbe_mmc_read(pdata, MMC_TX64OCTETS_GB_LO); stats->tx65to127octets_gb += xgbe_mmc_read(pdata, MMC_TX65TO127OCTETS_GB_LO); stats->tx128to255octets_gb += xgbe_mmc_read(pdata, MMC_TX128TO255OCTETS_GB_LO); stats->tx256to511octets_gb += xgbe_mmc_read(pdata, MMC_TX256TO511OCTETS_GB_LO); stats->tx512to1023octets_gb += xgbe_mmc_read(pdata, MMC_TX512TO1023OCTETS_GB_LO); stats->tx1024tomaxoctets_gb += xgbe_mmc_read(pdata, MMC_TX1024TOMAXOCTETS_GB_LO); stats->txunicastframes_gb += xgbe_mmc_read(pdata, MMC_TXUNICASTFRAMES_GB_LO); stats->txmulticastframes_gb += xgbe_mmc_read(pdata, MMC_TXMULTICASTFRAMES_GB_LO); stats->txbroadcastframes_gb += xgbe_mmc_read(pdata, MMC_TXBROADCASTFRAMES_GB_LO); stats->txunderflowerror += xgbe_mmc_read(pdata, MMC_TXUNDERFLOWERROR_LO); stats->txoctetcount_g += xgbe_mmc_read(pdata, MMC_TXOCTETCOUNT_G_LO); stats->txframecount_g += xgbe_mmc_read(pdata, MMC_TXFRAMECOUNT_G_LO); stats->txpauseframes += xgbe_mmc_read(pdata, MMC_TXPAUSEFRAMES_LO); stats->txvlanframes_g += xgbe_mmc_read(pdata, MMC_TXVLANFRAMES_G_LO); stats->rxframecount_gb += xgbe_mmc_read(pdata, MMC_RXFRAMECOUNT_GB_LO); stats->rxoctetcount_gb += xgbe_mmc_read(pdata, MMC_RXOCTETCOUNT_GB_LO); stats->rxoctetcount_g += xgbe_mmc_read(pdata, MMC_RXOCTETCOUNT_G_LO); stats->rxbroadcastframes_g += xgbe_mmc_read(pdata, MMC_RXBROADCASTFRAMES_G_LO); stats->rxmulticastframes_g += xgbe_mmc_read(pdata, MMC_RXMULTICASTFRAMES_G_LO); stats->rxcrcerror += xgbe_mmc_read(pdata, MMC_RXCRCERROR_LO); stats->rxrunterror += xgbe_mmc_read(pdata, MMC_RXRUNTERROR); stats->rxjabbererror += xgbe_mmc_read(pdata, MMC_RXJABBERERROR); stats->rxundersize_g += xgbe_mmc_read(pdata, MMC_RXUNDERSIZE_G); stats->rxoversize_g += xgbe_mmc_read(pdata, MMC_RXOVERSIZE_G); 
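/* The runt, jabber, undersize and oversize counters above (and the Rx
 * watchdog counter below) are read by their direct register name, with
 * no _LO suffix, because xgbe_mmc_read() classifies them as always
 * 32-bit registers and never folds in a high word.
 */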
stats->rx64octets_gb += xgbe_mmc_read(pdata, MMC_RX64OCTETS_GB_LO); stats->rx65to127octets_gb += xgbe_mmc_read(pdata, MMC_RX65TO127OCTETS_GB_LO); stats->rx128to255octets_gb += xgbe_mmc_read(pdata, MMC_RX128TO255OCTETS_GB_LO); stats->rx256to511octets_gb += xgbe_mmc_read(pdata, MMC_RX256TO511OCTETS_GB_LO); stats->rx512to1023octets_gb += xgbe_mmc_read(pdata, MMC_RX512TO1023OCTETS_GB_LO); stats->rx1024tomaxoctets_gb += xgbe_mmc_read(pdata, MMC_RX1024TOMAXOCTETS_GB_LO); stats->rxunicastframes_g += xgbe_mmc_read(pdata, MMC_RXUNICASTFRAMES_G_LO); stats->rxlengtherror += xgbe_mmc_read(pdata, MMC_RXLENGTHERROR_LO); stats->rxoutofrangetype += xgbe_mmc_read(pdata, MMC_RXOUTOFRANGETYPE_LO); stats->rxpauseframes += xgbe_mmc_read(pdata, MMC_RXPAUSEFRAMES_LO); stats->rxfifooverflow += xgbe_mmc_read(pdata, MMC_RXFIFOOVERFLOW_LO); stats->rxvlanframes_gb += xgbe_mmc_read(pdata, MMC_RXVLANFRAMES_GB_LO); stats->rxwatchdogerror += xgbe_mmc_read(pdata, MMC_RXWATCHDOGERROR); /* Un-freeze counters */ XGMAC_IOWRITE_BITS(pdata, MMC_CR, MCF, 0); } static void xgbe_config_mmc(struct xgbe_prv_data *pdata) { /* Set counters to reset on read */ XGMAC_IOWRITE_BITS(pdata, MMC_CR, ROR, 1); /* Reset the counters */ XGMAC_IOWRITE_BITS(pdata, MMC_CR, CR, 1); } static void xgbe_txq_prepare_tx_stop(struct xgbe_prv_data *pdata, unsigned int queue) { unsigned int tx_status; unsigned long tx_timeout; /* The Tx engine cannot be stopped if it is actively processing * packets. Wait for the Tx queue to empty the Tx fifo. Don't * wait forever though... */ tx_timeout = ticks + (XGBE_DMA_STOP_TIMEOUT * hz); while (ticks < tx_timeout) { tx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_TQDR); if ((XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TRCSTS) != 1) && (XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TXQSTS) == 0)) break; DELAY(500); } if (ticks >= tx_timeout) axgbe_printf(1, "timed out waiting for Tx queue %u to empty\n", queue); } static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata, unsigned int queue) { unsigned int tx_dsr, tx_pos, tx_qidx; unsigned int tx_status; unsigned long tx_timeout; if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) > 0x20) return (xgbe_txq_prepare_tx_stop(pdata, queue)); /* Calculate the status register to read and the position within */ if (queue < DMA_DSRX_FIRST_QUEUE) { tx_dsr = DMA_DSR0; tx_pos = (queue * DMA_DSR_Q_WIDTH) + DMA_DSR0_TPS_START; } else { tx_qidx = queue - DMA_DSRX_FIRST_QUEUE; tx_dsr = DMA_DSR1 + ((tx_qidx / DMA_DSRX_QPR) * DMA_DSRX_INC); tx_pos = ((tx_qidx % DMA_DSRX_QPR) * DMA_DSR_Q_WIDTH) + DMA_DSRX_TPS_START; } /* The Tx engine cannot be stopped if it is actively processing * descriptors. Wait for the Tx engine to enter the stopped or * suspended state. Don't wait forever though... 
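 *
 * The loop below samples the per-queue TPS (transmit process state)
 * field every 500 usec and gives up after XGBE_DMA_STOP_TIMEOUT
 * seconds, the same bounded-wait pattern used by the MTL-queue and
 * Rx-side stop paths.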
*/ tx_timeout = ticks + (XGBE_DMA_STOP_TIMEOUT * hz); while (ticks < tx_timeout) { tx_status = XGMAC_IOREAD(pdata, tx_dsr); tx_status = GET_BITS(tx_status, tx_pos, DMA_DSR_TPS_WIDTH); if ((tx_status == DMA_TPS_STOPPED) || (tx_status == DMA_TPS_SUSPENDED)) break; DELAY(500); } if (ticks >= tx_timeout) axgbe_printf(1, "timed out waiting for Tx DMA channel %u to stop\n", queue); } static void xgbe_enable_tx(struct xgbe_prv_data *pdata) { unsigned int i; /* Enable each Tx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1); } /* Enable each Tx queue */ for (i = 0; i < pdata->tx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, MTL_Q_ENABLED); /* Enable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 1); } static void xgbe_disable_tx(struct xgbe_prv_data *pdata) { unsigned int i; /* Prepare for Tx DMA channel stop */ for (i = 0; i < pdata->tx_q_count; i++) xgbe_prepare_tx_stop(pdata, i); /* Disable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); /* Disable each Tx queue */ for (i = 0; i < pdata->tx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0); /* Disable each Tx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0); } } static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata, unsigned int queue) { unsigned int rx_status; unsigned long rx_timeout; /* The Rx engine cannot be stopped if it is actively processing * packets. Wait for the Rx queue to empty the Rx fifo. Don't * wait forever though... */ rx_timeout = ticks + (XGBE_DMA_STOP_TIMEOUT * hz); while (ticks < rx_timeout) { rx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_RQDR); if ((XGMAC_GET_BITS(rx_status, MTL_Q_RQDR, PRXQ) == 0) && (XGMAC_GET_BITS(rx_status, MTL_Q_RQDR, RXQSTS) == 0)) break; DELAY(500); } if (ticks >= rx_timeout) axgbe_printf(1, "timed out waiting for Rx queue %d to empty\n", queue); } static void xgbe_enable_rx(struct xgbe_prv_data *pdata) { unsigned int reg_val, i; /* Enable each Rx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1); } /* Enable each Rx queue */ reg_val = 0; for (i = 0; i < pdata->rx_q_count; i++) reg_val |= (0x02 << (i << 1)); XGMAC_IOWRITE(pdata, MAC_RQC0R, reg_val); /* Enable MAC Rx */ XGMAC_IOWRITE_BITS(pdata, MAC_RCR, DCRCC, 1); XGMAC_IOWRITE_BITS(pdata, MAC_RCR, CST, 1); XGMAC_IOWRITE_BITS(pdata, MAC_RCR, ACS, 1); XGMAC_IOWRITE_BITS(pdata, MAC_RCR, RE, 1); } static void xgbe_disable_rx(struct xgbe_prv_data *pdata) { unsigned int i; /* Disable MAC Rx */ XGMAC_IOWRITE_BITS(pdata, MAC_RCR, DCRCC, 0); XGMAC_IOWRITE_BITS(pdata, MAC_RCR, CST, 0); XGMAC_IOWRITE_BITS(pdata, MAC_RCR, ACS, 0); XGMAC_IOWRITE_BITS(pdata, MAC_RCR, RE, 0); /* Prepare for Rx DMA channel stop */ for (i = 0; i < pdata->rx_q_count; i++) xgbe_prepare_rx_stop(pdata, i); /* Disable each Rx queue */ XGMAC_IOWRITE(pdata, MAC_RQC0R, 0); /* Disable each Rx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0); } } static void xgbe_powerup_tx(struct xgbe_prv_data *pdata) { unsigned int i; /* Enable each Tx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 
1); } /* Enable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 1); } static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata) { unsigned int i; /* Prepare for Tx DMA channel stop */ for (i = 0; i < pdata->tx_q_count; i++) xgbe_prepare_tx_stop(pdata, i); /* Disable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); /* Disable each Tx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0); } } static void xgbe_powerup_rx(struct xgbe_prv_data *pdata) { unsigned int i; /* Enable each Rx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1); } } static void xgbe_powerdown_rx(struct xgbe_prv_data *pdata) { unsigned int i; /* Disable each Rx DMA channel */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0); } } static int xgbe_init(struct xgbe_prv_data *pdata) { struct xgbe_desc_if *desc_if = &pdata->desc_if; int ret; /* Flush Tx queues */ ret = xgbe_flush_tx_queues(pdata); if (ret) { axgbe_error("error flushing TX queues\n"); return (ret); } /* * Initialize DMA related features */ xgbe_config_dma_bus(pdata); xgbe_config_dma_cache(pdata); xgbe_config_osp_mode(pdata); xgbe_config_pbl_val(pdata); xgbe_config_rx_coalesce(pdata); xgbe_config_tx_coalesce(pdata); xgbe_config_rx_buffer_size(pdata); xgbe_config_tso_mode(pdata); xgbe_config_sph_mode(pdata); xgbe_config_rss(pdata); desc_if->wrapper_tx_desc_init(pdata); desc_if->wrapper_rx_desc_init(pdata); xgbe_enable_dma_interrupts(pdata); /* * Initialize MTL related features */ xgbe_config_mtl_mode(pdata); xgbe_config_queue_mapping(pdata); xgbe_config_tsf_mode(pdata, pdata->tx_sf_mode); xgbe_config_rsf_mode(pdata, pdata->rx_sf_mode); xgbe_config_tx_threshold(pdata, pdata->tx_threshold); xgbe_config_rx_threshold(pdata, pdata->rx_threshold); xgbe_config_tx_fifo_size(pdata); xgbe_config_rx_fifo_size(pdata); /*TODO: Error Packet and undersized good Packet forwarding enable (FEP and FUP) */ xgbe_enable_mtl_interrupts(pdata); /* * Initialize MAC related features */ xgbe_config_mac_address(pdata); xgbe_config_rx_mode(pdata); xgbe_config_jumbo_enable(pdata); xgbe_config_flow_control(pdata); xgbe_config_mac_speed(pdata); xgbe_config_checksum_offload(pdata); xgbe_config_vlan_support(pdata); xgbe_config_mmc(pdata); xgbe_enable_mac_interrupts(pdata); return (0); } void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) { hw_if->tx_complete = xgbe_tx_complete; hw_if->set_mac_address = xgbe_set_mac_address; hw_if->config_rx_mode = xgbe_config_rx_mode; hw_if->enable_rx_csum = xgbe_enable_rx_csum; hw_if->disable_rx_csum = xgbe_disable_rx_csum; hw_if->enable_rx_vlan_stripping = xgbe_enable_rx_vlan_stripping; hw_if->disable_rx_vlan_stripping = xgbe_disable_rx_vlan_stripping; hw_if->enable_rx_vlan_filtering = xgbe_enable_rx_vlan_filtering; hw_if->disable_rx_vlan_filtering = xgbe_disable_rx_vlan_filtering; hw_if->update_vlan_hash_table = xgbe_update_vlan_hash_table; hw_if->read_mmd_regs = xgbe_read_mmd_regs; hw_if->write_mmd_regs = xgbe_write_mmd_regs; hw_if->set_speed = xgbe_set_speed; hw_if->set_ext_mii_mode = xgbe_set_ext_mii_mode; hw_if->read_ext_mii_regs = xgbe_read_ext_mii_regs; hw_if->write_ext_mii_regs = xgbe_write_ext_mii_regs; hw_if->set_gpio = xgbe_set_gpio; hw_if->clr_gpio = xgbe_clr_gpio; hw_if->enable_tx = xgbe_enable_tx; hw_if->disable_tx = 
xgbe_disable_tx; hw_if->enable_rx = xgbe_enable_rx; hw_if->disable_rx = xgbe_disable_rx; hw_if->powerup_tx = xgbe_powerup_tx; hw_if->powerdown_tx = xgbe_powerdown_tx; hw_if->powerup_rx = xgbe_powerup_rx; hw_if->powerdown_rx = xgbe_powerdown_rx; hw_if->dev_read = xgbe_dev_read; hw_if->enable_int = xgbe_enable_int; hw_if->disable_int = xgbe_disable_int; hw_if->init = xgbe_init; hw_if->exit = xgbe_exit; /* Descriptor related Sequences have to be initialized here */ hw_if->tx_desc_init = xgbe_tx_desc_init; hw_if->rx_desc_init = xgbe_rx_desc_init; hw_if->tx_desc_reset = xgbe_tx_desc_reset; hw_if->is_last_desc = xgbe_is_last_desc; hw_if->is_context_desc = xgbe_is_context_desc; /* For FLOW ctrl */ hw_if->config_tx_flow_control = xgbe_config_tx_flow_control; hw_if->config_rx_flow_control = xgbe_config_rx_flow_control; /* For RX coalescing */ hw_if->config_rx_coalesce = xgbe_config_rx_coalesce; hw_if->config_tx_coalesce = xgbe_config_tx_coalesce; hw_if->usec_to_riwt = xgbe_usec_to_riwt; hw_if->riwt_to_usec = xgbe_riwt_to_usec; /* For RX and TX threshold config */ hw_if->config_rx_threshold = xgbe_config_rx_threshold; hw_if->config_tx_threshold = xgbe_config_tx_threshold; /* For RX and TX Store and Forward Mode config */ hw_if->config_rsf_mode = xgbe_config_rsf_mode; hw_if->config_tsf_mode = xgbe_config_tsf_mode; /* For TX DMA Operating on Second Frame config */ hw_if->config_osp_mode = xgbe_config_osp_mode; /* For MMC statistics support */ hw_if->tx_mmc_int = xgbe_tx_mmc_int; hw_if->rx_mmc_int = xgbe_rx_mmc_int; hw_if->read_mmc_stats = xgbe_read_mmc_stats; /* For Receive Side Scaling */ hw_if->enable_rss = xgbe_enable_rss; hw_if->disable_rss = xgbe_disable_rss; hw_if->set_rss_hash_key = xgbe_set_rss_hash_key; hw_if->set_rss_lookup_table = xgbe_set_rss_lookup_table; } diff --git a/sys/dev/axgbe/xgbe-drv.c b/sys/dev/axgbe/xgbe-drv.c index 017c3c9bc6ac..6de58ee83621 100644 --- a/sys/dev/axgbe/xgbe-drv.c +++ b/sys/dev/axgbe/xgbe-drv.c @@ -1,347 +1,347 @@ /* * AMD 10Gb Ethernet driver * * Copyright (c) 2014-2016,2020 Advanced Micro Devices, Inc. * * This file is available to you under your choice of the following two * licenses: * * License 1: GPLv2 * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or (at * your option) any later version. * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. 
Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * * License 2: Modified BSD * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Advanced Micro Devices, Inc. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. 
Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "xgbe.h" #include "xgbe-common.h" int -xgbe_calc_rx_buf_size(struct ifnet *netdev, unsigned int mtu) +xgbe_calc_rx_buf_size(if_t netdev, unsigned int mtu) { unsigned int rx_buf_size; if (mtu > XGMAC_JUMBO_PACKET_MTU) return (-EINVAL); rx_buf_size = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; rx_buf_size = min(max(rx_buf_size, XGBE_RX_MIN_BUF_SIZE), PAGE_SIZE); rx_buf_size = (rx_buf_size + XGBE_RX_BUF_ALIGN - 1) & ~(XGBE_RX_BUF_ALIGN - 1); return (rx_buf_size); } void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) { unsigned int mac_hfr0, mac_hfr1, mac_hfr2; struct xgbe_hw_features *hw_feat = &pdata->hw_feat; DBGPR("-->xgbe_get_all_hw_features\n"); mac_hfr0 = XGMAC_IOREAD(pdata, MAC_HWF0R); mac_hfr1 = XGMAC_IOREAD(pdata, MAC_HWF1R); mac_hfr2 = XGMAC_IOREAD(pdata, MAC_HWF2R); memset(hw_feat, 0, sizeof(*hw_feat)); hw_feat->version = XGMAC_IOREAD(pdata, MAC_VR); /* Hardware feature register 0 */ hw_feat->gmii = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, GMIISEL); hw_feat->vlhash = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, VLHASH); hw_feat->sma = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, SMASEL); hw_feat->rwk = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, RWKSEL); hw_feat->mgk = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, MGKSEL); hw_feat->mmc = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, MMCSEL); hw_feat->aoe = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, ARPOFFSEL); hw_feat->ts = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, TSSEL); hw_feat->eee = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, EEESEL); hw_feat->tx_coe = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, TXCOESEL); hw_feat->rx_coe = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, RXCOESEL); hw_feat->addn_mac = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, ADDMACADRSEL); hw_feat->ts_src = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, TSSTSSEL); hw_feat->sa_vlan_ins = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, SAVLANINS); hw_feat->vxn = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, VXN); /* Hardware feature register 1 */ hw_feat->rx_fifo_size = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, RXFIFOSIZE); hw_feat->tx_fifo_size = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, TXFIFOSIZE); hw_feat->adv_ts_hi = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, ADVTHWORD); hw_feat->dma_width = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, ADDR64); hw_feat->dcb = 
XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, DCBEN); hw_feat->sph = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, SPHEN); hw_feat->tso = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, TSOEN); hw_feat->dma_debug = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, DBGMEMA); hw_feat->rss = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, RSSEN); hw_feat->tc_cnt = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, NUMTC); hw_feat->hash_table_size = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, HASHTBLSZ); hw_feat->l3l4_filter_num = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, L3L4FNUM); /* Hardware feature register 2 */ hw_feat->rx_q_cnt = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, RXQCNT); hw_feat->tx_q_cnt = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, TXQCNT); hw_feat->rx_ch_cnt = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, RXCHCNT); hw_feat->tx_ch_cnt = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, TXCHCNT); hw_feat->pps_out_num = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, PPSOUTNUM); hw_feat->aux_snap_num = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, AUXSNAPNUM); /* Translate the Hash Table size into actual number */ switch (hw_feat->hash_table_size) { case 0: break; case 1: hw_feat->hash_table_size = 64; break; case 2: hw_feat->hash_table_size = 128; break; case 3: hw_feat->hash_table_size = 256; break; } /* Translate the address width setting into actual number */ switch (hw_feat->dma_width) { case 0: hw_feat->dma_width = 32; break; case 1: hw_feat->dma_width = 40; break; case 2: hw_feat->dma_width = 48; break; default: hw_feat->dma_width = 32; } /* The Queue, Channel and TC counts are zero based so increment them * to get the actual number */ hw_feat->rx_q_cnt++; hw_feat->tx_q_cnt++; hw_feat->rx_ch_cnt++; hw_feat->tx_ch_cnt++; hw_feat->tc_cnt++; /* Translate the fifo sizes into actual numbers */ hw_feat->rx_fifo_size = 1 << (hw_feat->rx_fifo_size + 7); hw_feat->tx_fifo_size = 1 << (hw_feat->tx_fifo_size + 7); DBGPR("%s: Tx fifo 0x%x Rx fifo 0x%x\n", __func__, hw_feat->tx_fifo_size, hw_feat->rx_fifo_size); DBGPR("Hardware features:\n"); /* Hardware feature register 0 */ DBGPR(" 1GbE support : %s\n", hw_feat->gmii ? "yes" : "no"); DBGPR(" VLAN hash filter : %s\n", hw_feat->vlhash ? "yes" : "no"); DBGPR(" MDIO interface : %s\n", hw_feat->sma ? "yes" : "no"); DBGPR(" Wake-up packet support : %s\n", hw_feat->rwk ? "yes" : "no"); DBGPR(" Magic packet support : %s\n", hw_feat->mgk ? "yes" : "no"); DBGPR(" Management counters : %s\n", hw_feat->mmc ? "yes" : "no"); DBGPR(" ARP offload : %s\n", hw_feat->aoe ? "yes" : "no"); DBGPR(" IEEE 1588-2008 Timestamp : %s\n", hw_feat->ts ? "yes" : "no"); DBGPR(" Energy Efficient Ethernet : %s\n", hw_feat->eee ? "yes" : "no"); DBGPR(" TX checksum offload : %s\n", hw_feat->tx_coe ? "yes" : "no"); DBGPR(" RX checksum offload : %s\n", hw_feat->rx_coe ? "yes" : "no"); DBGPR(" Additional MAC addresses : %u\n", hw_feat->addn_mac); DBGPR(" Timestamp source : %s\n", (hw_feat->ts_src == 1) ? "internal" : (hw_feat->ts_src == 2) ? "external" : (hw_feat->ts_src == 3) ? "internal/external" : "n/a"); DBGPR(" SA/VLAN insertion : %s\n", hw_feat->sa_vlan_ins ? "yes" : "no"); /* Hardware feature register 1 */ DBGPR(" RX fifo size : %u\n", hw_feat->rx_fifo_size); DBGPR(" TX fifo size : %u\n", hw_feat->tx_fifo_size); DBGPR(" IEEE 1588 high word : %s\n", hw_feat->adv_ts_hi ? "yes" : "no"); DBGPR(" DMA width : %u\n", hw_feat->dma_width); DBGPR(" Data Center Bridging : %s\n", hw_feat->dcb ? "yes" : "no"); DBGPR(" Split header : %s\n", hw_feat->sph ? "yes" : "no"); DBGPR(" TCP Segmentation Offload : %s\n", hw_feat->tso ? "yes" : "no"); DBGPR(" Debug memory interface : %s\n", hw_feat->dma_debug ?
"yes" : "no"); DBGPR(" Receive Side Scaling : %s\n", hw_feat->rss ? "yes" : "no"); DBGPR(" Traffic Class count : %u\n", hw_feat->tc_cnt); DBGPR(" Hash table size : %u\n", hw_feat->hash_table_size); DBGPR(" L3/L4 Filters : %u\n", hw_feat->l3l4_filter_num); /* Hardware feature register 2 */ DBGPR(" RX queue count : %u\n", hw_feat->rx_q_cnt); DBGPR(" TX queue count : %u\n", hw_feat->tx_q_cnt); DBGPR(" RX DMA channel count : %u\n", hw_feat->rx_ch_cnt); DBGPR(" TX DMA channel count : %u\n", hw_feat->tx_ch_cnt); DBGPR(" PPS outputs : %u\n", hw_feat->pps_out_num); DBGPR(" Auxiliary snapshot inputs : %u\n", hw_feat->aux_snap_num); DBGPR("<--xgbe_get_all_hw_features\n"); } void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; DBGPR("-->xgbe_init_tx_coalesce\n"); pdata->tx_usecs = XGMAC_INIT_DMA_TX_USECS; pdata->tx_frames = XGMAC_INIT_DMA_TX_FRAMES; hw_if->config_tx_coalesce(pdata); DBGPR("<--xgbe_init_tx_coalesce\n"); } void xgbe_init_rx_coalesce(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; DBGPR("-->xgbe_init_rx_coalesce\n"); pdata->rx_riwt = hw_if->usec_to_riwt(pdata, XGMAC_INIT_DMA_RX_USECS); pdata->rx_usecs = XGMAC_INIT_DMA_RX_USECS; pdata->rx_frames = XGMAC_INIT_DMA_RX_FRAMES; hw_if->config_rx_coalesce(pdata); DBGPR("<--xgbe_init_rx_coalesce\n"); } diff --git a/sys/dev/axgbe/xgbe-phy-v2.c b/sys/dev/axgbe/xgbe-phy-v2.c index df8a75a145b9..6ee673af6e35 100644 --- a/sys/dev/axgbe/xgbe-phy-v2.c +++ b/sys/dev/axgbe/xgbe-phy-v2.c @@ -1,3785 +1,3785 @@ /* * AMD 10Gb Ethernet driver * * Copyright (c) 2020 Advanced Micro Devices, Inc. * * This file is available to you under your choice of the following two * licenses: * * License 1: GPLv2 * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or (at * your option) any later version. * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software.
* * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * * License 2: Modified BSD * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Advanced Micro Devices, Inc. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software.
* * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include "xgbe.h" #include "xgbe-common.h" struct mtx xgbe_phy_comm_lock; #define XGBE_PHY_PORT_SPEED_100 BIT(0) #define XGBE_PHY_PORT_SPEED_1000 BIT(1) #define XGBE_PHY_PORT_SPEED_2500 BIT(2) #define XGBE_PHY_PORT_SPEED_10000 BIT(3) #define XGBE_MUTEX_RELEASE 0x80000000 #define XGBE_SFP_DIRECT 7 #define GPIO_MASK_WIDTH 4 /* I2C target addresses */ #define XGBE_SFP_SERIAL_ID_ADDRESS 0x50 #define XGBE_SFP_DIAG_INFO_ADDRESS 0x51 #define XGBE_SFP_PHY_ADDRESS 0x56 #define XGBE_GPIO_ADDRESS_PCA9555 0x20 /* SFP sideband signal indicators */ #define XGBE_GPIO_NO_TX_FAULT BIT(0) #define XGBE_GPIO_NO_RATE_SELECT BIT(1) #define XGBE_GPIO_NO_MOD_ABSENT BIT(2) #define XGBE_GPIO_NO_RX_LOS BIT(3) /* Rate-change complete wait/retry count */ #define XGBE_RATECHANGE_COUNT 500 /* CDR delay values for KR support (in usec) */ #define XGBE_CDR_DELAY_INIT 10000 #define XGBE_CDR_DELAY_INC 10000 #define XGBE_CDR_DELAY_MAX 100000 /* RRC frequency during link status check */ #define XGBE_RRC_FREQUENCY 10 enum xgbe_port_mode { XGBE_PORT_MODE_RSVD = 0, XGBE_PORT_MODE_BACKPLANE, XGBE_PORT_MODE_BACKPLANE_2500, XGBE_PORT_MODE_1000BASE_T, XGBE_PORT_MODE_1000BASE_X, XGBE_PORT_MODE_NBASE_T, XGBE_PORT_MODE_10GBASE_T, XGBE_PORT_MODE_10GBASE_R, XGBE_PORT_MODE_SFP, XGBE_PORT_MODE_MAX, }; enum xgbe_conn_type { XGBE_CONN_TYPE_NONE = 0, XGBE_CONN_TYPE_SFP, XGBE_CONN_TYPE_MDIO, XGBE_CONN_TYPE_RSVD1, XGBE_CONN_TYPE_BACKPLANE, XGBE_CONN_TYPE_MAX, }; /* SFP/SFP+ related definitions */ enum xgbe_sfp_comm { XGBE_SFP_COMM_DIRECT = 0, XGBE_SFP_COMM_PCA9545, }; enum xgbe_sfp_cable { XGBE_SFP_CABLE_UNKNOWN = 0, XGBE_SFP_CABLE_ACTIVE, XGBE_SFP_CABLE_PASSIVE, }; enum xgbe_sfp_base { XGBE_SFP_BASE_UNKNOWN = 0, XGBE_SFP_BASE_1000_T, XGBE_SFP_BASE_1000_SX, XGBE_SFP_BASE_1000_LX, XGBE_SFP_BASE_1000_CX, XGBE_SFP_BASE_10000_SR, XGBE_SFP_BASE_10000_LR, XGBE_SFP_BASE_10000_LRM, XGBE_SFP_BASE_10000_ER, XGBE_SFP_BASE_10000_CR, }; enum xgbe_sfp_speed { XGBE_SFP_SPEED_UNKNOWN = 0, XGBE_SFP_SPEED_100_1000, XGBE_SFP_SPEED_1000, XGBE_SFP_SPEED_10000, }; /* SFP Serial ID Base ID values relative to an offset of 0 */ #define XGBE_SFP_BASE_ID 0 #define XGBE_SFP_ID_SFP 0x03 #define XGBE_SFP_BASE_EXT_ID 1 #define XGBE_SFP_EXT_ID_SFP 0x04 #define XGBE_SFP_BASE_CV 2 #define XGBE_SFP_BASE_CV_CP 0x21 #define XGBE_SFP_BASE_10GBE_CC 3 #define XGBE_SFP_BASE_10GBE_CC_SR BIT(4) #define XGBE_SFP_BASE_10GBE_CC_LR BIT(5) #define XGBE_SFP_BASE_10GBE_CC_LRM BIT(6) #define XGBE_SFP_BASE_10GBE_CC_ER BIT(7) #define XGBE_SFP_BASE_1GBE_CC 6 #define XGBE_SFP_BASE_1GBE_CC_SX BIT(0) #define XGBE_SFP_BASE_1GBE_CC_LX BIT(1) #define XGBE_SFP_BASE_1GBE_CC_CX BIT(2) #define XGBE_SFP_BASE_1GBE_CC_T BIT(3) #define XGBE_SFP_BASE_CABLE 8 #define XGBE_SFP_BASE_CABLE_PASSIVE BIT(2) #define XGBE_SFP_BASE_CABLE_ACTIVE
BIT(3) #define XGBE_SFP_BASE_BR 12 #define XGBE_SFP_BASE_BR_1GBE_MIN 0x0a #define XGBE_SFP_BASE_BR_1GBE_MAX 0x0d #define XGBE_SFP_BASE_BR_10GBE_MIN 0x64 #define XGBE_SFP_BASE_BR_10GBE_MAX 0x68 #define XGBE_SFP_BASE_CU_CABLE_LEN 18 #define XGBE_SFP_BASE_VENDOR_NAME 20 #define XGBE_SFP_BASE_VENDOR_NAME_LEN 16 #define XGBE_SFP_BASE_VENDOR_PN 40 #define XGBE_SFP_BASE_VENDOR_PN_LEN 16 #define XGBE_SFP_BASE_VENDOR_REV 56 #define XGBE_SFP_BASE_VENDOR_REV_LEN 4 #define XGBE_SFP_BASE_CC 63 /* SFP Serial ID Extended ID values relative to an offset of 64 */ #define XGBE_SFP_BASE_VENDOR_SN 4 #define XGBE_SFP_BASE_VENDOR_SN_LEN 16 #define XGBE_SFP_EXTD_OPT1 1 #define XGBE_SFP_EXTD_OPT1_RX_LOS BIT(1) #define XGBE_SFP_EXTD_OPT1_TX_FAULT BIT(3) #define XGBE_SFP_EXTD_DIAG 28 #define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE BIT(2) #define XGBE_SFP_EXTD_SFF_8472 30 #define XGBE_SFP_EXTD_CC 31 struct xgbe_sfp_eeprom { uint8_t base[64]; uint8_t extd[32]; uint8_t vendor[32]; }; #define XGBE_SFP_DIAGS_SUPPORTED(_x) \ ((_x)->extd[XGBE_SFP_EXTD_SFF_8472] && \ !((_x)->extd[XGBE_SFP_EXTD_DIAG] & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE)) #define XGBE_SFP_EEPROM_BASE_LEN 256 #define XGBE_SFP_EEPROM_DIAG_LEN 256 #define XGBE_SFP_EEPROM_MAX (XGBE_SFP_EEPROM_BASE_LEN + \ XGBE_SFP_EEPROM_DIAG_LEN) #define XGBE_BEL_FUSE_VENDOR "BEL-FUSE " #define XGBE_BEL_FUSE_PARTNO "1GBT-SFP06 " struct xgbe_sfp_ascii { union { char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1]; char partno[XGBE_SFP_BASE_VENDOR_PN_LEN + 1]; char rev[XGBE_SFP_BASE_VENDOR_REV_LEN + 1]; char serno[XGBE_SFP_BASE_VENDOR_SN_LEN + 1]; } u; }; /* MDIO PHY reset types */ enum xgbe_mdio_reset { XGBE_MDIO_RESET_NONE = 0, XGBE_MDIO_RESET_I2C_GPIO, XGBE_MDIO_RESET_INT_GPIO, XGBE_MDIO_RESET_MAX, }; /* Re-driver related definitions */ enum xgbe_phy_redrv_if { XGBE_PHY_REDRV_IF_MDIO = 0, XGBE_PHY_REDRV_IF_I2C, XGBE_PHY_REDRV_IF_MAX, }; enum xgbe_phy_redrv_model { XGBE_PHY_REDRV_MODEL_4223 = 0, XGBE_PHY_REDRV_MODEL_4227, XGBE_PHY_REDRV_MODEL_MAX, }; enum xgbe_phy_redrv_mode { XGBE_PHY_REDRV_MODE_CX = 5, XGBE_PHY_REDRV_MODE_SR = 9, }; #define XGBE_PHY_REDRV_MODE_REG 0x12b0 /* PHY related configuration information */ struct xgbe_phy_data { enum xgbe_port_mode port_mode; unsigned int port_id; unsigned int port_speeds; enum xgbe_conn_type conn_type; enum xgbe_mode cur_mode; enum xgbe_mode start_mode; unsigned int rrc_count; unsigned int mdio_addr; /* SFP Support */ enum xgbe_sfp_comm sfp_comm; unsigned int sfp_mux_address; unsigned int sfp_mux_channel; unsigned int sfp_gpio_address; unsigned int sfp_gpio_mask; unsigned int sfp_gpio_inputs; unsigned int sfp_gpio_rx_los; unsigned int sfp_gpio_tx_fault; unsigned int sfp_gpio_mod_absent; unsigned int sfp_gpio_rate_select; unsigned int sfp_rx_los; unsigned int sfp_tx_fault; unsigned int sfp_mod_absent; unsigned int sfp_changed; unsigned int sfp_phy_avail; unsigned int sfp_cable_len; enum xgbe_sfp_base sfp_base; enum xgbe_sfp_cable sfp_cable; enum xgbe_sfp_speed sfp_speed; struct xgbe_sfp_eeprom sfp_eeprom; /* External PHY support */ enum xgbe_mdio_mode phydev_mode; uint32_t phy_id; int phydev; enum xgbe_mdio_reset mdio_reset; unsigned int mdio_reset_addr; unsigned int mdio_reset_gpio; /* Re-driver support */ unsigned int redrv; unsigned int redrv_if; unsigned int redrv_addr; unsigned int redrv_lane; unsigned int redrv_model; /* KR AN support */ unsigned int phy_cdr_notrack; unsigned int phy_cdr_delay; uint8_t port_sfp_inputs; }; static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata); static int xgbe_phy_reset(struct xgbe_prv_data 
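/*
 * The offsets above follow the SFF-8472 serial ID layout: bytes 0-63 hold
 * the base ID fields (checksummed by CC_BASE at byte 63) and the following
 * 32 bytes hold the extended ID fields (checksummed by CC_EXT at extended
 * byte 31), which is why struct xgbe_sfp_eeprom splits the data into
 * base[64] and extd[32].
 */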
*pdata); static int xgbe_phy_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *i2c_op) { return (pdata->i2c_if.i2c_xfer(pdata, i2c_op)); } static int xgbe_phy_redrv_write(struct xgbe_prv_data *pdata, unsigned int reg, unsigned int val) { struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_i2c_op i2c_op; __be16 *redrv_val; uint8_t redrv_data[5], csum; unsigned int i, retry; int ret; /* High byte of register contains read/write indicator */ redrv_data[0] = ((reg >> 8) & 0xff) << 1; redrv_data[1] = reg & 0xff; redrv_val = (__be16 *)&redrv_data[2]; *redrv_val = cpu_to_be16(val); /* Calculate 1 byte checksum */ csum = 0; for (i = 0; i < 4; i++) { csum += redrv_data[i]; if (redrv_data[i] > csum) csum++; } redrv_data[4] = ~csum; retry = 1; again1: i2c_op.cmd = XGBE_I2C_CMD_WRITE; i2c_op.target = phy_data->redrv_addr; i2c_op.len = sizeof(redrv_data); i2c_op.buf = redrv_data; ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); if (ret) { if ((ret == -EAGAIN) && retry--) goto again1; return (ret); } retry = 1; again2: i2c_op.cmd = XGBE_I2C_CMD_READ; i2c_op.target = phy_data->redrv_addr; i2c_op.len = 1; i2c_op.buf = redrv_data; ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); if (ret) { if ((ret == -EAGAIN) && retry--) goto again2; return (ret); } if (redrv_data[0] != 0xff) { axgbe_error("Redriver write checksum error\n"); ret = -EIO; } return (ret); } static int xgbe_phy_i2c_write(struct xgbe_prv_data *pdata, unsigned int target, void *val, unsigned int val_len) { struct xgbe_i2c_op i2c_op; int retry, ret; retry = 1; again: /* Write the specified register */ i2c_op.cmd = XGBE_I2C_CMD_WRITE; i2c_op.target = target; i2c_op.len = val_len; i2c_op.buf = val; ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); if ((ret == -EAGAIN) && retry--) goto again; return (ret); } static int xgbe_phy_i2c_read(struct xgbe_prv_data *pdata, unsigned int target, void *reg, unsigned int reg_len, void *val, unsigned int val_len) { struct xgbe_i2c_op i2c_op; int retry, ret; axgbe_printf(3, "%s: target 0x%x reg_len %d val_len %d\n", __func__, target, reg_len, val_len); retry = 1; again1: /* Set the specified register to read */ i2c_op.cmd = XGBE_I2C_CMD_WRITE; i2c_op.target = target; i2c_op.len = reg_len; i2c_op.buf = reg; ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); axgbe_printf(3, "%s: ret1 %d retry %d\n", __func__, ret, retry); if (ret) { if ((ret == -EAGAIN) && retry--) goto again1; return (ret); } retry = 1; again2: /* Read the specified register */ i2c_op.cmd = XGBE_I2C_CMD_READ; i2c_op.target = target; i2c_op.len = val_len; i2c_op.buf = val; ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); axgbe_printf(3, "%s: ret2 %d retry %d\n", __func__, ret, retry); if ((ret == -EAGAIN) && retry--) goto again2; return (ret); } static int xgbe_phy_sfp_put_mux(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_i2c_op i2c_op; uint8_t mux_channel; if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT) return (0); /* Select no mux channels */ mux_channel = 0; i2c_op.cmd = XGBE_I2C_CMD_WRITE; i2c_op.target = phy_data->sfp_mux_address; i2c_op.len = sizeof(mux_channel); i2c_op.buf = &mux_channel; return (xgbe_phy_i2c_xfer(pdata, &i2c_op)); } static int xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_i2c_op i2c_op; uint8_t mux_channel; if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT) return (0); /* Select desired mux channel */ mux_channel = 1 << phy_data->sfp_mux_channel; i2c_op.cmd = XGBE_I2C_CMD_WRITE; i2c_op.target = phy_data->sfp_mux_address; i2c_op.len =
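/*
 * Note on xgbe_phy_redrv_write() above: the checksum loop is an 8-bit
 * end-around-carry sum (each wrap of the uint8_t adds one back in), and
 * the complemented result placed in redrv_data[4] lets the re-driver
 * validate the four command/value bytes; it acknowledges a good write
 * with 0xff on the readback.
 */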
sizeof(mux_channel); i2c_op.buf = &mux_channel; return (xgbe_phy_i2c_xfer(pdata, &i2c_op)); } static void xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata) { mtx_unlock(&xgbe_phy_comm_lock); } static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned long timeout; unsigned int mutex_id; /* The I2C and MDIO/GPIO buses are multiplexed between multiple devices, * so the driver needs to take the software mutex and then the hardware * mutexes before being able to use the buses. */ mtx_lock(&xgbe_phy_comm_lock); /* Clear the mutexes */ XP_IOWRITE(pdata, XP_I2C_MUTEX, XGBE_MUTEX_RELEASE); XP_IOWRITE(pdata, XP_MDIO_MUTEX, XGBE_MUTEX_RELEASE); /* Mutex formats are the same for I2C and MDIO/GPIO */ mutex_id = 0; XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ID, phy_data->port_id); XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ACTIVE, 1); timeout = ticks + (5 * hz); while (ticks < timeout) { /* Must be all zeroes in order to obtain the mutex */ if (XP_IOREAD(pdata, XP_I2C_MUTEX) || XP_IOREAD(pdata, XP_MDIO_MUTEX)) { DELAY(200); continue; } /* Obtain the mutex */ XP_IOWRITE(pdata, XP_I2C_MUTEX, mutex_id); XP_IOWRITE(pdata, XP_MDIO_MUTEX, mutex_id); return (0); } mtx_unlock(&xgbe_phy_comm_lock); axgbe_error("unable to obtain hardware mutexes\n"); return (-ETIMEDOUT); } static int xgbe_phy_mdio_mii_write(struct xgbe_prv_data *pdata, int addr, int reg, uint16_t val) { struct xgbe_phy_data *phy_data = pdata->phy_data; if (reg & MII_ADDR_C45) { if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL45) return (-ENOTSUP); } else { if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL22) return (-ENOTSUP); } return (pdata->hw_if.write_ext_mii_regs(pdata, addr, reg, val)); } static int xgbe_phy_i2c_mii_write(struct xgbe_prv_data *pdata, int reg, uint16_t val) { __be16 *mii_val; uint8_t mii_data[3]; int ret; ret = xgbe_phy_sfp_get_mux(pdata); if (ret) return (ret); mii_data[0] = reg & 0xff; mii_val = (__be16 *)&mii_data[1]; *mii_val = cpu_to_be16(val); ret = xgbe_phy_i2c_write(pdata, XGBE_SFP_PHY_ADDRESS, mii_data, sizeof(mii_data)); xgbe_phy_sfp_put_mux(pdata); return (ret); } int xgbe_phy_mii_write(struct xgbe_prv_data *pdata, int addr, int reg, uint16_t val) { struct xgbe_phy_data *phy_data = pdata->phy_data; int ret; axgbe_printf(3, "%s: addr %d reg %d val %#x\n", __func__, addr, reg, val); ret = xgbe_phy_get_comm_ownership(pdata); if (ret) return (ret); if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) ret = xgbe_phy_i2c_mii_write(pdata, reg, val); else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO) ret = xgbe_phy_mdio_mii_write(pdata, addr, reg, val); else ret = -ENOTSUP; xgbe_phy_put_comm_ownership(pdata); return (ret); } static int xgbe_phy_mdio_mii_read(struct xgbe_prv_data *pdata, int addr, int reg) { struct xgbe_phy_data *phy_data = pdata->phy_data; if (reg & MII_ADDR_C45) { if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL45) return (-ENOTSUP); } else { if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL22) return (-ENOTSUP); } return (pdata->hw_if.read_ext_mii_regs(pdata, addr, reg)); } static int xgbe_phy_i2c_mii_read(struct xgbe_prv_data *pdata, int reg) { __be16 mii_val; uint8_t mii_reg; int ret; ret = xgbe_phy_sfp_get_mux(pdata); if (ret) return (ret); mii_reg = reg; ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_PHY_ADDRESS, &mii_reg, sizeof(mii_reg), &mii_val, sizeof(mii_val)); if (!ret) ret = be16_to_cpu(mii_val); xgbe_phy_sfp_put_mux(pdata); return (ret); } int xgbe_phy_mii_read(struct xgbe_prv_data *pdata, int addr, int reg) { struct xgbe_phy_data *phy_data =
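/*
 * Sketch of the ownership protocol implemented by
 * xgbe_phy_get_comm_ownership() above: a port first clears any stale claim
 * with XGBE_MUTEX_RELEASE, polls until both the I2C and MDIO hardware
 * mutex registers read zero, then stamps both with its own port ID plus
 * the ACTIVE bit, giving up after roughly five seconds (5 * hz ticks).
 */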
pdata->phy_data; int ret; axgbe_printf(3, "%s: addr %d reg %d\n", __func__, addr, reg); ret = xgbe_phy_get_comm_ownership(pdata); if (ret) return (ret); if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) ret = xgbe_phy_i2c_mii_read(pdata, reg); else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO) ret = xgbe_phy_mdio_mii_read(pdata, addr, reg); else ret = -ENOTSUP; xgbe_phy_put_comm_ownership(pdata); return (ret); } static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; if (!phy_data->sfp_mod_absent && !phy_data->sfp_changed) return; XGBE_ZERO_SUP(&pdata->phy); if (phy_data->sfp_mod_absent) { pdata->phy.speed = SPEED_UNKNOWN; pdata->phy.duplex = DUPLEX_UNKNOWN; pdata->phy.autoneg = AUTONEG_ENABLE; pdata->phy.pause_autoneg = AUTONEG_ENABLE; XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, TP); XGBE_SET_SUP(&pdata->phy, FIBRE); XGBE_LM_COPY(&pdata->phy, advertising, &pdata->phy, supported); return; } switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: case XGBE_SFP_BASE_1000_SX: case XGBE_SFP_BASE_1000_LX: case XGBE_SFP_BASE_1000_CX: pdata->phy.speed = SPEED_UNKNOWN; pdata->phy.duplex = DUPLEX_UNKNOWN; pdata->phy.autoneg = AUTONEG_ENABLE; pdata->phy.pause_autoneg = AUTONEG_ENABLE; XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) { if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) XGBE_SET_SUP(&pdata->phy, 100baseT_Full); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) XGBE_SET_SUP(&pdata->phy, 1000baseT_Full); } else { if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) XGBE_SET_SUP(&pdata->phy, 1000baseX_Full); } break; case XGBE_SFP_BASE_10000_SR: case XGBE_SFP_BASE_10000_LR: case XGBE_SFP_BASE_10000_LRM: case XGBE_SFP_BASE_10000_ER: case XGBE_SFP_BASE_10000_CR: pdata->phy.speed = SPEED_10000; pdata->phy.duplex = DUPLEX_FULL; pdata->phy.autoneg = AUTONEG_DISABLE; pdata->phy.pause_autoneg = AUTONEG_DISABLE; if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { switch (phy_data->sfp_base) { case XGBE_SFP_BASE_10000_SR: XGBE_SET_SUP(&pdata->phy, 10000baseSR_Full); break; case XGBE_SFP_BASE_10000_LR: XGBE_SET_SUP(&pdata->phy, 10000baseLR_Full); break; case XGBE_SFP_BASE_10000_LRM: XGBE_SET_SUP(&pdata->phy, 10000baseLRM_Full); break; case XGBE_SFP_BASE_10000_ER: XGBE_SET_SUP(&pdata->phy, 10000baseER_Full); break; case XGBE_SFP_BASE_10000_CR: XGBE_SET_SUP(&pdata->phy, 10000baseCR_Full); break; default: break; } } break; default: pdata->phy.speed = SPEED_UNKNOWN; pdata->phy.duplex = DUPLEX_UNKNOWN; pdata->phy.autoneg = AUTONEG_DISABLE; pdata->phy.pause_autoneg = AUTONEG_DISABLE; break; } switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: case XGBE_SFP_BASE_1000_CX: case XGBE_SFP_BASE_10000_CR: XGBE_SET_SUP(&pdata->phy, TP); break; default: XGBE_SET_SUP(&pdata->phy, FIBRE); break; } XGBE_LM_COPY(&pdata->phy, advertising, &pdata->phy, supported); axgbe_printf(1, "%s: link speed %d sfp_base 0x%x pause_autoneg %d " "advert 0x%x support 0x%x\n", __func__, pdata->phy.speed, phy_data->sfp_base, pdata->phy.pause_autoneg, pdata->phy.advertising, pdata->phy.supported); } static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom, enum xgbe_sfp_speed sfp_speed) { uint8_t *sfp_base, min, max; sfp_base = sfp_eeprom->base; switch (sfp_speed) { case XGBE_SFP_SPEED_1000: min = XGBE_SFP_BASE_BR_1GBE_MIN; max =
XGBE_SFP_BASE_BR_1GBE_MAX; break; case XGBE_SFP_SPEED_10000: min = XGBE_SFP_BASE_BR_10GBE_MIN; max = XGBE_SFP_BASE_BR_10GBE_MAX; break; default: return (false); } return ((sfp_base[XGBE_SFP_BASE_BR] >= min) && (sfp_base[XGBE_SFP_BASE_BR] <= max)); } static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; if (phy_data->phydev) phy_data->phydev = 0; } static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int phy_id = phy_data->phy_id; if (phy_data->port_mode != XGBE_PORT_MODE_SFP) return (false); if ((phy_id & 0xfffffff0) != 0x01ff0cc0) return (false); /* Enable Base-T AN */ xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x16, 0x0001); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x00, 0x9140); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x16, 0x0000); /* Enable SGMII at 100Base-T/1000Base-T Full Duplex */ xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x1b, 0x9084); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x09, 0x0e00); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x00, 0x8140); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x04, 0x0d01); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x00, 0x9140); axgbe_printf(3, "Finisar PHY quirk in place\n"); return (true); } static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; unsigned int phy_id = phy_data->phy_id; int reg; if (phy_data->port_mode != XGBE_PORT_MODE_SFP) return (false); if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME], XGBE_BEL_FUSE_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN)) return (false); /* For Bel-Fuse, use the extra AN flag */ pdata->an_again = 1; if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN], XGBE_BEL_FUSE_PARTNO, XGBE_SFP_BASE_VENDOR_PN_LEN)) return (false); if ((phy_id & 0xfffffff0) != 0x03625d10) return (false); /* Disable RGMII mode */ xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x18, 0x7007); reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x18); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x18, reg & ~0x0080); /* Enable fiber register bank */ xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x1c, 0x7c00); reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x1c); reg &= 0x03ff; reg &= ~0x0001; xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x1c, 0x8000 | 0x7c00 | reg | 0x0001); /* Power down SerDes */ reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x00); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x00, reg | 0x00800); /* Configure SGMII-to-Copper mode */ xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x1c, 0x7c00); reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x1c); reg &= 0x03ff; reg &= ~0x0006; xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x1c, 0x8000 | 0x7c00 | reg | 0x0004); /* Power up SerDes */ reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x00); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x00, reg & ~0x00800); /* Enable copper register bank */ xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x1c, 0x7c00); reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x1c); reg &= 0x03ff; reg &= ~0x0001; xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x1c, 0x8000 | 0x7c00 | reg); /* Power up SerDes */ reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x00); xgbe_phy_mii_write(pdata, phy_data->mdio_addr, 0x00, reg & ~0x00800); axgbe_printf(3, "BelFuse PHY quirk in place\n"); return (true); } static void 
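/*
 * xgbe_phy_sfp_bit_rate() above relies on SFF-8472 byte 12 (nominal bit
 * rate) being expressed in units of 100 Mb/s: 0x0a-0x0d covers 1.0-1.3
 * Gb/s modules and 0x64-0x68 covers 10.0-10.4 Gb/s modules.
 */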
xgbe_phy_external_phy_quirks(struct xgbe_prv_data *pdata) { if (xgbe_phy_belfuse_phy_quirks(pdata)) return; if (xgbe_phy_finisar_phy_quirks(pdata)) return; } static int xgbe_get_phy_id(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; uint32_t oui, model, phy_id1, phy_id2; int phy_reg; phy_reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x02); if (phy_reg < 0) return (-EIO); phy_id1 = (phy_reg & 0xffff); phy_data->phy_id = (phy_reg & 0xffff) << 16; phy_reg = xgbe_phy_mii_read(pdata, phy_data->mdio_addr, 0x03); if (phy_reg < 0) return (-EIO); phy_id2 = (phy_reg & 0xffff); phy_data->phy_id |= (phy_reg & 0xffff); oui = MII_OUI(phy_id1, phy_id2); model = MII_MODEL(phy_id2); axgbe_printf(2, "%s: phy_id1: 0x%x phy_id2: 0x%x oui: %#x model %#x\n", __func__, phy_id1, phy_id2, oui, model); return (0); } static int xgbe_phy_start_aneg(struct xgbe_prv_data *pdata) { uint16_t ctl = 0; int changed = 0; int ret; if (AUTONEG_ENABLE != pdata->phy.autoneg) { if (SPEED_1000 == pdata->phy.speed) ctl |= BMCR_SPEED1; else if (SPEED_100 == pdata->phy.speed) ctl |= BMCR_SPEED100; if (DUPLEX_FULL == pdata->phy.duplex) ctl |= BMCR_FDX; ret = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_BMCR); if (ret) return (ret); ret = xgbe_phy_mii_write(pdata, pdata->mdio_addr, MII_BMCR, (ret & ~(BMCR_LOOP | BMCR_ISO | BMCR_PDOWN)) | ctl); } ctl = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_BMCR); if (ctl < 0) return (ctl); if (!(ctl & BMCR_AUTOEN) || (ctl & BMCR_ISO)) changed = 1; if (changed > 0) { ret = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_BMCR); if (ret) return (ret); ret = xgbe_phy_mii_write(pdata, pdata->mdio_addr, MII_BMCR, (ret & ~(BMCR_ISO)) | (BMCR_AUTOEN | BMCR_STARTNEG)); } return (0); } static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; int ret; axgbe_printf(2, "%s: phydev %d phydev_mode %d sfp_phy_avail %d phy_id " "0x%08x\n", __func__, phy_data->phydev, phy_data->phydev_mode, phy_data->sfp_phy_avail, phy_data->phy_id); /* If we already have a PHY, just return */ if (phy_data->phydev) { axgbe_printf(3, "%s: phy present already\n", __func__); return (0); } /* Clear the extra AN flag */ pdata->an_again = 0; /* Check for the use of an external PHY */ if (phy_data->phydev_mode == XGBE_MDIO_MODE_NONE) { axgbe_printf(3, "%s: phydev_mode %d\n", __func__, phy_data->phydev_mode); return (0); } /* For SFP, only use an external PHY if available */ if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) && !phy_data->sfp_phy_avail) { axgbe_printf(3, "%s: port_mode %d avail %d\n", __func__, phy_data->port_mode, phy_data->sfp_phy_avail); return (0); } /* Set the proper MDIO mode for the PHY */ ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr, phy_data->phydev_mode); if (ret) { axgbe_error("mdio port/clause not compatible (%u/%u) ret %d\n", phy_data->mdio_addr, phy_data->phydev_mode, ret); return (ret); } ret = xgbe_get_phy_id(pdata); if (ret) return (ret); axgbe_printf(2, "Get phy_id 0x%08x\n", phy_data->phy_id); phy_data->phydev = 1; xgbe_phy_external_phy_quirks(pdata); xgbe_phy_start_aneg(pdata); return (0); } static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; int ret; axgbe_printf(3, "%s: sfp_changed: 0x%x\n", __func__, phy_data->sfp_changed); if (!phy_data->sfp_changed) return; phy_data->sfp_phy_avail = 0; if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) return; /* Check access to the PHY by reading CTRL1 */ ret =
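/*
 * xgbe_get_phy_id() above composes the 32-bit PHY ID in the standard MII
 * way: PHYID1 (register 2) supplies the high word and PHYID2 (register 3)
 * the low word; for example, ID1 0x0362 and ID2 0x5d10 yield the
 * 0x03625d10 identifier matched by the Bel-Fuse quirk.
 */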
xgbe_phy_i2c_mii_read(pdata, MII_BMCR); if (ret < 0) { axgbe_error("%s: ext phy fail %d\n", __func__, ret); return; } /* Successfully accessed the PHY */ phy_data->sfp_phy_avail = 1; axgbe_printf(3, "Successfully accessed External PHY\n"); } static bool xgbe_phy_check_sfp_rx_los(struct xgbe_phy_data *phy_data) { uint8_t *sfp_extd = phy_data->sfp_eeprom.extd; if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_RX_LOS)) return (false); if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) return (false); if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_rx_los)) return (true); return (false); } static bool xgbe_phy_check_sfp_tx_fault(struct xgbe_phy_data *phy_data) { uint8_t *sfp_extd = phy_data->sfp_eeprom.extd; if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_TX_FAULT)) return (false); if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) return (false); if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_tx_fault)) return (true); return (false); } static bool xgbe_phy_check_sfp_mod_absent(struct xgbe_phy_data *phy_data) { if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) return (false); if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_mod_absent)) return (true); return (false); } static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; uint8_t *sfp_base; sfp_base = sfp_eeprom->base; if (sfp_base[XGBE_SFP_BASE_ID] != XGBE_SFP_ID_SFP) { axgbe_error("base id %d\n", sfp_base[XGBE_SFP_BASE_ID]); return; } if (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP) { axgbe_error("ext id %d\n", sfp_base[XGBE_SFP_BASE_EXT_ID]); return; } /* Update transceiver signals (eeprom extd/options) */ phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data); phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data); /* Assume ACTIVE cable unless told it is PASSIVE */ if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE) { phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE; phy_data->sfp_cable_len = sfp_base[XGBE_SFP_BASE_CU_CABLE_LEN]; } else phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE; /* * Determine the type of SFP. Certain 10G SFP+ modules read as * 1000BASE-CX. To prevent 10G DAC cables from being recognized as * 1G, we first check if it is a DAC and the bit rate is 10G.
*/ if (((sfp_base[XGBE_SFP_BASE_CV] & XGBE_SFP_BASE_CV_CP) || (phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE)) && xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000)) phy_data->sfp_base = XGBE_SFP_BASE_10000_CR; else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR) phy_data->sfp_base = XGBE_SFP_BASE_10000_SR; else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LR) phy_data->sfp_base = XGBE_SFP_BASE_10000_LR; else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LRM) phy_data->sfp_base = XGBE_SFP_BASE_10000_LRM; else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_ER) phy_data->sfp_base = XGBE_SFP_BASE_10000_ER; else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_SX) phy_data->sfp_base = XGBE_SFP_BASE_1000_SX; else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_LX) phy_data->sfp_base = XGBE_SFP_BASE_1000_LX; else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_CX) phy_data->sfp_base = XGBE_SFP_BASE_1000_CX; else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_T) phy_data->sfp_base = XGBE_SFP_BASE_1000_T; switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: phy_data->sfp_speed = XGBE_SFP_SPEED_100_1000; break; case XGBE_SFP_BASE_1000_SX: case XGBE_SFP_BASE_1000_LX: case XGBE_SFP_BASE_1000_CX: phy_data->sfp_speed = XGBE_SFP_SPEED_1000; break; case XGBE_SFP_BASE_10000_SR: case XGBE_SFP_BASE_10000_LR: case XGBE_SFP_BASE_10000_LRM: case XGBE_SFP_BASE_10000_ER: case XGBE_SFP_BASE_10000_CR: phy_data->sfp_speed = XGBE_SFP_SPEED_10000; break; default: break; } axgbe_printf(3, "%s: sfp_base: 0x%x sfp_speed: 0x%x sfp_cable: 0x%x " "rx_los 0x%x tx_fault 0x%x\n", __func__, phy_data->sfp_base, phy_data->sfp_speed, phy_data->sfp_cable, phy_data->sfp_rx_los, phy_data->sfp_tx_fault); } static void xgbe_phy_sfp_eeprom_info(struct xgbe_prv_data *pdata, struct xgbe_sfp_eeprom *sfp_eeprom) { struct xgbe_sfp_ascii sfp_ascii; char *sfp_data = (char *)&sfp_ascii; axgbe_printf(3, "SFP detected:\n"); memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME], XGBE_SFP_BASE_VENDOR_NAME_LEN); sfp_data[XGBE_SFP_BASE_VENDOR_NAME_LEN] = '\0'; axgbe_printf(3, " vendor: %s\n", sfp_data); memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN], XGBE_SFP_BASE_VENDOR_PN_LEN); sfp_data[XGBE_SFP_BASE_VENDOR_PN_LEN] = '\0'; axgbe_printf(3, " part number: %s\n", sfp_data); memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_REV], XGBE_SFP_BASE_VENDOR_REV_LEN); sfp_data[XGBE_SFP_BASE_VENDOR_REV_LEN] = '\0'; axgbe_printf(3, " revision level: %s\n", sfp_data); memcpy(sfp_data, &sfp_eeprom->extd[XGBE_SFP_BASE_VENDOR_SN], XGBE_SFP_BASE_VENDOR_SN_LEN); sfp_data[XGBE_SFP_BASE_VENDOR_SN_LEN] = '\0'; axgbe_printf(3, " serial number: %s\n", sfp_data); } static bool xgbe_phy_sfp_verify_eeprom(uint8_t cc_in, uint8_t *buf, unsigned int len) { uint8_t cc; for (cc = 0; len; buf++, len--) cc += *buf; return ((cc == cc_in) ? 
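/* The CC_BASE/CC_EXT checksums verified here are the low 8 bits of the
 * byte sum over the preceding field bytes, hence the "sizeof(...) - 1"
 * lengths the callers pass in. */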
true : false); } static void dump_sfp_eeprom(struct xgbe_prv_data *pdata, uint8_t *sfp_base) { axgbe_printf(3, "sfp_base[XGBE_SFP_BASE_ID] : 0x%04x\n", sfp_base[XGBE_SFP_BASE_ID]); axgbe_printf(3, "sfp_base[XGBE_SFP_BASE_EXT_ID] : 0x%04x\n", sfp_base[XGBE_SFP_BASE_EXT_ID]); axgbe_printf(3, "sfp_base[XGBE_SFP_BASE_CABLE] : 0x%04x\n", sfp_base[XGBE_SFP_BASE_CABLE]); } static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_sfp_eeprom sfp_eeprom, *eeprom; uint8_t eeprom_addr, *base; int ret; ret = xgbe_phy_sfp_get_mux(pdata); if (ret) { axgbe_error("I2C error setting SFP MUX\n"); return (ret); } /* Read the SFP serial ID eeprom */ eeprom_addr = 0; ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS, &eeprom_addr, sizeof(eeprom_addr), &sfp_eeprom, sizeof(sfp_eeprom)); eeprom = &sfp_eeprom; base = eeprom->base; dump_sfp_eeprom(pdata, base); if (ret) { axgbe_error("I2C error reading SFP EEPROM\n"); goto put; } /* Validate the contents read */ if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.base[XGBE_SFP_BASE_CC], sfp_eeprom.base, sizeof(sfp_eeprom.base) - 1)) { axgbe_error("verify eeprom base failed\n"); ret = -EINVAL; goto put; } if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.extd[XGBE_SFP_EXTD_CC], sfp_eeprom.extd, sizeof(sfp_eeprom.extd) - 1)) { axgbe_error("verify eeprom extd failed\n"); ret = -EINVAL; goto put; } /* Check for an added or changed SFP */ if (memcmp(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom))) { phy_data->sfp_changed = 1; xgbe_phy_sfp_eeprom_info(pdata, &sfp_eeprom); memcpy(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom)); xgbe_phy_free_phy_device(pdata); } else phy_data->sfp_changed = 0; put: xgbe_phy_sfp_put_mux(pdata); return (ret); } static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; uint8_t gpio_reg, gpio_ports[2]; int ret, prev_sfp_inputs = phy_data->port_sfp_inputs; int shift = GPIO_MASK_WIDTH * (3 - phy_data->port_id); /* Read the input port registers */ axgbe_printf(3, "%s: before sfp_mod:%d sfp_gpio_address:0x%x\n", __func__, phy_data->sfp_mod_absent, phy_data->sfp_gpio_address); gpio_reg = 0; ret = xgbe_phy_i2c_read(pdata, phy_data->sfp_gpio_address, &gpio_reg, sizeof(gpio_reg), gpio_ports, sizeof(gpio_ports)); if (ret) { axgbe_error("%s: I2C error reading SFP GPIO addr:0x%x\n", __func__, phy_data->sfp_gpio_address); return; } phy_data->sfp_gpio_inputs = (gpio_ports[1] << 8) | gpio_ports[0]; phy_data->port_sfp_inputs = (phy_data->sfp_gpio_inputs >> shift) & 0x0F; if (prev_sfp_inputs != phy_data->port_sfp_inputs) axgbe_printf(0, "%s: port_sfp_inputs: 0x%0x\n", __func__, phy_data->port_sfp_inputs); phy_data->sfp_mod_absent = xgbe_phy_check_sfp_mod_absent(phy_data); axgbe_printf(3, "%s: after sfp_mod:%d sfp_gpio_inputs:0x%x\n", __func__, phy_data->sfp_mod_absent, phy_data->sfp_gpio_inputs); } static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_free_phy_device(pdata); phy_data->sfp_mod_absent = 1; phy_data->sfp_phy_avail = 0; memset(&phy_data->sfp_eeprom, 0, sizeof(phy_data->sfp_eeprom)); } static void xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data) { phy_data->sfp_rx_los = 0; phy_data->sfp_tx_fault = 0; phy_data->sfp_mod_absent = 1; phy_data->sfp_base = XGBE_SFP_BASE_UNKNOWN; phy_data->sfp_cable = XGBE_SFP_CABLE_UNKNOWN; phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN; } static void xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata) { struct
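/*
 * In xgbe_phy_sfp_signals() above, the PCA9555 expander supplies 16 input
 * bits shared by up to four ports; each port owns one GPIO_MASK_WIDTH-wide
 * (4-bit) nibble, so port 0 shifts by 12, port 1 by 8, and so on, before
 * masking with 0x0F.
 */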
xgbe_phy_data *phy_data = pdata->phy_data; int ret, prev_sfp_state = phy_data->sfp_mod_absent; /* Reset the SFP signals and info */ xgbe_phy_sfp_reset(phy_data); ret = xgbe_phy_get_comm_ownership(pdata); if (ret) return; /* Read the SFP signals and check for module presence */ xgbe_phy_sfp_signals(pdata); if (phy_data->sfp_mod_absent) { if (prev_sfp_state != phy_data->sfp_mod_absent) axgbe_error("%s: mod absent\n", __func__); xgbe_phy_sfp_mod_absent(pdata); goto put; } ret = xgbe_phy_sfp_read_eeprom(pdata); if (ret) { /* Treat any error as if there isn't an SFP plugged in */ axgbe_error("%s: eeprom read failed\n", __func__); xgbe_phy_sfp_reset(phy_data); xgbe_phy_sfp_mod_absent(pdata); goto put; } xgbe_phy_sfp_parse_eeprom(pdata); xgbe_phy_sfp_external_phy(pdata); put: xgbe_phy_sfp_phy_settings(pdata); axgbe_printf(3, "%s: phy speed: 0x%x duplex: 0x%x autoneg: 0x%x " "pause_autoneg: 0x%x\n", __func__, pdata->phy.speed, pdata->phy.duplex, pdata->phy.autoneg, pdata->phy.pause_autoneg); xgbe_phy_put_comm_ownership(pdata); } static int xgbe_phy_module_eeprom(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; uint8_t eeprom_addr, eeprom_data[XGBE_SFP_EEPROM_MAX]; struct xgbe_sfp_eeprom *sfp_eeprom; int ret; if (phy_data->port_mode != XGBE_PORT_MODE_SFP) { ret = -ENXIO; goto done; } if (phy_data->sfp_mod_absent) { ret = -EIO; goto done; } ret = xgbe_phy_get_comm_ownership(pdata); if (ret) { ret = -EIO; goto done; } ret = xgbe_phy_sfp_get_mux(pdata); if (ret) { axgbe_error("I2C error setting SFP MUX\n"); ret = -EIO; goto put_own; } /* Read the SFP serial ID eeprom */ eeprom_addr = 0; ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS, &eeprom_addr, sizeof(eeprom_addr), eeprom_data, XGBE_SFP_EEPROM_BASE_LEN); if (ret) { axgbe_error("I2C error reading SFP EEPROM\n"); ret = -EIO; goto put_mux; } sfp_eeprom = (struct xgbe_sfp_eeprom *)eeprom_data; if (XGBE_SFP_DIAGS_SUPPORTED(sfp_eeprom)) { /* Read the SFP diagnostic eeprom */ eeprom_addr = 0; ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_DIAG_INFO_ADDRESS, &eeprom_addr, sizeof(eeprom_addr), eeprom_data + XGBE_SFP_EEPROM_BASE_LEN, XGBE_SFP_EEPROM_DIAG_LEN); if (ret) { axgbe_error("I2C error reading SFP DIAGS\n"); ret = -EIO; goto put_mux; } } put_mux: xgbe_phy_sfp_put_mux(pdata); put_own: xgbe_phy_put_comm_ownership(pdata); done: return (ret); } static int xgbe_phy_module_info(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; if (phy_data->port_mode != XGBE_PORT_MODE_SFP) return (-ENXIO); if (phy_data->sfp_mod_absent) return (-EIO); return (0); } static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; pdata->phy.tx_pause = 0; pdata->phy.rx_pause = 0; if (!phy_data->phydev) return; if (pdata->phy.pause) XGBE_SET_LP_ADV(&pdata->phy, Pause); if (pdata->phy.asym_pause) XGBE_SET_LP_ADV(&pdata->phy, Asym_Pause); axgbe_printf(1, "%s: pause tx/rx %d/%d\n", __func__, pdata->phy.tx_pause, pdata->phy.rx_pause); } static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata) { enum xgbe_mode mode; XGBE_SET_LP_ADV(&pdata->phy, Autoneg); XGBE_SET_LP_ADV(&pdata->phy, TP); axgbe_printf(1, "%s: pause_autoneg %d\n", __func__, pdata->phy.pause_autoneg); /* Use external PHY to determine flow control */ if (pdata->phy.pause_autoneg) xgbe_phy_phydev_flowctrl(pdata); switch (pdata->an_status & XGBE_SGMII_AN_LINK_SPEED) { case XGBE_SGMII_AN_LINK_SPEED_100: if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { XGBE_SET_LP_ADV(&pdata->phy, 
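/*
 * Note on xgbe_phy_module_eeprom() above: 0x50 and 0x51 are the standard
 * SFP A0h (serial ID) and A2h (diagnostics) I2C addresses; the second
 * 256-byte read is attempted only when the module advertises SFF-8472
 * diagnostics without requiring an address change.
 */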
100baseT_Full); mode = XGBE_MODE_SGMII_100; } else { /* Half-duplex not supported */ XGBE_SET_LP_ADV(&pdata->phy, 100baseT_Half); mode = XGBE_MODE_UNKNOWN; } break; case XGBE_SGMII_AN_LINK_SPEED_1000: if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { XGBE_SET_LP_ADV(&pdata->phy, 1000baseT_Full); mode = XGBE_MODE_SGMII_1000; } else { /* Half-duplex not supported */ XGBE_SET_LP_ADV(&pdata->phy, 1000baseT_Half); mode = XGBE_MODE_UNKNOWN; } break; default: mode = XGBE_MODE_UNKNOWN; } return (mode); } static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata) { enum xgbe_mode mode; unsigned int ad_reg, lp_reg; XGBE_SET_LP_ADV(&pdata->phy, Autoneg); XGBE_SET_LP_ADV(&pdata->phy, FIBRE); /* Compare Advertisement and Link Partner register */ ad_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE); lp_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_LP_ABILITY); if (lp_reg & 0x100) XGBE_SET_LP_ADV(&pdata->phy, Pause); if (lp_reg & 0x80) XGBE_SET_LP_ADV(&pdata->phy, Asym_Pause); axgbe_printf(1, "%s: pause_autoneg %d ad_reg 0x%x lp_reg 0x%x\n", __func__, pdata->phy.pause_autoneg, ad_reg, lp_reg); if (pdata->phy.pause_autoneg) { /* Set flow control based on auto-negotiation result */ pdata->phy.tx_pause = 0; pdata->phy.rx_pause = 0; if (ad_reg & lp_reg & 0x100) { pdata->phy.tx_pause = 1; pdata->phy.rx_pause = 1; } else if (ad_reg & lp_reg & 0x80) { if (ad_reg & 0x100) pdata->phy.rx_pause = 1; else if (lp_reg & 0x100) pdata->phy.tx_pause = 1; } } axgbe_printf(1, "%s: pause tx/rx %d/%d\n", __func__, pdata->phy.tx_pause, pdata->phy.rx_pause); if (lp_reg & 0x20) XGBE_SET_LP_ADV(&pdata->phy, 1000baseX_Full); /* Half duplex is not supported */ ad_reg &= lp_reg; mode = (ad_reg & 0x20) ? XGBE_MODE_X : XGBE_MODE_UNKNOWN; return (mode); } static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; enum xgbe_mode mode; unsigned int ad_reg, lp_reg; XGBE_SET_LP_ADV(&pdata->phy, Autoneg); XGBE_SET_LP_ADV(&pdata->phy, Backplane); axgbe_printf(1, "%s: pause_autoneg %d\n", __func__, pdata->phy.pause_autoneg); /* Use external PHY to determine flow control */ if (pdata->phy.pause_autoneg) xgbe_phy_phydev_flowctrl(pdata); /* Compare Advertisement and Link Partner register 2 */ ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); if (lp_reg & 0x80) XGBE_SET_LP_ADV(&pdata->phy, 10000baseKR_Full); if (lp_reg & 0x20) XGBE_SET_LP_ADV(&pdata->phy, 1000baseKX_Full); ad_reg &= lp_reg; if (ad_reg & 0x80) { switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: mode = XGBE_MODE_KR; break; default: mode = XGBE_MODE_SFI; break; } } else if (ad_reg & 0x20) { switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: mode = XGBE_MODE_KX_1000; break; case XGBE_PORT_MODE_1000BASE_X: mode = XGBE_MODE_X; break; case XGBE_PORT_MODE_SFP: switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: if ((phy_data->phydev) && (pdata->phy.speed == SPEED_100)) mode = XGBE_MODE_SGMII_100; else mode = XGBE_MODE_SGMII_1000; break; case XGBE_SFP_BASE_1000_SX: case XGBE_SFP_BASE_1000_LX: case XGBE_SFP_BASE_1000_CX: default: mode = XGBE_MODE_X; break; } break; default: if ((phy_data->phydev) && (pdata->phy.speed == SPEED_100)) mode = XGBE_MODE_SGMII_100; else mode = XGBE_MODE_SGMII_1000; break; } } else { mode = XGBE_MODE_UNKNOWN; } /* Compare Advertisement and Link Partner register 3 */ ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); lp_reg = XMDIO_READ(pdata, 
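/*
 * In the clause 73 advertisement words compared above, bit 5 (0x20) of the
 * second word is the 1000BASE-KX technology ability and bit 7 (0x80) the
 * 10GBASE-KR ability, while bits 14-15 (0xc000) of the third word carry
 * the FEC ability/requested flags.
 */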
MDIO_MMD_AN, MDIO_AN_LPA + 2); if (lp_reg & 0xc000) XGBE_SET_LP_ADV(&pdata->phy, 10000baseR_FEC); return (mode); } static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata) { enum xgbe_mode mode; unsigned int ad_reg, lp_reg; XGBE_SET_LP_ADV(&pdata->phy, Autoneg); XGBE_SET_LP_ADV(&pdata->phy, Backplane); /* Compare Advertisement and Link Partner register 1 */ ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); if (lp_reg & 0x400) XGBE_SET_LP_ADV(&pdata->phy, Pause); if (lp_reg & 0x800) XGBE_SET_LP_ADV(&pdata->phy, Asym_Pause); axgbe_printf(1, "%s: pause_autoneg %d ad_reg 0x%x lp_reg 0x%x\n", __func__, pdata->phy.pause_autoneg, ad_reg, lp_reg); if (pdata->phy.pause_autoneg) { /* Set flow control based on auto-negotiation result */ pdata->phy.tx_pause = 0; pdata->phy.rx_pause = 0; if (ad_reg & lp_reg & 0x400) { pdata->phy.tx_pause = 1; pdata->phy.rx_pause = 1; } else if (ad_reg & lp_reg & 0x800) { if (ad_reg & 0x400) pdata->phy.rx_pause = 1; else if (lp_reg & 0x400) pdata->phy.tx_pause = 1; } } axgbe_printf(1, "%s: pause tx/rx %d/%d\n", __func__, pdata->phy.tx_pause, pdata->phy.rx_pause); /* Compare Advertisement and Link Partner register 2 */ ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); if (lp_reg & 0x80) XGBE_SET_LP_ADV(&pdata->phy, 10000baseKR_Full); if (lp_reg & 0x20) XGBE_SET_LP_ADV(&pdata->phy, 1000baseKX_Full); ad_reg &= lp_reg; if (ad_reg & 0x80) mode = XGBE_MODE_KR; else if (ad_reg & 0x20) mode = XGBE_MODE_KX_1000; else mode = XGBE_MODE_UNKNOWN; /* Compare Advertisement and Link Partner register 3 */ ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); if (lp_reg & 0xc000) XGBE_SET_LP_ADV(&pdata->phy, 10000baseR_FEC); return (mode); } static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata) { switch (pdata->an_mode) { case XGBE_AN_MODE_CL73: return (xgbe_phy_an73_outcome(pdata)); case XGBE_AN_MODE_CL73_REDRV: return (xgbe_phy_an73_redrv_outcome(pdata)); case XGBE_AN_MODE_CL37: return (xgbe_phy_an37_outcome(pdata)); case XGBE_AN_MODE_CL37_SGMII: return (xgbe_phy_an37_sgmii_outcome(pdata)); default: return (XGBE_MODE_UNKNOWN); } } static void xgbe_phy_an_advertising(struct xgbe_prv_data *pdata, struct xgbe_phy *dphy) { struct xgbe_phy_data *phy_data = pdata->phy_data; XGBE_LM_COPY(dphy, advertising, &pdata->phy, advertising); /* Without a re-driver, just return current advertising */ if (!phy_data->redrv) return; /* With the KR re-driver we need to advertise a single speed */ XGBE_CLR_ADV(dphy, 1000baseKX_Full); XGBE_CLR_ADV(dphy, 10000baseKR_Full); /* Advertise FEC support is present */ if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) XGBE_SET_ADV(dphy, 10000baseR_FEC); switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: XGBE_SET_ADV(dphy, 10000baseKR_Full); break; case XGBE_PORT_MODE_BACKPLANE_2500: XGBE_SET_ADV(dphy, 1000baseKX_Full); break; case XGBE_PORT_MODE_1000BASE_T: case XGBE_PORT_MODE_1000BASE_X: case XGBE_PORT_MODE_NBASE_T: XGBE_SET_ADV(dphy, 1000baseKX_Full); break; case XGBE_PORT_MODE_10GBASE_T: if ((phy_data->phydev) && (pdata->phy.speed == SPEED_10000)) XGBE_SET_ADV(dphy, 10000baseKR_Full); else XGBE_SET_ADV(dphy, 1000baseKX_Full); break; case XGBE_PORT_MODE_10GBASE_R: XGBE_SET_ADV(dphy, 10000baseKR_Full); break; case XGBE_PORT_MODE_SFP: switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: case XGBE_SFP_BASE_1000_SX: 
case XGBE_SFP_BASE_1000_LX: case XGBE_SFP_BASE_1000_CX: XGBE_SET_ADV(dphy, 1000baseKX_Full); break; default: XGBE_SET_ADV(dphy, 10000baseKR_Full); break; } break; default: XGBE_SET_ADV(dphy, 10000baseKR_Full); break; } } static int xgbe_phy_an_config(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; int ret; ret = xgbe_phy_find_phy_device(pdata); if (ret) return (ret); axgbe_printf(2, "%s: find_phy_device return %s.\n", __func__, ret ? "Failure" : "Success"); if (!phy_data->phydev) return (0); ret = xgbe_phy_start_aneg(pdata); return (ret); } static enum xgbe_an_mode xgbe_phy_an_sfp_mode(struct xgbe_phy_data *phy_data) { switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: return (XGBE_AN_MODE_CL37_SGMII); case XGBE_SFP_BASE_1000_SX: case XGBE_SFP_BASE_1000_LX: case XGBE_SFP_BASE_1000_CX: return (XGBE_AN_MODE_CL37); default: return (XGBE_AN_MODE_NONE); } } static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; /* A KR re-driver will always require CL73 AN */ if (phy_data->redrv) return (XGBE_AN_MODE_CL73_REDRV); switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: return (XGBE_AN_MODE_CL73); case XGBE_PORT_MODE_BACKPLANE_2500: return (XGBE_AN_MODE_NONE); case XGBE_PORT_MODE_1000BASE_T: return (XGBE_AN_MODE_CL37_SGMII); case XGBE_PORT_MODE_1000BASE_X: return (XGBE_AN_MODE_CL37); case XGBE_PORT_MODE_NBASE_T: return (XGBE_AN_MODE_CL37_SGMII); case XGBE_PORT_MODE_10GBASE_T: return (XGBE_AN_MODE_CL73); case XGBE_PORT_MODE_10GBASE_R: return (XGBE_AN_MODE_NONE); case XGBE_PORT_MODE_SFP: return (xgbe_phy_an_sfp_mode(phy_data)); default: return (XGBE_AN_MODE_NONE); } } static int xgbe_phy_set_redrv_mode_mdio(struct xgbe_prv_data *pdata, enum xgbe_phy_redrv_mode mode) { struct xgbe_phy_data *phy_data = pdata->phy_data; uint16_t redrv_reg, redrv_val; redrv_reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000); redrv_val = (uint16_t)mode; return (pdata->hw_if.write_ext_mii_regs(pdata, phy_data->redrv_addr, redrv_reg, redrv_val)); } static int xgbe_phy_set_redrv_mode_i2c(struct xgbe_prv_data *pdata, enum xgbe_phy_redrv_mode mode) { struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int redrv_reg; int ret; /* Calculate the register to write */ redrv_reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000); ret = xgbe_phy_redrv_write(pdata, redrv_reg, mode); return (ret); } static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; enum xgbe_phy_redrv_mode mode; int ret; if (!phy_data->redrv) return; mode = XGBE_PHY_REDRV_MODE_CX; if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) && (phy_data->sfp_base != XGBE_SFP_BASE_1000_CX) && (phy_data->sfp_base != XGBE_SFP_BASE_10000_CR)) mode = XGBE_PHY_REDRV_MODE_SR; ret = xgbe_phy_get_comm_ownership(pdata); if (ret) return; axgbe_printf(2, "%s: redrv_if set: %d\n", __func__, phy_data->redrv_if); if (phy_data->redrv_if) xgbe_phy_set_redrv_mode_i2c(pdata, mode); else xgbe_phy_set_redrv_mode_mdio(pdata, mode); xgbe_phy_put_comm_ownership(pdata); } static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, unsigned int cmd, unsigned int sub_cmd) { unsigned int s0 = 0; unsigned int wait; /* Log if a previous command did not complete */ if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) axgbe_error("firmware mailbox not ready for command\n"); /* Construct the command */ XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd); XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, 
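/*
 * Mailbox protocol used here: pack COMMAND/SUB_COMMAND into scratch
 * register 0, clear scratch register 1, raise the request interrupt, then
 * poll the status bit for up to XGBE_RATECHANGE_COUNT iterations at 2 ms
 * intervals while firmware performs the rate change.
 */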
SUB_COMMAND, sub_cmd); /* Issue the command */ XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); /* Wait for command to complete */ wait = XGBE_RATECHANGE_COUNT; while (wait--) { if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) { axgbe_printf(3, "%s: Rate change done\n", __func__); return; } DELAY(2000); } axgbe_printf(3, "firmware mailbox command did not complete\n"); } static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) { /* Receiver Reset Cycle */ xgbe_phy_perform_ratechange(pdata, 5, 0); axgbe_printf(3, "receiver reset complete\n"); } static void xgbe_phy_power_off(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; /* Power off */ xgbe_phy_perform_ratechange(pdata, 0, 0); phy_data->cur_mode = XGBE_MODE_UNKNOWN; axgbe_printf(3, "phy powered off\n"); } static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); /* 10G/SFI */ axgbe_printf(3, "%s: cable %d len %d\n", __func__, phy_data->sfp_cable, phy_data->sfp_cable_len); if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) xgbe_phy_perform_ratechange(pdata, 3, 0); else { if (phy_data->sfp_cable_len <= 1) xgbe_phy_perform_ratechange(pdata, 3, 1); else if (phy_data->sfp_cable_len <= 3) xgbe_phy_perform_ratechange(pdata, 3, 2); else xgbe_phy_perform_ratechange(pdata, 3, 3); } phy_data->cur_mode = XGBE_MODE_SFI; axgbe_printf(3, "10GbE SFI mode set\n"); } static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); /* 1G/X */ xgbe_phy_perform_ratechange(pdata, 1, 3); phy_data->cur_mode = XGBE_MODE_X; axgbe_printf(3, "1GbE X mode set\n"); } static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); /* 1G/SGMII */ xgbe_phy_perform_ratechange(pdata, 1, 2); phy_data->cur_mode = XGBE_MODE_SGMII_1000; axgbe_printf(2, "1GbE SGMII mode set\n"); } static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); /* 100M/SGMII */ xgbe_phy_perform_ratechange(pdata, 1, 1); phy_data->cur_mode = XGBE_MODE_SGMII_100; axgbe_printf(3, "100MbE SGMII mode set\n"); } static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); /* 10G/KR */ xgbe_phy_perform_ratechange(pdata, 4, 0); phy_data->cur_mode = XGBE_MODE_KR; axgbe_printf(3, "10GbE KR mode set\n"); } static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); /* 2.5G/KX */ xgbe_phy_perform_ratechange(pdata, 2, 0); phy_data->cur_mode = XGBE_MODE_KX_2500; axgbe_printf(3, "2.5GbE KX mode set\n"); } static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); /* 1G/KX */ xgbe_phy_perform_ratechange(pdata, 1, 3); phy_data->cur_mode = XGBE_MODE_KX_1000; axgbe_printf(3, "1GbE KX mode set\n"); } static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; return (phy_data->cur_mode); } static enum xgbe_mode xgbe_phy_switch_baset_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; /* No switching if not 10GBase-T */ if 
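/*
 * For 10G/SFI (command 3 above), the sub-command selects SerDes settings
 * by passive-DAC length bucket: 1 for cables up to 1 m, 2 for up to 3 m,
 * 3 for longer cables, and 0 for active/optical modules.
 */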
(phy_data->port_mode != XGBE_PORT_MODE_10GBASE_T) return (xgbe_phy_cur_mode(pdata)); switch (xgbe_phy_cur_mode(pdata)) { case XGBE_MODE_SGMII_100: case XGBE_MODE_SGMII_1000: return (XGBE_MODE_KR); case XGBE_MODE_KR: default: return (XGBE_MODE_SGMII_1000); } } static enum xgbe_mode xgbe_phy_switch_bp_2500_mode(struct xgbe_prv_data *pdata) { return (XGBE_MODE_KX_2500); } static enum xgbe_mode xgbe_phy_switch_bp_mode(struct xgbe_prv_data *pdata) { /* If we are in KR switch to KX, and vice-versa */ switch (xgbe_phy_cur_mode(pdata)) { case XGBE_MODE_KX_1000: return (XGBE_MODE_KR); case XGBE_MODE_KR: default: return (XGBE_MODE_KX_1000); } } static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: return (xgbe_phy_switch_bp_mode(pdata)); case XGBE_PORT_MODE_BACKPLANE_2500: return (xgbe_phy_switch_bp_2500_mode(pdata)); case XGBE_PORT_MODE_1000BASE_T: case XGBE_PORT_MODE_NBASE_T: case XGBE_PORT_MODE_10GBASE_T: return (xgbe_phy_switch_baset_mode(pdata)); case XGBE_PORT_MODE_1000BASE_X: case XGBE_PORT_MODE_10GBASE_R: case XGBE_PORT_MODE_SFP: /* No switching, so just return current mode */ return (xgbe_phy_cur_mode(pdata)); default: return (XGBE_MODE_UNKNOWN); } } static enum xgbe_mode xgbe_phy_get_basex_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { case SPEED_1000: return (XGBE_MODE_X); case SPEED_10000: return (XGBE_MODE_KR); default: return (XGBE_MODE_UNKNOWN); } } static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { case SPEED_100: return (XGBE_MODE_SGMII_100); case SPEED_1000: return (XGBE_MODE_SGMII_1000); case SPEED_2500: return (XGBE_MODE_KX_2500); case SPEED_10000: return (XGBE_MODE_KR); default: return (XGBE_MODE_UNKNOWN); } } static enum xgbe_mode xgbe_phy_get_sfp_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { case SPEED_100: return (XGBE_MODE_SGMII_100); case SPEED_1000: if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) return (XGBE_MODE_SGMII_1000); else return (XGBE_MODE_X); case SPEED_10000: case SPEED_UNKNOWN: return (XGBE_MODE_SFI); default: return (XGBE_MODE_UNKNOWN); } } static enum xgbe_mode xgbe_phy_get_bp_2500_mode(int speed) { switch (speed) { case SPEED_2500: return (XGBE_MODE_KX_2500); default: return (XGBE_MODE_UNKNOWN); } } static enum xgbe_mode xgbe_phy_get_bp_mode(int speed) { switch (speed) { case SPEED_1000: return (XGBE_MODE_KX_1000); case SPEED_10000: return (XGBE_MODE_KR); default: return (XGBE_MODE_UNKNOWN); } } static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata, int speed) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: return (xgbe_phy_get_bp_mode(speed)); case XGBE_PORT_MODE_BACKPLANE_2500: return (xgbe_phy_get_bp_2500_mode(speed)); case XGBE_PORT_MODE_1000BASE_T: case XGBE_PORT_MODE_NBASE_T: case XGBE_PORT_MODE_10GBASE_T: return (xgbe_phy_get_baset_mode(phy_data, speed)); case XGBE_PORT_MODE_1000BASE_X: case XGBE_PORT_MODE_10GBASE_R: return (xgbe_phy_get_basex_mode(phy_data, speed)); case XGBE_PORT_MODE_SFP: return (xgbe_phy_get_sfp_mode(phy_data, speed)); default: return (XGBE_MODE_UNKNOWN); } } static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { switch (mode) { case XGBE_MODE_KX_1000: xgbe_phy_kx_1000_mode(pdata); break; case XGBE_MODE_KX_2500: xgbe_phy_kx_2500_mode(pdata); break; case XGBE_MODE_KR: xgbe_phy_kr_mode(pdata); break; 
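/*
 * Each mode helper dispatched from this switch resolves to a single
 * xgbe_phy_perform_ratechange() firmware mailbox call: command 0 powers
 * the PHY off, command 1 selects the 100M/1G modes (sub-command 1 = 100M
 * SGMII, 2 = 1G SGMII, 3 = 1G KX/X), command 2 selects 2.5G KX, command 3
 * selects 10G SFI (the sub-command encodes passive-cable length), command
 * 4 selects 10G KR, and command 5 runs a receiver reset cycle.
 */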
case XGBE_MODE_SGMII_100: xgbe_phy_sgmii_100_mode(pdata); break; case XGBE_MODE_SGMII_1000: xgbe_phy_sgmii_1000_mode(pdata); break; case XGBE_MODE_X: xgbe_phy_x_mode(pdata); break; case XGBE_MODE_SFI: xgbe_phy_sfi_mode(pdata); break; default: break; } } static void xgbe_phy_get_type(struct xgbe_prv_data *pdata, struct ifmediareq * ifmr) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (pdata->phy.speed) { case SPEED_10000: if (phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) ifmr->ifm_active |= IFM_10G_KR; else if(phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T) ifmr->ifm_active |= IFM_10G_T; else if(phy_data->port_mode == XGBE_PORT_MODE_10GBASE_R) ifmr->ifm_active |= IFM_10G_KR; else if(phy_data->port_mode == XGBE_PORT_MODE_SFP) ifmr->ifm_active |= IFM_10G_SFI; else ifmr->ifm_active |= IFM_OTHER; break; case SPEED_2500: if (phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE_2500) ifmr->ifm_active |= IFM_2500_KX; else ifmr->ifm_active |= IFM_OTHER; break; case SPEED_1000: if (phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) ifmr->ifm_active |= IFM_1000_KX; else if(phy_data->port_mode == XGBE_PORT_MODE_1000BASE_T) ifmr->ifm_active |= IFM_1000_T; #if 0 else if(phy_data->port_mode == XGBE_PORT_MODE_1000BASE_X) ifmr->ifm_active |= IFM_1000_SX; ifmr->ifm_active |= IFM_1000_LX; ifmr->ifm_active |= IFM_1000_CX; #endif else if(phy_data->port_mode == XGBE_PORT_MODE_SFP) ifmr->ifm_active |= IFM_1000_SGMII; else ifmr->ifm_active |= IFM_OTHER; break; case SPEED_100: if(phy_data->port_mode == XGBE_PORT_MODE_NBASE_T) ifmr->ifm_active |= IFM_100_T; else if(phy_data->port_mode == XGBE_PORT_MODE_SFP) ifmr->ifm_active |= IFM_1000_SGMII; else ifmr->ifm_active |= IFM_OTHER; break; default: ifmr->ifm_active |= IFM_OTHER; axgbe_printf(1, "Unknown mode detected\n"); break; } } static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode, bool advert) { if (pdata->phy.autoneg == AUTONEG_ENABLE) return (advert); else { enum xgbe_mode cur_mode; cur_mode = xgbe_phy_get_mode(pdata, pdata->phy.speed); if (cur_mode == mode) return (true); } return (false); } static bool xgbe_phy_use_basex_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { switch (mode) { case XGBE_MODE_X: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 1000baseX_Full))); case XGBE_MODE_KR: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 10000baseKR_Full))); default: return (false); } } static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { axgbe_printf(3, "%s: check mode %d\n", __func__, mode); switch (mode) { case XGBE_MODE_SGMII_100: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 100baseT_Full))); case XGBE_MODE_SGMII_1000: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 1000baseT_Full))); case XGBE_MODE_KX_2500: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 2500baseT_Full))); case XGBE_MODE_KR: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 10000baseT_Full))); default: return (false); } } static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (mode) { case XGBE_MODE_X: if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) return (false); return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 1000baseX_Full))); case XGBE_MODE_SGMII_100: if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) return (false); return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 100baseT_Full))); case XGBE_MODE_SGMII_1000: 
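/*
 * The SGMII sub-modes only apply to copper (1000Base-T) SFP modules;
 * optical 1G modules are driven in XGBE_MODE_X instead, as checked above.
 */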
if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) return (false); return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 1000baseT_Full))); case XGBE_MODE_SFI: if (phy_data->sfp_mod_absent) return (true); return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 10000baseSR_Full) || XGBE_ADV(&pdata->phy, 10000baseLR_Full) || XGBE_ADV(&pdata->phy, 10000baseLRM_Full) || XGBE_ADV(&pdata->phy, 10000baseER_Full) || XGBE_ADV(&pdata->phy, 10000baseCR_Full))); default: return (false); } } static bool xgbe_phy_use_bp_2500_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { switch (mode) { case XGBE_MODE_KX_2500: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 2500baseX_Full))); default: return (false); } } static bool xgbe_phy_use_bp_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { switch (mode) { case XGBE_MODE_KX_1000: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 1000baseKX_Full))); case XGBE_MODE_KR: return (xgbe_phy_check_mode(pdata, mode, XGBE_ADV(&pdata->phy, 10000baseKR_Full))); default: return (false); } } static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: return (xgbe_phy_use_bp_mode(pdata, mode)); case XGBE_PORT_MODE_BACKPLANE_2500: return (xgbe_phy_use_bp_2500_mode(pdata, mode)); case XGBE_PORT_MODE_1000BASE_T: axgbe_printf(3, "use_mode %s\n", xgbe_phy_use_baset_mode(pdata, mode) ? "found" : "Not found"); case XGBE_PORT_MODE_NBASE_T: case XGBE_PORT_MODE_10GBASE_T: return (xgbe_phy_use_baset_mode(pdata, mode)); case XGBE_PORT_MODE_1000BASE_X: case XGBE_PORT_MODE_10GBASE_R: return (xgbe_phy_use_basex_mode(pdata, mode)); case XGBE_PORT_MODE_SFP: return (xgbe_phy_use_sfp_mode(pdata, mode)); default: return (false); } } static bool xgbe_phy_valid_speed_basex_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { case SPEED_1000: return (phy_data->port_mode == XGBE_PORT_MODE_1000BASE_X); case SPEED_10000: return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_R); default: return (false); } } static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { case SPEED_100: case SPEED_1000: return (true); case SPEED_2500: return (phy_data->port_mode == XGBE_PORT_MODE_NBASE_T); case SPEED_10000: return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T); default: return (false); } } static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { case SPEED_100: return (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); case SPEED_1000: return ((phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000) || (phy_data->sfp_speed == XGBE_SFP_SPEED_1000)); case SPEED_10000: return (phy_data->sfp_speed == XGBE_SFP_SPEED_10000); default: return (false); } } static bool xgbe_phy_valid_speed_bp_2500_mode(int speed) { switch (speed) { case SPEED_2500: return (true); default: return (false); } } static bool xgbe_phy_valid_speed_bp_mode(int speed) { switch (speed) { case SPEED_1000: case SPEED_10000: return (true); default: return (false); } } static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: return (xgbe_phy_valid_speed_bp_mode(speed)); case XGBE_PORT_MODE_BACKPLANE_2500: return (xgbe_phy_valid_speed_bp_2500_mode(speed)); case XGBE_PORT_MODE_1000BASE_T: case XGBE_PORT_MODE_NBASE_T: case 
XGBE_PORT_MODE_10GBASE_T: return (xgbe_phy_valid_speed_baset_mode(phy_data, speed)); case XGBE_PORT_MODE_1000BASE_X: case XGBE_PORT_MODE_10GBASE_R: return (xgbe_phy_valid_speed_basex_mode(phy_data, speed)); case XGBE_PORT_MODE_SFP: return (xgbe_phy_valid_speed_sfp_mode(phy_data, speed)); default: return (false); } } static int xgbe_upd_link(struct xgbe_prv_data *pdata) { int reg; axgbe_printf(2, "%s: Link %d\n", __func__, pdata->phy.link); reg = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_BMSR); if (reg < 0) return (reg); if ((reg & BMSR_LINK) == 0) pdata->phy.link = 0; else pdata->phy.link = 1; axgbe_printf(2, "Link: %d updated reg %#x\n", pdata->phy.link, reg); return (0); } static int xgbe_phy_read_status(struct xgbe_prv_data *pdata) { int common_adv_gb = 0; int common_adv; int lpagb = 0; int adv, lpa; int ret; ret = xgbe_upd_link(pdata); if (ret) { axgbe_printf(2, "Link Update return %d\n", ret); return (ret); } if (AUTONEG_ENABLE == pdata->phy.autoneg) { if (pdata->phy.supported == SUPPORTED_1000baseT_Half || pdata->phy.supported == SUPPORTED_1000baseT_Full) { lpagb = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_100T2SR); if (lpagb < 0) return (lpagb); adv = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_100T2CR); if (adv < 0) return (adv); if (lpagb & GTSR_MAN_MS_FLT) { if (adv & GTCR_MAN_MS) axgbe_printf(2, "Master/Slave Resolution " "failed, maybe conflicting manual settings\n"); else axgbe_printf(2, "Master/Slave Resolution failed\n"); return (-ENOLINK); } if (pdata->phy.supported == SUPPORTED_1000baseT_Half) XGBE_SET_ADV(&pdata->phy, 1000baseT_Half); else if (pdata->phy.supported == SUPPORTED_1000baseT_Full) XGBE_SET_ADV(&pdata->phy, 1000baseT_Full); common_adv_gb = lpagb & adv << 2; } lpa = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_ANLPAR); if (lpa < 0) return (lpa); if (pdata->phy.supported == SUPPORTED_Autoneg) XGBE_SET_ADV(&pdata->phy, Autoneg); adv = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_ANAR); if (adv < 0) return (adv); common_adv = lpa & adv; pdata->phy.speed = SPEED_10; pdata->phy.duplex = DUPLEX_HALF; pdata->phy.pause = 0; pdata->phy.asym_pause = 0; axgbe_printf(2, "%s: lpa %#x adv %#x common_adv_gb %#x " "common_adv %#x\n", __func__, lpa, adv, common_adv_gb, common_adv); if (common_adv_gb & (GTSR_LP_1000TFDX | GTSR_LP_1000THDX)) { axgbe_printf(2, "%s: SPEED 1000\n", __func__); pdata->phy.speed = SPEED_1000; if (common_adv_gb & GTSR_LP_1000TFDX) pdata->phy.duplex = DUPLEX_FULL; } else if (common_adv & (ANLPAR_TX_FD | ANLPAR_TX)) { axgbe_printf(2, "%s: SPEED 100\n", __func__); pdata->phy.speed = SPEED_100; if (common_adv & ANLPAR_TX_FD) pdata->phy.duplex = DUPLEX_FULL; } else if (common_adv & ANLPAR_10_FD) pdata->phy.duplex = DUPLEX_FULL; if (pdata->phy.duplex == DUPLEX_FULL) { pdata->phy.pause = lpa & ANLPAR_FC ? 1 : 0; pdata->phy.asym_pause = lpa & LPA_PAUSE_ASYM ? 
1 : 0; } } else { int bmcr = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_BMCR); if (bmcr < 0) return (bmcr); if (bmcr & BMCR_FDX) pdata->phy.duplex = DUPLEX_FULL; else pdata->phy.duplex = DUPLEX_HALF; if (bmcr & BMCR_SPEED1) pdata->phy.speed = SPEED_1000; else if (bmcr & BMCR_SPEED100) pdata->phy.speed = SPEED_100; else pdata->phy.speed = SPEED_10; pdata->phy.pause = 0; pdata->phy.asym_pause = 0; axgbe_printf(2, "%s: link speed %#x duplex %#x link %#x " "autoneg %#x\n", __func__, pdata->phy.speed, pdata->phy.duplex, pdata->phy.link, pdata->phy.autoneg); } return (0); } static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) { struct xgbe_phy_data *phy_data = pdata->phy_data; struct mii_data *mii = NULL; unsigned int reg; int ret; *an_restart = 0; if (phy_data->port_mode == XGBE_PORT_MODE_SFP) { /* Check SFP signals */ axgbe_printf(3, "%s: calling phy detect\n", __func__); xgbe_phy_sfp_detect(pdata); if (phy_data->sfp_changed) { axgbe_printf(1, "%s: SFP changed observed\n", __func__); *an_restart = 1; return (0); } if (phy_data->sfp_mod_absent || phy_data->sfp_rx_los) { axgbe_printf(1, "%s: SFP absent 0x%x & sfp_rx_los 0x%x\n", __func__, phy_data->sfp_mod_absent, phy_data->sfp_rx_los); return (0); } } else { mii = device_get_softc(pdata->axgbe_miibus); mii_tick(mii); ret = xgbe_phy_read_status(pdata); if (ret) { axgbe_printf(2, "Link: Read status returned %d\n", ret); return (ret); } axgbe_printf(2, "%s: link speed %#x duplex %#x link %#x " "autoneg %#x\n", __func__, pdata->phy.speed, pdata->phy.duplex, pdata->phy.link, pdata->phy.autoneg); ret = xgbe_phy_mii_read(pdata, pdata->mdio_addr, MII_BMSR); ret = (ret < 0) ? ret : (ret & BMSR_ACOMP); axgbe_printf(2, "Link: BMSR returned %d\n", ret); if ((pdata->phy.autoneg == AUTONEG_ENABLE) && !ret) return (0); return (pdata->phy.link); } /* Link status is latched low, so read once to clear * and then read again to get current state */ reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); axgbe_printf(1, "%s: link_status reg: 0x%x\n", __func__, reg); if (reg & MDIO_STAT1_LSTATUS) return (1); /* No link, attempt a receiver reset cycle */ if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) { axgbe_printf(1, "ENTERED RRC: rrc_count: %d\n", phy_data->rrc_count); phy_data->rrc_count = 0; if (pdata->link_workaround) { ret = xgbe_phy_reset(pdata); if (ret) axgbe_error("Error resetting phy\n"); } else xgbe_phy_rrc(pdata); } return (0); } static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; phy_data->sfp_gpio_address = XGBE_GPIO_ADDRESS_PCA9555 + XP_GET_BITS(pdata->pp3, XP_PROP_3, GPIO_ADDR); phy_data->sfp_gpio_mask = XP_GET_BITS(pdata->pp3, XP_PROP_3, GPIO_MASK); phy_data->sfp_gpio_rx_los = XP_GET_BITS(pdata->pp3, XP_PROP_3, GPIO_RX_LOS); phy_data->sfp_gpio_tx_fault = XP_GET_BITS(pdata->pp3, XP_PROP_3, GPIO_TX_FAULT); phy_data->sfp_gpio_mod_absent = XP_GET_BITS(pdata->pp3, XP_PROP_3, GPIO_MOD_ABS); phy_data->sfp_gpio_rate_select = XP_GET_BITS(pdata->pp3, XP_PROP_3, GPIO_RATE_SELECT); DBGPR("SFP: gpio_address=%#x\n", phy_data->sfp_gpio_address); DBGPR("SFP: gpio_mask=%#x\n", phy_data->sfp_gpio_mask); DBGPR("SFP: gpio_rx_los=%u\n", phy_data->sfp_gpio_rx_los); DBGPR("SFP: gpio_tx_fault=%u\n", phy_data->sfp_gpio_tx_fault); DBGPR("SFP: gpio_mod_absent=%u\n", phy_data->sfp_gpio_mod_absent); DBGPR("SFP: gpio_rate_select=%u\n", phy_data->sfp_gpio_rate_select); } static void xgbe_phy_sfp_comm_setup(struct xgbe_prv_data *pdata) {
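/*
 * The SFP's I2C interface may sit behind a PCA9545 mux. A low mux address
 * of XGBE_SFP_DIRECT means the module is wired straight to the I2C
 * master, so no mux address or channel is recorded.
 */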
struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int mux_addr_hi, mux_addr_lo; mux_addr_hi = XP_GET_BITS(pdata->pp4, XP_PROP_4, MUX_ADDR_HI); mux_addr_lo = XP_GET_BITS(pdata->pp4, XP_PROP_4, MUX_ADDR_LO); if (mux_addr_lo == XGBE_SFP_DIRECT) return; phy_data->sfp_comm = XGBE_SFP_COMM_PCA9545; phy_data->sfp_mux_address = (mux_addr_hi << 2) + mux_addr_lo; phy_data->sfp_mux_channel = XP_GET_BITS(pdata->pp4, XP_PROP_4, MUX_CHAN); DBGPR("SFP: mux_address=%#x\n", phy_data->sfp_mux_address); DBGPR("SFP: mux_channel=%u\n", phy_data->sfp_mux_channel); } static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata) { xgbe_phy_sfp_comm_setup(pdata); xgbe_phy_sfp_gpio_setup(pdata); } static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int ret; ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio); if (ret) return (ret); ret = pdata->hw_if.clr_gpio(pdata, phy_data->mdio_reset_gpio); return (ret); } static int xgbe_phy_i2c_mdio_reset(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; uint8_t gpio_reg, gpio_ports[2], gpio_data[3]; int ret; /* Read the output port registers */ gpio_reg = 2; ret = xgbe_phy_i2c_read(pdata, phy_data->mdio_reset_addr, &gpio_reg, sizeof(gpio_reg), gpio_ports, sizeof(gpio_ports)); if (ret) return (ret); /* Prepare to write the GPIO data */ gpio_data[0] = 2; gpio_data[1] = gpio_ports[0]; gpio_data[2] = gpio_ports[1]; /* Set the GPIO pin */ if (phy_data->mdio_reset_gpio < 8) gpio_data[1] |= (1 << (phy_data->mdio_reset_gpio % 8)); else gpio_data[2] |= (1 << (phy_data->mdio_reset_gpio % 8)); /* Write the output port registers */ ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr, gpio_data, sizeof(gpio_data)); if (ret) return (ret); /* Clear the GPIO pin */ if (phy_data->mdio_reset_gpio < 8) gpio_data[1] &= ~(1 << (phy_data->mdio_reset_gpio % 8)); else gpio_data[2] &= ~(1 << (phy_data->mdio_reset_gpio % 8)); /* Write the output port registers */ ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr, gpio_data, sizeof(gpio_data)); return (ret); } static int xgbe_phy_mdio_reset(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; int ret; if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO) return (0); ret = xgbe_phy_get_comm_ownership(pdata); if (ret) return (ret); if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) ret = xgbe_phy_i2c_mdio_reset(pdata); else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) ret = xgbe_phy_int_mdio_reset(pdata); xgbe_phy_put_comm_ownership(pdata); return (ret); } static bool xgbe_phy_redrv_error(struct xgbe_phy_data *phy_data) { if (!phy_data->redrv) return (false); if (phy_data->redrv_if >= XGBE_PHY_REDRV_IF_MAX) return (true); switch (phy_data->redrv_model) { case XGBE_PHY_REDRV_MODEL_4223: if (phy_data->redrv_lane > 3) return (true); break; case XGBE_PHY_REDRV_MODEL_4227: if (phy_data->redrv_lane > 1) return (true); break; default: return (true); } return (false); } static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO) return (0); phy_data->mdio_reset = XP_GET_BITS(pdata->pp3, XP_PROP_3, MDIO_RESET); switch (phy_data->mdio_reset) { case XGBE_MDIO_RESET_NONE: case XGBE_MDIO_RESET_I2C_GPIO: case XGBE_MDIO_RESET_INT_GPIO: break; default: axgbe_error("unsupported MDIO reset (%#x)\n", phy_data->mdio_reset); return (-EINVAL); } if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) { 
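/*
 * An I2C_GPIO reset pulses a pin on an external PCA9555 expander (see
 * xgbe_phy_i2c_mdio_reset() above); an INT_GPIO reset toggles one of the
 * MAC's own GPIOs through the set_gpio/clr_gpio hardware hooks.
 */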
phy_data->mdio_reset_addr = XGBE_GPIO_ADDRESS_PCA9555 + XP_GET_BITS(pdata->pp3, XP_PROP_3, MDIO_RESET_I2C_ADDR); phy_data->mdio_reset_gpio = XP_GET_BITS(pdata->pp3, XP_PROP_3, MDIO_RESET_I2C_GPIO); } else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) phy_data->mdio_reset_gpio = XP_GET_BITS(pdata->pp3, XP_PROP_3, MDIO_RESET_INT_GPIO); return (0); } static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) return (false); break; case XGBE_PORT_MODE_BACKPLANE_2500: if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) return (false); break; case XGBE_PORT_MODE_1000BASE_T: if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)) return (false); break; case XGBE_PORT_MODE_1000BASE_X: if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) return (false); break; case XGBE_PORT_MODE_NBASE_T: if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500)) return (false); break; case XGBE_PORT_MODE_10GBASE_T: if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) return (false); break; case XGBE_PORT_MODE_10GBASE_R: if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) return (false); break; case XGBE_PORT_MODE_SFP: if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) return (false); break; default: break; } return (true); } static bool xgbe_phy_conn_type_mismatch(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: case XGBE_PORT_MODE_BACKPLANE_2500: if (phy_data->conn_type == XGBE_CONN_TYPE_BACKPLANE) return (false); break; case XGBE_PORT_MODE_1000BASE_T: case XGBE_PORT_MODE_1000BASE_X: case XGBE_PORT_MODE_NBASE_T: case XGBE_PORT_MODE_10GBASE_T: case XGBE_PORT_MODE_10GBASE_R: if (phy_data->conn_type == XGBE_CONN_TYPE_MDIO) return (false); break; case XGBE_PORT_MODE_SFP: if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) return (false); break; default: break; } return (true); } static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata) { if (!XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_SPEEDS)) return (false); if (!XP_GET_BITS(pdata->pp0, XP_PROP_0, CONN_TYPE)) return (false); return (true); } static void xgbe_phy_cdr_track(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; axgbe_printf(2, "%s: an_cdr_workaround %d phy_cdr_notrack %d\n", __func__, pdata->sysctl_an_cdr_workaround, phy_data->phy_cdr_notrack); if (!pdata->sysctl_an_cdr_workaround) return; if (!phy_data->phy_cdr_notrack) return; DELAY(phy_data->phy_cdr_delay + 500); XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL, XGBE_PMA_CDR_TRACK_EN_MASK, XGBE_PMA_CDR_TRACK_EN_ON); phy_data->phy_cdr_notrack = 0; axgbe_printf(2, "CDR TRACK DONE\n"); } static void xgbe_phy_cdr_notrack(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; axgbe_printf(2, "%s: an_cdr_workaround %d phy_cdr_notrack %d\n", __func__, pdata->sysctl_an_cdr_workaround, phy_data->phy_cdr_notrack); if 
(!pdata->sysctl_an_cdr_workaround) return; if (phy_data->phy_cdr_notrack) return; XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL, XGBE_PMA_CDR_TRACK_EN_MASK, XGBE_PMA_CDR_TRACK_EN_OFF); xgbe_phy_rrc(pdata); phy_data->phy_cdr_notrack = 1; } static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata) { if (!pdata->sysctl_an_cdr_track_early) xgbe_phy_cdr_track(pdata); } static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata) { if (pdata->sysctl_an_cdr_track_early) xgbe_phy_cdr_track(pdata); } static void xgbe_phy_an_post(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (pdata->an_mode) { case XGBE_AN_MODE_CL73: case XGBE_AN_MODE_CL73_REDRV: if (phy_data->cur_mode != XGBE_MODE_KR) break; xgbe_phy_cdr_track(pdata); switch (pdata->an_result) { case XGBE_AN_READY: case XGBE_AN_COMPLETE: break; default: if (phy_data->phy_cdr_delay < XGBE_CDR_DELAY_MAX) phy_data->phy_cdr_delay += XGBE_CDR_DELAY_INC; else phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT; break; } break; default: break; } } static void xgbe_phy_an_pre(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; switch (pdata->an_mode) { case XGBE_AN_MODE_CL73: case XGBE_AN_MODE_CL73_REDRV: if (phy_data->cur_mode != XGBE_MODE_KR) break; xgbe_phy_cdr_notrack(pdata); break; default: break; } } static void xgbe_phy_stop(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; /* If we have an external PHY, free it */ xgbe_phy_free_phy_device(pdata); /* Reset SFP data */ xgbe_phy_sfp_reset(phy_data); xgbe_phy_sfp_mod_absent(pdata); /* Reset CDR support */ xgbe_phy_cdr_track(pdata); /* Power off the PHY */ xgbe_phy_power_off(pdata); /* Stop the I2C controller */ pdata->i2c_if.i2c_stop(pdata); } static int xgbe_phy_start(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; int ret; axgbe_printf(2, "%s: redrv %d redrv_if %d start_mode %d\n", __func__, phy_data->redrv, phy_data->redrv_if, phy_data->start_mode); /* Start the I2C controller */ ret = pdata->i2c_if.i2c_start(pdata); if (ret) { axgbe_error("%s: impl i2c start ret %d\n", __func__, ret); return (ret); } /* Set the proper MDIO mode for the re-driver */ if (phy_data->redrv && !phy_data->redrv_if) { ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr, XGBE_MDIO_MODE_CL22); if (ret) { axgbe_error("redriver mdio port not compatible (%u)\n", phy_data->redrv_addr); return (ret); } } /* Start in highest supported mode */ xgbe_phy_set_mode(pdata, phy_data->start_mode); /* Reset CDR support */ xgbe_phy_cdr_track(pdata); /* After starting the I2C controller, we can check for an SFP */ switch (phy_data->port_mode) { case XGBE_PORT_MODE_SFP: axgbe_printf(3, "%s: calling phy detect\n", __func__); xgbe_phy_sfp_detect(pdata); break; default: break; } /* If we have an external PHY, start it */ ret = xgbe_phy_find_phy_device(pdata); if (ret) { axgbe_error("%s: impl find phy dev ret %d\n", __func__, ret); goto err_i2c; } axgbe_printf(3, "%s: impl return success\n", __func__); return (0); err_i2c: pdata->i2c_if.i2c_stop(pdata); return (ret); } static int xgbe_phy_reset(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; enum xgbe_mode cur_mode; int ret; /* Reset by power cycling the PHY */ cur_mode = phy_data->cur_mode; xgbe_phy_power_off(pdata); xgbe_phy_set_mode(pdata, cur_mode); axgbe_printf(3, "%s: mode %d\n", __func__, cur_mode); if (!phy_data->phydev) { axgbe_printf(1, "%s: no phydev\n", __func__); return (0); } 
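/*
 * The power off/on cycle above resets the integrated PCS/SerDes; an
 * attached external PHY additionally has its MDIO reset line pulsed.
 */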
/* Reset the external PHY */ ret = xgbe_phy_mdio_reset(pdata); if (ret) { axgbe_error("%s: mdio reset %d\n", __func__, ret); return (ret); } axgbe_printf(3, "%s: return success\n", __func__); return (0); } static void -axgbe_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) +axgbe_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct axgbe_if_softc *sc; struct xgbe_prv_data *pdata; struct mii_data *mii; - sc = ifp->if_softc; + sc = if_getsoftc(ifp); pdata = &sc->pdata; axgbe_printf(2, "%s: Invoked\n", __func__); mtx_lock_spin(&pdata->mdio_mutex); mii = device_get_softc(pdata->axgbe_miibus); axgbe_printf(2, "%s: media_active %#x media_status %#x\n", __func__, mii->mii_media_active, mii->mii_media_status); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; mtx_unlock_spin(&pdata->mdio_mutex); } static int -axgbe_ifmedia_upd(struct ifnet *ifp) +axgbe_ifmedia_upd(if_t ifp) { struct xgbe_prv_data *pdata; struct axgbe_if_softc *sc; struct mii_data *mii; struct mii_softc *miisc; int ret; - sc = ifp->if_softc; + sc = if_getsoftc(ifp); pdata = &sc->pdata; axgbe_printf(2, "%s: Invoked\n", __func__); mtx_lock_spin(&pdata->mdio_mutex); mii = device_get_softc(pdata->axgbe_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); ret = mii_mediachg(mii); mtx_unlock_spin(&pdata->mdio_mutex); return (ret); } static void xgbe_phy_exit(struct xgbe_prv_data *pdata) { if (pdata->axgbe_miibus != NULL) device_delete_child(pdata->dev, pdata->axgbe_miibus); /* free phy_data structure */ free(pdata->phy_data, M_AXGBE); } static int xgbe_phy_init(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data; int ret; /* Initialize the global lock */ if (!mtx_initialized(&xgbe_phy_comm_lock)) mtx_init(&xgbe_phy_comm_lock, "xgbe phy common lock", NULL, MTX_DEF); /* Check if enabled */ if (!xgbe_phy_port_enabled(pdata)) { axgbe_error("device is not enabled\n"); return (-ENODEV); } /* Initialize the I2C controller */ ret = pdata->i2c_if.i2c_init(pdata); if (ret) return (ret); phy_data = malloc(sizeof(*phy_data), M_AXGBE, M_WAITOK | M_ZERO); if (!phy_data) return (-ENOMEM); pdata->phy_data = phy_data; phy_data->port_mode = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_MODE); phy_data->port_id = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_ID); phy_data->port_speeds = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_SPEEDS); phy_data->conn_type = XP_GET_BITS(pdata->pp0, XP_PROP_0, CONN_TYPE); phy_data->mdio_addr = XP_GET_BITS(pdata->pp0, XP_PROP_0, MDIO_ADDR); pdata->mdio_addr = phy_data->mdio_addr; DBGPR("port mode=%u\n", phy_data->port_mode); DBGPR("port id=%u\n", phy_data->port_id); DBGPR("port speeds=%#x\n", phy_data->port_speeds); DBGPR("conn type=%u\n", phy_data->conn_type); DBGPR("mdio addr=%u\n", phy_data->mdio_addr); phy_data->redrv = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_PRESENT); phy_data->redrv_if = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_IF); phy_data->redrv_addr = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_ADDR); phy_data->redrv_lane = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_LANE); phy_data->redrv_model = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_MODEL); if (phy_data->redrv) { DBGPR("redrv present\n"); DBGPR("redrv i/f=%u\n", phy_data->redrv_if); DBGPR("redrv addr=%#x\n", phy_data->redrv_addr); DBGPR("redrv lane=%u\n", phy_data->redrv_lane); DBGPR("redrv model=%u\n", phy_data->redrv_model); } DBGPR("%s: redrv addr=%#x redrv i/f=%u\n", __func__, phy_data->redrv_addr, phy_data->redrv_if); /* Validate the connection requested */ if 
(xgbe_phy_conn_type_mismatch(pdata)) { axgbe_error("phy mode/connection mismatch " "(%#x/%#x)\n", phy_data->port_mode, phy_data->conn_type); return (-EINVAL); } /* Validate the mode requested */ if (xgbe_phy_port_mode_mismatch(pdata)) { axgbe_error("phy mode/speed mismatch " "(%#x/%#x)\n", phy_data->port_mode, phy_data->port_speeds); return (-EINVAL); } /* Check for and validate MDIO reset support */ ret = xgbe_phy_mdio_reset_setup(pdata); if (ret) { axgbe_error("%s, mdio_reset_setup ret %d\n", __func__, ret); return (ret); } /* Validate the re-driver information */ if (xgbe_phy_redrv_error(phy_data)) { axgbe_error("phy re-driver settings error\n"); return (-EINVAL); } pdata->kr_redrv = phy_data->redrv; /* Indicate current mode is unknown */ phy_data->cur_mode = XGBE_MODE_UNKNOWN; /* Initialize supported features. Current code does not support ethtool */ XGBE_ZERO_SUP(&pdata->phy); DBGPR("%s: port mode %d\n", __func__, phy_data->port_mode); switch (phy_data->port_mode) { /* Backplane support */ case XGBE_PORT_MODE_BACKPLANE: XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, Backplane); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { XGBE_SET_SUP(&pdata->phy, 1000baseKX_Full); phy_data->start_mode = XGBE_MODE_KX_1000; } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { XGBE_SET_SUP(&pdata->phy, 10000baseKR_Full); if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) XGBE_SET_SUP(&pdata->phy, 10000baseR_FEC); phy_data->start_mode = XGBE_MODE_KR; } phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; break; case XGBE_PORT_MODE_BACKPLANE_2500: XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, Backplane); XGBE_SET_SUP(&pdata->phy, 2500baseX_Full); phy_data->start_mode = XGBE_MODE_KX_2500; phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; break; /* MDIO 1GBase-T support */ case XGBE_PORT_MODE_1000BASE_T: XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, TP); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { XGBE_SET_SUP(&pdata->phy, 100baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_100; } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { XGBE_SET_SUP(&pdata->phy, 1000baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_1000; } phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; break; /* MDIO Base-X support */ case XGBE_PORT_MODE_1000BASE_X: XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, FIBRE); XGBE_SET_SUP(&pdata->phy, 1000baseX_Full); phy_data->start_mode = XGBE_MODE_X; phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; break; /* MDIO NBase-T support */ case XGBE_PORT_MODE_NBASE_T: XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, TP); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { XGBE_SET_SUP(&pdata->phy, 100baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_100; } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { XGBE_SET_SUP(&pdata->phy, 1000baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_1000; } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) { XGBE_SET_SUP(&pdata->phy, 2500baseT_Full); phy_data->start_mode = XGBE_MODE_KX_2500; } phy_data->phydev_mode = XGBE_MDIO_MODE_CL45; break; /* 10GBase-T support */ case XGBE_PORT_MODE_10GBASE_T: XGBE_SET_SUP(&pdata->phy, Autoneg); 
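/* 10GBase-T ports attach an external Clause 45 PHY (phydev_mode below). */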
XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, TP); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { XGBE_SET_SUP(&pdata->phy, 100baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_100; } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { XGBE_SET_SUP(&pdata->phy, 1000baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_1000; } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { XGBE_SET_SUP(&pdata->phy, 10000baseT_Full); phy_data->start_mode = XGBE_MODE_KR; } phy_data->phydev_mode = XGBE_MDIO_MODE_CL45; break; /* 10GBase-R support */ case XGBE_PORT_MODE_10GBASE_R: XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, FIBRE); XGBE_SET_SUP(&pdata->phy, 10000baseSR_Full); XGBE_SET_SUP(&pdata->phy, 10000baseLR_Full); XGBE_SET_SUP(&pdata->phy, 10000baseLRM_Full); XGBE_SET_SUP(&pdata->phy, 10000baseER_Full); if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) XGBE_SET_SUP(&pdata->phy, 10000baseR_FEC); phy_data->start_mode = XGBE_MODE_SFI; phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; break; /* SFP support */ case XGBE_PORT_MODE_SFP: XGBE_SET_SUP(&pdata->phy, Autoneg); XGBE_SET_SUP(&pdata->phy, Pause); XGBE_SET_SUP(&pdata->phy, Asym_Pause); XGBE_SET_SUP(&pdata->phy, TP); XGBE_SET_SUP(&pdata->phy, FIBRE); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) phy_data->start_mode = XGBE_MODE_SGMII_100; if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) phy_data->start_mode = XGBE_MODE_SGMII_1000; if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) phy_data->start_mode = XGBE_MODE_SFI; phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; xgbe_phy_sfp_setup(pdata); DBGPR("%s: start %d mode %d adv 0x%x\n", __func__, phy_data->start_mode, phy_data->phydev_mode, pdata->phy.advertising); break; default: return (-EINVAL); } axgbe_printf(2, "%s: start %d mode %d adv 0x%x\n", __func__, phy_data->start_mode, phy_data->phydev_mode, pdata->phy.advertising); DBGPR("%s: conn type %d mode %d\n", __func__, phy_data->conn_type, phy_data->phydev_mode); if ((phy_data->conn_type & XGBE_CONN_TYPE_MDIO) && (phy_data->phydev_mode != XGBE_MDIO_MODE_NONE)) { ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr, phy_data->phydev_mode); if (ret) { axgbe_error("mdio port/clause not compatible (%d/%u)\n", phy_data->mdio_addr, phy_data->phydev_mode); return (-EINVAL); } } if (phy_data->redrv && !phy_data->redrv_if) { ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr, XGBE_MDIO_MODE_CL22); if (ret) { axgbe_error("redriver mdio port not compatible (%u)\n", phy_data->redrv_addr); return (-EINVAL); } } phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT; if (phy_data->port_mode != XGBE_PORT_MODE_SFP) { ret = mii_attach(pdata->dev, &pdata->axgbe_miibus, pdata->netdev, (ifm_change_cb_t)axgbe_ifmedia_upd, (ifm_stat_cb_t)axgbe_ifmedia_sts, BMSR_DEFCAPMASK, pdata->mdio_addr, MII_OFFSET_ANY, MIIF_FORCEANEG); if (ret){ axgbe_printf(2, "mii attach failed with err=(%d)\n", ret); return (-EINVAL); } } DBGPR("%s: return success\n", __func__); return (0); } void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if) { struct xgbe_phy_impl_if *phy_impl = &phy_if->phy_impl; phy_impl->init = xgbe_phy_init; phy_impl->exit = xgbe_phy_exit; phy_impl->reset = xgbe_phy_reset; phy_impl->start = xgbe_phy_start; phy_impl->stop = xgbe_phy_stop; phy_impl->link_status = xgbe_phy_link_status; phy_impl->valid_speed = xgbe_phy_valid_speed; phy_impl->use_mode = xgbe_phy_use_mode; 
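/*
 * These bindings expose the v2 (XP register-property driven) backend
 * through the generic xgbe_phy_impl_if consumed by the driver's common
 * PHY code; the interface is documented in xgbe.h below.
 */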
phy_impl->set_mode = xgbe_phy_set_mode; phy_impl->get_mode = xgbe_phy_get_mode; phy_impl->switch_mode = xgbe_phy_switch_mode; phy_impl->cur_mode = xgbe_phy_cur_mode; phy_impl->get_type = xgbe_phy_get_type; phy_impl->an_mode = xgbe_phy_an_mode; phy_impl->an_config = xgbe_phy_an_config; phy_impl->an_advertising = xgbe_phy_an_advertising; phy_impl->an_outcome = xgbe_phy_an_outcome; phy_impl->an_pre = xgbe_phy_an_pre; phy_impl->an_post = xgbe_phy_an_post; phy_impl->kr_training_pre = xgbe_phy_kr_training_pre; phy_impl->kr_training_post = xgbe_phy_kr_training_post; phy_impl->module_info = xgbe_phy_module_info; phy_impl->module_eeprom = xgbe_phy_module_eeprom; } diff --git a/sys/dev/axgbe/xgbe.h b/sys/dev/axgbe/xgbe.h index 85b4c0c5c5d0..32bac28dd093 100644 --- a/sys/dev/axgbe/xgbe.h +++ b/sys/dev/axgbe/xgbe.h @@ -1,1364 +1,1364 @@ /* * AMD 10Gb Ethernet driver * * Copyright (c) 2014-2016,2020 Advanced Micro Devices, Inc. * * This file is available to you under your choice of the following two * licenses: * * License 1: GPLv2 * * This file is free software; you may copy, redistribute and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or (at * your option) any later version. * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* * * License 2: Modified BSD * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Advanced Micro Devices, Inc. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This file incorporates work covered by the following copyright and * permission notice: * The Synopsys DWC ETHER XGMAC Software Driver and documentation * (hereinafter "Software") is an unsupported proprietary work of Synopsys, * Inc. unless otherwise expressly agreed to in writing between Synopsys * and you. * * The Software IS NOT an item of Licensed Software or Licensed Product * under any End User Software License Agreement or Agreement for Licensed * Product with Synopsys or any supplement thereto. Permission is hereby * granted, free of charge, to any person obtaining a copy of this software * annotated with this license and the Software, to deal in the Software * without restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __XGBE_H__ #define __XGBE_H__ #include #include #include #include #include #include #include #include #include #include "xgbe_osdep.h" /* From linux/dcbnl.h */ #define IEEE_8021QAZ_MAX_TCS 8 #define XGBE_DRV_NAME "amd-xgbe" #define XGBE_DRV_VERSION "1.0.3" #define XGBE_DRV_DESC "AMD 10 Gigabit Ethernet Driver" /* Descriptor related defines */ #define XGBE_TX_DESC_CNT 512 #define XGBE_TX_DESC_MIN_FREE (XGBE_TX_DESC_CNT >> 3) #define XGBE_TX_DESC_MAX_PROC (XGBE_TX_DESC_CNT >> 1) #define XGBE_RX_DESC_CNT 512 #define XGBE_TX_DESC_CNT_MIN 64 #define XGBE_TX_DESC_CNT_MAX 4096 #define XGBE_RX_DESC_CNT_MIN 64 #define XGBE_RX_DESC_CNT_MAX 4096 #define XGBE_TX_DESC_CNT_DEFAULT 512 #define XGBE_RX_DESC_CNT_DEFAULT 512 #define XGBE_TX_MAX_BUF_SIZE (0x3fff & ~(64 - 1)) /* Descriptors required for maximum contiguous TSO/GSO packet */ #define XGBE_TX_MAX_SPLIT ((GSO_MAX_SIZE / XGBE_TX_MAX_BUF_SIZE) + 1) /* Maximum possible descriptors needed for an SKB: * - Maximum number of SKB frags * - Maximum descriptors for contiguous TSO/GSO packet * - Possible context descriptor * - Possible TSO header descriptor */ #define XGBE_TX_MAX_DESCS (MAX_SKB_FRAGS + XGBE_TX_MAX_SPLIT + 2) #define XGBE_RX_MIN_BUF_SIZE 1522 #define XGBE_RX_BUF_ALIGN 64 #define XGBE_SKB_ALLOC_SIZE 256 #define XGBE_SPH_HDSMS_SIZE 2 /* Keep in sync with SKB_ALLOC_SIZ */ #define XGBE_MAX_DMA_CHANNELS 16 #define XGBE_MAX_QUEUES 16 #define XGBE_PRIORITY_QUEUES 8 #define XGBE_DMA_STOP_TIMEOUT 5 /* DMA cache settings - Outer sharable, write-back, write-allocate */ #define XGBE_DMA_OS_ARCR 0x002b2b2b #define XGBE_DMA_OS_AWCR 0x2f2f2f2f /* DMA cache settings - System, no caches used */ #define XGBE_DMA_SYS_ARCR 0x00303030 #define XGBE_DMA_SYS_AWCR 0x30303030 /* DMA cache settings - PCI device */ #define XGBE_DMA_PCI_ARCR 0x00000003 #define XGBE_DMA_PCI_AWCR 0x13131313 #define XGBE_DMA_PCI_AWARCR 0x00000313 /* DMA channel interrupt modes */ #define XGBE_IRQ_MODE_EDGE 0 #define XGBE_IRQ_MODE_LEVEL 1 #define XGMAC_MIN_PACKET 60 #define XGMAC_STD_PACKET_MTU 1500 #define XGMAC_MAX_STD_PACKET 1518 #define XGMAC_JUMBO_PACKET_MTU 9000 #define XGMAC_MAX_JUMBO_PACKET 9018 #define XGMAC_ETH_PREAMBLE (12 + 8) /* Inter-frame gap + preamble */ #define XGMAC_PFC_DATA_LEN 46 #define XGMAC_PFC_DELAYS 14000 #define XGMAC_PRIO_QUEUES(_cnt) \ min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, (_cnt)) /* Common property names */ #define XGBE_MAC_ADDR_PROPERTY "mac-address" #define XGBE_PHY_MODE_PROPERTY "phy-mode" #define XGBE_DMA_IRQS_PROPERTY "amd,per-channel-interrupt" #define XGBE_SPEEDSET_PROPERTY "amd,speed-set" #define XGBE_BLWC_PROPERTY "amd,serdes-blwc" #define XGBE_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" #define XGBE_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" #define XGBE_TX_AMP_PROPERTY "amd,serdes-tx-amp" #define XGBE_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" #define XGBE_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" /* Device-tree clock names */ #define XGBE_DMA_CLOCK "dma_clk" #define XGBE_PTP_CLOCK "ptp_clk" /* ACPI property names */ #define XGBE_ACPI_DMA_FREQ "amd,dma-freq" #define XGBE_ACPI_PTP_FREQ "amd,ptp-freq" /* PCI BAR mapping */ #define XGBE_XGMAC_BAR 0 #define XGBE_XPCS_BAR 1 #define XGBE_MAC_PROP_OFFSET 0x1d000 #define XGBE_I2C_CTRL_OFFSET 0x1e000 /* PCI MSI/MSIx support */ #define XGBE_MSI_BASE_COUNT 4 #define XGBE_MSI_MIN_COUNT (XGBE_MSI_BASE_COUNT + 1) /* PCI clock frequencies */ #define XGBE_V2_DMA_CLOCK_FREQ 500000000 /* 500 MHz */ #define XGBE_V2_PTP_CLOCK_FREQ 125000000 /* 125 MHz */ /* Timestamp support - values based on 
50MHz PTP clock * 50MHz => 20 nsec */ #define XGBE_TSTAMP_SSINC 20 #define XGBE_TSTAMP_SNSINC 0 /* Driver PMT macros */ #define XGMAC_DRIVER_CONTEXT 1 #define XGMAC_IOCTL_CONTEXT 2 #define XGMAC_FIFO_MIN_ALLOC 2048 #define XGMAC_FIFO_UNIT 256 #define XGMAC_FIFO_ALIGN(_x) \ (((_x) + XGMAC_FIFO_UNIT - 1) & ~(XGMAC_FIFO_UNIT - 1)) #define XGMAC_FIFO_FC_OFF 2048 #define XGMAC_FIFO_FC_MIN 4096 #define XGBE_FIFO_MAX 81920 #define XGBE_TC_MIN_QUANTUM 10 /* Helper macro for descriptor handling * Always use XGBE_GET_DESC_DATA to access the descriptor data * since the index is free-running and needs to be and-ed * with the descriptor count value of the ring to index to * the proper descriptor data. */ #define XGBE_GET_DESC_DATA(_ring, _idx) \ ((_ring)->rdata + \ ((_idx) & ((_ring)->rdesc_count - 1))) /* Default coalescing parameters */ #define XGMAC_INIT_DMA_TX_USECS 1000 #define XGMAC_INIT_DMA_TX_FRAMES 25 #define XGMAC_MAX_DMA_RIWT 0xff #define XGMAC_INIT_DMA_RX_USECS 30 #define XGMAC_INIT_DMA_RX_FRAMES 25 /* Flow control queue count */ #define XGMAC_MAX_FLOW_CONTROL_QUEUES 8 /* Flow control threshold units */ #define XGMAC_FLOW_CONTROL_UNIT 512 #define XGMAC_FLOW_CONTROL_ALIGN(_x) \ (((_x) + XGMAC_FLOW_CONTROL_UNIT - 1) & ~(XGMAC_FLOW_CONTROL_UNIT - 1)) #define XGMAC_FLOW_CONTROL_VALUE(_x) \ (((_x) < 1024) ? 0 : ((_x) / XGMAC_FLOW_CONTROL_UNIT) - 2) #define XGMAC_FLOW_CONTROL_MAX 33280 /* Maximum MAC address hash table size (256 bits = 8 bytes) */ #define XGBE_MAC_HASH_TABLE_SIZE 8 /* Receive Side Scaling */ #define XGBE_RSS_HASH_KEY_SIZE 40 #define XGBE_RSS_MAX_TABLE_SIZE 256 #define XGBE_RSS_LOOKUP_TABLE_TYPE 0 #define XGBE_RSS_HASH_KEY_TYPE 1 /* Auto-negotiation */ #define XGBE_AN_MS_TIMEOUT 500 #define XGBE_LINK_TIMEOUT 10 #define XGBE_SGMII_AN_LINK_STATUS BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) #define XGBE_SGMII_AN_LINK_SPEED_100 0x04 #define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 #define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) /* ECC correctable error notification window (seconds) */ #define XGBE_ECC_LIMIT 60 #define XGBE_AN_INT_CMPLT 0x01 #define XGBE_AN_INC_LINK 0x02 #define XGBE_AN_PG_RCV 0x04 #define XGBE_AN_INT_MASK 0x07 #define XGBE_SGMII_AN_LINK_STATUS BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) #define XGBE_SGMII_AN_LINK_SPEED_100 0x04 #define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 #define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) /* Rate-change complete wait/retry count */ #define XGBE_RATECHANGE_COUNT 500 /* Default SerDes settings */ #define XGBE_SPEED_10000_BLWC 0 #define XGBE_SPEED_10000_CDR 0x7 #define XGBE_SPEED_10000_PLL 0x1 #define XGBE_SPEED_10000_PQ 0x12 #define XGBE_SPEED_10000_RATE 0x0 #define XGBE_SPEED_10000_TXAMP 0xa #define XGBE_SPEED_10000_WORD 0x7 #define XGBE_SPEED_10000_DFE_TAP_CONFIG 0x1 #define XGBE_SPEED_10000_DFE_TAP_ENABLE 0x7f #define XGBE_SPEED_2500_BLWC 1 #define XGBE_SPEED_2500_CDR 0x2 #define XGBE_SPEED_2500_PLL 0x0 #define XGBE_SPEED_2500_PQ 0xa #define XGBE_SPEED_2500_RATE 0x1 #define XGBE_SPEED_2500_TXAMP 0xf #define XGBE_SPEED_2500_WORD 0x1 #define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3 #define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0 #define XGBE_SPEED_1000_BLWC 1 #define XGBE_SPEED_1000_CDR 0x2 #define XGBE_SPEED_1000_PLL 0x0 #define XGBE_SPEED_1000_PQ 0xa #define XGBE_SPEED_1000_RATE 0x3 #define XGBE_SPEED_1000_TXAMP 0xf #define XGBE_SPEED_1000_WORD 0x1 #define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3 #define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0 /* TSO related macros */ #define XGBE_TSO_MAX_SIZE UINT16_MAX /* MDIO port types */ #define XGMAC_MAX_C22_PORT 
3 /* Link mode bit operations */ #define XGBE_ZERO_SUP(_phy) \ ((_phy)->supported = 0) #define XGBE_SET_SUP(_phy, _mode) \ ((_phy)->supported |= SUPPORTED_##_mode) #define XGBE_CLR_SUP(_phy, _mode) \ ((_phy)->supported &= ~SUPPORTED_##_mode) #define XGBE_IS_SUP(_phy, _mode) \ ((_phy)->supported & SUPPORTED_##_mode) #define XGBE_ZERO_ADV(_phy) \ ((_phy)->advertising = 0) #define XGBE_SET_ADV(_phy, _mode) \ ((_phy)->advertising |= ADVERTISED_##_mode) #define XGBE_CLR_ADV(_phy, _mode) \ ((_phy)->advertising &= ~ADVERTISED_##_mode) #define XGBE_ADV(_phy, _mode) \ ((_phy)->advertising & ADVERTISED_##_mode) #define XGBE_ZERO_LP_ADV(_phy) \ ((_phy)->lp_advertising = 0) #define XGBE_SET_LP_ADV(_phy, _mode) \ ((_phy)->lp_advertising |= ADVERTISED_##_mode) #define XGBE_CLR_LP_ADV(_phy, _mode) \ ((_phy)->lp_advertising &= ~ADVERTISED_##_mode) #define XGBE_LP_ADV(_phy, _mode) \ ((_phy)->lp_advertising & ADVERTISED_##_mode) #define XGBE_LM_COPY(_dphy, _dname, _sphy, _sname) \ ((_dphy)->_dname = (_sphy)->_sname) struct xgbe_prv_data; struct xgbe_packet_data { struct mbuf *m; unsigned int attributes; unsigned int errors; unsigned int rdesc_count; unsigned int length; unsigned int header_len; unsigned int tcp_header_len; unsigned int tcp_payload_len; unsigned short mss; unsigned short vlan_ctag; uint64_t rx_tstamp; unsigned int tx_packets; unsigned int tx_bytes; uint32_t rss_hash; uint32_t rss_hash_type; }; /* Common Rx and Tx descriptor mapping */ struct xgbe_ring_desc { __le32 desc0; __le32 desc1; __le32 desc2; __le32 desc3; }; /* Tx-related ring data */ struct xgbe_tx_ring_data { unsigned int packets; /* BQL packet count */ unsigned int bytes; /* BQL byte count */ }; /* Rx-related ring data */ struct xgbe_rx_ring_data { unsigned short hdr_len; /* Length of received header */ unsigned short len; /* Length of received packet */ }; /* Structure used to hold information related to the descriptor * and the packet associated with the descriptor (always * use the XGBE_GET_DESC_DATA macro to access this data from the ring) */ struct xgbe_ring_data { struct xgbe_ring_desc *rdesc; /* Virtual address of descriptor */ bus_addr_t rdata_paddr; struct xgbe_tx_ring_data tx; /* Tx-related data */ struct xgbe_rx_ring_data rx; /* Rx-related data */ /* Incomplete receive save location. If the budget is exhausted * or the last descriptor (last normal descriptor or a following * context descriptor) has not been DMA'd yet, the current state * of the receive processing needs to be saved.
*/ unsigned int state_saved; struct { struct mbuf *m; unsigned int len; unsigned int error; } state; }; struct xgbe_ring { /* Ring lock - used just for TX rings at the moment */ spinlock_t lock; /* Per packet related information */ struct xgbe_packet_data packet_data; /* Virtual/DMA addresses and count of allocated descriptor memory */ struct xgbe_ring_desc *rdesc; bus_addr_t rdesc_paddr; unsigned int rdesc_count; /* Array of descriptor data corresponding the descriptor memory * (always use the XGBE_GET_DESC_DATA macro to access this data) */ struct xgbe_ring_data *rdata; /* Ring index values * cur - Tx: index of descriptor to be used for current transfer * Rx: index of descriptor to check for packet availability * dirty - Tx: index of descriptor to check for transfer complete * Rx: index of descriptor to check for buffer reallocation */ unsigned int cur; unsigned int dirty; /* Coalesce frame count used for interrupt bit setting */ unsigned int coalesce_count; union { struct { unsigned int queue_stopped; unsigned int xmit_more; unsigned short cur_mss; unsigned short cur_vlan_ctag; } tx; }; uint16_t prev_pidx; uint8_t prev_count; } __aligned(CACHE_LINE_SIZE); /* Structure used to describe the descriptor rings associated with * a DMA channel. */ struct xgbe_channel { char name[16]; /* Address of private data area for device */ struct xgbe_prv_data *pdata; /* Queue index and base address of queue's DMA registers */ unsigned int queue_index; bus_space_tag_t dma_tag; bus_space_handle_t dma_handle; int dma_irq_rid; /* Per channel interrupt irq number */ struct resource *dma_irq_res; void *dma_irq_tag; /* Per channel interrupt enablement tracker */ unsigned int curr_ier; unsigned int saved_ier; struct xgbe_ring *tx_ring; struct xgbe_ring *rx_ring; } __aligned(CACHE_LINE_SIZE); enum xgbe_state { XGBE_DOWN, XGBE_LINK_INIT, XGBE_LINK_ERR, XGBE_STOPPED, }; enum xgbe_int { XGMAC_INT_DMA_CH_SR_TI, XGMAC_INT_DMA_CH_SR_TPS, XGMAC_INT_DMA_CH_SR_TBU, XGMAC_INT_DMA_CH_SR_RI, XGMAC_INT_DMA_CH_SR_RBU, XGMAC_INT_DMA_CH_SR_RPS, XGMAC_INT_DMA_CH_SR_TI_RI, XGMAC_INT_DMA_CH_SR_FBE, XGMAC_INT_DMA_ALL, }; enum xgbe_int_state { XGMAC_INT_STATE_SAVE, XGMAC_INT_STATE_RESTORE, }; enum xgbe_ecc_sec { XGBE_ECC_SEC_TX, XGBE_ECC_SEC_RX, XGBE_ECC_SEC_DESC, }; enum xgbe_speed { XGBE_SPEED_1000 = 0, XGBE_SPEED_2500, XGBE_SPEED_10000, XGBE_SPEEDS, }; enum xgbe_xpcs_access { XGBE_XPCS_ACCESS_V1 = 0, XGBE_XPCS_ACCESS_V2, }; enum xgbe_an_mode { XGBE_AN_MODE_CL73 = 0, XGBE_AN_MODE_CL73_REDRV, XGBE_AN_MODE_CL37, XGBE_AN_MODE_CL37_SGMII, XGBE_AN_MODE_NONE, }; enum xgbe_an { XGBE_AN_READY = 0, XGBE_AN_PAGE_RECEIVED, XGBE_AN_INCOMPAT_LINK, XGBE_AN_COMPLETE, XGBE_AN_NO_LINK, XGBE_AN_ERROR, }; enum xgbe_rx { XGBE_RX_BPA = 0, XGBE_RX_XNP, XGBE_RX_COMPLETE, XGBE_RX_ERROR, }; enum xgbe_mode { XGBE_MODE_KR = 0, XGBE_MODE_KX, XGBE_MODE_KX_1000, XGBE_MODE_KX_2500, XGBE_MODE_X, XGBE_MODE_SGMII_100, XGBE_MODE_SGMII_1000, XGBE_MODE_SFI, XGBE_MODE_UNKNOWN, }; enum xgbe_speedset { XGBE_SPEEDSET_1000_10000 = 0, XGBE_SPEEDSET_2500_10000, }; enum xgbe_mdio_mode { XGBE_MDIO_MODE_NONE = 0, XGBE_MDIO_MODE_CL22, XGBE_MDIO_MODE_CL45, }; struct xgbe_phy { uint32_t supported; uint32_t advertising; uint32_t lp_advertising; int address; int autoneg; int speed; int duplex; int link; int pause_autoneg; int tx_pause; int rx_pause; int pause; int asym_pause; }; enum xgbe_i2c_cmd { XGBE_I2C_CMD_READ = 0, XGBE_I2C_CMD_WRITE, }; struct xgbe_i2c_op { enum xgbe_i2c_cmd cmd; unsigned int target; void *buf; unsigned int len; }; struct xgbe_i2c_op_state { struct 
xgbe_i2c_op *op; unsigned int tx_len; unsigned char *tx_buf; unsigned int rx_len; unsigned char *rx_buf; unsigned int tx_abort_source; int ret; }; struct xgbe_i2c { unsigned int started; unsigned int max_speed_mode; unsigned int rx_fifo_size; unsigned int tx_fifo_size; struct xgbe_i2c_op_state op_state; }; struct xgbe_mmc_stats { /* Tx Stats */ uint64_t txoctetcount_gb; uint64_t txframecount_gb; uint64_t txbroadcastframes_g; uint64_t txmulticastframes_g; uint64_t tx64octets_gb; uint64_t tx65to127octets_gb; uint64_t tx128to255octets_gb; uint64_t tx256to511octets_gb; uint64_t tx512to1023octets_gb; uint64_t tx1024tomaxoctets_gb; uint64_t txunicastframes_gb; uint64_t txmulticastframes_gb; uint64_t txbroadcastframes_gb; uint64_t txunderflowerror; uint64_t txoctetcount_g; uint64_t txframecount_g; uint64_t txpauseframes; uint64_t txvlanframes_g; /* Rx Stats */ uint64_t rxframecount_gb; uint64_t rxoctetcount_gb; uint64_t rxoctetcount_g; uint64_t rxbroadcastframes_g; uint64_t rxmulticastframes_g; uint64_t rxcrcerror; uint64_t rxrunterror; uint64_t rxjabbererror; uint64_t rxundersize_g; uint64_t rxoversize_g; uint64_t rx64octets_gb; uint64_t rx65to127octets_gb; uint64_t rx128to255octets_gb; uint64_t rx256to511octets_gb; uint64_t rx512to1023octets_gb; uint64_t rx1024tomaxoctets_gb; uint64_t rxunicastframes_g; uint64_t rxlengtherror; uint64_t rxoutofrangetype; uint64_t rxpauseframes; uint64_t rxfifooverflow; uint64_t rxvlanframes_gb; uint64_t rxwatchdogerror; }; struct xgbe_ext_stats { uint64_t tx_tso_packets; uint64_t rx_split_header_packets; uint64_t rx_buffer_unavailable; uint64_t txq_packets[XGBE_MAX_DMA_CHANNELS]; uint64_t txq_bytes[XGBE_MAX_DMA_CHANNELS]; uint64_t rxq_packets[XGBE_MAX_DMA_CHANNELS]; uint64_t rxq_bytes[XGBE_MAX_DMA_CHANNELS]; uint64_t tx_vxlan_packets; uint64_t rx_vxlan_packets; uint64_t rx_csum_errors; uint64_t rx_vxlan_csum_errors; }; struct xgbe_hw_if { int (*tx_complete)(struct xgbe_ring_desc *); int (*set_mac_address)(struct xgbe_prv_data *, uint8_t *addr); int (*config_rx_mode)(struct xgbe_prv_data *); int (*enable_rx_csum)(struct xgbe_prv_data *); int (*disable_rx_csum)(struct xgbe_prv_data *); int (*enable_rx_vlan_stripping)(struct xgbe_prv_data *); int (*disable_rx_vlan_stripping)(struct xgbe_prv_data *); int (*enable_rx_vlan_filtering)(struct xgbe_prv_data *); int (*disable_rx_vlan_filtering)(struct xgbe_prv_data *); int (*update_vlan_hash_table)(struct xgbe_prv_data *); int (*read_mmd_regs)(struct xgbe_prv_data *, int, int); void (*write_mmd_regs)(struct xgbe_prv_data *, int, int, int); int (*set_speed)(struct xgbe_prv_data *, int); int (*set_ext_mii_mode)(struct xgbe_prv_data *, unsigned int, enum xgbe_mdio_mode); int (*read_ext_mii_regs)(struct xgbe_prv_data *, int, int); int (*write_ext_mii_regs)(struct xgbe_prv_data *, int, int, uint16_t); int (*set_gpio)(struct xgbe_prv_data *, unsigned int); int (*clr_gpio)(struct xgbe_prv_data *, unsigned int); void (*enable_tx)(struct xgbe_prv_data *); void (*disable_tx)(struct xgbe_prv_data *); void (*enable_rx)(struct xgbe_prv_data *); void (*disable_rx)(struct xgbe_prv_data *); void (*powerup_tx)(struct xgbe_prv_data *); void (*powerdown_tx)(struct xgbe_prv_data *); void (*powerup_rx)(struct xgbe_prv_data *); void (*powerdown_rx)(struct xgbe_prv_data *); int (*init)(struct xgbe_prv_data *); int (*exit)(struct xgbe_prv_data *); int (*enable_int)(struct xgbe_channel *, enum xgbe_int); int (*disable_int)(struct xgbe_channel *, enum xgbe_int); int (*dev_read)(struct xgbe_channel *); void (*tx_desc_init)(struct xgbe_channel *); 
void (*rx_desc_init)(struct xgbe_channel *); void (*tx_desc_reset)(struct xgbe_ring_data *); int (*is_last_desc)(struct xgbe_ring_desc *); int (*is_context_desc)(struct xgbe_ring_desc *); /* For FLOW ctrl */ int (*config_tx_flow_control)(struct xgbe_prv_data *); int (*config_rx_flow_control)(struct xgbe_prv_data *); /* For RX coalescing */ int (*config_rx_coalesce)(struct xgbe_prv_data *); int (*config_tx_coalesce)(struct xgbe_prv_data *); unsigned int (*usec_to_riwt)(struct xgbe_prv_data *, unsigned int); unsigned int (*riwt_to_usec)(struct xgbe_prv_data *, unsigned int); /* For RX and TX threshold config */ int (*config_rx_threshold)(struct xgbe_prv_data *, unsigned int); int (*config_tx_threshold)(struct xgbe_prv_data *, unsigned int); /* For RX and TX Store and Forward Mode config */ int (*config_rsf_mode)(struct xgbe_prv_data *, unsigned int); int (*config_tsf_mode)(struct xgbe_prv_data *, unsigned int); /* For TX DMA Operate on Second Frame config */ int (*config_osp_mode)(struct xgbe_prv_data *); /* For MMC statistics */ void (*rx_mmc_int)(struct xgbe_prv_data *); void (*tx_mmc_int)(struct xgbe_prv_data *); void (*read_mmc_stats)(struct xgbe_prv_data *); /* For Receive Side Scaling */ int (*enable_rss)(struct xgbe_prv_data *); int (*disable_rss)(struct xgbe_prv_data *); int (*set_rss_hash_key)(struct xgbe_prv_data *, const uint8_t *); int (*set_rss_lookup_table)(struct xgbe_prv_data *, const uint32_t *); }; /* This structure represents implementation specific routines for an * implementation of a PHY. All routines are required unless noted below. * Optional routines: * an_pre, an_post * kr_training_pre, kr_training_post * module_info, module_eeprom */ struct xgbe_phy_impl_if { /* Perform Setup/teardown actions */ int (*init)(struct xgbe_prv_data *); void (*exit)(struct xgbe_prv_data *); /* Perform start/stop specific actions */ int (*reset)(struct xgbe_prv_data *); int (*start)(struct xgbe_prv_data *); void (*stop)(struct xgbe_prv_data *); /* Return the link status */ int (*link_status)(struct xgbe_prv_data *, int *); /* Indicate if a particular speed is valid */ bool (*valid_speed)(struct xgbe_prv_data *, int); /* Check if the specified mode can/should be used */ bool (*use_mode)(struct xgbe_prv_data *, enum xgbe_mode); /* Switch the PHY into various modes */ void (*set_mode)(struct xgbe_prv_data *, enum xgbe_mode); /* Retrieve mode needed for a specific speed */ enum xgbe_mode (*get_mode)(struct xgbe_prv_data *, int); /* Retrieve new/next mode when trying to auto-negotiate */ enum xgbe_mode (*switch_mode)(struct xgbe_prv_data *); /* Retrieve current mode */ enum xgbe_mode (*cur_mode)(struct xgbe_prv_data *); /* Retrieve interface sub-type */ void (*get_type)(struct xgbe_prv_data *, struct ifmediareq *); /* Retrieve current auto-negotiation mode */ enum xgbe_an_mode (*an_mode)(struct xgbe_prv_data *); /* Configure auto-negotiation settings */ int (*an_config)(struct xgbe_prv_data *); /* Set/override auto-negotiation advertisement settings */ void (*an_advertising)(struct xgbe_prv_data *, struct xgbe_phy *); /* Process results of auto-negotiation */ enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *); /* Pre/Post auto-negotiation support */ void (*an_pre)(struct xgbe_prv_data *); void (*an_post)(struct xgbe_prv_data *); /* Pre/Post KR training enablement support */ void (*kr_training_pre)(struct xgbe_prv_data *); void (*kr_training_post)(struct xgbe_prv_data *); /* SFP module related info */ int (*module_info)(struct xgbe_prv_data *pdata); int (*module_eeprom)(struct xgbe_prv_data 
*pdata); }; struct xgbe_phy_if { /* For PHY setup/teardown */ int (*phy_init)(struct xgbe_prv_data *); void (*phy_exit)(struct xgbe_prv_data *); /* For PHY support when setting device up/down */ int (*phy_reset)(struct xgbe_prv_data *); int (*phy_start)(struct xgbe_prv_data *); void (*phy_stop)(struct xgbe_prv_data *); /* For PHY support while device is up */ void (*phy_status)(struct xgbe_prv_data *); int (*phy_config_aneg)(struct xgbe_prv_data *); /* For PHY settings validation */ bool (*phy_valid_speed)(struct xgbe_prv_data *, int); /* For single interrupt support */ void (*an_isr)(struct xgbe_prv_data *); /* PHY implementation specific services */ struct xgbe_phy_impl_if phy_impl; }; struct xgbe_i2c_if { /* For initial I2C setup */ int (*i2c_init)(struct xgbe_prv_data *); /* For I2C support when setting device up/down */ int (*i2c_start)(struct xgbe_prv_data *); void (*i2c_stop)(struct xgbe_prv_data *); /* For performing I2C operations */ int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *); /* For single interrupt support */ void (*i2c_isr)(struct xgbe_prv_data *); }; struct xgbe_desc_if { int (*alloc_ring_resources)(struct xgbe_prv_data *); void (*free_ring_resources)(struct xgbe_prv_data *); int (*map_tx_skb)(struct xgbe_channel *, struct mbuf *); int (*map_rx_buffer)(struct xgbe_prv_data *, struct xgbe_ring *, struct xgbe_ring_data *); void (*unmap_rdata)(struct xgbe_prv_data *, struct xgbe_ring_data *); void (*wrapper_tx_desc_init)(struct xgbe_prv_data *); void (*wrapper_rx_desc_init)(struct xgbe_prv_data *); }; /* This structure contains flags that indicate what hardware features * or configurations are present in the device. */ struct xgbe_hw_features { /* HW Version */ unsigned int version; /* HW Feature Register0 */ unsigned int gmii; /* 1000 Mbps support */ unsigned int vlhash; /* VLAN Hash Filter */ unsigned int sma; /* SMA(MDIO) Interface */ unsigned int rwk; /* PMT remote wake-up packet */ unsigned int mgk; /* PMT magic packet */ unsigned int mmc; /* RMON module */ unsigned int aoe; /* ARP Offload */ unsigned int ts; /* IEEE 1588-2008 Advanced Timestamp */ unsigned int eee; /* Energy Efficient Ethernet */ unsigned int tx_coe; /* Tx Checksum Offload */ unsigned int rx_coe; /* Rx Checksum Offload */ unsigned int addn_mac; /* Additional MAC Addresses */ unsigned int ts_src; /* Timestamp Source */ unsigned int sa_vlan_ins; /* Source Address or VLAN Insertion */ unsigned int vxn; /* VXLAN/NVGRE */ /* HW Feature Register1 */ unsigned int rx_fifo_size; /* MTL Receive FIFO Size */ unsigned int tx_fifo_size; /* MTL Transmit FIFO Size */ unsigned int adv_ts_hi; /* Advance Timestamping High Word */ unsigned int dma_width; /* DMA width */ unsigned int dcb; /* DCB Feature */ unsigned int sph; /* Split Header Feature */ unsigned int tso; /* TCP Segmentation Offload */ unsigned int dma_debug; /* DMA Debug Registers */ unsigned int rss; /* Receive Side Scaling */ unsigned int tc_cnt; /* Number of Traffic Classes */ unsigned int hash_table_size; /* Hash Table Size */ unsigned int l3l4_filter_num; /* Number of L3-L4 Filters */ /* HW Feature Register2 */ unsigned int rx_q_cnt; /* Number of MTL Receive Queues */ unsigned int tx_q_cnt; /* Number of MTL Transmit Queues */ unsigned int rx_ch_cnt; /* Number of DMA Receive Channels */ unsigned int tx_ch_cnt; /* Number of DMA Transmit Channels */ unsigned int pps_out_num; /* Number of PPS outputs */ unsigned int aux_snap_num; /* Number of Aux snapshot inputs */ }; struct xgbe_version_data { void (*init_function_ptrs_phy_impl)(struct 
xgbe_phy_if *); enum xgbe_xpcs_access xpcs_access; unsigned int mmc_64bit; unsigned int tx_max_fifo_size; unsigned int rx_max_fifo_size; unsigned int tx_tstamp_workaround; unsigned int ecc_support; unsigned int i2c_support; unsigned int irq_reissue_support; unsigned int tx_desc_prefetch; unsigned int rx_desc_prefetch; unsigned int an_cdr_workaround; }; struct xgbe_prv_data { - struct ifnet *netdev; + if_t netdev; struct platform_device *pdev; struct acpi_device *adev; device_t dev; /* Version related data */ struct xgbe_version_data *vdata; /* ACPI or DT flag */ unsigned int use_acpi; /* XGMAC/XPCS related mmio registers */ struct resource *xgmac_res; /* XGMAC CSRs */ struct resource *xpcs_res; /* XPCS MMD registers */ struct resource *rxtx_res; /* SerDes Rx/Tx CSRs */ struct resource *sir0_res; /* SerDes integration registers (1/2) */ struct resource *sir1_res; /* SerDes integration registers (2/2) */ /* Port property registers */ unsigned int pp0; unsigned int pp1; unsigned int pp2; unsigned int pp3; unsigned int pp4; /* DMA tag */ bus_dma_tag_t dmat; /* XPCS indirect addressing lock */ spinlock_t xpcs_lock; unsigned int xpcs_window_def_reg; unsigned int xpcs_window_sel_reg; unsigned int xpcs_window; unsigned int xpcs_window_size; unsigned int xpcs_window_mask; /* RSS addressing mutex */ struct mtx rss_mutex; /* Flags representing xgbe_state */ unsigned long dev_state; /* ECC support */ unsigned long tx_sec_period; unsigned long tx_ded_period; unsigned long rx_sec_period; unsigned long rx_ded_period; unsigned long desc_sec_period; unsigned long desc_ded_period; unsigned int tx_sec_count; unsigned int tx_ded_count; unsigned int rx_sec_count; unsigned int rx_ded_count; unsigned int desc_ded_count; unsigned int desc_sec_count; struct if_irq dev_irq; struct resource *dev_irq_res; struct resource *ecc_irq_res; struct resource *i2c_irq_res; struct resource *an_irq_res; int ecc_rid; int i2c_rid; int an_rid; void *dev_irq_tag; void *ecc_irq_tag; void *i2c_irq_tag; void *an_irq_tag; struct resource *chan_irq_res[XGBE_MAX_DMA_CHANNELS]; unsigned int per_channel_irq; unsigned int irq_count; unsigned int channel_irq_count; unsigned int channel_irq_mode; char ecc_name[IFNAMSIZ + 32]; unsigned int isr_as_tasklet; struct xgbe_hw_if hw_if; struct xgbe_phy_if phy_if; struct xgbe_desc_if desc_if; struct xgbe_i2c_if i2c_if; /* AXI DMA settings */ unsigned int coherent; unsigned int arcr; unsigned int awcr; unsigned int awarcr; /* Service routine support */ struct taskqueue *dev_workqueue; struct task service_work; struct callout service_timer; struct mtx timer_mutex; /* Rings for Tx/Rx on a DMA channel */ struct xgbe_channel *channel[XGBE_MAX_DMA_CHANNELS]; unsigned int tx_max_channel_count; unsigned int rx_max_channel_count; unsigned int total_channel_count; unsigned int channel_count; unsigned int tx_ring_count; unsigned int tx_desc_count; unsigned int rx_ring_count; unsigned int rx_desc_count; unsigned int new_tx_ring_count; unsigned int new_rx_ring_count; unsigned int tx_max_q_count; unsigned int rx_max_q_count; unsigned int tx_q_count; unsigned int rx_q_count; /* Tx/Rx common settings */ unsigned int blen; unsigned int pbl; unsigned int aal; unsigned int rd_osr_limit; unsigned int wr_osr_limit; /* Tx settings */ unsigned int tx_sf_mode; unsigned int tx_threshold; unsigned int tx_osp_mode; unsigned int tx_max_fifo_size; /* Rx settings */ unsigned int rx_sf_mode; unsigned int rx_threshold; unsigned int rx_max_fifo_size; /* Tx coalescing settings */ unsigned int tx_usecs; unsigned int tx_frames; /* Rx 
coalescing settings */ unsigned int rx_riwt; unsigned int rx_usecs; unsigned int rx_frames; /* Current Rx buffer size */ unsigned int rx_buf_size; /* Flow control settings */ unsigned int pause_autoneg; unsigned int tx_pause; unsigned int rx_pause; unsigned int rx_rfa[XGBE_MAX_QUEUES]; unsigned int rx_rfd[XGBE_MAX_QUEUES]; /* Receive Side Scaling settings */ uint8_t rss_key[XGBE_RSS_HASH_KEY_SIZE]; uint32_t rss_table[XGBE_RSS_MAX_TABLE_SIZE]; uint32_t rss_options; unsigned int enable_rss; /* VXLAN settings */ unsigned int vxlan_port_set; unsigned int vxlan_offloads_set; unsigned int vxlan_force_disable; unsigned int vxlan_port_count; uint16_t vxlan_port; uint64_t vxlan_features; /* Netdev related settings */ unsigned char mac_addr[ETH_ALEN]; uint64_t netdev_features; struct xgbe_mmc_stats mmc_stats; struct xgbe_ext_stats ext_stats; /* Filtering support */ bitstr_t *active_vlans; unsigned int num_active_vlans; /* Device clocks */ struct clk *sysclk; unsigned long sysclk_rate; struct clk *ptpclk; unsigned long ptpclk_rate; /* DCB support */ unsigned int q2tc_map[XGBE_MAX_QUEUES]; unsigned int prio2q_map[IEEE_8021QAZ_MAX_TCS]; /* Hardware features of the device */ struct xgbe_hw_features hw_feat; /* Device work structure */ struct task restart_work; struct task stopdev_work; /* Keeps track of power mode */ unsigned int power_down; /* Network interface message level setting */ uint32_t msg_enable; /* Current PHY settings */ int phy_link; int phy_speed; /* MDIO/PHY related settings */ unsigned int phy_started; void *phy_data; struct xgbe_phy phy; int mdio_mmd; unsigned long link_check; struct mtx mdio_mutex; unsigned int mdio_addr; unsigned int kr_redrv; char an_name[IFNAMSIZ + 32]; struct taskqueue *an_workqueue; struct task an_irq_work; unsigned int speed_set; /* SerDes UEFI configurable settings. * Switching between modes/speeds requires new values for some * SerDes settings. The values can be supplied as device * properties in array format. The first array entry is for * 1GbE, second for 2.5GbE and third for 10GbE */ uint32_t serdes_blwc[XGBE_SPEEDS]; uint32_t serdes_cdr_rate[XGBE_SPEEDS]; uint32_t serdes_pq_skew[XGBE_SPEEDS]; uint32_t serdes_tx_amp[XGBE_SPEEDS]; uint32_t serdes_dfe_tap_cfg[XGBE_SPEEDS]; uint32_t serdes_dfe_tap_ena[XGBE_SPEEDS]; /* Auto-negotiation state machine support */ unsigned int an_int; unsigned int an_status; struct sx an_mutex; enum xgbe_an an_result; enum xgbe_an an_state; enum xgbe_rx kr_state; enum xgbe_rx kx_state; struct task an_work; unsigned int an_again; unsigned int an_supported; unsigned int parallel_detect; unsigned int fec_ability; unsigned long an_start; enum xgbe_an_mode an_mode; /* I2C support */ struct xgbe_i2c i2c; struct mtx i2c_mutex; bool i2c_complete; unsigned int lpm_ctrl; /* CTRL1 for resume */ unsigned int an_cdr_track_early; uint64_t features; device_t axgbe_miibus; unsigned int sysctl_xgmac_reg; unsigned int sysctl_xpcs_mmd; unsigned int sysctl_xpcs_reg; unsigned int sysctl_xprop_reg; unsigned int sysctl_xi2c_reg; bool sysctl_an_cdr_workaround; bool sysctl_an_cdr_track_early; int pcie_bus; /* PCIe bus number */ int pcie_device; /* PCIe device/slot number */ int pcie_func; /* PCIe function number */ void *sys_op; uint64_t use_adaptive_rx_coalesce; uint64_t use_adaptive_tx_coalesce; uint64_t rx_coalesce_usecs; unsigned int debug_level; /* * Toggles the split header feature. * This requires a complete restart. 
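 * (Split header makes the hardware DMA protocol headers and payload
 * into separate Rx buffers, which changes how the Rx rings are
 * provisioned; hence the full restart when it is toggled.)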
*/ unsigned int sph_enable; unsigned int link_workaround; }; struct axgbe_if_softc { struct xgbe_prv_data pdata; if_softc_ctx_t scctx; if_shared_ctx_t sctx; if_ctx_t ctx; - struct ifnet *ifp; + if_t ifp; struct ifmedia *media; unsigned int link_status; }; /* Function prototypes*/ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *); void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *); void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *); void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *); void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *); void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *); void xgbe_get_all_hw_features(struct xgbe_prv_data *); void xgbe_init_rx_coalesce(struct xgbe_prv_data *); void xgbe_init_tx_coalesce(struct xgbe_prv_data *); -int xgbe_calc_rx_buf_size(struct ifnet *netdev, unsigned int mtu); +int xgbe_calc_rx_buf_size(if_t netdev, unsigned int mtu); void axgbe_sysctl_init(struct xgbe_prv_data *pdata); void axgbe_sysctl_exit(struct xgbe_prv_data *pdata); int xgbe_phy_mii_write(struct xgbe_prv_data *pdata, int addr, int reg, uint16_t val); int xgbe_phy_mii_read(struct xgbe_prv_data *pdata, int addr, int reg); void xgbe_dump_i2c_registers(struct xgbe_prv_data *); uint32_t bitrev32(uint32_t); /* For debug prints */ #ifdef YDEBUG #define DBGPR(x...) device_printf(pdata->dev, x) #else #define DBGPR(x...) do { } while (0) #endif #ifdef YDEBUG_MDIO #define DBGPR_MDIO(x...) device_printf(pdata->dev, x) #else #define DBGPR_MDIO(x...) do { } while (0) #endif #define axgbe_printf(lvl, ...) do { \ if (lvl <= pdata->debug_level) \ device_printf(pdata->dev, __VA_ARGS__); \ } while (0) #define axgbe_error(...) do { \ device_printf(pdata->dev, __VA_ARGS__); \ } while (0) #endif /* __XGBE_H__ */ diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index b7a9052e0ec1..3be1aaeb362e 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -1,4887 +1,4887 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Nicole Graziano * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* $FreeBSD$ */ #include "if_em.h" #include #include #define em_mac_min e1000_82571 #define igb_mac_min e1000_82575 /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static pci_vendor_info_t em_vendor_info_array[] = { /* Intel(R) - lem-class legacy devices */ PVID(0x8086, E1000_DEV_ID_82540EM, "Intel(R) Legacy PRO/1000 MT 82540EM"), PVID(0x8086, E1000_DEV_ID_82540EM_LOM, "Intel(R) Legacy PRO/1000 MT 82540EM (LOM)"), PVID(0x8086, E1000_DEV_ID_82540EP, "Intel(R) Legacy PRO/1000 MT 82540EP"), PVID(0x8086, E1000_DEV_ID_82540EP_LOM, "Intel(R) Legacy PRO/1000 MT 82540EP (LOM)"), PVID(0x8086, E1000_DEV_ID_82540EP_LP, "Intel(R) Legacy PRO/1000 MT 82540EP (Mobile)"), PVID(0x8086, E1000_DEV_ID_82541EI, "Intel(R) Legacy PRO/1000 MT 82541EI (Copper)"), PVID(0x8086, E1000_DEV_ID_82541ER, "Intel(R) Legacy PRO/1000 82541ER"), PVID(0x8086, E1000_DEV_ID_82541ER_LOM, "Intel(R) Legacy PRO/1000 MT 82541ER"), PVID(0x8086, E1000_DEV_ID_82541EI_MOBILE, "Intel(R) Legacy PRO/1000 MT 82541EI (Mobile)"), PVID(0x8086, E1000_DEV_ID_82541GI, "Intel(R) Legacy PRO/1000 MT 82541GI"), PVID(0x8086, E1000_DEV_ID_82541GI_LF, "Intel(R) Legacy PRO/1000 GT 82541PI"), PVID(0x8086, E1000_DEV_ID_82541GI_MOBILE, "Intel(R) Legacy PRO/1000 MT 82541GI (Mobile)"), PVID(0x8086, E1000_DEV_ID_82542, "Intel(R) Legacy PRO/1000 82542 (Fiber)"), PVID(0x8086, E1000_DEV_ID_82543GC_FIBER, "Intel(R) Legacy PRO/1000 F 82543GC (Fiber)"), PVID(0x8086, E1000_DEV_ID_82543GC_COPPER, "Intel(R) Legacy PRO/1000 T 82543GC (Copper)"), PVID(0x8086, E1000_DEV_ID_82544EI_COPPER, "Intel(R) Legacy PRO/1000 XT 82544EI (Copper)"), PVID(0x8086, E1000_DEV_ID_82544EI_FIBER, "Intel(R) Legacy PRO/1000 XF 82544EI (Fiber)"), PVID(0x8086, E1000_DEV_ID_82544GC_COPPER, "Intel(R) Legacy PRO/1000 T 82544GC (Copper)"), PVID(0x8086, E1000_DEV_ID_82544GC_LOM, "Intel(R) Legacy PRO/1000 XT 82544GC (LOM)"), PVID(0x8086, E1000_DEV_ID_82545EM_COPPER, "Intel(R) Legacy PRO/1000 MT 82545EM (Copper)"), PVID(0x8086, E1000_DEV_ID_82545EM_FIBER, "Intel(R) Legacy PRO/1000 MF 82545EM (Fiber)"), PVID(0x8086, E1000_DEV_ID_82545GM_COPPER, "Intel(R) Legacy PRO/1000 MT 82545GM (Copper)"), PVID(0x8086, E1000_DEV_ID_82545GM_FIBER, "Intel(R) Legacy PRO/1000 MF 82545GM (Fiber)"), PVID(0x8086, E1000_DEV_ID_82545GM_SERDES, "Intel(R) Legacy PRO/1000 MB 82545GM (SERDES)"), PVID(0x8086, E1000_DEV_ID_82546EB_COPPER, "Intel(R) Legacy PRO/1000 MT 82546EB (Copper)"), PVID(0x8086, E1000_DEV_ID_82546EB_FIBER, "Intel(R) Legacy PRO/1000 MF 82546EB (Fiber)"), PVID(0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, "Intel(R) Legacy PRO/1000 MT 82546EB (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82546GB_COPPER, "Intel(R) Legacy PRO/1000 MT 82546GB (Copper)"), PVID(0x8086, E1000_DEV_ID_82546GB_FIBER, "Intel(R) Legacy PRO/1000 MF 82546GB (Fiber)"), PVID(0x8086, E1000_DEV_ID_82546GB_SERDES, "Intel(R) Legacy PRO/1000 MB 82546GB (SERDES)"), PVID(0x8086, E1000_DEV_ID_82546GB_PCIE, "Intel(R) Legacy PRO/1000 P 82546GB (PCIe)"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, "Intel(R) Legacy PRO/1000 GT 82546GB (Quad Copper)"),
PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, "Intel(R) Legacy PRO/1000 GT 82546GB (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82547EI, "Intel(R) Legacy PRO/1000 CT 82547EI"), PVID(0x8086, E1000_DEV_ID_82547EI_MOBILE, "Intel(R) Legacy PRO/1000 CT 82547EI (Mobile)"), PVID(0x8086, E1000_DEV_ID_82547GI, "Intel(R) Legacy PRO/1000 CT 82547GI"), /* Intel(R) - em-class devices */ PVID(0x8086, E1000_DEV_ID_82571EB_COPPER, "Intel(R) PRO/1000 PT 82571EB/82571GB (Copper)"), PVID(0x8086, E1000_DEV_ID_82571EB_FIBER, "Intel(R) PRO/1000 PF 82571EB/82571GB (Fiber)"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES, "Intel(R) PRO/1000 PB 82571EB (SERDES)"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, "Intel(R) PRO/1000 82571EB (Dual Mezzanine)"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, "Intel(R) PRO/1000 82571EB (Quad Mezzanine)"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, "Intel(R) PRO/1000 PT 82571EB/82571GB (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, "Intel(R) PRO/1000 PT 82571EB/82571GB (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, "Intel(R) PRO/1000 PF 82571EB (Quad Fiber)"), PVID(0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, "Intel(R) PRO/1000 PT 82571PT (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82572EI, "Intel(R) PRO/1000 PT 82572EI (Copper)"), PVID(0x8086, E1000_DEV_ID_82572EI_COPPER, "Intel(R) PRO/1000 PT 82572EI (Copper)"), PVID(0x8086, E1000_DEV_ID_82572EI_FIBER, "Intel(R) PRO/1000 PF 82572EI (Fiber)"), PVID(0x8086, E1000_DEV_ID_82572EI_SERDES, "Intel(R) PRO/1000 82572EI (SERDES)"), PVID(0x8086, E1000_DEV_ID_82573E, "Intel(R) PRO/1000 82573E (Copper)"), PVID(0x8086, E1000_DEV_ID_82573E_IAMT, "Intel(R) PRO/1000 82573E AMT (Copper)"), PVID(0x8086, E1000_DEV_ID_82573L, "Intel(R) PRO/1000 82573L"), PVID(0x8086, E1000_DEV_ID_82583V, "Intel(R) 82583V"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, "Intel(R) 80003ES2LAN (Copper)"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, "Intel(R) 80003ES2LAN (SERDES)"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, "Intel(R) 80003ES2LAN (Dual Copper)"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, "Intel(R) 80003ES2LAN (Dual SERDES)"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, "Intel(R) 82566MM ICH8 AMT (Mobile)"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_AMT, "Intel(R) 82566DM ICH8 AMT"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_C, "Intel(R) 82566DC ICH8"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE, "Intel(R) 82562V ICH8"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_GT, "Intel(R) 82562GT ICH8"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_G, "Intel(R) 82562G ICH8"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M, "Intel(R) 82566MC ICH8"), PVID(0x8086, E1000_DEV_ID_ICH8_82567V_3, "Intel(R) 82567V-3 ICH8"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, "Intel(R) 82567LM ICH9 AMT"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_AMT, "Intel(R) 82566DM-2 ICH9 AMT"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_C, "Intel(R) 82566DC-2 ICH9"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M, "Intel(R) 82567LF ICH9"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_V, "Intel(R) 82567V ICH9"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE, "Intel(R) 82562V-2 ICH9"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_GT, "Intel(R) 82562GT-2 ICH9"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_G, "Intel(R) 82562G-2 ICH9"), PVID(0x8086, E1000_DEV_ID_ICH9_BM, "Intel(R) 82567LM-4 ICH9"), PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) Gigabit CT 82574L"), PVID(0x8086, E1000_DEV_ID_82574LA, "Intel(R) 82574L-Apple"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LM, "Intel(R) 82567LM-2 ICH10"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LF, "Intel(R) 
82567LF-2 ICH10"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_V, "Intel(R) 82567V-2 ICH10"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LM, "Intel(R) 82567LM-3 ICH10"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LF, "Intel(R) 82567LF-3 ICH10"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_V, "Intel(R) 82567V-4 ICH10"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LM, "Intel(R) 82577LM"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LC, "Intel(R) 82577LC"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DM, "Intel(R) 82578DM"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DC, "Intel(R) 82578DC"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_LM, "Intel(R) 82579LM"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_V, "Intel(R) 82579V"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, "Intel(R) I217-LM LPT"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_V, "Intel(R) I217-V LPT"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, "Intel(R) I218-LM LPTLP"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, "Intel(R) I218-V LPTLP"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM2, "Intel(R) I218-LM (2)"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V2, "Intel(R) I218-V (2)"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM3, "Intel(R) I218-LM (3)"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V3, "Intel(R) I218-V (3)"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, "Intel(R) I219-LM SPT"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V, "Intel(R) I219-V SPT"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, "Intel(R) I219-LM SPT-H(2)"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, "Intel(R) I219-V SPT-H(2)"), PVID(0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, "Intel(R) I219-LM LBG(3)"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, "Intel(R) I219-LM SPT(4)"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, "Intel(R) I219-V SPT(4)"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, "Intel(R) I219-LM SPT(5)"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, "Intel(R) I219-V SPT(5)"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6, "Intel(R) I219-LM CNP(6)"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, "Intel(R) I219-V CNP(6)"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7, "Intel(R) I219-LM CNP(7)"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, "Intel(R) I219-V CNP(7)"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8, "Intel(R) I219-LM ICP(8)"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, "Intel(R) I219-V ICP(8)"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9, "Intel(R) I219-LM ICP(9)"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, "Intel(R) I219-V ICP(9)"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_LM10, "Intel(R) I219-LM CMP(10)"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_V10, "Intel(R) I219-V CMP(10)"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_LM11, "Intel(R) I219-LM CMP(11)"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_V11, "Intel(R) I219-V CMP(11)"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_LM12, "Intel(R) I219-LM CMP(12)"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_V12, "Intel(R) I219-V CMP(12)"), PVID(0x8086, E1000_DEV_ID_PCH_TGP_I219_LM13, "Intel(R) I219-LM TGP(13)"), PVID(0x8086, E1000_DEV_ID_PCH_TGP_I219_V13, "Intel(R) I219-V TGP(13)"), PVID(0x8086, E1000_DEV_ID_PCH_TGP_I219_LM14, "Intel(R) I219-LM TGP(14)"), PVID(0x8086, E1000_DEV_ID_PCH_TGP_I219_V14, "Intel(R) I219-V GTP(14)"), PVID(0x8086, E1000_DEV_ID_PCH_TGP_I219_LM15, "Intel(R) I219-LM TGP(15)"), PVID(0x8086, E1000_DEV_ID_PCH_TGP_I219_V15, "Intel(R) I219-V TGP(15)"), PVID(0x8086, E1000_DEV_ID_PCH_ADL_I219_LM16, "Intel(R) I219-LM ADL(16)"), PVID(0x8086, E1000_DEV_ID_PCH_ADL_I219_V16, "Intel(R) I219-V ADL(16)"), PVID(0x8086, E1000_DEV_ID_PCH_ADL_I219_LM17, "Intel(R) I219-LM ADL(17)"), PVID(0x8086, E1000_DEV_ID_PCH_ADL_I219_V17, "Intel(R) I219-V ADL(17)"), 
PVID(0x8086, E1000_DEV_ID_PCH_MTP_I219_LM18, "Intel(R) I219-LM MTP(18)"), PVID(0x8086, E1000_DEV_ID_PCH_MTP_I219_V18, "Intel(R) I219-V MTP(18)"), PVID(0x8086, E1000_DEV_ID_PCH_MTP_I219_LM19, "Intel(R) I219-LM MTP(19)"), PVID(0x8086, E1000_DEV_ID_PCH_MTP_I219_V19, "Intel(R) I219-V MTP(19)"), /* required last entry */ PVID_END }; static pci_vendor_info_t igb_vendor_info_array[] = { /* Intel(R) - igb-class devices */ PVID(0x8086, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/1000 82575EB (Copper)"), PVID(0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/1000 82575EB (SERDES)"), PVID(0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/1000 VT 82575GB (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82576, "Intel(R) PRO/1000 82576"), PVID(0x8086, E1000_DEV_ID_82576_NS, "Intel(R) PRO/1000 82576NS"), PVID(0x8086, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/1000 82576NS (SERDES)"), PVID(0x8086, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/1000 EF 82576 (Dual Fiber)"), PVID(0x8086, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/1000 82576 (Dual SERDES)"), PVID(0x8086, E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/1000 ET 82576 (Quad SERDES)"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/1000 ET 82576 (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/1000 ET(2) 82576 (Quad Copper)"), PVID(0x8086, E1000_DEV_ID_82576_VF, "Intel(R) PRO/1000 82576 Virtual Function"), PVID(0x8086, E1000_DEV_ID_82580_COPPER, "Intel(R) I340 82580 (Copper)"), PVID(0x8086, E1000_DEV_ID_82580_FIBER, "Intel(R) I340 82580 (Fiber)"), PVID(0x8086, E1000_DEV_ID_82580_SERDES, "Intel(R) I340 82580 (SERDES)"), PVID(0x8086, E1000_DEV_ID_82580_SGMII, "Intel(R) I340 82580 (SGMII)"), PVID(0x8086, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) I340-T2 82580 (Dual Copper)"), PVID(0x8086, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) I340-F4 82580 (Quad Fiber)"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) DH89XXCC (SERDES)"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) I347-AT4 DH89XXCC"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) DH89XXCC (SFP)"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) DH89XXCC (Backplane)"), PVID(0x8086, E1000_DEV_ID_I350_COPPER, "Intel(R) I350 (Copper)"), PVID(0x8086, E1000_DEV_ID_I350_FIBER, "Intel(R) I350 (Fiber)"), PVID(0x8086, E1000_DEV_ID_I350_SERDES, "Intel(R) I350 (SERDES)"), PVID(0x8086, E1000_DEV_ID_I350_SGMII, "Intel(R) I350 (SGMII)"), PVID(0x8086, E1000_DEV_ID_I350_VF, "Intel(R) I350 Virtual Function"), PVID(0x8086, E1000_DEV_ID_I210_COPPER, "Intel(R) I210 (Copper)"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) I210 IT (Copper)"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) I210 (OEM)"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) I210 Flashless (Copper)"), PVID(0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) I210 Flashless (SERDES)"), PVID(0x8086, E1000_DEV_ID_I210_SGMII_FLASHLESS, "Intel(R) I210 Flashless (SGMII)"), PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) I210 (Fiber)"), PVID(0x8086, E1000_DEV_ID_I210_SERDES, "Intel(R) I210 (SERDES)"), PVID(0x8086, E1000_DEV_ID_I210_SGMII, "Intel(R) I210 (SGMII)"), PVID(0x8086, E1000_DEV_ID_I211_COPPER, "Intel(R) I211 (Copper)"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) I354 (1.0 GbE Backplane)"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) I354 (2.5 GbE Backplane)"), PVID(0x8086, E1000_DEV_ID_I354_SGMII, "Intel(R) I354 (SGMII)"), /* required last entry */ PVID_END }; 
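/*
 * Illustrative sketch, not part of this driver: iflib consumes
 * pci_vendor_info_t tables like the two above through IFLIB_PNP_INFO()
 * and iflib_device_probe().  A hypothetical minimal table for a single
 * device would look like this ("foo" is a placeholder name):
 */
#if 0
static pci_vendor_info_t foo_vendor_info_array[] = {
	PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) Gigabit CT 82574L"),
	PVID_END	/* required last entry */
};
#endif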
/********************************************************************* * Function prototypes *********************************************************************/ static void *em_register(device_t); static void *igb_register(device_t); static int em_if_attach_pre(if_ctx_t); static int em_if_attach_post(if_ctx_t); static int em_if_detach(if_ctx_t); static int em_if_shutdown(if_ctx_t); static int em_if_suspend(if_ctx_t); static int em_if_resume(if_ctx_t); static int em_if_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static int em_if_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void em_if_queues_free(if_ctx_t); static uint64_t em_if_get_counter(if_ctx_t, ift_counter); static void em_if_init(if_ctx_t); static void em_if_stop(if_ctx_t); static void em_if_media_status(if_ctx_t, struct ifmediareq *); static int em_if_media_change(if_ctx_t); static int em_if_mtu_set(if_ctx_t, uint32_t); static void em_if_timer(if_ctx_t, uint16_t); static void em_if_vlan_register(if_ctx_t, u16); static void em_if_vlan_unregister(if_ctx_t, u16); static void em_if_watchdog_reset(if_ctx_t); static bool em_if_needs_restart(if_ctx_t, enum iflib_restart_event); static void em_identify_hardware(if_ctx_t); static int em_allocate_pci_resources(if_ctx_t); static void em_free_pci_resources(if_ctx_t); static void em_reset(if_ctx_t); static int em_setup_interface(if_ctx_t); static int em_setup_msix(if_ctx_t); static void em_initialize_transmit_unit(if_ctx_t); static void em_initialize_receive_unit(if_ctx_t); static void em_if_intr_enable(if_ctx_t); static void em_if_intr_disable(if_ctx_t); static void igb_if_intr_enable(if_ctx_t); static void igb_if_intr_disable(if_ctx_t); static int em_if_rx_queue_intr_enable(if_ctx_t, uint16_t); static int em_if_tx_queue_intr_enable(if_ctx_t, uint16_t); static int igb_if_rx_queue_intr_enable(if_ctx_t, uint16_t); static int igb_if_tx_queue_intr_enable(if_ctx_t, uint16_t); static void em_if_multi_set(if_ctx_t); static void em_if_update_admin_status(if_ctx_t); static void em_if_debug(if_ctx_t); static void em_update_stats_counters(struct e1000_softc *); static void em_add_hw_stats(struct e1000_softc *); static int em_if_set_promisc(if_ctx_t, int); static bool em_if_vlan_filter_capable(if_ctx_t); static bool em_if_vlan_filter_used(if_ctx_t); static void em_if_vlan_filter_enable(struct e1000_softc *); static void em_if_vlan_filter_disable(struct e1000_softc *); static void em_if_vlan_filter_write(struct e1000_softc *); static void em_setup_vlan_hw_support(if_ctx_t ctx); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct e1000_softc *); static void em_fw_version_locked(if_ctx_t); static void em_sbuf_fw_version(struct e1000_fw_version *, struct sbuf *); static void em_print_fw_version(struct e1000_softc *); static int em_sysctl_print_fw_version(SYSCTL_HANDLER_ARGS); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int em_get_rs(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct e1000_softc *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct e1000_softc *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct e1000_softc *); static void em_release_manageability(struct e1000_softc *); static void em_get_hw_control(struct e1000_softc *); static void em_release_hw_control(struct e1000_softc *); static void em_get_wakeup(if_ctx_t); 
static void em_enable_wakeup(if_ctx_t); static int em_enable_phy_wakeup(struct e1000_softc *); static void em_disable_aspm(struct e1000_softc *); int em_intr(void *); /* MSI-X handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *); static void em_enable_vectors_82574(if_ctx_t); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static void em_if_led_func(if_ctx_t, int); static int em_get_regs(SYSCTL_HANDLER_ARGS); static void lem_smartspeed(struct e1000_softc *); static void igb_configure_queues(struct e1000_softc *); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_register, em_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_register, igb_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct e1000_softc), }; DRIVER_MODULE(em, pci, em_driver, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); MODULE_DEPEND(em, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, em, em_vendor_info_array); static driver_t igb_driver = { "igb", igb_methods, sizeof(struct e1000_softc), }; DRIVER_MODULE(igb, pci, igb_driver, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); MODULE_DEPEND(igb, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, igb, igb_vendor_info_array); static device_method_t em_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, em_if_intr_enable), DEVMETHOD(ifdi_intr_disable, em_if_intr_disable), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, em_if_rx_queue_intr_enable), 
DEVMETHOD(ifdi_tx_queue_intr_enable, em_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD(ifdi_needs_restart, em_if_needs_restart), DEVMETHOD_END }; static driver_t em_if_driver = { "em_if", em_if_methods, sizeof(struct e1000_softc) }; static device_method_t igb_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, igb_if_intr_enable), DEVMETHOD(ifdi_intr_disable, igb_if_intr_disable), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, igb_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, igb_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD(ifdi_needs_restart, em_if_needs_restart), DEVMETHOD_END }; static driver_t igb_if_driver = { "igb_if", igb_if_methods, sizeof(struct e1000_softc) }; /********************************************************************* * Tunable default values. 
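 * The EM_TICKS_TO_USECS()/EM_USECS_TO_TICKS() macros below convert
 * between microseconds and the 1.024 us units used by the interrupt
 * delay registers, rounding to the nearest unit.  The ITR register
 * counts in 256 ns units, so DEFAULT_ITR = 1000000000 / (8000 * 256)
 * ~= 488, which spaces interrupts roughly 125 us apart (8000/s).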
*********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_smart_pwr_down = false; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = false; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy Efficient Ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Disable Energy Efficient Ethernet"); /* ** Tunable interrupt rate */ static int em_max_interrupt_rate = 8000; SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &em_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; extern struct if_txrx igb_txrx; extern struct if_txrx em_txrx; extern struct if_txrx lem_txrx; static struct if_shared_ctx em_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = EM_TSO_SEG_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {EM_MAX_RXD}, .isc_ntxd_max = {EM_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; static struct if_shared_ctx igb_sctx_init = { .isc_magic =
IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = EM_TSO_SEG_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &igb_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {IGB_MAX_RXD}, .isc_ntxd_max = {IGB_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; /***************************************************************** * * Dump Registers * ****************************************************************/ #define IGB_REGS_LEN 739 static int em_get_regs(SYSCTL_HANDLER_ARGS) { struct e1000_softc *sc = (struct e1000_softc *)arg1; struct e1000_hw *hw = &sc->hw; struct sbuf *sb; u32 *regs_buff; int rc; regs_buff = malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_WAITOK); memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) { free(regs_buff, M_DEVBUF); return (rc); } sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); MPASS(sb != NULL); if (sb == NULL) { free(regs_buff, M_DEVBUF); return (ENOMEM); } /* General Registers */ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); sbuf_printf(sb, "General Registers\n"); sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); sbuf_printf(sb, "\tCTRL_EXT\t %08x\n\n", regs_buff[2]); sbuf_printf(sb, "Interrupt Registers\n"); sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); sbuf_printf(sb, "RX Registers\n"); sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); sbuf_printf(sb, "TX Registers\n"); sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); 
sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); sbuf_printf(sb, "\tTDFHS\t %08x\n", regs_buff[20]); sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); free(regs_buff, M_DEVBUF); #ifdef DUMP_DESCS { if_softc_ctx_t scctx = sc->shared; struct rx_ring *rxr = &rx_que->rxr; struct tx_ring *txr = &tx_que->txr; int ntxd = scctx->isc_ntxd[0]; int nrxd = scctx->isc_nrxd[0]; int j; for (j = 0; j < nrxd; j++) { u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); u32 length = le32toh(rxr->rx_base[j].wb.upper.length); sbuf_printf(sb, "\tReceive Descriptor Address %d: %08" PRIx64 " Error:%d Length:%d\n", j, rxr->rx_base[j].read.buffer_addr, staterr, length); } for (j = 0; j < min(ntxd, 256); j++) { unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x eop: %d DD=%d\n", j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop, buf->eop != -1 ? txr->tx_base[buf->eop].upper.fields.status & E1000_TXD_STAT_DD : 0); } } #endif rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } static void * em_register(device_t dev) { return (&em_sctx_init); } static void * igb_register(device_t dev) { return (&igb_sctx_init); } static int em_set_num_queues(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); int maxqueues; /* Sanity check based on HW */ switch (sc->hw.mac.type) { case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: case e1000_82575: maxqueues = 4; break; case e1000_i211: case e1000_82574: maxqueues = 2; break; default: maxqueues = 1; break; } return (maxqueues); } #define LEM_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER #define EM_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \ IFCAP_LRO | IFCAP_VLAN_HWTSO #define IGB_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \ IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6 |\ IFCAP_TSO6 /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
* * return 0 on success, positive on failure *********************************************************************/ static int em_if_attach_pre(if_ctx_t ctx) { struct e1000_softc *sc; if_softc_ctx_t scctx; device_t dev; struct e1000_hw *hw; struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; int error = 0; INIT_DEBUGOUT("em_if_attach_pre: begin"); dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); sc->ctx = sc->osdep.ctx = ctx; sc->dev = sc->osdep.dev = dev; scctx = sc->shared = iflib_get_softc_ctx(ctx); sc->media = iflib_get_media(ctx); hw = &sc->hw; sc->tx_process_limit = scctx->isc_ntxd[0]; /* Determine hardware and mac info */ em_identify_hardware(ctx); /* SYSCTL stuff */ ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "nvm", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, em_sysctl_print_fw_version, "A", "Prints FW/NVM Versions"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, em_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, em_get_regs, "A", "Dump Registers"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "rs_dump", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, em_get_rs, "I", "Dump RS indexes"); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); if (bootverbose) device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); if (hw->mac.type >= igb_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc); scctx->isc_txrx = &igb_txrx; scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_capabilities = scctx->isc_capenable = IGB_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP; if (hw->mac.type != e1000_82575) scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. 
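** (pci_msix_table_bar() reads the MSI-X capability to return the BAR
** that actually hosts the MSI-X table, so isc_msix_bar is correct
** for either layout.)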
*/ scctx->isc_msix_bar = pci_msix_table_bar(dev); } else if (hw->mac.type >= em_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended); scctx->isc_txrx = &em_txrx; scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_capabilities = scctx->isc_capenable = EM_CAPS; /* * For EM-class devices, don't enable IFCAP_{TSO4,VLAN_HWTSO} * by default as we don't have workarounds for all associated * silicon errata. E.g., with several MACs such as 82573E, * TSO only works at Gigabit speed and otherwise can cause the * hardware to hang (which also would be next to impossible to * work around given that already queued TSO-using descriptors * would need to be flushed and vlan(4) reconfigured at runtime * in case of a link speed change). Moreover, MACs like 82579 * still can hang at Gigabit even with all publicly documented * TSO workarounds implemented. Generally, the penalty of * these workarounds is rather high and may involve copying * mbuf data around so the advantages of TSO lapse. Still, TSO may * work for a few MACs of this class - at least when sticking * with Gigabit - in which case users may enable TSO manually. */ scctx->isc_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; /* * We support MSI-X with 82574 only, but indicate to iflib(4) * that it shall give MSI at least a try with other devices. */ if (hw->mac.type == e1000_82574) { scctx->isc_msix_bar = pci_msix_table_bar(dev); } else { scctx->isc_msix_bar = -1; scctx->isc_disable_msix = 1; } } else { scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP; scctx->isc_txrx = &lem_txrx; scctx->isc_capabilities = LEM_CAPS; if (hw->mac.type < e1000_82543) scctx->isc_capabilities &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM); /* 82541ER doesn't do HW tagging */ if (hw->device_id == E1000_DEV_ID_82541ER || hw->device_id == E1000_DEV_ID_82541ER_LOM) scctx->isc_capabilities &= ~IFCAP_VLAN_HWTAGGING; /* INTx only */ scctx->isc_msix_bar = 0; scctx->isc_capenable = scctx->isc_capabilities; } /* Setup PCI resources */ if (em_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; sc->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)sc->flash; sc->osdep.flash_bus_space_tag = rman_get_bustag(sc->flash);
sc->osdep.flash_bus_space_handle = rman_get_bushandle(sc->flash); } /* ** In the new SPT device flash is not a ** separate BAR, rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type >= e1000_pch_spt) { sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag; sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, true); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } em_setup_msix(ctx); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(sc, "rx_int_delay", "receive interrupt delay in usecs", &sc->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(sc, "tx_int_delay", "transmit interrupt delay in usecs", &sc->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(sc, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &sc->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(sc, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &sc->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(sc, "itr", "interrupt delay limit in usecs/4", &sc->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = false; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; if (hw->mac.type < em_mac_min) { e1000_init_script_state_82541(hw, true); e1000_set_tbi_compatibility_82543(hw, true); } /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = false; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ scctx->isc_max_frame_size = hw->mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* Allocate multicast array memory. */ sc->mta = malloc(sizeof(u8) * ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (sc->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "eee_control", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state; this is ** important when reading the nvm and ** mac address afterwards. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time it's a real issue.
*/ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { if (sc->vf_ifp) { ether_gen_addr(iflib_get_ifp(ctx), (struct ether_addr *)hw->mac.addr); } else { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } } /* Save the EEPROM/NVM versions, must be done under IFLIB_CTX_LOCK */ em_fw_version_locked(ctx); em_print_fw_version(sc); /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, true); /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(ctx); /* Enable only WOL MAGIC by default */ scctx->isc_capenable &= ~IFCAP_WOL; if (sc->wol != 0) scctx->isc_capenable |= IFCAP_WOL_MAGIC; iflib_set_mac(ctx, hw->mac.addr); return (0); err_late: em_release_hw_control(sc); err_pci: em_free_pci_resources(ctx); free(sc->mta, M_DEVBUF); return (error); } static int em_if_attach_post(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; int error = 0; /* Setup OS specific network interface */ error = em_setup_interface(ctx); if (error != 0) { device_printf(sc->dev, "Interface setup failed: %d\n", error); goto err_late; } em_reset(ctx); /* Initialize statistics */ em_update_stats_counters(sc); hw->mac.get_link_status = 1; em_if_update_admin_status(ctx); em_add_hw_stats(sc); /* Non-AMT based hardware can now take control from firmware */ if (sc->has_manage && !sc->has_amt) em_get_hw_control(sc); INIT_DEBUGOUT("em_if_attach_post: end"); return (0); err_late: /* upon attach_post() error, iflib calls _if_detach() to free resources. */ return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_detach(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); INIT_DEBUGOUT("em_if_detach: begin"); e1000_phy_hw_reset(&sc->hw); em_release_manageability(sc); em_release_hw_control(sc); em_free_pci_resources(ctx); free(sc->mta, M_DEVBUF); sc->mta = NULL; return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_if_shutdown(if_ctx_t ctx) { return em_if_suspend(ctx); } /* * Suspend/resume device methods. 
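* (Roughly: suspend releases manageability and hardware control and * arms the wake-up logic; resume applies the pch2lan workarounds if * needed, then re-runs init and manageability setup.)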
*/ static int em_if_suspend(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); em_release_manageability(sc); em_release_hw_control(sc); em_enable_wakeup(ctx); return (0); } static int em_if_resume(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); if (sc->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&sc->hw); em_if_init(ctx); em_init_manageability(sc); return(0); } static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { int max_frame_size; struct e1000_softc *sc = iflib_get_softc(ctx); if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx); IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); switch (sc->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; case e1000_82542: case e1000_ich8lan: /* Adapters that do not support jumbo frames */ max_frame_size = ETHER_MAX_LEN; break; default: if (sc->hw.mac.type >= igb_mac_min) max_frame_size = 9234; else /* lem */ max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { return (EINVAL); } scctx->isc_max_frame_size = sc->hw.mac.max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * **********************************************************************/ static void em_if_init(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); if_softc_ctx_t scctx = sc->shared; - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct em_tx_queue *tx_que; int i; INIT_DEBUGOUT("em_if_init: begin"); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (sc->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&sc->hw, true); e1000_rar_set(&sc->hw, sc->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(ctx); em_if_update_admin_status(ctx); for (i = 0, tx_que = sc->tx_queues; i < sc->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_rs_cidx = txr->tx_rs_pidx; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error when calculating how many descriptors are * done in the credits_update function. 
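* For example, with isc_ntxd[0] == 1024 the index starts at 1023, so * when descriptor 0 is the first to complete, the computed delta is * one descriptor rather than zero.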
*/ txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; } /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ if (sc->hw.mac.type >= igb_mac_min) e1000_rx_fifo_flush_base(&sc->hw); /* Configure for OS presence */ em_init_manageability(sc); /* Prepare transmit descriptors and buffers */ em_initialize_transmit_unit(ctx); /* Setup Multicast table */ em_if_multi_set(ctx); sc->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx); em_initialize_receive_unit(ctx); /* Set up VLAN support and filter */ em_setup_vlan_hw_support(ctx); /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, if_getflags(ifp)); e1000_clear_hw_cntrs_base_generic(&sc->hw); /* MSI-X configuration for 82574 */ if (sc->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&sc->hw, E1000_IVAR, sc->ivars); } else if (sc->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ igb_configure_queues(sc); /* this clears any pending interrupts */ E1000_READ_REG(&sc->hw, E1000_ICR); E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC); /* AMT based hardware can now take control from firmware */ if (sc->has_manage && sc->has_amt) em_get_hw_control(sc); /* Set Energy Efficient Ethernet */ if (sc->hw.mac.type >= igb_mac_min && sc->hw.phy.media_type == e1000_media_type_copper) { if (sc->hw.mac.type == e1000_i354) e1000_set_eee_i354(&sc->hw, true, true); else e1000_set_eee_i350(&sc->hw, true, true); } } /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ int em_intr(void *arg) { struct e1000_softc *sc = arg; if_ctx_t ctx = sc->ctx; u32 reg_icr; reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (sc->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Only MSI-X interrupts have one-shot behavior by taking advantage * of the EIAC register. Thus, explicitly disable interrupts. This * also works around the MSI message reordering errata on certain * systems. 
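* (Sketch of the resulting flow: mask interrupts here, return * FILTER_SCHEDULE_THREAD below, and let iflib re-enable the interrupt * once the queues have been serviced.)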
*/ IFDI_INTR_DISABLE(ctx); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) em_handle_link(ctx); if (reg_icr & E1000_ICR_RXO) sc->rx_overruns++; return (FILTER_SCHEDULE_THREAD); } static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct e1000_softc *sc = iflib_get_softc(ctx); struct em_rx_queue *rxq = &sc->rx_queues[rxqid]; E1000_WRITE_REG(&sc->hw, E1000_IMS, rxq->eims); return (0); } static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct e1000_softc *sc = iflib_get_softc(ctx); struct em_tx_queue *txq = &sc->tx_queues[txqid]; E1000_WRITE_REG(&sc->hw, E1000_IMS, txq->eims); return (0); } static int igb_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct e1000_softc *sc = iflib_get_softc(ctx); struct em_rx_queue *rxq = &sc->rx_queues[rxqid]; E1000_WRITE_REG(&sc->hw, E1000_EIMS, rxq->eims); return (0); } static int igb_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct e1000_softc *sc = iflib_get_softc(ctx); struct em_tx_queue *txq = &sc->tx_queues[txqid]; E1000_WRITE_REG(&sc->hw, E1000_EIMS, txq->eims); return (0); } /********************************************************************* * * MSI-X RX Interrupt Service routine * **********************************************************************/ static int em_msix_que(void *arg) { struct em_rx_queue *que = arg; ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSI-X Link Fast Interrupt Service routine * **********************************************************************/ static int em_msix_link(void *arg) { struct e1000_softc *sc = arg; u32 reg_icr; ++sc->link_irq; MPASS(sc->hw.back != NULL); reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) sc->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) em_handle_link(sc->ctx); /* Re-arm unconditionally */ if (sc->hw.mac.type >= igb_mac_min) { E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->link_mask); } else if (sc->hw.mac.type == e1000_82574) { E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC | E1000_IMS_OTHER); /* * Because we must read the ICR for this interrupt it may * clear other causes using autoclear, for this reason we * simply create a soft interrupt for all these vectors. */ if (reg_icr) E1000_WRITE_REG(&sc->hw, E1000_ICS, sc->ims); } else E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); return (FILTER_HANDLED); } static void em_handle_link(void *context) { if_ctx_t ctx = context; struct e1000_softc *sc = iflib_get_softc(ctx); sc->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
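* e.g. the "media: Ethernet autoselect (1000baseT <full-duplex>)" * line printed by ifconfig(8) is assembled from the ifm_active bits * set here.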
* **********************************************************************/ static void em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct e1000_softc *sc = iflib_get_softc(ctx); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_if_media_status: begin"); iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->link_active) { return; } ifmr->ifm_status |= IFM_ACTIVE; if ((sc->hw.phy.media_type == e1000_media_type_fiber) || (sc->hw.phy.media_type == e1000_media_type_internal_serdes)) { if (sc->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (sc->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (sc->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * the media/mediaopt options with ifconfig. * **********************************************************************/ static int em_if_media_change(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("em_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: sc->hw.mac.autoneg = DO_AUTO_NEG; sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: sc->hw.mac.autoneg = DO_AUTO_NEG; sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: sc->hw.mac.autoneg = false; sc->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: sc->hw.mac.autoneg = false; sc->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(sc->dev, "Unsupported media type\n"); } em_if_init(ctx); return (0); } static int em_if_set_promisc(if_ctx_t ctx, int flags) { struct e1000_softc *sc = iflib_get_softc(ctx); - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL); reg_rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_UPE); if (flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = min(if_llmaddr_count(ifp), MAX_NUM_MULTICAST_ADDRESSES); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); if (flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); em_if_vlan_filter_disable(sc); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); } else { if (flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); } if (em_if_vlan_filter_used(ctx)) em_if_vlan_filter_enable(sc); } return (0); } static u_int em_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int idx) { u8 *mta = arg; if (idx == MAX_NUM_MULTICAST_ADDRESSES) return (0); bcopy(LLADDR(sdl), &mta[idx * ETHER_ADDR_LEN],
ETHER_ADDR_LEN); return (1); } /********************************************************************* * Multicast Update * * This routine is called whenever the multicast address list is updated. * **********************************************************************/ static void em_if_multi_set(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); u8 *mta; /* Multicast array memory */ u32 reg_rctl = 0; int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = sc->mta; bzero(mta, sizeof(u8) * ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (sc->hw.mac.type == e1000_82542 && sc->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL); if (sc->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&sc->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); msec_delay(5); } mcnt = if_foreach_llmaddr(ifp, em_copy_maddr, mta); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) e1000_update_mc_addr_list(&sc->hw, mta, mcnt); reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL); if (if_getflags(ifp) & IFF_PROMISC) reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || if_getflags(ifp) & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; } else reg_rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); if (sc->hw.mac.type == e1000_82542 && sc->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (sc->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&sc->hw); } } /********************************************************************* * Timer routine * * This routine schedules em_if_update_admin_status() to check for * link status and to gather statistics as well as to perform some * controller-specific hardware patting.
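* (iflib calls the timer for every queue each tick; only qid 0 defers * to the admin task, so the link check and statistics update run once * per tick.)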
* **********************************************************************/ static void em_if_timer(if_ctx_t ctx, uint16_t qid) { if (qid != 0) return; iflib_admin_intr_deferred(ctx); } static void em_if_update_admin_status(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; device_t dev = iflib_get_dev(ctx); u32 link_check, thstat, ctrl; link_check = thstat = ctrl = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else { link_check = true; } break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = hw->mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* FALLTHROUGH */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Now check for a transition */ if (link_check && (sc->link_active == 0)) { e1000_get_speed_and_duplex(hw, &sc->link_speed, &sc->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((sc->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", sc->link_speed, ((sc->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); sc->link_active = 1; sc->smartspeed = 0; if ((ctrl & E1000_CTRL_EXT_LINK_MODE_MASK) == E1000_CTRL_EXT_LINK_MODE_GMII && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if (hw->dev_spec._82575.media_changed && hw->mac.type >= igb_mac_min) { hw->dev_spec._82575.media_changed = false; sc->flags |= IGB_MEDIA_RESET; em_reset(ctx); } iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(sc->link_speed)); } else if (!link_check && (sc->link_active == 1)) { sc->link_speed = 0; sc->link_duplex = 0; sc->link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); } em_update_stats_counters(sc); /* Reset LAA into RAR[0] on 82571 */ if (hw->mac.type == e1000_82571 && e1000_get_laa_state_82571(hw)) e1000_rar_set(hw, hw->mac.addr, 0); if (hw->mac.type < em_mac_min) lem_smartspeed(sc); } static void em_if_watchdog_reset(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); /* * Just count the event; iflib(4) will already trigger a * sufficient reset of the controller. */ sc->watchdog_events++; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC. 
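* (On 82544 and newer parts the wake-up filter control register is * also cleared, so a stopped port will not wake the system.)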
* **********************************************************************/ static void em_if_stop(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); INIT_DEBUGOUT("em_if_stop: begin"); e1000_reset_hw(&sc->hw); if (sc->hw.mac.type >= e1000_82544) E1000_WRITE_REG(&sc->hw, E1000_WUFC, 0); e1000_led_off(&sc->hw); e1000_cleanup_led(&sc->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct e1000_softc *sc = iflib_get_softc(ctx); /* Make sure our PCI config space has the necessary stuff set */ sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ sc->hw.vendor_id = pci_get_vendor(dev); sc->hw.device_id = pci_get_device(dev); sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&sc->hw)) { device_printf(dev, "Setup init failure\n"); return; } /* Are we a VF device? */ if ((sc->hw.mac.type == e1000_vfadapt) || (sc->hw.mac.type == e1000_vfadapt_i350)) sc->vf_ifp = 1; else sc->vf_ifp = 0; } static int em_allocate_pci_resources(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid, val; rid = PCIR_BAR(0); sc->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->memory); sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->memory); sc->hw.hw_addr = (u8 *)&sc->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ if (sc->hw.mac.type < em_mac_min && sc->hw.mac.type > e1000_82543) { /* Figure out where our IO BAR is.
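** (Scan the config-space BARs in 4-byte steps, skipping the upper ** dword of 64-bit memory BARs, until one decodes as I/O space.)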
*/ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } sc->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (sc->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } sc->hw.io_base = 0; sc->osdep.io_bus_space_tag = rman_get_bustag(sc->ioport); sc->osdep.io_bus_space_handle = rman_get_bushandle(sc->ioport); } sc->hw.back = &sc->osdep; return (0); } /********************************************************************* * * Set up the MSI-X Interrupt handlers * **********************************************************************/ static int em_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct e1000_softc *sc = iflib_get_softc(ctx); struct em_rx_queue *rx_que = sc->rx_queues; struct em_tx_queue *tx_que = sc->tx_queues; int error, rid, i, vector = 0, rx_vectors; char buf[16]; /* First set up ring resources */ for (i = 0; i < sc->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); sc->rx_num_queues = i + 1; goto fail; } rx_que->msix = vector; /* * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has * NOTHING to do with the MSI-X vector */ if (sc->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); sc->ims |= rx_que->eims; sc->ivars |= (8 | rx_que->msix) << (i * 4); } else if (sc->hw.mac.type == e1000_82575) rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; else rx_que->eims = 1 << vector; } rx_vectors = vector; vector = 0; for (i = 0; i < sc->tx_num_queues; i++, tx_que++, vector++) { snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &sc->tx_queues[i]; iflib_softirq_alloc_generic(ctx, &sc->rx_queues[i % sc->rx_num_queues].que_irq, IFLIB_INTR_TX, tx_que, tx_que->me, buf); tx_que->msix = (vector % sc->rx_num_queues); /* * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has * NOTHING to do with the MSI-X vector */ if (sc->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); sc->ims |= tx_que->eims; sc->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (sc->hw.mac.type == e1000_82575) { tx_que->eims = E1000_EICR_TX_QUEUE0 << i; } else { tx_que->eims = 1 << i; } } /* Link interrupt */ rid = rx_vectors + 1; error = iflib_irq_alloc_generic(ctx, &sc->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, sc, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } sc->linkvec = rx_vectors; if (sc->hw.mac.type < igb_mac_min) { sc->ivars |= (8 | rx_vectors) << 16; sc->ivars |= 0x80000000; /* Enable the "Other" interrupt type for link status change */ sc->ims |= E1000_IMS_OTHER; } return (0); fail: iflib_irq_free(ctx, &sc->irq); rx_que = sc->rx_queues; for (int i = 0; i < sc->rx_num_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } static void igb_configure_queues(struct e1000_softc *sc) { struct e1000_hw *hw = &sc->hw; struct em_rx_queue *rx_que; struct em_tx_queue *tx_que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS 
capability */ if (hw->mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSI-X */ switch (hw->mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < sc->rx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &sc->rx_queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < sc->tx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &sc->tx_queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); sc->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (sc->linkvec | E1000_IVAR_VALID) << 8; sc->link_mask = 1 << sc->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < sc->rx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &sc->rx_queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); sc->que_mask |= rx_que->eims; } /* TX entries */ for (int i = 0; i < sc->tx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &sc->tx_queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); sc->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (sc->linkvec | E1000_IVAR_VALID) << 8; sc->link_mask = 1 << sc->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. 
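* (Roughly: the asserted vector bits are auto-masked when the cause * register is read, so each queue handler only has to re-arm its own * EIMS bit afterwards.)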
*/ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < sc->rx_num_queues; i++) { rx_que = &sc->rx_queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; rx_que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, rx_que->eims); sc->que_mask |= rx_que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(sc->linkvec), E1000_EIMS_OTHER); sc->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (em_max_interrupt_rate > 0) newitr = (4000000 / em_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < sc->rx_num_queues; i++) { rx_que = &sc->rx_queues[i]; E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); } return; } static void em_free_pci_resources(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct em_rx_queue *que = sc->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all MSI-X queue resources */ if (sc->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &sc->irq); if (que != NULL) { for (int i = 0; i < sc->rx_num_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } } if (sc->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->memory), sc->memory); sc->memory = NULL; } if (sc->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->flash), sc->flash); sc->flash = NULL; } if (sc->ioport != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rman_get_rid(sc->ioport), sc->ioport); sc->ioport = NULL; } } /* Set up MSI or MSI-X */ static int em_setup_msix(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); if (sc->hw.mac.type == e1000_82574) { em_enable_vectors_82574(ctx); } return (0); } /********************************************************************* * * Workaround for SmartSpeed on 82541 and 82547 controllers * **********************************************************************/ static void lem_smartspeed(struct e1000_softc *sc) { u16 phy_tmp; if (sc->link_active || (sc->hw.phy.type != e1000_phy_igp) || sc->hw.mac.autoneg == 0 || (sc->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (sc->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp); sc->smartspeed++; if(sc->hw.mac.autoneg && !e1000_copper_link_autoneg(&sc->hw) && !e1000_read_phy_reg(&sc->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&sc->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp); if(sc->hw.mac.autoneg && !e1000_copper_link_autoneg(&sc->hw) && !e1000_read_phy_reg(&sc->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&sc->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ 
if(sc->smartspeed++ == EM_SMARTSPEED_MAX) sc->smartspeed = 0; } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct e1000_softc *sc, u32 pba) { device_t dev = sc->dev; struct e1000_hw *hw = &sc->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; u16 max_frame_size; if (hw->mac.type == e1000_i211) return; max_frame_size = sc->shared->isc_max_frame_size; if (hw->mac.type > e1000_82580) { if (sc->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0s or L1 if available */ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check whether this is a 2.5Gb backplane connection before * configuring the watchdog timer: its field counts 12.8 usec * units on 2.5Gb links and 32 usec units otherwise, hence the * different scaling of sc->dmac below. */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= ((sc->dmac * 5) >> 6); else reg |= (sc->dmac >> 5); } else { reg |= (sc->dmac >> 5); } E1000_WRITE_REG(hw, E1000_DMACR, reg); E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); /* Set the interval before transition */ reg = E1000_READ_REG(hw, E1000_DMCTLX); if (hw->mac.type == e1000_i350) reg |= IGB_DMCTLX_DCFLUSH_DIS; /* ** On a 2.5Gb connection the TTLX unit is 0.4 usec, so a 4 usec ** delay is 10 units (0xA); on other links the same 4 usec delay ** is programmed as 0x4. */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= 0xA; else reg |= 0x4; } else { reg |= 0x4; } E1000_WRITE_REG(hw, E1000_DMCTLX, reg); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - (2 * max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); reg &= ~E1000_PCIEMISC_LX_DECISION; E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); } else if (hw->mac.type == e1000_82580) { u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); E1000_WRITE_REG(hw, E1000_DMACR, 0); } } /********************************************************************* * * Initialize the hardware to a configuration as specified by the * sc structure.
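* For example, a MAC whose PBA carves out 32 KB for Rx at the standard * MTU ends up with hw->fc.high_water = 32768 - 2048 = 30720 and * hw->fc.low_water = 29220, per the watermark math below.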
* **********************************************************************/ static void em_reset(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct e1000_softc *sc = iflib_get_softc(ctx); - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &sc->hw; u32 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Let the firmware know the OS is in control */ em_get_hw_control(sc); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* 82547: Total Packet Buffer is 40K */ case e1000_82547: case e1000_82547_rev_2: if (hw->mac.max_frame_size > 8192) pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */ else pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */ break; /* 82571/82572/80003es2lan: Total Packet Buffer is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; /* 82573: Total Packet Buffer is 32K */ case e1000_82573: pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (hw->mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: pba = E1000_PBA_26K; break; case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; break; default: /* Remaining devices assumed to have a Packet Buffer of 64K. 
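* e.g. such a part at the standard MTU takes the E1000_PBA_48K branch: * 48 KB of the packet buffer for Rx, leaving 16 KB for Tx.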
*/ if (hw->mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } /* Special needs in case of Jumbo frames */ - if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { + if ((hw->mac.type == e1000_82575) && (if_getmtu(ifp) > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (hw->mac.max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = hw->mac.max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } if (hw->mac.type < igb_mac_min) E1000_WRITE_REG(hw, E1000_PBA, pba); INIT_DEBUGOUT1("em_reset: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = (pba & 0xffff) << 10; hw->fc.high_water = rx_buffer_size - roundup2(hw->mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (sc->fc) /* locally set flow control value? 
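* (i.e. one requested through the "fc" sysctl registered in * em_if_attach_pre, e.g. dev.em.0.fc)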
*/ hw->fc.requested_mode = sc->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = true; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_82575: case e1000_82576: /* 8-byte granularity */ hw->fc.low_water = hw->fc.high_water - 8; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* 16-byte granularity */ hw->fc.low_water = hw->fc.high_water - 16; break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* FALLTHROUGH */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* Issue a global reset */ e1000_reset_hw(hw); if (hw->mac.type >= igb_mac_min) { E1000_WRITE_REG(hw, E1000_WUC, 0); } else { E1000_WRITE_REG(hw, E1000_WUFC, 0); em_disable_aspm(sc); } if (sc->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, true); e1000_get_bus_info(hw); sc->flags &= ~IGB_MEDIA_RESET; } /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } if (hw->mac.type >= igb_mac_min) igb_init_dmac(sc, pba); E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); } /* * Initialise the RSS mapping for NICs that support multiple transmit/ * receive rings. */ #define RSSKEYLEN 10 static void em_initialize_rss_mapping(struct e1000_softc *sc) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; struct e1000_hw *hw = &sc->hw; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % sc->rx_num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) E1000_WRITE_REG(hw, E1000_RETA(i), reta); E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } static void igb_initialize_rss_mapping(struct e1000_softc *sc) { struct e1000_hw *hw = &sc->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? */ if (hw->mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. 
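* * For example, with two Rx queues every RETA dword works out to * 0x01000100: its four byte lanes map consecutive hash buckets to * queues 0, 1, 0, 1.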
* * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % sc->rx_num_queues; #else queue_id = (i % sc->rx_num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_MQ; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Setup networking device structure and register interface media. 
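* (e.g. an 82545 fiber adapter registers 1000baseLX below, other * fiber/serdes parts get 1000baseSX, and autoselect is installed as * the default selection.)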
* **********************************************************************/ static int em_setup_interface(if_ctx_t ctx) { - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct e1000_softc *sc = iflib_get_softc(ctx); if_softc_ctx_t scctx = sc->shared; INIT_DEBUGOUT("em_setup_interface: begin"); /* Single Queue */ if (sc->tx_num_queues == 1) { if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1); if_setsendqready(ifp); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ if (sc->hw.phy.media_type == e1000_media_type_fiber || sc->hw.phy.media_type == e1000_media_type_internal_serdes) { u_char fiber_type = IFM_1000_SX; /* default type */ if (sc->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(sc->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(sc->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (sc->hw.phy.type != e1000_phy_ife) { ifmedia_add(sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(sc->media, IFM_ETHER | IFM_AUTO); return (0); } static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct e1000_softc *sc = iflib_get_softc(ctx); if_softc_ctx_t scctx = sc->shared; int error = E1000_SUCCESS; struct em_tx_queue *que; int i, j; MPASS(sc->tx_num_queues > 0); MPASS(sc->tx_num_queues == ntxqsets); /* First allocate the top level queue structs */ if (!(sc->tx_queues = (struct em_tx_queue *) malloc(sizeof(struct em_tx_queue) * sc->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); return(ENOMEM); } for (i = 0, que = sc->tx_queues; i < sc->tx_num_queues; i++, que++) { /* Set up some basics */ struct tx_ring *txr = &que->txr; txr->sc = que->sc = sc; que->me = txr->me = i; /* Allocate report status array */ if (!(txr->tx_rsq = (qidx_t *) malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "failed to allocate rs_idxs memory\n"); error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tx_base = (struct e1000_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; } if (bootverbose) device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", sc->tx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct e1000_softc *sc = iflib_get_softc(ctx); int error = E1000_SUCCESS; struct em_rx_queue *que; int i; MPASS(sc->rx_num_queues > 0); MPASS(sc->rx_num_queues == nrxqsets); /* First allocate the top level queue structs */ if (!(sc->rx_queues = (struct em_rx_queue *) malloc(sizeof(struct em_rx_queue) * sc->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = sc->rx_queues; i < nrxqsets; i++, que++) { /* Set up some basics */ struct rx_ring *rxr = &que->rxr; rxr->sc = 
que->sc = sc; rxr->que = que; que->me = rxr->me = i; /* get the virtual and physical address of the hardware queues */ rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } if (bootverbose) device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", sc->rx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static void em_if_queues_free(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct em_tx_queue *tx_que = sc->tx_queues; struct em_rx_queue *rx_que = sc->rx_queues; if (tx_que != NULL) { for (int i = 0; i < sc->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } free(sc->tx_queues, M_DEVBUF); sc->tx_queues = NULL; } if (rx_que != NULL) { free(sc->rx_queues, M_DEVBUF); sc->rx_queues = NULL; } } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que; struct tx_ring *txr; struct e1000_hw *hw = &sc->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < sc->tx_num_queues; i++, txr++) { u64 bus_addr; caddr_t offp, endp; que = &sc->tx_queues[i]; txr = &que->txr; bus_addr = txr->tx_paddr; /* Clear checksum offload context. */ offp = (caddr_t)&txr->csum_flags; endp = (caddr_t)(txr + 1); bzero(offp, endp - offp); /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(hw, E1000_TDBAL(i)), E1000_READ_REG(hw, E1000_TDLEN(i))); txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (hw->mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if (hw->phy.media_type == e1000_media_type_fiber || hw->phy.media_type == e1000_media_type_internal_serdes) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(hw, E1000_TIPG, tipg); E1000_WRITE_REG(hw, E1000_TIDV, sc->tx_int_delay.value); if(hw->mac.type >= e1000_82540) E1000_WRITE_REG(hw, E1000_TADV, sc->tx_abs_int_delay.value); if (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572) { tarc = E1000_READ_REG(hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc); } else if (hw->mac.type == e1000_80003es2lan) { /* errata: program both 
queues to unweighted RR */ tarc = E1000_READ_REG(hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(hw, E1000_TARC(1), tarc); } else if (hw->mac.type == e1000_82574) { tarc = E1000_READ_REG(hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( sc->tx_num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(hw, E1000_TARC(0), tarc); E1000_WRITE_REG(hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(hw, E1000_TARC(0), tarc); } if (sc->tx_int_delay.value > 0) sc->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (hw->mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(hw, E1000_TCTL, tctl); /* SPT and KBL errata workarounds */ if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); /* i218-i219 Specification Update 1.5.4.5 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ; reg |= E1000_TARC0_CB_MULTIQ_2_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Enable receive unit. * **********************************************************************/ #define BSIZEPKT_ROUNDUP ((1<<E1000_SRRCTL_BSIZEPKT_SHIFT)-1) static void em_initialize_receive_unit(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); if_softc_ctx_t scctx = sc->shared; - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &sc->hw; struct em_rx_queue *que; int i; uint32_t rctl, rxcsum; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; if (hw->mac.type >= e1000_82540) { E1000_WRITE_REG(hw, E1000_RADV, sc->rx_abs_int_delay.value); /* * Set the interrupt throttling rate.
Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); } E1000_WRITE_REG(hw, E1000_RDTR, sc->rx_int_delay.value); if (hw->mac.type >= em_mac_min) { uint32_t rfctl; /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* * When using MSI-X interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); } /* Set up L3 and L4 csum Rx descriptor offloads */ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; if (hw->mac.type > e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; else if (hw->mac.type < em_mac_min && if_getcapenable(ifp) & IFCAP_HWCSUM_IPV6) rxcsum |= E1000_RXCSUM_IPV6OFL; } else { rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); if (hw->mac.type > e1000_82575) rxcsum &= ~E1000_RXCSUM_CRCOFL; else if (hw->mac.type < em_mac_min) rxcsum &= ~E1000_RXCSUM_IPV6OFL; } if (sc->rx_num_queues > 1) { /* RSS hash needed in the Rx descriptor */ rxcsum |= E1000_RXCSUM_PCSD; if (hw->mac.type >= igb_mac_min) igb_initialize_rss_mapping(sc); else em_initialize_rss_mapping(sc); } E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); /* * XXX TEMPORARY WORKAROUND: on some systems with 82573 * long latencies are observed, like Lenovo X60. This * change eliminates the problem, but since having positive * values in RDTR is a known source of problems on other * platforms another solution is being sought. */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (i = 0, que = sc->rx_queues; i < sc->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rx_paddr; #if 0 u32 rdt = sc->rx_num_queues -1; /* default */ #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), 0); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if ((hw->mac.type == e1000_ich9lan || hw->mac.type == e1000_pch2lan || hw->mac.type == e1000_ich10lan) && if_getmtu(ifp) > ETHERMTU) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (hw->mac.type == e1000_82574) { for (int i = 0; i < sc->rx_num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (hw->mac.type >= igb_mac_min) { u32 psize, srrctl = 0; if (if_getmtu(ifp) > ETHERMTU) { psize = scctx->isc_max_frame_size; /* are we on a vlan? 
*/ - if (ifp->if_vlantrunk != NULL) + if (if_vlantrunkinuse(ifp)) psize += VLAN_TAG_SIZE; if (sc->vf_ifp) e1000_rlpml_set_vf(hw, psize); else E1000_WRITE_REG(hw, E1000_RLPML, psize); } /* Set maximum packet buffer len */ srrctl |= (sc->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> E1000_SRRCTL_BSIZEPKT_SHIFT; /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((sc->rx_num_queues > 1) && (sc->fc == e1000_fc_none || sc->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (i = 0, que = sc->rx_queues; i < sc->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 bus_addr = rxr->rx_paddr; u32 rxdctl; #ifdef notyet /* Configure for header split? -- ignore for now */ rxr->hdr_split = igb_header_split; #else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (hw->mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, true); else e1000_lv_jumbo_workaround_ich8lan(hw, false); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; /* Set up packet buffer size, overridden by per queue srrctl on igb */ if (hw->mac.type < igb_mac_min) { if (sc->rx_mbuf_sz > 2048 && sc->rx_mbuf_sz <= 4096) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (sc->rx_mbuf_sz > 4096 && sc->rx_mbuf_sz <= 8192) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; else if (sc->rx_mbuf_sz > 8192) rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX; else { rctl |= E1000_RCTL_SZ_2048; rctl &= ~E1000_RCTL_BSEX; } } else rctl |= E1000_RCTL_SZ_2048; /* * rctl bits 11:10 are as follows * lem: reserved * em: DTYPE * igb: reserved * and should be 00 on all of the above */ rctl &= ~0x00000C00; /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } static void em_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct e1000_softc *sc = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; sc->shadow_vfta[index] |= (1 << bit); ++sc->num_vlans; em_if_vlan_filter_write(sc); } static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct e1000_softc *sc = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; sc->shadow_vfta[index] &= ~(1 << bit); --sc->num_vlans; em_if_vlan_filter_write(sc); } static bool em_if_vlan_filter_capable(if_ctx_t ctx) { if_t ifp = iflib_get_ifp(ctx); if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) && !em_disable_crc_stripping) return (true); return (false); } static bool em_if_vlan_filter_used(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); if (!em_if_vlan_filter_capable(ctx)) return (false); for (int i = 0; i < EM_VFTA_SIZE; i++) if (sc->shadow_vfta[i] != 0) return (true); return (false); } static void em_if_vlan_filter_enable(struct e1000_softc *sc) { struct e1000_hw *hw = &sc->hw; u32 reg; reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; 
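/* * Illustrative note (not part of the original change): VFE enables the * 4096-entry hardware VLAN filter table that em_if_vlan_register() above * mirrors in shadow_vfta as 128 32-bit words, using index = (vtag >> 5) * & 0x7F and bit = vtag & 0x1F. For example, VLAN ID 100 lands in * shadow_vfta[3], bit 4, since 100 == 3 * 32 + 4. */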
E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_if_vlan_filter_disable(struct e1000_softc *sc) { struct e1000_hw *hw = &sc->hw; u32 reg; reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_if_vlan_filter_write(struct e1000_softc *sc) { struct e1000_hw *hw = &sc->hw; if (sc->vf_ifp) return; /* Disable interrupts for lem-class devices during the filter change */ if (hw->mac.type < em_mac_min) em_if_intr_disable(sc->ctx); for (int i = 0; i < EM_VFTA_SIZE; i++) if (sc->shadow_vfta[i] != 0) { /* XXXKB: incomplete VF support, we return early above */ if (sc->vf_ifp) e1000_vfta_set_vf(hw, sc->shadow_vfta[i], true); else e1000_write_vfta(hw, i, sc->shadow_vfta[i]); } /* Re-enable interrupts for lem-class devices */ if (hw->mac.type < em_mac_min) em_if_intr_enable(sc->ctx); } static void em_setup_vlan_hw_support(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); u32 reg; /* XXXKB: Return early if we are a VF until VF decap and filter management * is ready and tested. */ if (sc->vf_ifp) return; if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING && !em_disable_crc_stripping) { reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); } else { reg = E1000_READ_REG(hw, E1000_CTRL); reg &= ~E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); } /* If we aren't doing HW filtering, we're done */ if (!em_if_vlan_filter_capable(ctx)) { em_if_vlan_filter_disable(sc); return; } /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ em_if_vlan_filter_write(sc); /* Enable the Filter Table */ em_if_vlan_filter_enable(sc); } static void em_if_intr_enable(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; u32 ims_mask = IMS_ENABLE_MASK; if (sc->intr_type == IFLIB_INTR_MSIX) { E1000_WRITE_REG(hw, EM_EIAC, sc->ims); ims_mask |= sc->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); E1000_WRITE_FLUSH(hw); } static void em_if_intr_disable(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; if (sc->intr_type == IFLIB_INTR_MSIX) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_FLUSH(hw); } static void igb_if_intr_enable(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; u32 mask; if (__predict_true(sc->intr_type == IFLIB_INTR_MSIX)) { mask = (sc->que_mask | sc->link_mask); E1000_WRITE_REG(hw, E1000_EIAC, mask); E1000_WRITE_REG(hw, E1000_EIAM, mask); E1000_WRITE_REG(hw, E1000_EIMS, mask); E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC); } else E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK); E1000_WRITE_FLUSH(hw); } static void igb_if_intr_disable(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; if (__predict_true(sc->intr_type == IFLIB_INTR_MSIX)) { E1000_WRITE_REG(hw, E1000_EIMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_EIAC, 0); } E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_FLUSH(hw); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... 
aka * to disable special hardware management features */ static void em_init_manageability(struct e1000_softc *sc) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (sc->has_manage) { int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H); int manc = E1000_READ_REG(&sc->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct e1000_softc *sc) { if (sc->has_manage) { int manc = E1000_READ_REG(&sc->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct e1000_softc *sc) { u32 ctrl_ext, swsm; if (sc->vf_ifp) return; if (sc->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&sc->hw, E1000_SWSM); E1000_WRITE_REG(&sc->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct e1000_softc *sc) { u32 ctrl_ext, swsm; if (!sc->has_manage) return; if (sc->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&sc->hw, E1000_SWSM); E1000_WRITE_REG(&sc->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (false); } return (true); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. 
*/ static void em_get_wakeup(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u16 eeprom_data = 0, device_id, apme_mask; sc->has_manage = e1000_enable_mng_pass_thru(&sc->hw); apme_mask = EM_EEPROM_APME; switch (sc->hw.mac.type) { case e1000_82542: case e1000_82543: break; case e1000_82544: e1000_read_nvm(&sc->hw, NVM_INIT_CONTROL2_REG, 1, &eeprom_data); apme_mask = EM_82544_APME; break; case e1000_82546: case e1000_82546_rev_3: if (sc->hw.bus.func == 1) { e1000_read_nvm(&sc->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&sc->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_82573: case e1000_82583: sc->has_amt = true; /* FALLTHROUGH */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (sc->hw.bus.func == 1) { e1000_read_nvm(&sc->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&sc->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82575: /* listing all igb devices */ case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; sc->has_amt = true; eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC); break; default: e1000_read_nvm(&sc->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) sc->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82546GB_PCIE: sc->wol = 0; break; case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&sc->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) sc->wol = 0; break; case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) sc->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&sc->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) sc->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) sc->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if_t ifp = iflib_get_ifp(ctx); int error = 0; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0) return; /* * Determine type of Wakeup: note that wol * is set with all bits on by default. 
*/ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) sc->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_UCAST) == 0) sc->wol &= ~E1000_WUFC_EX; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) sc->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&sc->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&sc->hw, E1000_RCTL, rctl); } if (!(sc->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC))) goto pme; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&sc->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&sc->hw, E1000_CTRL, ctrl); /* Keep the laser running on Fiber adapters */ if (sc->hw.phy.media_type == e1000_media_type_fiber || sc->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, ctrl_ext); } if ((sc->hw.mac.type == e1000_ich8lan) || (sc->hw.mac.type == e1000_pchlan) || (sc->hw.mac.type == e1000_ich9lan) || (sc->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&sc->hw); if ( sc->hw.mac.type >= e1000_pchlan) { error = em_enable_phy_wakeup(sc); if (error) goto pme; } else { /* Enable wakeup by the MAC */ E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol); } if (sc->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&sc->hw); pme: status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (!error && (if_getcapenable(ifp) & IFCAP_WOL)) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* * WOL in the newer chipset interfaces (pchlan) * requires things to be copied into the PHY */ static int em_enable_phy_wakeup(struct e1000_softc *sc) { struct e1000_hw *hw = &sc->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < hw->mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(hw, E1000_WUFC, sc->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(hw, BM_WUFC, sc->wol); e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | 
BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_if_led_func(if_ctx_t ctx, int onoff) { struct e1000_softc *sc = iflib_get_softc(ctx); if (onoff) { e1000_setup_led(&sc->hw); e1000_led_on(&sc->hw); } else { e1000_led_off(&sc->hw); e1000_cleanup_led(&sc->hw); } } /* * Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct e1000_softc *sc) { int base, reg; u16 link_cap,link_ctrl; device_t dev = sc->dev; switch (sc->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct e1000_softc *sc) { u64 prev_xoffrxc = sc->stats.xoffrxc; if(sc->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&sc->hw, E1000_STATUS) & E1000_STATUS_LU)) { sc->stats.symerrs += E1000_READ_REG(&sc->hw, E1000_SYMERRS); sc->stats.sec += E1000_READ_REG(&sc->hw, E1000_SEC); } sc->stats.crcerrs += E1000_READ_REG(&sc->hw, E1000_CRCERRS); sc->stats.mpc += E1000_READ_REG(&sc->hw, E1000_MPC); sc->stats.scc += E1000_READ_REG(&sc->hw, E1000_SCC); sc->stats.ecol += E1000_READ_REG(&sc->hw, E1000_ECOL); sc->stats.mcc += E1000_READ_REG(&sc->hw, E1000_MCC); sc->stats.latecol += E1000_READ_REG(&sc->hw, E1000_LATECOL); sc->stats.colc += E1000_READ_REG(&sc->hw, E1000_COLC); sc->stats.dc += E1000_READ_REG(&sc->hw, E1000_DC); sc->stats.rlec += E1000_READ_REG(&sc->hw, E1000_RLEC); sc->stats.xonrxc += E1000_READ_REG(&sc->hw, E1000_XONRXC); sc->stats.xontxc += E1000_READ_REG(&sc->hw, E1000_XONTXC); sc->stats.xoffrxc += E1000_READ_REG(&sc->hw, E1000_XOFFRXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ if (sc->stats.xoffrxc != prev_xoffrxc) sc->shared->isc_pause_frames = 1; sc->stats.xofftxc += E1000_READ_REG(&sc->hw, E1000_XOFFTXC); sc->stats.fcruc += E1000_READ_REG(&sc->hw, E1000_FCRUC); sc->stats.prc64 += E1000_READ_REG(&sc->hw, E1000_PRC64); sc->stats.prc127 += E1000_READ_REG(&sc->hw, E1000_PRC127); sc->stats.prc255 += E1000_READ_REG(&sc->hw, E1000_PRC255); sc->stats.prc511 += E1000_READ_REG(&sc->hw, E1000_PRC511); sc->stats.prc1023 += E1000_READ_REG(&sc->hw, E1000_PRC1023); sc->stats.prc1522 += E1000_READ_REG(&sc->hw, E1000_PRC1522); sc->stats.gprc += E1000_READ_REG(&sc->hw, E1000_GPRC); sc->stats.bprc += E1000_READ_REG(&sc->hw, E1000_BPRC); sc->stats.mprc += E1000_READ_REG(&sc->hw, E1000_MPRC); sc->stats.gptc += E1000_READ_REG(&sc->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
*/ /* Both registers clear on the read of the high dword */ sc->stats.gorc += E1000_READ_REG(&sc->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&sc->hw, E1000_GORCH) << 32); sc->stats.gotc += E1000_READ_REG(&sc->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&sc->hw, E1000_GOTCH) << 32); sc->stats.rnbc += E1000_READ_REG(&sc->hw, E1000_RNBC); sc->stats.ruc += E1000_READ_REG(&sc->hw, E1000_RUC); sc->stats.rfc += E1000_READ_REG(&sc->hw, E1000_RFC); sc->stats.roc += E1000_READ_REG(&sc->hw, E1000_ROC); sc->stats.rjc += E1000_READ_REG(&sc->hw, E1000_RJC); sc->stats.tor += E1000_READ_REG(&sc->hw, E1000_TORH); sc->stats.tot += E1000_READ_REG(&sc->hw, E1000_TOTH); sc->stats.tpr += E1000_READ_REG(&sc->hw, E1000_TPR); sc->stats.tpt += E1000_READ_REG(&sc->hw, E1000_TPT); sc->stats.ptc64 += E1000_READ_REG(&sc->hw, E1000_PTC64); sc->stats.ptc127 += E1000_READ_REG(&sc->hw, E1000_PTC127); sc->stats.ptc255 += E1000_READ_REG(&sc->hw, E1000_PTC255); sc->stats.ptc511 += E1000_READ_REG(&sc->hw, E1000_PTC511); sc->stats.ptc1023 += E1000_READ_REG(&sc->hw, E1000_PTC1023); sc->stats.ptc1522 += E1000_READ_REG(&sc->hw, E1000_PTC1522); sc->stats.mptc += E1000_READ_REG(&sc->hw, E1000_MPTC); sc->stats.bptc += E1000_READ_REG(&sc->hw, E1000_BPTC); /* Interrupt Counts */ sc->stats.iac += E1000_READ_REG(&sc->hw, E1000_IAC); sc->stats.icrxptc += E1000_READ_REG(&sc->hw, E1000_ICRXPTC); sc->stats.icrxatc += E1000_READ_REG(&sc->hw, E1000_ICRXATC); sc->stats.ictxptc += E1000_READ_REG(&sc->hw, E1000_ICTXPTC); sc->stats.ictxatc += E1000_READ_REG(&sc->hw, E1000_ICTXATC); sc->stats.ictxqec += E1000_READ_REG(&sc->hw, E1000_ICTXQEC); sc->stats.ictxqmtc += E1000_READ_REG(&sc->hw, E1000_ICTXQMTC); sc->stats.icrxdmtc += E1000_READ_REG(&sc->hw, E1000_ICRXDMTC); sc->stats.icrxoc += E1000_READ_REG(&sc->hw, E1000_ICRXOC); if (sc->hw.mac.type >= e1000_82543) { sc->stats.algnerrc += E1000_READ_REG(&sc->hw, E1000_ALGNERRC); sc->stats.rxerrc += E1000_READ_REG(&sc->hw, E1000_RXERRC); sc->stats.tncrs += E1000_READ_REG(&sc->hw, E1000_TNCRS); sc->stats.cexterr += E1000_READ_REG(&sc->hw, E1000_CEXTERR); sc->stats.tsctc += E1000_READ_REG(&sc->hw, E1000_TSCTC); sc->stats.tsctfc += E1000_READ_REG(&sc->hw, E1000_TSCTFC); } } static uint64_t em_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct e1000_softc *sc = iflib_get_softc(ctx); - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_COLLISIONS: return (sc->stats.colc); case IFCOUNTER_IERRORS: return (sc->dropped_pkts + sc->stats.rxerrc + sc->stats.crcerrs + sc->stats.algnerrc + sc->stats.ruc + sc->stats.roc + sc->stats.mpc + sc->stats.cexterr); case IFCOUNTER_OERRORS: return (sc->stats.ecol + sc->stats.latecol + sc->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* em_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for unknown events. * * @returns true if iflib needs to reinit the interface */ static bool em_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: return (false); default: return (true); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct e1000_softc *sc; u_int val; sc = oidp->oid_arg1; val = E1000_READ_REG(&sc->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. 
*/ static void em_add_hw_stats(struct e1000_softc *sc) { device_t dev = iflib_get_dev(sc->ctx); struct em_tx_queue *tx_que = sc->tx_queues; struct em_rx_queue *rx_que = sc->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &sc->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &sc->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &sc->link_irq, "Link MSI-X IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &sc->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &sc->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &sc->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &sc->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < sc->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); } for (int j = 0; j < sc->rx_num_queues; j++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", j); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, 
&stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &sc->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &sc->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &sc->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &sc->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &sc->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &sc->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &sc->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &sc->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &sc->stats.rjc, "Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &sc->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &sc->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &sc->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &sc->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &sc->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &sc->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &sc->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &sc->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &sc->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &sc->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &sc->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &sc->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &sc->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &sc->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &sc->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &sc->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &sc->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &sc->stats.prc1522, "1024-1522 byte frames received"); 
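/* * Usage sketch (illustrative, not part of the original change): each * counter registered here is exported read-only under the adapter's * device sysctl tree, so it can be read from userland with sysctl(8), * e.g., assuming the adapter attached as em0: * * sysctl dev.em.0.mac_stats.good_pkts_recvd * sysctl dev.em.0.mac_stats.crc_errs */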
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &sc->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &sc->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &sc->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &sc->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &sc->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &sc->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &sc->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &sc->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &sc->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &sc->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &sc->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &sc->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &sc->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &sc->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &sc->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &sc->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &sc->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &sc->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &sc->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &sc->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &sc->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &sc->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &sc->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } static void em_fw_version_locked(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; struct e1000_fw_version *fw_ver = &sc->fw_ver; uint16_t eep = 0; /* * em_fw_version_locked() must run under the IFLIB_CTX_LOCK to meet the * NVM locking model, so we do it in em_if_attach_pre() and store the * info in the softc */ ASSERT_CTX_LOCK_HELD(hw); *fw_ver = (struct 
e1000_fw_version){0}; if (hw->mac.type >= igb_mac_min) { /* * Use the Shared Code for igb(4) */ e1000_get_fw_version(hw, fw_ver); } else { /* * Otherwise, EEPROM version should be present on (almost?) all * devices here */ if(e1000_read_nvm(hw, NVM_VERSION, 1, &eep)) { INIT_DEBUGOUT("can't get EEPROM version"); return; } fw_ver->eep_major = (eep & NVM_MAJOR_MASK) >> NVM_MAJOR_SHIFT; fw_ver->eep_minor = (eep & NVM_MINOR_MASK) >> NVM_MINOR_SHIFT; fw_ver->eep_build = (eep & NVM_IMAGE_ID_MASK); } } static void em_sbuf_fw_version(struct e1000_fw_version *fw_ver, struct sbuf *buf) { const char *space = ""; if (fw_ver->eep_major || fw_ver->eep_minor || fw_ver->eep_build) { sbuf_printf(buf, "EEPROM V%d.%d-%d", fw_ver->eep_major, fw_ver->eep_minor, fw_ver->eep_build); space = " "; } if (fw_ver->invm_major || fw_ver->invm_minor || fw_ver->invm_img_type) { sbuf_printf(buf, "%sNVM V%d.%d imgtype%d", space, fw_ver->invm_major, fw_ver->invm_minor, fw_ver->invm_img_type); space = " "; } if (fw_ver->or_valid) { sbuf_printf(buf, "%sOption ROM V%d-b%d-p%d", space, fw_ver->or_major, fw_ver->or_build, fw_ver->or_patch); space = " "; } if (fw_ver->etrack_id) sbuf_printf(buf, "%seTrack 0x%08x", space, fw_ver->etrack_id); } static void em_print_fw_version(struct e1000_softc *sc ) { device_t dev = sc->dev; struct sbuf *buf; int error = 0; buf = sbuf_new_auto(); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return; } em_sbuf_fw_version(&sc->fw_ver, buf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); else if (sbuf_len(buf)) device_printf(dev, "%s\n", sbuf_data(buf)); sbuf_delete(buf); } static int em_sysctl_print_fw_version(SYSCTL_HANDLER_ARGS) { struct e1000_softc *sc = (struct e1000_softc *)arg1; device_t dev = sc->dev; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } em_sbuf_fw_version(&sc->fw_ver, buf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (0); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct e1000_softc *sc = (struct e1000_softc *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. 
*/ if (result == 1) em_print_nvm_info(sc); return (error); } static void em_print_nvm_info(struct e1000_softc *sc) { struct e1000_hw *hw = &sc->hw; struct sx *iflib_ctx_lock = iflib_ctx_lock_get(sc->ctx); u16 eeprom_data; int i, j, row = 0; /* It's a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); /* We rely on the IFLIB_CTX_LOCK as part of NVM locking model */ sx_xlock(iflib_ctx_lock); ASSERT_CTX_LOCK_HELD(hw); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } sx_xunlock(iflib_ctx_lock); printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct e1000_softc *sc; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *) arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; sc = info->sc; regval = E1000_READ_OFFSET(&sc->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { sc->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else sc->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&sc->hw, info->offset, regval); return (0); } static void em_add_int_delay_sysctl(struct e1000_softc *sc, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->sc = sc; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, info, 0, em_sysctl_int_delay, "I", description); } /* * Set flow control using sysctl: * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct e1000_softc *sc = (struct e1000_softc *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == sc->fc) /* no change? 
*/ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: sc->hw.fc.requested_mode = input; sc->fc = input; break; default: /* Do nothing */ return (error); } sc->hw.fc.current_mode = sc->hw.fc.requested_mode; e1000_force_mac_fc(&sc->hw); return (error); } /* * Manage Energy Efficient Ethernet: * Control values: * 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct e1000_softc *sc = (struct e1000_softc *) arg1; int error, value; value = sc->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); sc->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_if_init(sc->ctx); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct e1000_softc *sc; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { sc = (struct e1000_softc *) arg1; em_print_debug_info(sc); } return (error); } static int em_get_rs(SYSCTL_HANDLER_ARGS) { struct e1000_softc *sc = (struct e1000_softc *) arg1; int error; int result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr || result != 1) return (error); em_dump_rs(sc); return (error); } static void em_if_debug(if_ctx_t ctx) { em_dump_rs(iflib_get_softc(ctx)); } /* * This routine is meant to be fluid, add whatever is * needed for debugging a problem. -jfv */ static void em_print_debug_info(struct e1000_softc *sc) { device_t dev = iflib_get_dev(sc->ctx); - struct ifnet *ifp = iflib_get_ifp(sc->ctx); + if_t ifp = iflib_get_ifp(sc->ctx); struct tx_ring *txr = &sc->tx_queues->txr; struct rx_ring *rxr = &sc->rx_queues->rxr; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < sc->tx_num_queues; i++, txr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&sc->hw, E1000_TDH(i)), E1000_READ_REG(&sc->hw, E1000_TDT(i))); } for (int j=0; j < sc->rx_num_queues; j++, rxr++) { device_printf(dev, "RX Queue %d ------\n", j); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&sc->hw, E1000_RDH(j)), E1000_READ_REG(&sc->hw, E1000_RDT(j))); } } /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSI-X * vectors from 3 to 5, for proper multiqueue support. */ static void em_enable_vectors_82574(if_ctx_t ctx) { struct e1000_softc *sc = iflib_get_softc(ctx); struct e1000_hw *hw = &sc->hw; device_t dev = iflib_get_dev(ctx); u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); if (bootverbose) device_printf(dev, "EM_NVM_PCIE_CTRL = %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSI-X vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } diff --git a/sys/dev/ice/ice_lib.c b/sys/dev/ice/ice_lib.c index f562b3b55b63..1fec783cd429 100644 --- a/sys/dev/ice/ice_lib.c +++ b/sys/dev/ice/ice_lib.c @@ -1,9746 +1,9746 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2021, Intel Corporation * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ /** * @file ice_lib.c * @brief Generic device setup and sysctl functions * * Library of generic device functions not specific to the networking stack. * * This includes hardware initialization functions, as well as handlers for * many of the device sysctls used to probe driver status or tune specific * behaviors. */ #include "ice_lib.h" #include "ice_iflib.h" #include #include #include #include #include #include #include /** * @var M_ICE * @brief main ice driver allocation type * * malloc(9) allocation type used by the majority of memory allocations in the * ice driver. 
*/ MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations"); /* * Helper function prototypes */ static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size); static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx); static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type); static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx); static int ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q); static int ice_setup_rx_ctx(struct ice_rx_queue *rxq); static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg); static void ice_free_fltr_list(struct ice_list_head *list); static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list, const u8 *addr, enum ice_sw_fwd_act_type action); static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname, struct ice_ctl_q_info *cq); static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e); static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname, struct ice_rq_event_info *event); static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf); static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf); static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf); static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info); static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors); static void ice_add_debug_tunables(struct ice_softc *sc); static void ice_add_debug_sysctls(struct ice_softc *sc); static void ice_vsi_set_rss_params(struct ice_vsi *vsi); static void ice_get_default_rss_key(u8 *seed); static int ice_set_rss_key(struct ice_vsi *vsi); static int ice_set_rss_lut(struct ice_vsi *vsi); static void ice_set_rss_flow_flds(struct ice_vsi *vsi); static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi); static const char *ice_aq_speed_to_str(struct ice_port_info *pi); static const char *ice_requested_fec_mode(struct ice_port_info *pi); static const char *ice_negotiated_fec_mode(struct ice_port_info *pi); static const char *ice_autoneg_mode(struct ice_port_info *pi); static const char *ice_flowcontrol_mode(struct ice_port_info *pi); static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw); static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status); static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc); static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed); static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width); static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi); static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi, struct sysctl_ctx_list *ctx, struct sysctl_oid *parent); static void ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent_list, u64* pfc_stat_location, const char *node_name, const char *descr); static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_hw_port_stats *stats); static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi, enum ice_vsi_type type, int idx, bool dynamic); static void ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event); static void ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event); static int ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list, u16 ethertype, u16 direction, 
enum ice_sw_fwd_act_type action); static void ice_add_rx_lldp_filter(struct ice_softc *sc); static void ice_del_rx_lldp_filter(struct ice_softc *sc); static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high); struct ice_phy_data; static int ice_intersect_phy_types_and_speeds(struct ice_softc *sc, struct ice_phy_data *phy_data); static int ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg); static int ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg); static void ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg); static void ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv); static void ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low, u64 *phy_type_high); static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type); static void ice_handle_health_status_event(struct ice_softc *sc, struct ice_rq_event_info *event); static void ice_print_health_status_string(device_t dev, struct ice_aqc_health_status_elem *elem); static void ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event); static bool ice_check_ets_bw(u8 *table); static bool ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg, struct ice_dcbx_cfg *new_cfg); static void ice_dcb_recfg(struct ice_softc *sc); static u8 ice_dcb_num_tc(u8 tc_map); static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit); static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map); static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets); static void ice_stop_pf_vsi(struct ice_softc *sc); static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt); static void ice_do_dcb_reconfig(struct ice_softc *sc); static int ice_config_pfc(struct ice_softc *sc, u8 new_mode); static u8 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg); static int ice_module_init(void); static int ice_module_exit(void); /* * package version comparison functions */ static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name); static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver); /* * dynamic sysctl handlers */ static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS); static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS); static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS); static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS); static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS); static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS); static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high); static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS); static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS); static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS); static int 
ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode); static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS); static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS); static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS); static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS); static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS); static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS); static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS); static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS); static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS); static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS); /** * ice_map_bar - Map PCIe BAR memory * @dev: the PCIe device * @bar: the BAR info structure * @bar_num: PCIe BAR number * * Maps the specified PCIe BAR. Stores the mapping data in struct * ice_bar_info. */ int ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num) { if (bar->res != NULL) { device_printf(dev, "PCI BAR%d already mapped\n", bar_num); return (EDOOFUS); } bar->rid = PCIR_BAR(bar_num); bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid, RF_ACTIVE); if (!bar->res) { device_printf(dev, "PCI BAR%d mapping failed\n", bar_num); return (ENXIO); } bar->tag = rman_get_bustag(bar->res); bar->handle = rman_get_bushandle(bar->res); bar->size = rman_get_size(bar->res); return (0); } /** * ice_free_bar - Free PCIe BAR memory * @dev: the PCIe device * @bar: the BAR info structure * * Frees the specified PCIe BAR, releasing its resources. */ void ice_free_bar(device_t dev, struct ice_bar_info *bar) { if (bar->res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res); bar->res = NULL; } /** * ice_set_ctrlq_len - Configure ctrlq lengths for a device * @hw: the device hardware structure * * Configures the control queues for the given device, setting up the * specified lengths, prior to initializing hardware. */ void ice_set_ctrlq_len(struct ice_hw *hw) { hw->adminq.num_rq_entries = ICE_AQ_LEN; hw->adminq.num_sq_entries = ICE_AQ_LEN; hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN; hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN; hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN; hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; } /** * ice_get_next_vsi - Get the next available VSI slot * @all_vsi: the VSI list * @size: the size of the VSI list * * Returns the index to the first available VSI slot. Will return size (one * past the last index) if there are no slots available. 
*/ static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size) { int i; for (i = 0; i < size; i++) { if (all_vsi[i] == NULL) return i; } return size; } /** * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs * @sc: the device private softc structure * @vsi: the VSI to setup * @type: the VSI type of the new VSI * @idx: the index in the all_vsi array to use * @dynamic: whether this VSI memory was dynamically allocated * * Perform setup for a VSI that is common to both dynamically allocated VSIs * and the static PF VSI which is embedded in the softc structure. */ static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi, enum ice_vsi_type type, int idx, bool dynamic) { /* Store important values in VSI struct */ vsi->type = type; vsi->sc = sc; vsi->idx = idx; sc->all_vsi[idx] = vsi; vsi->dynamic = dynamic; /* Setup the VSI tunables now */ ice_add_vsi_tunables(vsi, sc->vsi_sysctls); } /** * ice_alloc_vsi - Allocate a dynamic VSI * @sc: device softc structure * @type: VSI type * * Allocates a new dynamic VSI structure and inserts it into the VSI list. */ struct ice_vsi * ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type) { struct ice_vsi *vsi; int idx; /* Find an open index for a new VSI to be allocated. If the returned * index is >= the num_available_vsi then it means no slot is * available. */ idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi); if (idx >= sc->num_available_vsi) { device_printf(sc->dev, "No available VSI slots\n"); return NULL; } vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_WAITOK|M_ZERO); if (!vsi) { device_printf(sc->dev, "Unable to allocate VSI memory\n"); return NULL; } ice_setup_vsi_common(sc, vsi, type, idx, true); return vsi; } /** * ice_setup_pf_vsi - Setup the PF VSI * @sc: the device private softc * * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of * the softc memory, instead of being dynamically allocated at creation. */ void ice_setup_pf_vsi(struct ice_softc *sc) { ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false); } /** * ice_alloc_vsi_qmap * @vsi: VSI structure * @max_tx_queues: Number of transmit queues to identify * @max_rx_queues: Number of receive queues to identify * * Allocates a max_[t|r]x_queues array of words for the VSI where each * word contains the index of the queue it represents. In here, all * words are initialized to an index of ICE_INVALID_RES_IDX, indicating * all queues for this VSI are not yet assigned an index and thus, * not ready for use. * * Returns an error code on failure. 
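 *
 * A minimal sketch of the expected pairing (hypothetical caller, with
 * ntxq/nrxq standing in for the real queue counts):
 *
 *	if (ice_alloc_vsi_qmap(vsi, ntxq, nrxq) == 0) {
 *		... assign real queue indices via the resource manager ...
 *		ice_free_vsi_qmaps(vsi);
 *	}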
*/ int ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues, const int max_rx_queues) { struct ice_softc *sc = vsi->sc; int i; MPASS(max_tx_queues > 0); MPASS(max_rx_queues > 0); /* Allocate Tx queue mapping memory */ if (!(vsi->tx_qmap = (u16 *) malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK))) { device_printf(sc->dev, "Unable to allocate Tx qmap memory\n"); return (ENOMEM); } /* Allocate Rx queue mapping memory */ if (!(vsi->rx_qmap = (u16 *) malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK))) { device_printf(sc->dev, "Unable to allocate Rx qmap memory\n"); goto free_tx_qmap; } /* Mark every queue map as invalid to start with */ for (i = 0; i < max_tx_queues; i++) { vsi->tx_qmap[i] = ICE_INVALID_RES_IDX; } for (i = 0; i < max_rx_queues; i++) { vsi->rx_qmap[i] = ICE_INVALID_RES_IDX; } return 0; free_tx_qmap: free(vsi->tx_qmap, M_ICE); vsi->tx_qmap = NULL; return (ENOMEM); } /** * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI * @vsi: the VSI private structure * * Frees the PF qmaps associated with the given VSI. Generally this will be * called by ice_release_vsi, but may need to be called during attach cleanup, * depending on when the qmaps were allocated. */ void ice_free_vsi_qmaps(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; if (vsi->tx_qmap) { ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); free(vsi->tx_qmap, M_ICE); vsi->tx_qmap = NULL; } if (vsi->rx_qmap) { ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap, vsi->num_rx_queues); free(vsi->rx_qmap, M_ICE); vsi->rx_qmap = NULL; } } /** * ice_set_default_vsi_ctx - Setup default VSI context parameters * @ctx: the VSI context to initialize * * Initialize and prepare a default VSI context for configuring a new VSI. */ static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx) { u32 table = 0; memset(&ctx->info, 0, sizeof(ctx->info)); /* VSI will be allocated from shared pool */ ctx->alloc_from_pool = true; /* Enable source pruning by default */ ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; /* Allow all packets untagged/tagged */ ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL & ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >> ICE_AQ_VSI_INNER_VLAN_TX_MODE_S); /* Show VLAN/UP from packets in Rx descriptors */ ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH & ICE_AQ_VSI_INNER_VLAN_EMODE_M) >> ICE_AQ_VSI_INNER_VLAN_EMODE_S); /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); table |= ICE_UP_TABLE_TRANSLATE(2, 2); table |= ICE_UP_TABLE_TRANSLATE(3, 3); table |= ICE_UP_TABLE_TRANSLATE(4, 4); table |= ICE_UP_TABLE_TRANSLATE(5, 5); table |= ICE_UP_TABLE_TRANSLATE(6, 6); table |= ICE_UP_TABLE_TRANSLATE(7, 7); ctx->info.ingress_table = CPU_TO_LE32(table); ctx->info.egress_table = CPU_TO_LE32(table); /* Have 1:1 UP mapping for outer to inner UP table */ ctx->info.outer_up_table = CPU_TO_LE32(table); /* No Outer tag support, so outer_vlan_flags remains zero */ } /** * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS * @ctx: the VSI context to configure * @type: the VSI type * * Configures the VSI context for RSS, based on the VSI type. 
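 *
 * Summarized from the switch below (LUT scope and hash scheme per VSI type):
 *
 *	ICE_VSI_PF: lut_type ICE_AQ_VSI_Q_OPT_RSS_LUT_PF,  hash TPLZ
 *	ICE_VSI_VF: lut_type ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI, hash TPLZ
 *	other:      RSS is not configured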
*/ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type) { u8 lut_type, hash_type; switch (type) { case ICE_VSI_PF: lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; case ICE_VSI_VF: lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; default: /* Other VSI types do not support RSS */ return; } ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & ICE_AQ_VSI_Q_OPT_RSS_HASH_M)); } /** * ice_setup_vsi_qmap - Setup the queue mapping for a VSI * @vsi: the VSI to configure * @ctx: the VSI context to configure * * Configures the context for the given VSI, setting up how the firmware * should map the queues for this VSI. */ static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx) { int pow = 0; u16 qmap; MPASS(vsi->rx_qmap != NULL); /* TODO: * Handle multiple Traffic Classes * Handle scattered queues (for VFs) */ if (vsi->qmap_type != ICE_RESMGR_ALLOC_CONTIGUOUS) return (EOPNOTSUPP); ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG); ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]); ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues); /* Calculate the next power-of-2 of number of queues */ if (vsi->num_rx_queues) pow = flsl(vsi->num_rx_queues - 1); /* Assign all the queues to traffic class zero */ qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M; ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap); return 0; } /** * ice_initialize_vsi - Initialize a VSI for use * @vsi: the vsi to initialize * * Initialize a VSI over the adminq and prepare it for operation. */ int ice_initialize_vsi(struct ice_vsi *vsi) { struct ice_vsi_ctx ctx = { 0 }; struct ice_hw *hw = &vsi->sc->hw; u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; enum ice_status status; int err; /* For now, we only have code supporting PF VSIs */ switch (vsi->type) { case ICE_VSI_PF: ctx.flags = ICE_AQ_VSI_TYPE_PF; break; default: return (ENODEV); } ice_set_default_vsi_ctx(&ctx); ice_set_rss_vsi_ctx(&ctx, vsi->type); /* XXX: VSIs of other types may need different port info? */ ctx.info.sw_id = hw->port_info->sw_id; /* Set some RSS parameters based on the VSI type */ ice_vsi_set_rss_params(vsi); /* Initialize the Rx queue mapping for this VSI */ err = ice_setup_vsi_qmap(vsi, &ctx); if (err) { return err; } /* (Re-)add VSI to HW VSI handle list */ status = ice_add_vsi(hw, vsi->idx, &ctx, NULL); if (status != 0) { device_printf(vsi->sc->dev, "Add VSI AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } vsi->info = ctx.info; /* Initialize VSI with just 1 TC to start */ max_txqs[0] = vsi->num_tx_queues; status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, ICE_DFLT_TRAFFIC_CLASS, max_txqs); if (status) { device_printf(vsi->sc->dev, "Failed VSI lan queue config, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ice_deinit_vsi(vsi); return (ENODEV); } /* Reset VSI stats */ ice_reset_vsi_stats(vsi); return 0; } /** * ice_deinit_vsi - Tell firmware to release resources for a VSI * @vsi: the VSI to release * * Helper function which requests the firmware to release the hardware * resources associated with a given VSI. 
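 *
 * The teardown below is ordered: scheduler nodes are removed via
 * ice_rm_vsi_lan_cfg() before the VSI itself is released with
 * ice_free_vsi(), roughly:
 *
 *	ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
 *	ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);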
*/ void ice_deinit_vsi(struct ice_vsi *vsi) { struct ice_vsi_ctx ctx = { 0 }; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; enum ice_status status; /* Assert that the VSI pointer matches in the list */ MPASS(vsi == sc->all_vsi[vsi->idx]); ctx.info = vsi->info; status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx); if (status) { /* * This should only fail if the VSI handle is invalid, or if * any of the nodes have leaf nodes which are still in use. */ device_printf(sc->dev, "Unable to remove scheduler nodes for VSI %d, err %s\n", vsi->idx, ice_status_str(status)); } /* Tell firmware to release the VSI resources */ status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL); if (status != 0) { device_printf(sc->dev, "Free VSI %u AQ call failed, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /** * ice_release_vsi - Release resources associated with a VSI * @vsi: the VSI to release * * Release software and firmware resources associated with a VSI. Release the * queue managers associated with this VSI. Also free the VSI structure memory * if the VSI was allocated dynamically using ice_alloc_vsi(). */ void ice_release_vsi(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; int idx = vsi->idx; /* Assert that the VSI pointer matches in the list */ MPASS(vsi == sc->all_vsi[idx]); /* Cleanup RSS configuration */ if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS)) ice_clean_vsi_rss_cfg(vsi); ice_del_vsi_sysctl_ctx(vsi); /* * If we unload the driver after a reset fails, we do not need to do * this step. */ if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) ice_deinit_vsi(vsi); ice_free_vsi_qmaps(vsi); if (vsi->dynamic) { free(sc->all_vsi[idx], M_ICE); } sc->all_vsi[idx] = NULL; } /** * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate * @pi: port info data * * Returns the baudrate value for the current link speed of a given port. */ uint64_t ice_aq_speed_to_rate(struct ice_port_info *pi) { switch (pi->phy.link_info.link_speed) { case ICE_AQ_LINK_SPEED_100GB: return IF_Gbps(100); case ICE_AQ_LINK_SPEED_50GB: return IF_Gbps(50); case ICE_AQ_LINK_SPEED_40GB: return IF_Gbps(40); case ICE_AQ_LINK_SPEED_25GB: return IF_Gbps(25); case ICE_AQ_LINK_SPEED_10GB: return IF_Gbps(10); case ICE_AQ_LINK_SPEED_5GB: return IF_Gbps(5); case ICE_AQ_LINK_SPEED_2500MB: return IF_Mbps(2500); case ICE_AQ_LINK_SPEED_1000MB: return IF_Mbps(1000); case ICE_AQ_LINK_SPEED_100MB: return IF_Mbps(100); case ICE_AQ_LINK_SPEED_10MB: return IF_Mbps(10); case ICE_AQ_LINK_SPEED_UNKNOWN: default: /* return 0 if we don't know the link speed */ return 0; } } /** * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation * @pi: port info data * * Returns the string representation of the current link speed for a given * port. 
*/ static const char * ice_aq_speed_to_str(struct ice_port_info *pi) { switch (pi->phy.link_info.link_speed) { case ICE_AQ_LINK_SPEED_100GB: return "100 Gbps"; case ICE_AQ_LINK_SPEED_50GB: return "50 Gbps"; case ICE_AQ_LINK_SPEED_40GB: return "40 Gbps"; case ICE_AQ_LINK_SPEED_25GB: return "25 Gbps"; case ICE_AQ_LINK_SPEED_20GB: return "20 Gbps"; case ICE_AQ_LINK_SPEED_10GB: return "10 Gbps"; case ICE_AQ_LINK_SPEED_5GB: return "5 Gbps"; case ICE_AQ_LINK_SPEED_2500MB: return "2.5 Gbps"; case ICE_AQ_LINK_SPEED_1000MB: return "1 Gbps"; case ICE_AQ_LINK_SPEED_100MB: return "100 Mbps"; case ICE_AQ_LINK_SPEED_10MB: return "10 Mbps"; case ICE_AQ_LINK_SPEED_UNKNOWN: default: return "Unknown speed"; } } /** * ice_get_phy_type_low - Get media associated with phy_type_low * @phy_type_low: the low 64bits of phy_type from the AdminQ * * Given the lower 64bits of the phy_type from the hardware, return the * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown. * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should * be called. If phy_type_low is zero, call ice_phy_type_high. */ int ice_get_phy_type_low(uint64_t phy_type_low) { switch (phy_type_low) { case ICE_PHY_TYPE_LOW_100BASE_TX: return IFM_100_TX; case ICE_PHY_TYPE_LOW_100M_SGMII: return IFM_100_SGMII; case ICE_PHY_TYPE_LOW_1000BASE_T: return IFM_1000_T; case ICE_PHY_TYPE_LOW_1000BASE_SX: return IFM_1000_SX; case ICE_PHY_TYPE_LOW_1000BASE_LX: return IFM_1000_LX; case ICE_PHY_TYPE_LOW_1000BASE_KX: return IFM_1000_KX; case ICE_PHY_TYPE_LOW_1G_SGMII: return IFM_1000_SGMII; case ICE_PHY_TYPE_LOW_2500BASE_T: return IFM_2500_T; case ICE_PHY_TYPE_LOW_2500BASE_X: return IFM_2500_X; case ICE_PHY_TYPE_LOW_2500BASE_KX: return IFM_2500_KX; case ICE_PHY_TYPE_LOW_5GBASE_T: return IFM_5000_T; case ICE_PHY_TYPE_LOW_5GBASE_KR: return IFM_5000_KR; case ICE_PHY_TYPE_LOW_10GBASE_T: return IFM_10G_T; case ICE_PHY_TYPE_LOW_10G_SFI_DA: return IFM_10G_TWINAX; case ICE_PHY_TYPE_LOW_10GBASE_SR: return IFM_10G_SR; case ICE_PHY_TYPE_LOW_10GBASE_LR: return IFM_10G_LR; case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1: return IFM_10G_KR; case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC: return IFM_10G_AOC; case ICE_PHY_TYPE_LOW_10G_SFI_C2C: return IFM_10G_SFI; case ICE_PHY_TYPE_LOW_25GBASE_T: return IFM_25G_T; case ICE_PHY_TYPE_LOW_25GBASE_CR: return IFM_25G_CR; case ICE_PHY_TYPE_LOW_25GBASE_CR_S: return IFM_25G_CR_S; case ICE_PHY_TYPE_LOW_25GBASE_CR1: return IFM_25G_CR1; case ICE_PHY_TYPE_LOW_25GBASE_SR: return IFM_25G_SR; case ICE_PHY_TYPE_LOW_25GBASE_LR: return IFM_25G_LR; case ICE_PHY_TYPE_LOW_25GBASE_KR: return IFM_25G_KR; case ICE_PHY_TYPE_LOW_25GBASE_KR_S: return IFM_25G_KR_S; case ICE_PHY_TYPE_LOW_25GBASE_KR1: return IFM_25G_KR1; case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC: return IFM_25G_AOC; case ICE_PHY_TYPE_LOW_25G_AUI_C2C: return IFM_25G_AUI; case ICE_PHY_TYPE_LOW_40GBASE_CR4: return IFM_40G_CR4; case ICE_PHY_TYPE_LOW_40GBASE_SR4: return IFM_40G_SR4; case ICE_PHY_TYPE_LOW_40GBASE_LR4: return IFM_40G_LR4; case ICE_PHY_TYPE_LOW_40GBASE_KR4: return IFM_40G_KR4; case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC: return IFM_40G_XLAUI_AC; case ICE_PHY_TYPE_LOW_40G_XLAUI: return IFM_40G_XLAUI; case ICE_PHY_TYPE_LOW_50GBASE_CR2: return IFM_50G_CR2; case ICE_PHY_TYPE_LOW_50GBASE_SR2: return IFM_50G_SR2; case ICE_PHY_TYPE_LOW_50GBASE_LR2: return IFM_50G_LR2; case ICE_PHY_TYPE_LOW_50GBASE_KR2: return IFM_50G_KR2; case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC: return IFM_50G_LAUI2_AC; case ICE_PHY_TYPE_LOW_50G_LAUI2: return IFM_50G_LAUI2; case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC: 
return IFM_50G_AUI2_AC; case ICE_PHY_TYPE_LOW_50G_AUI2: return IFM_50G_AUI2; case ICE_PHY_TYPE_LOW_50GBASE_CP: return IFM_50G_CP; case ICE_PHY_TYPE_LOW_50GBASE_SR: return IFM_50G_SR; case ICE_PHY_TYPE_LOW_50GBASE_FR: return IFM_50G_FR; case ICE_PHY_TYPE_LOW_50GBASE_LR: return IFM_50G_LR; case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4: return IFM_50G_KR_PAM4; case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC: return IFM_50G_AUI1_AC; case ICE_PHY_TYPE_LOW_50G_AUI1: return IFM_50G_AUI1; case ICE_PHY_TYPE_LOW_100GBASE_CR4: return IFM_100G_CR4; case ICE_PHY_TYPE_LOW_100GBASE_SR4: return IFM_100G_SR4; case ICE_PHY_TYPE_LOW_100GBASE_LR4: return IFM_100G_LR4; case ICE_PHY_TYPE_LOW_100GBASE_KR4: return IFM_100G_KR4; case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC: return IFM_100G_CAUI4_AC; case ICE_PHY_TYPE_LOW_100G_CAUI4: return IFM_100G_CAUI4; case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC: return IFM_100G_AUI4_AC; case ICE_PHY_TYPE_LOW_100G_AUI4: return IFM_100G_AUI4; case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4: return IFM_100G_CR_PAM4; case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4: return IFM_100G_KR_PAM4; case ICE_PHY_TYPE_LOW_100GBASE_CP2: return IFM_100G_CP2; case ICE_PHY_TYPE_LOW_100GBASE_SR2: return IFM_100G_SR2; case ICE_PHY_TYPE_LOW_100GBASE_DR: return IFM_100G_DR; default: return IFM_UNKNOWN; } } /** * ice_get_phy_type_high - Get media associated with phy_type_high * @phy_type_high: the upper 64bits of phy_type from the AdminQ * * Given the upper 64bits of the phy_type from the hardware, return the * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be * called. If phy_type_high is zero, call ice_get_phy_type_low. */ int ice_get_phy_type_high(uint64_t phy_type_high) { switch (phy_type_high) { case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4: return IFM_100G_KR2_PAM4; case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC: return IFM_100G_CAUI2_AC; case ICE_PHY_TYPE_HIGH_100G_CAUI2: return IFM_100G_CAUI2; case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC: return IFM_100G_AUI2_AC; case ICE_PHY_TYPE_HIGH_100G_AUI2: return IFM_100G_AUI2; default: return IFM_UNKNOWN; } } /** * ice_phy_types_to_max_rate - Returns port's max supported baudrate * @pi: port info struct * * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs * to have been called before this function for it to work. 
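 *
 * That is, a caller is expected to have refreshed the PHY topology
 * capabilities first, along the lines of (sketch, error handling omitted;
 * pcaps is a local struct ice_aqc_get_phy_caps_data):
 *
 *	ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 *	    &pcaps, NULL);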
*/ static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi) { uint64_t phy_low = pi->phy.phy_type_low; uint64_t phy_high = pi->phy.phy_type_high; uint64_t max_rate = 0; int bit; /* * These are based on the indices used in the BIT() macros for * ICE_PHY_TYPE_LOW_* */ static const uint64_t phy_rates[] = { IF_Mbps(100), IF_Mbps(100), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Mbps(2500ULL), IF_Mbps(2500ULL), IF_Mbps(2500ULL), IF_Gbps(5ULL), IF_Gbps(5ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), /* These rates are for ICE_PHY_TYPE_HIGH_* */ IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL) }; /* coverity[address_of] */ for_each_set_bit(bit, &phy_high, 64) if ((bit + 64) < (int)ARRAY_SIZE(phy_rates)) max_rate = uqmax(max_rate, phy_rates[(bit + 64)]); /* coverity[address_of] */ for_each_set_bit(bit, &phy_low, 64) max_rate = uqmax(max_rate, phy_rates[bit]); return (max_rate); } /* The if_media type is split over the original 5 bit media variant field, * along with extended types using up extra bits in the options section. * We want to convert this split number into a bitmap index, so we reverse the * calculation of IFM_X here. */ #define IFM_IDX(x) (((x) & IFM_TMASK) | \ (((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT)) /** * ice_add_media_types - Add supported media types to the media structure * @sc: ice private softc structure * @media: ifmedia structure to setup * * Looks up the supported phy types, and initializes the various media types * available. * * @pre this function must be protected from being called while another thread * is accessing the ifmedia types. */ enum ice_status ice_add_media_types(struct ice_softc *sc, struct ifmedia *media) { struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_port_info *pi = sc->hw.port_info; enum ice_status status; uint64_t phy_low, phy_high; int bit; ASSERT_CFG_LOCKED(sc); /* the maximum possible media type index is 511. We probably don't * need most of this space, but this ensures future compatibility when * additional media types are used. 
 */
	ice_declare_bitmap(already_added, 511);

	/* Remove all previous media types */
	ifmedia_removeall(media);

	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
				     &pcaps, NULL);
	if (status != ICE_SUCCESS) {
		device_printf(sc->dev,
		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
		    __func__, ice_status_str(status),
		    ice_aq_str(sc->hw.adminq.sq_last_status));
		return (status);
	}
	phy_low = le64toh(pcaps.phy_type_low);
	phy_high = le64toh(pcaps.phy_type_high);

	/* make sure the added bitmap is zero'd */
	memset(already_added, 0, sizeof(already_added));

	/* coverity[address_of] */
	for_each_set_bit(bit, &phy_low, 64) {
		uint64_t type = BIT_ULL(bit);
		int ostype;

		/* get the OS media type */
		ostype = ice_get_phy_type_low(type);

		/* don't bother adding the unknown type */
		if (ostype == IFM_UNKNOWN)
			continue;

		/* only add each media type to the list once */
		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
			continue;

		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
		ice_set_bit(IFM_IDX(ostype), already_added);
	}

	/* coverity[address_of] */
	for_each_set_bit(bit, &phy_high, 64) {
		uint64_t type = BIT_ULL(bit);
		int ostype;

		/* get the OS media type */
		ostype = ice_get_phy_type_high(type);

		/* don't bother adding the unknown type */
		if (ostype == IFM_UNKNOWN)
			continue;

		/* only add each media type to the list once */
		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
			continue;

		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
		ice_set_bit(IFM_IDX(ostype), already_added);
	}

	/* Use autoselect media by default */
	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(media, IFM_ETHER | IFM_AUTO);

	return (ICE_SUCCESS);
}

/**
 * ice_configure_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
 * @vsi: the VSI to configure
 *
 * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
 */
void
ice_configure_rxq_interrupts(struct ice_vsi *vsi)
{
	struct ice_hw *hw = &vsi->sc->hw;
	int i;

	for (i = 0; i < vsi->num_rx_queues; i++) {
		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
		u32 val;

		val = (QINT_RQCTL_CAUSE_ENA_M |
		       (ICE_RX_ITR << QINT_RQCTL_ITR_INDX_S) |
		       (rxq->irqv->me << QINT_RQCTL_MSIX_INDX_S));
		wr32(hw, QINT_RQCTL(vsi->rx_qmap[rxq->me]), val);
	}

	ice_flush(hw);
}

/**
 * ice_configure_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
 * @vsi: the VSI to configure
 *
 * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
 */
void
ice_configure_txq_interrupts(struct ice_vsi *vsi)
{
	struct ice_hw *hw = &vsi->sc->hw;
	int i;

	for (i = 0; i < vsi->num_tx_queues; i++) {
		struct ice_tx_queue *txq = &vsi->tx_queues[i];
		u32 val;

		val = (QINT_TQCTL_CAUSE_ENA_M |
		       (ICE_TX_ITR << QINT_TQCTL_ITR_INDX_S) |
		       (txq->irqv->me << QINT_TQCTL_MSIX_INDX_S));
		wr32(hw, QINT_TQCTL(vsi->tx_qmap[txq->me]), val);
	}

	ice_flush(hw);
}

/**
 * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
 * @vsi: the VSI to configure
 *
 * Unset the CAUSE_ENA flag of the RQCTL register for each queue, then trigger
 * a software interrupt on that cause. This is required as part of the Rx
 * queue disable logic to dissociate the Rx queue from the interrupt.
 *
 * Note: this function must be called prior to disabling Rx queues with
 * ice_control_rx_queues, otherwise the Rx queue may not be disabled properly.
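 *
 * A sketch of the resulting Rx stop ordering (illustrative only):
 *
 *	ice_flush_rxq_interrupts(vsi);
 *	err = ice_control_rx_queues(vsi, false);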
*/ void ice_flush_rxq_interrupts(struct ice_vsi *vsi) { struct ice_hw *hw = &vsi->sc->hw; int i; for (i = 0; i < vsi->num_rx_queues; i++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; u32 reg, val; /* Clear the CAUSE_ENA flag */ reg = vsi->rx_qmap[rxq->me]; val = rd32(hw, QINT_RQCTL(reg)); val &= ~QINT_RQCTL_CAUSE_ENA_M; wr32(hw, QINT_RQCTL(reg), val); ice_flush(hw); /* Trigger a software interrupt to complete interrupt * dissociation. */ wr32(hw, GLINT_DYN_CTL(rxq->irqv->me), GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); } } /** * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause * @vsi: the VSI to configure * * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger * a software interrupt on that cause. This is required as part of the Tx * queue disable logic to dissociate the Tx queue from the interrupt. * * Note: this function must be called prior to ice_vsi_disable_tx, otherwise * the Tx queue disable may not complete properly. */ void ice_flush_txq_interrupts(struct ice_vsi *vsi) { struct ice_hw *hw = &vsi->sc->hw; int i; for (i = 0; i < vsi->num_tx_queues; i++) { struct ice_tx_queue *txq = &vsi->tx_queues[i]; u32 reg, val; /* Clear the CAUSE_ENA flag */ reg = vsi->tx_qmap[txq->me]; val = rd32(hw, QINT_TQCTL(reg)); val &= ~QINT_TQCTL_CAUSE_ENA_M; wr32(hw, QINT_TQCTL(reg), val); ice_flush(hw); /* Trigger a software interrupt to complete interrupt * dissociation. */ wr32(hw, GLINT_DYN_CTL(txq->irqv->me), GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); } } /** * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI * @vsi: the VSI to configure * * Program the hardware ITR registers with the settings for this VSI. */ void ice_configure_rx_itr(struct ice_vsi *vsi) { struct ice_hw *hw = &vsi->sc->hw; int i; /* TODO: Handle per-queue/per-vector ITR? */ for (i = 0; i < vsi->num_rx_queues; i++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me), ice_itr_to_reg(hw, vsi->rx_itr)); } ice_flush(hw); } /** * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI * @vsi: the VSI to configure * * Program the hardware ITR registers with the settings for this VSI. */ void ice_configure_tx_itr(struct ice_vsi *vsi) { struct ice_hw *hw = &vsi->sc->hw; int i; /* TODO: Handle per-queue/per-vector ITR? 
*/ for (i = 0; i < vsi->num_tx_queues; i++) { struct ice_tx_queue *txq = &vsi->tx_queues[i]; wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me), ice_itr_to_reg(hw, vsi->tx_itr)); } ice_flush(hw); } /** * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue * @txq: the Tx queue to configure * @tlan_ctx: the Tx LAN queue context structure to initialize * @pf_q: real queue number */ static int ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) { struct ice_vsi *vsi = txq->vsi; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; tlan_ctx->port_num = hw->port_info->lport; /* number of descriptors in the queue */ tlan_ctx->qlen = txq->desc_count; /* set the transmit queue base address, defined in 128 byte units */ tlan_ctx->base = txq->tx_paddr >> 7; tlan_ctx->pf_num = hw->pf_id; /* For now, we only have code supporting PF VSIs */ switch (vsi->type) { case ICE_VSI_PF: tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; default: return (ENODEV); } tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); /* Enable TSO */ tlan_ctx->tso_ena = 1; tlan_ctx->internal_usage_flag = 1; tlan_ctx->tso_qnum = pf_q; /* * Stick with the older legacy Tx queue interface, instead of the new * advanced queue interface. */ tlan_ctx->legacy_int = 1; /* Descriptor WB mode */ tlan_ctx->wb_mode = 0; return (0); } /** * ice_cfg_vsi_for_tx - Configure the hardware for Tx * @vsi: the VSI to configure * * Configure the device Tx queues through firmware AdminQ commands. After * this, Tx queues will be ready for transmit. */ int ice_cfg_vsi_for_tx(struct ice_vsi *vsi) { struct ice_aqc_add_tx_qgrp *qg; struct ice_hw *hw = &vsi->sc->hw; device_t dev = vsi->sc->dev; enum ice_status status; int i; int err = 0; u16 qg_size, pf_q; qg_size = ice_struct_size(qg, txqs, 1); qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO); if (!qg) return (ENOMEM); qg->num_txqs = 1; for (i = 0; i < vsi->num_tx_queues; i++) { struct ice_tlan_ctx tlan_ctx = { 0 }; struct ice_tx_queue *txq = &vsi->tx_queues[i]; pf_q = vsi->tx_qmap[txq->me]; qg->txqs[0].txq_id = htole16(pf_q); err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q); if (err) goto free_txqg; ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx, ice_tlan_ctx_info); status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc, txq->q_handle, 1, qg, qg_size, NULL); if (status) { device_printf(dev, "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n", i, txq->tc, txq->q_handle, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = ENODEV; goto free_txqg; } /* Keep track of the Tx queue TEID */ if (pf_q == le16toh(qg->txqs[0].txq_id)) txq->q_teid = le32toh(qg->txqs[0].q_teid); } free_txqg: free(qg, M_ICE); return (err); } /** * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue * @rxq: the receive queue to program * * Setup an Rx queue context structure and program it into the hardware * registers. This is a necessary step for enabling the Rx queue. 
* * @pre the VSI associated with this queue must have initialized mbuf_sz */ static int ice_setup_rx_ctx(struct ice_rx_queue *rxq) { struct ice_rlan_ctx rlan_ctx = {0}; struct ice_vsi *vsi = rxq->vsi; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; enum ice_status status; u32 rxdid = ICE_RXDID_FLEX_NIC; u32 regval; u16 pf_q; pf_q = vsi->rx_qmap[rxq->me]; /* set the receive queue base address, defined in 128 byte units */ rlan_ctx.base = rxq->rx_paddr >> 7; rlan_ctx.qlen = rxq->desc_count; rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S; /* use 32 byte descriptors */ rlan_ctx.dsize = 1; /* Strip the Ethernet CRC bytes before the packet is posted to the * host memory. */ rlan_ctx.crcstrip = 1; rlan_ctx.l2tsel = 1; /* don't do header splitting */ rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; /* strip VLAN from inner headers */ rlan_ctx.showiv = 1; rlan_ctx.rxmax = min(vsi->max_frame_size, ICE_MAX_RX_SEGS * vsi->mbuf_sz); rlan_ctx.lrxqthresh = 1; if (vsi->type != ICE_VSI_VF) { regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M; regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & QRXFLXP_CNTXT_RXDID_IDX_M; regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M; regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & QRXFLXP_CNTXT_RXDID_PRIO_M; wr32(hw, QRXFLXP_CNTXT(pf_q), regval); } status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); if (status) { device_printf(sc->dev, "Failed to set LAN Rx queue context, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } wr32(hw, rxq->tail, 0); return 0; } /** * ice_cfg_vsi_for_rx - Configure the hardware for Rx * @vsi: the VSI to configure * * Prepare an Rx context descriptor and configure the device to receive * traffic. * * @pre the VSI must have initialized mbuf_sz */ int ice_cfg_vsi_for_rx(struct ice_vsi *vsi) { int i, err; for (i = 0; i < vsi->num_rx_queues; i++) { MPASS(vsi->mbuf_sz > 0); err = ice_setup_rx_ctx(&vsi->rx_queues[i]); if (err) return err; } return (0); } /** * ice_is_rxq_ready - Check if an Rx queue is ready * @hw: ice hw structure * @pf_q: absolute PF queue index to check * @reg: on successful return, contains qrx_ctrl contents * * Reads the QRX_CTRL register and verifies if the queue is in a consistent * state. That is, QENA_REQ matches QENA_STAT. Used to check before making * a request to change the queue, as well as to verify the request has * finished. The queue should change status within a few microseconds, so we * use a small delay while polling the register. * * Returns an error code if the queue does not update after a few retries. */ static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg) { u32 qrx_ctrl, qena_req, qena_stat; int i; for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) { qrx_ctrl = rd32(hw, QRX_CTRL(pf_q)); qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1; qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1; /* if the request and status bits equal, then the queue is * fully disabled or enabled. */ if (qena_req == qena_stat) { *reg = qrx_ctrl; return (0); } /* wait a few microseconds before we check again */ DELAY(10); } return (ETIMEDOUT); } /** * ice_control_rx_queues - Configure hardware to start or stop the Rx queues * @vsi: VSI to enable/disable queues * @enable: true to enable queues, false to disable * * Control the Rx queues through the QRX_CTRL register, enabling or disabling * them. 
Wait for the appropriate time to ensure that the queues have actually
 * reached the expected state.
 */
int
ice_control_rx_queues(struct ice_vsi *vsi, bool enable)
{
	struct ice_hw *hw = &vsi->sc->hw;
	device_t dev = vsi->sc->dev;
	u32 qrx_ctrl = 0;
	int i, err;

	/* TODO: amortize waits by changing all queues up front and then
	 * checking their status afterwards. This will become more necessary
	 * when we have a large number of queues.
	 */
	for (i = 0; i < vsi->num_rx_queues; i++) {
		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
		int pf_q = vsi->rx_qmap[rxq->me];

		err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
		if (err) {
			device_printf(dev, "Rx queue %d is not ready\n",
				      pf_q);
			return err;
		}

		/* Skip if the queue is already in correct state */
		if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
			continue;

		if (enable)
			qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
		else
			qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
		wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);

		/* wait for the queue to finalize the request */
		err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
		if (err) {
			device_printf(dev, "Rx queue %d %sable timeout\n",
				      pf_q, (enable ? "en" : "dis"));
			return err;
		}

		/* this should never happen */
		if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
			device_printf(dev, "Rx queue %d invalid state\n",
				      pf_q);
			return (EDOOFUS);
		}
	}

	return (0);
}

/**
 * ice_add_mac_to_list - Add MAC filter to a MAC filter list
 * @vsi: the VSI to forward to
 * @list: list which contains MAC filter entries
 * @addr: the MAC address to be added
 * @action: filter action to perform on match
 *
 * Adds a MAC address filter to the list which will be forwarded to firmware
 * to add a series of MAC address filters.
 *
 * Returns 0 on success, and an error code on failure.
 */
static int
ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
		    const u8 *addr, enum ice_sw_fwd_act_type action)
{
	struct ice_fltr_list_entry *entry;

	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE,
					M_NOWAIT|M_ZERO);
	if (!entry)
		return (ENOMEM);

	entry->fltr_info.flag = ICE_FLTR_TX;
	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
	entry->fltr_info.fltr_act = action;
	entry->fltr_info.vsi_handle = vsi->idx;
	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);

	LIST_ADD(&entry->list_entry, list);

	return 0;
}

/**
 * ice_free_fltr_list - Free memory associated with a MAC address list
 * @list: the list to free
 *
 * Free the memory of each entry associated with the list.
 */
static void
ice_free_fltr_list(struct ice_list_head *list)
{
	struct ice_fltr_list_entry *e, *tmp;

	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry,
				 list_entry) {
		LIST_DEL(&e->list_entry);
		free(e, M_ICE);
	}
}

/**
 * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
 * @vsi: the VSI to add the filter for
 * @addr: MAC address to add a filter for
 *
 * Add a MAC address filter for a given VSI. This is a wrapper around
 * ice_add_mac to simplify the interface. First, it only accepts a single
 * address, so we don't have to mess around with the list setup in other
 * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
 * callers don't need to worry about attempting to add the same filter twice.
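 *
 * Example, as used by ice_cfg_pf_default_mac_filters() below:
 *
 *	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);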
 */
int
ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
{
	struct ice_list_head mac_addr_list;
	struct ice_hw *hw = &vsi->sc->hw;
	device_t dev = vsi->sc->dev;
	enum ice_status status;
	int err = 0;

	INIT_LIST_HEAD(&mac_addr_list);

	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
	if (err)
		goto free_mac_list;

	status = ice_add_mac(hw, &mac_addr_list);
	if (status == ICE_ERR_ALREADY_EXISTS) {
		; /* Don't complain if we try to add a filter that already exists */
	} else if (status) {
		device_printf(dev,
			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
			      addr, ":",
			      ice_status_str(status),
			      ice_aq_str(hw->adminq.sq_last_status));
		err = (EIO);
	}

free_mac_list:
	ice_free_fltr_list(&mac_addr_list);
	return err;
}

/**
 * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
 * @sc: device softc structure
 *
 * Program the default unicast and broadcast filters for the PF VSI.
 */
int
ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_hw *hw = &sc->hw;
	int err;

	/* Add the LAN MAC address */
	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
	if (err)
		return err;

	/* Add the broadcast address */
	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
	if (err)
		return err;

	return (0);
}

/**
 * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
 * @vsi: the VSI to remove the filter for
 * @addr: MAC address to remove a filter for
 *
 * Remove a MAC address filter from a given VSI. This is a wrapper around
 * ice_remove_mac to simplify the interface. First, it only accepts a single
 * address, so we don't have to mess around with the list setup in other
 * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
 * callers don't need to worry about attempting to remove filters which
 * haven't yet been added.
 */
int
ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
{
	struct ice_list_head mac_addr_list;
	struct ice_hw *hw = &vsi->sc->hw;
	device_t dev = vsi->sc->dev;
	enum ice_status status;
	int err = 0;

	INIT_LIST_HEAD(&mac_addr_list);

	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
	if (err)
		goto free_mac_list;

	status = ice_remove_mac(hw, &mac_addr_list);
	if (status == ICE_ERR_DOES_NOT_EXIST) {
		; /* Don't complain if we try to remove a filter that doesn't exist */
	} else if (status) {
		device_printf(dev,
			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
			      addr, ":",
			      ice_status_str(status),
			      ice_aq_str(hw->adminq.sq_last_status));
		err = (EIO);
	}

free_mac_list:
	ice_free_fltr_list(&mac_addr_list);
	return err;
}

/**
 * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
 * @sc: device softc structure
 *
 * Remove the default unicast and broadcast filters from the PF VSI.
 */
int
ice_rm_pf_default_mac_filters(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_hw *hw = &sc->hw;
	int err;

	/* Remove the LAN MAC address */
	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
	if (err)
		return err;

	/* Remove the broadcast address */
	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
	if (err)
		return (err);

	return (0);
}

/**
 * ice_check_ctrlq_errors - Check for and report controlq errors
 * @sc: device private structure
 * @qname: name of the controlq
 * @cq: the controlq to check
 *
 * Check and report controlq errors. Currently all we do is report them to the
 * kernel message log, but we might want to improve this in the future, such
 * as to keep track of statistics.
 */
static void
ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
		       struct ice_ctl_q_info *cq)
{
	struct ice_hw *hw = &sc->hw;
	u32 val;

	/* Check for error indications. Note that all the controlqs use the
	 * same register layout, so we use the PF_FW_AxQLEN defines only.
	 */
	val = rd32(hw, cq->rq.len);
	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
		   PF_FW_ARQLEN_ARQCRIT_M)) {
		if (val & PF_FW_ARQLEN_ARQVFE_M)
			device_printf(sc->dev,
				"%s Receive Queue VF Error detected\n", qname);
		if (val & PF_FW_ARQLEN_ARQOVFL_M)
			device_printf(sc->dev,
				"%s Receive Queue Overflow Error detected\n",
				qname);
		if (val & PF_FW_ARQLEN_ARQCRIT_M)
			device_printf(sc->dev,
				"%s Receive Queue Critical Error detected\n",
				qname);
		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
			 PF_FW_ARQLEN_ARQCRIT_M);
		wr32(hw, cq->rq.len, val);
	}

	val = rd32(hw, cq->sq.len);
	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
		   PF_FW_ATQLEN_ATQCRIT_M)) {
		if (val & PF_FW_ATQLEN_ATQVFE_M)
			device_printf(sc->dev,
				"%s Send Queue VF Error detected\n", qname);
		if (val & PF_FW_ATQLEN_ATQOVFL_M)
			device_printf(sc->dev,
				"%s Send Queue Overflow Error detected\n",
				qname);
		if (val & PF_FW_ATQLEN_ATQCRIT_M)
			device_printf(sc->dev,
				"%s Send Queue Critical Error detected\n",
				qname);
		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
			 PF_FW_ATQLEN_ATQCRIT_M);
		wr32(hw, cq->sq.len, val);
	}
}

/**
 * ice_process_link_event - Process a link event indication from firmware
 * @sc: device softc structure
 * @e: the received event data
 *
 * Gets the current link status from hardware, and may print a message if an
 * unqualified module is detected.
 */
static void
ice_process_link_event(struct ice_softc *sc,
		       struct ice_rq_event_info __invariant_only *e)
{
	struct ice_port_info *pi = sc->hw.port_info;
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	enum ice_status status;

	/* Sanity check that the data length matches */
	MPASS(le16toh(e->desc.datalen) ==
	      sizeof(struct ice_aqc_get_link_status_data));

	/*
	 * Even though the adapter gets link status information inside the
	 * event, it needs to send a Get Link Status AQ command in order
	 * to re-enable link events.
	 */
	pi->phy.get_link_info = true;
	ice_get_link_status(pi, &sc->link_up);

	if (pi->phy.link_info.topo_media_conflict &
	    (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
	     ICE_AQ_LINK_TOPO_CORRUPT))
		device_printf(dev,
		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");

	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
			device_printf(dev,
			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
		if (pi->phy.link_info.link_cfg_err &
		    ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
			device_printf(dev,
			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
		if (pi->phy.link_info.link_cfg_err &
		    ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
			device_printf(dev,
			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
	}

	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
			status = ice_aq_set_link_restart_an(pi, false, NULL);
			if (status != ICE_SUCCESS)
				device_printf(dev,
				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
				    __func__, ice_status_str(status),
				    ice_aq_str(hw->adminq.sq_last_status));
		}
	}
	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */

	/* Indicate that link status must be reported again */
	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);

	/* OS link info is updated elsewhere */
}

/**
 * ice_process_ctrlq_event - Respond to a controlq event
 * @sc: device private structure
 * @qname: the name for this controlq
 * @event: the event to process
 *
 * Perform actions in response to various controlq event notifications.
 */
static void
ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
			struct ice_rq_event_info *event)
{
	u16 opcode;

	opcode = le16toh(event->desc.opcode);

	switch (opcode) {
	case ice_aqc_opc_get_link_status:
		ice_process_link_event(sc, event);
		break;
	case ice_mbx_opc_send_msg_to_pf:
		/* TODO: handle IOV event */
		break;
	case ice_aqc_opc_fw_logs_event:
		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
		break;
	case ice_aqc_opc_lldp_set_mib_change:
		ice_handle_mib_change_event(sc, event);
		break;
	case ice_aqc_opc_event_lan_overflow:
		ice_handle_lan_overflow_event(sc, event);
		break;
	case ice_aqc_opc_get_health_status:
		ice_handle_health_status_event(sc, event);
		break;
	default:
		device_printf(sc->dev,
			      "%s Receive Queue unhandled event 0x%04x ignored\n",
			      qname, opcode);
	}
}

/**
 * ice_process_ctrlq - helper function to process controlq rings
 * @sc: device private structure
 * @q_type: specific control queue type
 * @pending: return parameter to track remaining events
 *
 * Process controlq events for a given control queue type. Returns zero on
 * success, and an error code on failure. If successful, pending is the number
 * of remaining events left in the queue.
 */
int
ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
{
	struct ice_rq_event_info event = { { 0 } };
	struct ice_hw *hw = &sc->hw;
	struct ice_ctl_q_info *cq;
	enum ice_status status;
	const char *qname;
	int loop = 0;

	switch (q_type) {
	case ICE_CTL_Q_ADMIN:
		cq = &hw->adminq;
		qname = "Admin";
		break;
	case ICE_CTL_Q_MAILBOX:
		cq = &hw->mailboxq;
		qname = "Mailbox";
		break;
	default:
		device_printf(sc->dev,
			      "Unknown control queue type 0x%x\n", q_type);
		return 0;
	}

	ice_check_ctrlq_errors(sc, qname, cq);

	/*
	 * Control queue processing happens during the admin task which may be
	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
	 */
	event.buf_len = cq->rq_buf_size;
	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
	if (!event.msg_buf) {
		device_printf(sc->dev,
			      "Unable to allocate memory for %s Receive Queue event\n",
			      qname);
		return (ENOMEM);
	}

	do {
		status = ice_clean_rq_elem(hw, cq, &event, pending);
		if (status == ICE_ERR_AQ_NO_WORK)
			break;
		if (status) {
			/* Both controlq types report errors the same way */
			device_printf(sc->dev,
				      "%s Receive Queue event error %s\n",
				      qname, ice_status_str(status));
			free(event.msg_buf, M_ICE);
			return (EIO);
		}
		/* XXX should we separate this handler by controlq type?
		 */
		ice_process_ctrlq_event(sc, qname, &event);
	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));

	free(event.msg_buf, M_ICE);

	return 0;
}

/**
 * pkg_ver_empty - Check if a package version is empty
 * @pkg_ver: the package version to check
 * @pkg_name: the package name to check
 *
 * Checks if the package version structure is empty. We consider a package
 * version as empty if all of the version numbers are zero and the name
 * string is empty as well.
 *
 * This is used to check if the package version was initialized by the driver,
 * as we do not expect an actual DDP package file to have a zero'd version and
 * name.
 *
 * @returns true if the package version is empty, or false otherwise.
 */
static bool
pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
{
	return (pkg_name[0] == '\0' &&
		pkg_ver->major == 0 &&
		pkg_ver->minor == 0 &&
		pkg_ver->update == 0 &&
		pkg_ver->draft == 0);
}

/**
 * pkg_ver_compatible - Check if the package version is compatible
 * @pkg_ver: the package version to check
 *
 * Compares the package version number to the driver's expected major/minor
 * version. Returns an integer indicating whether the version is older, newer,
 * or compatible with the driver.
 *
 * @returns 0 if the package version is compatible, -1 if the package version
 * is older, and 1 if the package version is newer than the driver version.
 */
static int
pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
{
	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
		return (1); /* newer */
	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
		return (1); /* newer */
	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
		return (0); /* compatible */
	else
		return (-1); /* older */
}

/**
 * ice_os_pkg_version_str - Format OS package version info into a sbuf
 * @hw: device hw structure
 * @buf: string buffer to store name/version string
 *
 * Formats the name and version of the OS DDP package as found in the ice_ddp
 * module into a string.
 *
 * @remark This will almost always be the same as the active package, but
 * could be different in some cases. Use ice_active_pkg_version_str to get the
 * version of the active DDP package.
 */
static void
ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
{
	char name_buf[ICE_PKG_NAME_SIZE];

	/* If the OS DDP package info is empty, use "None" */
	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
		sbuf_printf(buf, "None");
		return;
	}

	/*
	 * This should already be null-terminated, but since this is a raw
	 * value from an external source, strlcpy() into a new buffer to
	 * make sure.
*/ bzero(name_buf, sizeof(name_buf)); strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE); sbuf_printf(buf, "%s version %u.%u.%u.%u", name_buf, hw->active_pkg_ver.major, hw->active_pkg_ver.minor, hw->active_pkg_ver.update, hw->active_pkg_ver.draft); if (hw->active_track_id != 0) sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id); } /** * ice_nvm_version_str - Format the NVM version information into a sbuf * @hw: device hw structure * @buf: string buffer to store version string * * Formats the NVM information including firmware version, API version, NVM * version, the EETRACK id, and OEM specific version information into a string * buffer. */ static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf) { struct ice_nvm_info *nvm = &hw->flash.nvm; struct ice_orom_info *orom = &hw->flash.orom; struct ice_netlist_info *netlist = &hw->flash.netlist; /* Note that the netlist versions are stored in packed Binary Coded * Decimal format. The use of '%x' will correctly display these as * decimal numbers. This works because every 4 bits will be displayed * as a hexadecimal digit, and the BCD format will only use the values * 0-9. */ sbuf_printf(buf, "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u", hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch, hw->api_maj_ver, hw->api_min_ver, nvm->major, nvm->minor, nvm->eetrack, netlist->major, netlist->minor, netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev, netlist->cust_ver, netlist->hash, orom->major, orom->build, orom->patch); } /** * ice_print_nvm_version - Print the NVM info to the kernel message log * @sc: the device softc structure * * Format and print an NVM version string using ice_nvm_version_str(). */ void ice_print_nvm_version(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *sbuf; sbuf = sbuf_new_auto(); ice_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); device_printf(dev, "%s\n", sbuf_data(sbuf)); sbuf_delete(sbuf); } /** * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters * @vsi: the VSI to be updated * * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with * the updated values. 
*/ void ice_update_vsi_hw_stats(struct ice_vsi *vsi) { struct ice_eth_stats *prev_es, *cur_es; struct ice_hw *hw = &vsi->sc->hw; u16 vsi_num; if (!ice_is_vsi_valid(hw, vsi->idx)) return; vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */ prev_es = &vsi->hw_stats.prev; cur_es = &vsi->hw_stats.cur; #define ICE_VSI_STAT40(name, location) \ ice_stat_update40(hw, name ## L(vsi_num), \ vsi->hw_stats.offsets_loaded, \ &prev_es->location, &cur_es->location) #define ICE_VSI_STAT32(name, location) \ ice_stat_update32(hw, name(vsi_num), \ vsi->hw_stats.offsets_loaded, \ &prev_es->location, &cur_es->location) ICE_VSI_STAT40(GLV_GORC, rx_bytes); ICE_VSI_STAT40(GLV_UPRC, rx_unicast); ICE_VSI_STAT40(GLV_MPRC, rx_multicast); ICE_VSI_STAT40(GLV_BPRC, rx_broadcast); ICE_VSI_STAT32(GLV_RDPC, rx_discards); ICE_VSI_STAT40(GLV_GOTC, tx_bytes); ICE_VSI_STAT40(GLV_UPTC, tx_unicast); ICE_VSI_STAT40(GLV_MPTC, tx_multicast); ICE_VSI_STAT40(GLV_BPTC, tx_broadcast); ICE_VSI_STAT32(GLV_TEPC, tx_errors); ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded, cur_es); #undef ICE_VSI_STAT40 #undef ICE_VSI_STAT32 vsi->hw_stats.offsets_loaded = true; } /** * ice_reset_vsi_stats - Reset VSI statistics counters * @vsi: VSI structure * * Resets the software tracking counters for the VSI statistics, and indicate * that the offsets haven't been loaded. This is intended to be called * post-reset so that VSI statistics count from zero again. */ void ice_reset_vsi_stats(struct ice_vsi *vsi) { /* Reset HW stats */ memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev)); memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur)); vsi->hw_stats.offsets_loaded = false; } /** * ice_update_pf_stats - Update port stats counters * @sc: device private softc structure * * Reads hardware statistics registers and updates the software tracking * structure with new values. 
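 *
 * The ICE_PF_STAT40/ICE_PF_STAT32 helpers below expand to
 * ice_stat_update40()/ice_stat_update32() calls; for example, the first
 * 40-bit stat expands roughly to:
 *
 *	ice_stat_update40(hw, GLPRT_GORCL(lport),
 *	    sc->stats.offsets_loaded,
 *	    &prev_ps->eth.rx_bytes, &cur_ps->eth.rx_bytes);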
*/ void ice_update_pf_stats(struct ice_softc *sc) { struct ice_hw_port_stats *prev_ps, *cur_ps; struct ice_hw *hw = &sc->hw; u8 lport; MPASS(hw->port_info); prev_ps = &sc->stats.prev; cur_ps = &sc->stats.cur; lport = hw->port_info->lport; #define ICE_PF_STAT_PFC(name, location, index) \ ice_stat_update40(hw, name(lport, index), \ sc->stats.offsets_loaded, \ &prev_ps->location[index], &cur_ps->location[index]) #define ICE_PF_STAT40(name, location) \ ice_stat_update40(hw, name ## L(lport), \ sc->stats.offsets_loaded, \ &prev_ps->location, &cur_ps->location) #define ICE_PF_STAT32(name, location) \ ice_stat_update32(hw, name(lport), \ sc->stats.offsets_loaded, \ &prev_ps->location, &cur_ps->location) ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes); ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast); ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast); ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast); ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes); ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast); ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast); ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast); /* This stat register doesn't have an lport */ ice_stat_update32(hw, PRTRPB_RDPC, sc->stats.offsets_loaded, &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards); ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down); ICE_PF_STAT40(GLPRT_PRC64, rx_size_64); ICE_PF_STAT40(GLPRT_PRC127, rx_size_127); ICE_PF_STAT40(GLPRT_PRC255, rx_size_255); ICE_PF_STAT40(GLPRT_PRC511, rx_size_511); ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023); ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522); ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big); ICE_PF_STAT40(GLPRT_PTC64, tx_size_64); ICE_PF_STAT40(GLPRT_PTC127, tx_size_127); ICE_PF_STAT40(GLPRT_PTC255, tx_size_255); ICE_PF_STAT40(GLPRT_PTC511, tx_size_511); ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023); ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522); ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big); /* Update Priority Flow Control Stats */ for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) { ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i); ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i); ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i); ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i); ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i); } ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx); ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx); ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx); ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx); ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors); ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes); ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults); ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults); ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors); ICE_PF_STAT32(GLPRT_RUC, rx_undersize); ICE_PF_STAT32(GLPRT_RFC, rx_fragments); ICE_PF_STAT32(GLPRT_ROC, rx_oversize); ICE_PF_STAT32(GLPRT_RJC, rx_jabber); #undef ICE_PF_STAT40 #undef ICE_PF_STAT32 #undef ICE_PF_STAT_PFC sc->stats.offsets_loaded = true; } /** * ice_reset_pf_stats - Reset port stats counters * @sc: Device private softc structure * * Reset software tracking values for statistics to zero, and indicate that * offsets haven't been loaded. Intended to be called after a device reset so * that statistics count from zero again. 
*/ void ice_reset_pf_stats(struct ice_softc *sc) { memset(&sc->stats.prev, 0, sizeof(sc->stats.prev)); memset(&sc->stats.cur, 0, sizeof(sc->stats.cur)); sc->stats.offsets_loaded = false; } /** * ice_sysctl_show_fw - sysctl callback to show firmware information * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the fw_version sysctl, to display the current firmware * information found at hardware init time. */ static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_pba_number - sysctl callback to show PBA number * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the pba_number sysctl, used to read the Product Board Assembly * number for this device. */ static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u8 pba_string[32] = ""; enum ice_status status; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_read_pba_string(hw, pba_string, sizeof(pba_string)); if (status) { device_printf(dev, "%s: failed to read PBA string from NVM; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req); } /** * ice_sysctl_pkg_version - sysctl to show the active package version info * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the pkg_version sysctl, to display the active DDP package name * and version information. */ static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_active_pkg_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_os_pkg_version - sysctl to show the OS package version info * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the pkg_version sysctl, to display the OS DDP package name and * version info found in the ice_ddp module. */ static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_os_pkg_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_current_speed - sysctl callback to show current link speed * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the current_speed sysctl, to display the string representing * the current link speed. 
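 *
 * Example interaction (sysctl path and output are illustrative; the leaf is
 * registered as "current_speed" in ice_add_device_sysctls() below):
 *
 *	# sysctl dev.ice.0.current_speed
 *	dev.ice.0.current_speed: 25 Gbps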
*/ static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req); sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info)); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * @var phy_link_speeds * @brief PHY link speed conversion array * * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into * link speeds used by the link speed sysctls. * * @remark these are based on the indices used in the BIT() macros for the * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions. */ static const uint16_t phy_link_speeds[] = { ICE_AQ_LINK_SPEED_100MB, ICE_AQ_LINK_SPEED_100MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_2500MB, ICE_AQ_LINK_SPEED_2500MB, ICE_AQ_LINK_SPEED_2500MB, ICE_AQ_LINK_SPEED_5GB, ICE_AQ_LINK_SPEED_5GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, /* These rates are for ICE_PHY_TYPE_HIGH_* */ ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB }; #define ICE_SYSCTL_HELP_ADVERTISE_SPEED \ "\nControl advertised link speed." \ "\nFlags:" \ "\n\t 0x0 - Auto" \ "\n\t 0x1 - 10 Mb" \ "\n\t 0x2 - 100 Mb" \ "\n\t 0x4 - 1G" \ "\n\t 0x8 - 2.5G" \ "\n\t 0x10 - 5G" \ "\n\t 0x20 - 10G" \ "\n\t 0x40 - 20G" \ "\n\t 0x80 - 25G" \ "\n\t 0x100 - 40G" \ "\n\t 0x200 - 50G" \ "\n\t 0x400 - 100G" \ "\n\t0x8000 - Unknown" \ "\n\t" \ "\nUse \"sysctl -x\" to view flags properly." 
#define ICE_PHYS_100MB \ (ICE_PHY_TYPE_LOW_100BASE_TX | \ ICE_PHY_TYPE_LOW_100M_SGMII) #define ICE_PHYS_1000MB \ (ICE_PHY_TYPE_LOW_1000BASE_T | \ ICE_PHY_TYPE_LOW_1000BASE_SX | \ ICE_PHY_TYPE_LOW_1000BASE_LX | \ ICE_PHY_TYPE_LOW_1000BASE_KX | \ ICE_PHY_TYPE_LOW_1G_SGMII) #define ICE_PHYS_2500MB \ (ICE_PHY_TYPE_LOW_2500BASE_T | \ ICE_PHY_TYPE_LOW_2500BASE_X | \ ICE_PHY_TYPE_LOW_2500BASE_KX) #define ICE_PHYS_5GB \ (ICE_PHY_TYPE_LOW_5GBASE_T | \ ICE_PHY_TYPE_LOW_5GBASE_KR) #define ICE_PHYS_10GB \ (ICE_PHY_TYPE_LOW_10GBASE_T | \ ICE_PHY_TYPE_LOW_10G_SFI_DA | \ ICE_PHY_TYPE_LOW_10GBASE_SR | \ ICE_PHY_TYPE_LOW_10GBASE_LR | \ ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \ ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \ ICE_PHY_TYPE_LOW_10G_SFI_C2C) #define ICE_PHYS_25GB \ (ICE_PHY_TYPE_LOW_25GBASE_T | \ ICE_PHY_TYPE_LOW_25GBASE_CR | \ ICE_PHY_TYPE_LOW_25GBASE_CR_S | \ ICE_PHY_TYPE_LOW_25GBASE_CR1 | \ ICE_PHY_TYPE_LOW_25GBASE_SR | \ ICE_PHY_TYPE_LOW_25GBASE_LR | \ ICE_PHY_TYPE_LOW_25GBASE_KR | \ ICE_PHY_TYPE_LOW_25GBASE_KR_S | \ ICE_PHY_TYPE_LOW_25GBASE_KR1 | \ ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC | \ ICE_PHY_TYPE_LOW_25G_AUI_C2C) #define ICE_PHYS_40GB \ (ICE_PHY_TYPE_LOW_40GBASE_CR4 | \ ICE_PHY_TYPE_LOW_40GBASE_SR4 | \ ICE_PHY_TYPE_LOW_40GBASE_LR4 | \ ICE_PHY_TYPE_LOW_40GBASE_KR4 | \ ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \ ICE_PHY_TYPE_LOW_40G_XLAUI) #define ICE_PHYS_50GB \ (ICE_PHY_TYPE_LOW_50GBASE_CR2 | \ ICE_PHY_TYPE_LOW_50GBASE_SR2 | \ ICE_PHY_TYPE_LOW_50GBASE_LR2 | \ ICE_PHY_TYPE_LOW_50GBASE_KR2 | \ ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \ ICE_PHY_TYPE_LOW_50G_LAUI2 | \ ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \ ICE_PHY_TYPE_LOW_50G_AUI2 | \ ICE_PHY_TYPE_LOW_50GBASE_CP | \ ICE_PHY_TYPE_LOW_50GBASE_SR | \ ICE_PHY_TYPE_LOW_50GBASE_FR | \ ICE_PHY_TYPE_LOW_50GBASE_LR | \ ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 | \ ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \ ICE_PHY_TYPE_LOW_50G_AUI1) #define ICE_PHYS_100GB_LOW \ (ICE_PHY_TYPE_LOW_100GBASE_CR4 | \ ICE_PHY_TYPE_LOW_100GBASE_SR4 | \ ICE_PHY_TYPE_LOW_100GBASE_LR4 | \ ICE_PHY_TYPE_LOW_100GBASE_KR4 | \ ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \ ICE_PHY_TYPE_LOW_100G_CAUI4 | \ ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \ ICE_PHY_TYPE_LOW_100G_AUI4 | \ ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \ ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \ ICE_PHY_TYPE_LOW_100GBASE_CP2 | \ ICE_PHY_TYPE_LOW_100GBASE_SR2 | \ ICE_PHY_TYPE_LOW_100GBASE_DR) #define ICE_PHYS_100GB_HIGH \ (ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \ ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \ ICE_PHY_TYPE_HIGH_100G_CAUI2 | \ ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \ ICE_PHY_TYPE_HIGH_100G_AUI2) /** * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds * @phy_type_low: lower 64-bit PHY Type bitmask * @phy_type_high: upper 64-bit PHY Type bitmask * * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into * link speed flags. If phy_type_high has an unknown PHY type, then the return * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well. 
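 *
 * Illustrative use (the PHY type is arbitrary; any bit in the 25G region of
 * phy_link_speeds[] yields the same flag):
 *
 *	u16 speeds;
 *
 *	speeds = ice_aq_phy_types_to_link_speeds(ICE_PHY_TYPE_LOW_25GBASE_CR, 0);
 *	(speeds now contains ICE_AQ_LINK_SPEED_25GB)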
 */
static u16
ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
{
	u16 sysctl_speeds = 0;
	int bit;

	/* coverity[address_of] */
	for_each_set_bit(bit, &phy_type_low, 64)
		sysctl_speeds |= phy_link_speeds[bit];

	/* coverity[address_of] */
	for_each_set_bit(bit, &phy_type_high, 64) {
		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
			sysctl_speeds |= phy_link_speeds[bit + 64];
		else
			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
	}

	return (sysctl_speeds);
}

/**
 * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
 * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
 * @phy_type_low: output parameter for lower AQ PHY flags
 * @phy_type_high: output parameter for higher AQ PHY flags
 *
 * Converts the given link speed flags into AQ PHY type flag sets appropriate
 * for use in a Set PHY Config command.
 */
static void
ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
    u64 *phy_type_high)
{
	*phy_type_low = 0;
	*phy_type_high = 0;

	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
		*phy_type_low |= ICE_PHYS_100MB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
		*phy_type_low |= ICE_PHYS_1000MB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
		*phy_type_low |= ICE_PHYS_2500MB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
		*phy_type_low |= ICE_PHYS_5GB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
		*phy_type_low |= ICE_PHYS_10GB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
		*phy_type_low |= ICE_PHYS_25GB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
		*phy_type_low |= ICE_PHYS_40GB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
		*phy_type_low |= ICE_PHYS_50GB;
	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
		*phy_type_low |= ICE_PHYS_100GB_LOW;
		*phy_type_high |= ICE_PHYS_100GB_HIGH;
	}
}

/**
 * @struct ice_phy_data
 * @brief PHY caps and link speeds
 *
 * Buffer carrying the report mode and user-requested speeds into
 * ice_intersect_phy_types_and_speeds(), and the intersection of PHY types
 * and speeds back out.
 */
struct ice_phy_data {
	u64 phy_low_orig;     /* PHY low quad from report */
	u64 phy_high_orig;    /* PHY high quad from report */
	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
	u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */
	u16 user_speeds_intr; /* Intersect with report speeds */
	u8 report_mode;       /* See ICE_AQC_REPORT_* */
};

/**
 * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
 * @sc: device private structure
 * @phy_data: device PHY data
 *
 * Queries the PHY capabilities using the report mode in @phy_data and
 * computes the intersection of the reported PHY types and link speeds with
 * the user-requested speeds, storing the results back into @phy_data.
 */
static int
ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
    struct ice_phy_data *phy_data)
{
	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
	const char *report_types[5] = { "w/o MEDIA",
					"w/MEDIA",
					"ACTIVE",
					"EDOOFUS", /* Not used */
					"DFLT" };
	struct ice_hw *hw = &sc->hw;
	struct ice_port_info *pi = hw->port_info;
	enum ice_status status;
	u16 report_speeds, temp_speeds;
	u8 report_type;
	bool apply_speed_filter = false;

	switch (phy_data->report_mode) {
	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
	case ICE_AQC_REPORT_ACTIVE_CFG:
	case ICE_AQC_REPORT_DFLT_CFG:
		report_type = phy_data->report_mode >> 1;
		break;
	default:
		device_printf(sc->dev,
		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
		    __func__, phy_data->report_mode);
		return (EINVAL);
	}

	/* 0 is treated as "Auto"; the driver will handle selecting the
	 * correct speeds.
Including, in some cases, applying an override * if provided. */ if (phy_data->user_speeds_orig == 0) phy_data->user_speeds_orig = USHRT_MAX; else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) apply_speed_filter = true; status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL); if (status != ICE_SUCCESS) { device_printf(sc->dev, "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n", __func__, report_types[report_type], ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); return (EIO); } phy_data->phy_low_orig = le64toh(pcaps.phy_type_low); phy_data->phy_high_orig = le64toh(pcaps.phy_type_high); report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig, phy_data->phy_high_orig); if (apply_speed_filter) { temp_speeds = ice_apply_supported_speed_filter(report_speeds, pcaps.module_type[0]); if ((phy_data->user_speeds_orig & temp_speeds) == 0) { device_printf(sc->dev, "User-specified speeds (\"0x%04X\") not supported\n", phy_data->user_speeds_orig); return (EINVAL); } report_speeds = temp_speeds; } ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig, &phy_data->phy_low_intr, &phy_data->phy_high_intr); phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds; phy_data->phy_low_intr &= phy_data->phy_low_orig; phy_data->phy_high_intr &= phy_data->phy_high_orig; return (0); } /** * ice_sysctl_advertise_speed - Display/change link speeds supported by port * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the currently supported speeds * On write: Sets the device's supported speeds * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED */ static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_info *pi = sc->hw.port_info; struct ice_phy_data phy_data = { 0 }; device_t dev = sc->dev; u16 sysctl_speeds; int ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Get the current speeds from the adapter's "active" configuration. */ phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret) { /* Error message already printed within function */ return (ret); } sysctl_speeds = phy_data.user_speeds_intr; ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (sysctl_speeds > 0x7FF) { device_printf(dev, "%s: \"%u\" is outside of the range of acceptable values.\n", __func__, sysctl_speeds); return (EINVAL); } pi->phy.curr_user_speed_req = sysctl_speeds; /* Apply settings requested by user */ return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS); } #define ICE_SYSCTL_HELP_FEC_CONFIG \ "\nDisplay or set the port's requested FEC mode." \ "\n\tauto - " ICE_FEC_STRING_AUTO \ "\n\tfc - " ICE_FEC_STRING_BASER \ "\n\trs - " ICE_FEC_STRING_RS \ "\n\tnone - " ICE_FEC_STRING_NONE \ "\nEither of the left or right strings above can be used to set the requested mode." /** * ice_sysctl_fec_config - Display/change the configured FEC mode * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the configured FEC mode * On write: Sets the device's FEC mode to the input string, if it's valid. 
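 *
 * For example (sysctl path illustrative; the leaf is registered as
 * "requested_fec" in ice_add_device_sysctls() below):
 *
 *	# sysctl dev.ice.0.requested_fec=rs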
* Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG */ static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_info *pi = sc->hw.port_info; enum ice_fec_mode new_mode; device_t dev = sc->dev; char req_fec[32]; int ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); bzero(req_fec, sizeof(req_fec)); strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec)); ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req); if ((ret) || (req->newptr == NULL)) return (ret); if (strcmp(req_fec, "auto") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) { new_mode = ICE_FEC_AUTO; } else if (strcmp(req_fec, "fc") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) { new_mode = ICE_FEC_BASER; } else if (strcmp(req_fec, "rs") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) { new_mode = ICE_FEC_RS; } else if (strcmp(req_fec, "none") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) { new_mode = ICE_FEC_NONE; } else { device_printf(dev, "%s: \"%s\" is not a valid FEC mode\n", __func__, req_fec); return (EINVAL); } /* Cache user FEC mode for later link ups */ pi->phy.curr_user_fec_req = new_mode; /* Apply settings requested by user */ return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC); } /** * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the negotiated FEC mode, in a string */ static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; char neg_fec[32]; int ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Copy const string into a buffer to drop const qualifier */ bzero(neg_fec, sizeof(neg_fec)); strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec)); ret = sysctl_handle_string(oidp, neg_fec, 0, req); if (req->newptr != NULL) return (EPERM); return (ret); } #define ICE_SYSCTL_HELP_FC_CONFIG \ "\nDisplay or set the port's advertised flow control mode.\n" \ "\t0 - " ICE_FC_STRING_NONE \ "\n\t1 - " ICE_FC_STRING_RX \ "\n\t2 - " ICE_FC_STRING_TX \ "\n\t3 - " ICE_FC_STRING_FULL \ "\nEither the numbers or the strings above can be used to set the advertised mode." /** * ice_sysctl_fc_config - Display/change the advertised flow control mode * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the configured flow control mode * On write: Sets the device's flow control mode to the input, if it's valid. 
* Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG */ static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_info *pi = sc->hw.port_info; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; enum ice_fc_mode old_mode, new_mode; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; int ret, fc_num; bool mode_set = false; struct sbuf buf; char *fc_str_end; char fc_str[32]; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status != ICE_SUCCESS) { device_printf(dev, "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } /* Convert HW response format to SW enum value */ if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) && (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) old_mode = ICE_FC_FULL; else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) old_mode = ICE_FC_TX_PAUSE; else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) old_mode = ICE_FC_RX_PAUSE; else old_mode = ICE_FC_NONE; /* Create "old" string for output */ bzero(fc_str, sizeof(fc_str)); sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req); sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode)); sbuf_finish(&buf); sbuf_delete(&buf); ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req); if ((ret) || (req->newptr == NULL)) return (ret); /* Try to parse input as a string, first */ if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) { new_mode = ICE_FC_FULL; mode_set = true; } else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) { new_mode = ICE_FC_TX_PAUSE; mode_set = true; } else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) { new_mode = ICE_FC_RX_PAUSE; mode_set = true; } else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) { new_mode = ICE_FC_NONE; mode_set = true; } /* * Then check if it's an integer, for compatibility with the method * used in older drivers. */ if (!mode_set) { fc_num = strtol(fc_str, &fc_str_end, 0); if (fc_str_end == fc_str) fc_num = -1; switch (fc_num) { case 3: new_mode = ICE_FC_FULL; break; case 2: new_mode = ICE_FC_TX_PAUSE; break; case 1: new_mode = ICE_FC_RX_PAUSE; break; case 0: new_mode = ICE_FC_NONE; break; default: device_printf(dev, "%s: \"%s\" is not a valid flow control mode\n", __func__, fc_str); return (EINVAL); } } /* Save flow control mode from user */ pi->phy.curr_user_fc_req = new_mode; /* Turn off Priority Flow Control when Link Flow Control is enabled */ if ((hw->port_info->qos_cfg.is_sw_lldp) && (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) && (new_mode != ICE_FC_NONE)) { ret = ice_config_pfc(sc, 0x0); if (ret) return (ret); } /* Apply settings requested by user */ return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC); } /** * ice_sysctl_negotiated_fc - Display currently negotiated FC mode * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the currently negotiated flow control settings. * * If link is not established, this will report ICE_FC_NONE, as no flow * control is negotiated while link is down. 
 */
static int
ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
{
	struct ice_softc *sc = (struct ice_softc *)arg1;
	struct ice_port_info *pi = sc->hw.port_info;
	const char *negotiated_fc;

	UNREFERENCED_PARAMETER(arg2);

	if (ice_driver_is_detaching(sc))
		return (ESHUTDOWN);

	negotiated_fc = ice_flowcontrol_mode(pi);

	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc),
	    0, req);
}

/**
 * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
 *
 * Private handler for phy_type_high and phy_type_low sysctls.
 */
static int
__ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
{
	struct ice_softc *sc = (struct ice_softc *)arg1;
	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	enum ice_status status;
	uint64_t types;
	int ret;

	UNREFERENCED_PARAMETER(arg2);

	if (ice_driver_is_detaching(sc))
		return (ESHUTDOWN);

	status = ice_aq_get_phy_caps(hw->port_info, false,
	    ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL);
	if (status != ICE_SUCCESS) {
		device_printf(dev,
		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
		    __func__, ice_status_str(status),
		    ice_aq_str(hw->adminq.sq_last_status));
		return (EIO);
	}

	if (is_phy_type_high)
		types = pcaps.phy_type_high;
	else
		types = pcaps.phy_type_low;

	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
	if ((ret) || (req->newptr == NULL))
		return (ret);

	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);

	if (is_phy_type_high)
		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
	else
		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;

	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
	if (status != ICE_SUCCESS) {
		device_printf(dev,
		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
		    __func__, ice_status_str(status),
		    ice_aq_str(hw->adminq.sq_last_status));
		return (EIO);
	}

	return (0);
}

/**
 * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays the currently supported lower PHY types
 * On write: Sets the device's supported low PHY types
 */
static int
ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
{
	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
}

/**
 * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays the currently supported higher PHY types
 * On write: Sets the device's supported high PHY types
 */
static int
ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
{
	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
}

/**
 * ice_sysctl_phy_caps - Display response from Get PHY abilities
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 * @report_mode: the mode to report
 *
 * On read: Displays the response from Get PHY abilities with the given report
 * mode.
 */
static int
ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
{
	struct ice_softc *sc = (struct ice_softc *)arg1;
	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
	struct ice_hw *hw = &sc->hw;
	struct ice_port_info *pi = hw->port_info;
	device_t dev = sc->dev;
	enum ice_status status;
	int ret;

	UNREFERENCED_PARAMETER(arg2);

	ret = priv_check(curthread, PRIV_DRIVER);
	if (ret)
		return (ret);

	if (ice_driver_is_detaching(sc))
		return (ESHUTDOWN);

	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
	if (status != ICE_SUCCESS) {
		device_printf(dev,
		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
		    __func__, ice_status_str(status),
		    ice_aq_str(hw->adminq.sq_last_status));
		return (EIO);
	}

	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
	if (req->newptr != NULL)
		return (EPERM);

	return (ret);
}

/**
 * ice_sysctl_phy_sw_caps - Display response from Get PHY abilities
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays the response from Get PHY abilities reporting the last
 * software configuration.
 */
static int
ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
{
	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
	    ICE_AQC_REPORT_ACTIVE_CFG);
}

/**
 * ice_sysctl_phy_nvm_caps - Display response from Get PHY abilities
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays the response from Get PHY abilities reporting the NVM
 * configuration.
 */
static int
ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
{
	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
	    ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
}

/**
 * ice_sysctl_phy_topo_caps - Display response from Get PHY abilities
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays the response from Get PHY abilities reporting the
 * topology configuration.
 */
static int
ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
{
	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
	    ICE_AQC_REPORT_TOPO_CAP_MEDIA);
}

/**
 * ice_sysctl_phy_link_status - Display response from Get Link Status
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays the response from firmware for the Get Link Status
 * request.
*/ static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS) { struct ice_aqc_get_link_status_data link_data = { 0 }; struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; struct ice_aqc_get_link_status *resp; struct ice_aq_desc desc; device_t dev = sc->dev; enum ice_status status; int ret; UNREFERENCED_PARAMETER(arg2); /* * Ensure that only contexts with driver privilege are allowed to * access this information */ ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status); resp = &desc.params.get_link_status; resp->lport_num = pi->lport; status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL); if (status != ICE_SUCCESS) { device_printf(dev, "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req); if (req->newptr != NULL) return (EPERM); return (ret); } /** * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status * @oidp: sysctl oid structure * @arg1: pointer to private softc structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays current persistent LLDP status. */ static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; struct sbuf *sbuf; u32 lldp_state; UNREFERENCED_PARAMETER(arg2); UNREFERENCED_PARAMETER(oidp); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_get_cur_lldp_persist_status(hw, &lldp_state); if (status) { device_printf(dev, "Could not acquire current LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state)); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status * @oidp: sysctl oid structure * @arg1: pointer to private softc structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays default persistent LLDP status. 
*/ static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; struct sbuf *sbuf; u32 lldp_state; UNREFERENCED_PARAMETER(arg2); UNREFERENCED_PARAMETER(oidp); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_get_dflt_lldp_persist_status(hw, &lldp_state); if (status) { device_printf(dev, "Could not acquire default LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state)); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } #define ICE_SYSCTL_HELP_FW_LLDP_AGENT \ "\nDisplay or change FW LLDP agent state:" \ "\n\t0 - disabled" \ "\n\t1 - enabled" /** * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status * @oidp: sysctl oid structure * @arg1: pointer to private softc structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays whether the FW LLDP agent is running * On write: Persistently enables or disables the FW LLDP agent */ static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; int ret; u32 old_state; u8 fw_lldp_enabled; bool retried_start_lldp = false; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_get_cur_lldp_persist_status(hw, &old_state); if (status) { device_printf(dev, "Could not acquire current LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) { status = ice_get_dflt_lldp_persist_status(hw, &old_state); if (status) { device_printf(dev, "Could not acquire default LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } if (old_state == 0) fw_lldp_enabled = false; else fw_lldp_enabled = true; ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (old_state == 0 && fw_lldp_enabled == false) return (0); if (old_state != 0 && fw_lldp_enabled == true) return (0); if (fw_lldp_enabled == false) { status = ice_aq_stop_lldp(hw, true, true, NULL); /* EPERM is returned if the LLDP agent is already shutdown */ if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) { device_printf(dev, "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_aq_set_dcb_parameters(hw, true, NULL); hw->port_info->qos_cfg.is_sw_lldp = true; ice_add_rx_lldp_filter(sc); } else { ice_del_rx_lldp_filter(sc); retry_start_lldp: status = ice_aq_start_lldp(hw, true, NULL); if (status) { switch (hw->adminq.sq_last_status) { /* EEXIST is returned if the LLDP agent is already started */ case ICE_AQ_RC_EEXIST: break; case ICE_AQ_RC_EAGAIN: /* Retry command after a 2 second wait */ if (retried_start_lldp == false) { retried_start_lldp = true; pause("slldp", ICE_START_LLDP_RETRY_WAIT); goto retry_start_lldp; } /* Fallthrough */ default: device_printf(dev, "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } hw->port_info->qos_cfg.is_sw_lldp = false; } return 
(ret); } #define ICE_SYSCTL_HELP_ETS_MIN_RATE \ "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \ "\nIn SW DCB mode, displays and allows setting the table." \ "\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \ "\nWhere the bandwidth total must add up to 100" /** * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Returns the current ETS TC bandwidth table * cached by the driver. * * In SW DCB mode this sysctl also accepts a value that will * be sent to the firmware for configuration. */ static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; struct sbuf *sbuf; int ret; /* Store input rates from user */ char ets_user_buf[128] = ""; u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {}; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL); /* Format ETS BW data for output */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]); if (i != ICE_MAX_TRAFFIC_CLASS - 1) sbuf_printf(sbuf, ","); } sbuf_finish(sbuf); sbuf_delete(sbuf); /* Read in the new ETS values */ ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) return (EPERM); ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100); if (ret) { device_printf(dev, "%s: Could not parse input BW table: %s\n", __func__, ets_user_buf); return (ret); } if (!ice_check_ets_bw(new_ets_table)) { device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n", __func__, ets_user_buf); return (EINVAL); } memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table, sizeof(new_ets_table)); /* If BW > 0, then set TSA entry to 2 */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { if (new_ets_table[i] > 0) local_dcbx_cfg->etscfg.tsatable[i] = 2; else local_dcbx_cfg->etscfg.tsatable[i] = 0; } local_dcbx_cfg->etscfg.willing = 0; local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg; local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING; status = ice_set_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to set DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_do_dcb_reconfig(sc); return (0); } #define ICE_SYSCTL_HELP_UP2TC_MAP \ "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \ "\nIn SW DCB mode, displays and allows setting the table." \ "\nInput must be in this format: 0,1,2,3,4,5,6,7" \ "\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc" /** * ice_sysctl_up2tc_map - Report or configure UP2TC mapping * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * In FW DCB mode, returns the current ETS prio table / * UP2TC mapping from the local MIB. 
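 *
 * Input/output format example, one TC per UP with UP0 first (sysctl path
 * illustrative): setting "0,0,0,0,1,1,1,1" maps UP 0-3 to TC 0 and UP 4-7
 * to TC 1:
 *
 *	# sysctl dev.ice.0.up2tc_map=0,0,0,0,1,1,1,1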
* * In SW DCB mode this sysctl also accepts a value that will * be sent to the firmware for configuration. */ static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; struct sbuf *sbuf; int ret; /* Store input rates from user */ char up2tc_user_buf[128] = ""; /* This array is indexed by UP, not TC */ u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {}; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL); /* Format ETS Priority Mapping Table for output */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]); if (i != ICE_MAX_TRAFFIC_CLASS - 1) sbuf_printf(sbuf, ","); } sbuf_finish(sbuf); sbuf_delete(sbuf); /* Read in the new ETS priority mapping */ ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) return (EPERM); ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, 7); if (ret) { device_printf(dev, "%s: Could not parse input priority assignment table: %s\n", __func__, up2tc_user_buf); return (ret); } /* Prepare updated ETS TLV */ memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc, sizeof(new_up2tc)); status = ice_set_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to set DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_do_dcb_reconfig(sc); return (0); } /** * ice_config_pfc - helper function to set PFC config in FW * @sc: device private structure * @new_mode: bit flags indicating PFC status for TCs * * @pre must be in SW DCB mode * * Configures the driver's local PFC TLV and sends it to the * FW for configuration, then reconfigures the driver/VSI * for DCB if needed. 
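 *
 * For example, a hypothetical caller enabling PFC on TC 3 and TC 4 only
 * would pass a bitmap with bits 3 and 4 set:
 *
 *	error = ice_config_pfc(sc, 0x18);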
*/ static int ice_config_pfc(struct ice_softc *sc, u8 new_mode) { struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; enum ice_status status; pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; /* Prepare updated PFC TLV */ local_dcbx_cfg->pfc.pfcena = new_mode; local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS; local_dcbx_cfg->pfc.willing = 0; local_dcbx_cfg->pfc.mbc = 0; /* Warn if PFC is being disabled with RoCE v2 in use */ if (new_mode == 0 && sc->rdma_entry.attached) device_printf(dev, "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n"); status = ice_set_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to set DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_do_dcb_reconfig(sc); return (0); } #define ICE_SYSCTL_HELP_PFC_CONFIG \ "\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \ "\nIn SW DCB mode, displays and allows setting the configuration" \ "\nInput/Output is in this format: 0xff" \ "\nWhere bit position # enables/disables PFC for that Traffic Class #" /** * ice_sysctl_pfc_config - Report or configure enabled PFC TCs * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * In FW DCB mode, returns a bitmap containing the current TCs * that have PFC enabled on them. * * In SW DCB mode this sysctl also accepts a value that will * be sent to the firmware for configuration. */ static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; int ret; /* Store input flags from user */ u8 user_pfc; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, sizeof(u8)); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; /* Format current PFC enable setting for output */ user_pfc = local_dcbx_cfg->pfc.pfcena; /* Read in the new PFC config */ ret = sysctl_handle_8(oidp, &user_pfc, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) return (EPERM); /* If LFC is active and PFC is going to be turned on, turn LFC off */ if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) { pi->phy.curr_user_fc_req = ICE_FC_NONE; ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC); if (ret) return (ret); } return ice_config_pfc(sc, user_pfc); } /** * ice_add_device_sysctls - add device specific dynamic sysctls * @sc: device private structure * * Add per-device dynamic sysctls which show device configuration or enable * configuring device functionality. For tunable values which can be set prior * to load, see ice_add_device_tunables. * * This function depends on the sysctl layout setup by ice_add_device_tunables, * and likely should be called near the end of the attach process. 
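 *
 * The nodes added here land under the device's sysctl tree; for unit 0
 * that is, e.g., dev.ice.0.fw_version and dev.ice.0.hw.mac.* (paths
 * illustrative).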
*/ void ice_add_device_sysctls(struct ice_softc *sc) { struct sysctl_oid *hw_node; device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_show_fw, "A", "Firmware version"); if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) { SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_pba_number, "A", "Product Board Assembly Number"); } SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW, sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW, sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW, sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN, sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW, sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW, sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW, sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG); /* Differentiate software and hardware statistics, by keeping hw stats * in their own node. This isn't in ice_add_device_tunables, because * we won't have any CTLFLAG_TUN sysctls under this node. */ hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD, NULL, "Port Hardware Statistics"); ice_add_sysctls_mac_stats(ctx, hw_node, &sc->stats.cur); /* Add the main PF VSI stats now. Other VSIs will add their own stats * during creation */ ice_add_vsi_sysctls(&sc->pf_vsi); /* Add sysctls related to debugging the device driver. This includes * sysctls which display additional internal driver state for use in * understanding what is happening within the driver. */ ice_add_debug_sysctls(sc); } /** * @enum hmc_error_type * @brief enumeration of HMC errors * * Enumeration defining the possible HMC errors that might occur. */ enum hmc_error_type { HMC_ERR_PMF_INVALID = 0, HMC_ERR_VF_IDX_INVALID = 1, HMC_ERR_VF_PARENT_PF_INVALID = 2, /* 3 is reserved */ HMC_ERR_INDEX_TOO_BIG = 4, HMC_ERR_ADDRESS_TOO_LARGE = 5, HMC_ERR_SEGMENT_DESC_INVALID = 6, HMC_ERR_SEGMENT_DESC_TOO_SMALL = 7, HMC_ERR_PAGE_DESC_INVALID = 8, HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION = 9, /* 10 is reserved */ HMC_ERR_INVALID_OBJECT_TYPE = 11, /* 12 is reserved */ }; /** * ice_log_hmc_error - Log an HMC error message * @hw: device hw structure * @dev: the device to pass to device_printf() * * Log a message when an HMC error interrupt is triggered. 
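 *
 * Example console output, matching the device_printf() formats below
 * (values and unit number illustrative):
 *
 *	ice0: PF HMC Error detected on PMF index 0:
 *	ice0: error type 4, object type 2, data 0x00000000
 *	ice0: Object index too big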
*/ void ice_log_hmc_error(struct ice_hw *hw, device_t dev) { u32 info, data; u8 index, errtype, objtype; bool isvf; info = rd32(hw, PFHMC_ERRORINFO); data = rd32(hw, PFHMC_ERRORDATA); index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M); errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >> PFHMC_ERRORINFO_HMC_ERROR_TYPE_S); objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >> PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S); isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M; device_printf(dev, "%s HMC Error detected on PMF index %d:\n", isvf ? "VF" : "PF", index); device_printf(dev, "error type %d, object type %d, data 0x%08x\n", errtype, objtype, data); switch (errtype) { case HMC_ERR_PMF_INVALID: device_printf(dev, "Private Memory Function is not valid\n"); break; case HMC_ERR_VF_IDX_INVALID: device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n"); break; case HMC_ERR_VF_PARENT_PF_INVALID: device_printf(dev, "Invalid parent PF for PE enabled VF\n"); break; case HMC_ERR_INDEX_TOO_BIG: device_printf(dev, "Object index too big\n"); break; case HMC_ERR_ADDRESS_TOO_LARGE: device_printf(dev, "Address extends beyond segment descriptor limit\n"); break; case HMC_ERR_SEGMENT_DESC_INVALID: device_printf(dev, "Segment descriptor is invalid\n"); break; case HMC_ERR_SEGMENT_DESC_TOO_SMALL: device_printf(dev, "Segment descriptor is too small\n"); break; case HMC_ERR_PAGE_DESC_INVALID: device_printf(dev, "Page descriptor is invalid\n"); break; case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION: device_printf(dev, "Unsupported Request completion received from PCIe\n"); break; case HMC_ERR_INVALID_OBJECT_TYPE: device_printf(dev, "Invalid object type\n"); break; default: device_printf(dev, "Unknown HMC error\n"); } /* Clear the error indication */ wr32(hw, PFHMC_ERRORINFO, 0); } /** * @struct ice_sysctl_info * @brief sysctl information * * Structure used to simplify the process of defining the many similar * statistics sysctls. */ struct ice_sysctl_info { u64 *stat; const char *name; const char *description; }; /** * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics * @ctx: sysctl ctx to use * @parent: the parent node to add sysctls under * @stats: the ethernet stats structure to source values from * * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI. * Will add them under the parent node specified. * * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF * statistics, so it is not included here. Similarly, rx_discards has different * descriptions for VSIs and MAC/PF stats, so it is also not included here. 
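 *
 * Under the MAC statistics node created by ice_add_sysctls_mac_stats(),
 * for example, these surface as leaves such as (paths illustrative):
 *
 *	dev.ice.0.hw.mac.good_octets_rcvd
 *	dev.ice.0.hw.mac.ucast_pkts_txd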
*/ void ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_eth_stats *stats) { const struct ice_sysctl_info ctls[] = { /* Rx Stats */ { &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" }, { &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" }, { &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" }, { &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" }, /* Tx Stats */ { &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" }, { &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" }, { &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" }, { &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" }, /* End */ { 0, 0, 0 } }; struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent); const struct ice_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } } /** * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: Tx CSO stat to read * @req: sysctl request pointer * * On read: Sums the per-queue Tx CSO stat and displays it. */ static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2; u64 stat = 0; int i; if (ice_driver_is_detaching(vsi->sc)) return (ESHUTDOWN); /* Check that the type is valid */ if (type >= ICE_CSO_STAT_TX_COUNT) return (EDOOFUS); /* Sum the stat for each of the Tx queues */ for (i = 0; i < vsi->num_tx_queues; i++) stat += vsi->tx_queues[i].stats.cso[type]; return sysctl_handle_64(oidp, NULL, stat, req); } /** * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: Rx CSO stat to read * @req: sysctl request pointer * * On read: Sums the per-queue Rx CSO stat and displays it. */ static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2; u64 stat = 0; int i; if (ice_driver_is_detaching(vsi->sc)) return (ESHUTDOWN); /* Check that the type is valid */ if (type >= ICE_CSO_STAT_RX_COUNT) return (EDOOFUS); /* Sum the stat for each of the Rx queues */ for (i = 0; i < vsi->num_rx_queues; i++) stat += vsi->rx_queues[i].stats.cso[type]; return sysctl_handle_64(oidp, NULL, stat, req); } /** * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Sums current values of Rx error statistics and * displays it. 
*/ static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; struct ice_hw_port_stats *hs = &vsi->sc->stats.cur; u64 stat = 0; int i, type; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(vsi->sc)) return (ESHUTDOWN); stat += hs->rx_undersize; stat += hs->rx_fragments; stat += hs->rx_oversize; stat += hs->rx_jabber; stat += hs->rx_len_errors; stat += hs->crc_errors; stat += hs->illegal_bytes; /* Checksum error stats */ for (i = 0; i < vsi->num_rx_queues; i++) for (type = ICE_CSO_STAT_RX_IP4_ERR; type < ICE_CSO_STAT_RX_COUNT; type++) stat += vsi->rx_queues[i].stats.cso[type]; return sysctl_handle_64(oidp, NULL, stat, req); } /** * @struct ice_rx_cso_stat_info * @brief sysctl information for an Rx checksum offload statistic * * Structure used to simplify the process of defining the checksum offload * statistics. */ struct ice_rx_cso_stat_info { enum ice_rx_cso_stat type; const char *name; const char *description; }; /** * @struct ice_tx_cso_stat_info * @brief sysctl information for a Tx checksum offload statistic * * Structure used to simplify the process of defining the checksum offload * statistics. */ struct ice_tx_cso_stat_info { enum ice_tx_cso_stat type; const char *name; const char *description; }; /** * ice_add_sysctls_sw_stats - Add sysctls for software statistics * @vsi: pointer to the VSI to add sysctls for * @ctx: sysctl ctx to use * @parent: the parent node to add sysctls under * * Add statistics sysctls for software tracked statistics of a VSI. * * Currently this only adds checksum offload statistics, but more counters may * be added in the future. */ static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi, struct sysctl_ctx_list *ctx, struct sysctl_oid *parent) { struct sysctl_oid *cso_node; struct sysctl_oid_list *cso_list; /* Tx CSO Stats */ const struct ice_tx_cso_stat_info tx_ctls[] = { { ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" }, { ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" }, /* End */ { ICE_CSO_STAT_TX_COUNT, 0, 0 } }; /* Rx CSO Stats */ const struct ice_rx_cso_stat_info rx_ctls[] = { { ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" }, { ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" }, { ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" }, { ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" }, { ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" }, { ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" }, { ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" }, /* End */ { ICE_CSO_STAT_RX_COUNT, 0, 0 } }; struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent); /* Add a node for statistics tracked by software. 
*/ cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD, NULL, "Checksum offload Statistics"); cso_list = SYSCTL_CHILDREN(cso_node); const struct ice_tx_cso_stat_info *tx_entry = tx_ctls; while (tx_entry->name && tx_entry->description) { SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU", tx_entry->description); tx_entry++; } const struct ice_rx_cso_stat_info *rx_entry = rx_ctls; while (rx_entry->name && rx_entry->description) { SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU", rx_entry->description); rx_entry++; } } /** * ice_add_vsi_sysctls - Add sysctls for a VSI * @vsi: pointer to VSI structure * * Add various sysctls for a given VSI. */ void ice_add_vsi_sysctls(struct ice_vsi *vsi) { struct sysctl_ctx_list *ctx = &vsi->ctx; struct sysctl_oid *hw_node, *sw_node; struct sysctl_oid_list *vsi_list, *hw_list; vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); /* Keep hw stats in their own node. */ hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD, NULL, "VSI Hardware Statistics"); hw_list = SYSCTL_CHILDREN(hw_node); /* Add the ethernet statistics for this VSI */ ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards", CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards, 0, "Discarded Rx Packets (see rx_errors or rx_no_desc)"); SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors", CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, vsi, 0, ice_sysctl_rx_errors_stat, "QU", "Aggregate of all Rx errors"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc", CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc, 0, "Rx Packets Discarded Due To Lack Of Descriptors"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors", CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors, 0, "Tx Packets Discarded Due To Error"); /* Add a node for statistics tracked by software. */ sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD, NULL, "VSI Software Statistics"); ice_add_sysctls_sw_stats(vsi, ctx, sw_node); } /** * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic * @ctx: sysctl ctx to use * @parent_list: parent sysctl list to add sysctls under * @pfc_stat_location: address of statistic for sysctl to display * @node_name: Name for statistic node * @descr: Description used for nodes added in this function * * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node * for a stat and leaves for each traffic class for that stat. 
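 *
 * For example, the "p_xon_recvd" node added by
 * ice_add_sysctls_mac_pfc_stats() below ends up with one numeric leaf per
 * TC (assuming ICE_MAX_DCB_TCS is 8; paths illustrative):
 *
 *	dev.ice.0.hw.mac.p_xon_recvd.0 ... dev.ice.0.hw.mac.p_xon_recvd.7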
*/
static void
ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *parent_list, u64 *pfc_stat_location,
    const char *node_name, const char *descr)
{
    struct sysctl_oid_list *node_list;
    struct sysctl_oid *node;
    struct sbuf *namebuf, *descbuf;

    node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name,
        CTLFLAG_RD, NULL, descr);
    node_list = SYSCTL_CHILDREN(node);

    namebuf = sbuf_new_auto();
    descbuf = sbuf_new_auto();
    for (int i = 0; i < ICE_MAX_DCB_TCS; i++) {
        sbuf_clear(namebuf);
        sbuf_clear(descbuf);

        sbuf_printf(namebuf, "%d", i);
        sbuf_printf(descbuf, "%s for TC %d", descr, i);

        sbuf_finish(namebuf);
        sbuf_finish(descbuf);

        SYSCTL_ADD_U64(ctx, node_list, OID_AUTO,
            sbuf_data(namebuf), CTLFLAG_RD | CTLFLAG_STATS,
            &pfc_stat_location[i], 0, sbuf_data(descbuf));
    }

    sbuf_delete(namebuf);
    sbuf_delete(descbuf);
}

/**
 * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
 * @ctx: the sysctl ctx to use
 * @parent: parent node to add the sysctls under
 * @stats: the hw ports stat structure to pull values from
 *
 * Add global Priority Flow Control MAC statistics sysctls. These are
 * structured as one node per PFC statistic, with a leaf under it for each
 * of the eight traffic classes.
 */
static void
ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
    struct sysctl_oid *parent, struct ice_hw_port_stats *stats)
{
    struct sysctl_oid_list *parent_list;

    parent_list = SYSCTL_CHILDREN(parent);

    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xon_rx, "p_xon_recvd", "PFC XON received");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xoff_rx, "p_xoff_recvd", "PFC XOFF received");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xon_tx, "p_xon_txd", "PFC XON transmitted");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xoff_tx, "p_xoff_txd", "PFC XOFF transmitted");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xon_2_xoff, "p_xon2xoff",
        "PFC XON to XOFF transitions");
}

/**
 * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
 * @ctx: the sysctl ctx to use
 * @parent: parent node to add the sysctls under
 * @stats: the hw ports stat structure to pull values from
 *
 * Add global MAC statistics sysctls.
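 *
 * Assuming the parent node passed in is the device's root sysctl tree, the
 * counters added below surface as, e.g., dev.ice.0.mac.rx_frames_64 (unit
 * number hypothetical).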
*/ void ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_hw_port_stats *stats) { struct sysctl_oid *mac_node; struct sysctl_oid_list *parent_list, *mac_list; parent_list = SYSCTL_CHILDREN(parent); mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD, NULL, "Mac Hardware Statistics"); mac_list = SYSCTL_CHILDREN(mac_node); /* Add the ethernet statistics common to VSI and MAC */ ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth); /* Add PFC stats that add per-TC counters */ ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats); const struct ice_sysctl_info ctls[] = { /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->rx_len_errors, "rx_length_errors", "Receive Length Errors"}, {&stats->eth.rx_discards, "rx_discards", "Discarded Rx Packets by Port (shortage of storage space)"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, {&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* Other */ {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, /* End */ { 0, 0, 0 } }; const struct ice_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } } /** * ice_configure_misc_interrupts - enable 'other' interrupt causes * @sc: pointer to device private softc * * Enable various "other" interrupt causes, and associate them to interrupt 0, * which is our administrative interrupt. 
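 *
 * (OICR here is the per-PF "Other Interrupt Cause" register; the function
 * reads it first so that any latched causes are cleared before the enable
 * mask is programmed.)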
*/ void ice_configure_misc_interrupts(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; u32 val; /* Read the OICR register to clear it */ rd32(hw, PFINT_OICR); /* Enable useful "other" interrupt causes */ val = (PFINT_OICR_ECC_ERR_M | PFINT_OICR_MAL_DETECT_M | PFINT_OICR_GRST_M | PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_VFLR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_CRITERR_M); wr32(hw, PFINT_OICR_ENA, val); /* Note that since we're using MSI-X index 0, and ITR index 0, we do * not explicitly program them when writing to the PFINT_*_CTL * registers. Nevertheless, these writes are associating the * interrupts with the ITR 0 vector */ /* Associate the OICR interrupt with ITR 0, and enable it */ wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M); /* Associate the Mailbox interrupt with ITR 0, and enable it */ wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M); /* Associate the AdminQ interrupt with ITR 0, and enable it */ wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M); } /** * ice_filter_is_mcast - Check if info is a multicast filter * @vsi: vsi structure addresses are targeted towards * @info: filter info * * @returns true if the provided info is a multicast filter, and false * otherwise. */ static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info) { const u8 *addr = info->l_data.mac.mac_addr; /* * Check if this info matches a multicast filter added by * ice_add_mac_to_list */ if ((info->flag == ICE_FLTR_TX) && (info->src_id == ICE_SRC_ID_VSI) && (info->lkup_type == ICE_SW_LKUP_MAC) && (info->vsi_handle == vsi->idx) && ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr)) return true; return false; } /** * @struct ice_mcast_sync_data * @brief data used by ice_sync_one_mcast_filter function * * Structure used to store data needed for processing by the * ice_sync_one_mcast_filter. This structure contains a linked list of filters * to be added, an error indication, and a pointer to the device softc. */ struct ice_mcast_sync_data { struct ice_list_head add_list; struct ice_softc *sc; int err; }; /** * ice_sync_one_mcast_filter - Check if we need to program the filter * @p: void pointer to algorithm data * @sdl: link level socket address * @count: unused count value * * Called by if_foreach_llmaddr to operate on each filter in the ifp filter * list. For the given address, search our internal list to see if we have * found the filter. If not, add it to our list of filters that need to be * programmed. * * @returns (1) if we've actually setup the filter to be added */ static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int __unused count) { struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p; struct ice_softc *sc = data->sc; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; const u8 *sdl_addr = (const u8 *)LLADDR(sdl); struct ice_fltr_mgmt_list_entry *itr; struct ice_list_head *rules; int err; rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; /* * If a previous filter already indicated an error, there is no need * for us to finish processing the rest of the filters. */ if (data->err) return (0); /* See if this filter has already been programmed */ LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) { struct ice_fltr_info *info = &itr->fltr_info; const u8 *addr = info->l_data.mac.mac_addr; /* Only check multicast filters */ if (!ice_filter_is_mcast(&sc->pf_vsi, info)) continue; /* * If this filter matches, mark the internal filter as * "found", and exit. 
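		 * Returning 1 still counts this address as handled by
		 * if_foreach_llmaddr even though no new filter needs to be
		 * programmed.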
*/
        if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
            itr->marker = ICE_FLTR_FOUND;
            return (1);
        }
    }

    /*
     * If we failed to locate the filter in our internal list, we need to
     * place it into our add list.
     */
    err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
        ICE_FWD_TO_VSI);
    if (err) {
        device_printf(sc->dev,
            "Failed to place MAC %6D onto add list, err %s\n",
            sdl_addr, ":", ice_err_str(err));
        data->err = err;
        return (0);
    }

    return (1);
}

/**
 * ice_sync_multicast_filters - Synchronize OS and internal filter list
 * @sc: device private structure
 *
 * Called in response to SIOCDELMULTI to synchronize the operating system
 * multicast address list with the internal list of filters programmed to
 * firmware.
 *
 * Uses a marker bit on the internal list to find both added and deleted
 * filters.
 *
 * First, a loop over the internal list clears the marker bit. Second, each
 * filter in the ifp list is checked. If we find it in the internal list,
 * the marker bit is set. Otherwise, the filter is added to the add list.
 * Third, a loop over the internal list determines if any filters have not
 * been found. Each of these is added to the delete list. Finally, the add
 * and delete lists are programmed to firmware to update the filters.
 *
 * @returns zero on success or an integer error code on failure.
 */
int
ice_sync_multicast_filters(struct ice_softc *sc)
{
    struct ice_hw *hw = &sc->hw;
    struct ice_switch_info *sw = hw->switch_info;
    struct ice_fltr_mgmt_list_entry *itr;
    struct ice_mcast_sync_data data = {};
    struct ice_list_head *rules, remove_list;
    enum ice_status status;
    int err = 0;

    INIT_LIST_HEAD(&data.add_list);
    INIT_LIST_HEAD(&remove_list);
    data.sc = sc;
    data.err = 0;

    rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;

    /* Acquire the lock for the entire duration */
    ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);

    /* (1) Reset the marker state for all filters */
    LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
        itr->marker = ICE_FLTR_NOT_FOUND;

    /* (2) determine which filters need to be added and removed */
    if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
    if (data.err) {
        /* ice_sync_one_mcast_filter already prints an error */
        err = data.err;
        ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
        goto free_filter_lists;
    }

    LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
        struct ice_fltr_info *info = &itr->fltr_info;
        const u8 *addr = info->l_data.mac.mac_addr;

        /* Only check multicast filters */
        if (!ice_filter_is_mcast(&sc->pf_vsi, info))
            continue;

        /*
         * If the filter is not marked as found, then it must no
         * longer be in the ifp address list, so we need to remove it.
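         * This is the sweep half of the mark-and-sweep scheme: anything
         * still set to ICE_FLTR_NOT_FOUND was never seen during the ifp
         * walk above.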
*/
        if (itr->marker == ICE_FLTR_NOT_FOUND) {
            err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
                addr, ICE_FWD_TO_VSI);
            if (err) {
                device_printf(sc->dev,
                    "Failed to place MAC %6D onto remove list, err %s\n",
                    addr, ":", ice_err_str(err));
                ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
                goto free_filter_lists;
            }
        }
    }

    ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);

    status = ice_add_mac(hw, &data.add_list);
    if (status) {
        device_printf(sc->dev,
            "Could not add new MAC filters, err %s aq_err %s\n",
            ice_status_str(status),
            ice_aq_str(hw->adminq.sq_last_status));
        err = (EIO);
        goto free_filter_lists;
    }

    status = ice_remove_mac(hw, &remove_list);
    if (status) {
        device_printf(sc->dev,
            "Could not remove old MAC filters, err %s aq_err %s\n",
            ice_status_str(status),
            ice_aq_str(hw->adminq.sq_last_status));
        err = (EIO);
        goto free_filter_lists;
    }

free_filter_lists:
    ice_free_fltr_list(&data.add_list);
    ice_free_fltr_list(&remove_list);

    return (err);
}

/**
 * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
 * @vsi: The VSI to add the filter for
 * @vid: VLAN to add
 *
 * Programs a HW filter so that the given VSI will receive the specified VLAN.
 */
enum ice_status
ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
{
    struct ice_hw *hw = &vsi->sc->hw;
    struct ice_list_head vlan_list;
    struct ice_fltr_list_entry vlan_entry;

    INIT_LIST_HEAD(&vlan_list);
    memset(&vlan_entry, 0, sizeof(vlan_entry));
    vlan_entry.fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
    vlan_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
    vlan_entry.fltr_info.flag = ICE_FLTR_TX;
    vlan_entry.fltr_info.src_id = ICE_SRC_ID_VSI;
    vlan_entry.fltr_info.vsi_handle = vsi->idx;
    vlan_entry.fltr_info.l_data.vlan.vlan_id = vid;

    LIST_ADD(&vlan_entry.list_entry, &vlan_list);

    return ice_add_vlan(hw, &vlan_list);
}

/**
 * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
 * @vsi: The VSI to remove the filter for
 * @vid: VLAN to remove
 *
 * Removes a previously programmed HW filter for the specified VSI.
 */
enum ice_status
ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
{
    struct ice_hw *hw = &vsi->sc->hw;
    struct ice_list_head vlan_list;
    struct ice_fltr_list_entry vlan_entry;

    INIT_LIST_HEAD(&vlan_list);
    memset(&vlan_entry, 0, sizeof(vlan_entry));
    vlan_entry.fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
    vlan_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
    vlan_entry.fltr_info.flag = ICE_FLTR_TX;
    vlan_entry.fltr_info.src_id = ICE_SRC_ID_VSI;
    vlan_entry.fltr_info.vsi_handle = vsi->idx;
    vlan_entry.fltr_info.l_data.vlan.vlan_id = vid;

    LIST_ADD(&vlan_entry.list_entry, &vlan_list);

    return ice_remove_vlan(hw, &vlan_list);
}

#define ICE_SYSCTL_HELP_RX_ITR \
"\nControl Rx interrupt throttle rate."
\ "\n\t0-8160 - sets interrupt rate in usecs" \ "\n\t -1 - reset the Rx itr to default" /** * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the current Rx ITR value * on write: Sets the Rx ITR value, reconfiguring device if it is up */ static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; struct ice_softc *sc = vsi->sc; int increment, ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (vsi->rx_itr < 0) vsi->rx_itr = ICE_DFLT_RX_ITR; if (vsi->rx_itr > ICE_ITR_MAX) vsi->rx_itr = ICE_ITR_MAX; /* Assume 2usec increment if it hasn't been loaded yet */ increment = sc->hw.itr_gran ? : 2; /* We need to round the value to the hardware's ITR granularity */ vsi->rx_itr = (vsi->rx_itr / increment ) * increment; /* If the driver has finished initializing, then we need to reprogram * the ITR registers now. Otherwise, they will be programmed during * driver initialization. */ if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) ice_configure_rx_itr(vsi); return (0); } #define ICE_SYSCTL_HELP_TX_ITR \ "\nControl Tx interrupt throttle rate." \ "\n\t0-8160 - sets interrupt rate in usecs" \ "\n\t -1 - reset the Tx itr to default" /** * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the current Tx ITR value * on write: Sets the Tx ITR value, reconfiguring device if it is up */ static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; struct ice_softc *sc = vsi->sc; int increment, ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); /* Allow configuring a negative value to reset to the default */ if (vsi->tx_itr < 0) vsi->tx_itr = ICE_DFLT_TX_ITR; if (vsi->tx_itr > ICE_ITR_MAX) vsi->tx_itr = ICE_ITR_MAX; /* Assume 2usec increment if it hasn't been loaded yet */ increment = sc->hw.itr_gran ? : 2; /* We need to round the value to the hardware's ITR granularity */ vsi->tx_itr = (vsi->tx_itr / increment ) * increment; /* If the driver has finished initializing, then we need to reprogram * the ITR registers now. Otherwise, they will be programmed during * driver initialization. */ if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) ice_configure_tx_itr(vsi); return (0); } /** * ice_add_vsi_tunables - Add tunables and nodes for a VSI * @vsi: pointer to VSI structure * @parent: parent node to add the tunables under * * Create a sysctl context for the VSI, so that sysctls for the VSI can be * dynamically removed upon VSI removal. * * Add various tunables and set up the basic node structure for the VSI. Must * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as * possible after the VSI memory is initialized. * * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that * their values can be read from loader.conf prior to their first use in the * driver. 
*/
void
ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
{
    struct sysctl_oid_list *vsi_list;
    char vsi_name[32], vsi_desc[32];

    struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);

    /* Initialize the sysctl context for this VSI */
    sysctl_ctx_init(&vsi->ctx);

    /* Add a node to collect this VSI's statistics together */
    snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
    snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
    vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO,
        vsi_name, CTLFLAG_RD, NULL, vsi_desc);
    vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);

    vsi->rx_itr = ICE_DFLT_RX_ITR;
    SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
        CTLTYPE_S16 | CTLFLAG_RWTUN, vsi, 0, ice_sysctl_rx_itr, "S",
        ICE_SYSCTL_HELP_RX_ITR);

    vsi->tx_itr = ICE_DFLT_TX_ITR;
    SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
        CTLTYPE_S16 | CTLFLAG_RWTUN, vsi, 0, ice_sysctl_tx_itr, "S",
        ICE_SYSCTL_HELP_TX_ITR);
}

/**
 * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
 * @vsi: the VSI to remove contexts for
 *
 * Free the context for the VSI sysctls. This includes the main context, as
 * well as the per-queue sysctls.
 */
void
ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
{
    device_t dev = vsi->sc->dev;
    int err;

    if (vsi->vsi_node) {
        err = sysctl_ctx_free(&vsi->ctx);
        if (err)
            device_printf(dev,
                "failed to free VSI %d sysctl context, err %s\n",
                vsi->idx, ice_err_str(err));
        vsi->vsi_node = NULL;
    }
}

/**
 * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
 * @sc: device private structure
 *
 * Add per-device dynamic tunable sysctls, and set up the general sysctl
 * trees for re-use by ice_add_device_sysctls.
 *
 * In order for the sysctl fields to be initialized before use, this function
 * should be called as early as possible during attach activities.
 *
 * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
 * here in this function, rather than later in ice_add_device_sysctls.
 *
 * To make things easier, this function is also expected to set up the
 * various sysctl nodes in addition to tunables so that other sysctls which
 * can't be initialized early can hook into the same nodes.
 */
void
ice_add_device_tunables(struct ice_softc *sc)
{
    device_t dev = sc->dev;

    struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
    struct sysctl_oid_list *ctx_list =
        SYSCTL_CHILDREN(device_get_sysctl_tree(dev));

    sc->enable_health_events = ice_enable_health_events;

    SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
        CTLFLAG_RDTUN, &sc->enable_health_events, 0,
        "Enable FW health event reporting for this PF");

    /* Add a node to track VSI sysctls. Keep track of the node in the
     * softc so that we can hook other sysctls into it later. This
     * includes both the VSI statistics, as well as potentially dynamic
     * VSIs in the future.
     */
    sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
        CTLFLAG_RD, NULL, "VSI Configuration and Statistics");

    /* Add debug tunables */
    ice_add_debug_tunables(sc);
}

/**
 * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
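 *
 * Example invocation (hypothetical unit number):
 *
 *   sysctl dev.ice.0.debug.switch.mac_filters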
*/ static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; sbuf_printf(sbuf, "MAC Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d", fi->l_data.mac.mac_addr, ":", fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters. 
*/ static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules; sbuf_printf(sbuf, "VLAN Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d", fi->l_data.vlan.vlan_id, fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype * filters. 
*/ static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules; sbuf_printf(sbuf, "Ethertype Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d", fi->l_data.ethertype_mac.ethertype, fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "ethertype_mac_filters" sysctl to dump the programmed * Ethertype/MAC filters. 
*/ static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules; sbuf_printf(sbuf, "Ethertype/MAC Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d", fi->l_data.ethertype_mac.ethertype, fi->l_data.ethertype_mac.mac_addr, ":", fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_state_flags - Dump device driver state flags * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "state" sysctl to display currently set driver state flags. */ static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct sbuf *sbuf; u32 copied_state; unsigned int i; bool at_least_one = false; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Make a copy of the state to ensure we display coherent values */ copied_state = atomic_load_acq_32(&sc->state); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Add the string for each set state to the sbuf */ for (i = 0; i < 32; i++) { if (copied_state & BIT(i)) { const char *str = ice_state_to_str((enum ice_state)i); at_least_one = true; if (str) sbuf_printf(sbuf, "\n%s", str); else sbuf_printf(sbuf, "\nBIT(%u)", i); } } if (!at_least_one) sbuf_printf(sbuf, "Nothing set"); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_add_debug_tunables - Add tunables helpful for debugging the device driver * @sc: device private structure * * Add sysctl tunable values related to debugging the device driver. For now, * this means a tunable to set the debug mask early during driver load. * * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so * that in normal kernel builds, these will all be hidden, but on a debug * kernel they will be more easily visible. 
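 *
 * Note that a skipped OID is only hidden from sysctl(8) listings; it can
 * still be read or written when addressed by its exact name.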
*/ static void ice_add_debug_tunables(struct ice_softc *sc) { struct sysctl_oid_list *debug_list; device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(sc->debug_sysctls); SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask", ICE_CTLFLAG_DEBUG | CTLFLAG_RW | CTLFLAG_TUN, &sc->hw.debug_mask, 0, "Debug message enable/disable mask"); /* Load the default value from the global sysctl first */ sc->enable_tx_fc_filter = ice_enable_tx_fc_filter; SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter", ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN, &sc->enable_tx_fc_filter, 0, "Drop Ethertype 0x8808 control frames originating from software on this PF"); /* Load the default value from the global sysctl first */ sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter; SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter", ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN, &sc->enable_tx_lldp_filter, 0, "Drop Ethertype 0x88cc LLDP frames originating from software on this PF"); ice_add_fw_logging_tunables(sc, sc->debug_sysctls); } #define ICE_SYSCTL_HELP_REQUEST_RESET \ "\nRequest the driver to initiate a reset." \ "\n\tpfr - Initiate a PF reset" \ "\n\tcorer - Initiate a CORE reset" \ "\n\tglobr - Initiate a GLOBAL reset" /** * @var rl_sysctl_ticks * @brief timestamp for latest reset request sysctl call * * Helps rate-limit the call to the sysctl which resets the device */ int rl_sysctl_ticks = 0; /** * ice_sysctl_request_reset - Request that the driver initiate a reset * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "request_reset" sysctl to request that the driver initiate * a reset. Expects to be passed one of the following strings * * "pfr" - Initiate a PF reset * "corer" - Initiate a CORE reset * "globr" - Initiate a Global reset */ static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; enum ice_status status; enum ice_reset_req reset_type = ICE_RESET_INVAL; const char *reset_message; int ret; /* Buffer to store the requested reset string. Must contain enough * space to store the largest expected reset string, which currently * means 6 bytes of space. */ char reset[6] = ""; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Read in the requested reset type. */ ret = sysctl_handle_string(oidp, reset, sizeof(reset), req); if ((ret) || (req->newptr == NULL)) return (ret); if (strcmp(reset, "pfr") == 0) { reset_message = "Requesting a PF reset"; reset_type = ICE_RESET_PFR; } else if (strcmp(reset, "corer") == 0) { reset_message = "Initiating a CORE reset"; reset_type = ICE_RESET_CORER; } else if (strcmp(reset, "globr") == 0) { reset_message = "Initiating a GLOBAL reset"; reset_type = ICE_RESET_GLOBR; } else if (strcmp(reset, "empr") == 0) { device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n"); return (EOPNOTSUPP); } if (reset_type == ICE_RESET_INVAL) { device_printf(sc->dev, "%s is not a valid reset request\n", reset); return (EINVAL); } /* * Rate-limit the frequency at which this function is called. 
     * Requests arriving less than 500 ms apart are rejected. We also
     * guard against the window where a reset has completed but the
     * driver is still rebuilding; rather than queueing the request, we
     * simply error out and let the caller retry if desired.
     */
    if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
        device_printf(sc->dev,
            "Call frequency too high. Operation aborted.\n");
        return (EBUSY);
    }
    rl_sysctl_ticks = ticks;

    if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
        device_printf(sc->dev,
            "Device rebuilding. Operation aborted.\n");
        return (EBUSY);
    }

    if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
        device_printf(sc->dev,
            "Device in reset. Operation aborted.\n");
        return (EBUSY);
    }

    device_printf(sc->dev, "%s\n", reset_message);

    /* Initiate the PF reset during the admin status task */
    if (reset_type == ICE_RESET_PFR) {
        ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
        return (0);
    }

    /*
     * Other types of resets including CORE and GLOBAL resets trigger an
     * interrupt on all PFs. Initiate the reset now. Preparation and
     * rebuild logic will be handled by the admin status task.
     */
    status = ice_reset(hw, reset_type);

    /*
     * Resets can take a long time and we still don't want another call
     * to this function before we settle down.
     */
    rl_sysctl_ticks = ticks;

    if (status) {
        device_printf(sc->dev,
            "failed to initiate device reset, err %s\n",
            ice_status_str(status));
        ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
        return (EFAULT);
    }

    return (0);
}

/**
 * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
 * @sc: device private structure
 *
 * Add sysctls related to debugging the device driver. Generally these should
 * simply be sysctls which dump internal driver state, to aid in understanding
 * what the driver is doing.
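 *
 * For example (hypothetical unit number), a PF reset can be requested
 * through the sysctl added below with:
 *
 *   sysctl dev.ice.0.debug.request_reset=pfr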
*/ static void ice_add_debug_sysctls(struct ice_softc *sc) { struct sysctl_oid *sw_node; struct sysctl_oid_list *debug_list, *sw_list; device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); debug_list = SYSCTL_CHILDREN(sc->debug_sysctls); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0, ice_sysctl_request_reset, "A", ICE_SYSCTL_HELP_REQUEST_RESET); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.pfr_count, 0, "# of PF resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.corer_count, 0, "# of CORE resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.globr_count, 0, "# of Global resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.empr_count, 0, "# of EMP resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.tx_mdd_count, 0, "# of Tx MDD events detected"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.rx_mdd_count, 0, "# of Rx MDD events detected"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_state_flags, "A", "Driver State Flags"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low", ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0, ice_sysctl_phy_type_low, "QU", "PHY type Low from Get PHY Caps/Set PHY Cfg"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high", ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0, ice_sysctl_phy_type_high, "QU", "PHY type High from Get PHY Caps/Set PHY Cfg"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_sw_caps, "", "Get PHY Capabilities (Software configuration)"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_nvm_caps, "", "Get PHY Capabilities (NVM configuration)"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_topo_caps, "", "Get PHY Capabilities (Topology configuration)"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_link_status, "", "Get PHY Link Status"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_read_i2c_diag_data, "A", "Dump selected diagnostic data from FW"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0, "FW Build ID"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_os_pkg_version, "A", "DDP package name and version found in ice_ddp"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_fw_cur_lldp_persist_status, "A", "Current LLDP persistent status"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_fw_dflt_lldp_persist_status, "A", "Default 
LLDP persistent status"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_negotiated_fc, "A", "Current Negotiated Flow Control mode"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg", CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL, ice_sysctl_dump_dcbx_cfg, "A", "Dumps Local MIB information from firmware"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg", CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE, ice_sysctl_dump_dcbx_cfg, "A", "Dumps Remote MIB information from firmware"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_vsi_cfg, "A", "Dumps Selected PF VSI parameters from firmware"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_query_port_ets, "A", "Prints selected output from Query Port ETS AQ command"); sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL, "Switch Configuration"); sw_list = SYSCTL_CHILDREN(sw_node); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_mac_filters, "A", "MAC Filters"); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_vlan_filters, "A", "VLAN Filters"); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_ethertype_filters, "A", "Ethertype Filters"); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_ethertype_mac_filters, "A", "Ethertype/MAC Filters"); } /** * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI * @vsi: the VSI to disable * * Disables the Tx queues associated with this VSI. Essentially the opposite * of ice_cfg_vsi_for_tx. */ int ice_vsi_disable_tx(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; enum ice_status status; u32 *q_teids; u16 *q_ids, *q_handles; size_t q_teids_size, q_ids_size, q_handles_size; int tc, j, buf_idx, err = 0; if (vsi->num_tx_queues > 255) return (ENOSYS); q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues; q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO); if (!q_teids) return (ENOMEM); q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues; q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO); if (!q_ids) { err = (ENOMEM); goto free_q_teids; } q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues; q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO); if (!q_handles) { err = (ENOMEM); goto free_q_ids; } ice_for_each_traffic_class(tc) { buf_idx = 0; for (j = 0; j < vsi->num_tx_queues; j++) { struct ice_tx_queue *txq = &vsi->tx_queues[j]; if (txq->tc != tc) continue; q_ids[buf_idx] = vsi->tx_qmap[j]; q_handles[buf_idx] = txq->q_handle; q_teids[buf_idx] = txq->q_teid; buf_idx++; } /* Skip TC if no queues belong to it */ if (buf_idx == 0) continue; status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx, q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL); if (status == ICE_ERR_DOES_NOT_EXIST) { ; /* Queues have already been disabled, no need to report this as an error */ } else if (status == ICE_ERR_RESET_ONGOING) { device_printf(sc->dev, "Reset in progress. 
LAN Tx queues already disabled\n"); break; } else if (status) { device_printf(sc->dev, "Failed to disable LAN Tx queues: err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (ENODEV); break; } /* Clear buffers */ memset(q_teids, 0, q_teids_size); memset(q_ids, 0, q_ids_size); memset(q_handles, 0, q_handles_size); } /* free_q_handles: */ free(q_handles, M_ICE); free_q_ids: free(q_ids, M_ICE); free_q_teids: free(q_teids, M_ICE); return err; } /** * ice_vsi_set_rss_params - Set the RSS parameters for the VSI * @vsi: the VSI to configure * * Sets the RSS table size and lookup table type for the VSI based on its * VSI type. */ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw_common_caps *cap; cap = &sc->hw.func_caps.common_cap; switch (vsi->type) { case ICE_VSI_PF: /* The PF VSI inherits RSS instance of the PF */ vsi->rss_table_size = cap->rss_table_size; vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; break; case ICE_VSI_VF: vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI; break; default: device_printf(sc->dev, "VSI %d: RSS not supported for VSI type %d\n", vsi->idx, vsi->type); break; } } /** * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls * @vsi: The VSI to add the context for * * Creates a sysctl context for storing txq sysctls. Additionally creates * a node rooted at the given VSI's main sysctl node. This context will be * used to store per-txq sysctls which may need to be released during the * driver's lifetime. */ void ice_vsi_add_txqs_ctx(struct ice_vsi *vsi) { struct sysctl_oid_list *vsi_list; sysctl_ctx_init(&vsi->txqs_ctx); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs", CTLFLAG_RD, NULL, "Tx Queues"); } /** * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls * @vsi: The VSI to add the context for * * Creates a sysctl context for storing rxq sysctls. Additionally creates * a node rooted at the given VSI's main sysctl node. This context will be * used to store per-rxq sysctls which may need to be released during the * driver's lifetime. */ void ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi) { struct sysctl_oid_list *vsi_list; sysctl_ctx_init(&vsi->rxqs_ctx); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs", CTLFLAG_RD, NULL, "Rx Queues"); } /** * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI * @vsi: The VSI to delete from * * Frees the txq sysctl context created for storing the per-queue Tx sysctls. * Must be called prior to freeing the Tx queue memory, in order to avoid * having sysctls point at stale memory. */ void ice_vsi_del_txqs_ctx(struct ice_vsi *vsi) { device_t dev = vsi->sc->dev; int err; if (vsi->txqs_node) { err = sysctl_ctx_free(&vsi->txqs_ctx); if (err) device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n", vsi->idx, ice_err_str(err)); vsi->txqs_node = NULL; } } /** * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI * @vsi: The VSI to delete from * * Frees the rxq sysctl context created for storing the per-queue Rx sysctls. * Must be called prior to freeing the Rx queue memory, in order to avoid * having sysctls point at stale memory. 
*/ void ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi) { device_t dev = vsi->sc->dev; int err; if (vsi->rxqs_node) { err = sysctl_ctx_free(&vsi->rxqs_ctx); if (err) device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n", vsi->idx, ice_err_str(err)); vsi->rxqs_node = NULL; } } /** * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue * @txq: pointer to the Tx queue * * Add per-queue sysctls for a given Tx queue. Can't be called during * ice_add_vsi_sysctls, since the queue memory has not yet been setup. */ void ice_add_txq_sysctls(struct ice_tx_queue *txq) { struct ice_vsi *vsi = txq->vsi; struct sysctl_ctx_list *ctx = &vsi->txqs_ctx; struct sysctl_oid_list *txqs_list, *this_txq_list; struct sysctl_oid *txq_node; char txq_name[32], txq_desc[32]; const struct ice_sysctl_info ctls[] = { { &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" }, { &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" }, { &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" }, { 0, 0, 0 } }; const struct ice_sysctl_info *entry = ctls; txqs_list = SYSCTL_CHILDREN(vsi->txqs_node); snprintf(txq_name, sizeof(txq_name), "%u", txq->me); snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me); txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name, CTLFLAG_RD, NULL, txq_desc); this_txq_list = SYSCTL_CHILDREN(txq_node); /* Add the Tx queue statistics */ while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc", CTLFLAG_RD, &txq->tc, 0, "Traffic Class that Queue belongs to"); } /** * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue * @rxq: pointer to the Rx queue * * Add per-queue sysctls for a given Rx queue. Can't be called during * ice_add_vsi_sysctls, since the queue memory has not yet been setup. */ void ice_add_rxq_sysctls(struct ice_rx_queue *rxq) { struct ice_vsi *vsi = rxq->vsi; struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx; struct sysctl_oid_list *rxqs_list, *this_rxq_list; struct sysctl_oid *rxq_node; char rxq_name[32], rxq_desc[32]; const struct ice_sysctl_info ctls[] = { { &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" }, { &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" }, { &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" }, { 0, 0, 0 } }; const struct ice_sysctl_info *entry = ctls; rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node); snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me); snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me); rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name, CTLFLAG_RD, NULL, rxq_desc); this_rxq_list = SYSCTL_CHILDREN(rxq_node); /* Add the Rx queue statistics */ while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc", CTLFLAG_RD, &rxq->tc, 0, "Traffic Class that Queue belongs to"); } /** * ice_get_default_rss_key - Obtain a default RSS key * @seed: storage for the RSS key data * * Copies a pre-generated RSS key into the seed memory. The seed pointer must * point to a block of memory that is at least 40 bytes in size. * * The key isn't randomly generated each time this function is called because * that makes the RSS key change every time we reconfigure RSS. This does mean * that we're hard coding a possibly 'well known' key. 
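 * (A predictable key matters because a remote sender who knows it can
 * craft flows that all hash onto the same receive queue.)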
We might want to * investigate randomly generating this key once during the first call. */ static void ice_get_default_rss_key(u8 *seed) { const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = { 0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8, 0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97, 0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0, 0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5, }; bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); } /** * ice_set_rss_key - Configure a given VSI with the default RSS key * @vsi: the VSI to configure * * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key. * If the kernel RSS interface is not available, this will fall back to our * pre-generated hash seed from ice_get_default_rss_key(). */ static int ice_set_rss_key(struct ice_vsi *vsi) { struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} }; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; enum ice_status status; /* * If the RSS kernel interface is disabled, this will return the * default RSS key above. */ rss_getkey(keydata.standard_rss_key); status = ice_aq_set_rss_key(hw, vsi->idx, &keydata); if (status) { device_printf(sc->dev, "ice_aq_set_rss_key status %s, error %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return (0); } /** * ice_set_rss_flow_flds - Program the RSS hash flows after package init * @vsi: the VSI to configure * * If the package file is initialized, the default RSS flows are reset. We * need to reprogram the expected hash configuration. We'll use * rss_gethashconfig() to determine which flows to enable. If RSS kernel * support is not enabled, this macro will fall back to suitable defaults. 
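 *
 * For example, when the kernel reports RSS_HASHTYPE_RSS_TCP_IPV4, the
 * function enables hashing over the IPv4 address pair plus the TCP port
 * pair (assumed here to be what ICE_HASH_TCP_IPV4 covers).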
*/ static void ice_set_rss_flow_flds(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false }; device_t dev = sc->dev; enum ice_status status; u_int rss_hash_config; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4; rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP; rss_cfg.hash_flds = ICE_HASH_TCP_IPV4; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP; rss_cfg.hash_flds = ICE_HASH_UDP_IPV4; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6; rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP; rss_cfg.hash_flds = ICE_HASH_TCP_IPV6; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP; rss_cfg.hash_flds = ICE_HASH_UDP_IPV6; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /* Warn about RSS hash types which are not supported */ /* coverity[dead_error_condition] */ if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) { device_printf(dev, "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n", vsi->idx); } } /** * ice_set_rss_lut - Program the RSS lookup table for a VSI * @vsi: the VSI to configure * * Programs the RSS lookup table for a given VSI. We use * rss_get_indirection_to_bucket which will use the indirection table provided * by the kernel RSS interface when available. If the kernel RSS interface is * not available, we will fall back to a simple round-robin fashion queue * assignment. 
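 *
 * For example, with four Rx queues and no kernel RSS support, lookup
 * table entry i simply maps to queue (i % 4).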
*/ static int ice_set_rss_lut(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct ice_aq_get_set_rss_lut_params lut_params; enum ice_status status; int i, err = 0; u8 *lut; lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO); if (!lut) { device_printf(dev, "Failed to allocate RSS lut memory\n"); return (ENOMEM); } /* Populate the LUT with max no. of queues. If the RSS kernel * interface is disabled, this will assign the lookup table in * a simple round robin fashion */ for (i = 0; i < vsi->rss_table_size; i++) { /* XXX: this needs to be changed if num_rx_queues ever counts * more than just the RSS queues */ lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues; } lut_params.vsi_handle = vsi->idx; lut_params.lut_size = vsi->rss_table_size; lut_params.lut_type = vsi->rss_lut_type; lut_params.lut = lut; lut_params.global_lut_id = 0; status = ice_aq_set_rss_lut(hw, &lut_params); if (status) { device_printf(dev, "Cannot set RSS lut, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (EIO); } free(lut, M_ICE); return err; } /** * ice_config_rss - Configure RSS for a VSI * @vsi: the VSI to configure * * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for * a given VSI. */ int ice_config_rss(struct ice_vsi *vsi) { int err; /* Nothing to do, if RSS is not enabled */ if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS)) return 0; err = ice_set_rss_key(vsi); if (err) return err; ice_set_rss_flow_flds(vsi); return ice_set_rss_lut(vsi); } /** * ice_log_pkg_init - Log a message about status of DDP initialization * @sc: the device softc pointer * @pkg_status: the status result of ice_copy_and_init_pkg * * Called by ice_load_pkg after an attempt to download the DDP package * contents to the device. Determines whether the download was successful or * not and logs an appropriate message for the system administrator. * * @post if a DDP package was previously downloaded on another port and it * is not compatible with this driver, pkg_status will be updated to reflect * this, and the driver will transition to safe mode. */ void ice_log_pkg_init(struct ice_softc *sc, enum ice_status *pkg_status) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *active_pkg, *os_pkg; active_pkg = sbuf_new_auto(); ice_active_pkg_version_str(hw, active_pkg); sbuf_finish(active_pkg); os_pkg = sbuf_new_auto(); ice_os_pkg_version_str(hw, os_pkg); sbuf_finish(os_pkg); switch (*pkg_status) { case ICE_SUCCESS: /* The package download AdminQ command returned success because * this download succeeded or ICE_ERR_AQ_NO_WORK since there is * already a package loaded on the device. */ if (hw->pkg_ver.major == hw->active_pkg_ver.major && hw->pkg_ver.minor == hw->active_pkg_ver.minor && hw->pkg_ver.update == hw->active_pkg_ver.update && hw->pkg_ver.draft == hw->active_pkg_ver.draft && !memcmp(hw->pkg_name, hw->active_pkg_name, sizeof(hw->pkg_name))) { switch (hw->pkg_dwnld_status) { case ICE_AQ_RC_OK: device_printf(dev, "The DDP package was successfully loaded: %s.\n", sbuf_data(active_pkg)); break; case ICE_AQ_RC_EEXIST: device_printf(dev, "DDP package already present on device: %s.\n", sbuf_data(active_pkg)); break; default: /* We do not expect this to occur, but the * extra messaging is here in case something * changes in the ice_init_pkg flow. */ device_printf(dev, "DDP package already present on device: %s. 
An unexpected error occurred, pkg_dwnld_status %s.\n", sbuf_data(active_pkg), ice_aq_str(hw->pkg_dwnld_status)); break; } } else if (pkg_ver_compatible(&hw->active_pkg_ver) == 0) { device_printf(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package %s. The ice_ddp module has package: %s.\n", sbuf_data(active_pkg), sbuf_data(os_pkg)); } else if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) { device_printf(dev, "The device has a DDP package that is higher than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); *pkg_status = ICE_ERR_NOT_SUPPORTED; } else { device_printf(dev, "The device has a DDP package that is lower than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); *pkg_status = ICE_ERR_NOT_SUPPORTED; } break; case ICE_ERR_NOT_SUPPORTED: /* * This assumes that the active_pkg_ver will not be * initialized if the ice_ddp package version is not * supported. */ if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) { /* The ice_ddp version is not supported */ if (pkg_ver_compatible(&hw->pkg_ver) > 0) { device_printf(dev, "The DDP package in the ice_ddp module is higher than the driver supports. The ice_ddp module has package %s. The driver requires version %d.%d.x.x. Please use an updated driver. Entering Safe Mode.\n", sbuf_data(os_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else if (pkg_ver_compatible(&hw->pkg_ver) < 0) { device_printf(dev, "The DDP package in the ice_ddp module is lower than the driver supports. The ice_ddp module has package %s. The driver requires version %d.%d.x.x. Please use an updated ice_ddp module. Entering Safe Mode.\n", sbuf_data(os_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else { device_printf(dev, "An unknown error (%s aq_err %s) occurred when loading the DDP package. The ice_ddp module has package %s. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", ice_status_str(*pkg_status), ice_aq_str(hw->pkg_dwnld_status), sbuf_data(os_pkg), sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } } else { if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) { device_printf(dev, "The device has a DDP package that is higher than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) { device_printf(dev, "The device has a DDP package that is lower than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else { device_printf(dev, "An unknown error (%s aq_err %s) occurred when loading the DDP package. The ice_ddp module has package %s. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", ice_status_str(*pkg_status), ice_aq_str(hw->pkg_dwnld_status), sbuf_data(os_pkg), sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } } break; case ICE_ERR_CFG: case ICE_ERR_BUF_TOO_SHORT: case ICE_ERR_PARAM: device_printf(dev, "The DDP package in the ice_ddp module is invalid. 
Entering Safe Mode.\n"); break; case ICE_ERR_FW_DDP_MISMATCH: device_printf(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering Safe Mode.\n"); break; case ICE_ERR_AQ_ERROR: switch (hw->pkg_dwnld_status) { case ICE_AQ_RC_ENOSEC: case ICE_AQ_RC_EBADSIG: device_printf(dev, "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid. Please use a valid ice_ddp module. Entering Safe Mode.\n"); goto free_sbufs; case ICE_AQ_RC_ESVN: device_printf(dev, "The DDP package in the ice_ddp module could not be loaded because its security revision is too low. Please use an updated ice_ddp module. Entering Safe Mode.\n"); goto free_sbufs; case ICE_AQ_RC_EBADMAN: case ICE_AQ_RC_EBADBUF: device_printf(dev, "An error occurred on the device while loading the DDP package. Entering Safe Mode.\n"); goto free_sbufs; default: break; } /* fall-through */ default: device_printf(dev, "An unknown error (%s aq_err %s) occurred when loading the DDP package. Entering Safe Mode.\n", ice_status_str(*pkg_status), ice_aq_str(hw->pkg_dwnld_status)); break; } free_sbufs: sbuf_delete(active_pkg); sbuf_delete(os_pkg); } /** * ice_load_pkg_file - Load the DDP package file using firmware_get * @sc: device private softc * * Use firmware_get to load the DDP package memory and then request that * firmware download the package contents and program the relevant hardware * bits. * * This function makes a copy of the DDP package memory which is tracked in * the ice_hw structure. The copy will be managed and released by * ice_deinit_hw(). This allows the firmware reference to be immediately * released using firmware_put. */ void ice_load_pkg_file(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; const struct firmware *pkg; pkg = firmware_get("ice_ddp"); if (!pkg) { device_printf(dev, "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n"); if (cold) device_printf(dev, "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf.\n"); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); return; } /* Copy and download the pkg contents */ status = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize); /* Release the firmware reference */ firmware_put(pkg, FIRMWARE_UNLOAD); /* Check the active DDP package version and log a message */ ice_log_pkg_init(sc, &status); /* Place the driver into safe mode */ if (status != ICE_SUCCESS) { ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); } } /** * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter * @vsi: the vsi to retrieve the value for * @counter: the counter type to retrieve * * Returns the value for a given ifnet counter. To do so, we calculate the * value based on the matching hardware statistics. */ uint64_t ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter) { struct ice_hw_port_stats *hs = &vsi->sc->stats.cur; struct ice_eth_stats *es = &vsi->hw_stats.cur; /* For some statistics, especially those related to error flows, we do * not have per-VSI counters. In this case, we just report the global * counters. 
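 * For example, IFCOUNTER_IERRORS is derived from port-wide error counters (CRC errors, illegal bytes, MAC faults, length errors, undersize/oversize, fragments, and jabber), while the packet and byte counts come from the per-VSI Ethernet statistics.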
*/ switch (counter) { case IFCOUNTER_IPACKETS: return (es->rx_unicast + es->rx_multicast + es->rx_broadcast); case IFCOUNTER_IERRORS: return (hs->crc_errors + hs->illegal_bytes + hs->mac_local_faults + hs->mac_remote_faults + hs->rx_len_errors + hs->rx_undersize + hs->rx_oversize + hs->rx_fragments + hs->rx_jabber); case IFCOUNTER_OPACKETS: return (es->tx_unicast + es->tx_multicast + es->tx_broadcast); case IFCOUNTER_OERRORS: return (es->tx_errors); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IBYTES: return (es->rx_bytes); case IFCOUNTER_OBYTES: return (es->tx_bytes); case IFCOUNTER_IMCASTS: return (es->rx_multicast); case IFCOUNTER_OMCASTS: return (es->tx_multicast); case IFCOUNTER_IQDROPS: return (es->rx_discards); case IFCOUNTER_OQDROPS: return (hs->tx_dropped_link_down); case IFCOUNTER_NOPROTO: return (es->rx_unknown_protocol); default: return if_get_counter_default(vsi->sc->ifp, counter); } } /** * ice_save_pci_info - Save PCI configuration fields in HW struct * @hw: the ice_hw struct to save the PCI information in * @dev: the device to get the PCI information from * * This should only be called once, early in the device attach * process. */ void ice_save_pci_info(struct ice_hw *hw, device_t dev) { hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); hw->revision_id = pci_get_revid(dev); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); } /** * ice_replay_all_vsi_cfg - Replay configuration for all VSIs after reset * @sc: the device softc * * Replay the configuration for each VSI, and then clean up replay * information. Called after a hardware reset in order to reconfigure the * active VSIs. */ int ice_replay_all_vsi_cfg(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; int i; for (i = 0; i < sc->num_available_vsi; i++) { struct ice_vsi *vsi = sc->all_vsi[i]; if (!vsi) continue; status = ice_replay_vsi(hw, vsi->idx); if (status) { device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } /* Cleanup replay filters after successful reconfiguration */ ice_replay_post(hw); return (0); } /** * ice_clean_vsi_rss_cfg - Clean up RSS configuration for a given VSI * @vsi: pointer to the VSI structure * * Clean up the advanced RSS configuration for a given VSI. This is necessary * during driver removal to ensure that all RSS resources are properly * released. * * @remark This function doesn't report an error as it is expected to be * called during driver reset and unload, and there isn't much the driver can * do if freeing RSS resources fails. */ static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; status = ice_rem_vsi_rss_cfg(hw, vsi->idx); if (status) device_printf(dev, "Failed to remove RSS configuration for VSI %d, err %s\n", vsi->idx, ice_status_str(status)); /* Remove this VSI from the RSS list */ ice_rem_vsi_rss_list(hw, vsi->idx); } /** * ice_clean_all_vsi_rss_cfg - Clean up RSS configuration for all VSIs * @sc: the device softc pointer * * Clean up the advanced RSS configuration for all VSIs on a given PF * interface. * * @remark This should be called while preparing for a reset, to clean up * stale RSS configuration for all VSIs. 
*/ void ice_clean_all_vsi_rss_cfg(struct ice_softc *sc) { int i; /* No need to clean up if RSS is not enabled */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS)) return; for (i = 0; i < sc->num_available_vsi; i++) { struct ice_vsi *vsi = sc->all_vsi[i]; if (vsi) ice_clean_vsi_rss_cfg(vsi); } } /** * ice_requested_fec_mode - Return the requested FEC mode as a string * @pi: The port info structure * * Return a string representing the requested FEC mode. */ static const char * ice_requested_fec_mode(struct ice_port_info *pi) { struct ice_aqc_get_phy_caps_data pcaps = { 0 }; enum ice_status status; status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status) /* Just report unknown if we can't get capabilities */ return "Unknown"; /* Check if RS-FEC has been requested first */ if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ | ICE_AQC_PHY_FEC_25G_RS_544_REQ)) return ice_fec_str(ICE_FEC_RS); /* If RS FEC has not been requested, then check BASE-R */ if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ | ICE_AQC_PHY_FEC_25G_KR_REQ)) return ice_fec_str(ICE_FEC_BASER); return ice_fec_str(ICE_FEC_NONE); } /** * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string * @pi: The port info structure * * Return a string representing the current FEC mode. */ static const char * ice_negotiated_fec_mode(struct ice_port_info *pi) { /* First, check whether RS FEC has been negotiated */ if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN | ICE_AQ_LINK_25G_RS_544_FEC_EN)) return ice_fec_str(ICE_FEC_RS); /* If RS FEC has not been negotiated, then check BASE-R */ if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN) return ice_fec_str(ICE_FEC_BASER); return ice_fec_str(ICE_FEC_NONE); } /** * ice_autoneg_mode - Return string indicating whether autoneg completed * @pi: The port info structure * * Return "True" if autonegotiation is completed, "False" otherwise. */ static const char * ice_autoneg_mode(struct ice_port_info *pi) { if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) return "True"; else return "False"; } /** * ice_flowcontrol_mode - Return string indicating the Flow Control mode * @pi: The port info structure * * Returns the current Flow Control mode as a string. */ static const char * ice_flowcontrol_mode(struct ice_port_info *pi) { return ice_fc_str(pi->fc.current_mode); } /** * ice_link_up_msg - Log a link up message with associated info * @sc: the device private softc * * Log a link up message with LOG_NOTICE message level. Include information * about the duplex, FEC mode, autonegotiation and flow control. */ void ice_link_up_msg(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; struct ifnet *ifp = sc->ifp; const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol; speed = ice_aq_speed_to_str(hw->port_info); req_fec = ice_requested_fec_mode(hw->port_info); neg_fec = ice_negotiated_fec_mode(hw->port_info); autoneg = ice_autoneg_mode(hw->port_info); flowcontrol = ice_flowcontrol_mode(hw->port_info); log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", - ifp->if_xname, speed, req_fec, neg_fec, autoneg, flowcontrol); + if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol); } /** * ice_update_laa_mac - Update MAC address if Locally Administered * @sc: the device softc * * Update the device MAC address when a Locally Administered Address is * assigned. * * This function does *not* update the MAC filter list itself. 
Instead, it * should be called after ice_rm_pf_default_mac_filters, so that the previous * address filter will be removed, and before ice_cfg_pf_default_mac_filters, * so that the new address filter will be assigned. */ int ice_update_laa_mac(struct ice_softc *sc) { const u8 *lladdr = (const u8 *)IF_LLADDR(sc->ifp); struct ice_hw *hw = &sc->hw; enum ice_status status; /* If the address is the same, then there is nothing to update */ if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN)) return (0); /* Reject Multicast addresses */ if (ETHER_IS_MULTICAST(lladdr)) return (EINVAL); status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL); if (status) { device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n", lladdr, ":", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EFAULT); } /* Copy the address into place of the LAN address. */ bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN); return (0); } /** * ice_get_and_print_bus_info - Save (PCI) bus info and print messages * @sc: device softc * * This will potentially print out a warning message if bus bandwidth * is insufficient for full-speed operation. * * This should only be called once, during the attach process, after * hw->port_info has been filled out with port link topology information * (from the Get PHY Capabilities Admin Queue command). */ void ice_get_and_print_bus_info(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u16 pci_link_status; int offset; pci_find_cap(dev, PCIY_EXPRESS, &offset); pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2); /* Fill out hw struct with PCIE link status info */ ice_set_pci_link_status_data(hw, pci_link_status); /* Use info to print out bandwidth messages */ ice_print_bus_link_data(dev, hw); if (ice_pcie_bandwidth_check(sc)) { device_printf(dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n"); device_printf(dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n"); } } /** * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to * a 64-bit baudrate. * @speed: enum value to convert * * This only goes up to PCIE Gen 4. */ static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed) { /* If the PCI-E speed is Gen1 or Gen2, then report * only 80% of bus speed to account for encoding overhead. */ switch (speed) { case ice_pcie_speed_2_5GT: return IF_Gbps(2); case ice_pcie_speed_5_0GT: return IF_Gbps(4); case ice_pcie_speed_8_0GT: return IF_Gbps(8); case ice_pcie_speed_16_0GT: return IF_Gbps(16); case ice_pcie_speed_unknown: default: return 0; } } /** * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to * a 32-bit number. * @width: enum value to convert */ static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width) { switch (width) { case ice_pcie_lnk_x1: return (1); case ice_pcie_lnk_x2: return (2); case ice_pcie_lnk_x4: return (4); case ice_pcie_lnk_x8: return (8); case ice_pcie_lnk_x12: return (12); case ice_pcie_lnk_x16: return (16); case ice_pcie_lnk_x32: return (32); case ice_pcie_lnk_width_resrv: case ice_pcie_lnk_width_unknown: default: return (0); } } /** * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for * full-speed device operation. * @sc: adapter softc * * Returns 0 if sufficient; 1 if not. 
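 * For example, a single 100G port on a Gen3 x8 link compares 100 Gb/s of port bandwidth against 8 GT/s * 8 lanes = 64 Gb/s of bus bandwidth and reports it as insufficient, while a Gen4 x16 link (16 * 16 = 256 Gb/s) is deemed sufficient. For 2x100G parts the port count is clamped to 1, since the second port is intended for failover.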
*/ static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; int num_ports, pcie_width; u64 pcie_speed, port_speed; MPASS(hw->port_info); num_ports = bitcount32(hw->func_caps.common_cap.valid_functions); port_speed = ice_phy_types_to_max_rate(hw->port_info); pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed); pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width); /* * If 2x100, clamp ports to 1 -- 2nd port is intended for * failover. */ if (port_speed == IF_Gbps(100)) num_ports = 1; return !!((num_ports * port_speed) > pcie_speed * pcie_width); } /** * ice_print_bus_link_data - Print PCI-E bandwidth information * @dev: device to print string for * @hw: hw struct with PCI-e link information */ static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw) { device_printf(dev, "PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" : (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" : (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" : (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"), (hw->bus.width == ice_pcie_lnk_x32) ? "Width x32" : (hw->bus.width == ice_pcie_lnk_x16) ? "Width x16" : (hw->bus.width == ice_pcie_lnk_x12) ? "Width x12" : (hw->bus.width == ice_pcie_lnk_x8) ? "Width x8" : (hw->bus.width == ice_pcie_lnk_x4) ? "Width x4" : (hw->bus.width == ice_pcie_lnk_x2) ? "Width x2" : (hw->bus.width == ice_pcie_lnk_x1) ? "Width x1" : "Width Unknown"); } /** * ice_set_pci_link_status_data - store PCI bus info * @hw: pointer to hardware structure * @link_status: the link status word from PCI config space * * Stores the PCI bus info (speed, width, type) within the ice_hw structure **/ static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status) { u16 reg; hw->bus.type = ice_bus_pci_express; reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4; switch (reg) { case ice_pcie_lnk_x1: case ice_pcie_lnk_x2: case ice_pcie_lnk_x4: case ice_pcie_lnk_x8: case ice_pcie_lnk_x12: case ice_pcie_lnk_x16: case ice_pcie_lnk_x32: hw->bus.width = (enum ice_pcie_link_width)reg; break; default: hw->bus.width = ice_pcie_lnk_width_unknown; break; } reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13; switch (reg) { case ice_pcie_speed_2_5GT: case ice_pcie_speed_5_0GT: case ice_pcie_speed_8_0GT: case ice_pcie_speed_16_0GT: hw->bus.speed = (enum ice_pcie_bus_speed)reg; break; default: hw->bus.speed = ice_pcie_speed_unknown; break; } } /** * ice_init_link_events - Initialize Link Status Events mask * @sc: the device softc * * Initialize the Link Status Events mask to disable notification of link * events we don't care about in software. Also request that link status * events be enabled. 
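 * Note that the hardware mask has inverted semantics: a set bit disables the corresponding event, so the complement of the wanted event bits is written with ice_aq_set_event_mask().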
*/ int ice_init_link_events(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; u16 wanted_events; /* Set the bits for the events that we want to be notified by */ wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL); /* request that every event except the wanted events be masked */ status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL); if (status) { device_printf(sc->dev, "Failed to set link status event mask, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } /* Request link info with the LSE bit set to enable link status events */ status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL); if (status) { device_printf(sc->dev, "Failed to enable link status events, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return (0); } /** * ice_handle_mdd_event - Handle possibly malicious events * @sc: the device softc * * Called by the admin task if an MDD detection interrupt is triggered. * Identifies possibly malicious events coming from VFs. Also triggers for * similar incorrect behavior from the PF as well. */ void ice_handle_mdd_event(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; bool mdd_detected = false, request_reinit = false; device_t dev = sc->dev; u32 reg; if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING)) return; reg = rd32(hw, GL_MDET_TX_TCLAN); if (reg & GL_MDET_TX_TCLAN_VALID_M) { u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S; u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S; u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S; u16 queue = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S; device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n", ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num); /* Only clear this event if it matches this PF, that way other * PFs can read the event and determine VF and queue number. */ if (pf_num == hw->pf_id) wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); mdd_detected = true; } /* Determine what triggered the MDD event */ reg = rd32(hw, GL_MDET_TX_PQM); if (reg & GL_MDET_TX_PQM_VALID_M) { u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S; u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S; u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S; u16 queue = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S; device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n", ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num); /* Only clear this event if it matches this PF, that way other * PFs can read the event and determine VF and queue number. 
*/ if (pf_num == hw->pf_id) wr32(hw, GL_MDET_TX_PQM, 0xffffffff); mdd_detected = true; } reg = rd32(hw, GL_MDET_RX); if (reg & GL_MDET_RX_VALID_M) { u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S; u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S; u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S; u16 queue = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S; device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n", ice_mdd_rx_str(event), queue, pf_num, vf_num); /* Only clear this event if it matches this PF, that way other * PFs can read the event and determine VF and queue number. */ if (pf_num == hw->pf_id) wr32(hw, GL_MDET_RX, 0xffffffff); mdd_detected = true; } /* Now, confirm that this event actually affects this PF, by checking * the PF registers. */ if (mdd_detected) { reg = rd32(hw, PF_MDET_TX_TCLAN); if (reg & PF_MDET_TX_TCLAN_VALID_M) { wr32(hw, PF_MDET_TX_TCLAN, 0xffff); sc->soft_stats.tx_mdd_count++; request_reinit = true; } reg = rd32(hw, PF_MDET_TX_PQM); if (reg & PF_MDET_TX_PQM_VALID_M) { wr32(hw, PF_MDET_TX_PQM, 0xffff); sc->soft_stats.tx_mdd_count++; request_reinit = true; } reg = rd32(hw, PF_MDET_RX); if (reg & PF_MDET_RX_VALID_M) { wr32(hw, PF_MDET_RX, 0xffff); sc->soft_stats.rx_mdd_count++; request_reinit = true; } } /* TODO: Implement logic to detect and handle events caused by VFs. */ /* request that the upper stack re-initialize the Tx/Rx queues */ if (request_reinit) ice_request_stack_reinit(sc); ice_flush(hw); } /** * ice_init_dcb_setup - Initialize DCB settings for HW * @sc: the device softc * * This needs to be called after the fw_lldp_agent sysctl is added, since that * can update the device's LLDP agent status if a tunable value is set. * * Get and store the initial state of DCB settings on driver load. Print out * informational messages as well. */ void ice_init_dcb_setup(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; bool dcbx_agent_status; enum ice_status status; /* Don't do anything if DCB isn't supported */ if (!hw->func_caps.common_cap.dcb) { device_printf(dev, "%s: No DCB support\n", __func__); return; } hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw); if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE && hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) { /* * Start DCBX agent, but not LLDP. The return value isn't * checked here because a more detailed dcbx agent status is * retrieved and checked in ice_init_dcb() and below. */ status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL); if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) device_printf(dev, "start_stop_dcbx failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /* This sets hw->port_info->qos_cfg.is_sw_lldp */ status = ice_init_dcb(hw, true); /* If there is an error, then FW LLDP is not in a usable state */ if (status != 0 && status != ICE_ERR_NOT_READY) { /* Don't print an error message if the return code from the AQ * cmd performed in ice_init_dcb() is EPERM; that means the * FW LLDP engine is disabled, and that is a valid state. 
*/ if (!(status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) { device_printf(dev, "DCB init failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED; } switch (hw->port_info->qos_cfg.dcbx_status) { case ICE_DCBX_STATUS_DIS: ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n"); break; case ICE_DCBX_STATUS_NOT_STARTED: ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n"); break; case ICE_DCBX_STATUS_MULTIPLE_PEERS: ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n"); break; default: break; } /* LLDP disabled in FW */ if (hw->port_info->qos_cfg.is_sw_lldp) { ice_add_rx_lldp_filter(sc); device_printf(dev, "Firmware LLDP agent disabled\n"); } } /** * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs * @dcbcfg: DCB configuration to examine * * Scans a TC mapping table inside dcbcfg to find traffic classes * enabled and @returns a bitmask of enabled TCs */ static u8 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg) { u8 tc_map = 0; int i = 0; switch (dcbcfg->pfc_mode) { case ICE_QOS_MODE_VLAN: /* XXX: "i" is actually "User Priority" here, not * Traffic Class, but the max for both is 8, so it works * out here. */ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) tc_map |= BIT(dcbcfg->etscfg.prio_table[i]); break; default: /* Invalid Mode */ tc_map = ICE_DFLT_TRAFFIC_CLASS; break; } return (tc_map); } /** * ice_dcb_num_tc - Count the number of TCs in a bitmap * @tc_map: bitmap of enabled traffic classes * * @return the number of traffic classes in * an 8-bit TC bitmap, or 0 if they are noncontiguous */ static u8 ice_dcb_num_tc(u8 tc_map) { bool tc_unused = false; u8 ret = 0; int i = 0; ice_for_each_traffic_class(i) { if (tc_map & BIT(i)) { if (!tc_unused) { ret++; } else { /* Non-contiguous TCs detected */ return (0); } } else tc_unused = true; } return (ret); } /** * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events * @sc: the device private softc * @event: event received on a control queue * * Prints out the type and contents of an LLDP MIB change event in a DCB debug message. 
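 * The event's type byte packs three fields (MIB type, bridge type, and Tx status), each of which is decoded into a human-readable string before logging.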
*/ static void ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_lldp_get_mib *params = (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib; u8 mib_type, bridge_type, tx_status; static const char* mib_type_strings[] = { "Local MIB", "Remote MIB", "Reserved", "Reserved" }; static const char* bridge_type_strings[] = { "Nearest Bridge", "Non-TPMR Bridge", "Reserved", "Reserved" }; static const char* tx_status_strings[] = { "Port's TX active", "Port's TX suspended and drained", "Reserved", "Port's TX suspended and drained; blocked TC pipe flushed" }; mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >> ICE_AQ_LLDP_MIB_TYPE_S; bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >> ICE_AQ_LLDP_BRID_TYPE_S; tx_status = (params->type & ICE_AQ_LLDP_TX_M) >> ICE_AQ_LLDP_TX_S; ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n", mib_type_strings[mib_type], bridge_type_strings[bridge_type], tx_status_strings[tx_status]); /* Nothing else to report */ if (!event->msg_buf) return; ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]); ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf, event->msg_len); } /** * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure * @sc: the device private softc * @old_cfg: Old DCBX configuration to compare against * @new_cfg: New DCBX configuration to check * * @return true if something changed in new_cfg that requires the driver * to do some reconfiguration. */ static bool ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg, struct ice_dcbx_cfg *new_cfg) { struct ice_hw *hw = &sc->hw; bool needs_reconfig = false; /* Check if ETS config has changed */ if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg, sizeof(new_cfg->etscfg))) { /* If Priority Table has changed, then driver reconfig is needed */ if (memcmp(&new_cfg->etscfg.prio_table, &old_cfg->etscfg.prio_table, sizeof(new_cfg->etscfg.prio_table))) { ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n"); needs_reconfig = true; } /* These are just informational */ if (memcmp(&new_cfg->etscfg.tcbwtable, &old_cfg->etscfg.tcbwtable, sizeof(new_cfg->etscfg.tcbwtable))) ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n"); if (memcmp(&new_cfg->etscfg.tsatable, &old_cfg->etscfg.tsatable, sizeof(new_cfg->etscfg.tsatable))) ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n"); } /* Check if PFC config has changed */ if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) { needs_reconfig = true; ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n"); } ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig); return (needs_reconfig); } /** * ice_stop_pf_vsi - Stop queues for PF LAN VSI * @sc: the device private softc * * Flushes interrupts and stops the queues associated with the PF LAN VSI. 
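 * The Tx and Rx queues are only disabled if the driver has completed initialization; otherwise only the queue-to-interrupt associations are flushed.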
*/ static void ice_stop_pf_vsi(struct ice_softc *sc) { /* Dissociate the Tx and Rx queues from the interrupts */ ice_flush_txq_interrupts(&sc->pf_vsi); ice_flush_rxq_interrupts(&sc->pf_vsi); if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) return; /* Disable the Tx and Rx queues */ ice_vsi_disable_tx(&sc->pf_vsi); ice_control_rx_queues(&sc->pf_vsi, false); } /** * ice_vsi_setup_q_map - Set up a VSI queue map * @vsi: the VSI being configured * @ctxt: VSI context structure */ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { u16 offset = 0, qmap = 0, pow = 0; u16 num_txq_per_tc, num_rxq_per_tc, qcount_rx; int i, j, k; if (vsi->num_tcs == 0) { /* at least TC0 should be enabled by default */ vsi->num_tcs = 1; vsi->tc_map = 0x1; } qcount_rx = vsi->num_rx_queues; num_rxq_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC); if (!num_rxq_per_tc) num_rxq_per_tc = 1; /* Have TX queue count match RX queue count */ num_txq_per_tc = num_rxq_per_tc; /* Find the (rounded up) base-2 logarithm of the per-TC queue count */ pow = flsl(num_rxq_per_tc - 1); /* TC mapping is a function of the number of Rx queues assigned to the * VSI for each traffic class and the offset of these queues. * The first 10 bits hold the queue offset for TC0; the next 4 bits hold * the number of queues allocated to TC0, expressed as a power of 2. * * If a TC is not enabled, its queue offset is set to 0 and one queue is * allocated, so traffic for that TC is sent to the default queue. * * Set up the number and offset of Rx queues for all TCs for the VSI */ ice_for_each_traffic_class(i) { if (!(vsi->tc_map & BIT(i))) { /* TC is not enabled */ vsi->tc_info[i].qoffset = 0; vsi->tc_info[i].qcount_rx = 1; vsi->tc_info[i].qcount_tx = 1; ctxt->info.tc_mapping[i] = 0; continue; } /* TC is enabled */ vsi->tc_info[i].qoffset = offset; vsi->tc_info[i].qcount_rx = num_rxq_per_tc; vsi->tc_info[i].qcount_tx = num_txq_per_tc; qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & ICE_AQ_VSI_TC_Q_OFFSET_M) | ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap); /* Store traffic class and handle data in queue structures */ for (j = offset, k = 0; j < offset + num_txq_per_tc; j++, k++) { vsi->tx_queues[j].q_handle = k; vsi->tx_queues[j].tc = i; } for (j = offset; j < offset + num_rxq_per_tc; j++) vsi->rx_queues[j].tc = i; offset += num_rxq_per_tc; } /* Rx queue mapping */ ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG); ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]); ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues); } /** * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map * @sc: the device private softc * @tc_map: traffic class bitmap * * @pre VSI queues are stopped * * @return 0 if configuration is successful * @return EIO if Update VSI AQ cmd fails * @return ENODEV if updating Tx Scheduler fails */ static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; struct ice_vsi_ctx ctx = { 0 }; device_t dev = sc->dev; enum ice_status status; u8 num_tcs = 0; int i = 0; /* Count the number of enabled Traffic Classes */ ice_for_each_traffic_class(i) if (tc_map & BIT(i)) num_tcs++; vsi->tc_map = tc_map; vsi->num_tcs = num_tcs; /* Set default parameters for context */ ctx.vf_num = 0; ctx.info = vsi->info; /* Set up queue map */ ice_vsi_setup_q_map(vsi, &ctx); /* Update VSI configuration in firmware (RX queues) */ 
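/* Only the Rx queue mapping section is marked valid, so the Update VSI command leaves all other VSI properties untouched. */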
ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); status = ice_update_vsi(hw, vsi->idx, &ctx, NULL); if (status) { device_printf(dev, "%s: Update VSI AQ call failed, err %s aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } vsi->info = ctx.info; /* Use values derived in ice_vsi_setup_q_map() */ for (i = 0; i < num_tcs; i++) max_txqs[i] = vsi->tc_info[i].qcount_tx; /* Update LAN Tx queue info in firmware */ status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map, max_txqs); if (status) { device_printf(dev, "%s: Failed VSI lan queue config, err %s aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (ENODEV); } vsi->info.valid_sections = 0; return (0); } /** * ice_dcb_recfg - Reconfigure VSI with new DCB settings * @sc: the device private softc * * @pre All VSIs have been disabled/stopped * * Reconfigures VSI settings based on local_dcbx_cfg. */ static void ice_dcb_recfg(struct ice_softc *sc) { struct ice_dcbx_cfg *dcbcfg = &sc->hw.port_info->qos_cfg.local_dcbx_cfg; device_t dev = sc->dev; u8 tc_map = 0; int ret; tc_map = ice_dcb_get_tc_map(dcbcfg); /* If non-contiguous TCs are used, configure the default TC instead; * non-contiguous TC maps are not supported. */ if (ice_dcb_num_tc(tc_map) == 0) { tc_map = ICE_DFLT_TRAFFIC_CLASS; ice_set_default_local_lldp_mib(sc); } /* Reconfigure VSI queues to add/remove traffic classes */ ret = ice_pf_vsi_cfg_tc(sc, tc_map); if (ret) device_printf(dev, "Failed to configure TCs for PF VSI, err %s\n", ice_err_str(ret)); } /** * ice_do_dcb_reconfig - Notify RDMA and reconfigure PF LAN VSI * @sc: the device private softc * * @pre Determined that the DCB configuration requires a change * * Reconfigures the PF LAN VSI based on the updated DCB configuration * found in the port_info's local DCBX configuration. */ static void ice_do_dcb_reconfig(struct ice_softc *sc) { struct ice_aqc_port_ets_elem port_ets = { 0 }; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; enum ice_status status; u8 tc_map; pi = sc->hw.port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; ice_rdma_notify_dcb_qos_change(sc); /* Set state when there's more than one TC */ tc_map = ice_dcb_get_tc_map(local_dcbx_cfg); if (ice_dcb_num_tc(tc_map) > 1) { device_printf(dev, "Multiple traffic classes enabled\n"); ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS); } else { device_printf(dev, "Multiple traffic classes disabled\n"); ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS); } /* Disable PF VSI since it's going to be reconfigured */ ice_stop_pf_vsi(sc); /* Query ETS configuration and update SW Tx scheduler info */ status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL); if (status != ICE_SUCCESS) { device_printf(dev, "Query Port ETS AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); /* This won't break traffic, but QoS will not work as expected */ } /* Change PF VSI configuration */ ice_dcb_recfg(sc); /* Send new configuration to RDMA client driver */ ice_rdma_dcb_qos_update(sc, pi); ice_request_stack_reinit(sc); } /** * ice_handle_mib_change_event - helper function to handle LLDP MIB change events * @sc: the device private softc * @event: event received on a control queue * * Checks the updated MIB it receives and possibly reconfigures the PF LAN * VSI depending on what has changed. 
This will also print out some debug * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask. */ static void ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_lldp_get_mib *params = (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib; struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg; struct ice_port_info *pi; device_t dev = sc->dev; struct ice_hw *hw = &sc->hw; bool needs_reconfig; enum ice_status status; u8 mib_type, bridge_type; ASSERT_CFG_LOCKED(sc); ice_debug_print_mib_change_event(sc, event); pi = sc->hw.port_info; mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >> ICE_AQ_LLDP_MIB_TYPE_S; bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >> ICE_AQ_LLDP_BRID_TYPE_S; /* Ignore if event is not for Nearest Bridge */ if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID) return; /* Check MIB Type and return if event for Remote MIB update */ if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) { /* Update the cached remote MIB and return */ status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, &pi->qos_cfg.remote_dcbx_cfg); if (status) device_printf(dev, "%s: Failed to get Remote DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); /* Not fatal if this fails */ return; } /* Save line length by aliasing the local dcbx cfg */ local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; /* Save off the old configuration and clear current config */ tmp_dcbx_cfg = *local_dcbx_cfg; memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg)); /* Get updated DCBX data from firmware */ status = ice_get_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to get Local DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return; } /* No change detected in DCBX config */ if (!memcmp(&tmp_dcbx_cfg, local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) { ice_debug(hw, ICE_DBG_DCB, "No change detected in local DCBX configuration\n"); return; } /* Check to see if DCB needs reconfiguring */ needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg, local_dcbx_cfg); if (!needs_reconfig) return; /* Reconfigure */ ice_do_dcb_reconfig(sc); } /** * ice_send_version - Send driver version to firmware * @sc: the device private softc * * Send the driver version to the firmware. This must be called as early as * possible after ice_init_hw(). */ int ice_send_version(struct ice_softc *sc) { struct ice_driver_ver driver_version = {0}; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; driver_version.major_ver = ice_major_version; driver_version.minor_ver = ice_minor_version; driver_version.build_ver = ice_patch_version; driver_version.subbuild_ver = ice_rc_version; strlcpy((char *)driver_version.driver_string, ice_driver_version, sizeof(driver_version.driver_string)); status = ice_aq_send_driver_ver(hw, &driver_version, NULL); if (status) { device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return (0); } /** * ice_handle_lan_overflow_event - helper function to log LAN overflow events * @sc: device softc * @event: event received on a control queue * * Prints out a message when a LAN overflow event is detected on a receive * queue. 
*/ static void ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_event_lan_overflow *params = (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow; struct ice_hw *hw = &sc->hw; ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n", LE32_TO_CPU(params->prtdcb_ruptq), LE32_TO_CPU(params->qtx_ctl)); } /** * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list * @vsi: the VSI to target packets to * @list: the list to add the filter to * @ethertype: the Ethertype to filter on * @direction: The direction of the filter (Tx or Rx) * @action: the action to take * * Add an Ethertype filter to a filter list. Used to forward a series of * filters to the firmware for configuring the switch. * * Returns 0 on success, and an error code on failure. */ static int ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list, u16 ethertype, u16 direction, enum ice_sw_fwd_act_type action) { struct ice_fltr_list_entry *entry; MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX)); entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO); if (!entry) return (ENOMEM); entry->fltr_info.flag = direction; entry->fltr_info.src_id = ICE_SRC_ID_VSI; entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE; entry->fltr_info.fltr_act = action; entry->fltr_info.vsi_handle = vsi->idx; entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype; LIST_ADD(&entry->list_entry, list); return 0; } #define ETHERTYPE_PAUSE_FRAMES 0x8808 #define ETHERTYPE_LLDP_FRAMES 0x88cc /** * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes * @sc: the device private softc * * Configure the switch to drop PAUSE frames and LLDP frames transmitted from * the host. This prevents malicious VFs from sending these frames and being * able to control or configure the network. */ int ice_cfg_pf_ethertype_filters(struct ice_softc *sc) { struct ice_list_head ethertype_list; struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; int err = 0; INIT_LIST_HEAD(ðertype_list); /* * Note that the switch filters will ignore the VSI index for the drop * action, so we only need to program drop filters once for the main * VSI. */ /* Configure switch to drop all Tx pause frames coming from any VSI. */ if (sc->enable_tx_fc_filter) { err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_PAUSE_FRAMES, ICE_FLTR_TX, ICE_DROP_PACKET); if (err) goto free_ethertype_list; } /* Configure switch to drop LLDP frames coming from any VSI */ if (sc->enable_tx_lldp_filter) { err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_LLDP_FRAMES, ICE_FLTR_TX, ICE_DROP_PACKET); if (err) goto free_ethertype_list; } status = ice_add_eth_mac(hw, ðertype_list); if (status) { device_printf(dev, "Failed to add Tx Ethertype filters, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (EIO); } free_ethertype_list: ice_free_fltr_list(ðertype_list); return err; } /** * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames * @sc: the device private structure * * Add a switch ethertype filter which forwards the LLDP frames to the main PF * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to * be forwarded to the stack. 
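 * Two paths are possible: firmware new enough to pass ice_fw_supports_lldp_fltr_ctrl() takes a direct AQ command, while older firmware falls back to a generic ethertype switch filter forwarding ethertype 0x88cc (ETHERTYPE_LLDP_FRAMES) to the PF VSI.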
*/ static void ice_add_rx_lldp_filter(struct ice_softc *sc) { struct ice_list_head ethertype_list; struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; int err; u16 vsi_num; /* * If FW is new enough, use a direct AQ command to perform the filter * addition. */ if (ice_fw_supports_lldp_fltr_ctrl(hw)) { vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); status = ice_lldp_fltr_add_remove(hw, vsi_num, true); if (status) { device_printf(dev, "Failed to add Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } else ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER); return; } INIT_LIST_HEAD(ðertype_list); /* Forward Rx LLDP frames to the stack */ err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_LLDP_FRAMES, ICE_FLTR_RX, ICE_FWD_TO_VSI); if (err) { device_printf(dev, "Failed to add Rx LLDP filter, err %s\n", ice_err_str(err)); goto free_ethertype_list; } status = ice_add_eth_mac(hw, ðertype_list); if (status && status != ICE_ERR_ALREADY_EXISTS) { device_printf(dev, "Failed to add Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } else { /* * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an * already existing filter as an error case. */ ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER); } free_ethertype_list: ice_free_fltr_list(ðertype_list); } /** * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames * @sc: the device private structure * * Remove the switch filter forwarding LLDP frames to the main PF VSI, called * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the * stack. */ static void ice_del_rx_lldp_filter(struct ice_softc *sc) { struct ice_list_head ethertype_list; struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; int err; u16 vsi_num; /* * Only in the scenario where the driver added the filter during * this session (while the driver was loaded) would we be able to * delete this filter. */ if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER)) return; /* * If FW is new enough, use a direct AQ command to perform the filter * removal. */ if (ice_fw_supports_lldp_fltr_ctrl(hw)) { vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); status = ice_lldp_fltr_add_remove(hw, vsi_num, false); if (status) { device_printf(dev, "Failed to remove Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } return; } INIT_LIST_HEAD(ðertype_list); /* Remove filter forwarding Rx LLDP frames to the stack */ err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_LLDP_FRAMES, ICE_FLTR_RX, ICE_FWD_TO_VSI); if (err) { device_printf(dev, "Failed to remove Rx LLDP filter, err %s\n", ice_err_str(err)); goto free_ethertype_list; } status = ice_remove_eth_mac(hw, ðertype_list); if (status == ICE_ERR_DOES_NOT_EXIST) { ; /* Don't complain if we try to remove a filter that doesn't exist */ } else if (status) { device_printf(dev, "Failed to remove Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } free_ethertype_list: ice_free_fltr_list(ðertype_list); } /** * ice_init_link_configuration -- Setup link in different ways depending * on whether media is available or not. 
* @sc: device private structure * * Called at the end of the attach process to either set default link * parameters if there is media available, or force HW link down and * set a state bit if there is no media. */ void ice_init_link_configuration(struct ice_softc *sc) { struct ice_port_info *pi = sc->hw.port_info; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; pi->phy.get_link_info = true; status = ice_get_link_status(pi, &sc->link_up); if (status != ICE_SUCCESS) { device_printf(dev, "%s: ice_get_link_status failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return; } if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA); /* Apply default link settings */ ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC); } else { /* Set link down, and poll for media available in timer. This prevents the * driver from receiving spurious link-related events. */ ice_set_state(&sc->state, ICE_STATE_NO_MEDIA); status = ice_aq_set_link_restart_an(pi, false, NULL); if (status != ICE_SUCCESS) device_printf(dev, "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /** * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data * @sc: device private structure * @cfg: new PHY config data to be modified * * Applies user settings for advertised speeds to the PHY type fields in the * supplied PHY config struct. It uses the data from pcaps to check if the * saved settings are invalid and uses the pcaps data instead if they are * invalid. */ static int ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg) { struct ice_phy_data phy_data = { 0 }; struct ice_port_info *pi = sc->hw.port_info; u64 phy_low = 0, phy_high = 0; u16 link_speeds; int ret; link_speeds = pi->phy.curr_user_speed_req; if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) { memset(&phy_data, 0, sizeof(phy_data)); phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (link_speeds == 0 || phy_data.user_speeds_intr) goto finalize_link_speed; if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) { memset(&phy_data, 0, sizeof(phy_data)); phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (!phy_data.user_speeds_intr) { phy_low = phy_data.phy_low_orig; phy_high = phy_data.phy_high_orig; } goto finalize_link_speed; } /* If we're here, then it means the benefits of Version 2 * link management aren't utilized. We fall through to * handling Strict Link Mode the same as Version 1 link * management. 
*/ } memset(&phy_data, 0, sizeof(phy_data)); if ((link_speeds == 0) && (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high)) phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA; else phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) { if (phy_low == 0 && phy_high == 0) { device_printf(sc->dev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); return (EINVAL); } } else { if (link_speeds == 0) { if (sc->ldo_tlv.phy_type_low & phy_low || sc->ldo_tlv.phy_type_high & phy_high) { phy_low &= sc->ldo_tlv.phy_type_low; phy_high &= sc->ldo_tlv.phy_type_high; } } else if (phy_low == 0 && phy_high == 0) { memset(&phy_data, 0, sizeof(phy_data)); phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (!phy_data.user_speeds_intr) { phy_low = phy_data.phy_low_orig; phy_high = phy_data.phy_high_orig; } } } finalize_link_speed: /* Cache new user settings for speeds */ pi->phy.curr_user_speed_req = phy_data.user_speeds_intr; cfg->phy_type_low = htole64(phy_low); cfg->phy_type_high = htole64(phy_high); return (ret); } /** * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data * @sc: device private structure * @cfg: new PHY config data to be modified * * Applies user setting for FEC mode to PHY config struct. It uses the data * from pcaps to check if the saved settings are invalid and uses the pcaps * data instead if they are invalid. */ static int ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg) { struct ice_port_info *pi = sc->hw.port_info; enum ice_status status; cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req); if (status) return (EIO); return (0); } /** * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data * @pi: port info struct * @cfg: new PHY config data to be modified * * Applies user setting for flow control mode to PHY config struct. There are * no invalid flow control mode settings; if there are, then this function * treats them like "ICE_FC_NONE". 
*/ static void ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg) { cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY | ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY); switch (pi->phy.curr_user_fc_req) { case ICE_FC_FULL: cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY | ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY; break; case ICE_FC_RX_PAUSE: cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY; break; case ICE_FC_TX_PAUSE: cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY; break; default: /* ICE_FC_NONE */ break; } } /** * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings * @sc: device private structure * @settings: which settings to apply * * Applies user settings for advertised speeds, FEC mode, and flow * control mode to a PHY config struct; it uses the data from pcaps * to check if the saved settings are invalid and uses the pcaps * data instead if they are invalid. * * For things like sysctls where only one setting needs to be * updated, the bitmap allows the caller to specify which setting * to update. */ int ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings) { struct ice_aqc_set_phy_cfg_data cfg = { 0 }; struct ice_port_info *pi = sc->hw.port_info; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u64 phy_low, phy_high; enum ice_status status; enum ice_fec_mode dflt_fec_mode; u16 dflt_user_speed; if (!settings || settings > ICE_APPLY_LS_FEC_FC) { ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n", settings); } status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status != ICE_SUCCESS) { device_printf(dev, "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } phy_low = le64toh(pcaps.phy_type_low); phy_high = le64toh(pcaps.phy_type_high); /* Save off initial config parameters */ dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high); dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options); /* Setup new PHY config */ ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg); /* On error, restore active configuration values */ if ((settings & ICE_APPLY_LS) && ice_apply_saved_phy_req_to_cfg(sc, &cfg)) { pi->phy.curr_user_speed_req = dflt_user_speed; cfg.phy_type_low = pcaps.phy_type_low; cfg.phy_type_high = pcaps.phy_type_high; } if ((settings & ICE_APPLY_FEC) && ice_apply_saved_fec_req_to_cfg(sc, &cfg)) { pi->phy.curr_user_fec_req = dflt_fec_mode; } if (settings & ICE_APPLY_FC) { /* No real error indicators for this process, * so we'll just have to assume it works. */ ice_apply_saved_fc_req_to_cfg(pi, &cfg); } /* Enable link and re-negotiate it */ cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK; status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL); if (status != ICE_SUCCESS) { /* Don't indicate failure if there's no media in the port. * The settings have been saved and will apply when media * is inserted. 
*/ if ((status == ICE_ERR_AQ_ERROR) && (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) { device_printf(dev, "%s: Setting will be applied when media is inserted\n", __func__); return (0); } else { device_printf(dev, "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } return (0); } /** * ice_print_ldo_tlv - Print out LDO TLV information * @sc: device private structure * @tlv: LDO TLV information from the adapter NVM * * Dump out the information in tlv to the kernel message buffer; intended for * debugging purposes. */ static void ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv) { device_t dev = sc->dev; device_printf(dev, "TLV: -options 0x%02x\n", tlv->options); device_printf(dev, " -phy_config 0x%02x\n", tlv->phy_config); device_printf(dev, " -fec_options 0x%02x\n", tlv->fec_options); device_printf(dev, " -phy_high 0x%016llx\n", (unsigned long long)tlv->phy_type_high); device_printf(dev, " -phy_low 0x%016llx\n", (unsigned long long)tlv->phy_type_low); } /** * ice_set_link_management_mode -- Strict or lenient link management * @sc: device private structure * * Some NVMs give the adapter the option to advertise a superset of link * configurations. This checks to see if that option is enabled. * Further, the NVM could also provide a specific set of configurations * to try; these are cached in the driver's private structure if they * are available. */ void ice_set_link_management_mode(struct ice_softc *sc) { struct ice_port_info *pi = sc->hw.port_info; device_t dev = sc->dev; struct ice_link_default_override_tlv tlv = { 0 }; enum ice_status status; /* Port must be in strict mode if FW version is below a certain * version. (i.e. Don't set lenient mode features) */ if (!(ice_fw_supports_link_override(&sc->hw))) return; status = ice_get_link_default_override(&tlv, pi); if (status != ICE_SUCCESS) { device_printf(dev, "%s: ice_get_link_default_override failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); return; } if (sc->hw.debug_mask & ICE_DBG_LINK) ice_print_ldo_tlv(sc, &tlv); /* Set lenient link mode */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) && (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE))) ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en); /* FW supports reporting a default configuration */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) && ice_fw_supports_report_dflt_cfg(&sc->hw)) { ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en); /* Knowing we're at a high enough firmware revision to * support this link management configuration, we don't * need to check/support earlier versions. */ return; } /* Default overrides only work if in lenient link mode */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) && ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) && (tlv.options & ICE_LINK_OVERRIDE_EN)) ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en); /* Cache the LDO TLV structure in the driver, since it * won't change during the driver's lifetime. */ sc->ldo_tlv = tlv; } /** * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults * @sc: device private structure * * This should be called before the tunables for these link settings * (e.g. advertise_speed) are added -- so that these defaults don't overwrite * the cached values that the sysctl handlers will write. 
* * This also needs to be called before ice_init_link_configuration, to ensure * that there are sane values that can be written if there is media available * in the port. */ void ice_init_saved_phy_cfg(struct ice_softc *sc) { struct ice_port_info *pi = sc->hw.port_info; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; u64 phy_low, phy_high; u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA; if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) report_mode = ICE_AQC_REPORT_DFLT_CFG; status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL); if (status != ICE_SUCCESS) { device_printf(dev, "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n", __func__, report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return; } phy_low = le64toh(pcaps.phy_type_low); phy_high = le64toh(pcaps.phy_type_high); /* Save off initial config parameters */ pi->phy.curr_user_speed_req = ice_aq_phy_types_to_link_speeds(phy_low, phy_high); pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options); pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps); } /** * ice_module_init - Driver callback to handle module load * * Callback for handling module load events. This function should initialize * any data structures that are used for the life of the device driver. */ static int ice_module_init(void) { ice_rdma_init(); return (0); } /** * ice_module_exit - Driver callback to handle module exit * * Callback for handling module unload events. This function should release * any resources initialized during ice_module_init. * * If this function returns non-zero, the module will not be unloaded. It * should only return such a value if the module cannot be unloaded at all, * such as due to outstanding memory references that cannot be revoked. */ static int ice_module_exit(void) { ice_rdma_exit(); return (0); } /** * ice_module_event_handler - Callback for module events * @mod: unused module_t parameter * @what: the event requested * @arg: unused event argument * * Callback used to handle module events from the stack. Used to allow the * driver to define custom behavior that should happen at module load and * unload. */ int ice_module_event_handler(module_t __unused mod, int what, void __unused *arg) { switch (what) { case MOD_LOAD: return ice_module_init(); case MOD_UNLOAD: return ice_module_exit(); default: /* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */ return (EOPNOTSUPP); } } /** * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request * @sc: the device private softc * @ifd: ifdrv ioctl request pointer */ int ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd) { union ice_nvm_access_data *data; struct ice_nvm_access_cmd *cmd; size_t ifd_len = ifd->ifd_len, malloc_len; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; u8 *nvm_buffer; int err; /* * ifioctl forwards SIOCxDRVSPEC to iflib without performing * a privilege check. In turn, iflib forwards the ioctl to the driver * without performing a privilege check. Perform one here to ensure * that non-privileged threads cannot access this interface. */ err = priv_check(curthread, PRIV_DRIVER); if (err) return (err); if (ifd_len < sizeof(struct ice_nvm_access_cmd)) { device_printf(dev, "%s: ifdrv length is too small. 
Got %zu, but expected %zu\n", __func__, ifd_len, sizeof(struct ice_nvm_access_cmd)); return (EINVAL); } if (ifd->ifd_data == NULL) { device_printf(dev, "%s: ifd data buffer not present.\n", __func__); return (EINVAL); } /* * If everything works correctly, ice_handle_nvm_access should not * modify data past the size of the ioctl length. However, it could * lead to memory corruption if it did. Make sure to allocate at least * enough space for the command and data regardless. This * ensures that any access to the data union will not access invalid * memory. */ malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd)); nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK); if (!nvm_buffer) return (ENOMEM); /* Copy the NVM access command and data in from user space */ /* coverity[tainted_data_argument] */ err = copyin(ifd->ifd_data, nvm_buffer, ifd_len); if (err) { device_printf(dev, "%s: Copying request from user space failed, err %s\n", __func__, ice_err_str(err)); goto cleanup_free_nvm_buffer; } /* * The NVM command structure is immediately followed by data which * varies in size based on the command. */ cmd = (struct ice_nvm_access_cmd *)nvm_buffer; data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd)); /* Handle the NVM access request */ status = ice_handle_nvm_access(hw, cmd, data); if (status) ice_debug(hw, ICE_DBG_NVM, "NVM access request failed, err %s\n", ice_status_str(status)); /* Copy the possibly modified contents of the handled request out */ err = copyout(nvm_buffer, ifd->ifd_data, ifd_len); if (err) { device_printf(dev, "%s: Copying response back to user space failed, err %s\n", __func__, ice_err_str(err)); goto cleanup_free_nvm_buffer; } /* Convert private status to an error code for proper ioctl response */ switch (status) { case ICE_SUCCESS: err = (0); break; case ICE_ERR_NO_MEMORY: err = (ENOMEM); break; case ICE_ERR_OUT_OF_RANGE: err = (ENOTTY); break; case ICE_ERR_PARAM: default: err = (EINVAL); break; } cleanup_free_nvm_buffer: free(nvm_buffer, M_ICE); return err; } /** * ice_read_sff_eeprom - Read data from SFF eeprom * @sc: device softc * @dev_addr: I2C device address (typically 0xA0 or 0xA2) * @offset: offset into the eeprom * @data: pointer to data buffer to store read data in * @length: length to read; max length is 16 * * Read from the SFF eeprom in the module for this PF's port. For more details * on the contents of an SFF eeprom, refer to SFF-8472 (SFP), SFF-8636 (QSFP), * and SFF-8024 (both).
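 *
 * A typical first access reads the identifier byte at offset 0 of address
 * 0xA0 to classify the module, as the diagnostic sysctl below does
 * (a minimal sketch, error handling elided):
 *
 *	u8 id;
 *
 *	if (ice_read_sff_eeprom(sc, 0xA0, 0, &id, 1) == 0)
 *		device_printf(sc->dev, "module id: 0x%02X\n", id);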
*/ int ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length) { struct ice_hw *hw = &sc->hw; int ret = 0, retries = 0; enum ice_status status; if (length > 16) return (EINVAL); if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) return (ENXIO); do { status = ice_aq_sff_eeprom(hw, 0, dev_addr, offset, 0, 0, data, length, false, NULL); if (!status) { ret = 0; break; } if (status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) { ret = EBUSY; continue; } if (status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) { /* FW says I2C access isn't supported */ ret = EACCES; break; } if (status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) { device_printf(sc->dev, "%s: Module pointer location specified in command does not permit the required operation.\n", __func__); ret = EPERM; break; } else { device_printf(sc->dev, "%s: Error reading I2C data: err %s aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ret = EIO; break; } } while (retries++ < ICE_I2C_MAX_RETRIES); if (ret == EBUSY) device_printf(sc->dev, "%s: Error reading I2C data after %d retries\n", __func__, ICE_I2C_MAX_RETRIES); return (ret); } /** * ice_handle_i2c_req - Driver independent I2C request handler * @sc: device softc * @req: The I2C parameters to use * * Read from the port's I2C eeprom using the parameters from the ioctl. */ int ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req) { return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len); } /** * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module * inserted into the port. 
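 * The raw bytes follow the SFF-8472/SFF-8636 encodings; temperature, for
 * instance, is a signed 16-bit value in units of 1/256 degC, so a consumer
 * of this sysctl could decode it as (an illustrative sketch, where buf
 * holds the two raw temperature bytes):
 *
 *	int16_t raw = (int16_t)((buf[0] << 8) | buf[1]);
 *	int temp_mdegc = (int)raw * 1000 / 256;
 *
 * The offsets read for each module type are: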
* * | SFP A2 | QSFP Lower Page * ------------|---------|---------------- * Temperature | 96-97 | 22-23 * Vcc | 98-99 | 26-27 * TX power | 102-103 | 34-35..40-41 * RX power | 104-105 | 50-51..56-57 */ static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; device_t dev = sc->dev; struct sbuf *sbuf; int ret; u8 data[16]; UNREFERENCED_PARAMETER(arg2); UNREFERENCED_PARAMETER(oidp); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1); if (ret) return (ret); /* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */ if (data[0] == 0x3) { /* * Check for: * - Internally calibrated data * - Diagnostic monitoring is implemented */ ice_read_sff_eeprom(sc, 0xA0, 92, data, 1); if (!(data[0] & 0x60)) { device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]); return (ENODEV); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_read_sff_eeprom(sc, 0xA2, 96, data, 4); for (int i = 0; i < 4; i++) sbuf_printf(sbuf, "%02X ", data[i]); ice_read_sff_eeprom(sc, 0xA2, 102, data, 4); for (int i = 0; i < 4; i++) sbuf_printf(sbuf, "%02X ", data[i]); } else if (data[0] == 0xD || data[0] == 0x11) { /* * QSFP+ modules are always internally calibrated, and must indicate * what types of diagnostic monitoring are implemented */ sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_read_sff_eeprom(sc, 0xA0, 22, data, 2); for (int i = 0; i < 2; i++) sbuf_printf(sbuf, "%02X ", data[i]); ice_read_sff_eeprom(sc, 0xA0, 26, data, 2); for (int i = 0; i < 2; i++) sbuf_printf(sbuf, "%02X ", data[i]); ice_read_sff_eeprom(sc, 0xA0, 34, data, 2); for (int i = 0; i < 2; i++) sbuf_printf(sbuf, "%02X ", data[i]); ice_read_sff_eeprom(sc, 0xA0, 50, data, 2); for (int i = 0; i < 2; i++) sbuf_printf(sbuf, "%02X ", data[i]); } else { device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]); return (ENODEV); } sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_alloc_intr_tracking - Setup interrupt tracking structures * @sc: device softc structure * * Sets up the resource manager for keeping track of interrupt allocations, * and initializes the tracking maps for the PF's interrupt allocations. * * Unlike the scheme for queues, this is done in one step since both the * manager and the maps both have the same lifetime. * * @returns 0 on success, or an error code on failure. 
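 *
 * A caller is expected to treat failure as fatal to its own setup and
 * propagate the error, e.g. (illustrative only):
 *
 *	err = ice_alloc_intr_tracking(sc);
 *	if (err)
 *		return (err);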
*/ int ice_alloc_intr_tracking(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int err; /* Initialize the interrupt allocation manager */ err = ice_resmgr_init_contig_only(&sc->imgr, hw->func_caps.common_cap.num_msix_vectors); if (err) { device_printf(dev, "Unable to initialize PF interrupt manager: %s\n", ice_err_str(err)); return (err); } /* Allocate PF interrupt mapping storage */ if (!(sc->pf_imap = (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors, M_ICE, M_NOWAIT))) { device_printf(dev, "Unable to allocate PF imap memory\n"); err = ENOMEM; goto free_imgr; } if (!(sc->rdma_imap = (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors, M_ICE, M_NOWAIT))) { device_printf(dev, "Unable to allocate RDMA imap memory\n"); err = ENOMEM; free(sc->pf_imap, M_ICE); goto free_imgr; } for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) { sc->pf_imap[i] = ICE_INVALID_RES_IDX; sc->rdma_imap[i] = ICE_INVALID_RES_IDX; } return (0); free_imgr: ice_resmgr_destroy(&sc->imgr); return (err); } /** * ice_free_intr_tracking - Free PF interrupt tracking structures * @sc: device softc structure * * Frees the interrupt resource allocation manager and the PF's owned maps. * * VF maps are released when the owning VF's are destroyed, which should always * happen before this function is called. */ void ice_free_intr_tracking(struct ice_softc *sc) { if (sc->pf_imap) { ice_resmgr_release_map(&sc->imgr, sc->pf_imap, sc->lan_vectors); free(sc->pf_imap, M_ICE); sc->pf_imap = NULL; } if (sc->rdma_imap) { ice_resmgr_release_map(&sc->imgr, sc->rdma_imap, sc->lan_vectors); free(sc->rdma_imap, M_ICE); sc->rdma_imap = NULL; } ice_resmgr_destroy(&sc->imgr); } /** * ice_apply_supported_speed_filter - Mask off unsupported speeds * @report_speeds: bit-field for the desired link speeds * @mod_type: type of module/sgmii connection we have * * Given a bitmap of the desired lenient mode link speeds, * this function will mask off the speeds that are not currently * supported by the device. */ static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type) { u16 speed_mask; enum { IS_SGMII, IS_SFP, IS_QSFP } module; /* * The SFF specification says 0 is unknown, so we'll * treat it like we're connected through SGMII for now. * This may need revisiting if a new type is supported * in the future. */ switch (mod_type) { case 0: module = IS_SGMII; break; case 3: module = IS_SFP; break; default: module = IS_QSFP; break; } /* We won't offer anything lower than 100M for any part, * but we'll need to mask off other speeds based on the * device and module type. */ speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1); if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP)) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1); if (report_speeds & ICE_AQ_LINK_SPEED_25GB) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1); if (report_speeds & ICE_AQ_LINK_SPEED_50GB) { speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1); if (module == IS_QSFP) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1); } if (report_speeds & ICE_AQ_LINK_SPEED_100GB) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1); return (report_speeds & speed_mask); } /** * ice_init_health_events - Enable FW health event reporting * @sc: device softc * * Will try to enable firmware health event reporting, but shouldn't * cause any grief (to the caller) if this fails. 
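 * On failure the error is only logged, and the ICE_FEATURE_HEALTH_STATUS
 * bit in sc->feat_en simply stays cleared.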
*/ void ice_init_health_events(struct ice_softc *sc) { enum ice_status status; u8 health_mask; if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) || (!sc->enable_health_events)) return; health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK | ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK; status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL); if (status) device_printf(sc->dev, "Failed to enable firmware health events, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); else ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en); } /** * ice_print_health_status_string - Print message for given FW health event * @dev: the PCIe device * @elem: health status element containing status code * * A rather large list of possible health status codes and their associated * messages. */ static void ice_print_health_status_string(device_t dev, struct ice_aqc_health_status_elem *elem) { u16 status_code = le16toh(elem->health_status_code); switch (status_code) { case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY: device_printf(dev, "The device is in firmware recovery mode.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS: device_printf(dev, "The flash chip cannot be accessed.\n"); device_printf(dev, "Possible Solution: If issue persists, call customer support.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH: device_printf(dev, "NVM authentication failed.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH: device_printf(dev, "Option ROM authentication failed.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH: device_printf(dev, "DDP package failed.\n"); device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT: device_printf(dev, "NVM image is incompatible.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT: device_printf(dev, "Option ROM is incompatible.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB: device_printf(dev, "Supplied MIB file is invalid. 
DCB reverted to default configuration.\n"); device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT: device_printf(dev, "An unsupported module was detected.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE: device_printf(dev, "Module type is not supported.\n"); device_printf(dev, "Possible Solution: Change or replace the module or cable.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL: device_printf(dev, "Module is not qualified.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM: device_printf(dev, "Device cannot communicate with the module.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT: device_printf(dev, "Unresolved module conflict.\n"); device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT: device_printf(dev, "Module is not present.\n"); device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n"); device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n"); break; case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED: device_printf(dev, "Underutilized module.\n"); device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT: device_printf(dev, "An unsupported module was detected.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG: device_printf(dev, "Invalid link configuration.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS: device_printf(dev, "Port hardware access error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE: device_printf(dev, "A port is unreachable.\n"); device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED: device_printf(dev, "Port speed is limited due to module.\n"); device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to 
configure the port option to match the current module speed.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT: device_printf(dev, "A parallel fault was detected.\n"); device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n"); break; case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED: device_printf(dev, "Port speed is limited by PHY capabilities.\n"); device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n"); device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO: device_printf(dev, "LOM topology netlist is corrupted.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NETLIST: device_printf(dev, "Unrecoverable netlist error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT: device_printf(dev, "Port topology conflict.\n"); device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS: device_printf(dev, "Unrecoverable hardware access error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME: device_printf(dev, "Unrecoverable runtime error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT: device_printf(dev, "Link management engine failed to initialize.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; default: break; } } /** * ice_handle_health_status_event - helper function to output health status * @sc: device softc structure * @event: event received on a control queue * * Prints out the appropriate string based on the given Health Status Event * code. */ static void ice_handle_health_status_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_health_status_elem *health_info; u16 status_count; int i; if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS)) return; health_info = (struct ice_aqc_health_status_elem *)event->msg_buf; status_count = le16toh(event->desc.params.get_health_status.health_status_count); if (status_count > (event->buf_len / sizeof(*health_info))) { device_printf(sc->dev, "Received a health status event with invalid event count\n"); return; } for (i = 0; i < status_count; i++) { ice_print_health_status_string(sc->dev, health_info); health_info++; } } /** * ice_set_default_local_lldp_mib - Set Local LLDP MIB to default settings * @sc: device softc structure * * This function needs to be called after link up; it makes sure the FW * has certain PFC/DCB settings. This is intended to workaround a FW behavior * where these settings seem to be cleared on link up. 
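 *
 * Note that the ETS maxtcs field is only 3 bits wide, so the common case
 * of 8 supported TCs (8 & ICE_IEEE_ETS_MAXTC_M) is encoded as 0.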
*/ void ice_set_default_local_lldp_mib(struct ice_softc *sc) { struct ice_dcbx_cfg *dcbcfg; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; enum ice_status status; u8 maxtcs, maxtcs_ets; pi = hw->port_info; dcbcfg = &pi->qos_cfg.local_dcbx_cfg; maxtcs = hw->func_caps.common_cap.maxtc; /* This value is only 3 bits; 8 TCs maps to 0 */ maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M; /** * Setup the default settings used by the driver for the Set Local * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no * PFC). */ memset(dcbcfg, 0, sizeof(*dcbcfg)); dcbcfg->etscfg.willing = 1; dcbcfg->etscfg.tcbwtable[0] = 100; dcbcfg->etscfg.maxtcs = maxtcs_ets; dcbcfg->etsrec = dcbcfg->etscfg; dcbcfg->pfc.willing = 1; dcbcfg->pfc.pfccap = maxtcs; status = ice_set_dcb_cfg(pi); if (status) device_printf(dev, "Error setting Local LLDP MIB: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /** * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg * @sbuf: string buffer to print to * @name: prefix string to use * @ets: structure to pull values from * * A helper function for ice_sysctl_dump_dcbx_cfg(), this * formats the ETS rec and cfg TLVs into text. */ static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets) { sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing); sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs); sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs); sbuf_printf(sbuf, "%s.prio_table:", name); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sbuf_printf(sbuf, " %d", ets->prio_table[i]); sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "%s.tcbwtable:", name); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sbuf_printf(sbuf, " %d", ets->tcbwtable[i]); sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "%s.tsatable:", name); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sbuf_printf(sbuf, " %d", ets->tsatable[i]); sbuf_printf(sbuf, "\n"); } /** * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: AQ define for either Local or Remote MIB * @req: sysctl request pointer * * Prints out DCB/DCBX configuration, including the contents * of either the local or remote MIB, depending on the value * used in arg2. 
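 * arg2 is expected to be either ICE_AQ_LLDP_MIB_LOCAL or
 * ICE_AQ_LLDP_MIB_REMOTE.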
*/ static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {}; struct ice_dcbx_cfg dcb_buf = {}; struct ice_dcbx_cfg *dcbcfg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *sbuf; enum ice_status status; u8 maxtcs, dcbx_status, is_sw_lldp; UNREFERENCED_PARAMETER(oidp); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp; /* The driver doesn't receive a Remote MIB via SW */ if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE) return (ENOENT); dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg; if (!is_sw_lldp) { /* Collect information from the FW in FW LLDP mode */ dcbcfg = &dcb_buf; status = ice_aq_get_dcb_cfg(hw, (u8)arg2, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg); if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE && hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) { device_printf(dev, "Unable to query Remote MIB; port has not received one yet\n"); return (ENOENT); } if (status) { device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL); if (status == ICE_SUCCESS) dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE; else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE; maxtcs = hw->func_caps.common_cap.maxtc; dcbx_status = ice_get_dcbx_status(hw); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Do the actual printing */ sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp); sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs); sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status); sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps); sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status); sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ? "DSCP" : "VLAN"); sbuf_printf(sbuf, "dcbx_mode: %s\n", (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" : (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" : "Unknown"); ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg); ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec); sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing); sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc); sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap); sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena); if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) { sbuf_printf(sbuf, "\nLocal registers:\n"); sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n", (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M) >> PRTDCB_GENC_NUMTC_S); sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n", (rd32(hw, PRTDCB_TUP2TC))); sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n", (rd32(hw, PRTDCB_RUP2TC))); sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n", (rd32(hw, GLDCB_TC2PFC))); } /* Finish */ sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * XXX: This could be extended to apply to arbitrary PF-owned VSIs, * but for simplicity, this only works on the PF's LAN VSI. 
*/ static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_vsi_ctx ctx = { 0 }; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *sbuf; enum ice_status status; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Get HW absolute index of a VSI */ ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx); status = ice_aq_get_vsi_params(hw, &ctx, NULL); if (status != ICE_SUCCESS) { device_printf(dev, "Get VSI AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Do the actual printing */ sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num); sbuf_printf(sbuf, "VF NUM: %d\n", ctx.vf_num); sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd); sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated); sbuf_printf(sbuf, "Rx Queue Map method: %d\n", LE16_TO_CPU(ctx.info.mapping_flags)); /* The PF VSI is always contiguous, so there's no if-statement here */ sbuf_printf(sbuf, "Rx Queue base: %d\n", LE16_TO_CPU(ctx.info.q_mapping[0])); sbuf_printf(sbuf, "Rx Queue count: %d\n", LE16_TO_CPU(ctx.info.q_mapping[1])); sbuf_printf(sbuf, "TC qbases :"); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, " %4d", ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M); } sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "TC qcounts :"); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, " %4d", 1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S)); } /* Finish */ sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_ets_str_to_tbl - Parse string into ETS table * @str: input string to parse * @table: output eight values used for ETS values * @limit: max valid value to accept for ETS values * * Parses a string and converts the eight values within * into a table that can be used in setting ETS settings * in a MIB. * * @return 0 on success, EINVAL if a parsed value is * not between 0 and limit. */ static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit) { const char *str_start = str; char *str_end; long token; for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { token = strtol(str_start, &str_end, 0); if (token < 0 || token > limit) return (EINVAL); table[i] = (u8)token; str_start = (str_end + 1); } return (0); } /** * ice_check_ets_bw - Check if ETS bw vals are valid * @table: eight values used for ETS bandwidth * * @return true if the sum of all 8 values in table * equals 100. */ static bool ice_check_ets_bw(u8 *table) { int sum = 0; for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sum += (int)table[i]; return (sum == 100); } /** * ice_cfg_pba_num - Determine if PBA Number is retrievable * @sc: the device private softc structure * * Sets the feature flag for the existence of a PBA number * based on the success of the read command. This does not * cache the result. 
*/ void ice_cfg_pba_num(struct ice_softc *sc) { u8 pba_string[32] = ""; if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) && (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0)) ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en); } /** * ice_sysctl_query_port_ets - print Port ETS Config from AQ * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer */ static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_aqc_port_ets_elem port_ets = { 0 }; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; struct sbuf *sbuf; enum ice_status status; int i = 0; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); pi = hw->port_info; status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL); if (status != ICE_SUCCESS) { device_printf(dev, "Query Port ETS AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Do the actual printing */ sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits); sbuf_printf(sbuf, "TC BW %%:"); ice_for_each_traffic_class(i) { sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]); } sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id); sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id); sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio); sbuf_printf(sbuf, "TC Node TEIDs:\n"); ice_for_each_traffic_class(i) { sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]); } /* Finish */ sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } diff --git a/sys/dev/ice/ice_rdma.c b/sys/dev/ice/ice_rdma.c index 5d89deed0f90..0f06cd700663 100644 --- a/sys/dev/ice/ice_rdma.c +++ b/sys/dev/ice/ice_rdma.c @@ -1,859 +1,859 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2022, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ /** * @file ice_rdma.c * @brief RDMA client driver interface * * Functions to interface with the RDMA client driver, for enabling RDMA * functionality for the ice driver. * * The RDMA client interface is based on a simple kobject interface which is * defined by the irdma_if.m and irdma_di_if.m interfaces. * * The ice device driver provides the irdma_di_if.m interface methods, while * the client RDMA driver provides the irdma_if.m interface methods as an * extension on top of the irdma_di_if kobject. * * The initial connection between drivers is done via the RDMA client driver * calling ice_rdma_register. */ #include "ice_iflib.h" #include "ice_rdma_internal.h" #include "irdma_if.h" #include "irdma_di_if.h" /** * @var ice_rdma * @brief global RDMA driver state * * Contains global state the driver uses to connect to a client RDMA interface * driver. */ static struct ice_rdma_state ice_rdma; /* * Helper function prototypes */ static int ice_rdma_pf_attach_locked(struct ice_softc *sc); static void ice_rdma_pf_detach_locked(struct ice_softc *sc); static int ice_rdma_check_version(struct ice_rdma_info *info); static void ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg, struct ice_qos_params *qos_info); /* * RDMA Device Interface prototypes */ static int ice_rdma_pf_reset(struct ice_rdma_peer *peer); static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer, struct ice_rdma_msix_mapping *msix_info); static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res); static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer_dev, bool enable); static void ice_rdma_request_handler(struct ice_rdma_peer *peer, struct ice_rdma_request *req); /** * @var ice_rdma_di_methods * @brief RDMA driver interface methods * * Kobject methods implementing the driver-side interface for the RDMA peer * clients. This method table contains the operations which the client can * request from the driver. * * The client driver will then extend this kobject class with methods that the * driver can request from the client. */ static kobj_method_t ice_rdma_di_methods[] = { KOBJMETHOD(irdma_di_reset, ice_rdma_pf_reset), KOBJMETHOD(irdma_di_msix_init, ice_rdma_pf_msix_init), KOBJMETHOD(irdma_di_qset_register_request, ice_rdma_qset_register_request), KOBJMETHOD(irdma_di_vsi_filter_update, ice_rdma_update_vsi_filter), KOBJMETHOD(irdma_di_req_handler, ice_rdma_request_handler), KOBJMETHOD_END }; /* Define ice_rdma_di class which will be extended by the iRDMA driver */ DEFINE_CLASS_0(ice_rdma_di, ice_rdma_di_class, ice_rdma_di_methods, sizeof(struct ice_rdma_peer)); /** * ice_rdma_pf_reset - RDMA client interface requested a reset * @peer: the RDMA peer client structure * * Implements IRDMA_DI_RESET, called by the RDMA client driver to request * a reset of an ice driver device.
*/ static int ice_rdma_pf_reset(struct ice_rdma_peer *peer) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); /* * Request that the driver re-initialize by bringing the interface * down and up. */ ice_request_stack_reinit(sc); return (0); } /** * ice_rdma_pf_msix_init - RDMA client interface request MSI-X initialization * @peer: the RDMA peer client structure * @msix_info: requested MSI-X mapping * * Implements IRDMA_DI_MSIX_INIT, called by the RDMA client driver to * initialize the MSI-X resources required for RDMA functionality. */ static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer, struct ice_rdma_msix_mapping __unused *msix_info) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); MPASS(msix_info != NULL); device_printf(sc->dev, "%s: iRDMA MSI-X initialization request is not yet implemented\n", __func__); /* TODO: implement MSI-X initialization for RDMA */ return (ENOSYS); } /** * ice_rdma_qset_register_request - RDMA client interface request qset * registration or unregistration * @peer: the RDMA peer client structure * @res: resources to be registered or unregistered */ static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); struct ice_vsi *vsi = NULL; struct ice_dcbx_cfg *dcbx_cfg; struct ice_hw *hw = &sc->hw; enum ice_status status; int count, i, ret = 0; uint32_t *qset_teid; uint16_t *qs_handle; uint16_t max_rdmaqs[ICE_MAX_TRAFFIC_CLASS]; uint16_t vsi_id; uint8_t ena_tc = 0; if (!res) return -EINVAL; if (res->cnt_req > ICE_MAX_TXQ_PER_TXQG) return -EINVAL; switch(res->res_type) { case ICE_RDMA_QSET_ALLOC: count = res->cnt_req; vsi_id = peer->pf_vsi_num; break; case ICE_RDMA_QSET_FREE: count = res->res_allocated; vsi_id = res->qsets.vsi_id; break; default: return -EINVAL; } qset_teid = (uint32_t *)ice_calloc(hw, count, sizeof(*qset_teid)); if (!qset_teid) return -ENOMEM; qs_handle = (uint16_t *)ice_calloc(hw, count, sizeof(*qs_handle)); if (!qs_handle) { ice_free(hw, qset_teid); return -ENOMEM; } ice_for_each_traffic_class(i) max_rdmaqs[i] = 0; for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i] && ice_get_hw_vsi_num(hw, sc->all_vsi[i]->idx) == vsi_id) { vsi = sc->all_vsi[i]; break; } } if (!vsi) { ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI\n"); ret = -EINVAL; goto out; } if (sc != vsi->sc) { ice_debug(hw, ICE_DBG_RDMA, "VSI is tied to unexpected device\n"); ret = -EXDEV; goto out; } for (i = 0; i < count; i++) { struct ice_rdma_qset_params *qset; qset = &res->qsets; if (qset->vsi_id != peer->pf_vsi_num) { ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI requested %d %d\n", qset->vsi_id, peer->pf_vsi_num); ret = -EINVAL; goto out; } max_rdmaqs[qset->tc]++; qs_handle[i] = qset->qs_handle; qset_teid[i] = qset->teid; } switch(res->res_type) { case ICE_RDMA_QSET_ALLOC: dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg; for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { ena_tc |= BIT(dcbx_cfg->etscfg.prio_table[i]); } ice_debug(hw, ICE_DBG_RDMA, "%s:%d ena_tc=%x\n", __func__, __LINE__, ena_tc); status = ice_cfg_vsi_rdma(hw->port_info, vsi->idx, ena_tc, max_rdmaqs); if (status) { ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset config\n"); ret = -EINVAL; goto out; } for (i = 0; i < count; i++) { struct ice_rdma_qset_params *qset; qset = &res->qsets; status = ice_ena_vsi_rdma_qset(hw->port_info, vsi->idx, qset->tc, &qs_handle[i], 1, &qset_teid[i]); if (status) { ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset enable\n"); ret = -EINVAL; goto out; } qset->teid = 
qset_teid[i]; } break; case ICE_RDMA_QSET_FREE: status = ice_dis_vsi_rdma_qset(hw->port_info, count, qset_teid, qs_handle); if (status) ret = -EINVAL; break; default: ret = -EINVAL; break; } out: ice_free(hw, qs_handle); ice_free(hw, qset_teid); return ret; } /** * ice_rdma_update_vsi_filter - configure vsi information * when opening or closing rdma driver * @peer: the RDMA peer client structure * @enable: enable or disable the rdma filter */ static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer, bool enable) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); struct ice_vsi *vsi; int ret; vsi = &sc->pf_vsi; if (!vsi) return -EINVAL; ret = ice_cfg_iwarp_fltr(&sc->hw, vsi->idx, enable); if (ret) { device_printf(sc->dev, "Failed to %sable iWARP filtering\n", enable ? "en" : "dis"); } else { if (enable) vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; else vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; } return ret; } /** * ice_rdma_request_handler - handle requests incoming from RDMA driver * @peer: the RDMA peer client structure * @req: structure containing request */ static void ice_rdma_request_handler(struct ice_rdma_peer *peer, struct ice_rdma_request *req) { if (!req || !peer) { log(LOG_WARNING, "%s: peer or req are not valid\n", __func__); return; } switch(req->type) { case ICE_RDMA_EVENT_RESET: break; case ICE_RDMA_EVENT_QSET_REGISTER: ice_rdma_qset_register_request(peer, &req->res); break; case ICE_RDMA_EVENT_VSI_FILTER_UPDATE: ice_rdma_update_vsi_filter(peer, req->enable_filter); break; default: log(LOG_WARNING, "%s: Event %d not supported\n", __func__, req->type); break; } } /** * ice_rdma_cp_qos_info - gather current QOS/DCB settings in LAN to pass * to RDMA driver * @hw: ice hw structure * @dcbx_cfg: current DCB settings in ice driver * @qos_info: destination of the DCB settings */ static void ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg, struct ice_qos_params *qos_info) { u32 up2tc; u8 j; u8 num_tc = 0; u8 val_tc = 0; /* number of TC for validation */ u8 cnt_tc = 0; /* setup qos_info fields with defaults */ qos_info->num_apps = 0; qos_info->num_tc = 1; for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++) qos_info->up2tc[j] = 0; qos_info->tc_info[0].rel_bw = 100; for (j = 1; j < IEEE_8021QAZ_MAX_TCS; j++) qos_info->tc_info[j].rel_bw = 0; /* gather current values */ up2tc = rd32(hw, PRTDCB_TUP2TC); qos_info->num_apps = dcbx_cfg->numapps; for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) { num_tc |= BIT(dcbx_cfg->etscfg.prio_table[j]); } for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) { if (num_tc & BIT(j)) { cnt_tc++; val_tc |= BIT(j); } else { break; } } qos_info->num_tc = (val_tc == num_tc && num_tc != 0) ? cnt_tc : 1; for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++) qos_info->up2tc[j] = (up2tc >> (j * 3)) & 0x7; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) qos_info->tc_info[j].rel_bw = dcbx_cfg->etscfg.tcbwtable[j]; for (j = 0; j < qos_info->num_apps; j++) { qos_info->apps[j].priority = dcbx_cfg->app[j].priority; qos_info->apps[j].prot_id = dcbx_cfg->app[j].prot_id; qos_info->apps[j].selector = dcbx_cfg->app[j].selector; } } /** * ice_rdma_check_version - Check that the provided RDMA version is compatible * @info: the RDMA client information structure * * Verify that the client RDMA driver provided a version that is compatible * with the driver interface. 
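 *
 * For example, if the driver is built as interface version 1.2.3, a client
 * requesting 1.1.7 or 1.2.3 is accepted, while one requesting 2.0.0,
 * 1.3.0, or 1.2.4 is rejected (version numbers illustrative).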
*/ static int ice_rdma_check_version(struct ice_rdma_info *info) { /* Make sure the MAJOR version matches */ if (info->major_version != ICE_RDMA_MAJOR_VERSION) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports major version %d.x.x\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION); return (ENOTSUP); } /* * Make sure that the MINOR version is compatible. * * This means that the RDMA client driver version MUST not be greater * than the version provided by the driver, as it would indicate that * the RDMA client expects features which are not supported by the * main driver. */ if (info->minor_version > ICE_RDMA_MINOR_VERSION) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to minor version %d.%d.x\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION); return (ENOTSUP); } /* * Make sure that the PATCH version is compatible. * * This means that the RDMA client version MUST not be greater than * the version provided by the driver, as it may indicate that the * RDMA client expects certain backwards compatible bug fixes which * are not implemented by this version of the main driver. */ if ((info->minor_version == ICE_RDMA_MINOR_VERSION) && (info->patch_version > ICE_RDMA_PATCH_VERSION)) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to patch version %d.%d.%d\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION, ICE_RDMA_PATCH_VERSION); return (ENOTSUP); } /* Make sure that the kobject class is initialized */ if (info->rdma_class == NULL) { log(LOG_WARNING, "%s: the iRDMA driver did not specify a kobject interface\n", __func__); return (EINVAL); } return (0); } /** * ice_rdma_register - Register an RDMA client driver * @info: the RDMA client information structure * * Called by the RDMA client driver on load. Used to initialize the RDMA * client driver interface and enable interop between the ice driver and the * RDMA client driver. * * The RDMA client driver must provide the version number it expects, along * with a pointer to a kobject class that extends the irdma_di_if class, and * implements the irdma_if class interface. */ int ice_rdma_register(struct ice_rdma_info *info) { struct ice_rdma_entry *entry; int err = 0; sx_xlock(&ice_rdma.mtx); if (!ice_enable_irdma) { log(LOG_INFO, "%s: The iRDMA driver interface has been disabled\n", __func__); err = (ECONNREFUSED); goto return_unlock; } if (ice_rdma.registered) { log(LOG_WARNING, "%s: iRDMA driver already registered\n", __func__); err = (EBUSY); goto return_unlock; } /* Make sure the iRDMA version is compatible */ err = ice_rdma_check_version(info); if (err) goto return_unlock; log(LOG_INFO, "%s: iRDMA driver registered using version %d.%d.%d\n", __func__, info->major_version, info->minor_version, info->patch_version); ice_rdma.peer_class = info->rdma_class; /* * Initialize the kobject interface and notify the RDMA client of each * existing PF interface. 
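	 * Peers whose PF has already finished initializing are additionally
	 * opened right away via IRDMA_OPEN().
	 */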
*/ LIST_FOREACH(entry, &ice_rdma.peers, node) { kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class); IRDMA_PROBE(&entry->peer); if (entry->initiated) IRDMA_OPEN(&entry->peer); } ice_rdma.registered = true; return_unlock: sx_xunlock(&ice_rdma.mtx); return (err); } /** * ice_rdma_unregister - Unregister an RDMA client driver * * Called by the RDMA client driver on unload. Used to de-initialize the RDMA * client driver interface and shut down communication between the ice driver * and the RDMA client driver. */ int ice_rdma_unregister(void) { struct ice_rdma_entry *entry; sx_xlock(&ice_rdma.mtx); if (!ice_rdma.registered) { log(LOG_WARNING, "%s: iRDMA driver was not previously registered\n", __func__); sx_xunlock(&ice_rdma.mtx); return (ENOENT); } log(LOG_INFO, "%s: iRDMA driver unregistered\n", __func__); ice_rdma.registered = false; ice_rdma.peer_class = NULL; /* * Release the kobject interface for each of the existing PF * interfaces. Note that we do not notify the client about removing * each PF, as it is assumed that the client will have already cleaned * up any associated resources when it is unregistered. */ LIST_FOREACH(entry, &ice_rdma.peers, node) kobj_delete((kobj_t)&entry->peer, NULL); sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_init - RDMA driver init routine * * Called during ice driver module initialization to setup the RDMA client * interface mutex and RDMA peer structure list. */ void ice_rdma_init(void) { LIST_INIT(&ice_rdma.peers); sx_init_flags(&ice_rdma.mtx, "ice rdma interface", SX_DUPOK); ice_rdma.registered = false; ice_rdma.peer_class = NULL; } /** * ice_rdma_exit - RDMA driver exit routine * * Called during ice driver module exit to shutdown the RDMA client interface * mutex. */ void ice_rdma_exit(void) { MPASS(LIST_EMPTY(&ice_rdma.peers)); sx_destroy(&ice_rdma.mtx); } /** * ice_rdma_pf_attach_locked - Prepare a PF for RDMA connections * @sc: the ice driver softc * * Initialize a peer entry for this PF and add it to the RDMA interface list. * Notify the client RDMA driver of a new PF device. * * @pre must be called while holding the ice_rdma mutex. 
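 * (ice_rdma_pf_attach() below is the wrapper that takes the mutex and
 * calls this function.)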
*/ static int ice_rdma_pf_attach_locked(struct ice_softc *sc) { struct ice_rdma_entry *entry; /* Do not attach the PF unless RDMA is supported */ if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) return (0); entry = &sc->rdma_entry; if (entry->attached) { device_printf(sc->dev, "iRDMA peer entry already exists\n"); return (EEXIST); } entry->attached = true; entry->peer.dev = sc->dev; entry->peer.ifp = sc->ifp; entry->peer.pf_id = sc->hw.pf_id; entry->peer.pci_mem = sc->bar0.res; entry->peer.pf_vsi_num = ice_get_hw_vsi_num(&sc->hw, sc->pf_vsi.idx); if (sc->rdma_imap && sc->rdma_imap[0] != ICE_INVALID_RES_IDX && sc->irdma_vectors > 0) { entry->peer.msix.base = sc->rdma_imap[0]; entry->peer.msix.count = sc->irdma_vectors; } /* Gather DCB/QOS info into peer */ memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info)); ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg, &entry->peer.initial_qos_info); /* * If the RDMA client driver has already registered, initialize the * kobject and notify the client of a new PF */ if (ice_rdma.registered) { kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class); IRDMA_PROBE(&entry->peer); } LIST_INSERT_HEAD(&ice_rdma.peers, entry, node); ice_set_bit(ICE_FEATURE_RDMA, sc->feat_en); return (0); } /** * ice_rdma_pf_attach - Notify the RDMA client of a new PF * @sc: the ice driver softc * * Called during PF attach to notify the RDMA client of a new PF. */ int ice_rdma_pf_attach(struct ice_softc *sc) { int err; sx_xlock(&ice_rdma.mtx); err = ice_rdma_pf_attach_locked(sc); sx_xunlock(&ice_rdma.mtx); return (err); } /** * ice_rdma_pf_detach_locked - Notify the RDMA client on PF detach * @sc: the ice driver softc * * Notify the RDMA peer client driver of removal of a PF, and release any * RDMA-specific resources associated with that PF. Remove the PF from the * list of available RDMA entries. * * @pre must be called while holding the ice_rdma mutex. */ static void ice_rdma_pf_detach_locked(struct ice_softc *sc) { struct ice_rdma_entry *entry; /* No need to detach the PF if RDMA is not enabled */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RDMA)) return; entry = &sc->rdma_entry; if (!entry->attached) { device_printf(sc->dev, "iRDMA peer entry was not attached\n"); return; } /* * If the RDMA client driver is registered, notify the client that * a PF has been removed, and release the kobject reference. */ if (ice_rdma.registered) { IRDMA_REMOVE(&entry->peer); kobj_delete((kobj_t)&entry->peer, NULL); } LIST_REMOVE(entry, node); entry->attached = false; ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_en); } /** * ice_rdma_pf_detach - Notify the RDMA client of a PF detaching * @sc: the ice driver softc * * Take the ice_rdma mutex and then notify the RDMA client that a PF has been * removed. */ void ice_rdma_pf_detach(struct ice_softc *sc) { sx_xlock(&ice_rdma.mtx); ice_rdma_pf_detach_locked(sc); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_pf_init - Notify the RDMA client that a PF has initialized * @sc: the ice driver softc * * Called by the ice driver when a PF has been initialized. Notifies the RDMA * client that a PF is up and ready to operate. 
*/ int ice_rdma_pf_init(struct ice_softc *sc) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; sx_xlock(&ice_rdma.mtx); /* Update the MTU */ - peer->mtu = sc->ifp->if_mtu; + peer->mtu = if_getmtu(sc->ifp); sc->rdma_entry.initiated = true; if (sc->rdma_entry.attached && ice_rdma.registered) { sx_xunlock(&ice_rdma.mtx); return IRDMA_OPEN(peer); } sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_pf_stop - Notify the RDMA client of a stopped PF device * @sc: the ice driver softc * * Called by the ice driver when a PF is stopped. Notifies the RDMA client * driver that the PF has stopped and is not ready to operate. */ int ice_rdma_pf_stop(struct ice_softc *sc) { sx_xlock(&ice_rdma.mtx); sc->rdma_entry.initiated = false; if (sc->rdma_entry.attached && ice_rdma.registered) { sx_xunlock(&ice_rdma.mtx); return IRDMA_CLOSE(&sc->rdma_entry.peer); } sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_link_change - Notify RDMA client of a change in link status * @sc: the ice driver softc * @linkstate: the link status * @baudrate: the link rate in bits per second * * Notify the RDMA client of a link status change, by sending it the new link * state and baudrate. * * The link state is represented the same way as in the ifnet structure. It * should be LINK_STATE_UNKNOWN, LINK_STATE_DOWN, or LINK_STATE_UP. */ void ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_LINK_CHANGE; event.linkstate = linkstate; event.baudrate = baudrate; sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_notify_dcb_qos_change - notify RDMA driver to pause traffic * @sc: the ice driver softc * * Notify the RDMA driver that QOS/DCB settings are about to change. * Once the function returns, all the QPs should be suspended. */ void ice_rdma_notify_dcb_qos_change(struct ice_softc *sc) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_TC_CHANGE; /* pre-event */ event.prep = true; sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_dcb_qos_update - pass the changed DCB settings to the RDMA driver * @sc: the ice driver softc * @pi: the port info structure * * Pass the changed DCB settings to the RDMA driver. This function should be * called only after ice_rdma_notify_dcb_qos_change has been called and has * returned. After the function returns, all the RDMA traffic * should be resumed.
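 *
 * The expected sequence around a DCB/QOS reconfiguration is therefore
 * (a sketch; the middle step stands for the driver's own DCB update path):
 *
 *	ice_rdma_notify_dcb_qos_change(sc);
 *	... apply the new DCB/QOS configuration ...
 *	ice_rdma_dcb_qos_update(sc, pi);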
*/ void ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_TC_CHANGE; /* post-event */ event.prep = false; /* gather current configuration */ ice_rdma_cp_qos_info(&sc->hw, &pi->qos_cfg.local_dcbx_cfg, &event.port_qos); sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } diff --git a/sys/dev/ice/if_ice_iflib.c b/sys/dev/ice/if_ice_iflib.c index 5e5034664251..bc28d7889feb 100644 --- a/sys/dev/ice/if_ice_iflib.c +++ b/sys/dev/ice/if_ice_iflib.c @@ -1,3059 +1,3059 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2021, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ /** * @file if_ice_iflib.c * @brief iflib driver implementation * * Contains the main entry point for the iflib driver implementation. It * implements the various ifdi driver methods, and sets up the module and * driver values to load an iflib driver. 
*/ #include "ice_iflib.h" #include "ice_drv_info.h" #include "ice_switch.h" #include "ice_sched.h" #include #include #include #include #include /* * Device method prototypes */ static void *ice_register(device_t); static int ice_if_attach_pre(if_ctx_t); static int ice_attach_pre_recovery_mode(struct ice_softc *sc); static int ice_if_attach_post(if_ctx_t); static void ice_attach_post_recovery_mode(struct ice_softc *sc); static int ice_if_detach(if_ctx_t); static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ice_if_queues_free(if_ctx_t ctx); static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ice_if_intr_enable(if_ctx_t ctx); static void ice_if_intr_disable(if_ctx_t ctx); static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ice_if_promisc_set(if_ctx_t ctx, int flags); static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ice_if_media_change(if_ctx_t ctx); static void ice_if_init(if_ctx_t ctx); static void ice_if_timer(if_ctx_t ctx, uint16_t qid); static void ice_if_update_admin_status(if_ctx_t ctx); static void ice_if_multi_set(if_ctx_t ctx); static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void ice_if_stop(if_ctx_t ctx); static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter); static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ice_if_suspend(if_ctx_t ctx); static int ice_if_resume(if_ctx_t ctx); static int ice_msix_que(void *arg); static int ice_msix_admin(void *arg); /* * Helper function prototypes */ static int ice_pci_mapping(struct ice_softc *sc); static void ice_free_pci_mapping(struct ice_softc *sc); static void ice_update_link_status(struct ice_softc *sc, bool update_media); static void ice_init_device_features(struct ice_softc *sc); static void ice_init_tx_tracking(struct ice_vsi *vsi); static void ice_handle_reset_event(struct ice_softc *sc); static void ice_handle_pf_reset_request(struct ice_softc *sc); static void ice_prepare_for_reset(struct ice_softc *sc); static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc); static void ice_rebuild(struct ice_softc *sc); static void ice_rebuild_recovery_mode(struct ice_softc *sc); static void ice_free_irqvs(struct ice_softc *sc); static void ice_update_rx_mbuf_sz(struct ice_softc *sc); static void ice_poll_for_media_avail(struct ice_softc *sc); static void ice_setup_scctx(struct ice_softc *sc); static int ice_allocate_msix(struct ice_softc *sc); static void ice_admin_timer(void *arg); static void ice_transition_recovery_mode(struct ice_softc *sc); static void ice_transition_safe_mode(struct ice_softc *sc); /* * Device Interface Declaration */ /** * @var ice_methods * @brief ice driver method entry points * * List of device methods implementing the generic device interface used by * the device stack to interact with the ice driver. Since this is an iflib * driver, most of the methods point to the generic iflib implementation. 
*/ static device_method_t ice_methods[] = { /* Device interface */ DEVMETHOD(device_register, ice_register), DEVMETHOD(device_probe, iflib_device_probe_vendor), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; /** * @var ice_iflib_methods * @brief iflib method entry points * * List of device methods used by the iflib stack to interact with this * driver. These are the real main entry points used to interact with this * driver. */ static device_method_t ice_iflib_methods[] = { DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre), DEVMETHOD(ifdi_attach_post, ice_if_attach_post), DEVMETHOD(ifdi_detach, ice_if_detach), DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc), DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign), DEVMETHOD(ifdi_queues_free, ice_if_queues_free), DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set), DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable), DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable), DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable), DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set), DEVMETHOD(ifdi_media_status, ice_if_media_status), DEVMETHOD(ifdi_media_change, ice_if_media_change), DEVMETHOD(ifdi_init, ice_if_init), DEVMETHOD(ifdi_stop, ice_if_stop), DEVMETHOD(ifdi_timer, ice_if_timer), DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ice_if_multi_set), DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ice_if_get_counter), DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl), DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req), DEVMETHOD(ifdi_suspend, ice_if_suspend), DEVMETHOD(ifdi_resume, ice_if_resume), DEVMETHOD_END }; /** * @var ice_driver * @brief driver structure for the generic device stack * * driver_t definition used to setup the generic device methods. */ static driver_t ice_driver = { .name = "ice", .methods = ice_methods, .size = sizeof(struct ice_softc), }; /** * @var ice_iflib_driver * @brief driver structure for the iflib stack * * driver_t definition used to setup the iflib device methods. */ static driver_t ice_iflib_driver = { .name = "ice", .methods = ice_iflib_methods, .size = sizeof(struct ice_softc), }; extern struct if_txrx ice_txrx; extern struct if_txrx ice_recovery_txrx; /** * @var ice_sctx * @brief ice driver shared context * * Structure defining shared values (context) that is used by all instances of * the device. Primarily used to setup details about how the iflib stack * should treat this driver. Also defines the default, minimum, and maximum * number of descriptors in each ring. */ static struct if_shared_ctx ice_sctx = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = ICE_MAX_FRAME_SIZE, /* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but * that doesn't make sense since that would be larger than the maximum * size of a single packet. */ .isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE, /* XXX: This is only used by iflib to ensure that * scctx->isc_tx_tso_size_max + the VLAN header is a valid size. */ .isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header), /* XXX: This is used by iflib to set the number of segments in the TSO * DMA tag. 
However, scctx->isc_tx_tso_segsize_max is used to set the * related ifnet parameter. */ .isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = ICE_MAX_FRAME_SIZE, .isc_rx_nsegments = ICE_MAX_RX_SEGS, .isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ice_vendor_info_array, .isc_driver_version = __DECONST(char *, ice_driver_version), .isc_driver = &ice_iflib_driver, /* * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available * for hardware checksum offload * * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the * IP sum field, required by our hardware to calculate valid TSO * checksums. * * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs * even when the interface is down. * * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X * vectors manually instead of relying on iflib code to do this. */ .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX, .isc_nrxd_min = {ICE_MIN_DESC_COUNT}, .isc_ntxd_min = {ICE_MIN_DESC_COUNT}, .isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT}, .isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT}, }; DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL); MODULE_VERSION(ice, 1); MODULE_DEPEND(ice, pci, 1, 1, 1); MODULE_DEPEND(ice, ether, 1, 1, 1); MODULE_DEPEND(ice, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array); /* Static driver-wide sysctls */ #include "ice_iflib_sysctls.h" /** * ice_pci_mapping - Map PCI BAR memory * @sc: device private softc * * Map PCI BAR 0 for device operation. */ static int ice_pci_mapping(struct ice_softc *sc) { int rc; /* Map BAR0 */ rc = ice_map_bar(sc->dev, &sc->bar0, 0); if (rc) return rc; return 0; } /** * ice_free_pci_mapping - Release PCI BAR memory * @sc: device private softc * * Release PCI BARs which were previously mapped by ice_pci_mapping(). */ static void ice_free_pci_mapping(struct ice_softc *sc) { /* Free BAR0 */ ice_free_bar(sc->dev, &sc->bar0); } /* * Device methods */ /** * ice_register - register device method callback * @dev: the device being registered * * Returns a pointer to the shared context structure, which is used by iflib. */ static void * ice_register(device_t dev __unused) { return &ice_sctx; } /* ice_register */ /** * ice_setup_scctx - Setup the iflib softc context structure * @sc: the device private structure * * Setup the parameters in if_softc_ctx_t structure used by the iflib stack * when loading. */ static void ice_setup_scctx(struct ice_softc *sc) { if_softc_ctx_t scctx = sc->scctx; struct ice_hw *hw = &sc->hw; bool safe_mode, recovery_mode; safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE); recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* * If the driver loads in Safe mode or Recovery mode, limit iflib to * a single queue pair. */ if (safe_mode || recovery_mode) { scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets_max = 1; scctx->isc_nrxqsets_max = 1; } else { /* * iflib initially sets the isc_ntxqsets and isc_nrxqsets to * the values of the override sysctls. Cache these initial * values so that the driver can be aware of what the iflib * sysctl value is when setting up MSI-X vectors. 
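 *
 * As an example (assuming the standard iflib tunable names, which are not
 * defined by this driver), a user could request four queue pairs from
 * loader.conf(5); iflib reflects these into isc_ntxqsets and isc_nrxqsets
 * before this function runs:
 *
 *	dev.ice.0.iflib.override_ntxqs=4
 *	dev.ice.0.iflib.override_nrxqs=4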
*/ sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets; sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets; if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size; if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size; scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq; scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq; /* * Sanity check that the iflib sysctl values are within the * maximum supported range. */ if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max) sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max; if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max) sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max; } scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct ice_tx_desc), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN); scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = ICE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE; scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR); scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size; /* * If the driver loads in recovery mode, disable Tx/Rx functionality */ if (recovery_mode) scctx->isc_txrx = &ice_recovery_txrx; else scctx->isc_txrx = &ice_txrx; /* * If the driver loads in Safe mode or Recovery mode, disable * advanced features including hardware offloads. */ if (safe_mode || recovery_mode) { scctx->isc_capenable = ICE_SAFE_CAPS; scctx->isc_tx_csum_flags = 0; } else { scctx->isc_capenable = ICE_FULL_CAPS; scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD; } scctx->isc_capabilities = scctx->isc_capenable; } /* ice_setup_scctx */ /** * ice_if_attach_pre - Early device attach logic * @ctx: the iflib context structure * * Called by iflib during the attach process. Earliest main driver entry * point which performs necessary hardware and driver initialization. Called * before the Tx and Rx queues are allocated. */ static int ice_if_attach_pre(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; enum ice_status status; if_softc_ctx_t scctx; struct ice_hw *hw; device_t dev; int err; device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n"); ice_set_state(&sc->state, ICE_STATE_ATTACHING); sc->ctx = ctx; sc->media = iflib_get_media(ctx); sc->sctx = iflib_get_sctx(ctx); sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx); dev = sc->dev = iflib_get_dev(ctx); scctx = sc->scctx = iflib_get_softc_ctx(ctx); hw = &sc->hw; hw->back = sc; snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0); ASSERT_CTX_LOCKED(sc); if (ice_pci_mapping(sc)) { err = (ENXIO); goto destroy_admin_timer; } /* Save off the PCI information */ ice_save_pci_info(hw, dev); /* create tunables as early as possible */ ice_add_device_tunables(sc); /* Setup ControlQ lengths */ ice_set_ctrlq_len(hw); fw_mode = ice_get_fw_mode(hw); if (fw_mode == ICE_FW_MODE_REC) { device_printf(dev, "Firmware recovery mode detected. Limiting functionality. 
Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } /* Initialize the hw data structure */ status = ice_init_hw(hw); if (status) { if (status == ICE_ERR_FW_API_VER) { /* Enter recovery mode, so that the driver remains * loaded. This way, if the system administrator * cannot update the driver, they may still attempt to * downgrade the NVM. */ err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } else { err = EIO; device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } goto free_pci_mapping; } /* Notify firmware of the device driver version */ err = ice_send_version(sc); if (err) goto deinit_hw; ice_load_pkg_file(sc); err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto deinit_hw; } ice_print_nvm_version(sc); ice_init_device_features(sc); /* Setup the MAC address */ iflib_set_mac(ctx, hw->port_info->mac.lan_addr); /* Setup the iflib softc context structure */ ice_setup_scctx(sc); /* Initialize the Tx queue manager */ err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq); if (err) { device_printf(dev, "Unable to initialize Tx queue manager: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize the Rx queue manager */ err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq); if (err) { device_printf(dev, "Unable to initialize Rx queue manager: %s\n", ice_err_str(err)); goto free_tx_qmgr; } /* Initialize the interrupt resource manager */ err = ice_alloc_intr_tracking(sc); if (err) /* Errors are already printed */ goto free_rx_qmgr; /* Determine maximum number of VSIs we'll prepare for */ sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE, hw->func_caps.guar_num_vsi); if (!sc->num_available_vsi) { err = EIO; device_printf(dev, "No VSIs allocated to host\n"); goto free_intr_tracking; } /* Allocate storage for the VSI pointers */ sc->all_vsi = (struct ice_vsi **) malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi, M_ICE, M_WAITOK | M_ZERO); if (!sc->all_vsi) { err = ENOMEM; device_printf(dev, "Unable to allocate VSI array\n"); goto free_intr_tracking; } /* * Prepare the statically allocated primary PF VSI in the softc * structure. Other VSIs will be dynamically allocated as needed. 
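 *
 * The tail of attach_pre below relies on a goto-based unwind ladder: each
 * successful allocation adds one more cleanup label, so a failure at any
 * step releases exactly what was acquired so far, in reverse order.
 * Schematically (the names here are illustrative, not real helpers):
 *
 *	if ((err = alloc_first()) != 0)
 *		return (err);
 *	if ((err = alloc_second()) != 0)
 *		goto free_first;
 *	return (0);
 *
 * free_first:
 *	release_first();
 *	return (err);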
*/ ice_setup_pf_vsi(sc); err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max, scctx->isc_nrxqsets_max); if (err) { device_printf(dev, "Unable to allocate VSI Queue maps\n"); goto free_main_vsi; } /* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */ err = ice_allocate_msix(sc); if (err) goto free_main_vsi; return 0; free_main_vsi: /* ice_release_vsi will free the queue maps if they were allocated */ ice_release_vsi(&sc->pf_vsi); free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; free_intr_tracking: ice_free_intr_tracking(sc); free_rx_qmgr: ice_resmgr_destroy(&sc->rx_qmgr); free_tx_qmgr: ice_resmgr_destroy(&sc->tx_qmgr); deinit_hw: ice_deinit_hw(hw); free_pci_mapping: ice_free_pci_mapping(sc); destroy_admin_timer: mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); return err; } /* ice_if_attach_pre */ /** * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery * @sc: the device private softc * * Loads the device driver in limited Firmware Recovery mode, intended to * allow users to update the firmware to attempt to recover the device. * * @remark We may enter recovery mode in case either (a) the firmware is * detected to be in an invalid state and must be re-programmed, or (b) the * driver detects that the loaded firmware has a non-compatible API version * that the driver cannot operate with. */ static int ice_attach_pre_recovery_mode(struct ice_softc *sc) { ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* Setup the iflib softc context */ ice_setup_scctx(sc); /* Setup the PF VSI back pointer */ sc->pf_vsi.sc = sc; /* * We still need to allocate MSI-X vectors since we need one vector to * run the admin interrupt */ return ice_allocate_msix(sc); } /** * ice_update_link_status - notify OS of link state change * @sc: device private softc structure * @update_media: true if we should update media even if link didn't change * * Called to notify iflib core of link status changes. Should be called once * during attach_post, and whenever link status changes during runtime. * * This call only updates the currently supported media types if the link * status changed, or if update_media is set to true. */ static void ice_update_link_status(struct ice_softc *sc, bool update_media) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Never report link up when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Report link status to iflib only once each time it changes */ if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) { if (sc->link_up) { /* link is up */ uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info); ice_set_default_local_lldp_mib(sc); iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate); ice_rdma_link_change(sc, LINK_STATE_UP, baudrate); ice_link_up_msg(sc); update_media = true; } else { /* link is down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); ice_rdma_link_change(sc, LINK_STATE_DOWN, 0); update_media = true; } } /* Update the supported media types */ if (update_media) { status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /* TODO: notify VFs of link state change */ } /** * ice_if_attach_post - Late device attach logic * @ctx: the iflib context structure * * Called by iflib to finish up attaching the device. 
Performs any attach * logic which must wait until after the Tx and Rx queues have been * allocated. */ static int ice_if_attach_post(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int err; ASSERT_CTX_LOCKED(sc); /* We don't yet support loading if MSI-X is not supported */ if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) { device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n"); return (ENOTSUP); } /* The ifnet structure hasn't yet been initialized when the attach_pre * handler is called, so wait until attach_post to setup the * isc_max_frame_size. */ sc->ifp = ifp; - sc->scctx->isc_max_frame_size = ifp->if_mtu + + sc->scctx->isc_max_frame_size = if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * If we are in recovery mode, only perform a limited subset of * initialization to support NVM recovery. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_attach_post_recovery_mode(sc); return (0); } sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to initialize Main VSI: %s\n", ice_err_str(err)); return err; } /* Enable FW health event reporting */ ice_init_health_events(sc); /* Configure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to configure RSS for the main VSI, err %s\n", ice_err_str(err)); return err; } /* Configure switch to drop transmitted LLDP and PAUSE frames */ err = ice_cfg_pf_ethertype_filters(sc); if (err) return err; ice_get_and_print_bus_info(sc); ice_set_link_management_mode(sc); ice_init_saved_phy_cfg(sc); ice_cfg_pba_num(sc); ice_add_device_sysctls(sc); /* Get DCBX/LLDP state and start DCBX agent */ ice_init_dcb_setup(sc); /* Setup link configuration parameters */ ice_init_link_configuration(sc); ice_update_link_status(sc, true); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); err = ice_rdma_pf_attach(sc); if (err) return (err); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); return 0; } /* ice_if_attach_post */ /** * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery * @sc: the device private softc * * Performs minimal work to prepare the driver to recover an NVM in case the * firmware is in recovery mode. */ static void ice_attach_post_recovery_mode(struct ice_softc *sc) { /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); } /** * ice_free_irqvs - Free IRQ vector memory * @sc: the device private softc structure * * Free IRQ vector memory allocated during ice_if_msix_intr_assign. 
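 *
 * The function is deliberately idempotent: it returns early when sc->irqvs
 * is NULL and clears the pointer after freeing, so both teardown paths may
 * call it in either order, e.g.:
 *
 *	ice_free_irqvs(sc);	// frees the vectors, sets sc->irqvs = NULL
 *	ice_free_irqvs(sc);	// second call is a harmless no-op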
*/ static void ice_free_irqvs(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; if_ctx_t ctx = sc->ctx; int i; /* If the irqvs array is NULL, then there are no vectors to free */ if (sc->irqvs == NULL) return; /* Free the IRQ vectors */ for (i = 0; i < sc->num_irq_vectors; i++) iflib_irq_free(ctx, &sc->irqvs[i].irq); /* Clear the irqv pointers */ for (i = 0; i < vsi->num_rx_queues; i++) vsi->rx_queues[i].irqv = NULL; for (i = 0; i < vsi->num_tx_queues; i++) vsi->tx_queues[i].irqv = NULL; /* Release the vector array memory */ free(sc->irqvs, M_ICE); sc->irqvs = NULL; sc->num_irq_vectors = 0; } /** * ice_if_detach - Device driver detach logic * @ctx: iflib context structure * * Perform device shutdown logic to detach the device driver. * * Note that there is no guarantee of the ordering of ice_if_queues_free() and * ice_if_detach(). It is possible for the functions to be called in either * order, and they must not assume a strict ordering. */ static int ice_if_detach(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int i; ASSERT_CTX_LOCKED(sc); /* Indicate that we're detaching */ ice_set_state(&sc->state, ICE_STATE_DETACHING); /* Stop the admin timer */ mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); ice_rdma_pf_detach(sc); /* Free allocated media types */ ifmedia_removeall(sc->media); /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_queues_free() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X resources */ ice_free_irqvs(sc); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Release MSI-X memory */ pci_release_msi(sc->dev); if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } ice_free_intr_tracking(sc); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) ice_deinit_hw(&sc->hw); ice_free_pci_mapping(sc); return 0; } /* ice_if_detach */ /** * ice_if_tx_queues_alloc - Allocate Tx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @ntxqs: the number of Tx queues per set (should always be 1) * @ntxqsets: the number of Tx queue sets to allocate * * Called by iflib to allocate Tx queues for the device. Allocates driver * memory to track each queue, the status arrays used for descriptor * status reporting, and Tx queue sysctls. 
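 *
 * iflib allocates one DMA-backed descriptor ring per queue set; since
 * ntxqs is always 1 here, the flat vaddrs/paddrs arrays are indexed
 * directly by queue set, which is what the setup loop below relies on:
 *
 *	txq->tx_base = (struct ice_tx_desc *)vaddrs[i];	// ring i, KVA
 *	txq->tx_paddr = paddrs[i];			// ring i, physical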
*/ static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only ntxqs, int ntxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int err, i, j; MPASS(ntxqs == 1); MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_WAITOK | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Tx queue memory\n"); return (ENOMEM); } /* Allocate report status arrays */ for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (!(txq->tx_rsq = (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_WAITOK))) { device_printf(sc->dev, "Unable to allocate tx_rsq memory\n"); err = ENOMEM; goto free_tx_queues; } /* Initialize report status array */ for (j = 0; j < sc->scctx->isc_ntxd[0]; j++) txq->tx_rsq[j] = QIDX_INVALID; } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Tx queue sysctls context */ ice_vsi_add_txqs_ctx(vsi); for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { /* q_handle == me when only one TC */ txq->me = txq->q_handle = i; txq->vsi = vsi; /* store the queue size for easier access */ txq->desc_count = sc->scctx->isc_ntxd[0]; /* get the virtual and physical address of the hardware queues */ txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); txq->tx_base = (struct ice_tx_desc *)vaddrs[i]; txq->tx_paddr = paddrs[i]; ice_add_txq_sysctls(txq); } vsi->num_tx_queues = ntxqsets; return (0); free_tx_queues: for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; return err; } /** * ice_if_rx_queues_alloc - Allocate Rx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @nrxqs: number of Rx queues per set (should always be 1) * @nrxqsets: number of Rx queue sets to allocate * * Called by iflib to allocate Rx queues for the device. Allocates driver * memory to track each queue, as well as sets up the Rx queue sysctls. 
*/ static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only nrxqs, int nrxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_rx_queue *rxq; int err, i; MPASS(nrxqs == 1); MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_WAITOK | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Rx queue memory\n"); return (ENOMEM); } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_rx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Rx queue sysctls context */ ice_vsi_add_rxqs_ctx(vsi); for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) { rxq->me = i; rxq->vsi = vsi; /* store the queue size for easier access */ rxq->desc_count = sc->scctx->isc_nrxd[0]; /* get the virtual and physical address of the hardware queues */ rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i]; rxq->rx_paddr = paddrs[i]; ice_add_rxq_sysctls(rxq); } vsi->num_rx_queues = nrxqsets; return (0); free_rx_queues: free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; return err; } /** * ice_if_queues_free - Free queue memory * @ctx: the iflib context structure * * Free queue memory allocated by ice_if_tx_queues_alloc() and * ice_if_rx_queues_alloc(). * * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be * called in the same order. It's possible for ice_if_queues_free() to be * called prior to ice_if_detach(), and vice versa. * * For this reason, the main VSI is a static member of the ice_softc, which is * not free'd until after iflib finishes calling both of these functions. * * Thus, care must be taken in how we manage the memory being freed by this * function, and in what tasks it can and must perform. */ static void ice_if_queues_free(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int i; /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_detach() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */ ice_free_irqvs(sc); if (vsi->tx_queues != NULL) { /* free the tx_rsq arrays */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; vsi->num_tx_queues = 0; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; vsi->num_rx_queues = 0; } } /** * ice_msix_que - Fast interrupt handler for MSI-X receive queues * @arg: The Rx queue memory * * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when * an MSI-X interrupt for a given queue is triggered. Currently this just asks * iflib to schedule the main Rx thread. 
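 *
 * FILTER_SCHEDULE_THREAD asks iflib to run the deferred queue service
 * routine. A filter that could complete its work at interrupt time would
 * instead return FILTER_HANDLED; a hypothetical shape of such a handler
 * (no_rx_work() is not a real helper in this driver):
 *
 *	if (no_rx_work(rxq))
 *		return (FILTER_HANDLED);
 *	return (FILTER_SCHEDULE_THREAD);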
*/ static int ice_msix_que(void *arg) { struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg; /* TODO: dynamic ITR algorithm?? */ return (FILTER_SCHEDULE_THREAD); } /** * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt * @arg: pointer to device softc memory * * Called by iflib when an administrative interrupt occurs. Should perform any * fast logic for handling the interrupt cause, and then indicate whether the * admin task needs to be queued. */ static int ice_msix_admin(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u32 oicr; /* There is no safe way to modify the enabled miscellaneous causes of * the OICR vector at runtime, as doing so would be prone to race * conditions. Reading PFINT_OICR will unmask the associated interrupt * causes and allow future interrupts to occur. The admin interrupt * vector will not be re-enabled until after we exit this function, * but any delayed tasks must be resilient against possible "late * arrival" interrupts that occur while we're already handling the * task. This is done by using state bits and serializing these * delayed tasks via the admin status task function. */ oicr = rd32(hw, PFINT_OICR); /* Processing multiple controlq interrupts on a single vector does not * provide an indication of which controlq triggered the interrupt. * We might try reading the INTEVENT bit of the respective PFINT_*_CTL * registers. However, the INTEVENT bit is not guaranteed to be set as * it gets automatically cleared when the hardware acknowledges the * interrupt. * * This means we don't really have a good indication of which * controlq (if any) triggered this interrupt. We'll just notify the * admin task that it should check all the controlqs. */ ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); if (oicr & PFINT_OICR_VFLR_M) { ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING); } if (oicr & PFINT_OICR_MAL_DETECT_M) { ice_set_state(&sc->state, ICE_STATE_MDD_PENDING); } if (oicr & PFINT_OICR_GRST_M) { u32 reset; reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> GLGEN_RSTAT_RESET_TYPE_S; if (reset == ICE_RESET_CORER) sc->soft_stats.corer_count++; else if (reset == ICE_RESET_GLOBR) sc->soft_stats.globr_count++; else sc->soft_stats.empr_count++; /* There are a couple of bits at play for handling resets. * First, the ICE_STATE_RESET_OICR_RECV bit is used to * indicate that the driver has received an OICR with a reset * bit active, indicating that a CORER/GLOBR/EMPR is about to * happen. Second, we set hw->reset_ongoing to indicate that * the hardware is in reset. We will set this back to false as * soon as the driver has determined that the hardware is out * of reset. * * If the driver wishes to trigger a reset request, it can set one of * the ICE_STATE_RESET_*_REQ bits, which will trigger the * correct type of reset. 
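 *
 * For example, the error causes handled below request a PF reset just by
 * setting the state bit; the admin task observes the bit later and
 * performs the actual reset:
 *
 *	ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);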
*/ if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) hw->reset_ongoing = true; } if (oicr & PFINT_OICR_ECC_ERR_M) { device_printf(dev, "ECC Error detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } if (oicr & PFINT_OICR_PE_CRITERR_M) { device_printf(dev, "Critical Protocol Engine Error detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } if (oicr & PFINT_OICR_PCI_EXCEPTION_M) { device_printf(dev, "PCI Exception detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } if (oicr & PFINT_OICR_HMC_ERR_M) { /* Log the HMC errors, but don't disable the interrupt cause */ ice_log_hmc_error(hw, dev); } return (FILTER_SCHEDULE_THREAD); } /** * ice_allocate_msix - Allocate MSI-X vectors for the interface * @sc: the device private softc * * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process. * * First, determine a suitable total number of vectors based on the number * of CPUs, RSS buckets, the administrative vector, and other demands such as * RDMA. * * Request the desired number of vectors, and see how many we obtain. If we * don't obtain as many as desired, reduce the demands by lowering the number * of requested queues or reducing the demand from other features such as * RDMA. * * @remark This function is required because the driver sets the * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors * manually. * * @remark This driver will only use MSI-X vectors. If this is not possible, * neither MSI nor legacy interrupts will be tried. * * @post on success this function must set the following scctx parameters: * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr. * * @returns zero on success or an error code on failure. */ static int ice_allocate_msix(struct ice_softc *sc) { bool iflib_override_queue_count = false; if_softc_ctx_t scctx = sc->scctx; device_t dev = sc->dev; cpuset_t cpus; int bar, queues, vectors, requested; int err = 0; int rdma; /* Allocate the MSI-X bar */ bar = scctx->isc_msix_bar; sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (!sc->msix_table) { device_printf(dev, "Unable to map MSI-X table\n"); return (ENOMEM); } /* Check if the iflib queue count sysctls have been set */ if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs) iflib_override_queue_count = true; err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus); if (err) { device_printf(dev, "%s: Unable to fetch the CPU list: %s\n", __func__, ice_err_str(err)); CPU_COPY(&all_cpus, &cpus); } /* Attempt to mimic behavior of iflib_msix_init */ if (iflib_override_queue_count) { /* * If the override sysctls have been set, limit the queues to * the number of logical CPUs. */ queues = mp_ncpus; } else { /* * Otherwise, limit the queue count to the CPUs associated * with the NUMA node the device is associated with. */ queues = CPU_COUNT(&cpus); } /* Clamp to the number of RSS buckets */ queues = imin(queues, rss_getnumbuckets()); /* * Clamp the number of queue pairs to the minimum of the requested Tx * and Rx queues. */ queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets); queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets); if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) { /* * Choose a number of RDMA vectors based on the number of CPUs * up to a maximum */ rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX); /* Further limit by the user configurable tunable */ rdma = min(rdma, ice_rdma_max_msix); } else { rdma = 0; } /* * Determine the number of vectors to request. 
Note that we also need * to allocate one vector for administrative tasks. */ requested = rdma + queues + 1; vectors = requested; err = pci_alloc_msix(dev, &vectors); if (err) { device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n", vectors, ice_err_str(err)); goto err_free_msix_table; } /* If we don't receive enough vectors, reduce demands */ if (vectors < requested) { int diff = requested - vectors; device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n", requested, vectors); /* * The OS didn't grant us the requested number of vectors. * Check to see if we can reduce demands by limiting the * number of vectors allocated to certain features. */ if (rdma >= diff) { /* Reduce the number of RDMA vectors we reserve */ rdma -= diff; diff = 0; } else { /* Disable RDMA and reduce the difference */ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); diff -= rdma; rdma = 0; } /* * If we still have a difference, we need to reduce the number * of queue pairs. * * However, we still need at least one vector for the admin * interrupt and one queue pair. */ if (queues <= diff) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n"); err = (ERANGE); goto err_pci_release_msi; } queues -= diff; } device_printf(dev, "Using %d Tx and Rx queues\n", queues); if (rdma) device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n", rdma); device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = queues; scctx->isc_ntxqsets = queues; scctx->isc_intr = IFLIB_INTR_MSIX; sc->irdma_vectors = rdma; /* Interrupt allocation tracking isn't required in recovery mode, * since neither RDMA nor VFs are enabled. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Keep track of which interrupt indices are being used for what */ sc->lan_vectors = vectors - rdma; err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors); if (err) { device_printf(dev, "Unable to assign PF interrupt mapping: %s\n", ice_err_str(err)); goto err_pci_release_msi; } err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma); if (err) { device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n", ice_err_str(err)); ice_resmgr_release_map(&sc->imgr, sc->pf_imap, sc->lan_vectors); goto err_pci_release_msi; } return (0); err_pci_release_msi: pci_release_msi(dev); err_free_msix_table: if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } return (err); } /** * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues * @ctx: the iflib context structure * @msix: the number of vectors we were assigned * * Called by iflib to assign MSI-X vectors to queues. Currently requires that * we get at least the same number of vectors as we have queues, and that we * always have the same number of Tx and Rx queues. * * Tx queues use a softirq instead of using their own hardware interrupt. 
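 *
 * The resulting vector layout is fixed: vector 0 services the admin
 * interrupt, and vectors 1..N service the N queue pairs, with each Tx
 * queue piggybacking on its pair's vector through a softirq:
 *
 *	irqvs[0]	admin/OICR interrupt
 *	irqvs[1..N]	rxq 0..N-1 (hardware filter) + txq 0..N-1 (softirq)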
*/ static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int err, i, vector; ASSERT_CTX_LOCKED(sc); if (vsi->num_rx_queues != vsi->num_tx_queues) { device_printf(sc->dev, "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n", vsi->num_tx_queues, vsi->num_rx_queues); return (EOPNOTSUPP); } if (msix < (vsi->num_rx_queues + 1)) { device_printf(sc->dev, "Not enough MSI-X vectors to assign one vector to each queue pair\n"); return (EOPNOTSUPP); } /* Save the number of vectors for future use */ sc->num_irq_vectors = vsi->num_rx_queues + 1; /* Allocate space to store the IRQ vector data */ if (!(sc->irqvs = (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors), M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate irqv memory\n"); return (ENOMEM); } /* Administrative interrupt events will use vector 0 */ err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN, ice_msix_admin, sc, 0, "admin"); if (err) { device_printf(sc->dev, "Failed to register Admin queue handler: %s\n", ice_err_str(err)); goto free_irqvs; } sc->irqvs[0].me = 0; /* Do not allocate queue interrupts when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; struct ice_tx_queue *txq = &vsi->tx_queues[i]; int rid = vector + 1; char irq_name[16]; snprintf(irq_name, sizeof(irq_name), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid, IFLIB_INTR_RXTX, ice_msix_que, rxq, rxq->me, irq_name); if (err) { device_printf(sc->dev, "Failed to allocate q int %d err: %s\n", i, ice_err_str(err)); vector--; i--; goto fail; } sc->irqvs[vector].me = vector; rxq->irqv = &sc->irqvs[vector]; bzero(irq_name, sizeof(irq_name)); snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq, IFLIB_INTR_TX, txq, txq->me, irq_name); txq->irqv = &sc->irqvs[vector]; } return (0); fail: for (; i >= 0; i--, vector--) iflib_irq_free(ctx, &sc->irqvs[vector].irq); iflib_irq_free(ctx, &sc->irqvs[0].irq); free_irqvs: free(sc->irqvs, M_ICE); sc->irqvs = NULL; return err; } /** * ice_if_mtu_set - Set the device MTU * @ctx: iflib context structure * @mtu: the MTU requested * * Called by iflib to configure the device's Maximum Transmission Unit (MTU). * * @pre assumes the caller holds the iflib CTX lock */ static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU) return (EINVAL); sc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; return (0); } /** * ice_if_intr_enable - Enable device interrupts * @ctx: iflib context structure * * Called by iflib to request enabling device interrupts. 
*/ static void ice_if_intr_enable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; ASSERT_CTX_LOCKED(sc); /* Enable ITR 0 */ ice_enable_intr(hw, sc->irqvs[0].me); /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Enable all queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++) ice_enable_intr(hw, vsi->rx_queues[i].irqv->me); } /** * ice_if_intr_disable - Disable device interrupts * @ctx: iflib context structure * * Called by iflib to request disabling device interrupts. */ static void ice_if_intr_disable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; unsigned int i; ASSERT_CTX_LOCKED(sc); /* IFDI_INTR_DISABLE may be called prior to interrupts actually being * assigned to queues. Instead of assuming that the interrupt * assignment in the rx_queues structure is valid, just disable all * possible interrupts * * Note that we choose not to disable ITR 0 because this handles the * AdminQ interrupts, and we want to keep processing these even when * the interface is offline. */ for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++) ice_disable_intr(hw, i); } /** * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt * @ctx: iflib context structure * @rxqid: the Rx queue to enable * * Enable a specific Rx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me); return (0); } /** * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt * @ctx: iflib context structure * @txqid: the Tx queue to enable * * Enable a specific Tx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me); return (0); } /** * ice_if_promisc_set - Set device promiscuous mode * @ctx: iflib context structure * @flags: promiscuous flags to configure * * Called by iflib to configure device promiscuous mode. 
* * @remark Calls to this function will always overwrite the previous setting */ static int ice_if_promisc_set(if_ctx_t ctx, int flags) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; bool promisc_enable = flags & IFF_PROMISC; bool multi_enable = flags & IFF_ALLMULTI; /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (multi_enable) return (EOPNOTSUPP); if (promisc_enable) { status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status && status != ICE_ERR_ALREADY_EXISTS) { device_printf(dev, "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } else { status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status) { device_printf(dev, "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } return (0); } /** * ice_if_media_change - Change device media * @ctx: device ctx structure * * Called by iflib when a media change is requested. This operation is not * supported by the hardware, so we just return an error code. */ static int ice_if_media_change(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_printf(sc->dev, "Media change is not supported.\n"); return (ENODEV); } /** * ice_if_media_status - Report current device media * @ctx: iflib context structure * @ifmr: ifmedia request structure to update * * Updates the provided ifmr with current device media status, including link * status and media type. */ static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_link_status *li = &sc->hw.port_info->phy.link_info; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* Never report link up or media types when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (!sc->link_up) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_FDX; if (li->phy_type_low) ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low); else if (li->phy_type_high) ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high); else ifmr->ifm_active |= IFM_UNKNOWN; /* Report flow control status as well */ if (li->an_info & ICE_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (li->an_info & ICE_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } /** * ice_init_tx_tracking - Initialize Tx queue software tracking values * @vsi: the VSI to initialize * * Initialize Tx queue software tracking values, including the Report Status * queue, and related software tracking values. */ static void ice_init_tx_tracking(struct ice_vsi *vsi) { struct ice_tx_queue *txq; size_t j; int i; for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { txq->tx_rs_cidx = txq->tx_rs_pidx = 0; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error in ice_ift_txd_credits_update for the * first packet. 
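 *
 * Worked example: with desc_count == 1024, tx_cidx_processed starts at
 * 1023, the last slot in the ring. When the first completion lands on
 * descriptor 0, the credit delta (0 - 1023) modulo 1024 evaluates to 1,
 * correctly reporting a single processed descriptor rather than 0 or 1024.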
*/ txq->tx_cidx_processed = txq->desc_count - 1; for (j = 0; j < txq->desc_count; j++) txq->tx_rsq[j] = QIDX_INVALID; } } /** * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues * @sc: the device softc * * Called to update the Rx queue mbuf_sz parameter for configuring the receive * buffer sizes when programming hardware. */ static void ice_update_rx_mbuf_sz(struct ice_softc *sc) { uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx); struct ice_vsi *vsi = &sc->pf_vsi; MPASS(mbuf_sz <= UINT16_MAX); vsi->mbuf_sz = mbuf_sz; } /** * ice_if_init - Initialize the device * @ctx: iflib ctx structure * * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes * device filters and prepares the Tx and Rx engines. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_init(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_t dev = sc->dev; int err; ASSERT_CTX_LOCKED(sc); /* * We've seen an issue with 11.3/12.1 where sideband routines are * called after detach is called. This would call routines after * if_stop, causing issues with the teardown process. This has * seemingly been fixed in STABLE snapshots, but it seems like a * good idea to have this guard here regardless. */ if (ice_driver_is_detaching(sc)) return; if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n"); return; } ice_update_rx_mbuf_sz(sc); /* Update the MAC address... User might use a LAA */ err = ice_update_laa_mac(sc); if (err) { device_printf(dev, "LAA address change failed, err %s\n", ice_err_str(err)); return; } /* Initialize software Tx tracking values */ ice_init_tx_tracking(&sc->pf_vsi); err = ice_cfg_vsi_for_tx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Tx: %s\n", ice_err_str(err)); return; } err = ice_cfg_vsi_for_rx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Rx: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_control_rx_queues(&sc->pf_vsi, true); if (err) { device_printf(dev, "Unable to enable Rx rings for receive: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_cfg_pf_default_mac_filters(sc); if (err) { device_printf(dev, "Unable to configure default MAC filters: %s\n", ice_err_str(err)); goto err_stop_rx; } /* We use software interrupts for Tx, so we only program the hardware * interrupts for Rx. */ ice_configure_rxq_interrupts(&sc->pf_vsi); ice_configure_rx_itr(&sc->pf_vsi); /* Configure promiscuous mode */ ice_if_promisc_set(ctx, if_getflags(sc->ifp)); ice_rdma_pf_init(sc); ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED); return; err_stop_rx: ice_control_rx_queues(&sc->pf_vsi, false); err_cleanup_tx: ice_vsi_disable_tx(&sc->pf_vsi); } /** * ice_poll_for_media_avail - Re-enable link if media is detected * @sc: device private structure * * Intended to be called from the driver's timer function, this function * sends the Get Link Status AQ command and re-enables HW link if the * command says that media is available. 
* * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing, * since media removal events are supposed to be sent to the driver through * a link status event. */ static void ice_poll_for_media_avail(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) { pi->phy.get_link_info = true; ice_get_link_status(pi, &sc->link_up); if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { enum ice_status status; /* Re-enable link and re-apply user link settings */ ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC); /* Update the OS about changes in media capability */ status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA); } } } /** * ice_if_timer - called by iflib periodically * @ctx: iflib ctx structure * @qid: the queue this timer was called for * * This callback is triggered by iflib periodically. We use it to update the * hw statistics. * * @remark this function is not protected by the iflib CTX lock. */ static void ice_if_timer(if_ctx_t ctx, uint16_t qid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx; if (qid != 0) return; /* Do not attempt to update stats when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Update device statistics */ ice_update_pf_stats(sc); /* * For proper watchdog management, the iflib stack needs to know if * we've been paused during the last interval. Check if the * link_xoff_rx stat changed, and set the isc_pause_frames, if so. */ if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx) sc->scctx->isc_pause_frames = 1; /* Update the primary VSI stats */ ice_update_vsi_hw_stats(&sc->pf_vsi); } /** * ice_admin_timer - called periodically to trigger the admin task * @arg: callout(9) argument pointing to the device private softc structure * * Timer function used as part of a callout(9) timer that will periodically * trigger the admin task, even when the interface is down. * * @remark this function is not called by iflib and is not protected by the * iflib CTX lock. * * @remark because this is a callout function, it cannot sleep and should not * attempt taking the iflib CTX lock. */ static void ice_admin_timer(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; /* * There is a point where callout routines are no longer * cancelable. So there exists a window of time where the * driver enters detach() and tries to cancel the callout, but the * callout routine has passed the cancellation point. The detach() * routine is unaware of this and tries to free resources that the * callout routine needs. So we check for the detach state flag to * at least shrink the window of opportunity. */ if (ice_driver_is_detaching(sc)) return; /* Fire off the admin task */ iflib_admin_intr_deferred(sc->ctx); /* Reschedule the admin timer */ callout_schedule(&sc->admin_timer, hz/2); } /** * ice_transition_recovery_mode - Transition to recovery mode * @sc: the device private softc * * Called when the driver detects that the firmware has entered recovery mode * at run time. */ static void ice_transition_recovery_mode(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; int i; device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. 
Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); /* Tell the stack that the link has gone down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); /* Request that the device be re-initialized */ ice_request_stack_reinit(sc); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } sc->num_available_vsi = 0; if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Destroy the interrupt manager */ ice_resmgr_destroy(&sc->imgr); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); ice_deinit_hw(&sc->hw); } /** * ice_transition_safe_mode - Transition to safe mode * @sc: the device private softc * * Called when the driver attempts to reload the DDP package during a device * reset, and the new download fails. If so, we must transition to safe mode * at run time. * * @remark although safe mode normally allocates only a single queue, we can't * change the number of queues dynamically when using iflib. Due to this, we * do not attempt to reduce the number of queues. */ static void ice_transition_safe_mode(struct ice_softc *sc) { /* Indicate that we are in Safe mode */ ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en); } /** * ice_if_update_admin_status - update admin status * @ctx: iflib ctx structure * * Called by iflib to update the admin status. For our purposes, this means * check the adminq, and update the link status. It's ultimately triggered by * our admin interrupt, or by the ice_if_timer periodically. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_update_admin_status(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; bool reschedule = false; u16 pending = 0; ASSERT_CTX_LOCKED(sc); /* Check if the firmware entered recovery mode at run time */ fw_mode = ice_get_fw_mode(&sc->hw); if (fw_mode == ICE_FW_MODE_REC) { if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* If we just entered recovery mode, log a warning to * the system administrator and deinit driver state * that is no longer functional. */ ice_transition_recovery_mode(sc); } } else if (fw_mode == ICE_FW_MODE_ROLLBACK) { if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) { /* Rollback mode isn't fatal, but we don't want to * repeatedly post a message about it. */ ice_print_rollback_msg(&sc->hw); } } /* Handle global reset events */ ice_handle_reset_event(sc); /* Handle PF reset requests */ ice_handle_pf_reset_request(sc); /* Handle MDD events */ ice_handle_mdd_event(sc); if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) || ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) || ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* * If we know the control queues are disabled, skip processing * the control queues entirely. 
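* (The lone semicolon below is the intentionally empty body of this branch.)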
*/ ; } else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) { ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending); if (pending > 0) reschedule = true; ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending); if (pending > 0) reschedule = true; } /* Poll for media availability */ ice_poll_for_media_avail(sc); /* Check and update link status */ ice_update_link_status(sc, false); /* * If there are still messages to process, we need to reschedule * ourselves. Otherwise, we can just re-enable the interrupt. We'll be * woken up at the next interrupt or timer event. */ if (reschedule) { ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); iflib_admin_intr_deferred(ctx); } else { ice_enable_intr(&sc->hw, sc->irqvs[0].me); } } /** * ice_prepare_for_reset - Prepare device for an impending reset * @sc: The device private softc * * Prepare the driver for an impending reset, shutting down VSIs, clearing the * scheduler setup, and shutting down controlqs. Uses the * ICE_STATE_PREPARED_FOR_RESET state bit to indicate whether we've already * prepared the driver for reset or not. */ static void ice_prepare_for_reset(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; /* If we're already prepared, there's nothing to do */ if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) return; - log(LOG_INFO, "%s: preparing to reset device logic\n", sc->ifp->if_xname); + log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp)); /* In recovery mode, hardware is not initialized */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* stop the RDMA client */ ice_rdma_pf_stop(sc); /* Release the main PF VSI queue mappings */ ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); ice_clear_hw_tbls(hw); if (hw->port_info) ice_sched_clear_port(hw->port_info); ice_shutdown_all_ctrlq(hw); } /** * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping * @sc: the device softc pointer * * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue * mapping after a reset occurred.
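* * @returns zero on success, or an error code if the Tx or Rx queue * assignment fails.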
*/ static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; struct ice_rx_queue *rxq; int err, i; /* Re-assign Tx queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n", ice_err_str(err)); return (err); } /* Re-assign Rx queues from PF space to this VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, vsi->num_rx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n", ice_err_str(err)); goto err_release_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Re-assign Tx queue tail pointers */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); /* Re-assign Rx queue tail pointers */ for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++) rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); return (0); err_release_tx_queues: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); return (err); } /* determine if the iflib context is active */ #define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)) /** * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode * @sc: The device private softc * * Handle a driver rebuild while in recovery mode. This will only rebuild the * limited functionality supported while in recovery mode. */ static void ice_rebuild_recovery_mode(struct ice_softc *sc) { device_t dev = sc->dev; /* enable PCIe bus master */ pci_enable_busmaster(dev); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); - log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname); + log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp)); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want to re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; } /** * ice_rebuild - Rebuild driver state post reset * @sc: The device private softc * * Restore driver state after a reset occurred. Restart the controlqs, setup * the hardware port, and re-enable the VSIs. */ static void ice_rebuild(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; int err; sc->rebuild_ticks = ticks; /* If we're rebuilding, then a reset has succeeded. */ ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED); /* * If the firmware is in recovery mode, only restore the limited * functionality supported by recovery mode.
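* Recovery mode leaves most of the hardware uninitialized, so only the * admin interrupt path is restored in that case.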
*/ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_rebuild_recovery_mode(sc); return; } /* enable PCIe bus master */ pci_enable_busmaster(dev); status = ice_init_all_ctrlq(hw); if (status) { device_printf(dev, "failed to re-init controlqs, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } /* Query the allocated resources for Tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { device_printf(dev, "Failed to query scheduler resources, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto err_shutdown_ctrlq; } /* Re-enable FW logging. Keep going even if this fails */ status = ice_fwlog_set(hw, &hw->fwlog_cfg); if (!status) { /* * We should have the most updated cached copy of the * configuration, regardless of whether we're rebuilding * or not. So we'll simply check to see if logging was * enabled pre-rebuild. */ if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { status = ice_fwlog_register(hw); if (status) device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } else device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = ice_send_version(sc); if (err) goto err_shutdown_ctrlq; err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto err_shutdown_ctrlq; } status = ice_clear_pf_cfg(hw); if (status) { device_printf(dev, "failed to clear PF configuration, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } ice_clear_pxe_mode(hw); status = ice_get_caps(hw); if (status) { device_printf(dev, "failed to get capabilities, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } status = ice_sched_init_port(hw->port_info); if (status) { device_printf(dev, "failed to initialize port, err %s\n", ice_status_str(status)); goto err_sched_cleanup; } /* If we previously loaded the package, it needs to be reloaded now */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) { status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); if (status) { ice_log_pkg_init(sc, &status); ice_transition_safe_mode(sc); } } ice_reset_pf_stats(sc); err = ice_rebuild_pf_vsi_qmap(sc); if (err) { device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n", ice_err_str(err)); goto err_sched_cleanup; } err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n", ice_err_str(err)); goto err_release_queue_allocations; } /* Replay all VSI configuration */ err = ice_replay_all_vsi_cfg(sc); if (err) goto err_deinit_pf_vsi; /* Re-enable FW health event reporting */ ice_init_health_events(sc); /* Reconfigure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to reconfigure RSS for the main VSI, err %s\n", ice_err_str(err)); goto err_deinit_pf_vsi; } /* Refresh link status */ ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED); sc->hw.port_info->phy.get_link_info = true; ice_get_link_status(sc->hw.port_info, &sc->link_up); ice_update_link_status(sc, true); /* RDMA interface will be restarted by the stack re-init */ /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Now that the rebuild is 
finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); - log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname); + log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp)); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want to re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; err_deinit_pf_vsi: ice_deinit_vsi(&sc->pf_vsi); err_release_queue_allocations: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); err_sched_cleanup: ice_sched_cleanup_all(hw); err_shutdown_ctrlq: ice_shutdown_all_ctrlq(hw); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); device_printf(dev, "Driver rebuild failed, please reload the device driver\n"); } /** * ice_handle_reset_event - Handle reset events triggered by OICR * @sc: The device private softc * * Handle reset events triggered by an OICR notification. This includes CORER, * GLOBR, and EMPR resets triggered by software on this or any other PF or by * firmware. * * @pre assumes the iflib context lock is held, and will unlock it while * waiting for the hardware to finish reset. */ static void ice_handle_reset_event(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; device_t dev = sc->dev; /* When a CORER, GLOBR, or EMPR is about to happen, the hardware will * trigger an OICR interrupt. Our OICR handler will determine when * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as * appropriate. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) return; ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_check_reset(hw); IFLIB_CTX_LOCK(sc); if (status) { device_printf(dev, "Device never came out of reset, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } /* We're done with the reset, so we can rebuild driver state */ sc->hw.reset_ongoing = false; ice_rebuild(sc); /* In the unlikely event that a PF reset request occurs at the same * time as a global reset, clear the request now. This avoids * resetting a second time right after we reset due to a global event. */ if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n"); } /** * ice_handle_pf_reset_request - Initiate PF reset requested by software * @sc: The device private softc * * Initiate a PF reset requested by software. We handle this in the admin task * so that only one thread actually handles driver preparation and cleanup, * rather than having multiple threads possibly attempt to run this code * simultaneously. * * @pre assumes the iflib context lock is held and will unlock it while * waiting for the PF reset to complete.
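* * @remark on success, the softc's soft_stats.pfr_count is incremented before * the driver state is rebuilt.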
*/ static void ice_handle_pf_reset_request(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Check for PF reset requests */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) return; /* Make sure we're prepared for reset */ ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_reset(hw, ICE_RESET_PFR); IFLIB_CTX_LOCK(sc); if (status) { device_printf(sc->dev, "device PF reset failed, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } sc->soft_stats.pfr_count++; ice_rebuild(sc); } /** * ice_init_device_features - Init device driver features * @sc: driver softc structure * * @pre assumes that the function capabilities bits have been set up by * ice_init_hw(). */ static void ice_init_device_features(struct ice_softc *sc) { /* * A failed pkg file download triggers safe mode, disabling advanced * device feature support */ if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) return; /* Set capabilities that all devices support */ ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap); ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap); /* Disable features due to hardware limitations... */ if (!sc->hw.func_caps.common_cap.rss_table_size) ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma) ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); /* Disable features due to firmware limitations... */ if (!ice_is_fw_health_report_supported(&sc->hw)) ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); if (!ice_fwlog_supported(&sc->hw)) ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING)) ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en); else ice_fwlog_unregister(&sc->hw); } /* Disable capabilities not supported by the OS */ ice_disable_unsupported_features(sc->feat_cap); /* RSS is always enabled for iflib */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS)) ice_set_bit(ICE_FEATURE_RSS, sc->feat_en); } /** * ice_if_multi_set - Callback to update Multicast filters in HW * @ctx: iflib ctx structure * * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search * the if_multiaddrs list and determine which filters have been added or * removed from the list, and update HW programming to reflect the new list. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_multi_set(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); int err; ASSERT_CTX_LOCKED(sc); /* Do not handle multicast configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; err = ice_sync_multicast_filters(sc); if (err) { device_printf(sc->dev, "Failed to synchronize multicast filter list: %s\n", ice_err_str(err)); return; } } /** * ice_if_vlan_register - Register a VLAN with the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to add * * Programs the main PF VSI with a hardware filter for the given VLAN. 
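* Failures are logged but not returned, since iflib does not expect a result * from this callback.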
* * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure adding VLAN %d to main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_vlan_unregister - Remove a VLAN filter from the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to remove * * Removes the previously programmed VLAN filter from the main PF VSI. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure removing VLAN %d from main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_stop - Stop the device * @ctx: iflib context structure * * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0 * down) * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_stop(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* * The iflib core may call IFDI_STOP prior to the first call to * IFDI_INIT. This will cause us to attempt to remove MAC filters we * don't have, and disable Tx queues which aren't yet configured. * Although it is likely these extra operations are harmless, they do * cause spurious warning messages to be displayed, which may confuse * users. * * To avoid these messages, we use a state bit indicating if we've * been initialized. It will be set when ice_if_init is called, and * cleared here in ice_if_stop. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n"); return; } ice_rdma_pf_stop(sc); /* Remove the MAC filters, stop Tx, and stop Rx. We don't check the * return of these functions because there's nothing we can really do * if they fail, and the functions already print error messages. * Just try to shut down as much as we can. */ ice_rm_pf_default_mac_filters(sc); /* Dissociate the Tx and Rx queues from the interrupts */ ice_flush_txq_interrupts(&sc->pf_vsi); ice_flush_rxq_interrupts(&sc->pf_vsi); /* Disable the Tx and Rx queues */ ice_vsi_disable_tx(&sc->pf_vsi); ice_control_rx_queues(&sc->pf_vsi, false); } /** * ice_if_get_counter - Get current value of an ifnet statistic * @ctx: iflib context pointer * @counter: ifnet counter to read * * Reads the current value of an ifnet counter for the device. * * This function is not protected by the iflib CTX lock.
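* * @returns the counter value, read from the main PF VSI.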
*/ static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* Return the counter for the main PF VSI */ return ice_get_ifnet_counter(&sc->pf_vsi, counter); } /** * ice_request_stack_reinit - Request that iflib re-initialize * @sc: the device private softc * * Request that the device be brought down and up, to re-initialize. For * example, this may be called when a device reset occurs, or when Tx and Rx * queues need to be re-initialized. * * This is required because the iflib state is outside the driver, and must be * re-initialized if we need to restart Tx and Rx queues. */ void ice_request_stack_reinit(struct ice_softc *sc) { if (CTX_ACTIVE(sc->ctx)) { iflib_request_reset(sc->ctx); iflib_admin_intr_deferred(sc->ctx); } } /** * ice_driver_is_detaching - Check if the driver is detaching/unloading * @sc: device private softc * * Returns true if the driver is detaching, false otherwise. * * @remark on newer kernels, take advantage of iflib_in_detach in order to * report detachment correctly as early as possible. * * @remark this function is used by various code paths that want to avoid * running if the driver is about to be removed. This includes sysctls and * other driver access points. Note that it does not fully resolve * detach-based race conditions as it is possible for a thread to race with * iflib_in_detach. */ bool ice_driver_is_detaching(struct ice_softc *sc) { return (ice_test_state(&sc->state, ICE_STATE_DETACHING) || iflib_in_detach(sc->ctx)); } /** * ice_if_priv_ioctl - Device private ioctl handler * @ctx: iflib context pointer * @command: The ioctl command issued * @data: ioctl specific data * * iflib callback for handling custom driver specific ioctls. * * @pre Assumes that the iflib context lock is held. */ static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ifdrv *ifd; device_t dev = sc->dev; if (data == NULL) return (EINVAL); ASSERT_CTX_LOCKED(sc); /* Make sure the command type is valid */ switch (command) { case SIOCSDRVSPEC: case SIOCGDRVSPEC: /* Accepted commands */ break; case SIOCGPRIVATE_0: /* * Although we do not support this ioctl command, it's * expected that iflib will forward it to the IFDI_PRIV_IOCTL * handler. Do not print a message in this case */ return (ENOTSUP); default: /* * If we get a different command for this function, it's * definitely unexpected, so log a message indicating what * command we got for debugging purposes. */ device_printf(dev, "%s: unexpected ioctl command %08lx\n", __func__, command); return (EINVAL); } ifd = (struct ifdrv *)data; switch (ifd->ifd_cmd) { case ICE_NVM_ACCESS: return ice_handle_nvm_access_ioctl(sc, ifd); default: return EINVAL; } } /** * ice_if_i2c_req - I2C request handler for iflib * @ctx: iflib context pointer * @req: The I2C parameters to use * * Read from the port's I2C eeprom using the parameters from the ioctl. * * @remark The iflib-only part is pretty simple. */ static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); return ice_handle_i2c_req(sc, req); } /** * ice_if_suspend - PCI device suspend handler for iflib * @ctx: iflib context pointer * * Deinitializes the driver and clears HW resources in preparation for * suspend or an FLR.
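* A PF reset is expected to follow, so any pending PFR request is cleared * before the hardware is torn down.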
* * @returns 0; this return value is ignored */ static int ice_if_suspend(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* At least a PFR is always going to happen after this; * either via FLR or during the D3->D0 transition. */ ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ); ice_prepare_for_reset(sc); return (0); } /** * ice_if_resume - PCI device resume handler for iflib * @ctx: iflib context pointer * * Reinitializes the driver and the HW after PCI resume or after * an FLR. An init is performed by iflib after this function is finished. * * @returns 0; this return value is ignored */ static int ice_if_resume(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ice_rebuild(sc); return (0); } diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 352a35d95512..9df1d9792e51 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1,1918 +1,1918 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl.h" #include "ixl_pf.h" #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif /********************************************************************* * Driver version *********************************************************************/ #define IXL_DRIVER_VERSION_MAJOR 2 #define IXL_DRIVER_VERSION_MINOR 3 #define IXL_DRIVER_VERSION_BUILD 2 #define IXL_DRIVER_VERSION_STRING \ __XSTRING(IXL_DRIVER_VERSION_MAJOR) "." \ __XSTRING(IXL_DRIVER_VERSION_MINOR) "." 
\ __XSTRING(IXL_DRIVER_VERSION_BUILD) "-k" /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * * ( Vendor ID, Device ID, Branding String ) *********************************************************************/ static pci_vendor_info_t ixl_vendor_info_array[] = { PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710, "Intel(R) Ethernet Controller X710 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_B, "Intel(R) Ethernet Controller XL710 for 40GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_C, "Intel(R) Ethernet Controller X710 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_A, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_B, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_C, "Intel(R) Ethernet Controller X710 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T, "Intel(R) Ethernet Controller X710 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T4, "Intel(R) Ethernet Controller X710/X557-AT 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_X722, "Intel(R) Ethernet Connection X722 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 1GbE"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_B, "Intel(R) Ethernet Controller XXV710 for 25GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_SFP28, "Intel(R) Ethernet Controller XXV710 for 25GbE SFP28"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_BC, "Intel(R) Ethernet Controller X710 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_SFP, "Intel(R) Ethernet Controller X710 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_B, "Intel(R) Ethernet Controller X710 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_5G_BASE_T_BC, "Intel(R) Ethernet Controller V710 for 5GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_BC, "Intel(R) Ethernet Controller I710 for 1GBASE-T"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ /*** IFLIB interface ***/ static void *ixl_register(device_t dev); static int ixl_if_attach_pre(if_ctx_t ctx); static int ixl_if_attach_post(if_ctx_t ctx); static int ixl_if_detach(if_ctx_t ctx); static int ixl_if_shutdown(if_ctx_t ctx); static int ixl_if_suspend(if_ctx_t ctx); static int ixl_if_resume(if_ctx_t ctx); static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ixl_if_enable_intr(if_ctx_t ctx); static void ixl_if_disable_intr(if_ctx_t ctx); static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t 
*paddrs, int nqs, int nqsets); static void ixl_if_queues_free(if_ctx_t ctx); static void ixl_if_update_admin_status(if_ctx_t ctx); static void ixl_if_multi_set(if_ctx_t ctx); static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ixl_if_media_change(if_ctx_t ctx); static int ixl_if_promisc_set(if_ctx_t ctx, int flags); static void ixl_if_timer(if_ctx_t ctx, uint16_t qid); static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt); static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static bool ixl_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx); #endif /*** Other ***/ static void ixl_save_pf_tunables(struct ixl_pf *); static int ixl_allocate_pci_resources(struct ixl_pf *); static void ixl_setup_ssctx(struct ixl_pf *pf); static void ixl_admin_timer(void *arg); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixl_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixl_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, iflib_device_iov_init), DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit), DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf), #endif DEVMETHOD_END }; static driver_t ixl_driver = { "ixl", ixl_methods, sizeof(struct ixl_pf), }; DRIVER_MODULE(ixl, pci, ixl_driver, 0, 0); IFLIB_PNP_INFO(pci, ixl, ixl_vendor_info_array); MODULE_VERSION(ixl, 3); MODULE_DEPEND(ixl, pci, 1, 1, 1); MODULE_DEPEND(ixl, ether, 1, 1, 1); MODULE_DEPEND(ixl, iflib, 1, 1, 1); static device_method_t ixl_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixl_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixl_if_attach_post), DEVMETHOD(ifdi_detach, ixl_if_detach), DEVMETHOD(ifdi_shutdown, ixl_if_shutdown), DEVMETHOD(ifdi_suspend, ixl_if_suspend), DEVMETHOD(ifdi_resume, ixl_if_resume), DEVMETHOD(ifdi_init, ixl_if_init), DEVMETHOD(ifdi_stop, ixl_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixl_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixl_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixl_if_disable_intr), DEVMETHOD(ifdi_rx_queue_intr_enable, ixl_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixl_if_tx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixl_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixl_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixl_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixl_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixl_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixl_if_mtu_set), DEVMETHOD(ifdi_media_status, ixl_if_media_status), DEVMETHOD(ifdi_media_change, ixl_if_media_change), DEVMETHOD(ifdi_promisc_set, ixl_if_promisc_set), DEVMETHOD(ifdi_timer, ixl_if_timer), DEVMETHOD(ifdi_vlan_register, ixl_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ixl_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ixl_if_get_counter), DEVMETHOD(ifdi_i2c_req, ixl_if_i2c_req), DEVMETHOD(ifdi_priv_ioctl, ixl_if_priv_ioctl), DEVMETHOD(ifdi_needs_restart, 
ixl_if_needs_restart), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixl_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixl_if_iov_uninit), DEVMETHOD(ifdi_iov_vf_add, ixl_if_iov_vf_add), DEVMETHOD(ifdi_vflr_handle, ixl_if_vflr_handle), #endif // ifdi_led_func // ifdi_debug DEVMETHOD_END }; static driver_t ixl_if_driver = { "ixl_if", ixl_if_methods, sizeof(struct ixl_pf) }; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "ixl driver parameters"); #ifdef IXL_DEBUG_FC /* * Leave this on unless you need to send flow control * frames (or other control frames) from software */ static int ixl_enable_tx_fc_filter = 1; TUNABLE_INT("hw.ixl.enable_tx_fc_filter", &ixl_enable_tx_fc_filter); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_tx_fc_filter, CTLFLAG_RDTUN, &ixl_enable_tx_fc_filter, 0, "Filter out packets with Ethertype 0x8808 from being sent out by non-HW sources"); #endif #ifdef IXL_DEBUG static int ixl_debug_recovery_mode = 0; TUNABLE_INT("hw.ixl.debug_recovery_mode", &ixl_debug_recovery_mode); SYSCTL_INT(_hw_ixl, OID_AUTO, debug_recovery_mode, CTLFLAG_RDTUN, &ixl_debug_recovery_mode, 0, "Act like when FW entered recovery mode (for debugging)"); #endif static int ixl_i2c_access_method = 0; TUNABLE_INT("hw.ixl.i2c_access_method", &ixl_i2c_access_method); SYSCTL_INT(_hw_ixl, OID_AUTO, i2c_access_method, CTLFLAG_RDTUN, &ixl_i2c_access_method, 0, IXL_SYSCTL_HELP_I2C_METHOD); static int ixl_enable_vf_loopback = 1; TUNABLE_INT("hw.ixl.enable_vf_loopback", &ixl_enable_vf_loopback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_vf_loopback, CTLFLAG_RDTUN, &ixl_enable_vf_loopback, 0, IXL_SYSCTL_HELP_VF_LOOPBACK); /* * Different method for processing TX descriptor * completion. */ static int ixl_enable_head_writeback = 1; TUNABLE_INT("hw.ixl.enable_head_writeback", &ixl_enable_head_writeback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_head_writeback, CTLFLAG_RDTUN, &ixl_enable_head_writeback, 0, "For detecting last completed TX descriptor by hardware, use value written by HW instead of checking descriptors"); static int ixl_core_debug_mask = 0; TUNABLE_INT("hw.ixl.core_debug_mask", &ixl_core_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, core_debug_mask, CTLFLAG_RDTUN, &ixl_core_debug_mask, 0, "Display debug statements that are printed in non-shared code"); static int ixl_shared_debug_mask = 0; TUNABLE_INT("hw.ixl.shared_debug_mask", &ixl_shared_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, shared_debug_mask, CTLFLAG_RDTUN, &ixl_shared_debug_mask, 0, "Display debug statements that are printed in shared code"); #if 0 /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ static int ixl_dynamic_rx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_rx_itr", &ixl_dynamic_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixl_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); static int ixl_dynamic_tx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_tx_itr", &ixl_dynamic_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixl_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); #endif static int ixl_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixl.rx_itr", &ixl_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixl_rx_itr, 0, "RX Interrupt Rate"); static int ixl_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixl.tx_itr", &ixl_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixl_tx_itr, 0, "TX Interrupt Rate"); static int ixl_flow_control = -1; SYSCTL_INT(_hw_ixl, OID_AUTO, flow_control, CTLFLAG_RDTUN, &ixl_flow_control, 
0, "Initial Flow Control setting"); #ifdef IXL_IW int ixl_enable_iwarp = 0; TUNABLE_INT("hw.ixl.enable_iwarp", &ixl_enable_iwarp); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_iwarp, CTLFLAG_RDTUN, &ixl_enable_iwarp, 0, "iWARP enabled"); #if __FreeBSD_version < 1100000 int ixl_limit_iwarp_msix = 1; #else int ixl_limit_iwarp_msix = IXL_IW_MAX_MSIX; #endif TUNABLE_INT("hw.ixl.limit_iwarp_msix", &ixl_limit_iwarp_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, limit_iwarp_msix, CTLFLAG_RDTUN, &ixl_limit_iwarp_msix, 0, "Limit MSI-X vectors assigned to iWARP"); #endif extern struct if_txrx ixl_txrx_hwb; extern struct if_txrx ixl_txrx_dwb; static struct if_shared_ctx ixl_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_tso_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = 16384, .isc_rx_nsegments = IXL_MAX_RX_SEGS, .isc_rx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixl_vendor_info_array, .isc_driver_version = IXL_DRIVER_VERSION_STRING, .isc_driver = &ixl_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN, .isc_nrxd_min = {IXL_MIN_RING}, .isc_ntxd_min = {IXL_MIN_RING}, .isc_nrxd_max = {IXL_MAX_RING}, .isc_ntxd_max = {IXL_MAX_RING}, .isc_nrxd_default = {IXL_DEFAULT_RING}, .isc_ntxd_default = {IXL_DEFAULT_RING}, }; /*** Functions ***/ static void * ixl_register(device_t dev) { return (&ixl_sctx_init); } static int ixl_allocate_pci_resources(struct ixl_pf *pf) { device_t dev = iflib_get_dev(pf->vsi.ctx); struct i40e_hw *hw = &pf->hw; int rid; /* Map BAR0 */ rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->osdep.dev = dev; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; return (0); } static void ixl_setup_ssctx(struct ixl_pf *pf) { if_softc_ctx_t scctx = pf->vsi.shared; struct i40e_hw *hw = &pf->hw; if (IXL_PF_IN_RECOVERY_MODE(pf)) { scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 1; scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; } else if (hw->mac.type == I40E_MAC_X722) scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 128; else scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 64; if (pf->vsi.enable_head_writeback) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_hwb; } else { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_dwb; } scctx->isc_txrx->ift_legacy_intr = ixl_intr; 
scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union i40e_32byte_rx_desc), DBA_ALIGN); scctx->isc_msix_bar = PCIR_BAR(IXL_MSIX_BAR); scctx->isc_tx_nsegments = IXL_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = IXL_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = IXL_TSO_SIZE; scctx->isc_tx_tso_segsize_max = IXL_MAX_DMA_SEG_SIZE; scctx->isc_rss_table_size = pf->hw.func_caps.rss_table_size; scctx->isc_tx_csum_flags = CSUM_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IXL_CAPS; } static void ixl_admin_timer(void *arg) { struct ixl_pf *pf = (struct ixl_pf *)arg; /* Fire off the admin task */ iflib_admin_intr_deferred(pf->vsi.ctx); /* Reschedule the admin timer */ callout_schedule(&pf->admin_timer, hz/2); } static int ixl_attach_pre_recovery_mode(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); i40e_get_mac_addr(hw, hw->mac.addr); if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } ixl_setup_ssctx(pf); return (0); } static int ixl_if_attach_pre(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; enum i40e_get_fw_lldp_status_resp lldp_status; struct i40e_filter_control_settings filter; enum i40e_status_code status; int error = 0; dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); INIT_DBG_DEV(dev, "begin"); vsi = &pf->vsi; vsi->back = pf; pf->dev = dev; hw = &pf->hw; vsi->dev = dev; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->ctx = ctx; vsi->media = iflib_get_media(ctx); vsi->shared = iflib_get_softc_ctx(ctx); snprintf(pf->admin_mtx_name, sizeof(pf->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&pf->admin_mtx, pf->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&pf->admin_timer, &pf->admin_mtx, 0); /* Save tunable values */ ixl_save_pf_tunables(pf); /* Do PCI setup - map BAR0, etc */ if (ixl_allocate_pci_resources(pf)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci_res; } /* Establish a clean starting point */ i40e_clear_hw(hw); i40e_set_mac_type(hw); error = ixl_pf_reset(pf); if (error) goto err_out; /* Initialize the shared code */ status = i40e_init_shared_code(hw); if (status) { device_printf(dev, "Unable to initialize shared code, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } /* Set up the admin queue */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; status = i40e_init_adminq(hw); if (status != 0 && status != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } ixl_print_nvm_version(pf); if (status == I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "The driver for the device stopped " "because the NVM image is newer than expected.\n"); device_printf(dev, "You must install the most recent version of " "the network driver.\n"); error = EIO; goto err_out; } if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) { device_printf(dev, "The driver for the device detected " "a newer version of the NVM image than expected.\n"); device_printf(dev, "Please install the most recent version " "of the network 
driver.\n"); } else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) { device_printf(dev, "The driver for the device detected " "an older version of the NVM image than expected.\n"); device_printf(dev, "Please update the NVM image.\n"); } if (IXL_PF_IN_RECOVERY_MODE(pf)) { error = ixl_attach_pre_recovery_mode(pf); if (error) goto err_out; return (error); } /* Clear PXE mode */ i40e_clear_pxe_mode(hw); /* Get capabilities from the device */ error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "get_hw_capabilities failed: %d\n", error); goto err_get_cap; } /* Set up host memory cache */ error = ixl_setup_hmc(pf); if (error) goto err_mac_hmc; /* Disable LLDP from the firmware for certain NVM versions */ if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || (pf->hw.aq.fw_maj_ver < 4)) { i40e_aq_stop_lldp(hw, true, false, NULL); pf->state |= IXL_PF_STATE_FW_LLDP_DISABLED; } /* Try enabling Energy Efficient Ethernet (EEE) mode */ if (i40e_enable_eee(hw, true) == I40E_SUCCESS) atomic_set_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); else atomic_clear_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); /* Get MAC addresses from hardware */ i40e_get_mac_addr(hw, hw->mac.addr); error = i40e_validate_mac_addr(hw->mac.addr); if (error) { device_printf(dev, "validate_mac_addr failed: %d\n", error); goto err_mac_hmc; } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); iflib_set_mac(ctx, hw->mac.addr); i40e_get_port_mac_addr(hw, hw->mac.port_addr); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; filter.enable_fdir = FALSE; filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Query device FW LLDP status */ if (i40e_get_fw_lldp_status(hw, &lldp_status) == I40E_SUCCESS) { if (lldp_status == I40E_GET_FW_LLDP_STATUS_DISABLED) { atomic_set_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } else { atomic_clear_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } } /* Tell FW to apply DCB config on link up */ i40e_aq_set_dcb_parameters(hw, true, NULL); /* Fill out iflib parameters */ ixl_setup_ssctx(pf); INIT_DBG_DEV(dev, "end"); return (0); err_mac_hmc: ixl_shutdown_hmc(pf); err_get_cap: i40e_shutdown_adminq(hw); err_out: ixl_free_pci_resources(pf); err_pci_res: mtx_lock(&pf->admin_mtx); callout_stop(&pf->admin_timer); mtx_unlock(&pf->admin_mtx); mtx_destroy(&pf->admin_mtx); return (error); } static int ixl_if_attach_post(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; enum i40e_status_code status; dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); INIT_DBG_DEV(dev, "begin"); vsi = &pf->vsi; vsi->ifp = iflib_get_ifp(ctx); hw = &pf->hw; /* Save off determined number of queues for interface */ vsi->num_rx_queues = vsi->shared->isc_nrxqsets; vsi->num_tx_queues = vsi->shared->isc_ntxqsets; /* Setup OS network interface / ifnet */ if (ixl_setup_interface(dev, pf)) { device_printf(dev, "interface setup failed!\n"); error = EIO; goto err; } if (IXL_PF_IN_RECOVERY_MODE(pf)) { /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } ixl_add_sysctls_recovery_mode(pf); /* Start the admin timer */ mtx_lock(&pf->admin_mtx); callout_reset(&pf->admin_timer, hz/2, ixl_admin_timer, pf); mtx_unlock(&pf->admin_mtx); return (0); } /* Determine link state */ if 
(ixl_attach_get_link_status(pf)) { error = EINVAL; goto err; } error = ixl_switch_config(pf); if (error) { device_printf(dev, "Initial ixl_switch_config() failed: %d\n", error); goto err; } /* Add protocol filters to list */ ixl_init_filters(vsi); /* Init queue allocation manager */ error = ixl_pf_qmgr_init(&pf->qmgr, hw->func_caps.num_tx_qp); if (error) { device_printf(dev, "Failed to init queue manager for PF queues, error %d\n", error); goto err; } /* reserve a contiguous allocation for the PF's VSI */ error = ixl_pf_qmgr_alloc_contiguous(&pf->qmgr, max(vsi->num_rx_queues, vsi->num_tx_queues), &pf->qtag); if (error) { device_printf(dev, "Failed to reserve queues for PF LAN VSI, error %d\n", error); goto err; } device_printf(dev, "Allocating %d queues for PF LAN VSI; %d queues active\n", pf->qtag.num_allocated, pf->qtag.num_active); /* Limit PHY interrupts to link, autoneg, and module failure */ status = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (status) { device_printf(dev, "i40e_aq_set_phy_int_mask() failed: err %s," " aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); goto err; } /* Get the bus configuration and set the shared code */ ixl_get_bus_info(pf); /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } /* Set initial advertised speed sysctl value */ ixl_set_initial_advertised_speeds(pf); /* Initialize statistics & add sysctls */ ixl_add_device_sysctls(pf); ixl_pf_reset_stats(pf); ixl_update_stats_counters(pf); ixl_add_hw_stats(pf); /* * Driver may have been reloaded. Ensure that the link state * is consistent with current settings. */ ixl_set_link(pf, (pf->state & IXL_PF_STATE_LINK_ACTIVE_ON_DOWN) != 0); hw->phy.get_link_info = true; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); #ifdef PCI_IOV ixl_initialize_sriov(pf); #endif #ifdef IXL_IW if (hw->func_caps.iwarp && ixl_enable_iwarp) { pf->iw_enabled = (pf->iw_msix > 0) ?
true : false; if (pf->iw_enabled) { error = ixl_iw_pf_attach(pf); if (error) { device_printf(dev, "interfacing to iWARP driver failed: %d\n", error); goto err; } else device_printf(dev, "iWARP ready\n"); } else device_printf(dev, "iWARP disabled on this device " "(no MSI-X vectors)\n"); } else { pf->iw_enabled = false; device_printf(dev, "The device is not iWARP enabled\n"); } #endif /* Start the admin timer */ mtx_lock(&pf->admin_mtx); callout_reset(&pf->admin_timer, hz/2, ixl_admin_timer, pf); mtx_unlock(&pf->admin_mtx); INIT_DBG_DEV(dev, "end"); return (0); err: INIT_DEBUGOUT("end: error %d", error); /* ixl_if_detach() is called on error from this */ return (error); } /** * XXX: iflib always ignores the return value of detach() * -> This means that this isn't allowed to fail */ static int ixl_if_detach(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; #ifdef IXL_IW int error; #endif INIT_DBG_DEV(dev, "begin"); /* Stop the admin timer */ mtx_lock(&pf->admin_mtx); callout_stop(&pf->admin_timer); mtx_unlock(&pf->admin_mtx); mtx_destroy(&pf->admin_mtx); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { error = ixl_iw_pf_detach(pf); if (error == EBUSY) { device_printf(dev, "iwarp in use; stop it first.\n"); //return (error); } } #endif /* Remove all previously allocated media types */ ifmedia_removeall(vsi->media); /* Shutdown LAN HMC */ ixl_shutdown_hmc(pf); /* Shutdown admin queue */ ixl_disable_intr0(hw); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "i40e_shutdown_adminq() failed with status %s\n", i40e_stat_str(hw, status)); ixl_pf_qmgr_destroy(&pf->qmgr); ixl_free_pci_resources(pf); ixl_free_filters(&vsi->ftl); INIT_DBG_DEV(dev, "end"); return (0); } static int ixl_if_shutdown(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_shutdown: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_suspend(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_suspend: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_resume(if_ctx_t ctx) { - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); INIT_DEBUGOUT("ixl_if_resume: begin"); /* Read & clear wake-up registers */ /* Required after D3->D0 transition */ - if (ifp->if_flags & IFF_UP) + if (if_getflags(ifp) & IFF_UP) ixl_if_init(ctx); return (0); } void ixl_if_init(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); u8 tmpaddr[ETHER_ADDR_LEN]; int ret; if (IXL_PF_IN_RECOVERY_MODE(pf)) return; /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So, rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); ixl_teardown_hw_structs(pf); ixl_rebuild_hw_structs_after_reset(pf, false); } /* Get the latest mac address... 
User might use a LAA */ - bcopy(IF_LLADDR(vsi->ifp), tmpaddr, ETH_ALEN); + bcopy(if_getlladdr(vsi->ifp), tmpaddr, ETH_ALEN); if (!ixl_ether_is_equal(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_all_vlan_filters(vsi, hw->mac.addr); bcopy(tmpaddr, hw->mac.addr, ETH_ALEN); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address change failed!!\n"); return; } /* * New filters are configured by ixl_reconfigure_filters * at the end of ixl_init_locked. */ } iflib_set_mac(ctx, hw->mac.addr); /* Prepare the VSI: rings, hmc contexts, etc... */ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } ixl_set_link(pf, true); /* Reconfigure multicast filters in HW */ ixl_if_multi_set(ctx); /* Set up RSS */ ixl_config_rss(pf); /* Set up MSI-X routing and the ITR settings */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); if (vsi->enable_head_writeback) ixl_init_tx_cidx(vsi); else ixl_init_tx_rsqs(vsi); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); /* Re-add configure filters to HW */ ixl_reconfigure_filters(vsi); /* Configure promiscuous mode */ ixl_if_promisc_set(ctx, if_getflags(ifp)); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { ret = ixl_iw_pf_init(pf); if (ret) device_printf(dev, "initialize iwarp failed, code %d\n", ret); } #endif } void ixl_if_stop(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct ixl_vsi *vsi = &pf->vsi; INIT_DEBUGOUT("ixl_if_stop: begin\n"); if (IXL_PF_IN_RECOVERY_MODE(pf)) return; // TODO: This may need to be reworked #ifdef IXL_IW /* Stop iWARP device */ if (ixl_enable_iwarp && pf->iw_enabled) ixl_iw_pf_stop(pf); #endif ixl_disable_rings_intr(vsi); ixl_disable_rings(pf, vsi, &pf->qtag); /* * Don't set link state if only reconfiguring * e.g. on MTU change. */ if ((if_getflags(ifp) & IFF_UP) == 0 && (atomic_load_acq_32(&pf->state) & IXL_PF_STATE_LINK_ACTIVE_ON_DOWN) == 0) ixl_set_link(pf, false); } static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; int err, i, rid, vector = 0; char buf[16]; MPASS(vsi->shared->isc_nrxqsets > 0); MPASS(vsi->shared->isc_ntxqsets > 0); /* Admin Queue must use vector 0 */ rid = vector + 1; err = iflib_irq_alloc_generic(ctx, &vsi->irq, rid, IFLIB_INTR_ADMIN, ixl_msix_adminq, pf, 0, "aq"); if (err) { iflib_irq_free(ctx, &vsi->irq); device_printf(iflib_get_dev(ctx), "Failed to register Admin Queue handler\n"); return (err); } #ifdef PCI_IOV /* Create soft IRQ for handling VFLRs */ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, pf, 0, "iov"); #endif /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, ixl_msix_que, rx_que, rx_que->rxr.me, buf); /* XXX: Does the driver work as expected if there are fewer num_rx_queues than * what's expected in the iflib context?
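* At least the error path below shrinks num_rx_queues so that cleanup only * touches the IRQs attempted so far.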
*/ if (err) { device_printf(iflib_get_dev(ctx), "Failed to allocate queue RX int vector %d, err: %d\n", i, err); vsi->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } bzero(buf, sizeof(buf)); for (i = 0; i < vsi->shared->isc_ntxqsets; i++, tx_que++) { snprintf(buf, sizeof(buf), "txq%d", i); iflib_softirq_alloc_generic(ctx, &vsi->rx_queues[i % vsi->shared->isc_nrxqsets].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); /* TODO: Maybe call a strategy function for this to figure out which * interrupts to map Tx queues to. I don't know if there's an immediately * better way than this other than a user-supplied map, though. */ tx_que->msix = (i % vsi->shared->isc_nrxqsets) + 1; } return (0); fail: iflib_irq_free(ctx, &vsi->irq); rx_que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (err); } /* * Enable all interrupts * * Called in: * iflib_init_locked, after ixl_if_init() */ static void ixl_if_enable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; ixl_enable_intr0(hw); /* Enable queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++, que++) /* TODO: Queue index parameter is probably wrong */ ixl_enable_queue(hw, que->rxr.me); } /* * Disable queue interrupts * * Other interrupt causes need to remain active. */ static void ixl_if_disable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = vsi->rx_queues; if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) ixl_disable_queue(hw, rx_que->msix - 1); } else { // Set PFINT_LNKLST0 FIRSTQ_INDX to 0x7FF // stops queues from triggering interrupts wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); } } static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = &vsi->rx_queues[rxqid]; ixl_enable_queue(hw, rx_que->msix - 1); return (0); } static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid]; ixl_enable_queue(hw, tx_que->msix - 1); return (0); } static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que; int i, j, error = 0; MPASS(scctx->isc_ntxqsets > 0); MPASS(ntxqs == 1); MPASS(scctx->isc_ntxqsets == ntxqsets); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ixl_tx_queue *) malloc(sizeof(struct ixl_tx_queue) *ntxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = vsi->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; que->vsi = vsi; if (!vsi->enable_head_writeback) { /* Allocate report status array */ if (!(txr->tx_rsq = malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IXL, M_NOWAIT))) { device_printf(iflib_get_dev(ctx), "failed to allocate tx_rsq memory\n"); error = ENOMEM; goto fail; } /* Init report status array */ for (j = 0; j < scctx->isc_ntxd[0]; j++) 
txr->tx_rsq[j] = QIDX_INVALID; } /* get the virtual and physical address of the hardware queues */ txr->tail = I40E_QTX_TAIL(txr->me); txr->tx_base = (struct i40e_tx_desc *)vaddrs[i * ntxqs]; txr->tx_paddr = paddrs[i * ntxqs]; txr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que; int i, error = 0; #ifdef INVARIANTS if_softc_ctx_t scctx = vsi->shared; MPASS(scctx->isc_nrxqsets > 0); MPASS(nrxqs == 1); MPASS(scctx->isc_nrxqsets == nrxqsets); #endif /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ixl_rx_queue *) malloc(sizeof(struct ixl_rx_queue) * nrxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate RX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = vsi->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; que->vsi = vsi; /* get the virtual and physical address of the hardware queues */ rxr->tail = I40E_QRX_TAIL(rxr->me); rxr->rx_base = (union i40e_rx_desc *)vaddrs[i * nrxqs]; rxr->rx_paddr = paddrs[i * nrxqs]; rxr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static void ixl_if_queues_free(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if (vsi->tx_queues != NULL && !vsi->enable_head_writeback) { struct ixl_tx_queue *que; int i = 0; for (i = 0, que = vsi->tx_queues; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq != NULL) { free(txr->tx_rsq, M_IXL); txr->tx_rsq = NULL; } } } if (vsi->tx_queues != NULL) { free(vsi->tx_queues, M_IXL); vsi->tx_queues = NULL; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_IXL); vsi->rx_queues = NULL; } if (!IXL_PF_IN_RECOVERY_MODE(pf)) sysctl_ctx_free(&vsi->sysctl_ctx); } void ixl_update_link_status(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; u64 baudrate; if (pf->link_up) { if (vsi->link_active == FALSE) { vsi->link_active = TRUE; baudrate = ixl_max_aq_speed_to_value(hw->phy.link_info.link_speed); iflib_link_state_change(vsi->ctx, LINK_STATE_UP, baudrate); ixl_link_up_msg(pf); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } else { /* Link down */ if (vsi->link_active == TRUE) { vsi->link_active = FALSE; iflib_link_state_change(vsi->ctx, LINK_STATE_DOWN, 0); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } } static void ixl_handle_lan_overflow_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { device_t dev = pf->dev; u32 rxq_idx, qtx_ctl; rxq_idx = (e->desc.params.external.param0 & I40E_PRTDCB_RUPTQ_RXQNUM_MASK) >> I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT; qtx_ctl = e->desc.params.external.param1; device_printf(dev, "LAN overflow event: global rxq_idx %d\n", rxq_idx); device_printf(dev, "LAN overflow event: QTX_CTL 0x%08x\n", qtx_ctl); } static int ixl_process_adminq(struct ixl_pf *pf, u16 *pending) { enum i40e_status_code status = I40E_SUCCESS; struct i40e_arq_event_info event; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 opcode; u32 loop = 0, reg; event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_IXL, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return (ENOMEM); } /* clean and process any events */ do { status = 
i40e_clean_arq_element(hw, &event, pending); if (status) break; opcode = LE16_TO_CPU(event.desc.opcode); ixl_dbg(pf, IXL_DBG_AQ, "Admin Queue event: %#06x\n", opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; /* * This should only occur on no-drop queues, which * aren't currently configured. */ case i40e_aqc_opc_event_lan_overflow: ixl_handle_lan_overflow_event(pf, &event); break; default: break; } } while (*pending && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_IXL); /* Re-enable admin queue interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); return (status); } static void ixl_if_update_admin_status(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; u16 pending; if (IXL_PF_IS_RESETTING(pf)) ixl_handle_empr_reset(pf); /* * Admin Queue is shut down while handling reset. * Don't proceed if it hasn't been re-initialized * e.g due to an issue with new FW. */ if (!i40e_check_asq_alive(&pf->hw)) return; if (pf->state & IXL_PF_STATE_MDD_PENDING) ixl_handle_mdd_event(pf); ixl_process_adminq(pf, &pending); ixl_update_link_status(pf); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt and go to sleep. */ if (pending > 0) iflib_admin_intr_deferred(ctx); else ixl_enable_intr0(hw); } static void ixl_if_multi_set(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; int mcnt; IOCTL_DEBUGOUT("ixl_if_multi_set: begin"); /* Delete filters for removed multicast addresses */ ixl_del_multi(vsi, false); mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR); if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) { i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); ixl_del_multi(vsi, true); return; } ixl_add_multi(vsi); IOCTL_DEBUGOUT("ixl_if_multi_set: end"); } static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) return (EINVAL); vsi->shared->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; return (0); } static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_1000_T; break; /* 2.5 G */ case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS: ifmr->ifm_active |= IFM_2500_T; break; /* 5 G */ case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS: ifmr->ifm_active |= IFM_5000_T; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case 
I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_10G_AOC; break; /* 25 G */ case I40E_PHY_TYPE_25GBASE_KR: ifmr->ifm_active |= IFM_25G_KR; break; case I40E_PHY_TYPE_25GBASE_CR: ifmr->ifm_active |= IFM_25G_CR; break; case I40E_PHY_TYPE_25GBASE_SR: ifmr->ifm_active |= IFM_25G_SR; break; case I40E_PHY_TYPE_25GBASE_LR: ifmr->ifm_active |= IFM_25G_LR; break; case I40E_PHY_TYPE_25GBASE_AOC: ifmr->ifm_active |= IFM_25G_AOC; break; case I40E_PHY_TYPE_25GBASE_ACC: ifmr->ifm_active |= IFM_25G_ACC; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? */ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } static int ixl_if_media_change(if_ctx_t ctx) { struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(iflib_get_ifp(ctx), "Media change is not supported.\n"); return (ENODEV); } static int ixl_if_promisc_set(if_ctx_t ctx, int flags) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct i40e_hw *hw = vsi->hw; int err; bool uni = FALSE, multi = FALSE; if (flags & IFF_PROMISC) uni = multi = TRUE; else if (flags & IFF_ALLMULTI || if_llmaddr_count(ifp) >= MAX_MULTICAST_ADDR) multi = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL, true); if (err) return (err); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return (err); } static void ixl_if_timer(if_ctx_t ctx, uint16_t qid) { struct ixl_pf *pf = iflib_get_softc(ctx); if (qid != 0) return; ixl_update_stats_counters(pf); } static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if_t ifp = iflib_get_ifp(ctx); if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; /* * Keep track of registered VLANS to know what * filters have 
to be configured when VLAN_HWFILTER * capability is enabled. */ ++vsi->num_vlans; bit_set(vsi->vlans_map, vtag); if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0) return; if (vsi->num_vlans < IXL_MAX_VLAN_FILTERS) ixl_add_filter(vsi, hw->mac.addr, vtag); else if (vsi->num_vlans == IXL_MAX_VLAN_FILTERS) { /* * There are not enough HW resources to add filters * for all registered VLANs. Re-configure filtering * to allow reception of all expected traffic. */ device_printf(vsi->dev, "Not enough HW filters for all VLANs. VLAN HW filtering disabled"); ixl_del_all_vlan_filters(vsi, hw->mac.addr); ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); } } static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if_t ifp = iflib_get_ifp(ctx); if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; --vsi->num_vlans; bit_clear(vsi->vlans_map, vtag); if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0) return; if (vsi->num_vlans < IXL_MAX_VLAN_FILTERS) ixl_del_filter(vsi, hw->mac.addr, vtag); else if (vsi->num_vlans == IXL_MAX_VLAN_FILTERS) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); ixl_add_vlan_filters(vsi, hw->mac.addr); } } static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (vsi->ipackets); case IFCOUNTER_IERRORS: return (vsi->ierrors); case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: return (vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); case IFCOUNTER_IBYTES: return (vsi->ibytes); case IFCOUNTER_OBYTES: return (vsi->obytes); case IFCOUNTER_IMCASTS: return (vsi->imcasts); case IFCOUNTER_OMCASTS: return (vsi->omcasts); case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: return (vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: return (if_get_counter_default(ifp, cnt)); } } #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); ixl_handle_vflr(pf); } #endif static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ixl_pf *pf = iflib_get_softc(ctx); if (pf->read_i2c_byte == NULL) return (EINVAL); for (int i = 0; i < req->len; i++) if (pf->read_i2c_byte(pf, req->offset + i, req->dev_addr, &req->data[i])) return (EIO); return (0); } static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ifdrv *ifd = (struct ifdrv *)data; int error = 0; /* * The iflib_if_ioctl forwards SIOCxDRVSPEC and SIOCGPRIVATE_0 without * performing privilege checks. It is important that this function * perform the necessary checks for commands which should only be * executed by privileged threads. */ switch(command) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) { error = priv_check(curthread, PRIV_DRIVER); if (error) break; error = ixl_handle_nvmupd_cmd(pf, ifd); } else { error = EINVAL; } break; default: error = EOPNOTSUPP; } return (error); } /* ixl_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning false for every event.
* * @returns true if iflib needs to reinit the interface, false otherwise */ static bool ixl_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: default: return (false); } } /* * Sanity check and save off tunable values. */ static void ixl_save_pf_tunables(struct ixl_pf *pf) { device_t dev = pf->dev; /* Save tunable information */ #ifdef IXL_DEBUG_FC pf->enable_tx_fc_filter = ixl_enable_tx_fc_filter; #endif #ifdef IXL_DEBUG pf->recovery_mode = ixl_debug_recovery_mode; #endif pf->dbg_mask = ixl_core_debug_mask; pf->hw.debug_mask = ixl_shared_debug_mask; pf->vsi.enable_head_writeback = !!(ixl_enable_head_writeback); pf->enable_vf_loopback = !!(ixl_enable_vf_loopback); #if 0 pf->dynamic_rx_itr = ixl_dynamic_rx_itr; pf->dynamic_tx_itr = ixl_dynamic_tx_itr; #endif if (ixl_i2c_access_method > 3 || ixl_i2c_access_method < 0) pf->i2c_access_method = 0; else pf->i2c_access_method = ixl_i2c_access_method; if (ixl_tx_itr < 0 || ixl_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid tx_itr value of %d set!\n", ixl_tx_itr); device_printf(dev, "tx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_4K); pf->tx_itr = IXL_ITR_4K; } else pf->tx_itr = ixl_tx_itr; if (ixl_rx_itr < 0 || ixl_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid rx_itr value of %d set!\n", ixl_rx_itr); device_printf(dev, "rx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_8K); pf->rx_itr = IXL_ITR_8K; } else pf->rx_itr = ixl_rx_itr; pf->fc = -1; if (ixl_flow_control != -1) { if (ixl_flow_control < 0 || ixl_flow_control > 3) { device_printf(dev, "Invalid flow_control value of %d set!\n", ixl_flow_control); device_printf(dev, "flow_control must be between %d and %d, " "inclusive\n", 0, 3); device_printf(dev, "Using default configuration instead\n"); } else pf->fc = ixl_flow_control; } } diff --git a/sys/dev/ixl/ixl.h b/sys/dev/ixl/ixl.h index 9828760e4ea6..641ce6b10fcc 100644 --- a/sys/dev/ixl/ixl.h +++ b/sys/dev/ixl/ixl.h @@ -1,550 +1,550 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _IXL_H_ #define _IXL_H_ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "opt_ixl.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #include #endif #include "ifdi_if.h" #include "i40e_type.h" #include "i40e_prototype.h" #include "ixl_debug.h" #define PVIDV(vendor, devid, name) \ PVID(vendor, devid, name " - " IXL_DRIVER_VERSION_STRING) /* Tunables */ /* * Ring Descriptors Valid Range: 32-4096 Default Value: 1024 This value is the * number of tx/rx descriptors allocated by the driver. Increasing this * value allows the driver to queue more operations. * * Tx descriptors are always 16 bytes, but Rx descriptors can be 32 bytes. * The driver currently always uses 32 byte Rx descriptors. 
*/ #define IXL_DEFAULT_RING 1024 #define IXL_MAX_RING 4096 #define IXL_MIN_RING 64 #define IXL_RING_INCREMENT 32 #define IXL_AQ_LEN 256 #define IXL_AQ_LEN_MAX 1024 /* Alignment for rings */ #define DBA_ALIGN 128 #define MAX_MULTICAST_ADDR 128 #define IXL_MSIX_BAR 3 #define IXL_ADM_LIMIT 2 #define IXL_TSO_SIZE ((255*1024)-1) #define IXL_TX_BUF_SZ ((u32) 1514) #define IXL_AQ_BUF_SZ ((u32) 4096) #define IXL_RX_ITR 0 #define IXL_TX_ITR 1 #define IXL_ITR_NONE 3 #define IXL_QUEUE_EOL 0x7FF #define IXL_MIN_FRAME 17 #define IXL_MAX_FRAME 9728 #define IXL_MAX_TX_SEGS 8 #define IXL_MAX_RX_SEGS 5 #define IXL_MAX_TSO_SEGS 128 #define IXL_SPARSE_CHAIN 7 #define IXL_MIN_TSO_MSS 64 #define IXL_MAX_TSO_MSS 9668 #define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1) #define IXL_RSS_KEY_SIZE_REG 13 #define IXL_RSS_KEY_SIZE (IXL_RSS_KEY_SIZE_REG * 4) #define IXL_RSS_VSI_LUT_SIZE 64 /* X722 -> VSI, X710 -> VF */ #define IXL_RSS_VSI_LUT_ENTRY_MASK 0x3F #define IXL_RSS_VF_LUT_ENTRY_MASK 0xF #define IXL_VF_MAX_BUFFER 0x3F80 #define IXL_VF_MAX_HDR_BUFFER 0x840 #define IXL_VF_MAX_FRAME 0x3FFF /* ERJ: hardware can support ~2k (SW5+) filters between all functions */ #define IXL_MAX_FILTERS 256 #define IXL_NVM_VERSION_LO_SHIFT 0 #define IXL_NVM_VERSION_LO_MASK (0xff << IXL_NVM_VERSION_LO_SHIFT) #define IXL_NVM_VERSION_HI_SHIFT 12 #define IXL_NVM_VERSION_HI_MASK (0xf << IXL_NVM_VERSION_HI_SHIFT) /* * Interrupt Moderation parameters * Multiply ITR values by 2 for real ITR value */ #define IXL_MAX_ITR 0x0FF0 #define IXL_ITR_100K 0x0005 #define IXL_ITR_20K 0x0019 #define IXL_ITR_8K 0x003E #define IXL_ITR_4K 0x007A #define IXL_ITR_1K 0x01F4 #define IXL_ITR_DYNAMIC 0x8000 #define IXL_LOW_LATENCY 0 #define IXL_AVE_LATENCY 1 #define IXL_BULK_LATENCY 2 /* MacVlan Flags */ #define IXL_FILTER_VLAN (u16)(1 << 0) #define IXL_FILTER_MC (u16)(1 << 1) /* used in the vlan field of the filter when not a vlan */ #define IXL_VLAN_ANY -1 /* Maximum number of MAC/VLAN filters supported by HW */ #define IXL_MAX_VLAN_FILTERS 256 #define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) #define CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6|CSUM_SCTP_IPV6) #define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6|CSUM_TSO) /* Misc flags for ixl_vsi.flags */ #define IXL_FLAGS_KEEP_TSO4 (1 << 0) #define IXL_FLAGS_KEEP_TSO6 (1 << 1) #define IXL_FLAGS_USES_MSIX (1 << 2) #define IXL_FLAGS_IS_VF (1 << 3) #define IXL_VSI_IS_PF(v) ((v->flags & IXL_FLAGS_IS_VF) == 0) #define IXL_VSI_IS_VF(v) ((v->flags & IXL_FLAGS_IS_VF) != 0) #define IXL_VF_RESET_TIMEOUT 100 #define IXL_VSI_DATA_PORT 0x01 #define IAVF_MAX_QUEUES 16 #define IXL_MAX_VSI_QUEUES (2 * (I40E_VSILAN_QTABLE_MAX_INDEX + 1)) #define IXL_RX_CTX_BASE_UNITS 128 #define IXL_TX_CTX_BASE_UNITS 128 #define IXL_PF_PCI_CIAA_VF_DEVICE_STATUS 0xAA #define IXL_PF_PCI_CIAD_VF_TRANS_PENDING_MASK 0x20 #define IXL_GLGEN_VFLRSTAT_INDEX(glb_vf) ((glb_vf) / 32) #define IXL_GLGEN_VFLRSTAT_MASK(glb_vf) (1 << ((glb_vf) % 32)) #define IXL_MAX_ITR_IDX 3 #define IXL_END_OF_INTR_LNKLST 0x7FF #define IXL_DEFAULT_RSS_HENA_BASE (\ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \ BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \ BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6) | \ BIT_ULL(I40E_FILTER_PCTYPE_L2_PAYLOAD)) 
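/*
 * Hedged illustration, not part of the original sources: per the
 * "multiply ITR values by 2 for real ITR value" note above, the ITR
 * defines are programmed in 2-microsecond units, so a register value
 * maps to an approximate interrupt rate. The helper name below is
 * hypothetical.
 */
static inline u32
ixl_itr_to_ints_per_sec(u32 itr_reg)
{
	/* e.g. IXL_ITR_8K = 0x003E = 62 -> 124 us -> ~8064 ints/sec */
	u32 interval_us = itr_reg * 2;

	return (interval_us != 0 ? 1000000 / interval_us : 0);
}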
#define IXL_DEFAULT_RSS_HENA_XL710 IXL_DEFAULT_RSS_HENA_BASE #define IXL_DEFAULT_RSS_HENA_X722 (\ IXL_DEFAULT_RSS_HENA_BASE | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK)) #define IXL_CAPS \ (IFCAP_TSO4 | IFCAP_TSO6 | \ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | \ IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | \ IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO | \ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \ IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU | IFCAP_LRO) #define IXL_CSUM_TCP \ (CSUM_IP_TCP|CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP6_TCP) #define IXL_CSUM_UDP \ (CSUM_IP_UDP|CSUM_IP6_UDP) #define IXL_CSUM_SCTP \ (CSUM_IP_SCTP|CSUM_IP6_SCTP) #define IXL_CSUM_IPV4 \ (CSUM_IP|CSUM_IP_TSO) /* Pre-11 counter(9) compatibility */ #if __FreeBSD_version >= 1100036 #define IXL_SET_IPACKETS(vsi, count) (vsi)->ipackets = (count) #define IXL_SET_IERRORS(vsi, count) (vsi)->ierrors = (count) #define IXL_SET_OPACKETS(vsi, count) (vsi)->opackets = (count) #define IXL_SET_OERRORS(vsi, count) (vsi)->oerrors = (count) #define IXL_SET_COLLISIONS(vsi, count) /* Do nothing; collisions is always 0. */ #define IXL_SET_IBYTES(vsi, count) (vsi)->ibytes = (count) #define IXL_SET_OBYTES(vsi, count) (vsi)->obytes = (count) #define IXL_SET_IMCASTS(vsi, count) (vsi)->imcasts = (count) #define IXL_SET_OMCASTS(vsi, count) (vsi)->omcasts = (count) #define IXL_SET_IQDROPS(vsi, count) (vsi)->iqdrops = (count) #define IXL_SET_OQDROPS(vsi, count) (vsi)->oqdrops = (count) #define IXL_SET_NOPROTO(vsi, count) (vsi)->noproto = (count) #else #define IXL_SET_IPACKETS(vsi, count) (vsi)->ifp->if_ipackets = (count) #define IXL_SET_IERRORS(vsi, count) (vsi)->ifp->if_ierrors = (count) #define IXL_SET_OPACKETS(vsi, count) (vsi)->ifp->if_opackets = (count) #define IXL_SET_OERRORS(vsi, count) (vsi)->ifp->if_oerrors = (count) #define IXL_SET_COLLISIONS(vsi, count) (vsi)->ifp->if_collisions = (count) #define IXL_SET_IBYTES(vsi, count) (vsi)->ifp->if_ibytes = (count) #define IXL_SET_OBYTES(vsi, count) (vsi)->ifp->if_obytes = (count) #define IXL_SET_IMCASTS(vsi, count) (vsi)->ifp->if_imcasts = (count) #define IXL_SET_OMCASTS(vsi, count) (vsi)->ifp->if_omcasts = (count) #define IXL_SET_IQDROPS(vsi, count) (vsi)->ifp->if_iqdrops = (count) #define IXL_SET_OQDROPS(vsi, odrops) (vsi)->ifp->if_snd.ifq_drops = (odrops) #define IXL_SET_NOPROTO(vsi, count) (vsi)->noproto = (count) #endif /* For stats sysctl naming */ #define IXL_QUEUE_NAME_LEN 32 MALLOC_DECLARE(M_IXL); #define IXL_DEV_ERR(_dev, _format, ...) \ device_printf(_dev, "%s: " _format " (%s:%d)\n", __func__, ##__VA_ARGS__, __FILE__, __LINE__) /* ***************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * ***************************************************************************** */ typedef struct _ixl_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int subvendor_id; unsigned int subdevice_id; unsigned int index; } ixl_vendor_info_t; /* ** This struct has multiple uses, multicast ** addresses, vlans, and mac filters all use it. 
*/ struct ixl_mac_filter { LIST_ENTRY(ixl_mac_filter) ftle; u8 macaddr[ETHER_ADDR_LEN]; s16 vlan; u16 flags; }; /* * The Transmit ring control struct */ struct tx_ring { struct ixl_tx_queue *que; u32 tail; struct i40e_tx_desc *tx_base; u64 tx_paddr; u32 latency; u32 packets; u32 me; /* * For reporting completed packet status * in descriptor writeback mode */ qidx_t *tx_rsq; qidx_t tx_rs_cidx; qidx_t tx_rs_pidx; qidx_t tx_cidx_processed; /* Used for Dynamic ITR calculation */ u32 itr; u32 bytes; /* Soft Stats */ u64 tx_bytes; u64 tx_packets; u64 mss_too_small; }; /* * The Receive ring control struct */ struct rx_ring { struct ixl_rx_queue *que; union i40e_rx_desc *rx_base; uint64_t rx_paddr; bool discard; u32 itr; u32 latency; u32 mbuf_sz; u32 tail; u32 me; /* Used for Dynamic ITR calculation */ u32 packets; u32 bytes; /* Soft stats */ u64 rx_packets; u64 rx_bytes; u64 desc_errs; u64 csum_errs; }; /* ** Driver queue structs */ struct ixl_tx_queue { struct ixl_vsi *vsi; struct tx_ring txr; struct if_irq que_irq; u32 msix; /* Stats */ u64 irqs; u64 tso; }; struct ixl_rx_queue { struct ixl_vsi *vsi; struct rx_ring rxr; struct if_irq que_irq; u32 msix; /* This queue's MSIX vector */ /* Stats */ u64 irqs; }; /* ** Virtual Station Interface */ LIST_HEAD(ixl_ftl_head, ixl_mac_filter); struct ixl_vsi { if_ctx_t ctx; if_softc_ctx_t shared; - struct ifnet *ifp; + if_t ifp; device_t dev; struct i40e_hw *hw; struct ifmedia *media; int num_rx_queues; int num_tx_queues; void *back; enum i40e_vsi_type type; int id; u32 rx_itr_setting; u32 tx_itr_setting; bool enable_head_writeback; u16 vsi_num; bool link_active; u16 seid; u16 uplink_seid; u16 downlink_seid; struct ixl_tx_queue *tx_queues; /* TX queue array */ struct ixl_rx_queue *rx_queues; /* RX queue array */ struct if_irq irq; u32 link_speed; /* MAC/VLAN Filter list */ struct ixl_ftl_head ftl; u16 num_macs; u64 num_hw_filters; /* Contains readylist & stat counter id */ struct i40e_aqc_vsi_properties_data info; #define IXL_VLANS_MAP_LEN EVL_VLID_MASK + 1 bitstr_t bit_decl(vlans_map, IXL_VLANS_MAP_LEN); u16 num_vlans; /* Per-VSI stats from hardware */ struct i40e_eth_stats eth_stats; struct i40e_eth_stats eth_stats_offsets; bool stat_offsets_loaded; /* VSI stat counters */ u64 ipackets; u64 ierrors; u64 opackets; u64 oerrors; u64 ibytes; u64 obytes; u64 imcasts; u64 omcasts; u64 iqdrops; u64 oqdrops; u64 noproto; /* Misc. 
*/ u64 flags; /* Stats sysctls for this VSI */ struct sysctl_oid *vsi_node; struct sysctl_ctx_list sysctl_ctx; }; struct ixl_add_maddr_arg { struct ixl_ftl_head to_add; struct ixl_vsi *vsi; }; /* ** Compare two ethernet addresses */ static inline bool ixl_ether_is_equal(const u8 *ea1, const u8 *ea2) { return (bcmp(ea1, ea2, ETHER_ADDR_LEN) == 0); } /* * Return next largest power of 2, unsigned * * Public domain, from Bit Twiddling Hacks */ static inline u32 next_power_of_two(u32 n) { n--; n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16; n++; /* Next power of two > 0 is 1 */ n += (n == 0); return (n); } /* * Info for stats sysctls */ struct ixl_sysctl_info { u64 *stat; char *name; char *description; }; extern const uint8_t ixl_bcast_addr[ETHER_ADDR_LEN]; /* Common function prototypes between PF/VF driver */ void ixl_debug_core(device_t dev, u32 enabled_mask, u32 mask, char *fmt, ...); void ixl_init_tx_ring(struct ixl_vsi *vsi, struct ixl_tx_queue *que); void ixl_get_default_rss_key(u32 *); const char * i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err); void ixl_init_tx_rsqs(struct ixl_vsi *vsi); void ixl_init_tx_cidx(struct ixl_vsi *vsi); u64 ixl_max_vc_speed_to_value(u8 link_speeds); void ixl_add_vsi_sysctls(device_t dev, struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx, const char *sysctl_name); void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_eth_stats *eth_stats); void ixl_vsi_add_queues_stats(struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx); #endif /* _IXL_H_ */ diff --git a/sys/dev/ixl/ixl_iw.c b/sys/dev/ixl/ixl_iw.c index 5e2d7cfcb30b..d4129808cc28 100644 --- a/sys/dev/ixl/ixl_iw.c +++ b/sys/dev/ixl/ixl_iw.c @@ -1,490 +1,490 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #include "ixl.h" #include "ixl_pf.h" #include "ixl_iw.h" #include "ixl_iw_int.h" #ifdef IXL_IW #define IXL_IW_VEC_BASE(pf) ((pf)->msix - (pf)->iw_msix) #define IXL_IW_VEC_COUNT(pf) ((pf)->iw_msix) #define IXL_IW_VEC_LIMIT(pf) ((pf)->msix) extern int ixl_enable_iwarp; static struct ixl_iw_state ixl_iw; static int ixl_iw_ref_cnt; static void ixl_iw_pf_msix_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; int vec; for (vec = IXL_IW_VEC_BASE(pf); vec < IXL_IW_VEC_LIMIT(pf); vec++) { reg = I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK; wr32(hw, I40E_PFINT_LNKLSTN(vec - 1), reg); } return; } static void ixl_iw_invoke_op(void *context, int pending) { struct ixl_iw_pf_entry *pf_entry = (struct ixl_iw_pf_entry *)context; struct ixl_iw_pf info; bool initialize; int err; INIT_DEBUGOUT("begin"); mtx_lock(&ixl_iw.mtx); if ((pf_entry->state.iw_scheduled == IXL_IW_PF_STATE_ON) && (pf_entry->state.iw_current == IXL_IW_PF_STATE_OFF)) initialize = true; else if ((pf_entry->state.iw_scheduled == IXL_IW_PF_STATE_OFF) && (pf_entry->state.iw_current == IXL_IW_PF_STATE_ON)) initialize = false; else { /* nothing to be done, so finish here */ mtx_unlock(&ixl_iw.mtx); return; } info = pf_entry->pf_info; mtx_unlock(&ixl_iw.mtx); if (initialize) { err = ixl_iw.ops->init(&info); if (err) device_printf(pf_entry->pf->dev, "%s: failed to initialize iwarp (err %d)\n", __func__, err); else pf_entry->state.iw_current = IXL_IW_PF_STATE_ON; } else { err = ixl_iw.ops->stop(&info); if (err) device_printf(pf_entry->pf->dev, "%s: failed to stop iwarp (err %d)\n", __func__, err); else { ixl_iw_pf_msix_reset(pf_entry->pf); pf_entry->state.iw_current = IXL_IW_PF_STATE_OFF; } } return; } static void ixl_iw_uninit(void) { INIT_DEBUGOUT("begin"); mtx_destroy(&ixl_iw.mtx); return; } static void ixl_iw_init(void) { INIT_DEBUGOUT("begin"); LIST_INIT(&ixl_iw.pfs); mtx_init(&ixl_iw.mtx, "ixl_iw_pfs", NULL, MTX_DEF); ixl_iw.registered = false; return; } /****************************************************************************** * if_ixl internal API *****************************************************************************/ int ixl_iw_pf_init(struct ixl_pf *pf) { struct ixl_iw_pf_entry *pf_entry; struct ixl_iw_pf *pf_info; int err = 0; INIT_DEBUGOUT("begin"); mtx_lock(&ixl_iw.mtx); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) if (pf_entry->pf == pf) break; if (pf_entry == NULL) { /* attempt to initialize PF not yet attached - sth is wrong */ device_printf(pf->dev, "%s: PF not found\n", __func__); err = ENOENT; goto out; } pf_info = &pf_entry->pf_info; pf_info->handle = (void *)pf; pf_info->ifp = pf->vsi.ifp; pf_info->dev = pf->dev; pf_info->pci_mem = pf->pci_mem; pf_info->pf_id = pf->hw.pf_id; - pf_info->mtu = pf->vsi.ifp->if_mtu; + pf_info->mtu = if_getmtu(pf->vsi.ifp); pf_info->iw_msix.count = IXL_IW_VEC_COUNT(pf); pf_info->iw_msix.base = IXL_IW_VEC_BASE(pf); for (int i = 0; i < IXL_IW_MAX_USER_PRIORITY; i++) pf_info->qs_handle[i] = le16_to_cpu(pf->vsi.info.qs_handle[0]); pf_entry->state.pf = IXL_IW_PF_STATE_ON; if (ixl_iw.registered) { pf_entry->state.iw_scheduled = IXL_IW_PF_STATE_ON; taskqueue_enqueue(ixl_iw.tq, &pf_entry->iw_task); } out: mtx_unlock(&ixl_iw.mtx); return (err); } void ixl_iw_pf_stop(struct ixl_pf *pf) { struct ixl_iw_pf_entry *pf_entry; INIT_DEBUGOUT("begin"); mtx_lock(&ixl_iw.mtx); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) if (pf_entry->pf == pf) break; if (pf_entry == NULL) { /* attempt to stop PF which has not
been attached - sth is wrong */ device_printf(pf->dev, "%s: PF not found\n", __func__); goto out; } pf_entry->state.pf = IXL_IW_PF_STATE_OFF; if (pf_entry->state.iw_scheduled == IXL_IW_PF_STATE_ON) { pf_entry->state.iw_scheduled = IXL_IW_PF_STATE_OFF; if (ixl_iw.registered) taskqueue_enqueue(ixl_iw.tq, &pf_entry->iw_task); } out: mtx_unlock(&ixl_iw.mtx); return; } int ixl_iw_pf_attach(struct ixl_pf *pf) { struct ixl_iw_pf_entry *pf_entry; int err = 0; INIT_DEBUGOUT("begin"); if (ixl_iw_ref_cnt == 0) ixl_iw_init(); mtx_lock(&ixl_iw.mtx); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) if (pf_entry->pf == pf) { device_printf(pf->dev, "%s: PF already exists\n", __func__); err = EEXIST; goto out; } pf_entry = malloc(sizeof(struct ixl_iw_pf_entry), M_IXL, M_NOWAIT | M_ZERO); if (pf_entry == NULL) { device_printf(pf->dev, "%s: failed to allocate memory to attach new PF\n", __func__); err = ENOMEM; goto out; } pf_entry->pf = pf; pf_entry->state.pf = IXL_IW_PF_STATE_OFF; pf_entry->state.iw_scheduled = IXL_IW_PF_STATE_OFF; pf_entry->state.iw_current = IXL_IW_PF_STATE_OFF; LIST_INSERT_HEAD(&ixl_iw.pfs, pf_entry, node); ixl_iw_ref_cnt++; TASK_INIT(&pf_entry->iw_task, 0, ixl_iw_invoke_op, pf_entry); out: mtx_unlock(&ixl_iw.mtx); return (err); } int ixl_iw_pf_detach(struct ixl_pf *pf) { struct ixl_iw_pf_entry *pf_entry; int err = 0; INIT_DEBUGOUT("begin"); mtx_lock(&ixl_iw.mtx); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) if (pf_entry->pf == pf) break; if (pf_entry == NULL) { /* attempt to stop PF which has not been attached - sth is wrong */ device_printf(pf->dev, "%s: PF not found\n", __func__); err = ENOENT; goto out; } if (pf_entry->state.pf != IXL_IW_PF_STATE_OFF) { /* attempt to detach PF which has not yet been stopped - sth is wrong */ device_printf(pf->dev, "%s: failed - PF is still active\n", __func__); err = EBUSY; goto out; } LIST_REMOVE(pf_entry, node); free(pf_entry, M_IXL); ixl_iw_ref_cnt--; out: mtx_unlock(&ixl_iw.mtx); if (ixl_iw_ref_cnt == 0) ixl_iw_uninit(); return (err); } /****************************************************************************** * API exposed to iw_ixl module *****************************************************************************/ int ixl_iw_pf_reset(void *pf_handle) { struct ixl_pf *pf = (struct ixl_pf *)pf_handle; INIT_DEBUGOUT("begin"); IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); return (0); } int ixl_iw_pf_msix_init(void *pf_handle, struct ixl_iw_msix_mapping *msix_info) { struct ixl_pf *pf = (struct ixl_pf *)pf_handle; struct i40e_hw *hw = &pf->hw; u32 reg; int vec, i; INIT_DEBUGOUT("begin"); if ((msix_info->aeq_vector < IXL_IW_VEC_BASE(pf)) || (msix_info->aeq_vector >= IXL_IW_VEC_LIMIT(pf))) { printf("%s: invalid MSI-X vector (%i) for AEQ\n", __func__, msix_info->aeq_vector); return (EINVAL); } reg = I40E_PFINT_AEQCTL_CAUSE_ENA_MASK | (msix_info->aeq_vector << I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT) | (msix_info->itr_indx << I40E_PFINT_AEQCTL_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_AEQCTL, reg); for (vec = IXL_IW_VEC_BASE(pf); vec < IXL_IW_VEC_LIMIT(pf); vec++) { for (i = 0; i < msix_info->ceq_cnt; i++) if (msix_info->ceq_vector[i] == vec) break; if (i == msix_info->ceq_cnt) { /* this vector has no CEQ mapped */ reg = I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK; wr32(hw, I40E_PFINT_LNKLSTN(vec - 1), reg); } else { reg = (i & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) | (I40E_QUEUE_TYPE_PE_CEQ << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); wr32(hw, I40E_PFINT_LNKLSTN(vec - 1), reg); reg = I40E_PFINT_CEQCTL_CAUSE_ENA_MASK | (vec << I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT) | 
(msix_info->itr_indx << I40E_PFINT_CEQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_PFINT_CEQCTL(i), reg); } } return (0); } int ixl_iw_register(struct ixl_iw_ops *ops) { struct ixl_iw_pf_entry *pf_entry; int err = 0; int iwarp_cap_on_pfs = 0; INIT_DEBUGOUT("begin"); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) iwarp_cap_on_pfs += pf_entry->pf->hw.func_caps.iwarp; if (!iwarp_cap_on_pfs && ixl_enable_iwarp) { printf("%s: the device is not iwarp-capable, registering dropped\n", __func__); return (ENODEV); } if (ixl_enable_iwarp == 0) { printf("%s: enable_iwarp is off, registering dropped\n", __func__); return (EACCES); } if ((ops->init == NULL) || (ops->stop == NULL)) { printf("%s: invalid iwarp driver ops\n", __func__); return (EINVAL); } mtx_lock(&ixl_iw.mtx); if (ixl_iw.registered) { printf("%s: iwarp driver already registered\n", __func__); err = (EBUSY); goto out; } ixl_iw.registered = true; mtx_unlock(&ixl_iw.mtx); ixl_iw.tq = taskqueue_create("ixl_iw", M_NOWAIT, taskqueue_thread_enqueue, &ixl_iw.tq); if (ixl_iw.tq == NULL) { printf("%s: failed to create queue\n", __func__); ixl_iw.registered = false; return (ENOMEM); } taskqueue_start_threads(&ixl_iw.tq, 1, PI_NET, "ixl iw"); ixl_iw.ops = malloc(sizeof(struct ixl_iw_ops), M_IXL, M_NOWAIT | M_ZERO); if (ixl_iw.ops == NULL) { printf("%s: failed to allocate memory\n", __func__); taskqueue_free(ixl_iw.tq); ixl_iw.registered = false; return (ENOMEM); } ixl_iw.ops->init = ops->init; ixl_iw.ops->stop = ops->stop; mtx_lock(&ixl_iw.mtx); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) if (pf_entry->state.pf == IXL_IW_PF_STATE_ON) { pf_entry->state.iw_scheduled = IXL_IW_PF_STATE_ON; taskqueue_enqueue(ixl_iw.tq, &pf_entry->iw_task); } out: mtx_unlock(&ixl_iw.mtx); return (err); } int ixl_iw_unregister(void) { struct ixl_iw_pf_entry *pf_entry; int iwarp_cap_on_pfs = 0; INIT_DEBUGOUT("begin"); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) iwarp_cap_on_pfs += pf_entry->pf->hw.func_caps.iwarp; if (!iwarp_cap_on_pfs && ixl_enable_iwarp) { printf("%s: attempt to unregister driver when no iwarp-capable device present\n", __func__); return (ENODEV); } if (ixl_enable_iwarp == 0) { printf("%s: attempt to unregister driver when enable_iwarp is off\n", __func__); return (ENODEV); } mtx_lock(&ixl_iw.mtx); if (!ixl_iw.registered) { printf("%s: failed - iwarp driver has not been registered\n", __func__); mtx_unlock(&ixl_iw.mtx); return (ENOENT); } LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) if (pf_entry->state.iw_scheduled == IXL_IW_PF_STATE_ON) { pf_entry->state.iw_scheduled = IXL_IW_PF_STATE_OFF; taskqueue_enqueue(ixl_iw.tq, &pf_entry->iw_task); } ixl_iw.registered = false; mtx_unlock(&ixl_iw.mtx); LIST_FOREACH(pf_entry, &ixl_iw.pfs, node) taskqueue_drain(ixl_iw.tq, &pf_entry->iw_task); taskqueue_free(ixl_iw.tq); ixl_iw.tq = NULL; free(ixl_iw.ops, M_IXL); ixl_iw.ops = NULL; return (0); } #endif /* IXL_IW */ diff --git a/sys/dev/ixl/ixl_pf_iflib.c b/sys/dev/ixl/ixl_pf_iflib.c index 6ea20389c547..eeb8f28393c2 100644 --- a/sys/dev/ixl/ixl_pf_iflib.c +++ b/sys/dev/ixl/ixl_pf_iflib.c @@ -1,1107 +1,1107 @@ /****************************************************************************** Copyright (c) 2013-2020, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl_pf.h" void ixl_configure_tx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_tx_queue *que = vsi->tx_queues; vsi->tx_itr_setting = pf->tx_itr; for (int i = 0; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } void ixl_configure_rx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que = vsi->rx_queues; vsi->rx_itr_setting = pf->rx_itr; for (int i = 0; i < vsi->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; } } int ixl_intr(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que = vsi->rx_queues; u32 icr0; ++que->irqs; /* Clear PBA at start of ISR if using legacy interrupts */ if (vsi->shared->isc_intr == IFLIB_INTR_LEGACY) wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)); icr0 = rd32(hw, I40E_PFINT_ICR0); #ifdef PCI_IOV if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) iflib_iov_intr_deferred(vsi->ctx); #endif if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) iflib_admin_intr_deferred(vsi->ctx); ixl_enable_intr0(hw); if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) return (FILTER_SCHEDULE_THREAD); else return (FILTER_HANDLED); } /********************************************************************* * * MSI-X VSI Interrupt Service routine * **********************************************************************/ int ixl_msix_que(void *arg) { struct ixl_rx_queue *rx_que = arg; ++rx_que->irqs; ixl_set_queue_rx_itr(rx_que); return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSI-X Admin Queue Interrupt Service routine * **********************************************************************/ int ixl_msix_adminq(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u32 reg, mask, rstat_reg; bool do_task = FALSE; DDPRINTF(dev, "begin"); ++pf->admin_irq; reg = rd32(hw, 
I40E_PFINT_ICR0); /* * For masking off interrupt causes that need to be handled before * they can be re-enabled */ mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* Check on the cause */ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; do_task = TRUE; } if (reg & I40E_PFINT_ICR0_MAL_DETECT_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; atomic_set_32(&pf->state, IXL_PF_STATE_MDD_PENDING); do_task = TRUE; } if (reg & I40E_PFINT_ICR0_GRST_MASK) { const char *reset_type; mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK; rstat_reg = rd32(hw, I40E_GLGEN_RSTAT); rstat_reg = (rstat_reg & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; switch (rstat_reg) { /* These others might be handled similarly to an EMPR reset */ case I40E_RESET_CORER: reset_type = "CORER"; break; case I40E_RESET_GLOBR: reset_type = "GLOBR"; break; case I40E_RESET_EMPR: reset_type = "EMPR"; break; default: reset_type = "POR"; break; } device_printf(dev, "Reset Requested! (%s)\n", reset_type); /* overload admin queue task to check reset progress */ atomic_set_int(&pf->state, IXL_PF_STATE_RESETTING); do_task = TRUE; } /* * PE / PCI / ECC exceptions are all handled in the same way: * mask out these three causes, then request a PF reset */ if (reg & I40E_PFINT_ICR0_ECC_ERR_MASK) device_printf(dev, "ECC Error detected!\n"); if (reg & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) device_printf(dev, "PCI Exception detected!\n"); if (reg & I40E_PFINT_ICR0_PE_CRITERR_MASK) device_printf(dev, "Critical Protocol Engine Error detected!\n"); /* Checks against the conditions above */ if (reg & IXL_ICR0_CRIT_ERR_MASK) { mask &= ~IXL_ICR0_CRIT_ERR_MASK; atomic_set_32(&pf->state, IXL_PF_STATE_PF_RESET_REQ | IXL_PF_STATE_PF_CRIT_ERR); do_task = TRUE; } if (reg & I40E_PFINT_ICR0_HMC_ERR_MASK) { reg = rd32(hw, I40E_PFHMC_ERRORINFO); if (reg & I40E_PFHMC_ERRORINFO_ERROR_DETECTED_MASK) { device_printf(dev, "HMC Error detected!\n"); device_printf(dev, "INFO 0x%08x\n", reg); reg = rd32(hw, I40E_PFHMC_ERRORDATA); device_printf(dev, "DATA 0x%08x\n", reg); wr32(hw, I40E_PFHMC_ERRORINFO, 0); } } #ifdef PCI_IOV if (reg & I40E_PFINT_ICR0_VFLR_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; iflib_iov_intr_deferred(pf->vsi.ctx); } #endif wr32(hw, I40E_PFINT_ICR0_ENA, mask); ixl_enable_intr0(hw); if (do_task) return (FILTER_SCHEDULE_THREAD); else return (FILTER_HANDLED); } /* * Configure queue interrupt cause registers in hardware. * * Linked list for each vector LNKLSTN(i) -> RQCTL(i) -> TQCTL(i) -> EOL */ void ixl_configure_queue_intr_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; u16 vector = 1; for (int i = 0; i < max(vsi->num_rx_queues, vsi->num_tx_queues); i++, vector++) { /* Make sure interrupt is disabled */ wr32(hw, I40E_PFINT_DYN_CTLN(i), 0); /* Set linked list head to point to corresponding RX queue * e.g. 
vector 1 (LNKLSTN register 0) points to queue pair 0's RX queue */ reg = ((i << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) | ((I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) & I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_MASK); wr32(hw, I40E_PFINT_LNKLSTN(i), reg); reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(i), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_TQCTL(i), reg); } } /* * Configure for single interrupt vector operation */ void ixl_configure_legacy(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; vsi->rx_queues[0].rxr.itr = vsi->rx_itr_setting; /* Setup "other" causes */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK ; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* No ITR for non-queue interrupts */ wr32(hw, I40E_PFINT_STAT_CTL0, IXL_ITR_NONE << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the q int */ reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(0), reg); } void ixl_free_pci_resources(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); struct ixl_rx_queue *rx_que = vsi->rx_queues; /* We may get here before stations are set up */ if (rx_que == NULL) goto early; /* ** Release all MSI-X VSI resources: */ iflib_irq_free(vsi->ctx, &vsi->irq); for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(vsi->ctx, &rx_que->que_irq); early: if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(pf->pci_mem), pf->pci_mem); } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ int ixl_setup_interface(device_t dev, struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; if_ctx_t ctx = vsi->ctx; struct i40e_hw *hw = &pf->hw; - struct ifnet *ifp = iflib_get_ifp(ctx); + if_t ifp = iflib_get_ifp(ctx); struct i40e_aq_get_phy_abilities_resp abilities; enum i40e_status_code aq_error = 0; INIT_DBG_DEV(dev, "begin"); vsi->shared->isc_max_frame_size = - ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; if (IXL_PF_IN_RECOVERY_MODE(pf)) goto only_auto; aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); /* May need delay to detect fiber correctly */ if (aq_error == I40E_ERR_UNKNOWN_PHY) { i40e_msec_delay(200); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); } if (aq_error) { if (aq_error == I40E_ERR_UNKNOWN_PHY) device_printf(dev, "Unknown PHY type detected!\n"); else device_printf(dev, "Error getting supported media types, err %d," " AQ error %d\n", aq_error, hw->aq.asq_last_status); } else { pf->supported_speeds = abilities.link_speed; if_setbaudrate(ifp, ixl_max_aq_speed_to_value(pf->supported_speeds)); ixl_add_ifmedia(vsi->media, hw->phy.phy_types); } only_auto: /* Use autoselect media by default */ ifmedia_add(vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(vsi->media, IFM_ETHER | IFM_AUTO); return (0); } /* ** Run when the Admin Queue gets a link state change interrupt. */ void ixl_link_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { struct i40e_hw *hw = &pf->hw; device_t dev = iflib_get_dev(pf->vsi.ctx); struct i40e_link_status *link_info = &hw->phy.link_info; /* Driver needs to re-enable delivering of link status events * by FW after each event reception. Call i40e_get_link_status * to do that. To not lose information about link state changes, * which happened between receiving an event and the call, * do not rely on status from event but use most recent * status information retrieved by the call. */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); /* Print out message if an unqualified module is found */ if ((link_info->link_info & I40E_AQ_MEDIA_AVAILABLE) && (pf->advertised_speed) && (if_getflags(pf->vsi.ifp) & IFF_UP) && (!(link_info->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(link_info->link_info & I40E_AQ_LINK_UP))) device_printf(dev, "Link failed because " "an unqualified module was detected!\n"); /* OS link info is updated elsewhere */ } /********************************************************************* * * Initialize the VSI: this handles contexts, which means things * like the number of descriptors, buffer size, * plus we init the rings thru this function. 
* **********************************************************************/ int ixl_initialize_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; if_softc_ctx_t scctx = iflib_get_softc_ctx(vsi->ctx); struct ixl_tx_queue *tx_que = vsi->tx_queues; struct ixl_rx_queue *rx_que = vsi->rx_queues; device_t dev = iflib_get_dev(vsi->ctx); struct i40e_hw *hw = vsi->hw; struct i40e_vsi_context ctxt; int tc_queues; int err = 0; memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = vsi->seid; if (pf->veb_seid != 0) ctxt.uplink_seid = pf->veb_seid; ctxt.pf_num = hw->pf_id; err = i40e_aq_get_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_get_vsi_params() failed, error %d" " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } ixl_dbg(pf, IXL_DBG_SWITCH_INFO, "get_vsi_params: seid: %d, uplinkseid: %d, vsi_number: %d, " "vsis_allocated: %d, vsis_unallocated: %d, flags: 0x%x, " "pfnum: %d, vfnum: %d, stat idx: %d, enabled: %d\n", ctxt.seid, ctxt.uplink_seid, ctxt.vsi_number, ctxt.vsis_allocated, ctxt.vsis_unallocated, ctxt.flags, ctxt.pf_num, ctxt.vf_num, ctxt.info.stat_counter_idx, ctxt.info.up_enable_bits); /* ** Set the queue and traffic class bits ** - when multiple traffic classes are supported ** this will need to be more robust. */ ctxt.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; ctxt.info.mapping_flags |= I40E_AQ_VSI_QUE_MAP_CONTIG; /* In contig mode, que_mapping[0] is first queue index used by this VSI */ ctxt.info.queue_mapping[0] = 0; /* * This VSI will only use traffic class 0; start traffic class 0's * queue allocation at queue 0, and assign it 2^tc_queues queues (though * the driver may not use all of them). */ tc_queues = fls(pf->qtag.num_allocated) - 1; ctxt.info.tc_mapping[0] = ((pf->qtag.first_qidx << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) | ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) & I40E_AQ_VSI_TC_QUE_NUMBER_MASK); /* Set VLAN receive stripping mode */ ctxt.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID; ctxt.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL; if (if_getcapenable(vsi->ifp) & IFCAP_VLAN_HWTAGGING) ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; else ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_NOTHING; #ifdef IXL_IW /* Set TCP Enable for iWARP capable VSI */ if (ixl_enable_iwarp && pf->iw_enabled) { ctxt.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } #endif /* Save VSI number and info for use later */ vsi->vsi_num = ctxt.vsi_number; bcopy(&ctxt.info, &vsi->info, sizeof(vsi->info)); ctxt.flags = htole16(I40E_AQ_VSI_TYPE_PF); err = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_update_vsi_params() failed, error %d," " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } for (int i = 0; i < vsi->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; struct i40e_hmc_obj_txq tctx; u32 txctl; /* Setup the HMC TX Context */ bzero(&tctx, sizeof(tctx)); tctx.new_context = 1; tctx.base = (txr->tx_paddr/IXL_TX_CTX_BASE_UNITS); tctx.qlen = scctx->isc_ntxd[0]; tctx.fc_ena = 0; /* Disable FCoE */ /* * This value needs to pulled from the VSI that this queue * is assigned to. Index into array is traffic class. 
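		 * Only traffic class 0 is used, hence qs_handle[0] below.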
		 */
		tctx.rdylist = vsi->info.qs_handle[0];
		/*
		 * Set these to enable Head Writeback
		 * - Address is last entry in TX ring (reserved for HWB index)
		 * Leave these as 0 for Descriptor Writeback
		 */
		if (vsi->enable_head_writeback) {
			tctx.head_wb_ena = 1;
			tctx.head_wb_addr = txr->tx_paddr +
			    (scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc));
		} else {
			tctx.head_wb_ena = 0;
			tctx.head_wb_addr = 0;
		}
		tctx.rdylist_act = 0;

		err = i40e_clear_lan_tx_queue_context(hw, i);
		if (err) {
			device_printf(dev, "Unable to clear TX context\n");
			break;
		}
		err = i40e_set_lan_tx_queue_context(hw, i, &tctx);
		if (err) {
			device_printf(dev, "Unable to set TX context\n");
			break;
		}
		/* Associate the ring with this PF */
		txctl = I40E_QTX_CTL_PF_QUEUE;
		txctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
		    I40E_QTX_CTL_PF_INDX_MASK);
		wr32(hw, I40E_QTX_CTL(i), txctl);
		ixl_flush(hw);

		/* Do ring (re)init */
		ixl_init_tx_ring(vsi, tx_que);
	}
	for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) {
		struct rx_ring *rxr = &rx_que->rxr;
		struct i40e_hmc_obj_rxq rctx;

		/* Next setup the HMC RX Context */
		rxr->mbuf_sz = iflib_get_rx_mbuf_sz(vsi->ctx);

		u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len;

		/* Set up an RX context for the HMC */
		memset(&rctx, 0, sizeof(struct i40e_hmc_obj_rxq));
		rctx.dbuff = rxr->mbuf_sz >> I40E_RXQ_CTX_DBUFF_SHIFT;
		/* ignore header split for now */
		rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT;
		rctx.rxmax = (scctx->isc_max_frame_size < max_rxmax) ?
		    scctx->isc_max_frame_size : max_rxmax;
		rctx.dtype = 0;
		rctx.dsize = 1;		/* do 32byte descriptors */
		rctx.hsplit_0 = 0;	/* no header split */
		rctx.base = (rxr->rx_paddr/IXL_RX_CTX_BASE_UNITS);
		rctx.qlen = scctx->isc_nrxd[0];
		rctx.tphrdesc_ena = 1;
		rctx.tphwdesc_ena = 1;
		rctx.tphdata_ena = 0;	/* Header Split related */
		rctx.tphhead_ena = 0;	/* Header Split related */
		rctx.lrxqthresh = 1;	/* Interrupt at <64 desc avail */
		rctx.crcstrip = 1;
		rctx.l2tsel = 1;
		rctx.showiv = 1;	/* Strip inner VLAN header */
		rctx.fc_ena = 0;	/* Disable FCoE */
		rctx.prefena = 1;	/* Prefetch descriptors */

		err = i40e_clear_lan_rx_queue_context(hw, i);
		if (err) {
			device_printf(dev, "Unable to clear RX context %d\n", i);
			break;
		}
		err = i40e_set_lan_rx_queue_context(hw, i, &rctx);
		if (err) {
			device_printf(dev, "Unable to set RX context %d\n", i);
			break;
		}
		wr32(vsi->hw, I40E_QRX_TAIL(i), 0);
	}
	return (err);
}

/*
** Provide an update to the queue RX
** interrupt moderation value.
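** When dynamic ITR is enabled, the new value is derived from the
** bytes seen per interrupt interval and smoothed exponentially
** before being written to the queue's ITR register.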
*/
void
ixl_set_queue_rx_itr(struct ixl_rx_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct ixl_pf	*pf = (struct ixl_pf *)vsi->back;
	struct i40e_hw	*hw = vsi->hw;
	struct rx_ring	*rxr = &que->rxr;
	u16		rx_itr;
	u16		rx_latency = 0;
	int		rx_bytes;

	/* Idle, do nothing */
	if (rxr->bytes == 0)
		return;

	if (pf->dynamic_rx_itr) {
		rx_bytes = rxr->bytes/rxr->itr;
		rx_itr = rxr->itr;

		/* Adjust latency range */
		switch (rxr->latency) {
		case IXL_LOW_LATENCY:
			if (rx_bytes > 10) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (rx_bytes > 20) {
				rx_latency = IXL_BULK_LATENCY;
				rx_itr = IXL_ITR_8K;
			} else if (rx_bytes <= 10) {
				rx_latency = IXL_LOW_LATENCY;
				rx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (rx_bytes <= 20) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		}

		rxr->latency = rx_latency;

		if (rx_itr != rxr->itr) {
			/* do an exponential smoothing */
			rx_itr = (10 * rx_itr * rxr->itr) /
			    ((9 * rx_itr) + rxr->itr);
			rxr->itr = min(rx_itr, IXL_MAX_ITR);
			wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, rxr->me),
			    rxr->itr);
		}
	} else { /* We may have toggled to non-dynamic */
		if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->rx_itr_setting = pf->rx_itr;
		/* Update the hardware if needed */
		if (rxr->itr != vsi->rx_itr_setting) {
			rxr->itr = vsi->rx_itr_setting;
			wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, rxr->me),
			    rxr->itr);
		}
	}
	rxr->bytes = 0;
	rxr->packets = 0;
}

/*
** Provide an update to the queue TX
** interrupt moderation value.
*/
void
ixl_set_queue_tx_itr(struct ixl_tx_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct ixl_pf	*pf = (struct ixl_pf *)vsi->back;
	struct i40e_hw	*hw = vsi->hw;
	struct tx_ring	*txr = &que->txr;
	u16		tx_itr;
	u16		tx_latency = 0;
	int		tx_bytes;

	/* Idle, do nothing */
	if (txr->bytes == 0)
		return;

	if (pf->dynamic_tx_itr) {
		tx_bytes = txr->bytes/txr->itr;
		tx_itr = txr->itr;

		switch (txr->latency) {
		case IXL_LOW_LATENCY:
			if (tx_bytes > 10) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (tx_bytes > 20) {
				tx_latency = IXL_BULK_LATENCY;
				tx_itr = IXL_ITR_8K;
			} else if (tx_bytes <= 10) {
				tx_latency = IXL_LOW_LATENCY;
				tx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (tx_bytes <= 20) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		}

		txr->latency = tx_latency;

		if (tx_itr != txr->itr) {
			/* do an exponential smoothing */
			tx_itr = (10 * tx_itr * txr->itr) /
			    ((9 * tx_itr) + txr->itr);
			txr->itr = min(tx_itr, IXL_MAX_ITR);
			wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, txr->me),
			    txr->itr);
		}
	} else { /* We may have toggled to non-dynamic */
		if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->tx_itr_setting = pf->tx_itr;
		/* Update the hardware if needed */
		if (txr->itr != vsi->tx_itr_setting) {
			txr->itr = vsi->tx_itr_setting;
			wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, txr->me),
			    txr->itr);
		}
	}
	txr->bytes = 0;
	txr->packets = 0;
	return;
}

#ifdef IXL_DEBUG
/**
 * ixl_sysctl_qtx_tail_handler
 * Retrieves I40E_QTX_TAIL value from hardware
 * for a sysctl.
 */
int
ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS)
{
	struct ixl_tx_queue *tx_que;
	int error;
	u32 val;

	tx_que = ((struct ixl_tx_queue *)oidp->oid_arg1);
	if (!tx_que) return 0;

	val = rd32(tx_que->vsi->hw, tx_que->txr.tail);
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return error;
	return (0);
}

/**
 * ixl_sysctl_qrx_tail_handler
 * Retrieves I40E_QRX_TAIL value from hardware
 * for a sysctl.
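 * The register is read fresh on each query; values written to the
 * sysctl are accepted but have no effect.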
*/ int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_rx_queue *rx_que; int error; u32 val; rx_que = ((struct ixl_rx_queue *)oidp->oid_arg1); if (!rx_que) return 0; val = rd32(rx_que->vsi->hw, rx_que->rxr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } #endif void ixl_add_hw_stats(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); struct i40e_hw_port_stats *pf_stats = &pf->stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); /* Driver statistics */ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &pf->admin_irq, "Admin Queue IRQs received"); sysctl_ctx_init(&vsi->sysctl_ctx); ixl_vsi_add_sysctls(vsi, "pf", true); ixl_add_sysctls_mac_stats(ctx, child, pf_stats); } void ixl_set_rss_hlut(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); int i, que_id; int lut_entry_width; u32 lut = 0; enum i40e_status_code status; lut_entry_width = pf->hw.func_caps.rss_table_entry_width; /* Populate the LUT with max no. of queues in round robin fashion */ u8 hlut_buf[512]; for (i = 0; i < pf->hw.func_caps.rss_table_size; i++) { #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_rx_queues; #else que_id = i % vsi->num_rx_queues; #endif lut = (que_id & ((0x1 << lut_entry_width) - 1)); hlut_buf[i] = lut; } if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_set_rss_lut(hw, vsi->vsi_num, TRUE, hlut_buf, sizeof(hlut_buf)); if (status) device_printf(dev, "i40e_aq_set_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (i = 0; i < pf->hw.func_caps.rss_table_size >> 2; i++) wr32(hw, I40E_PFQF_HLUT(i), ((u32 *)hlut_buf)[i]); ixl_flush(hw); } } /* For PF VSI only */ int ixl_enable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_tx_queues; i++) error = ixl_enable_tx_ring(pf, &pf->qtag, i); for (int i = 0; i < vsi->num_rx_queues; i++) error = ixl_enable_rx_ring(pf, &pf->qtag, i); return (error); } int ixl_disable_rings(struct ixl_pf *pf, struct ixl_vsi *vsi, struct ixl_pf_qtag *qtag) { int error = 0; for (int i = 0; i < vsi->num_tx_queues; i++) error = ixl_disable_tx_ring(pf, qtag, i); for (int i = 0; i < vsi->num_rx_queues; i++) error = ixl_disable_rx_ring(pf, qtag, i); return (error); } void ixl_enable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { for (int i = 0; i < vsi->num_rx_queues; i++, que++) ixl_enable_queue(hw, que->rxr.me); } else ixl_enable_intr0(hw); } void ixl_disable_rings_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, que++) ixl_disable_queue(hw, que->rxr.me); } int ixl_prepare_for_reset(struct ixl_pf *pf, bool is_up) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error = 0; if (is_up) ixl_if_stop(pf->vsi.ctx); ixl_shutdown_hmc(pf); ixl_disable_intr0(hw); error = i40e_shutdown_adminq(hw); if (error) device_printf(dev, "Shutdown Admin queue failed with code %d\n", error); 
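
	/* Return the PF LAN VSI's queues to the queue manager */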
ixl_pf_qmgr_release(&pf->qmgr, &pf->qtag); return (error); } int ixl_rebuild_hw_structs_after_reset(struct ixl_pf *pf, bool is_up) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; enum i40e_get_fw_lldp_status_resp lldp_status; int error = 0; device_printf(dev, "Rebuilding driver state...\n"); /* Setup */ error = i40e_init_adminq(hw); if (error != 0 && error != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %d\n", error); goto ixl_rebuild_hw_structs_after_reset_err; } if (IXL_PF_IN_RECOVERY_MODE(pf)) { /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } return (0); } i40e_clear_pxe_mode(hw); error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "ixl_get_hw_capabilities failed: %d\n", error); goto ixl_rebuild_hw_structs_after_reset_err; } error = ixl_setup_hmc(pf); if (error) goto ixl_rebuild_hw_structs_after_reset_err; /* reserve a contiguous allocation for the PF's VSI */ error = ixl_pf_qmgr_alloc_contiguous(&pf->qmgr, vsi->num_tx_queues, &pf->qtag); if (error) { device_printf(dev, "Failed to reserve queues for PF LAN VSI, error %d\n", error); } error = ixl_switch_config(pf); if (error) { device_printf(dev, "ixl_rebuild_hw_structs_after_reset: ixl_switch_config() failed: %d\n", error); error = EIO; goto ixl_rebuild_hw_structs_after_reset_err; } error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "init: i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, hw->aq.asq_last_status); error = EIO; goto ixl_rebuild_hw_structs_after_reset_err; } u8 set_fc_err_mask; error = i40e_set_fc(hw, &set_fc_err_mask, true); if (error) { device_printf(dev, "init: setting link flow control failed; retcode %d," " fc_err_mask 0x%02x\n", error, set_fc_err_mask); error = EIO; goto ixl_rebuild_hw_structs_after_reset_err; } /* Remove default filters reinstalled by FW on reset */ ixl_del_default_hw_filters(vsi); /* Receive broadcast Ethernet frames */ i40e_aq_set_vsi_broadcast(&pf->hw, vsi->seid, TRUE, NULL); /* Determine link state */ if (ixl_attach_get_link_status(pf)) { error = EINVAL; } i40e_aq_set_dcb_parameters(hw, TRUE, NULL); /* Query device FW LLDP status */ if (i40e_get_fw_lldp_status(hw, &lldp_status) == I40E_SUCCESS) { if (lldp_status == I40E_GET_FW_LLDP_STATUS_DISABLED) { atomic_set_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } else { atomic_clear_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } } /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } if (is_up) { iflib_request_reset(vsi->ctx); iflib_admin_intr_deferred(vsi->ctx); } device_printf(dev, "Rebuilding driver state done.\n"); return (0); ixl_rebuild_hw_structs_after_reset_err: device_printf(dev, "Reload the driver to recover\n"); return (error); } /* ** Set flow control using sysctl: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ int ixl_sysctl_set_flowcntl(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_fc, error = 0; enum i40e_status_code aq_error = 0; u8 fc_aq_err = 0; /* Get request */ requested_fc = pf->fc; error = sysctl_handle_int(oidp, &requested_fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_fc < 0 || requested_fc > 3) { 
device_printf(dev, "Invalid fc mode; valid modes are 0 through 3\n"); return (EINVAL); } /* Set fc ability for port */ hw->fc.requested_mode = requested_fc; aq_error = i40e_set_fc(hw, &fc_aq_err, TRUE); if (aq_error) { device_printf(dev, "%s: Error setting Flow Control mode %d; fc_err %#x\n", __func__, aq_error, fc_aq_err); return (EIO); } pf->fc = requested_fc; return (0); } diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c index 0b1604cc4918..7b1bf78dac09 100644 --- a/sys/dev/ixl/ixl_pf_main.c +++ b/sys/dev/ixl/ixl_pf_main.c @@ -1,4701 +1,4701 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #include "ixl_pf.h" #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif static u8 ixl_convert_sysctl_aq_link_speed(u8, bool); static void ixl_sbuf_print_bytes(struct sbuf *, u8 *, int, int, bool); static const char * ixl_link_speed_string(enum i40e_aq_link_speed); static u_int ixl_add_maddr(void *, struct sockaddr_dl *, u_int); static u_int ixl_match_maddr(void *, struct sockaddr_dl *, u_int); static char * ixl_switch_element_string(struct sbuf *, u8, u16); static enum ixl_fw_mode ixl_get_fw_mode(struct ixl_pf *); /* Sysctls */ static int ixl_sysctl_set_advertise(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_supported_speeds(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_current_speed(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_eee_enable(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_set_link_active(SYSCTL_HANDLER_ARGS); /* Debug Sysctls */ static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_vlans(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_dump_debug_data(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fw_lldp(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS); /* Debug Sysctls */ static int ixl_sysctl_do_pf_reset(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_do_core_reset(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_do_global_reset(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS); #ifdef IXL_DEBUG static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS); #endif #ifdef IXL_IW extern int ixl_enable_iwarp; extern int ixl_limit_iwarp_msix; #endif static const char * const ixl_fc_string[6] = { "None", "Rx", "Tx", "Full", "Priority", "Default" }; static char *ixl_fec_string[3] = { "CL108 RS-FEC", "CL74 FC-FEC/BASE-R", "None" }; MALLOC_DEFINE(M_IXL, "ixl", "ixl driver allocations"); /* ** Put the FW, API, NVM, EEtrackID, and OEM version information into a string */ void ixl_nvm_version_str(struct i40e_hw *hw, struct sbuf *buf) { u8 oem_ver = (u8)(hw->nvm.oem_ver >> 24); u16 oem_build = (u16)((hw->nvm.oem_ver >> 16) & 0xFFFF); u8 oem_patch = (u8)(hw->nvm.oem_ver & 0xFF); sbuf_printf(buf, "fw %d.%d.%05d api %d.%d nvm %x.%02x etid %08x oem %d.%d.%d", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, hw->aq.api_maj_ver, 
hw->aq.api_min_ver, (hw->nvm.version & IXL_NVM_VERSION_HI_MASK) >> IXL_NVM_VERSION_HI_SHIFT, (hw->nvm.version & IXL_NVM_VERSION_LO_MASK) >> IXL_NVM_VERSION_LO_SHIFT, hw->nvm.eetrack, oem_ver, oem_build, oem_patch); } void ixl_print_nvm_version(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *sbuf; sbuf = sbuf_new_auto(); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); device_printf(dev, "%s\n", sbuf_data(sbuf)); sbuf_delete(sbuf); } /** * ixl_get_fw_mode - Check the state of FW * @hw: device hardware structure * * Identify state of FW. It might be in a recovery mode * which limits functionality and requires special handling * from the driver. * * @returns FW mode (normal, recovery, unexpected EMP reset) */ static enum ixl_fw_mode ixl_get_fw_mode(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; enum ixl_fw_mode fw_mode = IXL_FW_MODE_NORMAL; u32 fwsts; #ifdef IXL_DEBUG if (pf->recovery_mode) return IXL_FW_MODE_RECOVERY; #endif fwsts = rd32(hw, I40E_GL_FWSTS) & I40E_GL_FWSTS_FWS1B_MASK; /* Is set and has one of expected values */ if ((fwsts >= I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK && fwsts <= I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_NVM_MASK) || fwsts == I40E_X722_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK || fwsts == I40E_X722_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK) fw_mode = IXL_FW_MODE_RECOVERY; else { if (fwsts > I40E_GL_FWSTS_FWS1B_EMPR_0 && fwsts <= I40E_GL_FWSTS_FWS1B_EMPR_10) fw_mode = IXL_FW_MODE_UEMPR; } return (fw_mode); } /** * ixl_pf_reset - Reset the PF * @pf: PF structure * * Ensure that FW is in the right state and do the reset * if needed. * * @returns zero on success, or an error code on failure. */ int ixl_pf_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; enum ixl_fw_mode fw_mode; fw_mode = ixl_get_fw_mode(pf); ixl_dbg_info(pf, "%s: before PF reset FW mode: 0x%08x\n", __func__, fw_mode); if (fw_mode == IXL_FW_MODE_RECOVERY) { atomic_set_32(&pf->state, IXL_PF_STATE_RECOVERY_MODE); /* Don't try to reset device if it's in recovery mode */ return (0); } status = i40e_pf_reset(hw); if (status == I40E_SUCCESS) return (0); /* Check FW mode again in case it has changed while * waiting for reset to complete */ fw_mode = ixl_get_fw_mode(pf); ixl_dbg_info(pf, "%s: after PF reset FW mode: 0x%08x\n", __func__, fw_mode); if (fw_mode == IXL_FW_MODE_RECOVERY) { atomic_set_32(&pf->state, IXL_PF_STATE_RECOVERY_MODE); return (0); } if (fw_mode == IXL_FW_MODE_UEMPR) device_printf(pf->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n"); else device_printf(pf->dev, "PF reset failure %s\n", i40e_stat_str(hw, status)); return (EIO); } /** * ixl_setup_hmc - Setup LAN Host Memory Cache * @pf: PF structure * * Init and configure LAN Host Memory Cache * * @returns 0 on success, EIO on error */ int ixl_setup_hmc(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; status = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (status) { device_printf(pf->dev, "init_lan_hmc failed: %s\n", i40e_stat_str(hw, status)); return (EIO); } status = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (status) { device_printf(pf->dev, "configure_lan_hmc failed: %s\n", i40e_stat_str(hw, status)); return (EIO); } return (0); } /** * ixl_shutdown_hmc - Shutdown LAN Host Memory Cache * @pf: PF structure * * Shutdown Host Memory Cache if configured. 
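 * Safe to call unconditionally; it returns early if the HMC
 * object was never initialized.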
* */ void ixl_shutdown_hmc(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; /* HMC not configured, no need to shutdown */ if (hw->hmc.hmc_obj == NULL) return; status = i40e_shutdown_lan_hmc(hw); if (status) device_printf(pf->dev, "Shutdown LAN HMC failed with code %s\n", i40e_stat_str(hw, status)); } /* * Write PF ITR values to queue ITR registers. */ void ixl_configure_itr(struct ixl_pf *pf) { ixl_configure_tx_itr(pf); ixl_configure_rx_itr(pf); } /********************************************************************* * * Get the hardware capabilities * **********************************************************************/ int ixl_get_hw_capabilities(struct ixl_pf *pf) { struct i40e_aqc_list_capabilities_element_resp *buf; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; int len, i2c_intfc_num; bool again = TRUE; u16 needed; if (IXL_PF_IN_RECOVERY_MODE(pf)) { hw->func_caps.iwarp = 0; return (0); } len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); retry: if (!(buf = (struct i40e_aqc_list_capabilities_element_resp *) malloc(len, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate cap memory\n"); return (ENOMEM); } /* This populates the hw struct */ status = i40e_aq_discover_capabilities(hw, buf, len, &needed, i40e_aqc_opc_list_func_capabilities, NULL); free(buf, M_IXL); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) && (again == TRUE)) { /* retry once with a larger buffer */ again = FALSE; len = needed; goto retry; } else if (status != I40E_SUCCESS) { device_printf(dev, "capability discovery failed; status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (ENODEV); } /* * Some devices have both MDIO and I2C; since this isn't reported * by the FW, check registers to see if an I2C interface exists. */ i2c_intfc_num = ixl_find_i2c_interface(pf); if (i2c_intfc_num != -1) pf->has_i2c = true; /* Determine functions to use for driver I2C accesses */ switch (pf->i2c_access_method) { case IXL_I2C_ACCESS_METHOD_BEST_AVAILABLE: { if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { pf->read_i2c_byte = ixl_read_i2c_byte_aq; pf->write_i2c_byte = ixl_write_i2c_byte_aq; } else { pf->read_i2c_byte = ixl_read_i2c_byte_reg; pf->write_i2c_byte = ixl_write_i2c_byte_reg; } break; } case IXL_I2C_ACCESS_METHOD_AQ: pf->read_i2c_byte = ixl_read_i2c_byte_aq; pf->write_i2c_byte = ixl_write_i2c_byte_aq; break; case IXL_I2C_ACCESS_METHOD_REGISTER_I2CCMD: pf->read_i2c_byte = ixl_read_i2c_byte_reg; pf->write_i2c_byte = ixl_write_i2c_byte_reg; break; case IXL_I2C_ACCESS_METHOD_BIT_BANG_I2CPARAMS: pf->read_i2c_byte = ixl_read_i2c_byte_bb; pf->write_i2c_byte = ixl_write_i2c_byte_bb; break; default: /* Should not happen */ device_printf(dev, "Error setting I2C access functions\n"); break; } /* Keep link active by default */ atomic_set_32(&pf->state, IXL_PF_STATE_LINK_ACTIVE_ON_DOWN); /* Print a subset of the capability information. */ device_printf(dev, "PF-ID[%d]: VFs %d, MSI-X %d, VF MSI-X %d, QPs %d, %s\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.num_tx_qp, (hw->func_caps.mdio_port_mode == 2) ? "I2C" : (hw->func_caps.mdio_port_mode == 1 && pf->has_i2c) ? "MDIO & I2C" : (hw->func_caps.mdio_port_mode == 1) ? 
"MDIO dedicated" : "MDIO shared"); return (0); } /* For the set_advertise sysctl */ void ixl_set_initial_advertised_speeds(struct ixl_pf *pf) { device_t dev = pf->dev; int err; /* Make sure to initialize the device to the complete list of * supported speeds on driver load, to ensure unloading and * reloading the driver will restore this value. */ err = ixl_set_advertised_speeds(pf, pf->supported_speeds, true); if (err) { /* Non-fatal error */ device_printf(dev, "%s: ixl_set_advertised_speeds() error %d\n", __func__, err); return; } pf->advertised_speed = ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false); } int ixl_teardown_hw_structs(struct ixl_pf *pf) { enum i40e_status_code status = 0; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Shutdown LAN HMC */ if (hw->hmc.hmc_obj) { status = i40e_shutdown_lan_hmc(hw); if (status) { device_printf(dev, "init: LAN HMC shutdown failure; status %s\n", i40e_stat_str(hw, status)); goto err_out; } } /* Shutdown admin queue */ ixl_disable_intr0(hw); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "init: Admin Queue shutdown failure; status %s\n", i40e_stat_str(hw, status)); ixl_pf_qmgr_release(&pf->qmgr, &pf->qtag); err_out: return (status); } /* ** Creates new filter with given MAC address and VLAN ID */ static struct ixl_mac_filter * ixl_new_filter(struct ixl_ftl_head *headp, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; /* create a new empty filter */ f = malloc(sizeof(struct ixl_mac_filter), M_IXL, M_NOWAIT | M_ZERO); if (f) { LIST_INSERT_HEAD(headp, f, ftle); bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->vlan = vlan; } return (f); } /** * ixl_free_filters - Free all filters in given list * headp - pointer to list head * * Frees memory used by each entry in the list. * Does not remove filters from HW. */ void ixl_free_filters(struct ixl_ftl_head *headp) { struct ixl_mac_filter *f, *nf; f = LIST_FIRST(headp); while (f != NULL) { nf = LIST_NEXT(f, ftle); free(f, M_IXL); f = nf; } LIST_INIT(headp); } static u_int ixl_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct ixl_add_maddr_arg *ama = arg; struct ixl_vsi *vsi = ama->vsi; const u8 *macaddr = (u8*)LLADDR(sdl); struct ixl_mac_filter *f; /* Does one already exist */ f = ixl_find_filter(&vsi->ftl, macaddr, IXL_VLAN_ANY); if (f != NULL) return (0); f = ixl_new_filter(&ama->to_add, macaddr, IXL_VLAN_ANY); if (f == NULL) { device_printf(vsi->dev, "WARNING: no filter available!!\n"); return (0); } f->flags |= IXL_FILTER_MC; return (1); } /********************************************************************* * Filter Routines * * Routines for multicast and vlan filter management. 
* *********************************************************************/ void ixl_add_multi(struct ixl_vsi *vsi) { - struct ifnet *ifp = vsi->ifp; + if_t ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int mcnt = 0; struct ixl_add_maddr_arg cb_arg; IOCTL_DEBUGOUT("ixl_add_multi: begin"); mcnt = if_llmaddr_count(ifp); if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); /* delete all existing MC filters */ ixl_del_multi(vsi, true); return; } cb_arg.vsi = vsi; LIST_INIT(&cb_arg.to_add); mcnt = if_foreach_llmaddr(ifp, ixl_add_maddr, &cb_arg); if (mcnt > 0) ixl_add_hw_filters(vsi, &cb_arg.to_add, mcnt); IOCTL_DEBUGOUT("ixl_add_multi: end"); } static u_int ixl_match_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct ixl_mac_filter *f = arg; if (ixl_ether_is_equal(f->macaddr, (u8 *)LLADDR(sdl))) return (1); else return (0); } void ixl_del_multi(struct ixl_vsi *vsi, bool all) { struct ixl_ftl_head to_del; - struct ifnet *ifp = vsi->ifp; + if_t ifp = vsi->ifp; struct ixl_mac_filter *f, *fn; int mcnt = 0; IOCTL_DEBUGOUT("ixl_del_multi: begin"); LIST_INIT(&to_del); /* Search for removed multicast addresses */ LIST_FOREACH_SAFE(f, &vsi->ftl, ftle, fn) { if ((f->flags & IXL_FILTER_MC) == 0 || (!all && (if_foreach_llmaddr(ifp, ixl_match_maddr, f) == 0))) continue; LIST_REMOVE(f, ftle); LIST_INSERT_HEAD(&to_del, f, ftle); mcnt++; } if (mcnt > 0) ixl_del_hw_filters(vsi, &to_del, mcnt); } void ixl_link_up_msg(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; - struct ifnet *ifp = pf->vsi.ifp; + if_t ifp = pf->vsi.ifp; char *req_fec_string, *neg_fec_string; u8 fec_abilities; fec_abilities = hw->phy.link_info.req_fec_info; /* If both RS and KR are requested, only show RS */ if (fec_abilities & I40E_AQ_REQUEST_FEC_RS) req_fec_string = ixl_fec_string[0]; else if (fec_abilities & I40E_AQ_REQUEST_FEC_KR) req_fec_string = ixl_fec_string[1]; else req_fec_string = ixl_fec_string[2]; if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) neg_fec_string = ixl_fec_string[0]; else if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) neg_fec_string = ixl_fec_string[1]; else neg_fec_string = ixl_fec_string[2]; log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", - ifp->if_xname, + if_name(ifp), ixl_link_speed_string(hw->phy.link_info.link_speed), req_fec_string, neg_fec_string, (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) ? "True" : "False", (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX && hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[3] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ? ixl_fc_string[2] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[1] : ixl_fc_string[0]); } /* * Configure admin queue/misc interrupt cause registers in hardware. */ void ixl_configure_intr0_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* First set up the adminq - vector 0 */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* * 0x7FF is the end of the queue list. 
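	 * (0x7FF is the IXL_QUEUE_EOL sentinel value.)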
* This means we won't use MSI-X vector 0 for a queue interrupt * in MSI-X mode. */ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), 0x3E); wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); wr32(hw, I40E_PFINT_STAT_CTL0, 0); } void ixl_add_ifmedia(struct ifmedia *media, u64 phy_types) { /* Display supported media types */ if (phy_types & (I40E_CAP_PHY_TYPE_100BASE_TX)) ifmedia_add(media, IFM_ETHER | IFM_100_TX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_T)) ifmedia_add(media, IFM_ETHER | IFM_1000_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_SX)) ifmedia_add(media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_LX)) ifmedia_add(media, IFM_ETHER | IFM_1000_LX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_2_5GBASE_T)) ifmedia_add(media, IFM_ETHER | IFM_2500_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_5GBASE_T)) ifmedia_add(media, IFM_ETHER | IFM_5000_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XAUI) || phy_types & (I40E_CAP_PHY_TYPE_XFI) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU)) ifmedia_add(media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SR)) ifmedia_add(media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_LR)) ifmedia_add(media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_T)) ifmedia_add(media, IFM_ETHER | IFM_10G_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_AOC) || phy_types & (I40E_CAP_PHY_TYPE_XLAUI) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_SR4)) ifmedia_add(media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_LR4)) ifmedia_add(media, IFM_ETHER | IFM_40G_LR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_KX)) ifmedia_add(media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1)) ifmedia_add(media, IFM_ETHER | IFM_10G_CR1, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_AOC)) ifmedia_add(media, IFM_ETHER | IFM_10G_AOC, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_SFI)) ifmedia_add(media, IFM_ETHER | IFM_10G_SFI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KX4)) ifmedia_add(media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KR)) ifmedia_add(media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_20GBASE_KR2)) ifmedia_add(media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XLPPI)) ifmedia_add(media, IFM_ETHER | IFM_40G_XLPPI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_KR)) ifmedia_add(media, IFM_ETHER | IFM_25G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_CR)) ifmedia_add(media, IFM_ETHER | IFM_25G_CR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_SR)) ifmedia_add(media, IFM_ETHER | IFM_25G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_LR)) ifmedia_add(media, IFM_ETHER | IFM_25G_LR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_AOC)) ifmedia_add(media, IFM_ETHER | IFM_25G_AOC, 0, NULL); if (phy_types & 
(I40E_CAP_PHY_TYPE_25GBASE_ACC)) ifmedia_add(media, IFM_ETHER | IFM_25G_ACC, 0, NULL); } /********************************************************************* * * Get Firmware Switch configuration * - this will need to be more robust when more complex * switch configurations are enabled. * **********************************************************************/ int ixl_switch_config(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = iflib_get_dev(vsi->ctx); struct i40e_aqc_get_switch_config_resp *sw_config; u8 aq_buf[I40E_AQ_LARGE_BUF]; int ret; u16 next = 0; memset(&aq_buf, 0, sizeof(aq_buf)); sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; ret = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (ret) { device_printf(dev, "aq_get_switch_config() failed, error %d," " aq_error %d\n", ret, pf->hw.aq.asq_last_status); return (ret); } if (pf->dbg_mask & IXL_DBG_SWITCH_INFO) { device_printf(dev, "Switch config: header reported: %d in structure, %d total\n", LE16_TO_CPU(sw_config->header.num_reported), LE16_TO_CPU(sw_config->header.num_total)); for (int i = 0; i < LE16_TO_CPU(sw_config->header.num_reported); i++) { device_printf(dev, "-> %d: type=%d seid=%d uplink=%d downlink=%d\n", i, sw_config->element[i].element_type, LE16_TO_CPU(sw_config->element[i].seid), LE16_TO_CPU(sw_config->element[i].uplink_seid), LE16_TO_CPU(sw_config->element[i].downlink_seid)); } } /* Simplified due to a single VSI */ vsi->uplink_seid = LE16_TO_CPU(sw_config->element[0].uplink_seid); vsi->downlink_seid = LE16_TO_CPU(sw_config->element[0].downlink_seid); vsi->seid = LE16_TO_CPU(sw_config->element[0].seid); return (ret); } void ixl_vsi_add_sysctls(struct ixl_vsi * vsi, const char * sysctl_name, bool queues_sysctls) { struct sysctl_oid *tree; struct sysctl_oid_list *child; struct sysctl_oid_list *vsi_list; tree = device_get_sysctl_tree(vsi->dev); child = SYSCTL_CHILDREN(tree); vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->sysctl_ctx, child, OID_AUTO, sysctl_name, CTLFLAG_RD, NULL, "VSI Number"); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); ixl_add_sysctls_eth_stats(&vsi->sysctl_ctx, vsi_list, &vsi->eth_stats); /* Copy of netstat RX errors counter for validation purposes */ SYSCTL_ADD_UQUAD(&vsi->sysctl_ctx, vsi_list, OID_AUTO, "rx_errors", CTLFLAG_RD, &vsi->ierrors, "RX packet errors"); if (queues_sysctls) ixl_vsi_add_queues_stats(vsi, &vsi->sysctl_ctx); } /* * Used to set the Tx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_tx_itr; requested_tx_itr = pf->tx_itr; error = sysctl_handle_int(oidp, &requested_tx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_tx_itr) { device_printf(dev, "Cannot set TX itr value while dynamic TX itr is enabled\n"); return (EINVAL); } if (requested_tx_itr < 0 || requested_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid TX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->tx_itr = requested_tx_itr; ixl_configure_tx_itr(pf); return (error); } /* * Used to set the Rx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. 
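 * The request is rejected while dynamic TX ITR is enabled, or if
 * the value falls outside [0, IXL_MAX_ITR].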
*/ static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_rx_itr; requested_rx_itr = pf->rx_itr; error = sysctl_handle_int(oidp, &requested_rx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_rx_itr) { device_printf(dev, "Cannot set RX itr value while dynamic RX itr is enabled\n"); return (EINVAL); } if (requested_rx_itr < 0 || requested_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid RX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->rx_itr = requested_rx_itr; ixl_configure_rx_itr(pf); return (error); } void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_hw_port_stats *stats) { struct sysctl_oid *stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Mac Statistics"); struct sysctl_oid_list *stat_list = SYSCTL_CHILDREN(stat_node); struct i40e_eth_stats *eth_stats = &stats->eth; ixl_add_sysctls_eth_stats(ctx, stat_list, eth_stats); struct ixl_sysctl_info ctls[] = { {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, {&stats->rx_length_errors, "rx_length_errors", "Receive Length Errors"}, /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->checksum_error, "checksum_errors", "Checksum Errors"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* End */ {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_set_rss_key(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; enum 
i40e_status_code status; #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #else ixl_get_default_rss_key(rss_seed); #endif /* Fill out hash function seed */ if (hw->mac.type == I40E_MAC_X722) { struct i40e_aqc_get_set_rss_key_data key_data; bcopy(rss_seed, &key_data, 52); status = i40e_aq_set_rss_key(hw, vsi->vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_set_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), rss_seed[i]); } } /* * Configure enabled PCTYPES for RSS. */ void ixl_set_rss_pctypes(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u64 set_hena = 0, hena; #ifdef RSS u32 rss_hash_config; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else if (hw->mac.type == I40E_MAC_X722) set_hena = IXL_DEFAULT_RSS_HENA_X722; else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= set_hena; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); } /* ** Setup the PF's RSS parameters. */ void ixl_config_rss(struct ixl_pf *pf) { ixl_set_rss_key(pf); ixl_set_rss_pctypes(pf); ixl_set_rss_hlut(pf); } /* * In some firmware versions there is default MAC/VLAN filter * configured which interferes with filters managed by driver. * Make sure it's removed. */ void ixl_del_default_hw_filters(struct ixl_vsi *vsi) { struct i40e_aqc_remove_macvlan_element_data e; bzero(&e, sizeof(e)); bcopy(vsi->hw->mac.perm_addr, e.mac_addr, ETHER_ADDR_LEN); e.vlan_tag = 0; e.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; i40e_aq_remove_macvlan(vsi->hw, vsi->seid, &e, 1, NULL); bzero(&e, sizeof(e)); bcopy(vsi->hw->mac.perm_addr, e.mac_addr, ETHER_ADDR_LEN); e.vlan_tag = 0; e.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; i40e_aq_remove_macvlan(vsi->hw, vsi->seid, &e, 1, NULL); } /* ** Initialize filter list and add filters that the hardware ** needs to know about. ** ** Requires VSI's seid to be set before calling. */ void ixl_init_filters(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; ixl_dbg_filter(pf, "%s: start\n", __func__); /* Initialize mac filter list for VSI */ LIST_INIT(&vsi->ftl); vsi->num_hw_filters = 0; /* Receive broadcast Ethernet frames */ i40e_aq_set_vsi_broadcast(&pf->hw, vsi->seid, TRUE, NULL); if (IXL_VSI_IS_VF(vsi)) return; ixl_del_default_hw_filters(vsi); ixl_add_filter(vsi, vsi->hw->mac.addr, IXL_VLAN_ANY); /* * Prevent Tx flow control frames from being sent out by * non-firmware transmitters. * This affects every VSI in the PF. 
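	 * When built with IXL_DEBUG_FC, the filter is installed only
	 * if the enable_tx_fc_filter tunable is set.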
*/ #ifndef IXL_DEBUG_FC i40e_add_filter_to_drop_tx_flow_control_frames(vsi->hw, vsi->seid); #else if (pf->enable_tx_fc_filter) i40e_add_filter_to_drop_tx_flow_control_frames(vsi->hw, vsi->seid); #endif } void ixl_reconfigure_filters(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_ftl_head tmp; int cnt; /* * The ixl_add_hw_filters function adds filters configured * in HW to a list in VSI. Move all filters to a temporary * list to avoid corrupting it by concatenating to itself. */ LIST_INIT(&tmp); LIST_CONCAT(&tmp, &vsi->ftl, ixl_mac_filter, ftle); cnt = vsi->num_hw_filters; vsi->num_hw_filters = 0; ixl_add_hw_filters(vsi, &tmp, cnt); /* * When the vsi is allocated for the VFs, both vsi->hw and vsi->ifp * will be NULL. Furthermore, the ftl of such vsi already contains * IXL_VLAN_ANY filter so we can skip that as well. */ if (hw == NULL) return; /* Filter could be removed if MAC address was changed */ ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); if ((if_getcapenable(vsi->ifp) & IFCAP_VLAN_HWFILTER) == 0) return; /* * VLAN HW filtering is enabled, make sure that filters * for all registered VLAN tags are configured */ ixl_add_vlan_filters(vsi, hw->mac.addr); } /* * This routine adds a MAC/VLAN filter to the software filter * list, then adds that new filter to the HW if it doesn't already * exist in the SW filter list. */ void ixl_add_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f, *tmp; struct ixl_pf *pf; device_t dev; struct ixl_ftl_head to_add; int to_add_cnt; pf = vsi->back; dev = pf->dev; to_add_cnt = 1; ixl_dbg_filter(pf, "ixl_add_filter: " MAC_FORMAT ", vlan %4d\n", MAC_FORMAT_ARGS(macaddr), vlan); /* Does one already exist */ f = ixl_find_filter(&vsi->ftl, macaddr, vlan); if (f != NULL) return; LIST_INIT(&to_add); f = ixl_new_filter(&to_add, macaddr, vlan); if (f == NULL) { device_printf(dev, "WARNING: no filter available!!\n"); return; } if (f->vlan != IXL_VLAN_ANY) f->flags |= IXL_FILTER_VLAN; else vsi->num_macs++; /* ** Is this the first vlan being registered, if so we ** need to remove the ANY filter that indicates we are ** not in a vlan, and replace that with a 0 filter. */ if ((vlan != IXL_VLAN_ANY) && (vsi->num_vlans == 1)) { tmp = ixl_find_filter(&vsi->ftl, macaddr, IXL_VLAN_ANY); if (tmp != NULL) { struct ixl_ftl_head to_del; /* Prepare new filter first to avoid removing * VLAN_ANY filter if allocation fails */ f = ixl_new_filter(&to_add, macaddr, 0); if (f == NULL) { device_printf(dev, "WARNING: no filter available!!\n"); free(LIST_FIRST(&to_add), M_IXL); return; } to_add_cnt++; LIST_REMOVE(tmp, ftle); LIST_INIT(&to_del); LIST_INSERT_HEAD(&to_del, tmp, ftle); ixl_del_hw_filters(vsi, &to_del, 1); } } ixl_add_hw_filters(vsi, &to_add, to_add_cnt); } /** * ixl_add_vlan_filters - Add MAC/VLAN filters for all registered VLANs * @vsi: pointer to VSI * @macaddr: MAC address * * Adds MAC/VLAN filter for each VLAN configured on the interface * if there is enough HW filters. Otherwise adds a single filter * for all tagged and untagged frames to allow all configured VLANs * to recieve traffic. 
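 * The list of new filters is assembled first and then pushed to
 * the hardware in a single ixl_add_hw_filters() call.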
*/ void ixl_add_vlan_filters(struct ixl_vsi *vsi, const u8 *macaddr) { struct ixl_ftl_head to_add; struct ixl_mac_filter *f; int to_add_cnt = 0; int i, vlan = 0; if (vsi->num_vlans == 0 || vsi->num_vlans > IXL_MAX_VLAN_FILTERS) { ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY); return; } LIST_INIT(&to_add); /* Add filter for untagged frames if it does not exist yet */ f = ixl_find_filter(&vsi->ftl, macaddr, 0); if (f == NULL) { f = ixl_new_filter(&to_add, macaddr, 0); if (f == NULL) { device_printf(vsi->dev, "WARNING: no filter available!!\n"); return; } to_add_cnt++; } for (i = 1; i < EVL_VLID_MASK; i = vlan + 1) { bit_ffs_at(vsi->vlans_map, i, IXL_VLANS_MAP_LEN, &vlan); if (vlan == -1) break; /* Does one already exist */ f = ixl_find_filter(&vsi->ftl, macaddr, vlan); if (f != NULL) continue; f = ixl_new_filter(&to_add, macaddr, vlan); if (f == NULL) { device_printf(vsi->dev, "WARNING: no filter available!!\n"); ixl_free_filters(&to_add); return; } to_add_cnt++; } ixl_add_hw_filters(vsi, &to_add, to_add_cnt); } void ixl_del_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f, *tmp; struct ixl_ftl_head ftl_head; int to_del_cnt = 1; ixl_dbg_filter((struct ixl_pf *)vsi->back, "ixl_del_filter: " MAC_FORMAT ", vlan %4d\n", MAC_FORMAT_ARGS(macaddr), vlan); f = ixl_find_filter(&vsi->ftl, macaddr, vlan); if (f == NULL) return; LIST_REMOVE(f, ftle); LIST_INIT(&ftl_head); LIST_INSERT_HEAD(&ftl_head, f, ftle); if (f->vlan == IXL_VLAN_ANY && (f->flags & IXL_FILTER_VLAN) != 0) vsi->num_macs--; /* If this is not the last vlan just remove the filter */ if (vlan == IXL_VLAN_ANY || vsi->num_vlans > 0) { ixl_del_hw_filters(vsi, &ftl_head, to_del_cnt); return; } /* It's the last vlan, we need to switch back to a non-vlan filter */ tmp = ixl_find_filter(&vsi->ftl, macaddr, 0); if (tmp != NULL) { LIST_REMOVE(tmp, ftle); LIST_INSERT_AFTER(f, tmp, ftle); to_del_cnt++; } ixl_del_hw_filters(vsi, &ftl_head, to_del_cnt); ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY); } /** * ixl_del_all_vlan_filters - Delete all VLAN filters with given MAC * @vsi: VSI which filters need to be removed * @macaddr: MAC address * * Remove all MAC/VLAN filters with a given MAC address. For multicast * addresses there is always single filter for all VLANs used (IXL_VLAN_ANY) * so skip them to speed up processing. Those filters should be removed * using ixl_del_filter function. */ void ixl_del_all_vlan_filters(struct ixl_vsi *vsi, const u8 *macaddr) { struct ixl_mac_filter *f, *tmp; struct ixl_ftl_head to_del; int to_del_cnt = 0; LIST_INIT(&to_del); LIST_FOREACH_SAFE(f, &vsi->ftl, ftle, tmp) { if ((f->flags & IXL_FILTER_MC) != 0 || !ixl_ether_is_equal(f->macaddr, macaddr)) continue; LIST_REMOVE(f, ftle); LIST_INSERT_HEAD(&to_del, f, ftle); to_del_cnt++; } ixl_dbg_filter((struct ixl_pf *)vsi->back, "%s: " MAC_FORMAT ", to_del_cnt: %d\n", __func__, MAC_FORMAT_ARGS(macaddr), to_del_cnt); if (to_del_cnt > 0) ixl_del_hw_filters(vsi, &to_del, to_del_cnt); } /* ** Find the filter with both matching mac addr and vlan id */ struct ixl_mac_filter * ixl_find_filter(struct ixl_ftl_head *headp, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; LIST_FOREACH(f, headp, ftle) { if (ixl_ether_is_equal(f->macaddr, macaddr) && (f->vlan == vlan)) { return (f); } } return (NULL); } /* ** This routine takes additions to the vsi filter ** table and creates an Admin Queue call to create ** the filters in the hardware. 
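**
** On success the new entries are moved onto the VSI filter list;
** on partial failure each element's match_method is inspected so
** that only filters the hardware actually accepted are kept.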
*/
void
ixl_add_hw_filters(struct ixl_vsi *vsi, struct ixl_ftl_head *to_add, int cnt)
{
	struct i40e_aqc_add_macvlan_element_data *a, *b;
	struct ixl_mac_filter	*f, *fn;
	struct ixl_pf		*pf;
	struct i40e_hw		*hw;
	device_t		dev;
	enum i40e_status_code	status;
	int			j = 0;

	pf = vsi->back;
	dev = vsi->dev;
	hw = &pf->hw;

	ixl_dbg_filter(pf, "ixl_add_hw_filters: cnt: %d\n", cnt);

	if (cnt < 1) {
		ixl_dbg_info(pf, "ixl_add_hw_filters: cnt == 0\n");
		return;
	}

	a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt,
	    M_IXL, M_NOWAIT | M_ZERO);
	if (a == NULL) {
		device_printf(dev, "add_hw_filters failed to get memory\n");
		return;
	}

	LIST_FOREACH(f, to_add, ftle) {
		b = &a[j]; // a pox on fvl long names :)
		bcopy(f->macaddr, b->mac_addr, ETHER_ADDR_LEN);
		if (f->vlan == IXL_VLAN_ANY) {
			b->vlan_tag = 0;
			b->flags = I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
		} else {
			b->vlan_tag = f->vlan;
			b->flags = 0;
		}
		b->flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
		ixl_dbg_filter(pf, "ADD: " MAC_FORMAT "\n",
		    MAC_FORMAT_ARGS(f->macaddr));

		if (++j == cnt)
			break;
	}
	if (j != cnt) {
		/* Something went wrong */
		device_printf(dev,
		    "%s ERROR: list of filters too short; expected: %d, found: %d\n",
		    __func__, cnt, j);
		ixl_free_filters(to_add);
		goto out_free;
	}

	status = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL);
	if (status == I40E_SUCCESS) {
		LIST_CONCAT(&vsi->ftl, to_add, ixl_mac_filter, ftle);
		vsi->num_hw_filters += j;
		goto out_free;
	}

	device_printf(dev,
	    "i40e_aq_add_macvlan status %s, error %s\n",
	    i40e_stat_str(hw, status),
	    i40e_aq_str(hw, hw->aq.asq_last_status));
	j = 0;

	/* Verify which filters were actually configured in HW
	 * and add them to the list */
	LIST_FOREACH_SAFE(f, to_add, ftle, fn) {
		LIST_REMOVE(f, ftle);
		if (a[j].match_method == I40E_AQC_MM_ERR_NO_RES) {
			ixl_dbg_filter(pf,
			    "%s filter " MAC_FORMAT " VTAG: %d not added\n",
			    __func__,
			    MAC_FORMAT_ARGS(f->macaddr),
			    f->vlan);
			free(f, M_IXL);
		} else {
			LIST_INSERT_HEAD(&vsi->ftl, f, ftle);
			vsi->num_hw_filters++;
		}
		j++;
	}

out_free:
	free(a, M_IXL);
}

/*
** This routine takes removals in the vsi filter
** table and creates an Admin Queue call to delete
** the filters in the hardware.
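**
** Software list entries are freed as they are copied into the
** admin queue buffer, so per-filter errors reported by firmware
** are only logged.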
*/ void ixl_del_hw_filters(struct ixl_vsi *vsi, struct ixl_ftl_head *to_del, int cnt) { struct i40e_aqc_remove_macvlan_element_data *d, *e; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; struct ixl_mac_filter *f, *f_temp; enum i40e_status_code status; int j = 0; pf = vsi->back; hw = &pf->hw; dev = vsi->dev; ixl_dbg_filter(pf, "%s: start, cnt: %d\n", __func__, cnt); d = malloc(sizeof(struct i40e_aqc_remove_macvlan_element_data) * cnt, M_IXL, M_NOWAIT | M_ZERO); if (d == NULL) { device_printf(dev, "%s: failed to get memory\n", __func__); return; } LIST_FOREACH_SAFE(f, to_del, ftle, f_temp) { e = &d[j]; // a pox on fvl long names :) bcopy(f->macaddr, e->mac_addr, ETHER_ADDR_LEN); e->flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; if (f->vlan == IXL_VLAN_ANY) { e->vlan_tag = 0; e->flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; } else { e->vlan_tag = f->vlan; } ixl_dbg_filter(pf, "DEL: " MAC_FORMAT "\n", MAC_FORMAT_ARGS(f->macaddr)); /* delete entry from the list */ LIST_REMOVE(f, ftle); free(f, M_IXL); if (++j == cnt) break; } if (j != cnt || !LIST_EMPTY(to_del)) { /* Something went wrong */ device_printf(dev, "%s ERROR: wrong size of list of filters, expected: %d, found: %d\n", __func__, cnt, j); ixl_free_filters(to_del); goto out_free; } status = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL); if (status) { device_printf(dev, "%s: i40e_aq_remove_macvlan status %s, error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); for (int i = 0; i < j; i++) { if (d[i].error_code == 0) continue; device_printf(dev, "%s Filter does not exist " MAC_FORMAT " VTAG: %d\n", __func__, MAC_FORMAT_ARGS(d[i].mac_addr), d[i].vlan_tag); } } vsi->num_hw_filters -= j; out_free: free(d, M_IXL); ixl_dbg_filter(pf, "%s: end\n", __func__); } int ixl_enable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF TX ring %4d / VSI TX ring %4d...\n", pf_qidx, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, TRUE); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg |= I40E_QTX_ENA_QENA_REQ_MASK | I40E_QTX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (reg & I40E_QTX_ENA_QENA_STAT_MASK) break; i40e_usec_delay(10); } if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "TX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF RX ring %4d / VSI RX ring %4d...\n", pf_qidx, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg |= I40E_QRX_ENA_QENA_REQ_MASK | I40E_QRX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (reg & I40E_QRX_ENA_QENA_STAT_MASK) break; i40e_usec_delay(10); } if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "RX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_enable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) 
return (error); error = ixl_enable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* * Returns error on first ring that is detected hung. */ int ixl_disable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Disabling PF TX ring %4d / VSI TX ring %4d...\n", pf_qidx, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, FALSE); i40e_usec_delay(500); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (!(reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QTX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "TX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } /* * Returns error on first ring that is detected hung. */ int ixl_disable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Disabling PF RX ring %4d / VSI RX ring %4d...\n", pf_qidx, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (!(reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QRX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "RX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_disable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_disable_rx_ring(pf, qtag, vsi_qidx); return (error); } static void ixl_handle_tx_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct ixl_vf *vf; bool mdd_detected = false; bool pf_mdd_detected = false; bool vf_mdd_detected = false; u16 vf_num, queue; u8 pf_num, event; u8 pf_mdet_num, vp_mdet_num; u32 reg; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> I40E_GL_MDET_TX_PF_NUM_SHIFT; vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >> I40E_GL_MDET_TX_VF_NUM_SHIFT; event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } if (!mdd_detected) return; reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); pf_mdet_num = hw->pf_id; pf_mdd_detected = true; } /* Check if MDD was caused by a VF */ for (int i = 0; i < pf->num_vfs; i++) { vf = &(pf->vfs[i]); reg = rd32(hw, I40E_VP_MDET_TX(i)); if (reg & I40E_VP_MDET_TX_VALID_MASK) { wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF); vp_mdet_num = i; vf->num_mdd_events++; vf_mdd_detected = true; } } /* Print out an error message */ if (vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d (PF-%d), vf number %d (VF-%d)\n", event, queue, pf_num, pf_mdet_num, vf_num, vp_mdet_num); else if (vf_mdd_detected && 
!pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d, vf number %d (VF-%d)\n", event, queue, pf_num, vf_num, vp_mdet_num); else if (!vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d (PF-%d)\n", event, queue, pf_num, pf_mdet_num); /* Theoretically shouldn't happen */ else device_printf(dev, "TX Malicious Driver Detection event (unknown)\n"); } static void ixl_handle_rx_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct ixl_vf *vf; bool mdd_detected = false; bool pf_mdd_detected = false; bool vf_mdd_detected = false; u16 queue; u8 pf_num, event; u8 pf_mdet_num, vp_mdet_num; u32 reg; /* * GL_MDET_RX doesn't contain VF number information, unlike * GL_MDET_TX. */ reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { pf_num = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (!mdd_detected) return; reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); pf_mdet_num = hw->pf_id; pf_mdd_detected = true; } /* Check if MDD was caused by a VF */ for (int i = 0; i < pf->num_vfs; i++) { vf = &(pf->vfs[i]); reg = rd32(hw, I40E_VP_MDET_RX(i)); if (reg & I40E_VP_MDET_RX_VALID_MASK) { wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF); vp_mdet_num = i; vf->num_mdd_events++; vf_mdd_detected = true; } } /* Print out an error message */ if (vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d (PF-%d), (VF-%d)\n", event, queue, pf_num, pf_mdet_num, vp_mdet_num); else if (vf_mdd_detected && !pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d, (VF-%d)\n", event, queue, pf_num, vp_mdet_num); else if (!vf_mdd_detected && pf_mdd_detected) device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d (PF-%d)\n", event, queue, pf_num, pf_mdet_num); /* Theoretically shouldn't happen */ else device_printf(dev, "RX Malicious Driver Detection event (unknown)\n"); } /** * ixl_handle_mdd_event * * Called from the interrupt handler to identify possibly malicious VFs * (it also detects events from the PF) **/ void ixl_handle_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* * Handle both TX/RX because it's possible they could * both trigger in the same interrupt.
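* Both helpers read and then clear their GL_MDET_TX/GL_MDET_RX cause * registers and return early when no event is latched, so calling * them back to back is safe even if only one direction fired.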
*/ ixl_handle_tx_mdd_event(pf); ixl_handle_rx_mdd_event(pf); atomic_clear_32(&pf->state, IXL_PF_STATE_MDD_PENDING); /* re-enable mdd interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); ixl_flush(hw); } void ixl_enable_intr0(struct i40e_hw *hw) { u32 reg; /* Use IXL_ITR_NONE so ITR isn't updated here */ reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); } void ixl_disable_intr0(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); } void ixl_enable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_disable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_handle_empr_reset(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; - bool is_up = !!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING); + bool is_up = !!(if_getdrvflags(vsi->ifp) & IFF_DRV_RUNNING); ixl_prepare_for_reset(pf, is_up); /* * i40e_pf_reset checks the type of reset and acts * accordingly. If EMP or Core reset was performed * doing PF reset is not necessary and it sometimes * fails. */ ixl_pf_reset(pf); if (!IXL_PF_IN_RECOVERY_MODE(pf) && ixl_get_fw_mode(pf) == IXL_FW_MODE_RECOVERY) { atomic_set_32(&pf->state, IXL_PF_STATE_RECOVERY_MODE); device_printf(pf->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); pf->link_up = FALSE; ixl_update_link_status(pf); } ixl_rebuild_hw_structs_after_reset(pf, is_up); atomic_clear_32(&pf->state, IXL_PF_STATE_RESETTING); } void ixl_update_stats_counters(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_vf *vf; u64 prev_link_xoff_rx = pf->stats.link_xoff_rx; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw_port_stats *osd = &pf->stats_offsets; /* Update hw stats */ ixl_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); ixl_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); ixl_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); ixl_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port), I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); ixl_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); ixl_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); ixl_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); ixl_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); ixl_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_multicast, 
&nsd->eth.tx_multicast); ixl_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); ixl_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); ixl_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); ixl_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); ixl_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, &osd->mac_remote_faults, &nsd->mac_remote_faults); ixl_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); /* Flow control (LFC) stats */ ixl_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); ixl_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); /* * For watchdog management we need to know if we have been paused * during the last interval, so capture that here. */ if (pf->stats.link_xoff_rx != prev_link_xoff_rx) vsi->shared->isc_pause_frames = 1; /* Packet size stats rx */ ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); /* Packet size stats tx */ ixl_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port), I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), 
I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); ixl_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); ixl_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); ixl_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); ixl_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); /* EEE */ i40e_get_phy_lpi_status(hw, nsd); i40e_lpi_stat_update(hw, pf->stat_offsets_loaded, &osd->tx_lpi_count, &nsd->tx_lpi_count, &osd->rx_lpi_count, &nsd->rx_lpi_count); pf->stat_offsets_loaded = true; /* End hw stats */ /* Update vsi stats */ ixl_update_vsi_stats(vsi); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (vf->vf_flags & VF_FLAG_ENABLED) ixl_update_eth_stats(&pf->vfs[i].vsi); } } /** * Update VSI-specific ethernet statistics counters. **/ void ixl_update_eth_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *es; struct i40e_eth_stats *oes; u16 stat_idx = vsi->info.stat_counter_idx; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; /* Gather up the stats that the hw collects */ ixl_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); ixl_stat_update32(hw, I40E_GLV_RDPC(stat_idx), vsi->stat_offsets_loaded, &oes->rx_discards, &es->rx_discards); ixl_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); ixl_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); ixl_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); ixl_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); ixl_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); ixl_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); ixl_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); ixl_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); vsi->stat_offsets_loaded = true; } void ixl_update_vsi_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf; struct i40e_eth_stats *es; u64 tx_discards, csum_errs; struct i40e_hw_port_stats *nsd; pf = vsi->back; es = &vsi->eth_stats; nsd = &pf->stats; ixl_update_eth_stats(vsi); tx_discards = es->tx_discards + nsd->tx_dropped_link_down; csum_errs = 0; for (int i = 0; i < vsi->num_rx_queues; i++) csum_errs += vsi->rx_queues[i].rxr.csum_errs; nsd->checksum_error = csum_errs; /* Update ifnet stats */ IXL_SET_IPACKETS(vsi, es->rx_unicast + es->rx_multicast + es->rx_broadcast); IXL_SET_OPACKETS(vsi, es->tx_unicast + es->tx_multicast + es->tx_broadcast); IXL_SET_IBYTES(vsi, es->rx_bytes); IXL_SET_OBYTES(vsi, es->tx_bytes); IXL_SET_IMCASTS(vsi, es->rx_multicast); IXL_SET_OMCASTS(vsi, es->tx_multicast); IXL_SET_IERRORS(vsi, nsd->crc_errors + nsd->illegal_bytes + nsd->checksum_error + 
nsd->rx_length_errors + nsd->rx_undersize + nsd->rx_fragments + nsd->rx_oversize + nsd->rx_jabber); IXL_SET_OERRORS(vsi, es->tx_errors); IXL_SET_IQDROPS(vsi, es->rx_discards + nsd->eth.rx_discards); IXL_SET_OQDROPS(vsi, tx_discards); IXL_SET_NOPROTO(vsi, es->rx_unknown_protocol); IXL_SET_COLLISIONS(vsi, 0); } /** * Reset all of the stats for the given pf **/ void ixl_pf_reset_stats(struct ixl_pf *pf) { bzero(&pf->stats, sizeof(struct i40e_hw_port_stats)); bzero(&pf->stats_offsets, sizeof(struct i40e_hw_port_stats)); pf->stat_offsets_loaded = false; } /** * Resets all stats of the given vsi **/ void ixl_vsi_reset_stats(struct ixl_vsi *vsi) { bzero(&vsi->eth_stats, sizeof(struct i40e_eth_stats)); bzero(&vsi->eth_stats_offsets, sizeof(struct i40e_eth_stats)); vsi->stat_offsets_loaded = false; } /** * Read and update a 48 bit stat from the hw * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ void ixl_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; #if defined(__FreeBSD__) && (__FreeBSD_version >= 1000000) && defined(__amd64__) new_data = rd64(hw, loreg); #else /* * Use two rd32's instead of one rd64; FreeBSD versions before * 10 don't support 64-bit bus reads/writes. */ new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; #endif if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = new_data - *offset; else *stat = (new_data + ((u64)1 << 48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * Read and update a 32 bit stat from the hw **/ void ixl_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + ((u64)1 << 32)) - *offset); } /** * Add subset of device sysctls safe to use in recovery mode */ void ixl_add_sysctls_recovery_mode(struct ixl_pf *pf) { device_t dev = pf->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. 
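* (CTLFLAG_SKIP only hides the "debug" node from the default listing; * the OIDs stay reachable by explicit name, e.g. * "sysctl dev.ixl.<unit>.debug.dump_debug_data", where the unit * number is system-specific.)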
*/ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &pf->dbg_mask, 0, "Non-shared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dump_debug_data", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_dump_debug_data, "A", "Dump Debug Data from FW"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_pf_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_pf_reset, "I", "Tell HW to initiate a PF reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_core_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_core_reset, "I", "Tell HW to initiate a CORE reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_global_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_global_reset, "I", "Tell HW to initiate a GLOBAL reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "queue_interrupt_table", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_queue_interrupt_table, "A", "View MSI-X indices for TX/RX queues"); } void ixl_add_device_sysctls(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; struct sysctl_oid *fec_node; struct sysctl_oid_list *fec_list; struct sysctl_oid *eee_node; struct sysctl_oid_list *eee_list; /* Set up sysctls */ SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_set_flowcntl, "I", IXL_SYSCTL_HELP_FC); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_set_advertise, "I", IXL_SYSCTL_HELP_SET_ADVERTISE); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "supported_speeds", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_supported_speeds, "I", IXL_SYSCTL_HELP_SUPPORTED_SPEED); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "unallocated_queues", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_unallocated_queues, "I", "Queues not allocated to a PF or VF"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "tx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_pf_tx_itr, "I", "Immediately set TX ITR value for all queues"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "rx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_pf_rx_itr, "I", "Immediately set RX ITR value for all queues"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_rx_itr", CTLFLAG_RW, &pf->dynamic_rx_itr, 0, "Enable dynamic RX ITR"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW, &pf->dynamic_tx_itr, 0, "Enable dynamic TX ITR"); /* Add FEC sysctls for 25G adapters */ if (i40e_is_25G_device(hw->device_id)) { fec_node = 
SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "fec", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "FEC Sysctls"); fec_list = SYSCTL_CHILDREN(fec_node); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_ability", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_fc_ability, "I", "FC FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_ability", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_rs_ability, "I", "RS FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_requested", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_fc_request, "I", "FC FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_requested", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_rs_request, "I", "RS FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "auto_fec_enabled", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fec_auto_enable, "I", "Let FW decide FEC ability/request modes"); } SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_lldp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fw_lldp, "I", IXL_SYSCTL_HELP_FW_LLDP); eee_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "eee", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Energy Efficient Ethernet (EEE) Sysctls"); eee_list = SYSCTL_CHILDREN(eee_node); SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, pf, 0, ixl_sysctl_eee_enable, "I", "Enable Energy Efficient Ethernet (EEE)"); SYSCTL_ADD_UINT(ctx, eee_list, OID_AUTO, "tx_lpi_status", CTLFLAG_RD | CTLFLAG_MPSAFE, &pf->stats.tx_lpi_status, 0, "TX LPI status"); SYSCTL_ADD_UINT(ctx, eee_list, OID_AUTO, "rx_lpi_status", CTLFLAG_RD | CTLFLAG_MPSAFE, &pf->stats.rx_lpi_status, 0, "RX LPI status"); SYSCTL_ADD_UQUAD(ctx, eee_list, OID_AUTO, "tx_lpi_count", CTLFLAG_RD | CTLFLAG_MPSAFE, &pf->stats.tx_lpi_count, "TX LPI count"); SYSCTL_ADD_UQUAD(ctx, eee_list, OID_AUTO, "rx_lpi_count", CTLFLAG_RD | CTLFLAG_MPSAFE, &pf->stats.rx_lpi_count, "RX LPI count"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "link_active_on_if_down", CTLTYPE_INT | CTLFLAG_RWTUN, pf, 0, ixl_sysctl_set_link_active, "I", IXL_SYSCTL_HELP_SET_LINK_ACTIVE); /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. 
*/ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &pf->dbg_mask, 0, "Non-shared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_link_status, "A", IXL_SYSCTL_HELP_LINK_STATUS); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_abilities_init", CTLTYPE_STRING | CTLFLAG_RD, pf, 1, ixl_sysctl_phy_abilities, "A", "Initial PHY Abilities"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_abilities", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_phy_abilities, "A", "PHY Abilities"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "hw_res_alloc", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hw_res_alloc, "A", "HW Resource Allocation"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "switch_config", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_switch_config, "A", "HW Switch Configuration"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "switch_vlans", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_switch_vlans, "I", "HW Switch VLAN Configuration"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_key", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hkey, "A", "View RSS key"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_lut", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hlut, "A", "View RSS lookup table"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_hena", CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_hena, "LU", "View enabled packet types for RSS"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "disable_fw_link_management", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_fw_link_management, "I", "Disable FW Link Management"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dump_debug_data", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_dump_debug_data, "A", "Dump Debug Data from FW"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_pf_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_pf_reset, "I", "Tell HW to initiate a PF reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_core_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_core_reset, "I", "Tell HW to initiate a CORE reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_global_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_do_global_reset, "I", "Tell HW to initiate a GLOBAL reset"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "queue_interrupt_table", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_queue_interrupt_table, "A", "View MSI-X indices for TX/RX queues"); if (pf->has_i2c) { SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_byte", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_read_i2c_byte, "I", IXL_SYSCTL_HELP_READ_I2C); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "write_i2c_byte", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_write_i2c_byte, 
"I", IXL_SYSCTL_HELP_WRITE_I2C); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pf, 0, ixl_sysctl_read_i2c_diag_data, "A", "Dump selected diagnostic data from FW"); } } /* * Primarily for finding out how many queues can be assigned to VFs, * at runtime. */ static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int queues; queues = (int)ixl_pf_qmgr_get_num_free(&pf->qmgr); return sysctl_handle_int(oidp, NULL, queues, req); } static const char * ixl_link_speed_string(enum i40e_aq_link_speed link_speed) { const char * link_speed_str[] = { "Unknown", "100 Mbps", "1 Gbps", "10 Gbps", "40 Gbps", "20 Gbps", "25 Gbps", "2.5 Gbps", "5 Gbps" }; int index; switch (link_speed) { case I40E_LINK_SPEED_100MB: index = 1; break; case I40E_LINK_SPEED_1GB: index = 2; break; case I40E_LINK_SPEED_10GB: index = 3; break; case I40E_LINK_SPEED_40GB: index = 4; break; case I40E_LINK_SPEED_20GB: index = 5; break; case I40E_LINK_SPEED_25GB: index = 6; break; case I40E_LINK_SPEED_2_5GB: index = 7; break; case I40E_LINK_SPEED_5GB: index = 8; break; case I40E_LINK_SPEED_UNKNOWN: default: index = 0; break; } return (link_speed_str[index]); } int ixl_sysctl_current_speed(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int error = 0; ixl_update_link_status(pf); error = sysctl_handle_string(oidp, __DECONST(void *, ixl_link_speed_string(hw->phy.link_info.link_speed)), 8, req); return (error); } /* * Converts 8-bit speeds value to and from sysctl flags and * Admin Queue flags. */ static u8 ixl_convert_sysctl_aq_link_speed(u8 speeds, bool to_aq) { #define SPEED_MAP_SIZE 8 static u16 speedmap[SPEED_MAP_SIZE] = { (I40E_LINK_SPEED_100MB | (0x1 << 8)), (I40E_LINK_SPEED_1GB | (0x2 << 8)), (I40E_LINK_SPEED_10GB | (0x4 << 8)), (I40E_LINK_SPEED_20GB | (0x8 << 8)), (I40E_LINK_SPEED_25GB | (0x10 << 8)), (I40E_LINK_SPEED_40GB | (0x20 << 8)), (I40E_LINK_SPEED_2_5GB | (0x40 << 8)), (I40E_LINK_SPEED_5GB | (0x80 << 8)), }; u8 retval = 0; for (int i = 0; i < SPEED_MAP_SIZE; i++) { if (to_aq) retval |= (speeds & (speedmap[i] >> 8)) ? (speedmap[i] & 0xff) : 0; else retval |= (speeds & speedmap[i]) ? 
(speedmap[i] >> 8) : 0; } return (retval); } int ixl_set_advertised_speeds(struct ixl_pf *pf, int speeds, bool from_aq) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } /* Prepare new config */ bzero(&config, sizeof(config)); if (from_aq) config.link_speed = speeds; else config.link_speed = ixl_convert_sysctl_aq_link_speed(speeds, true); config.phy_type = abilities.phy_type; config.phy_type_ext = abilities.phy_type_ext; config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; config.fec_config = abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_PHY_FEC_CONFIG_MASK; /* Do aq command & restart link */ aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } return (0); } /* ** Supported link speeds ** Flags: ** 0x1 - 100 Mb ** 0x2 - 1G ** 0x4 - 10G ** 0x8 - 20G ** 0x10 - 25G ** 0x20 - 40G ** 0x40 - 2.5G ** 0x80 - 5G */ static int ixl_sysctl_supported_speeds(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int supported = ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false); return sysctl_handle_int(oidp, NULL, supported, req); } /* ** Control link advertise speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G ** 0x8 - advertise 20G ** 0x10 - advertise 25G ** 0x20 - advertise 40G ** 0x40 - advertise 2.5G ** 0x80 - advertise 5G ** ** Set to 0 to disable link */ int ixl_sysctl_set_advertise(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; u8 converted_speeds; int requested_ls = 0; int error = 0; /* Read in new mode */ requested_ls = pf->advertised_speed; error = sysctl_handle_int(oidp, &requested_ls, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (IXL_PF_IN_RECOVERY_MODE(pf)) { device_printf(dev, "Interface is currently in FW recovery mode. 
" "Setting advertise speed not supported\n"); return (EINVAL); } /* Error out if bits outside of possible flag range are set */ if ((requested_ls & ~((u8)0xFF)) != 0) { device_printf(dev, "Input advertised speed out of range; " "valid flags are: 0x%02x\n", ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false)); return (EINVAL); } /* Check if adapter supports input value */ converted_speeds = ixl_convert_sysctl_aq_link_speed((u8)requested_ls, true); if ((converted_speeds | pf->supported_speeds) != pf->supported_speeds) { device_printf(dev, "Invalid advertised speed; " "valid flags are: 0x%02x\n", ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false)); return (EINVAL); } error = ixl_set_advertised_speeds(pf, requested_ls, false); if (error) return (error); pf->advertised_speed = requested_ls; ixl_update_link_status(pf); return (0); } /* * Input: bitmap of enum i40e_aq_link_speed */ u64 ixl_max_aq_speed_to_value(u8 link_speeds) { if (link_speeds & I40E_LINK_SPEED_40GB) return IF_Gbps(40); if (link_speeds & I40E_LINK_SPEED_25GB) return IF_Gbps(25); if (link_speeds & I40E_LINK_SPEED_20GB) return IF_Gbps(20); if (link_speeds & I40E_LINK_SPEED_10GB) return IF_Gbps(10); if (link_speeds & I40E_LINK_SPEED_5GB) return IF_Gbps(5); if (link_speeds & I40E_LINK_SPEED_2_5GB) return IF_Mbps(2500); if (link_speeds & I40E_LINK_SPEED_1GB) return IF_Gbps(1); if (link_speeds & I40E_LINK_SPEED_100MB) return IF_Mbps(100); else /* Minimum supported link speed */ return IF_Mbps(100); } /* ** Get the width and transaction speed of ** the bus this adapter is plugged into. */ void ixl_get_bus_info(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 link; u32 offset, num_ports; u64 max_speed; /* Some devices don't use PCIE */ if (hw->mac.type == I40E_MAC_X722) return; /* Read PCI Express Capabilities Link Status Register */ pci_find_cap(dev, PCIY_EXPRESS, &offset); link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); /* Fill out hw struct with PCIE info */ i40e_set_pci_config_data(hw, link); /* Use info to print out bandwidth messages */ device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == i40e_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == i40e_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == i40e_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == i40e_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == i40e_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == i40e_bus_width_pcie_x2) ? "Width x2" : (hw->bus.width == i40e_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); /* * If adapter is in slot with maximum supported speed, * no warning message needs to be printed out. 
*/ if (hw->bus.speed >= i40e_bus_speed_8000 && hw->bus.width >= i40e_bus_width_pcie_x8) return; num_ports = bitcount32(hw->func_caps.valid_functions); max_speed = ixl_max_aq_speed_to_value(pf->supported_speeds) / 1000000; if ((num_ports * max_speed) > hw->bus.speed * hw->bus.width) { device_printf(dev, "PCI-Express bandwidth available" " for this device may be insufficient for" " optimal performance.\n"); device_printf(dev, "Please move the device to a different" " PCI-e link with more lanes and/or higher" " transfer rate.\n"); } } static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct sbuf *sbuf; sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } void ixl_print_nvm_cmd(device_t dev, struct i40e_nvm_access *nvma) { u8 nvma_ptr = nvma->config & 0xFF; u8 nvma_flags = (nvma->config & 0xF00) >> 8; const char * cmd_str; switch (nvma->command) { case I40E_NVM_READ: if (nvma_ptr == 0xF && nvma_flags == 0xF && nvma->offset == 0 && nvma->data_size == 1) { device_printf(dev, "NVMUPD: Get Driver Status Command\n"); return; } cmd_str = "READ "; break; case I40E_NVM_WRITE: cmd_str = "WRITE"; break; default: device_printf(dev, "NVMUPD: unknown command: 0x%08x\n", nvma->command); return; } device_printf(dev, "NVMUPD: cmd: %s ptr: 0x%02x flags: 0x%01x offset: 0x%08x data_s: 0x%08x\n", cmd_str, nvma_ptr, nvma_flags, nvma->offset, nvma->data_size); } int ixl_handle_nvmupd_cmd(struct ixl_pf *pf, struct ifdrv *ifd) { struct i40e_hw *hw = &pf->hw; struct i40e_nvm_access *nvma; device_t dev = pf->dev; enum i40e_status_code status = 0; size_t nvma_size, ifd_len, exp_len; int err, perrno; DEBUGFUNC("ixl_handle_nvmupd_cmd"); /* Sanity checks */ nvma_size = sizeof(struct i40e_nvm_access); ifd_len = ifd->ifd_len; if (ifd_len < nvma_size || ifd->ifd_data == NULL) { device_printf(dev, "%s: incorrect ifdrv length or data pointer\n", __func__); device_printf(dev, "%s: ifdrv length: %zu, sizeof(struct i40e_nvm_access): %zu\n", __func__, ifd_len, nvma_size); device_printf(dev, "%s: data pointer: %p\n", __func__, ifd->ifd_data); return (EINVAL); } nvma = malloc(ifd_len, M_IXL, M_WAITOK); err = copyin(ifd->ifd_data, nvma, ifd_len); if (err) { device_printf(dev, "%s: Cannot get request from user space\n", __func__); free(nvma, M_IXL); return (err); } if (pf->dbg_mask & IXL_DBG_NVMUPD) ixl_print_nvm_cmd(dev, nvma); if (IXL_PF_IS_RESETTING(pf)) { int count = 0; while (count++ < 100) { i40e_msec_delay(100); if (!(IXL_PF_IS_RESETTING(pf))) break; } } if (IXL_PF_IS_RESETTING(pf)) { device_printf(dev, "%s: timeout waiting for EMP reset to finish\n", __func__); free(nvma, M_IXL); return (-EBUSY); } if (nvma->data_size < 1 || nvma->data_size > 4096) { device_printf(dev, "%s: invalid request, data size not in supported range\n", __func__); free(nvma, M_IXL); return (EINVAL); } /* * Older versions of the NVM update tool don't set ifd_len to the size * of the entire buffer passed to the ioctl. Check the data_size field * in the contained i40e_nvm_access struct and ensure everything is * copied in from userspace. 
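* For example, a 4096-byte read arrives with data_size == 4096, so * the full buffer must be sizeof(struct i40e_nvm_access) + 4096 - 1 * bytes; the "- 1" accounts for the one data byte already kept inside * the struct itself.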
*/ exp_len = nvma_size + nvma->data_size - 1; /* One byte is kept in struct */ if (ifd_len < exp_len) { ifd_len = exp_len; nvma = realloc(nvma, ifd_len, M_IXL, M_WAITOK); err = copyin(ifd->ifd_data, nvma, ifd_len); if (err) { device_printf(dev, "%s: Cannot get request from user space\n", __func__); free(nvma, M_IXL); return (err); } } // TODO: Might need a different lock here // IXL_PF_LOCK(pf); status = i40e_nvmupd_command(hw, nvma, nvma->data, &perrno); // IXL_PF_UNLOCK(pf); err = copyout(nvma, ifd->ifd_data, ifd_len); free(nvma, M_IXL); if (err) { device_printf(dev, "%s: Cannot return data to user space\n", __func__); return (err); } /* Let the nvmupdate tool report errors; show them only when debug is enabled */ if (status != 0 && (pf->dbg_mask & IXL_DBG_NVMUPD) != 0) device_printf(dev, "i40e_nvmupd_command status %s, perrno %d\n", i40e_stat_str(hw, status), perrno); /* * -EPERM is actually ERESTART, which the kernel interprets as meaning * it needs to run this ioctl again. So use -EACCES for -EPERM instead. */ if (perrno == -EPERM) return (-EACCES); else return (perrno); } int ixl_find_i2c_interface(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; bool i2c_en, port_matched; u32 reg; for (int i = 0; i < 4; i++) { reg = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(i)); i2c_en = (reg & I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_MASK); port_matched = ((reg & I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_MASK) >> I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT) & BIT(hw->port); if (i2c_en && port_matched) return (i); } return (-1); } void ixl_set_link(struct ixl_pf *pf, bool enable) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; u32 phy_type, phy_type_ext; /* Get initial capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return; } phy_type = abilities.phy_type; phy_type_ext = abilities.phy_type_ext; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return; } /* Prepare new config */ memset(&config, 0, sizeof(config)); config.link_speed = abilities.link_speed; config.abilities = abilities.abilities; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; config.fec_config = abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_PHY_FEC_CONFIG_MASK; config.phy_type = 0; config.phy_type_ext = 0; config.abilities &= ~(I40E_AQ_PHY_FLAG_PAUSE_TX | I40E_AQ_PHY_FLAG_PAUSE_RX); switch (pf->fc) { case I40E_FC_FULL: config.abilities |= I40E_AQ_PHY_FLAG_PAUSE_TX | I40E_AQ_PHY_FLAG_PAUSE_RX; break; case I40E_FC_RX_PAUSE: config.abilities |= I40E_AQ_PHY_FLAG_PAUSE_RX; break; case I40E_FC_TX_PAUSE: config.abilities |= I40E_AQ_PHY_FLAG_PAUSE_TX; break; default: break; } if (enable) { config.phy_type = phy_type; config.phy_type_ext = phy_type_ext; } aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return; } aq_error = i40e_aq_set_link_restart_an(hw, enable, NULL); if (aq_error) { device_printf(dev, "%s: Error setting link config 
%d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return; } } static char * ixl_phy_type_string(u32 bit_pos, bool ext) { static char * phy_types_str[32] = { "SGMII", "1000BASE-KX", "10GBASE-KX4", "10GBASE-KR", "40GBASE-KR4", "XAUI", "XFI", "SFI", "XLAUI", "XLPPI", "40GBASE-CR4", "10GBASE-CR1", "SFP+ Active DA", "QSFP+ Active DA", "Reserved (14)", "Reserved (15)", "Reserved (16)", "100BASE-TX", "1000BASE-T", "10GBASE-T", "10GBASE-SR", "10GBASE-LR", "10GBASE-SFP+Cu", "10GBASE-CR1", "40GBASE-CR4", "40GBASE-SR4", "40GBASE-LR4", "1000BASE-SX", "1000BASE-LX", "1000BASE-T Optical", "20GBASE-KR2", "Reserved (31)" }; static char * ext_phy_types_str[8] = { "25GBASE-KR", "25GBASE-CR", "25GBASE-SR", "25GBASE-LR", "25GBASE-AOC", "25GBASE-ACC", "2.5GBASE-T", "5GBASE-T" }; if (ext && bit_pos > 7) return "Invalid_Ext"; if (bit_pos > 31) return "Invalid"; return (ext) ? ext_phy_types_str[bit_pos] : phy_types_str[bit_pos]; } /* TODO: ERJ: I don't this is necessary anymore. */ int ixl_aq_get_link_status(struct ixl_pf *pf, struct i40e_aqc_get_link_status *link_status) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_desc desc; enum i40e_status_code status; struct i40e_aqc_get_link_status *aq_link_status = (struct i40e_aqc_get_link_status *)&desc.params.raw; i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status); link_status->command_flags = CPU_TO_LE16(I40E_AQ_LSE_ENABLE); status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL); if (status) { device_printf(dev, "%s: i40e_aqc_opc_get_link_status status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } bcopy(aq_link_status, link_status, sizeof(struct i40e_aqc_get_link_status)); return (0); } static char * ixl_phy_type_string_ls(u8 val) { if (val >= 0x1F) return ixl_phy_type_string(val - 0x1F, true); else return ixl_phy_type_string(val, false); } static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } struct i40e_aqc_get_link_status link_status; error = ixl_aq_get_link_status(pf, &link_status); if (error) { sbuf_delete(buf); return (error); } sbuf_printf(buf, "\n" "PHY Type : 0x%02x<%s>\n" "Speed : 0x%02x\n" "Link info: 0x%02x\n" "AN info : 0x%02x\n" "Ext info : 0x%02x\n" "Loopback : 0x%02x\n" "Max Frame: %d\n" "Config : 0x%02x\n" "Power : 0x%02x", link_status.phy_type, ixl_phy_type_string_ls(link_status.phy_type), link_status.link_speed, link_status.link_info, link_status.an_info, link_status.ext_info, link_status.loopback, link_status.max_frame_size, link_status.config, link_status.power_desc); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_phy_capabilities(hw, FALSE, arg2 != 0, &abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error 
%s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (EIO); } sbuf_printf(buf, "\n" "PHY Type : %08x", abilities.phy_type); if (abilities.phy_type != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 32; i++) if ((1 << i) & abilities.phy_type) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, false)); sbuf_printf(buf, ">"); } sbuf_printf(buf, "\nPHY Ext : %02x", abilities.phy_type_ext); if (abilities.phy_type_ext != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 4; i++) if ((1 << i) & abilities.phy_type_ext) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, true)); sbuf_printf(buf, ">"); } sbuf_printf(buf, "\nSpeed : %02x", abilities.link_speed); if (abilities.link_speed != 0) { u8 link_speed; sbuf_printf(buf, " <"); for (int i = 0; i < 8; i++) { link_speed = (1 << i) & abilities.link_speed; if (link_speed) sbuf_printf(buf, "%s, ", ixl_link_speed_string(link_speed)); } sbuf_printf(buf, ">"); } sbuf_printf(buf, "\n" "Abilities: %02x\n" "EEE cap : %04x\n" "EEER reg : %08x\n" "D3 Lpan : %02x\n" "ID : %02x %02x %02x %02x\n" "ModType : %02x %02x %02x\n" "ModType E: %01x\n" "FEC Cfg : %02x\n" "Ext CC : %02x", abilities.abilities, abilities.eee_capability, abilities.eeer_val, abilities.d3_lpan, abilities.phy_id[0], abilities.phy_id[1], abilities.phy_id[2], abilities.phy_id[3], abilities.module_type[0], abilities.module_type[1], abilities.module_type[2], (abilities.fec_cfg_curr_mod_ext_info & 0xe0) >> 5, abilities.fec_cfg_curr_mod_ext_info & 0x1F, abilities.ext_comp_code); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; struct ixl_mac_filter *f; device_t dev = pf->dev; int error = 0, ftl_len = 0, ftl_counter = 0; struct sbuf *buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } sbuf_printf(buf, "\n"); /* Print MAC filters */ sbuf_printf(buf, "PF Filters:\n"); LIST_FOREACH(f, &vsi->ftl, ftle) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)\n"); else { LIST_FOREACH(f, &vsi->ftl, ftle) { sbuf_printf(buf, MAC_FORMAT ", vlan %4d, flags %#06x", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) sbuf_printf(buf, "\n"); } } #ifdef PCI_IOV /* TODO: Give each VF its own filter list sysctl */ struct ixl_vf *vf; if (pf->num_vfs > 0) { sbuf_printf(buf, "\n\n"); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (!(vf->vf_flags & VF_FLAG_ENABLED)) continue; vsi = &vf->vsi; ftl_len = 0, ftl_counter = 0; sbuf_printf(buf, "VF-%d Filters:\n", vf->vf_num); LIST_FOREACH(f, &vsi->ftl, ftle) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)\n"); else { LIST_FOREACH(f, &vsi->ftl, ftle) { sbuf_printf(buf, MAC_FORMAT ", vlan %4d, flags %#06x\n", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); } } } } #endif error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } #define IXL_SW_RES_SIZE 0x14 int ixl_res_alloc_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two; one = (const struct i40e_aqc_switch_resource_alloc_element_resp *)a; two = (const struct i40e_aqc_switch_resource_alloc_element_resp *)b; return ((int)one->resource_type - (int)two->resource_type); 
} /* * Longest string length: 25 */ const char * ixl_switch_res_type_string(u8 type) { static const char * ixl_switch_res_type_strings[IXL_SW_RES_SIZE] = { "VEB", "VSI", "Perfect Match MAC address", "S-tag", "(Reserved)", "Multicast hash entry", "Unicast hash entry", "VLAN", "VSI List entry", "(Reserved)", "VLAN Statistic Pool", "Mirror Rule", "Queue Set", "Inner VLAN Forward filter", "(Reserved)", "Inner MAC", "IP", "GRE/VN1 Key", "VN2 Key", "Tunneling Port" }; if (type < IXL_SW_RES_SIZE) return ixl_switch_res_type_strings[type]; else return "(Reserved)"; } static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; enum i40e_status_code status; int error = 0; u8 num_entries; struct i40e_aqc_switch_resource_alloc_element_resp resp[IXL_SW_RES_SIZE]; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(resp, sizeof(resp)); status = i40e_aq_get_switch_resource_alloc(hw, &num_entries, resp, IXL_SW_RES_SIZE, NULL); if (status) { device_printf(dev, "%s: get_switch_resource_alloc() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (error); } /* Sort entries by type for display */ qsort(resp, num_entries, sizeof(struct i40e_aqc_switch_resource_alloc_element_resp), &ixl_res_alloc_cmp); sbuf_cat(buf, "\n"); sbuf_printf(buf, "# of entries: %d\n", num_entries); sbuf_printf(buf, " Type | Guaranteed | Total | Used | Un-allocated\n" " | (this) | (all) | (this) | (all) \n"); for (int i = 0; i < num_entries; i++) { sbuf_printf(buf, "%25s | %10d %5d %6d %12d", ixl_switch_res_type_string(resp[i].resource_type), resp[i].guaranteed, resp[i].total, resp[i].used, resp[i].total_unalloced); if (i < num_entries - 1) sbuf_cat(buf, "\n"); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } enum ixl_sw_seid_offset { IXL_SW_SEID_EMP = 1, IXL_SW_SEID_MAC_START = 2, IXL_SW_SEID_MAC_END = 5, IXL_SW_SEID_PF_START = 16, IXL_SW_SEID_PF_END = 31, IXL_SW_SEID_VF_START = 32, IXL_SW_SEID_VF_END = 159, }; /* * Caller must init and delete sbuf; this function will clear and * finish it for caller. * * Note: The SEID argument only applies for elements defined by FW at * power-on; these include the EMP, Ports, PFs and VFs. */ static char * ixl_switch_element_string(struct sbuf *s, u8 element_type, u16 seid) { sbuf_clear(s); /* If SEID is in certain ranges, then we can infer the * mapping of SEID to switch element. 
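* Per the ixl_sw_seid_offset enum above: SEID 1 is the EMP, SEIDs 2-5 * are MAC ports, 16-31 are PFs, and 32-159 are VFs.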
*/ if (seid == IXL_SW_SEID_EMP) { sbuf_cat(s, "EMP"); goto out; } else if (seid >= IXL_SW_SEID_MAC_START && seid <= IXL_SW_SEID_MAC_END) { sbuf_printf(s, "MAC %2d", seid - IXL_SW_SEID_MAC_START); goto out; } else if (seid >= IXL_SW_SEID_PF_START && seid <= IXL_SW_SEID_PF_END) { sbuf_printf(s, "PF %3d", seid - IXL_SW_SEID_PF_START); goto out; } else if (seid >= IXL_SW_SEID_VF_START && seid <= IXL_SW_SEID_VF_END) { sbuf_printf(s, "VF %3d", seid - IXL_SW_SEID_VF_START); goto out; } switch (element_type) { case I40E_AQ_SW_ELEM_TYPE_BMC: sbuf_cat(s, "BMC"); break; case I40E_AQ_SW_ELEM_TYPE_PV: sbuf_cat(s, "PV"); break; case I40E_AQ_SW_ELEM_TYPE_VEB: sbuf_cat(s, "VEB"); break; case I40E_AQ_SW_ELEM_TYPE_PA: sbuf_cat(s, "PA"); break; case I40E_AQ_SW_ELEM_TYPE_VSI: sbuf_printf(s, "VSI"); break; default: sbuf_cat(s, "?"); break; } out: sbuf_finish(s); return sbuf_data(s); } static int ixl_sw_cfg_elem_seid_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_config_element_resp *one, *two; one = (const struct i40e_aqc_switch_config_element_resp *)a; two = (const struct i40e_aqc_switch_config_element_resp *)b; return ((int)one->seid - (int)two->seid); } static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; struct sbuf *nmbuf; enum i40e_status_code status; int error = 0; u16 next = 0; u8 aq_buf[I40E_AQ_LARGE_BUF]; struct i40e_aqc_switch_config_element_resp *elem; struct i40e_aqc_get_switch_config_resp *sw_config; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (status) { device_printf(dev, "%s: aq_get_switch_config() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return error; } if (next) device_printf(dev, "%s: TODO: get more config with SEID %d\n", __func__, next); nmbuf = sbuf_new_auto(); if (!nmbuf) { device_printf(dev, "Could not allocate sbuf for name output.\n"); sbuf_delete(buf); return (ENOMEM); } /* Sort entries by SEID for display */ qsort(sw_config->element, sw_config->header.num_reported, sizeof(struct i40e_aqc_switch_config_element_resp), &ixl_sw_cfg_elem_seid_cmp); sbuf_cat(buf, "\n"); /* Assuming <= 255 elements in switch */ sbuf_printf(buf, "# of reported elements: %d\n", sw_config->header.num_reported); sbuf_printf(buf, "total # of elements: %d\n", sw_config->header.num_total); /* Exclude: * Revision -- all elements are revision 1 for now */ sbuf_printf(buf, "SEID ( Name ) | Up ( Name ) | Down ( Name ) | Conn Type\n" " | | | (uplink)\n"); for (int i = 0; i < sw_config->header.num_reported; i++) { elem = &sw_config->element[i]; // "%4d (%8s) | %8s %8s %#8x", sbuf_printf(buf, "%4d", elem->seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, elem->element_type, elem->seid)); sbuf_cat(buf, " | "); sbuf_printf(buf, "%4d", elem->uplink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, 0, elem->uplink_seid)); sbuf_cat(buf, " | "); sbuf_printf(buf, "%4d", elem->downlink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, 0, elem->downlink_seid)); sbuf_cat(buf, " | "); sbuf_printf(buf, "%8d", elem->connection_type); if (i < 
sw_config->header.num_reported - 1) sbuf_cat(buf, "\n"); } sbuf_delete(nmbuf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_switch_vlans(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_vlan = -1; enum i40e_status_code status = 0; int error = 0; error = sysctl_handle_int(oidp, &requested_vlan, 0, req); if ((error) || (req->newptr == NULL)) return (error); if ((hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) == 0) { device_printf(dev, "Flags disallow setting of vlans\n"); return (ENODEV); } hw->switch_tag = requested_vlan; device_printf(dev, "Setting switch config to switch_tag=%04x, first_tag=%04x, second_tag=%04x\n", hw->switch_tag, hw->first_tag, hw->second_tag); status = i40e_aq_set_switch_config(hw, 0, 0, 0, NULL); if (status) { device_printf(dev, "%s: aq_set_switch_config() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (status); } return (0); } static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u32 reg; struct i40e_aqc_get_set_rss_key_data key_data; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(&key_data, sizeof(key_data)); sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_get_rss_key(hw, pf->vsi.vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_get_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) { reg = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); bcopy(®, ((caddr_t)&key_data) + (i << 2), 4); } } ixl_sbuf_print_bytes(buf, (u8 *)&key_data, sizeof(key_data), 0, true); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static void ixl_sbuf_print_bytes(struct sbuf *sb, u8 *buf, int length, int label_offset, bool text) { int i, j, k, width; char c; if (length < 1 || buf == NULL) return; int byte_stride = 16; int lines = length / byte_stride; int rem = length % byte_stride; if (rem > 0) lines++; for (i = 0; i < lines; i++) { width = (rem > 0 && i == lines - 1) ? 
rem : byte_stride; sbuf_printf(sb, "%4d | ", label_offset + i * byte_stride); for (j = 0; j < width; j++) sbuf_printf(sb, "%02x ", buf[i * byte_stride + j]); if (width < byte_stride) { for (k = 0; k < (byte_stride - width); k++) sbuf_printf(sb, " "); } if (!text) { sbuf_printf(sb, "\n"); continue; } for (j = 0; j < width; j++) { c = (char)buf[i * byte_stride + j]; if (c < 32 || c > 126) sbuf_printf(sb, "."); else sbuf_printf(sb, "%c", c); if (j == width - 1) sbuf_printf(sb, "\n"); } } } static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u8 hlut[512]; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(hlut, sizeof(hlut)); sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_get_rss_lut(hw, pf->vsi.vsi_num, TRUE, hlut, sizeof(hlut)); if (status) device_printf(dev, "i40e_aq_get_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < hw->func_caps.rss_table_size >> 2; i++) { reg = rd32(hw, I40E_PFQF_HLUT(i)); bcopy(®, &hlut[i << 2], 4); } } ixl_sbuf_print_bytes(buf, hlut, 512, 0, false); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; u64 hena; hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); return sysctl_handle_long(oidp, NULL, hena, req); } /* * Sysctl to disable firmware's link management * * 1 - Disable link management on this port * 0 - Re-enable link management * * On normal NVMs, firmware manages link by default. 
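 * Internally, mode 1 is turned into the PHY debug flag passed to
 * i40e_aq_set_phy_debug() below (!!mode << 4, i.e. 0x10); mode 0
 * clears the flag and hands link control back to firmware.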
*/ static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_mode = -1; enum i40e_status_code status = 0; int error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested_mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check for sane value */ if (requested_mode < 0 || requested_mode > 1) { device_printf(dev, "Valid modes are 0 or 1\n"); return (EINVAL); } /* Set new mode */ status = i40e_aq_set_phy_debug(hw, !!(requested_mode) << 4, NULL); if (status) { device_printf(dev, "%s: Error setting new phy debug mode %s," " aq error: %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } return (0); } /* * Read some diagnostic data from a (Q)SFP+ module * * SFP A2 QSFP Lower Page * Temperature 96-97 22-23 * Vcc 98-99 26-27 * TX power 102-103 34-35..40-41 * RX power 104-105 50-51..56-57 */ static int ixl_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; struct sbuf *sbuf; int error = 0; u8 output; if (req->oldptr == NULL) { error = SYSCTL_OUT(req, 0, 128); return (0); } error = pf->read_i2c_byte(pf, 0, 0xA0, &output); if (error) { device_printf(dev, "Error reading from i2c\n"); return (error); } /* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */ if (output == 0x3) { /* * Check for: * - Internally calibrated data * - Diagnostic monitoring is implemented */ pf->read_i2c_byte(pf, 92, 0xA0, &output); if (!(output & 0x60)) { device_printf(dev, "Module doesn't support diagnostics: %02X\n", output); return (0); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); for (u8 offset = 96; offset < 100; offset++) { pf->read_i2c_byte(pf, offset, 0xA2, &output); sbuf_printf(sbuf, "%02X ", output); } for (u8 offset = 102; offset < 106; offset++) { pf->read_i2c_byte(pf, offset, 0xA2, &output); sbuf_printf(sbuf, "%02X ", output); } } else if (output == 0xD || output == 0x11) { /* * QSFP+ modules are always internally calibrated, and must indicate * what types of diagnostic monitoring are implemented */ sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); for (u8 offset = 22; offset < 24; offset++) { pf->read_i2c_byte(pf, offset, 0xA0, &output); sbuf_printf(sbuf, "%02X ", output); } for (u8 offset = 26; offset < 28; offset++) { pf->read_i2c_byte(pf, offset, 0xA0, &output); sbuf_printf(sbuf, "%02X ", output); } /* Read the data from the first lane */ for (u8 offset = 34; offset < 36; offset++) { pf->read_i2c_byte(pf, offset, 0xA0, &output); sbuf_printf(sbuf, "%02X ", output); } for (u8 offset = 50; offset < 52; offset++) { pf->read_i2c_byte(pf, offset, 0xA0, &output); sbuf_printf(sbuf, "%02X ", output); } } else { device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", output); return (0); } sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /* * Sysctl to read a byte from I2C bus. 
* * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-31: unused * Output: 8-bit value read */ static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; u8 dev_addr, offset, output; /* Read in I2C read parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; error = pf->read_i2c_byte(pf, offset, dev_addr, &output); if (error) return (error); device_printf(dev, "%02X\n", output); return (0); } /* * Sysctl to write a byte to the I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-23: value to write * bits 24-31: unused * Output: 8-bit value written */ static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; u8 dev_addr, offset, value; /* Read in I2C write parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; value = (input >> 16) & 0xFF; error = pf->write_i2c_byte(pf, offset, dev_addr, value); if (error) return (error); device_printf(dev, "%02X written\n", value); return (0); } static int ixl_get_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int *is_set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; if (IXL_PF_IN_RECOVERY_MODE(pf)) return (EIO); status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } *is_set = !!(abilities->fec_cfg_curr_mod_ext_info & bit_pos); return (0); } static int ixl_set_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_set_phy_config config; enum i40e_status_code status; /* Set new PHY config */ memset(&config, 0, sizeof(config)); config.fec_config = abilities->fec_cfg_curr_mod_ext_info & ~(bit_pos); if (set) config.fec_config |= bit_pos; if (config.fec_config != abilities->fec_cfg_curr_mod_ext_info) { config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.phy_type = abilities->phy_type; config.phy_type_ext = abilities->phy_type_ext; config.link_speed = abilities->link_speed; config.eee_capability = abilities->eee_capability; config.eeer = abilities->eeer_val; config.low_power_ctrl = abilities->d3_lpan; status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) { device_printf(dev, "%s: i40e_aq_set_phy_config() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } } return (0); } static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_ENABLE_FEC_KR, &mode); if (error) return (error); /* Read in new mode */ 
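	/*
	 * ixl_get_fec_config() above primed "mode" with the current state
	 * of the I40E_AQ_ENABLE_FEC_KR bit, so a read-only request simply
	 * returns that value; a write falls through to ixl_set_fec_config()
	 * with the newly requested value.
	 */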
error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, !!(mode)); } static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_ENABLE_FEC_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, !!(mode)); } static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_REQUEST_FEC_KR, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, !!(mode)); } static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_REQUEST_FEC_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, !!(mode)); } static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_ENABLE_FEC_AUTO, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, !!(mode)); } static int ixl_sysctl_dump_debug_data(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } u8 *final_buff; /* This amount is only necessary if reading the entire cluster into memory */ #define IXL_FINAL_BUFF_SIZE (1280 * 1024) final_buff = malloc(IXL_FINAL_BUFF_SIZE, M_IXL, M_NOWAIT); if (final_buff == NULL) { device_printf(dev, "Could not allocate memory for output.\n"); goto out; } int final_buff_len = 0; u8 cluster_id = 1; bool more = true; u8 dump_buf[4096]; u16 curr_buff_size = 4096; u8 curr_next_table = 0; u32 curr_next_index = 0; u16 ret_buff_size; u8 ret_next_table; u32 ret_next_index; sbuf_cat(buf, "\n"); while (more) { status = i40e_aq_debug_dump(hw, cluster_id, curr_next_table, curr_next_index, curr_buff_size, dump_buf, &ret_buff_size, &ret_next_table, &ret_next_index, NULL); if (status) { device_printf(dev, "i40e_aq_debug_dump status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); goto free_out; } /* copy info out of temp buffer */ bcopy(dump_buf, (caddr_t)final_buff + final_buff_len, ret_buff_size); final_buff_len += ret_buff_size; if (ret_next_table != curr_next_table) { /* We're done with the current 
table; we can dump out read data. */ sbuf_printf(buf, "%d:", curr_next_table); int bytes_printed = 0; while (bytes_printed <= final_buff_len) { sbuf_printf(buf, "%16D", ((caddr_t)final_buff + bytes_printed), ""); bytes_printed += 16; } sbuf_cat(buf, "\n"); /* The entire cluster has been read; we're finished */ if (ret_next_table == 0xFF) break; /* Otherwise clear the output buffer and continue reading */ bzero(final_buff, IXL_FINAL_BUFF_SIZE); final_buff_len = 0; } if (ret_next_index == 0xFFFFFFFF) ret_next_index = 0; bzero(dump_buf, sizeof(dump_buf)); curr_next_table = ret_next_table; curr_next_index = ret_next_index; } free_out: free(final_buff, M_IXL); out: error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_start_fw_lldp(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; status = i40e_aq_start_lldp(hw, false, NULL); if (status != I40E_SUCCESS) { switch (hw->aq.asq_last_status) { case I40E_AQ_RC_EEXIST: device_printf(pf->dev, "FW LLDP agent is already running\n"); break; case I40E_AQ_RC_EPERM: device_printf(pf->dev, "Device configuration forbids SW from starting " "the LLDP agent. Set the \"LLDP Agent\" UEFI HII " "attribute to \"Enabled\" to use this sysctl\n"); return (EINVAL); default: device_printf(pf->dev, "Starting FW LLDP agent failed: error: %s, %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EINVAL); } } atomic_clear_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); return (0); } static int ixl_stop_fw_lldp(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; if (hw->func_caps.npar_enable != 0) { device_printf(dev, "Disabling FW LLDP agent is not supported on this device\n"); return (EINVAL); } if ((hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE) == 0) { device_printf(dev, "Disabling FW LLDP agent is not supported in this FW version. 
Please update FW to enable this feature.\n"); return (EINVAL); } status = i40e_aq_stop_lldp(hw, true, false, NULL); if (status != I40E_SUCCESS) { if (hw->aq.asq_last_status != I40E_AQ_RC_EPERM) { device_printf(dev, "Disabling FW LLDP agent failed: error: %s, %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EINVAL); } device_printf(dev, "FW LLDP agent is already stopped\n"); } i40e_aq_set_dcb_parameters(hw, true, NULL); atomic_set_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); return (0); } static int ixl_sysctl_fw_lldp(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int state, new_state, error = 0; state = new_state = ((pf->state & IXL_PF_STATE_FW_LLDP_DISABLED) == 0); /* Read in new mode */ error = sysctl_handle_int(oidp, &new_state, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Already in requested state */ if (new_state == state) return (error); if (new_state == 0) return ixl_stop_fw_lldp(pf); return ixl_start_fw_lldp(pf); } static int ixl_sysctl_eee_enable(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int state, new_state; int sysctl_handle_status = 0; enum i40e_status_code cmd_status; /* Init states' values */ state = new_state = (!!(pf->state & IXL_PF_STATE_EEE_ENABLED)); /* Get requested mode */ sysctl_handle_status = sysctl_handle_int(oidp, &new_state, 0, req); if ((sysctl_handle_status) || (req->newptr == NULL)) return (sysctl_handle_status); /* Check if state has changed */ if (new_state == state) return (0); /* Set new state */ cmd_status = i40e_enable_eee(&pf->hw, (bool)(!!new_state)); /* Save new state or report error */ if (!cmd_status) { if (new_state == 0) atomic_clear_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); else atomic_set_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); } else if (cmd_status == I40E_ERR_CONFIG) return (EPERM); else return (EIO); return (0); } static int ixl_sysctl_set_link_active(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int error, state; state = !!(atomic_load_acq_32(&pf->state) & IXL_PF_STATE_LINK_ACTIVE_ON_DOWN); error = sysctl_handle_int(oidp, &state, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (state == 0) atomic_clear_32(&pf->state, IXL_PF_STATE_LINK_ACTIVE_ON_DOWN); else atomic_set_32(&pf->state, IXL_PF_STATE_LINK_ACTIVE_ON_DOWN); return (0); } int ixl_attach_get_link_status(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error = 0; if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "link restart failed, aq_err=%d\n", pf->hw.aq.asq_last_status); return error; } } /* Determine link state */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); /* Flow Control mode not set by user, read current FW settings */ if (pf->fc == -1) pf->fc = hw->fc.current_mode; return (0); } static int ixl_sysctl_do_pf_reset(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int requested = 0, error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Initiate the PF reset later in the admin task */ atomic_set_32(&pf->state, IXL_PF_STATE_PF_RESET_REQ); return (error); } static int ixl_sysctl_do_core_reset(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int requested = 0, error = 0; /* Read in new mode */ 
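	/*
	 * As in ixl_sysctl_do_pf_reset() above, the value written is not
	 * interpreted; any successful write triggers a core reset by
	 * setting I40E_GLGEN_RTRIG_CORER_MASK in I40E_GLGEN_RTRIG below.
	 */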
error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_CORER_MASK); return (error); } static int ixl_sysctl_do_global_reset(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int requested = 0, error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_GLOBR_MASK); return (error); } /* * Print out mapping of TX queue indexes and Rx queue indexes * to MSI-X vectors. */ static int ixl_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; struct sbuf *buf; int error = 0; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); for (int i = 0; i < vsi->num_rx_queues; i++) { rx_que = &vsi->rx_queues[i]; sbuf_printf(buf, "(rxq %3d): %d\n", i, rx_que->msix); } for (int i = 0; i < vsi->num_tx_queues; i++) { tx_que = &vsi->tx_queues[i]; sbuf_printf(buf, "(txq %3d): %d\n", i, tx_que->msix); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index 6896bd016f00..c28368bc09dc 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -1,2532 +1,2532 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher * Copyright (c) 2018 Patrick Kelsey * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $ */ /* Driver for VMware vmxnet3 virtual ethernet devices. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #include "if_vmxreg.h" #include "if_vmxvar.h" #include "opt_inet.h" #include "opt_inet6.h" #define VMXNET3_VMWARE_VENDOR_ID 0x15AD #define VMXNET3_VMWARE_DEVICE_ID 0x07B0 static pci_vendor_info_t vmxnet3_vendor_info_array[] = { PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"), /* required last entry */ PVID_END }; static void *vmxnet3_register(device_t); static int vmxnet3_attach_pre(if_ctx_t); static int vmxnet3_msix_intr_assign(if_ctx_t, int); static void vmxnet3_free_irqs(struct vmxnet3_softc *); static int vmxnet3_attach_post(if_ctx_t); static int vmxnet3_detach(if_ctx_t); static int vmxnet3_shutdown(if_ctx_t); static int vmxnet3_suspend(if_ctx_t); static int vmxnet3_resume(if_ctx_t); static int vmxnet3_alloc_resources(struct vmxnet3_softc *); static void vmxnet3_free_resources(struct vmxnet3_softc *); static int vmxnet3_check_version(struct vmxnet3_softc *); static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *); static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *); static void vmxnet3_init_txq(struct vmxnet3_softc *, int); static int vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void vmxnet3_init_rxq(struct vmxnet3_softc *, int, int); static int vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void vmxnet3_queues_free(if_ctx_t); static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *); static void vmxnet3_free_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *); static void vmxnet3_free_mcast_table(struct vmxnet3_softc *); static void vmxnet3_init_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_data(struct vmxnet3_softc *); static void vmxnet3_free_data(struct vmxnet3_softc *); static void vmxnet3_evintr(struct vmxnet3_softc *); static int vmxnet3_isc_txd_encap(void *, if_pkt_info_t); static void vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t); static int vmxnet3_isc_txd_credits_update(void *, uint16_t, bool); static int vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); static int vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t); static void vmxnet3_isc_rxd_refill(void *, if_rxd_update_t); static void vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); static int vmxnet3_legacy_intr(void *); static int vmxnet3_rxq_intr(void *); static int vmxnet3_event_intr(void *); static void vmxnet3_stop(if_ctx_t); static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static void vmxnet3_reinit_queues(struct vmxnet3_softc *); static int vmxnet3_enable_device(struct vmxnet3_softc *); static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *); static void vmxnet3_init(if_ctx_t); static void vmxnet3_multi_set(if_ctx_t); static int vmxnet3_mtu_set(if_ctx_t, uint32_t); static void vmxnet3_media_status(if_ctx_t, struct ifmediareq *); static int 
vmxnet3_media_change(if_ctx_t); static int vmxnet3_promisc_set(if_ctx_t, int); static uint64_t vmxnet3_get_counter(if_ctx_t, ift_counter); static void vmxnet3_update_admin_status(if_ctx_t); static void vmxnet3_txq_timer(if_ctx_t, uint16_t); static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int, uint16_t); static void vmxnet3_vlan_register(if_ctx_t, uint16_t); static void vmxnet3_vlan_unregister(if_ctx_t, uint16_t); static void vmxnet3_set_rxfilter(struct vmxnet3_softc *, int); static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *); static int vmxnet3_link_is_up(struct vmxnet3_softc *); static void vmxnet3_link_status(struct vmxnet3_softc *); static void vmxnet3_set_lladdr(struct vmxnet3_softc *); static void vmxnet3_get_lladdr(struct vmxnet3_softc *); static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_sysctl(struct vmxnet3_softc *); static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t, uint32_t); static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t); static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t, uint32_t); static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t); static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t); static int vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t); static int vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t); static void vmxnet3_link_intr_enable(if_ctx_t); static void vmxnet3_enable_intr(struct vmxnet3_softc *, int); static void vmxnet3_disable_intr(struct vmxnet3_softc *, int); static void vmxnet3_intr_enable_all(if_ctx_t); static void vmxnet3_intr_disable_all(if_ctx_t); typedef enum { VMXNET3_BARRIER_RD, VMXNET3_BARRIER_WR, VMXNET3_BARRIER_RDWR, } vmxnet3_barrier_t; static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t); static device_method_t vmxnet3_methods[] = { /* Device interface */ DEVMETHOD(device_register, vmxnet3_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t vmxnet3_driver = { "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc) }; DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0); IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array); MODULE_VERSION(vmx, 2); MODULE_DEPEND(vmx, pci, 1, 1, 1); MODULE_DEPEND(vmx, ether, 1, 1, 1); MODULE_DEPEND(vmx, iflib, 1, 1, 1); static device_method_t vmxnet3_iflib_methods[] = { DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free), DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre), DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post), DEVMETHOD(ifdi_detach, vmxnet3_detach), DEVMETHOD(ifdi_init, vmxnet3_init), DEVMETHOD(ifdi_stop, vmxnet3_stop), DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set), DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set), DEVMETHOD(ifdi_media_status, vmxnet3_media_status), DEVMETHOD(ifdi_media_change, vmxnet3_media_change), DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set), DEVMETHOD(ifdi_get_counter, 
vmxnet3_get_counter), DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status), DEVMETHOD(ifdi_timer, vmxnet3_txq_timer), DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable), DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable), DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all), DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all), DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign), DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register), DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister), DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown), DEVMETHOD(ifdi_suspend, vmxnet3_suspend), DEVMETHOD(ifdi_resume, vmxnet3_resume), DEVMETHOD_END }; static driver_t vmxnet3_iflib_driver = { "vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc) }; struct if_txrx vmxnet3_txrx = { .ift_txd_encap = vmxnet3_isc_txd_encap, .ift_txd_flush = vmxnet3_isc_txd_flush, .ift_txd_credits_update = vmxnet3_isc_txd_credits_update, .ift_rxd_available = vmxnet3_isc_rxd_available, .ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get, .ift_rxd_refill = vmxnet3_isc_rxd_refill, .ift_rxd_flush = vmxnet3_isc_rxd_flush, .ift_legacy_intr = vmxnet3_legacy_intr }; static struct if_shared_ctx vmxnet3_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = 512, .isc_tx_maxsize = VMXNET3_TX_MAXSIZE, .isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE, .isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE, /* * These values are used to configure the busdma tag used for * receive descriptors. Each receive descriptor only points to one * buffer. */ .isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */ .isc_rx_nsegments = 1, /* One mapping per descriptor */ .isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE, .isc_admin_intrcnt = 1, .isc_vendor_info = vmxnet3_vendor_info_array, .isc_driver_version = "2", .isc_driver = &vmxnet3_iflib_driver, .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY, /* * Number of receive queues per receive queue set, with associated * descriptor settings for each. */ .isc_nrxqs = 3, .isc_nfl = 2, /* one free list for each receive command queue */ .isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC}, .isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC}, .isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC}, /* * Number of transmit queues per transmit queue set, with associated * descriptor settings for each. 
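	 * Queue 0 of each set holds the completion ring and queue 1 the
	 * command ring; vmxnet3_attach_pre() later forces both rings to
	 * the same descriptor count.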
*/ .isc_ntxqs = 2, .isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC}, .isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC}, .isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC}, }; static void * vmxnet3_register(device_t dev) { return (&vmxnet3_sctx_init); } static int trunc_powerof2(int val) { return (1U << (fls(val) - 1)); } static int vmxnet3_attach_pre(if_ctx_t ctx) { device_t dev; if_softc_ctx_t scctx; struct vmxnet3_softc *sc; uint32_t intr_config; int error; dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); sc->vmx_dev = dev; sc->vmx_ctx = ctx; sc->vmx_sctx = iflib_get_sctx(ctx); sc->vmx_scctx = iflib_get_softc_ctx(ctx); sc->vmx_ifp = iflib_get_ifp(ctx); sc->vmx_media = iflib_get_media(ctx); scctx = sc->vmx_scctx; scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS; scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS; /* isc_tx_tso_size_max doesn't include possible vlan header */ scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE; scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE; scctx->isc_txrx = &vmxnet3_txrx; /* If 0, the iflib tunable was not set, so set to the default */ if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES; scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets); scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus); scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max); /* If 0, the iflib tunable was not set, so set to the default */ if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES; scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets); scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus); scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max); /* * Enforce that the transmit completion queue descriptor count is * the same as the transmit command queue descriptor count. */ scctx->isc_ntxd[0] = scctx->isc_ntxd[1]; scctx->isc_txqsizes[0] = sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0]; scctx->isc_txqsizes[1] = sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1]; /* * Enforce that the receive completion queue descriptor count is the * sum of the receive command queue descriptor counts, and that the * second receive command queue descriptor count is the same as the * first one. */ scctx->isc_nrxd[2] = scctx->isc_nrxd[1]; scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2]; scctx->isc_rxqsizes[0] = sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0]; scctx->isc_rxqsizes[1] = sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1]; scctx->isc_rxqsizes[2] = sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2]; /* * Initialize the max frame size and descriptor queue buffer * sizes. */ vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp)); scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; /* Map PCI BARs */ error = vmxnet3_alloc_resources(sc); if (error) goto fail; /* Check device versions */ error = vmxnet3_check_version(sc); if (error) goto fail; /* * The interrupt mode can be set in the hypervisor configuration via * the parameter ethernet.intrMode. */ intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG); sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03; /* * Configure the softc context to attempt to configure the interrupt * mode now indicated by intr_config. iflib will follow the usual * fallback path MSI-X -> MSI -> LEGACY, starting at the configured * starting mode. 
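	 */
	/*
	 * Only the low two bits of the GET_INTRCFG result encode the
	 * requested interrupt mode; bits 2-3 were saved above as the
	 * interrupt mask mode.
	 */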
*/ switch (intr_config & 0x03) { case VMXNET3_IT_AUTO: case VMXNET3_IT_MSIX: scctx->isc_msix_bar = pci_msix_table_bar(dev); break; case VMXNET3_IT_MSI: scctx->isc_msix_bar = -1; scctx->isc_disable_msix = 1; break; case VMXNET3_IT_LEGACY: scctx->isc_msix_bar = 0; break; } scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU; /* These capabilities are not enabled by default. */ scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; vmxnet3_get_lladdr(sc); iflib_set_mac(ctx, sc->vmx_lladdr); return (0); fail: /* * We must completely clean up anything allocated above as iflib * will not invoke any other driver entry points as a result of this * failure. */ vmxnet3_free_resources(sc); return (error); } static int vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; int error; int i; char irq_name[16]; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_nrxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "rxq%d", i); rxq = &sc->vmx_rxq[i]; error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1, IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register rxq %d interrupt handler\n", i); return (error); } } for (i = 0; i < scctx->isc_ntxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "txq%d", i); /* * Don't provide the corresponding rxq irq for reference - * we want the transmit task to be attached to a task queue * that is different from the one used by the corresponding * rxq irq. That is because the TX doorbell writes are very * expensive as virtualized MMIO operations, so we want to * be able to defer them to another core when possible so * that they don't steal receive processing cycles during * stack turnarounds like TCP ACK generation. The other * piece to this approach is enabling the iflib abdicate * option (currently via an interface-specific * tunable/sysctl). 
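		 */
		/* A NULL irq keeps the tx task off the rxq's taskqueue. */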
*/ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, irq_name); } error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq, scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0, "event"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register event interrupt handler\n"); return (error); } return (0); } static void vmxnet3_free_irqs(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; int i; scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &sc->vmx_rxq[i]; iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq); } iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq); } static int vmxnet3_attach_post(if_ctx_t ctx) { if_softc_ctx_t scctx; struct vmxnet3_softc *sc; int error; scctx = iflib_get_softc_ctx(ctx); sc = iflib_get_softc(ctx); if (scctx->isc_nrxqsets > 1) sc->vmx_flags |= VMXNET3_FLAG_RSS; error = vmxnet3_alloc_data(sc); if (error) goto fail; vmxnet3_set_interrupt_idx(sc); vmxnet3_setup_sysctl(sc); ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO); fail: return (error); } static int vmxnet3_detach(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); vmxnet3_free_irqs(sc); vmxnet3_free_data(sc); vmxnet3_free_resources(sc); return (0); } static int vmxnet3_shutdown(if_ctx_t ctx) { return (0); } static int vmxnet3_suspend(if_ctx_t ctx) { return (0); } static int vmxnet3_resume(if_ctx_t ctx) { return (0); } static int vmxnet3_alloc_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; rid = PCIR_BAR(0); sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res0 == NULL) { device_printf(dev, "could not map BAR0 memory\n"); return (ENXIO); } sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0); sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0); rid = PCIR_BAR(1); sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res1 == NULL) { device_printf(dev, "could not map BAR1 memory\n"); return (ENXIO); } sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1); sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1); return (0); } static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->vmx_res0), sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->vmx_res1), sc->vmx_res1); sc->vmx_res1 = NULL; } } static int vmxnet3_check_version(struct vmxnet3_softc *sc) { device_t dev; uint32_t version; dev = sc->vmx_dev; version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported hardware version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1); version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported UPT version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1); return (0); } static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int intr_idx; int i; scctx = sc->vmx_scctx; /* * There is always one interrupt per receive queue, assigned * starting with the first interrupt. 
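	 * (So receive queue i simply uses interrupt index i, as the first
	 * loop below arranges.)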
When there is only one * interrupt available, the event interrupt shares the receive queue * interrupt, otherwise it uses the interrupt following the last * receive queue interrupt. Transmit queues are not assigned * interrupts, so they are given indexes beyond the indexes that * correspond to the real interrupts. */ /* The event interrupt is always the last vector. */ sc->vmx_event_intr_idx = scctx->isc_vectors - 1; intr_idx = 0; for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxq->vxrxq_intr_idx = intr_idx; rxs->intr_idx = rxq->vxrxq_intr_idx; } /* * Assign the tx queues interrupt indexes above what we are actually * using. These interrupts will never be enabled. */ intr_idx = scctx->isc_vectors; for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txq->vxtxq_intr_idx = intr_idx; txs->intr_idx = txq->vxtxq_intr_idx; } } static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int size; int error; scctx = sc->vmx_scctx; /* * The txq and rxq shared data areas must be allocated contiguously * as vmxnet3_driver_shared contains only a single address member * for the shared queue data area. */ size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) + scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0); if (error) { device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n"); return (error); } return (0); } static void vmxnet3_init_txq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_txqueue *txq; struct vmxnet3_comp_ring *txc; struct vmxnet3_txring *txr; if_softc_ctx_t scctx; txq = &sc->vmx_txq[q]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; scctx = sc->vmx_scctx; snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d", device_get_nameunit(sc->vmx_dev), q); txq->vxtxq_sc = sc; txq->vxtxq_id = q; txc->vxcr_ndesc = scctx->isc_ntxd[0]; txr->vxtxr_ndesc = scctx->isc_ntxd[1]; } static int vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct vmxnet3_softc *sc; int q; int error; caddr_t kva; sc = iflib_get_softc(ctx); /* Allocate the array of transmit queues */ sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_txq == NULL) return (ENOMEM); /* Initialize driver state for each transmit queue */ for (q = 0; q < ntxqsets; q++) vmxnet3_init_txq(sc, q); /* * Allocate queue state that is shared with the device. This check * and call is performed in both vmxnet3_tx_queues_alloc() and * vmxnet3_rx_queues_alloc() so that we don't have to care which * order iflib invokes those routines in. 
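	 * Whichever of vmxnet3_tx_queues_alloc() and
	 * vmxnet3_rx_queues_alloc() iflib calls first sees idi_size == 0
	 * and performs the allocation; the other then skips it.
	 */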
*/ if (sc->vmx_qs_dma.idi_size == 0) { error = vmxnet3_queues_shared_alloc(sc); if (error) return (error); } kva = sc->vmx_qs_dma.idi_vaddr; for (q = 0; q < ntxqsets; q++) { sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva; kva += sizeof(struct vmxnet3_txq_shared); } /* Record descriptor ring vaddrs and paddrs */ for (q = 0; q < ntxqsets; q++) { struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txq = &sc->vmx_txq[q]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; /* Completion ring */ txc->vxcr_u.txcd = (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0]; txc->vxcr_paddr = paddrs[q * ntxqs + 0]; /* Command ring */ txr->vxtxr_txd = (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1]; txr->vxtxr_paddr = paddrs[q * ntxqs + 1]; } return (0); } static void vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxring *rxr; if_softc_ctx_t scctx; int i; rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; scctx = sc->vmx_scctx; snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d", device_get_nameunit(sc->vmx_dev), q); rxq->vxrxq_sc = sc; rxq->vxrxq_id = q; /* * First rxq is the completion queue, so there are nrxqs - 1 command * rings starting at iflib queue id 1. */ rxc->vxcr_ndesc = scctx->isc_nrxd[0]; for (i = 0; i < nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1]; } } static int vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; int q; int i; int error; caddr_t kva; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; /* Allocate the array of receive queues */ sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_rxq == NULL) return (ENOMEM); /* Initialize driver state for each receive queue */ for (q = 0; q < nrxqsets; q++) vmxnet3_init_rxq(sc, q, nrxqs); /* * Allocate queue state that is shared with the device. This check * and call is performed in both vmxnet3_tx_queues_alloc() and * vmxnet3_rx_queues_alloc() so that we don't have to care which * order iflib invokes those routines in. 
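	 * The rx shared areas sit immediately after the tx shared areas in
	 * this allocation, hence the isc_ntxqsets * sizeof(struct
	 * vmxnet3_txq_shared) offset applied to kva below.
	 */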
if (sc->vmx_qs_dma.idi_size == 0) { error = vmxnet3_queues_shared_alloc(sc); if (error) return (error); } kva = sc->vmx_qs_dma.idi_vaddr + scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared); for (q = 0; q < nrxqsets; q++) { sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva; kva += sizeof(struct vmxnet3_rxq_shared); } /* Record descriptor ring vaddrs and paddrs */ for (q = 0; q < nrxqsets; q++) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; /* Completion ring */ rxc->vxcr_u.rxcd = (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0]; rxc->vxcr_paddr = paddrs[q * nrxqs + 0]; /* Command ring(s) */ for (i = 0; i < nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_rxd = (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i]; rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i]; } } return (0); } static void vmxnet3_queues_free(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); /* Free queue state area that is shared with the device */ if (sc->vmx_qs_dma.idi_size != 0) { iflib_dma_free(&sc->vmx_qs_dma); sc->vmx_qs_dma.idi_size = 0; } /* Free array of receive queues */ if (sc->vmx_rxq != NULL) { free(sc->vmx_rxq, M_DEVBUF); sc->vmx_rxq = NULL; } /* Free array of transmit queues */ if (sc->vmx_txq != NULL) { free(sc->vmx_txq, M_DEVBUF); sc->vmx_txq = NULL; } } static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc) { device_t dev; size_t size; int error; dev = sc->vmx_dev; /* Top level state structure shared with the device */ size = sizeof(struct vmxnet3_driver_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0); if (error) { device_printf(dev, "cannot alloc shared memory\n"); return (error); } sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr; /* RSS table state shared with the device */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { size = sizeof(struct vmxnet3_rss_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_rss_dma, 0); if (error) { device_printf(dev, "cannot alloc rss shared memory\n"); return (error); } sc->vmx_rss = (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr; } return (0); } static void vmxnet3_free_shared_data(struct vmxnet3_softc *sc) { /* Free RSS table state shared with the device */ if (sc->vmx_rss != NULL) { iflib_dma_free(&sc->vmx_rss_dma); sc->vmx_rss = NULL; } /* Free top level state structure shared with the device */ if (sc->vmx_ds != NULL) { iflib_dma_free(&sc->vmx_ds_dma); sc->vmx_ds = NULL; } } static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc) { int error; /* Multicast table state shared with the device */ error = iflib_dma_alloc_align(sc->vmx_ctx, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0); if (error) device_printf(sc->vmx_dev, "unable to alloc multicast table\n"); else sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr; return (error); } static void vmxnet3_free_mcast_table(struct vmxnet3_softc *sc) { /* Free multicast table state shared with the device */ if (sc->vmx_mcast != NULL) { iflib_dma_free(&sc->vmx_mcast_dma); sc->vmx_mcast = NULL; } } static void vmxnet3_init_shared_data(struct vmxnet3_softc *sc) { struct vmxnet3_driver_shared *ds; if_softc_ctx_t scctx; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; ds = sc->vmx_ds; scctx = sc->vmx_scctx; /* * Initialize fields of the shared data that remain the same across * reinits.
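	 * (Per-reinit fields such as the MTU and the UPT feature bits are
	 * set in vmxnet3_reinit_shared_data() instead.)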
Note the shared data is zero'd when allocated. */ ds->magic = VMXNET3_REV1_MAGIC; /* DriverInfo */ ds->version = VMXNET3_DRIVER_VERSION; ds->guest = VMXNET3_GOS_FREEBSD | #ifdef __LP64__ VMXNET3_GOS_64BIT; #else VMXNET3_GOS_32BIT; #endif ds->vmxnet3_revision = 1; ds->upt_version = 1; /* Misc. conf */ ds->driver_data = vtophys(sc); ds->driver_data_len = sizeof(struct vmxnet3_softc); ds->queue_shared = sc->vmx_qs_dma.idi_paddr; ds->queue_shared_len = sc->vmx_qs_dma.idi_size; ds->nrxsg_max = IFLIB_MAX_RX_SEGS; /* RSS conf */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->rss.version = 1; ds->rss.paddr = sc->vmx_rss_dma.idi_paddr; ds->rss.len = sc->vmx_rss_dma.idi_size; } /* Interrupt control. */ ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; /* * Total number of interrupt indexes we are using in the shared * config data, even though we don't actually allocate interrupt * resources for the tx queues. Some versions of the device will * fail to initialize successfully if interrupt indexes are used in * the shared config that exceed the number of interrupts configured * here. */ ds->nintr = (scctx->isc_vectors == 1) ? 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1); ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < ds->nintr; i++) ds->modlevel[i] = UPT1_IMOD_ADAPTIVE; /* Receive filter. */ ds->mcast_table = sc->vmx_mcast_dma.idi_paddr; ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size; /* Tx queues */ for (i = 0; i < scctx->isc_ntxqsets; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr; txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc; txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr; txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc; txs->driver_data = vtophys(txq); txs->driver_data_len = sizeof(struct vmxnet3_txqueue); } /* Rx queues */ for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr; rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc; rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr; rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc; rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr; rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc; rxs->driver_data = vtophys(rxq); rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue); } } static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc) { /* * Use the same key as the Linux driver until FreeBSD can do * RSS (presumably Toeplitz) in software. */ static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = { 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac, 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28, 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70, 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3, 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9, }; if_softc_ctx_t scctx; struct vmxnet3_rss_shared *rss; #ifdef RSS uint8_t rss_algo; #endif int i; scctx = sc->vmx_scctx; rss = sc->vmx_rss; rss->hash_type = UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 | UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6; rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ; rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE; rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; #ifdef RSS /* * If the software RSS is configured to anything else other than * Toeplitz, then just do Toeplitz in "hardware" for the sake of * the packet distribution, but report the hash as opaque to * disengage from the software RSS. 
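	 * (M_HASHTYPE_OPAQUE_HASH advertises that a usable hash value is
	 * present without claiming which algorithm produced it.)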
*/ rss_algo = rss_gethashalgo(); if (rss_algo == RSS_HASH_TOEPLITZ) { rss_getkey(rss->hash_key); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) { rss->ind_table[i] = rss_get_indirection_to_bucket(i) % scctx->isc_nrxqsets; } sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS; } else #endif { memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) rss->ind_table[i] = i % scctx->isc_nrxqsets; sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS; } } static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc) { - struct ifnet *ifp; + if_t ifp; struct vmxnet3_driver_shared *ds; if_softc_ctx_t scctx; ifp = sc->vmx_ifp; ds = sc->vmx_ds; scctx = sc->vmx_scctx; - ds->mtu = ifp->if_mtu; + ds->mtu = if_getmtu(ifp); ds->ntxqueue = scctx->isc_ntxqsets; ds->nrxqueue = scctx->isc_nrxqsets; ds->upt_features = 0; - if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) + if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) ds->upt_features |= UPT1_F_CSUM; - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) + if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) ds->upt_features |= UPT1_F_VLAN; - if (ifp->if_capenable & IFCAP_LRO) + if (if_getcapenable(ifp) & IFCAP_LRO) ds->upt_features |= UPT1_F_LRO; if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->upt_features |= UPT1_F_RSS; vmxnet3_reinit_rss_shared_data(sc); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr); vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH, (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32); } static int vmxnet3_alloc_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_shared_data(sc); if (error) return (error); error = vmxnet3_alloc_mcast_table(sc); if (error) return (error); vmxnet3_init_shared_data(sc); return (0); } static void vmxnet3_free_data(struct vmxnet3_softc *sc) { vmxnet3_free_mcast_table(sc); vmxnet3_free_shared_data(sc); } static void vmxnet3_evintr(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txq_shared *ts; struct vmxnet3_rxq_shared *rs; uint32_t event; dev = sc->vmx_dev; /* Clear events. */ event = sc->vmx_ds->event; vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event); if (event & VMXNET3_EVENT_LINK) vmxnet3_link_status(sc); if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) { vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS); ts = sc->vmx_txq[0].vxtxq_ts; if (ts->stopped != 0) device_printf(dev, "Tx queue error %#x\n", ts->error); rs = sc->vmx_rxq[0].vxrxq_rs; if (rs->stopped != 0) device_printf(dev, "Rx queue error %#x\n", rs->error); /* XXX - rely on iflib watchdog to reset us? */ device_printf(dev, "Rx/Tx queue error event ...
" "waiting for iflib watchdog reset\n"); } if (event & VMXNET3_EVENT_DIC) device_printf(dev, "device implementation change event\n"); if (event & VMXNET3_EVENT_DEBUG) device_printf(dev, "debug event\n"); } static int vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_txdesc *txd, *sop; bus_dma_segment_t *segs; int nsegs; int pidx; int hdrlen; int i; int gen; sc = vsc; txq = &sc->vmx_txq[pi->ipi_qsidx]; txr = &txq->vxtxq_cmd_ring; segs = pi->ipi_segs; nsegs = pi->ipi_nsegs; pidx = pi->ipi_pidx; KASSERT(nsegs <= VMXNET3_TX_MAXSEGS, ("%s: packet with too many segments %d", __func__, nsegs)); sop = &txr->vxtxr_txd[pidx]; gen = txr->vxtxr_gen ^ 1; /* Owned by cpu (yet) */ for (i = 0; i < nsegs; i++) { txd = &txr->vxtxr_txd[pidx]; txd->addr = segs[i].ds_addr; txd->len = segs[i].ds_len; txd->gen = gen; txd->dtype = 0; txd->offload_mode = VMXNET3_OM_NONE; txd->offload_pos = 0; txd->hlen = 0; txd->eop = 0; txd->compreq = 0; txd->vtag_mode = 0; txd->vtag = 0; if (++pidx == txr->vxtxr_ndesc) { pidx = 0; txr->vxtxr_gen ^= 1; } gen = txr->vxtxr_gen; } txd->eop = 1; txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR); pi->ipi_new_pidx = pidx; /* * VLAN */ if (pi->ipi_mflags & M_VLANTAG) { sop->vtag_mode = 1; sop->vtag = pi->ipi_vtag; } /* * TSO and checksum offloads */ hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen; if (pi->ipi_csum_flags & CSUM_TSO) { sop->offload_mode = VMXNET3_OM_TSO; sop->hlen = hdrlen + pi->ipi_tcp_hlen; sop->offload_pos = pi->ipi_tso_segsz; } else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6)) { sop->offload_mode = VMXNET3_OM_CSUM; sop->hlen = hdrlen; sop->offload_pos = hdrlen + ((pi->ipi_ipproto == IPPROTO_TCP) ? offsetof(struct tcphdr, th_sum) : offsetof(struct udphdr, uh_sum)); } /* Finally, change the ownership. */ vmxnet3_barrier(sc, VMXNET3_BARRIER_WR); sop->gen ^= 1; return (0); } static void vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; sc = vsc; txq = &sc->vmx_txq[txqid]; /* * pidx is what we last set ipi_new_pidx to in * vmxnet3_isc_txd_encap() */ /* * Avoid expensive register updates if the flush request is * redundant. */ if (txq->vxtxq_last_flush == pidx) return; txq->vxtxq_last_flush = pidx; vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx); } static int vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_comp_ring *txc; struct vmxnet3_txcompdesc *txcd; struct vmxnet3_txring *txr; int processed; sc = vsc; txq = &sc->vmx_txq[txqid]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; /* * If clear is true, we need to report the number of TX command ring * descriptors that have been processed by the device. If clear is * false, we just need to report whether or not at least one TX * command ring descriptor has been processed by the device. 
*/ processed = 0; for (;;) { txcd = &txc->vxcr_u.txcd[txc->vxcr_next]; if (txcd->gen != txc->vxcr_gen) break; else if (!clear) return (1); vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++txc->vxcr_next == txc->vxcr_ndesc) { txc->vxcr_next = 0; txc->vxcr_gen ^= 1; } if (txcd->eop_idx < txr->vxtxr_next) processed += txr->vxtxr_ndesc - (txr->vxtxr_next - txcd->eop_idx) + 1; else processed += txcd->eop_idx - txr->vxtxr_next + 1; txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc; } return (processed); } static int vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; int avail; int completed_gen; #ifdef INVARIANTS int expect_sop = 1; #endif sc = vsc; rxq = &sc->vmx_rxq[rxqid]; rxc = &rxq->vxrxq_comp_ring; avail = 0; completed_gen = rxc->vxcr_gen; for (;;) { rxcd = &rxc->vxcr_u.rxcd[idx]; if (rxcd->gen != completed_gen) break; vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); #ifdef INVARIANTS if (expect_sop) KASSERT(rxcd->sop, ("%s: expected sop", __func__)); else KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__)); expect_sop = rxcd->eop; #endif if (rxcd->eop && (rxcd->len != 0)) avail++; if (avail > budget) break; if (++idx == rxc->vxcr_ndesc) { idx = 0; completed_gen ^= 1; } } return (avail); } static int vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; if_rxd_frag_t frag; int cqidx; uint16_t total_len; uint8_t nfrags; uint8_t i; uint8_t flid; sc = vsc; scctx = sc->vmx_scctx; rxq = &sc->vmx_rxq[ri->iri_qsidx]; rxc = &rxq->vxrxq_comp_ring; /* * Get a single packet starting at the given index in the completion * queue. That we have been called indicates that * vmxnet3_isc_rxd_available() has already verified that either * there is a complete packet available starting at the given index, * or there are one or more zero length packets starting at the * given index followed by a complete packet, so no verification of * ownership of the descriptors (and no associated read barrier) is * required here. */ cqidx = ri->iri_cidx; rxcd = &rxc->vxcr_u.rxcd[cqidx]; while (rxcd->len == 0) { KASSERT(rxcd->sop && rxcd->eop, ("%s: zero-length packet without both sop and eop set", __func__)); rxc->vxcr_zero_length++; if (++cqidx == rxc->vxcr_ndesc) { cqidx = 0; rxc->vxcr_gen ^= 1; } rxcd = &rxc->vxcr_u.rxcd[cqidx]; } KASSERT(rxcd->sop, ("%s: expected sop", __func__)); /* * RSS and flow ID. * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should * be used only if the software RSS is enabled and it uses the same * algorithm and the hash key as the "hardware". If the software RSS * is not enabled, then it's simply pointless to use those types. * If it's enabled but with different parameters, then hash values will * not match. 
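* * For example, when software RSS is active with the same Toeplitz key as the host, a VMXNET3_RCD_RSS_TYPE_TCPIPV4 completion can be reported as M_HASHTYPE_RSS_TCP_IPV4 and rxcd->rss_hash reused by the stack; without that guarantee, only M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH are reported below.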
*/ ri->iri_flowid = rxcd->rss_hash; #ifdef RSS if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) { switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_NONE: ri->iri_flowid = ri->iri_qsidx; ri->iri_rsstype = M_HASHTYPE_NONE; break; case VMXNET3_RCD_RSS_TYPE_IPV4: ri->iri_rsstype = M_HASHTYPE_RSS_IPV4; break; case VMXNET3_RCD_RSS_TYPE_TCPIPV4: ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4; break; case VMXNET3_RCD_RSS_TYPE_IPV6: ri->iri_rsstype = M_HASHTYPE_RSS_IPV6; break; case VMXNET3_RCD_RSS_TYPE_TCPIPV6: ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6; break; default: ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; break; } } else #endif { switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_NONE: ri->iri_flowid = ri->iri_qsidx; ri->iri_rsstype = M_HASHTYPE_NONE; break; default: ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; break; } } /* * The queue numbering scheme used for rxcd->qid is as follows: * - All of the command ring 0s are numbered [0, nrxqsets - 1] * - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1] * * Thus, rxcd->qid less than nrxqsets indicates command ring (and * flid) 0, and rxcd->qid greater than or equal to nrxqsets * indicates command ring (and flid) 1. */ nfrags = 0; total_len = 0; do { rxcd = &rxc->vxcr_u.rxcd[cqidx]; KASSERT(rxcd->gen == rxc->vxcr_gen, ("%s: generation mismatch", __func__)); KASSERT(nfrags < IFLIB_MAX_RX_SEGS, ("%s: too many fragments", __func__)); if (__predict_true(rxcd->len != 0)) { frag = &ri->iri_frags[nfrags]; flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0; frag->irf_flid = flid; frag->irf_idx = rxcd->rxd_idx; frag->irf_len = rxcd->len; total_len += rxcd->len; nfrags++; } else { rxc->vcxr_zero_length_frag++; } if (++cqidx == rxc->vxcr_ndesc) { cqidx = 0; rxc->vxcr_gen ^= 1; } } while (!rxcd->eop); ri->iri_cidx = cqidx; ri->iri_nfrags = nfrags; ri->iri_len = total_len; /* * If there's an error, the last descriptor in the packet will * have the error indicator set. In this case, set all * fragment lengths to zero. This will cause iflib to discard * the packet, but process all associated descriptors through * the refill mechanism. */ if (__predict_false(rxcd->error)) { rxc->vxcr_pkt_errors++; for (i = 0; i < nfrags; i++) { frag = &ri->iri_frags[i]; frag->irf_len = 0; } } else { /* Checksum offload information is in the last descriptor. */ if (!rxcd->no_csum) { uint32_t csum_flags = 0; if (rxcd->ipv4) { csum_flags |= CSUM_IP_CHECKED; if (rxcd->ipcsum_ok) csum_flags |= CSUM_IP_VALID; } if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) { csum_flags |= CSUM_L4_CALC; if (rxcd->csum_ok) { csum_flags |= CSUM_L4_VALID; ri->iri_csum_data = 0xffff; } } ri->iri_csum_flags = csum_flags; } /* VLAN information is in the last descriptor. */ if (rxcd->vlan) { ri->iri_flags |= M_VLANTAG; ri->iri_vtag = rxcd->vtag; } } return (0); } static void vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_rxdesc *rxd; uint64_t *paddrs; int count; int len; int idx; int i; uint8_t flid; uint8_t btype; count = iru->iru_count; len = iru->iru_buf_size; flid = iru->iru_flidx; paddrs = iru->iru_paddrs; sc = vsc; rxq = &sc->vmx_rxq[iru->iru_qsidx]; rxr = &rxq->vxrxq_cmd_ring[flid]; rxd = rxr->vxrxr_rxd; /* * Command ring 0 is filled with BTYPE_HEAD descriptors, and * command ring 1 is filled with BTYPE_BODY descriptors. */ btype = (flid == 0) ? 
VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY; /* * The refill entries from iflib will advance monotonically, * but the refilled descriptors may not be contiguous due to * earlier skipping of descriptors by the device. The refill * entries from iflib need an entire state update, while the * descriptors previously skipped by the device only need to * have their generation numbers updated. */ idx = rxr->vxrxr_refill_start; i = 0; do { if (idx == iru->iru_idxs[i]) { rxd[idx].addr = paddrs[i]; rxd[idx].len = len; rxd[idx].btype = btype; i++; } else rxr->vxrxr_desc_skips++; rxd[idx].gen = rxr->vxrxr_gen; if (++idx == rxr->vxrxr_ndesc) { idx = 0; rxr->vxrxr_gen ^= 1; } } while (i != count); rxr->vxrxr_refill_start = idx; } static void vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx) { struct vmxnet3_softc *sc; bus_size_t r; sc = vsc; if (flid == 0) r = VMXNET3_BAR0_RXH1(rxqid); else r = VMXNET3_BAR0_RXH2(rxqid); vmxnet3_write_bar0(sc, r, pidx); } static int vmxnet3_legacy_intr(void *xsc) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; if_ctx_t ctx; sc = xsc; scctx = sc->vmx_scctx; ctx = sc->vmx_ctx; /* * When there is only a single interrupt configured, this routine * runs in fast interrupt context, following which the rxq 0 task * will be enqueued. */ if (scctx->isc_intr == IFLIB_INTR_LEGACY) { if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0) return (FILTER_HANDLED); } if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_intr_disable_all(ctx); if (sc->vmx_ds->event != 0) iflib_admin_intr_deferred(ctx); /* * XXX - When there is both rxq and event activity, do we care * whether the rxq 0 task or the admin task re-enables the interrupt * first? */ return (FILTER_SCHEDULE_THREAD); } static int vmxnet3_rxq_intr(void *vrxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; rxq = vrxq; sc = rxq->vxrxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx); return (FILTER_SCHEDULE_THREAD); } static int vmxnet3_event_intr(void *vsc) { struct vmxnet3_softc *sc; sc = vsc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx); /* * The work will be done via vmxnet3_update_admin_status(), and the * interrupt will be re-enabled in vmxnet3_link_intr_enable(). 
*/ return (FILTER_SCHEDULE_THREAD); } static void vmxnet3_stop(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); sc->vmx_link_active = 0; vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE); vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET); } static void vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txq->vxtxq_last_flush = -1; txr = &txq->vxtxq_cmd_ring; txr->vxtxr_next = 0; txr->vxtxr_gen = VMXNET3_INIT_GEN; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ txc = &txq->vxtxq_comp_ring; txc->vxcr_next = 0; txc->vxcr_gen = VMXNET3_INIT_GEN; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ } static void vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int i; /* * The descriptors will be populated with buffers during a * subsequent invocation of vmxnet3_isc_rxd_refill() */ for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_gen = VMXNET3_INIT_GEN; rxr->vxrxr_desc_skips = 0; rxr->vxrxr_refill_start = 0; /* * iflib has zeroed out the descriptor array during the * prior attach or stop */ } for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_gen = 0; rxr->vxrxr_desc_skips = 0; rxr->vxrxr_refill_start = 0; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); } rxc = &rxq->vxrxq_comp_ring; rxc->vxcr_next = 0; rxc->vxcr_gen = VMXNET3_INIT_GEN; rxc->vxcr_zero_length = 0; rxc->vcxr_zero_length_frag = 0; rxc->vxcr_pkt_errors = 0; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ } static void vmxnet3_reinit_queues(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int q; scctx = sc->vmx_scctx; for (q = 0; q < scctx->isc_ntxqsets; q++) vmxnet3_txinit(sc, &sc->vmx_txq[q]); for (q = 0; q < scctx->isc_nrxqsets; q++) vmxnet3_rxinit(sc, &sc->vmx_rxq[q]); } static int vmxnet3_enable_device(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int q; scctx = sc->vmx_scctx; if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) { device_printf(sc->vmx_dev, "device enable command failed!\n"); return (1); } /* Reset the Rx queue heads. */ for (q = 0; q < scctx->isc_nrxqsets; q++) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0); vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0); } return (0); } static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc) { - struct ifnet *ifp; + if_t ifp; ifp = sc->vmx_ifp; vmxnet3_set_rxfilter(sc, if_getflags(ifp)); - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); else bzero(sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } static void vmxnet3_init(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); /* Use the current MAC address. 
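* (as returned by if_getlladdr(); it may differ from the address read from the device at attach time if it has since been changed administratively)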
*/ - bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN); + bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN); vmxnet3_set_lladdr(sc); vmxnet3_reinit_shared_data(sc); vmxnet3_reinit_queues(sc); vmxnet3_enable_device(sc); vmxnet3_reinit_rxfilters(sc); vmxnet3_link_status(sc); } static void vmxnet3_multi_set(if_ctx_t ctx) { vmxnet3_set_rxfilter(iflib_get_softc(ctx), if_getflags(iflib_get_ifp(ctx))); } static int vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) return (EINVAL); /* * Update the max frame size so that the rx mbuf size is * chosen based on the new mtu during the interface init that * will occur after this routine returns. */ scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; /* RX completion queue - n/a */ scctx->isc_rxd_buf_size[0] = 0; /* * For header-type descriptors (used for first segment of * packet), let iflib determine the buffer size based on the * max frame size. */ scctx->isc_rxd_buf_size[1] = 0; /* * For body-type descriptors (used for jumbo frames and LRO), * always use page-sized buffers. */ scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE; return (0); } static void vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (vmxnet3_link_is_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_AUTO; } else ifmr->ifm_active |= IFM_NONE; } static int vmxnet3_media_change(if_ctx_t ctx) { /* Ignore. */ return (0); } static int vmxnet3_promisc_set(if_ctx_t ctx, int flags) { vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags); return (0); } static uint64_t vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt) { if_t ifp = iflib_get_ifp(ctx); if (cnt < IFCOUNTERS) return if_get_counter_default(ifp, cnt); return (0); } static void vmxnet3_update_admin_status(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); vmxnet3_refresh_host_stats(sc); } static void vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid) { /* Host stats refresh is global, so just trigger it on txq 0 */ if (qid == 0) vmxnet3_refresh_host_stats(iflib_get_softc(ctx)); } static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag) { int idx, bit; if (tag == 0 || tag > 4095) return; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; /* Update our private VLAN bitvector. 
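* For example, tag 100 maps to word idx = (100 >> 5) & 0x7F = 3, bit = 100 & 0x1F = 4 of vmx_vlan_filter.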
*/ if (add) sc->vmx_vlan_filter[idx] |= (1 << bit); else sc->vmx_vlan_filter[idx] &= ~(1 << bit); } static void vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag) { vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag); } static void vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag) { vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag); } static u_int vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count) { struct vmxnet3_softc *sc = arg; if (count < VMXNET3_MULTICAST_MAX) bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN], ETHER_ADDR_LEN); return (1); } static void vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags) { - struct ifnet *ifp; + if_t ifp; struct vmxnet3_driver_shared *ds; u_int mode; ifp = sc->vmx_ifp; ds = sc->vmx_ds; mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST; if (flags & IFF_PROMISC) mode |= VMXNET3_RXMODE_PROMISC; if (flags & IFF_ALLMULTI) mode |= VMXNET3_RXMODE_ALLMULTI; else { int cnt; cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc); if (cnt >= VMXNET3_MULTICAST_MAX) { cnt = 0; mode |= VMXNET3_RXMODE_ALLMULTI; } else if (cnt > 0) mode |= VMXNET3_RXMODE_MCAST; ds->mcast_tablelen = cnt * ETHER_ADDR_LEN; } ds->rxmode = mode; vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER); vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE); } static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc) { vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS); } static int vmxnet3_link_is_up(struct vmxnet3_softc *sc) { uint32_t status; status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK); return !!(status & 0x1); } static void vmxnet3_link_status(struct vmxnet3_softc *sc) { if_ctx_t ctx; uint64_t speed; int link; ctx = sc->vmx_ctx; link = vmxnet3_link_is_up(sc); speed = IF_Gbps(10); if (link != 0 && sc->vmx_link_active == 0) { sc->vmx_link_active = 1; iflib_link_state_change(ctx, LINK_STATE_UP, speed); } else if (link == 0 && sc->vmx_link_active != 0) { sc->vmx_link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, speed); } } static void vmxnet3_set_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = sc->vmx_lladdr[0]; ml |= sc->vmx_lladdr[1] << 8; ml |= sc->vmx_lladdr[2] << 16; ml |= sc->vmx_lladdr[3] << 24; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml); mh = sc->vmx_lladdr[4]; mh |= sc->vmx_lladdr[5] << 8; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh); } static void vmxnet3_get_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL); mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH); sc->vmx_lladdr[0] = ml; sc->vmx_lladdr[1] = ml >> 8; sc->vmx_lladdr[2] = ml >> 16; sc->vmx_lladdr[3] = ml >> 24; sc->vmx_lladdr[4] = mh; sc->vmx_lladdr[5] = mh >> 8; } static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *txsnode; struct sysctl_oid_list *list, *txslist; struct UPT1_TxStats *txstats; char namebuf[16]; txstats = &txq->vxtxq_ts->stats; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue"); txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node); /* * Add statistics reported by the host. These are updated by the * iflib txq timer on txq 0. 
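* (vmxnet3_refresh_host_stats() issues VMXNET3_CMD_GET_STATS, which has the device refresh these counters in the shared queue area.)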
*/ txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics"); txslist = SYSCTL_CHILDREN(txsnode); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD, &txstats->TSO_packets, "TSO packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD, &txstats->TSO_bytes, "TSO bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &txstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &txstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &txstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &txstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD, &txstats->error, "Errors"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD, &txstats->discard, "Discards"); } static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *rxsnode; struct sysctl_oid_list *list, *rxslist; struct UPT1_RxStats *rxstats; char namebuf[16]; rxstats = &rxq->vxrxq_rs->stats; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue"); rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node); /* * Add statistics reported by the host. These are updated by the * iflib txq timer on txq 0. */ rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics"); rxslist = SYSCTL_CHILDREN(rxsnode); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD, &rxstats->LRO_packets, "LRO packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD, &rxstats->LRO_bytes, "LRO bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &rxstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &rxstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &rxstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &rxstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD, &rxstats->bcast_packets, "Broadcast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD, &rxstats->bcast_bytes, "Broadcast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD, &rxstats->nobuffer, "No buffer"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD, &rxstats->error, "Errors"); } static void vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { if_softc_ctx_t scctx; struct sysctl_oid *node; struct sysctl_oid_list *list; int i; scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_ntxqsets; i++) { struct vmxnet3_txqueue *txq = &sc->vmx_txq[i]; node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD, 
&txq->vxtxq_cmd_ring.vxtxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_gen, 0, ""); } for (i = 0; i < scctx->isc_nrxqsets; i++) { struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i]; node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_gen, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, ""); } } static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { if_softc_ctx_t scctx; int i; scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_ntxqsets; i++) vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child); for (i = 0; i < scctx->isc_nrxqsets; i++) vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child); vmxnet3_setup_debug_sysctl(sc, ctx, child); } static void vmxnet3_setup_sysctl(struct vmxnet3_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vmx_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); vmxnet3_setup_queue_sysctl(sc, ctx, child); } static void vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v); } static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r) { return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r)); } static void vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v); } static void vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd); } static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_cmd(sc, cmd); bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD)); } static void vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0); } static void vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, 
VMXNET3_BAR0_IMASK(irq), 1); } static int vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { /* Not using interrupts for TX */ return (0); } static int vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx); return (0); } static void vmxnet3_link_intr_enable(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx); } static void vmxnet3_intr_enable_all(if_ctx_t ctx) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; int i; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < scctx->isc_vectors; i++) vmxnet3_enable_intr(sc, i); } static void vmxnet3_intr_disable_all(if_ctx_t ctx) { struct vmxnet3_softc *sc; int i; sc = iflib_get_softc(ctx); /* * iflib may invoke this routine before vmxnet3_attach_post() has * run, which is before the top level shared data area is * initialized and the device made aware of it. */ if (sc->vmx_ds != NULL) sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < VMXNET3_MAX_INTRS; i++) vmxnet3_disable_intr(sc, i); } /* * Since this is a purely paravirtualized device, we do not have * to worry about DMA coherency. But at times, we must make sure * both the compiler and CPU do not reorder memory operations. */ static inline void vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type) { switch (type) { case VMXNET3_BARRIER_RD: rmb(); break; case VMXNET3_BARRIER_WR: wmb(); break; case VMXNET3_BARRIER_RDWR: mb(); break; default: panic("%s: bad barrier type %d", __func__, type); } } diff --git a/sys/dev/vmware/vmxnet3/if_vmxvar.h b/sys/dev/vmware/vmxnet3/if_vmxvar.h index 9811ae42534d..65f876cc21dc 100644 --- a/sys/dev/vmware/vmxnet3/if_vmxvar.h +++ b/sys/dev/vmware/vmxnet3/if_vmxvar.h @@ -1,200 +1,200 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher * Copyright (c) 2018 Patrick Kelsey * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $FreeBSD$ */ #ifndef _IF_VMXVAR_H #define _IF_VMXVAR_H struct vmxnet3_softc; /* * The number of Rx/Tx queues this driver prefers. */ #define VMXNET3_DEF_RX_QUEUES 8 #define VMXNET3_DEF_TX_QUEUES 8 /* * The number of Rx rings in each Rx queue. */ #define VMXNET3_RXRINGS_PERQ 2 /* * The number of descriptors in each Rx/Tx ring. 
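* * Note: the _MASK_ values below suggest the counts are expected to be multiples of 32 (low five bits clear), within the _MIN_/_MAX_ bounds.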
*/ #define VMXNET3_DEF_TX_NDESC 512 #define VMXNET3_MAX_TX_NDESC 4096 #define VMXNET3_MIN_TX_NDESC 32 #define VMXNET3_MASK_TX_NDESC 0x1F #define VMXNET3_DEF_RX_NDESC 512 #define VMXNET3_MAX_RX_NDESC 2048 #define VMXNET3_MIN_RX_NDESC 32 #define VMXNET3_MASK_RX_NDESC 0x1F #define VMXNET3_MAX_TX_NCOMPDESC VMXNET3_MAX_TX_NDESC #define VMXNET3_MAX_RX_NCOMPDESC \ (VMXNET3_MAX_RX_NDESC * VMXNET3_RXRINGS_PERQ) struct vmxnet3_txring { u_int vxtxr_next; u_int vxtxr_ndesc; int vxtxr_gen; struct vmxnet3_txdesc *vxtxr_txd; bus_addr_t vxtxr_paddr; }; struct vmxnet3_rxring { struct vmxnet3_rxdesc *vxrxr_rxd; u_int vxrxr_ndesc; int vxrxr_gen; bus_addr_t vxrxr_paddr; uint64_t vxrxr_desc_skips; uint16_t vxrxr_refill_start; }; struct vmxnet3_comp_ring { union { struct vmxnet3_txcompdesc *txcd; struct vmxnet3_rxcompdesc *rxcd; } vxcr_u; /* * vxcr_next is used on the transmit side to track the next index to * begin cleaning at. It is not used on the receive side. */ u_int vxcr_next; u_int vxcr_ndesc; int vxcr_gen; bus_addr_t vxcr_paddr; uint64_t vxcr_zero_length; uint64_t vcxr_zero_length_frag; uint64_t vxcr_pkt_errors; }; struct vmxnet3_txqueue { struct vmxnet3_softc *vxtxq_sc; int vxtxq_id; int vxtxq_last_flush; int vxtxq_intr_idx; struct vmxnet3_txring vxtxq_cmd_ring; struct vmxnet3_comp_ring vxtxq_comp_ring; struct vmxnet3_txq_shared *vxtxq_ts; struct sysctl_oid_list *vxtxq_sysctl; char vxtxq_name[16]; } __aligned(CACHE_LINE_SIZE); struct vmxnet3_rxqueue { struct vmxnet3_softc *vxrxq_sc; int vxrxq_id; int vxrxq_intr_idx; struct if_irq vxrxq_irq; struct vmxnet3_rxring vxrxq_cmd_ring[VMXNET3_RXRINGS_PERQ]; struct vmxnet3_comp_ring vxrxq_comp_ring; struct vmxnet3_rxq_shared *vxrxq_rs; struct sysctl_oid_list *vxrxq_sysctl; char vxrxq_name[16]; } __aligned(CACHE_LINE_SIZE); struct vmxnet3_softc { device_t vmx_dev; if_ctx_t vmx_ctx; if_shared_ctx_t vmx_sctx; if_softc_ctx_t vmx_scctx; - struct ifnet *vmx_ifp; + if_t vmx_ifp; struct vmxnet3_driver_shared *vmx_ds; uint32_t vmx_flags; #define VMXNET3_FLAG_RSS 0x0002 #define VMXNET3_FLAG_SOFT_RSS 0x0004 /* Software RSS is enabled with compatible algorithm. */ struct vmxnet3_rxqueue *vmx_rxq; struct vmxnet3_txqueue *vmx_txq; struct resource *vmx_res0; bus_space_tag_t vmx_iot0; bus_space_handle_t vmx_ioh0; struct resource *vmx_res1; bus_space_tag_t vmx_iot1; bus_space_handle_t vmx_ioh1; int vmx_link_active; int vmx_intr_mask_mode; int vmx_event_intr_idx; struct if_irq vmx_event_intr_irq; uint8_t *vmx_mcast; struct vmxnet3_rss_shared *vmx_rss; struct iflib_dma_info vmx_ds_dma; struct iflib_dma_info vmx_qs_dma; struct iflib_dma_info vmx_mcast_dma; struct iflib_dma_info vmx_rss_dma; struct ifmedia *vmx_media; uint32_t vmx_vlan_filter[4096/32]; uint8_t vmx_lladdr[ETHER_ADDR_LEN]; }; /* * The driver version we report to the hypervisor; we just keep * this value constant. */ #define VMXNET3_DRIVER_VERSION 0x00010000 /* * Max descriptors per Tx packet. We must limit the size of any * TSO packets based on the number of segments. */ #define VMXNET3_TX_MAXSEGS 32 /* 64K @ 2K segment size */ #define VMXNET3_TX_MAXSIZE (VMXNET3_TX_MAXSEGS * MCLBYTES) #define VMXNET3_TSO_MAXSIZE (VMXNET3_TX_MAXSIZE - ETHER_VLAN_ENCAP_LEN) /* * Maximum supported Tx segment size. The length field in the * Tx descriptor is 14 bits. * * XXX It's possible a descriptor length field of 0 means 2^14, but this * isn't confirmed, so limit to 2^14 - 1 for now. */ #define VMXNET3_TX_MAXSEGSIZE ((1 << 14) - 1) /* * Maximum supported Rx segment size. The length field in the * Rx descriptor is 14 bits. 
* * The reference drivers skip zero-length descriptors, which seems to be a * strong indication that on the receive side, a descriptor length field of * zero does not mean 2^14. */ #define VMXNET3_RX_MAXSEGSIZE ((1 << 14) - 1) /* * Predetermined size of the multicast MAC filter table. If the * number of multicast addresses exceeds this size, then the * ALL_MULTI mode is used instead. */ #define VMXNET3_MULTICAST_MAX 32 /* * IP protocols that we can perform Tx checksum offloading of. */ #define VMXNET3_CSUM_OFFLOAD (CSUM_TCP | CSUM_UDP) #define VMXNET3_CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6) #define VMXNET3_CSUM_ALL_OFFLOAD \ (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6 | CSUM_TSO) #endif /* _IF_VMXVAR_H */ diff --git a/sys/net/iflib.c b/sys/net/iflib.c index b0701c1eb63c..3b743caa34e0 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -1,7350 +1,7350 @@ /*- * Copyright (c) 2014-2018, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #ifdef PCI_IOV #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis-à-vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis-à-vis the cost of a memory * access. 
*/ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); #define IFLIB_RXEOF_MORE (1U << 0) #define IFLIB_RXEOF_EMPTY (2U << 0) struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); static void iflib_tqg_detach(if_ctx_t ctx); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct sx ifc_ctx_sx; struct mtx ifc_state_mtx; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; uint32_t ifc_rx_mbuf_sz; int ifc_link_state; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct ifmedia *ifc_mediap; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; uint16_t ifc_sysctl_tx_abdicate; uint16_t ifc_sysctl_core_offset; #define CORE_OFFSET_UNSPECIFIED 0xffff uint8_t ifc_sysctl_separate_txrx; uint8_t ifc_sysctl_use_logical_cores; bool ifc_cpus_are_physical_cores; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_legacy_intr ifc_txrx.ift_legacy_intr #define isc_txq_select ifc_txrx.ift_txq_select #define isc_txq_select_v2 ifc_txrx.ift_txq_select_v2 eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; struct ether_addr ifc_mac; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (ctx->ifc_mediap); } uint32_t iflib_get_flags(if_ctx_t ctx) { return (ctx->ifc_flags); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac.octet, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & ~(CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) typedef struct iflib_sw_rx_desc_array { 
bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */ } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; uint64_t ift_last_timer_tick; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; #ifdef DEV_NETMAP struct callout ift_netmap_timer; #endif /* DEV_NETMAP */ if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_buf_tag; bus_dma_tag_t ift_tso_buf_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 32 char ift_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_buf_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - 
get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; struct pfil_head *pfil; /* * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is * the completion queue consumer index. Otherwise it's unused. */ qidx_t ifr_cq_cidx; uint16_t ifr_id; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; uint8_t ifr_fl_offset; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct callout ifr_watchdog; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; iflib_fl_t ifsd_fl; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ #define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else #define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } static device_method_t iflib_pseudo_methods[] = { DEVMETHOD(device_attach, noop_attach), DEVMETHOD(device_detach, iflib_pseudo_detach), DEVMETHOD_END }; driver_t iflib_pseudodriver = { "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), }; static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; #endif } /* * Only allow a single packet to take up at most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock") #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx) #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) void iflib_set_detach(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags 
|= IFC_IN_DETACH; STATE_UNLOCK(ctx); } /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_timer_default = 1000; SYSCTL_INT(_net_iflib, OID_AUTO, timer_default, CTLFLAG_RW, &iflib_timer_default, 0, "number of ticks between iflib_timer calls"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# TX mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# TX mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# TX mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# TX frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# RX allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_pad_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int 
iflib_rx_if_input; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# RX intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_if_input = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); #ifdef ALTQ static void iflib_altq_if_start(if_t ifp); static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); #endif static int iflib_register(if_ctx_t); static void iflib_deregister(if_ctx_t); static void iflib_unregister_vlan_handlers(if_ctx_t ctx); static uint16_t iflib_get_mbuf_size_for(unsigned int size); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_if_init_locked(if_ctx_t ctx); static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); struct cpu_offset { SLIST_ENTRY(cpu_offset) entries; cpuset_t set; unsigned int refcount; uint16_t next_cpuid; }; static struct mtx cpu_offset_mtx; MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", MTX_DEF); DEBUGNET_DEFINE(iflib); static int iflib_num_rx_descs(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; uint16_t 
first_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; return scctx->isc_nrxd[first_rxq]; } static int iflib_num_tx_descs(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; uint16_t first_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; return scctx->isc_ntxd[first_txq]; } #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init); static void iflib_netmap_timer(void *arg); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { if_t ifp = na->ifp; - if_ctx_t ctx = ifp->if_softc; + if_ctx_t ctx = if_getsoftc(ifp); int status; CTX_LOCK(ctx); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); iflib_stop(ctx); /* * Enable (or disable) netmap flags, and intercept (or restore) * ifp->if_transmit. This is done once the device has been stopped * to prevent race conditions. Also, this must be done after * calling netmap_disable_all_rings() and before calling * netmap_enable_all_rings(), so that these two functions see the * updated state of the NAF_NETMAP_ON bit. */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? - status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; + status = if_getdrvflags(ifp) & IFF_DRV_RUNNING ? 
0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } static int iflib_netmap_config(struct netmap_adapter *na, struct nm_config_info *info) { if_t ifp = na->ifp; - if_ctx_t ctx = ifp->if_softc; + if_ctx_t ctx = if_getsoftc(ifp); iflib_rxq_t rxq = &ctx->ifc_rxqs[0]; iflib_fl_t fl = &rxq->ifr_fl[0]; info->num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; info->num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; info->num_tx_descs = iflib_num_tx_descs(ctx); info->num_rx_descs = iflib_num_rx_descs(ctx); info->rx_buf_maxsize = fl->ifl_buf_size; nm_prinf("txr %u rxr %u txd %u rxd %u rbufsz %u", info->num_tx_rings, info->num_rx_rings, info->num_tx_descs, info->num_rx_descs, info->rx_buf_maxsize); return 0; } static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init) { struct netmap_adapter *na = kring->na; u_int const lim = kring->nkr_num_slots - 1; struct netmap_ring *ring = kring->ring; bus_dmamap_t *map; struct if_rxd_update iru; if_ctx_t ctx = rxq->ifr_ctx; iflib_fl_t fl = &rxq->ifr_fl[0]; u_int nic_i_first, nic_i; u_int nm_i; int i, n; #if IFLIB_DEBUG_COUNTERS int rf_count = 0; #endif /* * This function is used both at initialization and in rxsync. * At initialization we need to prepare (with isc_rxd_refill()) * all the netmap buffers currently owned by the kernel, in * such a way to keep fl->ifl_pidx and kring->nr_hwcur in sync * (except for kring->nkr_hwofs). These may be less than * kring->nkr_num_slots if netmap_reset() was called while * an application using the kring still owned some * buffers. * At rxsync time, both indexes point to the next buffer to be * refilled. * In any case we publish (with isc_rxd_flush()) up to * (fl->ifl_pidx - 1) % N (included), to keep the NIC tail/prod * pointer from overrunning the head/cons pointer, although this is * not necessary for some NICs (e.g. vmx). */ if (__predict_false(init)) { n = kring->nkr_num_slots - nm_kr_rxspace(kring); } else { n = kring->rhead - kring->nr_hwcur; if (n == 0) return (0); /* Nothing to do. */ if (n < 0) n += kring->nkr_num_slots; } iru_init(&iru, rxq, 0 /* flid */); map = fl->ifl_sds.ifsd_map; nic_i = fl->ifl_pidx; nm_i = netmap_idx_n2k(kring, nic_i); if (__predict_false(init)) { /* * On init/reset, nic_i must be 0, and we must * start to refill from hwtail (see netmap_reset()). */ MPASS(nic_i == 0); MPASS(nm_i == kring->nr_hwtail); } else MPASS(nm_i == kring->nr_hwcur); DBG_COUNTER_INC(fl_refills); while (n > 0) { #if IFLIB_DEBUG_COUNTERS if (++rf_count == 9) DBG_COUNTER_INC(fl_refills_large); #endif nic_i_first = nic_i; for (i = 0; n > 0 && i < IFLIB_MAX_RX_REFRESH; n--, i++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); MPASS(i < IFLIB_MAX_RX_REFRESH); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ return netmap_ring_reinit(kring); fl->ifl_bus_addrs[i] = paddr + nm_get_offset(kring, slot); fl->ifl_rxd_idxs[i] = nic_i; if (__predict_false(init)) { netmap_load_map(na, fl->ifl_buf_tag, map[nic_i], addr); } else if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_buf_tag, map[nic_i], addr); } bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i], BUS_DMASYNC_PREREAD); slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } iru.iru_pidx = nic_i_first; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); } fl->ifl_pidx = nic_i; /* * At the end of the loop we must have refilled everything * we could possibly refill. 
*/ MPASS(nm_i == kring->rhead); kring->nr_hwcur = nm_i; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nm_prev(nic_i, lim)); DBG_COUNTER_INC(rxd_flush); return (0); } #define NETMAP_TX_TIMER_US 90 /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * the kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmissions. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; if_t ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap kring */ u_int nic_i; /* index into the NIC ring */ u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; int tx_pkts = 0, tx_bytes = 0; /* * interrupts on every tx packet are expensive, so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ - if_ctx_t ctx = ifp->if_softc; + if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap kring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head), * iterate over the netmap ring, fetch the length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even when NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() call is especially expensive, * even when (as in this case) the tag is 0, so do it only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ uint32_t pkt_len = 0, seg_idx = 0; int nic_i_start = -1, flags = 0; pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); while (nm_i != head) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t offset = nm_get_offset(kring, slot); u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); flags |= (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* * If this is the first packet fragment, save the * index of the first NIC slot for later. 
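 * (For example, a packet spanning three netmap slots, only the last
 * of which lacks NS_MOREFRAG, is encapsulated once with
 * pi.ipi_pidx = nic_i_start (the slot of fragment 0), pi.ipi_nsegs = 3
 * and pi.ipi_len equal to the sum of the slot lengths before
 * isc_txd_encap() is called.)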
*/ if (nic_i_start < 0) nic_i_start = nic_i; pi.ipi_segs[seg_idx].ds_addr = paddr + offset; pi.ipi_segs[seg_idx].ds_len = len; if (len) { pkt_len += len; seg_idx++; } if (!(slot->flags & NS_MOREFRAG)) { pi.ipi_len = pkt_len; pi.ipi_nsegs = seg_idx; pi.ipi_pidx = nic_i_start; pi.ipi_ndescs = 0; pi.ipi_flags = flags; /* Prepare the NIC TX ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); DBG_COUNTER_INC(tx_encap); /* Update transmit counters */ tx_bytes += pi.ipi_len; tx_pkts++; /* Reinit per-packet info for the next one. */ flags = seg_idx = pkt_len = 0; nic_i_start = -1; } /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN_OFF(na, len, offset); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED | NS_MOREFRAG); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = nm_i; /* synchronize the NIC ring */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. * * If there are unclaimed buffers, attempt to reclaim them. * If we don't manage to reclaim them all, and TX IRQs are not in use, * trigger a per-tx-queue timer to try again later. */ if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { callout_reset_sbt_on(&txq->ift_netmap_timer, NETMAP_TX_TIMER_US * SBT_1US, SBT_1US, iflib_netmap_timer, txq, txq->ift_netmap_timer.c_cpu, 0); } if_inc_counter(ifp, IFCOUNTER_OBYTES, tx_bytes); if_inc_counter(ifp, IFCOUNTER_OPACKETS, tx_pkts); return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocation, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. 
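 *
 * (State sketch: slots in [nr_hwcur, rhead) were released by userspace
 * and are recycled below via netmap_fl_refill(), while newly received
 * packets are appended at nr_hwtail by the first part of this routine.)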
*/ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; if_t ifp = na->ifp; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; int i = 0, rx_bytes = 0, rx_pkts = 0; - if_ctx_t ctx = ifp->if_softc; + if_ctx_t ctx = if_getsoftc(ifp); if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = &rxq->ifr_fl[0]; struct if_rxd_info ri; qidx_t *cidxp; /* * netmap only uses free list 0, to avoid out of order consumption * of receive buffers */ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring * (or in the free list 0 if IFLIB_HAS_RXCQ is set), and they may * differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = fl->ifl_cidx; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * fl->ifl_cidx is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim); bool have_rxcq = sctx->isc_flags & IFLIB_HAS_RXCQ; int crclen = iflib_crcstrip ? 0 : 4; int error, avail; /* * For the free list consumer index, we use the same * logic as in iflib_rxeof(). */ if (have_rxcq) cidxp = &rxq->ifr_cq_cidx; else cidxp = &fl->ifl_cidx; avail = ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, *cidxp, USHRT_MAX); nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); MPASS(nm_i == kring->nr_hwtail); for (n = 0; avail > 0 && nm_i != hwtail_lim; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = *cidxp; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); for (i = 0; i < ri.iri_nfrags; i++) { if (error) { ring->slot[nm_i].len = 0; ring->slot[nm_i].flags = 0; } else { ring->slot[nm_i].len = ri.iri_frags[i].irf_len; if (i == (ri.iri_nfrags - 1)) { ring->slot[nm_i].len -= crclen; ring->slot[nm_i].flags = 0; /* Update receive counters */ rx_bytes += ri.iri_len; rx_pkts++; } else ring->slot[nm_i].flags = NS_MOREFRAG; } bus_dmamap_sync(fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); fl->ifl_cidx = nic_i = nm_next(nic_i, lim); } if (have_rxcq) { *cidxp = ri.iri_cidx; while (*cidxp >= scctx->isc_nrxd[0]) *cidxp -= scctx->isc_nrxd[0]; } } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. 
* As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ netmap_fl_refill(rxq, kring, false); if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); return (0); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { - if_ctx_t ctx = na->ifp->if_softc; + if_ctx_t ctx = if_getsoftc(na->ifp); CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP | NAF_MOREFRAG | NAF_OFFSETS; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = iflib_num_tx_descs(ctx); na.num_rx_desc = iflib_num_rx_descs(ctx); na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.nm_config = iflib_netmap_config; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static int iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return (0); for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } return (1); } static int iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_kring *kring; struct netmap_slot *slot; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return (0); kring = na->rx_rings[rxq->ifr_id]; netmap_fl_refill(rxq, kring, true); return (1); } static void iflib_netmap_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; /* * Wake up the netmap application, to give it a chance to * call txsync and reclaim more completed TX buffers. 
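 * (This callout is armed only from txsync, when IFC_NETMAP_TX_IRQ is
 * not set and completions are still outstanding; each shot fires
 * NETMAP_TX_TIMER_US (90 us) after it is scheduled and is re-armed by
 * the next txsync pass if buffers remain unreclaimed.)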
*/ netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) (0) #define iflib_netmap_rxq_init(ctx, rxq) (0) #define iflib_netmap_detach(ifp) #define netmap_enable_all_rings(ifp) #define netmap_disable_all_rings(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } static __inline void prefetch2cachelines(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); #if (CACHE_LINE_SIZE < 128) __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); #endif } #else static __inline void prefetch(void *x) { } static __inline void prefetch2cachelines(void *x) { } #endif static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) { iflib_fl_t fl; fl = &rxq->ifr_fl[flid]; iru->iru_paddrs = fl->ifl_bus_addrs; iru->iru_idxs = fl->ifl_rxd_idxs; iru->iru_qsidx = rxq->ifr_id; iru->iru_buf_size = fl->ifl_buf_size; iru->iru_flidx = fl->ifl_id; } static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } #define DMA_WIDTH_TO_BUS_LOWADDR(width) \ (((width) == 0) || (width) == flsll(BUS_SPACE_MAXADDR) ? \ BUS_SPACE_MAXADDR : (1ULL << (width)) - 1ULL) int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags) { int err; device_t dev = ctx->ifc_dev; bus_addr_t lowaddr; lowaddr = DMA_WIDTH_TO_BUS_LOWADDR(ctx->ifc_softc_ctx.isc_dma_width); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ align, 0, /* alignment, bounds */ lowaddr, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { if_shared_ctx_t sctx = ctx->ifc_sctx; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags)); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == 
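/*
 * (A usage sketch for the allocators above, with a hypothetical 4 KB
 * ring; on success the memory is zeroed, mapped, and both idi_vaddr
 * and idi_paddr stay valid until iflib_dma_free():
 *	struct iflib_dma_info di;
 *	if (iflib_dma_alloc(ctx, 4096, &di, BUS_DMA_NOWAIT) == 0) {
 *		// program the ring base with di.idi_paddr
 *		iflib_dma_free(&di);
 *	}
 * )
 */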
NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if_ctx_t ctx; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; iflib_txq_t txq; void *sc; int i, cidx, result; qidx_t txqid; bool intr_enable, intr_legacy; DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } ctx = rxq->ifr_ctx; sc = ctx->ifc_softc; intr_enable = false; intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY); MPASS(rxq->ifr_ntxqirq); for (i = 0; i < rxq->ifr_ntxqirq; i++) { txqid = rxq->ifr_txqid[i]; txq = &ctx->ifc_txqs[txqid]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (!ctx->isc_txd_credits_update(sc, txqid, false)) { if (intr_legacy) intr_enable = true; else IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&txq->ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else { if (intr_legacy) intr_enable = true; else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } if (intr_enable) IFDI_INTR_ENABLE(ctx); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, const char *name) { struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; int flags, i, rc; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); i = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? 
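/*
 * (Sketch of the filter contract the fast-interrupt handlers above
 * rely on; my_filter and handled_inline are hypothetical:
 *	static int
 *	my_filter(void *arg)
 *	{
 *		if (handled_inline(arg))
 *			return (FILTER_HANDLED);
 *		return (FILTER_SCHEDULE_THREAD);
 *	}
 * iflib enqueues the grouptask only when FILTER_SCHEDULE_THREAD is
 * left set in the filter's return value.)
 */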
name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate DMA resources for TX buffers as well as memory for the TX * mbuf map. TX DMA maps (non-TSO/TSO) and TX mbuf map are kept in a * iflib_sw_tx_desc_array structure, storing all the information that * is needed to transmit a packet on the wire. This is called only * once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; bus_size_t tsomaxsize; bus_addr_t lowaddr; int err, nsegments, ntsosegments; bool tso; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; tsomaxsize = scctx->isc_tx_tso_size_max; if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU) tsomaxsize += sizeof(struct ether_vlan_header); MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) { MPASS(ntsosegments > 0); MPASS(sctx->isc_tso_maxsize >= tsomaxsize); } lowaddr = DMA_WIDTH_TO_BUS_LOWADDR(scctx->isc_dma_width); /* * Set up DMA tags for TX buffers. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ lowaddr, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_buf_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0; if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ lowaddr, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tsomaxsize, /* maxsize */ ntsosegments, /* nsegments */ sctx->isc_tso_maxsegsize,/* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_buf_tag))) { device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n", err); goto fail; } /* Allocate memory for the TX mbuf map. */ if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX mbuf map memory\n"); err = ENOMEM; goto fail; } /* * Create the DMA maps for TX buffers. 
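 * (For example, with isc_tx_tso_size_max = 65536 and IFCAP_VLAN_MTU
 * set, the TSO tag above is created with maxsize 65536 +
 * sizeof(struct ether_vlan_header) = 65554 bytes, so a VLAN-tagged
 * TSO burst still fits in a single mapping.)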
*/ if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX buffer DMA map memory\n"); err = ENOMEM; goto fail; } if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TSO TX buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_buf_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } if (!tso) continue; err = bus_dmamap_create(txq->ift_tso_buf_tag, 0, &txq->ift_sds.ifsd_tso_map[i]); if (err != 0) { device_printf(dev, "Unable to create TSO TX DMA map\n"); goto fail; } } return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; if (txq->ift_sds.ifsd_map != NULL) { map = txq->ift_sds.ifsd_map[i]; bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, map); bus_dmamap_destroy(txq->ift_buf_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } if (txq->ift_sds.ifsd_tso_map != NULL) { map = txq->ift_sds.ifsd_tso_map[i]; bus_dmamap_sync(txq->ift_tso_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, map); bus_dmamap_destroy(txq->ift_tso_buf_tag, map); txq->ift_sds.ifsd_tso_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_br != NULL) { ifmp_ring_free(txq->ift_br); txq->ift_br = NULL; } mtx_destroy(&txq->ift_mtx); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_tso_map != NULL) { free(txq->ift_sds.ifsd_tso_map, M_IFLIB); txq->ift_sds.ifsd_tso_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_buf_tag); txq->ift_buf_tag = NULL; } if (txq->ift_tso_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_buf_tag); txq->ift_tso_buf_tag = NULL; } if (txq->ift_ifdi != NULL) { free(txq->ift_ifdi, M_IFLIB); } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]); } if (txq->ift_sds.ifsd_tso_map != NULL) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i]); } m_freem(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 
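/*
 * (Note the teardown discipline in iflib_txsd_destroy() above: each
 * map is synced, unloaded and only then destroyed, and the DMA tags
 * are destroyed last, in iflib_txq_destroy(), once every map created
 * from them is gone.)
 */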
0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate DMA resources for RX buffers as well as memory for the RX * mbuf map, direct RX cluster pointer map and RX cluster bus address * map. RX DMA map, RX mbuf map, direct RX cluster pointer map and * RX cluster map are kept in an iflib_sw_rx_desc_array structure. * Since we use one entry in iflib_sw_rx_desc_array per received * packet, the maximum number of entries we'll need is equal to the * number of hardware receive descriptors that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; bus_addr_t lowaddr; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); lowaddr = DMA_WIDTH_TO_BUS_LOWADDR(scctx->isc_dma_width); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ /* Set up DMA tag for RX buffers. */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ lowaddr, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_buf_tag); if (err) { device_printf(dev, "Unable to allocate RX DMA tag: %d\n", err); goto fail; } /* Allocate memory for the RX mbuf map. */ if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX mbuf map memory\n"); err = ENOMEM; goto fail; } /* Allocate memory for the direct RX cluster pointer map. */ if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX cluster map memory\n"); err = ENOMEM; goto fail; } /* Allocate memory for the RX cluster bus address map. */ if (!(fl->ifl_sds.ifsd_ba = (bus_addr_t *) malloc(sizeof(bus_addr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX bus address map memory\n"); err = ENOMEM; goto fail; } /* * Create the DMA maps for RX buffers. 
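 * (The arrays allocated above are parallel, indexed by descriptor
 * slot: for slot i, ifsd_m[i] is the mbuf header, ifsd_cl[i] the
 * receive cluster, ifsd_ba[i] the cluster's bus address, and the
 * ifsd_map[i] created below is the matching DMA map.)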
*/ if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX buffer DMA map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_buf_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } /** * iflib_fl_refill - refill an rxq free-buffer list * @ctx: the iflib context * @fl: the free list to refill * @count: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @count new packet buffers. * The caller must assure that @count does not exceed the queue's capacity * minus one (since we always leave a descriptor unavailable). */ static uint8_t iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct if_rxd_update iru; struct rxq_refill_cb_arg cb_arg; struct mbuf *m; caddr_t cl, *sd_cl; struct mbuf **sd_m; bus_dmamap_t *sd_map; bus_addr_t bus_addr, *sd_ba; int err, frag_idx, i, idx, n, pidx; qidx_t credits; MPASS(count <= fl->ifl_size - fl->ifl_credits - 1); sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_ba = fl->ifl_sds.ifsd_ba; pidx = fl->ifl_pidx; idx = pidx; frag_idx = fl->ifl_fragidx; credits = fl->ifl_credits; i = 0; n = count; MPASS(n > 0); MPASS(credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru_init(&iru, fl->ifl_rxq, fl->ifl_id); while (n-- > 0) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. 
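 * (Slot selection uses the free-list bitmap: bit_ffc_at() below finds
 * the first clear bit at or after the previous ifl_fragidx and stores
 * -1 when none is found, in which case bit_ffc() rescans from slot 0.)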
* * If the cluster is still set then we know a minimum sized * packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if (frag_idx < 0) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); MPASS(frag_idx >= 0); if ((cl = sd_cl[frag_idx]) == NULL) { cl = uma_zalloc(fl->ifl_zone, M_NOWAIT); if (__predict_false(cl == NULL)) break; cb_arg.error = 0; MPASS(sd_map != NULL); err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, BUS_DMA_NOWAIT); if (__predict_false(err != 0 || cb_arg.error)) { uma_zfree(fl->ifl_zone, cl); break; } sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; sd_cl[frag_idx] = cl; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } else { bus_addr = sd_ba[frag_idx]; } bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (sd_m[frag_idx] == NULL) { m = m_gethdr_raw(M_NOWAIT, 0); if (__predict_false(m == NULL)) break; sd_m[frag_idx] = m; } bit_set(fl->ifl_rx_bitmap, frag_idx); #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; credits++; i++; MPASS(credits <= fl->ifl_size); if (++idx == fl->ifl_size) { #ifdef INVARIANTS fl->ifl_gen = 1; #endif idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; pidx = idx; i = 0; } } if (n < count - 1) { if (i != 0) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; } DBG_COUNTER_INC(rxd_flush); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, fl->ifl_pidx); if (__predict_true(bit_test(fl->ifl_rx_bitmap, frag_idx))) { fl->ifl_fragidx = frag_idx + 1; if (fl->ifl_fragidx == fl->ifl_size) fl->ifl_fragidx = 0; } else { fl->ifl_fragidx = frag_idx; } } return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY); } static inline uint8_t iflib_fl_refill_all(if_ctx_t ctx, iflib_fl_t fl) { /* * We leave an unused descriptor to avoid pidx to catch up with cidx. * This is important as it confuses most NICs. For instance, * Intel NICs have (per receive ring) RDH and RDT registers, where * RDH points to the next receive descriptor to be used by the NIC, * and RDT for the next receive descriptor to be published by the * driver to the NIC (RDT - 1 is thus the last valid one). * The condition RDH == RDT means no descriptors are available to * the NIC, and thus it would be ambiguous if it also meant that * all the descriptors are available to the NIC. 
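 * (Concretely: with a 512-entry free list at most 511 buffers are
 * ever posted, so RDT can never advance onto RDH and RDH == RDT
 * always means "ring empty" rather than "ring full".)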
*/ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) return (iflib_fl_refill(ctx, fl, reclaimable)); return (0); } uint8_t iflib_in_detach(if_ctx_t ctx) { bool in_detach; STATE_LOCK(ctx); in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH); STATE_UNLOCK(ctx); return (in_detach); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; bus_dmamap_t sd_map; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_cl != NULL) { sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_sync(fl->ifl_buf_tag, sd_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fl->ifl_buf_tag, sd_map); uma_zfree(fl->ifl_zone, *sd_cl); *sd_cl = NULL; if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); m_free_raw(*sd_m); *sd_m = NULL; } } else { MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a free list and its buffers. * **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int qidx; bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); qidx = rxq->ifr_fl_offset + fl->ifl_id; if (scctx->isc_rxd_buf_size[qidx] != 0) fl->ifl_buf_size = scctx->isc_rxd_buf_size[qidx]; else fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz; /* * ifl_buf_size may be a driver-supplied value, so pull it up * to the selected mbuf size. */ fl->ifl_buf_size = iflib_get_mbuf_size_for(fl->ifl_buf_size); if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* * Avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach. In any case make sure * to leave a descriptor unavailable. See the comment in * iflib_fl_refill_all(). 
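 * (The refill below therefore posts min(128, ifl_size - 1) buffers:
 * a 1024-entry free list starts with 128, while a small 64-entry
 * list starts with 63, always leaving one descriptor unused.)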
*/ MPASS(fl->ifl_size > 0); (void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size - 1)); if (min(128, fl->ifl_size - 1) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i, j; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_buf_tag != NULL) { if (fl->ifl_sds.ifsd_map != NULL) { for (j = 0; j < fl->ifl_size; j++) { bus_dmamap_sync( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j], BUS_DMASYNC_POSTREAD); bus_dmamap_unload( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j]); bus_dmamap_destroy( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j]); } } bus_dma_tag_destroy(fl->ifl_buf_tag); fl->ifl_buf_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); free(fl->ifl_sds.ifsd_ba, M_IFLIB); free(fl->ifl_sds.ifsd_map, M_IFLIB); free(fl->ifl_rx_bitmap, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_ba = NULL; fl->ifl_sds.ifsd_map = NULL; fl->ifl_rx_bitmap = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; free(rxq->ifr_ifdi, M_IFLIB); rxq->ifr_ifdi = NULL; rxq->ifr_cq_cidx = 0; } } /* * Timer routine */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; uint64_t this_tick = ticks; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
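** (The hang test below only fires for a queue already marked
** IFLIB_QUEUE_HUNG when either nothing was cleaned since the last
** pass or no pause frames were seen, i.e. the stall cannot be
** blamed on flow control.)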
*/ if (this_tick - txq->ift_last_timer_tick >= iflib_timer_default) { txq->ift_last_timer_tick = this_tick; IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (txq->ift_qstatus != IFLIB_QUEUE_IDLE && ifmp_ring_is_stalled(txq->ift_br)) { KASSERT(ctx->ifc_link_state == LINK_STATE_UP, ("queue can't be marked as hung if interface is down")); txq->ift_qstatus = IFLIB_QUEUE_HUNG; } txq->ift_cleaned_prev = txq->ift_cleaned; } /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: device_printf(ctx->ifc_dev, "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); STATE_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); } static uint16_t iflib_get_mbuf_size_for(unsigned int size) { if (size <= MCLBYTES) return (MCLBYTES); else return (MJUMPAGESIZE); } static void iflib_calc_rx_mbuf_sz(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ ctx->ifc_rx_mbuf_sz = iflib_get_mbuf_size_for(sctx->isc_max_frame_size); } uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx) { return (ctx->ifc_rx_mbuf_sz); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); /* * See iflib_stop(). Useful in case iflib_init_locked() is * called without first calling iflib_stop(). */ netmap_disable_all_rings(ifp); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); #ifdef DEV_NETMAP callout_stop(&txq->ift_netmap_timer); #endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); (void)iflib_netmap_txq_init(ctx, txq); } /* * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so * that drivers can use the value when setting up the hardware receive * buffers. */ iflib_calc_rx_mbuf_sz(ctx); #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { if (iflib_netmap_rxq_init(ctx, rxq) > 0) { /* This rxq is in netmap mode. Skip normal init. 
*/ continue; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "setting up free list %d failed - " "check cluster settings\n", j); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, txq->ift_timer.c_cpu); /* Re-enable txsync/rxsync. */ netmap_enable_all_rings(ifp); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(1000); IFDI_STOP(ctx); DELAY(1000); /* * Stop any pending txsync/rxsync and prevent new ones * form starting. Processes blocked in poll() will get * POLLERR. */ netmap_disable_all_rings(ctx->ifc_ifp); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); #ifdef DEV_NETMAP callout_stop(&txq->ift_netmap_timer); #endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_no_desc_avail = 0; if (sctx->isc_flags & IFLIB_PRESERVE_TX_INDICES) txq->ift_cidx = txq->ift_pidx; else txq->ift_cidx = txq->ift_pidx = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { gtaskqueue_drain(rxq->ifr_task.gt_taskqueue, &rxq->ifr_task.gt_task); rxq->ifr_cq_cidx = 0; for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? 
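/*
 * (The conditional completed below is the usual ring-wrap idiom,
 * next = (next < end) ? next : start, so the prefetch target wraps
 * back to the start of the descriptor area instead of running past
 * its end.)
 */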
next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static struct mbuf * rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, int *pf_rv, if_rxd_info_t ri) { bus_dmamap_t map; iflib_fl_t fl; caddr_t payload; struct mbuf *m; int flid, cidx, len, next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; m = fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL && irf->irf_len != 0) { payload = *sd->ifsd_cl; payload += ri->iri_pad; len = ri->iri_len - ri->iri_pad; *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, len | PFIL_MEMPTR | PFIL_IN, NULL); switch (*pf_rv) { case PFIL_DROPPED: case PFIL_CONSUMED: /* * The filter ate it. Everything is recycled. */ m = NULL; unload = 0; break; case PFIL_REALLOCED: /* * The filter copied it. Everything is recycled. */ m = pfil_mem2mbuf(payload); unload = 0; break; case PFIL_PASS: /* * Filter said it was OK, so receive like * normal */ fl->ifl_sds.ifsd_m[cidx] = NULL; break; default: MPASS(0); } } else { fl->ifl_sds.ifsd_m[cidx] = NULL; if (pf_rv != NULL) *pf_rv = PFIL_PASS; } if (unload && irf->irf_len != 0) bus_dmamap_unload(fl->ifl_buf_tag, map); fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; bit_clear(fl->ifl_rx_bitmap, cidx); return (m); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) { struct mbuf *m, *mh, *mt; caddr_t cl; int *pf_rv_ptr, flags, i, padlen; bool consumed; i = 0; mh = NULL; consumed = false; *pf_rv = PFIL_PASS; pf_rv_ptr = pf_rv; do { m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, pf_rv_ptr, ri); MPASS(*sd->ifsd_cl != NULL); /* * Exclude zero-length frags & frags from * packets the filter has consumed or dropped */ if (ri->iri_frags[i].irf_len == 0 || consumed || *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED) { if (mh == NULL) { /* everything saved here */ consumed = true; pf_rv_ptr = NULL; continue; } /* XXX we can save the cluster here, but not the mbuf */ m_init(m, M_NOWAIT, MT_DATA, 0); m_free(m); continue; } if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? 
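 * (Order matters below: m_init() first resets the recycled mbuf
 * header, then m_cljset() attaches cl and records its cluster type;
 * m_cljset() does not reinitialize header state, so the two calls
 * are not interchangeable.)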
*/ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; int pf_rv; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len != 0 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, &pf_rv, ri); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); if (pf_rv == PFIL_PASS) { m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && ri->iri_pad == 0) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; m->m_data += ri->iri_pad; ri->iri_len -= ri->iri_pad; } } else { m = assemble_segments(rxq, ri, &sd, &pf_rv); if (m == NULL) return (NULL); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } #if defined(INET6) || defined(INET) static void iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) { - CURVNET_SET(lc->ifp->if_vnet); + CURVNET_SET(lc->ifp->if_vnet); /* XXX - DRVAPI */ #if defined(INET6) *v6 = V_ip6_forwarding; #endif #if defined(INET) *v4 = V_ipforwarding; #endif CURVNET_RESTORE(); } /* * Returns true if it's possible this packet could be LROed. * if it returns false, it is guaranteed that tcp_lro_rx() * would not return zero. 
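 * (The test below keys off the host's forwarding state: when IPv4 or
 * IPv6 forwarding is enabled, coalescing could alter packets the
 * stack may need to forward verbatim, so LRO is declined for that
 * address family.)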
*/ static bool iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { struct ether_header *eh; eh = mtod(m, struct ether_header *); switch (eh->ether_type) { #if defined(INET6) case htons(ETHERTYPE_IPV6): return (!v6_forwarding); #endif #if defined (INET) case htons(ETHERTYPE_IP): return (!v4_forwarding); #endif } return false; } #else static void iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) { } #endif static void _task_fn_rx_watchdog(void *context) { iflib_rxq_t rxq = context; GROUPTASK_ENQUEUE(&rxq->ifr_task); } static uint8_t iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_t ifp; if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; int lro_enabled; bool v4_forwarding, v6_forwarding, lro_possible; uint8_t retval = 0; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt, *mf; NET_EPOCH_ASSERT(); lro_possible = v4_forwarding = v6_forwarding = false; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= iflib_fl_refill_all(ctx, fl); DBG_COUNTER_INC(rx_unavail); return (retval); } /* pfil needs the vnet to be set */ - CURVNET_SET_QUIET(ifp->if_vnet); + CURVNET_SET_QUIET(ifp->if_vnet); /* XXX - DRVAPI */ for (budget_left = budget; budget_left > 0 && avail > 0;) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; rx_pkts += 1; rx_bytes += ri.iri_len; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; /* was this only a completion queue message? 
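 * (The wrap-around above is plain subtraction rather than a mask
 * because the completion queue size need not be a power of two:
 * e.g. with isc_nrxd[0] == 1000, a returned iri_cidx of 1003
 * becomes 3.)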
*/ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); avail--; budget_left--; if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) continue; /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } CURVNET_RESTORE(); /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= iflib_fl_refill_all(ctx, fl); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); if (lro_enabled) iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); mt = mf = NULL; while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif #if defined(INET6) || defined(INET) if (lro_enabled) { if (!lro_possible) { lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); if (lro_possible && mf != NULL) { - ifp->if_input(ifp, mf); + if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); mt = mf = NULL; } } if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == (CSUM_L4_CALC|CSUM_L4_VALID)) { if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; } } #endif if (lro_possible) { - ifp->if_input(ifp, m); + if_input(ifp, m); DBG_COUNTER_INC(rx_if_input); continue; } if (mf == NULL) mf = m; if (mt != NULL) mt->m_nextpkt = m; mt = m; } if (mf != NULL) { - ifp->if_input(ifp, mf); + if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0) retval |= IFLIB_RXEOF_MORE; return (retval); err: STATE_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); return (0); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - 
(q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) MAX((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ (ctx)->ifc_softc_ctx.isc_tx_nsegments) static inline bool iflib_txd_db_check(iflib_txq_t txq, int ring) { if_ctx_t ctx = txq->ift_ctx; qidx_t dbval, max; max = TXQ_MAX_DB_DEFERRED(txq, txq->ift_in_use); /* force || threshold exceeded || at the edge of the ring */ if (ring || (txq->ift_db_pending >= max) || (TXQ_AVAIL(txq) <= MAX_TX_DESC(ctx) + 2)) { /* * 'npending' is used if the card's doorbell is in terms of the number of descriptors * pending flush (BRCM). 'pidx' is used in cases where the card's doorbell uses the * producer index explicitly (INTC). */ dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); /* * Absent bugs, there are zero packets pending, so reset pending counts to zero. */ txq->ift_db_pending = txq->ift_npending = 0; return (true); } return (false); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) /** * Parses out Ethernet header information in the given mbuf. * Returns in pi: ipi_etype (EtherType) and ipi_ehdrlen (Ethernet header length) * * This will account for the VLAN header if present. * * XXX: This doesn't handle QinQ, which could prevent TX offloads for those * types of packets. */ static int iflib_parse_ether_header(if_pkt_info_t pi, struct mbuf **mp, uint64_t *pullups) { struct ether_vlan_header *eh; struct mbuf *m; m = *mp; if (__predict_false(m->m_len < sizeof(*eh))) { (*pullups)++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } *mp = m; return (0); } /** * Parse up to the L3 header and extract IPv4/IPv6 header information into pi. * Currently this information includes: IP ToS value, IP header version/presence * * This is missing some checks and doesn't edit the packet content as it goes, * unlike iflib_parse_header(), in order to keep the amount of code here minimal. 
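 * (From iflib_parse_ether_header() above: an untagged frame yields
 * ipi_ehdrlen = ETHER_HDR_LEN (14 bytes) with ipi_etype taken from
 * evl_encap_proto, while a VLAN-tagged frame yields 14 +
 * ETHER_VLAN_ENCAP_LEN = 18 bytes with ipi_etype from the inner
 * evl_proto field.)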
*/ static int iflib_parse_header_partial(if_pkt_info_t pi, struct mbuf **mp, uint64_t *pullups) { struct mbuf *m; int err; *pullups = 0; m = *mp; if (!M_WRITABLE(m)) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); DBG_COUNTER_INC(tx_frees); *mp = m; } } /* Fills out pi->ipi_etype */ err = iflib_parse_ether_header(pi, mp, pullups); if (err) return (err); m = *mp; switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct mbuf *n; struct ip *ip = NULL; int miniplen; miniplen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip)); if (__predict_false(m->m_len < miniplen)) { /* * Check for common case where the first mbuf only contains * the Ethernet header */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); /* If next mbuf contains at least the minimal IP header, then stop */ if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; } else { (*pullups)++; if (__predict_false((m = m_pullup(m, miniplen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { (*pullups)++; if (__predict_false((m = m_pullup(m, miniplen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } /* Have the IPv4 header w/ no options here */ pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_ip_tos = ip->ip_tos; pi->ipi_flags |= IPI_TX_IPV4; break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6; if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { (*pullups)++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); /* Have the IPv6 fixed header here */ pi->ipi_ip_hlen = sizeof(struct ip6_hdr); pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6); pi->ipi_flags |= IPI_TX_IPV6; break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct mbuf *m; int err; m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); DBG_COUNTER_INC(tx_frees); *mp = m; } } /* Fills out pi->ipi_etype */ err = iflib_parse_ether_header(pi, mp, &txq->ift_pullups); if (__predict_false(err)) return (err); m = *mp; switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct mbuf *n; struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip 
= (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_ip_tos = ip->ip_tos; pi->ipi_flags |= IPI_TX_IPV4; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD4(pi)) { if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. */ pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } } if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6); pi->ipi_flags |= IPI_TX_IPV6; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD6(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. 
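 *
 * Worked example (illustrative numbers): in the IPv4 TSO path above,
 * IFLIB_TSO_INIT_IP sets the length a NIC expects per segment, e.g.
 * with a 20-byte IP header, a 20-byte TCP header and tso_segsz = 1448:
 *
 *	ip->ip_len = htons(20 + 20 + 1448);	(1488 bytes per segment)
 *
 * The pseudo-header sums seeded above and just below deliberately pass
 * a zero length (in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0)), as the
 * hardware rewrites the length for every segment it emits.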
*/ pi->ipi_csum_flags |= CSUM_IP6_TCP; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsd_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, pidx; struct mbuf *m, **ifsd_m; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx & (ntxd - 1); m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]); if (txq->ift_sds.ifsd_tso_map != NULL) bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[pidx]); #if MEMORY_LOGGING txq->ift_dequeued++; #endif return (m); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } /* * Pad an mbuf to ensure a minimum Ethernet frame size. * min_frame_size is the frame size (less CRC) to pad the mbuf to */ static __noinline int iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) { /* * 18 is enough bytes to pad an ARP packet to 46 bytes, and * an ARP message is the smallest common payload I can think of */ static char pad[18]; /* just zeros */ int n; struct mbuf *new_head; if (!M_WRITABLE(*m_head)) { new_head = m_dup(*m_head, M_NOWAIT); if (new_head == NULL) { m_freem(*m_head); device_printf(dev, "cannot pad short frame, m_dup() failed\n"); DBG_COUNTER_INC(encap_pad_mbuf_fail); DBG_COUNTER_INC(tx_frees); return ENOMEM; } m_freem(*m_head); *m_head = new_head; } for (n = min_frame_size - (*m_head)->m_pkthdr.len; n > 0; n -= sizeof(pad)) if (!m_append(*m_head, min(n, sizeof(pad)), pad)) break; if (n > 0) { m_freem(*m_head); device_printf(dev, "cannot pad short frame\n"); DBG_COUNTER_INC(encap_pad_mbuf_fail); DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } return 0; } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_tag_t buf_tag; bus_dma_segment_t *segs; struct mbuf *m_head, **ifsd_m; void *next_txd; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; if (ctx->ifc_flags & IFC_PREFETCH) { next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { next_txd = calc_next_txd(txq, cidx, 0); prefetch(next_txd); } /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); prefetch(&txq->ift_sds.ifsd_map[next]); next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); } map = txq->ift_sds.ifsd_map[pidx]; ifsd_m = txq->ift_sds.ifsd_m; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { buf_tag = txq->ift_tso_buf_tag; max_segs = scctx->isc_tx_tso_segments_max; map = txq->ift_sds.ifsd_tso_map[pidx];
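/*
 * Illustrative note (typical magnitudes, not values from this change):
 * a TSO burst can need many more DMA segments than a single frame, e.g.
 * a 64KB TSO payload built from 2KB clusters maps to 32+ segments while
 * isc_tx_nsegments for a plain frame is often in the single digits,
 * which is why a separate, larger TSO tag and map are selected here.
 */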
MPASS(buf_tag != NULL); MPASS(max_segs > 0); } else { buf_tag = txq->ift_buf_tag; max_segs = scctx->isc_tx_nsegments; map = txq->ift_sds.ifsd_map[pidx]; } if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); if (err) { DBG_COUNTER_INC(encap_txd_encap_fail); return err; } } m_head = *m_headp; pkt_info_zero(&pi); pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = M_HAS_VLANTAG(m_head) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) { DBG_COUNTER_INC(encap_txd_encap_fail); return (err); } m_head = *m_headp; } retry: err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) { m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); /* try defrag if collapsing fails */ if (m_head == NULL) remap++; } if (remap == 1) { txq->ift_mbuf_defrag++; m_head = m_defrag(*m_headp, M_NOWAIT); } /* * remap should never be >1 unless bus_dmamap_load_mbuf_sg * failed to map an mbuf that was run through m_defrag */ MPASS(remap <= 1); if (__predict_false(m_head == NULL || remap > 1)) goto defrag_failed; remap++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); DBG_COUNTER_INC(encap_txd_encap_fail); return (err); } ifsd_m[pidx] = m_head; /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; bus_dmamap_unload(buf_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); DBG_COUNTER_INC(encap_txd_encap_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } /* * On Intel cards we can greatly reduce the number of TX interrupts * we see by only setting report status on every Nth descriptor. * However, this also means that the driver will need to keep track * of the descriptors that RS was set on to check them for the DD bit. 
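 *
 * Worked example (illustrative values): with ift_size = 1024 and
 * ift_update_freq = 16, TXD_NOTIFY_COUNT() is 1024/16 - 1 = 63 and
 * minthresh is 1024/8 = 128, so txq_max_rs_deferred() yields
 *
 *	in_use > 512 -> 63	in_use > 256 -> 31
 *	in_use > 128 -> 15	otherwise    ->  2
 *
 * i.e. the emptier the ring, the sooner report status is requested.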
*/ txq->ift_rs_pending += nsegs + 1; if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) { pi.ipi_flags |= IPI_TX_INTR; txq->ift_rs_pending = 0; } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; txq->ift_db_pending += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else { *m_headp = m_head = iflib_remove_mbuf(txq); if (err == EFBIG) { txq->ift_txd_encap_efbig++; if (remap < 2) { remap = 1; goto defrag; } } goto defrag_failed; } /* * err can't possibly be non-zero here, so we don't need to test it * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail). */ return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; DBG_COUNTER_INC(encap_txd_encap_fail); return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; ifsd_m = txq->ift_sds.ifsd_m; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n-- > 0) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if ((m = ifsd_m[cidx]) != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); if (m->m_pkthdr.csum_flags & CSUM_TSO) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx]); } else { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx]); } /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); m_freem(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static
struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch2cachelines(&items[next]); prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) return (1); bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if_t ifp = ctx->ifc_ifp; struct mbuf *m, **mp; int avail, bytes_sent, skipped, count, err, i; int mcast_sent, pkt_sent, reclaimed; bool do_prefetch, rang, ring; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(txq, reclaimed && txq->ift_db_pending); avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { /* * The driver is unloading so we need to free all pending packets. */ DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) m_freem(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } /* * If we've reclaimed any packets this queue cannot be hung. */ if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; skipped = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); err = 0; for (i = 0; i < count && TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx) + 2; i++) { int rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); /* * Completion interrupts will use the address of the txq * as a sentinel to enqueue _something_ in order to acquire * the lock on the mp_ring (there's no direct lock call). * We obviously have to check for these sentinel cases * and skip them.
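 *
 * Concretely (mirroring _task_fn_tx() below, shown for illustration):
 * the completion path enqueues the queue pointer itself,
 *
 *	ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, ...);
 *
 * so a ring slot holding (struct mbuf *)txq is a wakeup token rather
 * than a packet and must never reach iflib_encap() or m_freem().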
*/ if (__predict_false(*mp == (struct mbuf *)txq)) { skipped++; continue; } err = iflib_encap(txq, mp); if (__predict_false(err)) { /* no room - bail out */ if (err == ENOBUFS) break; skipped++; /* we can't send this packet - skip it */ continue; } pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); - if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) + if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))) break; ETHER_BPF_MTAP(ifp, m); rang = iflib_txd_db_check(txq, false); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? false : (iflib_min_tx_latency | err); iflib_txd_db_check(txq, ring); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", skipped + pkt_sent); #endif return (skipped + pkt_sent); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); DBG_COUNTER_INC(tx_frees); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; if_t ifp = ctx->ifc_ifp; int abdicate = ctx->ifc_sysctl_tx_abdicate; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return; #ifdef DEV_NETMAP if ((if_getcapenable(ifp) & IFCAP_NETMAP) && netmap_tx_irq(ifp, txq->ift_id)) goto skip_ifmp; #endif #ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) + if (ALTQ_IS_ENABLED(&ifp->if_snd)) /* XXX - DRVAPI */ iflib_altq_if_start(ifp); #endif if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate); else if (!abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); /* * When abdicating, we always need to check drainage, not just when we don't enqueue */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); #ifdef DEV_NETMAP skip_ifmp: #endif if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; uint8_t more; uint16_t budget; #ifdef DEV_NETMAP u_int work = 0; int nmirq; #endif #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; #ifdef DEV_NETMAP nmirq = netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work); if (nmirq != NM_IRQ_PASS) { more = (nmirq == NM_IRQ_RESCHED) ? 
IFLIB_RXEOF_MORE : 0; goto skip_rxeof; } #endif budget = ctx->ifc_sysctl_rx_budget; if (budget == 0) budget = 16; /* XXX */ more = iflib_rxeof(rxq, budget); #ifdef DEV_NETMAP skip_rxeof: #endif if ((more & IFLIB_RXEOF_MORE) == 0) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more & IFLIB_RXEOF_MORE) GROUPTASK_ENQUEUE(&rxq->ifr_task); else if (more & IFLIB_RXEOF_EMPTY) callout_reset_curcpu(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; bool oactive, running, do_reset, do_watchdog, in_detach; STATE_LOCK(ctx); running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE); do_reset = (ctx->ifc_flags & IFC_DO_RESET); do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG); in_detach = (ctx->ifc_flags & IFC_IN_DETACH); ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG); STATE_UNLOCK(ctx); if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; if (in_detach) return; CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_ADMINCQ) IFDI_ADMIN_COMPLETION_HANDLE(ctx); if (do_watchdog) { ctx->ifc_watchdog_events++; IFDI_WATCHDOG_RESET(ctx); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, txq->ift_timer.c_cpu); } IFDI_LINK_INTR_ENABLE(ctx); if (do_reset) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; int abdicate; - if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { + if (__predict_false((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENETDOWN); } MPASS(m->m_nextpkt == NULL); /* ALTQ-enabled interfaces always use queue 0. 
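 *
 * Otherwise the flow ID is hashed into the TX queue set by the QIDX()
 * macro defined earlier. Worked example (illustrative values) with
 * rss_table_mask = 127 and 4 TX queue sets:
 *
 *	flowid 0x12345: (0x12345 & 127) % 4 = 69 % 4 = 1  ->  txq 1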
*/ qidx = 0; /* Use driver-supplied queue selection method if it exists */ if (ctx->isc_txq_select_v2) { struct if_pkt_info pi; uint64_t early_pullups = 0; pkt_info_zero(&pi); err = iflib_parse_header_partial(&pi, &m, &early_pullups); if (__predict_false(err != 0)) { /* Assign pullups for bad pkts to default queue */ ctx->ifc_txqs[0].ift_pullups += early_pullups; DBG_COUNTER_INC(encap_txd_encap_fail); return (err); } /* Let driver make queueing decision */ qidx = ctx->isc_txq_select_v2(ctx->ifc_softc, m, &pi); ctx->ifc_txqs[qidx].ift_pullups += early_pullups; } /* Backwards compatibility w/ simpler queue select */ else if (ctx->isc_txq_select) qidx = ctx->isc_txq_select(ctx->ifc_softc, m); /* If not, use iflib's standard method */ else if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd)) qidx = QIDX(ctx, m); /* Set TX queue */ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); DBG_COUNTER_INC(tx_frees); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); abdicate = ctx->ifc_sysctl_tx_abdicate; err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate); if (abdicate) GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { if (!abdicate) GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); DBG_COUNTER_INC(tx_frees); } return (err); } #ifdef ALTQ /* * The overall approach to integrating iflib with ALTQ is to continue to use * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware * ring. Technically, when using ALTQ, queueing to an intermediate mp_ring * is redundant/unnecessary, but doing so minimizes the amount of * ALTQ-specific code required in iflib. It is assumed that the overhead of * redundantly queueing to an intermediate mp_ring is swamped by the * performance limitations inherent in using ALTQ. * * When ALTQ support is compiled in, all iflib drivers will use a transmit * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the * given interface. If ALTQ is enabled for an interface, then all * transmitted packets for that interface will be submitted to the ALTQ * subsystem via IFQ_ENQUEUE(). We don't use the legacy if_transmit() * implementation because it uses IFQ_HANDOFF(), which will duplicatively * update stats that the iflib machinery handles, and which is sensitive to * the disused IFF_DRV_OACTIVE flag. Additionally, iflib_altq_if_start() * will be installed as the start routine for use by ALTQ facilities that * need to trigger queue drains on a scheduled basis.
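 *
 * In outline, the code below gives the following path when ALTQ is
 * enabled on an interface (a sketch of the flow, not added behavior):
 *
 *	iflib_altq_if_transmit(ifp, m)
 *	    IFQ_ENQUEUE(&ifp->if_snd, m, err)	(ALTQ classifies/queues)
 *	    iflib_altq_if_start(ifp)
 *	        IFQ_DEQUEUE_NOLOCK(ifq, m)	(scheduler picks order)
 *	        iflib_if_transmit(ifp, m)	(normal mp_ring path)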
* */ static void iflib_altq_if_start(if_t ifp) { - struct ifaltq *ifq = &ifp->if_snd; + struct ifaltq *ifq = &ifp->if_snd; /* XXX - DRVAPI */ struct mbuf *m; IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { iflib_if_transmit(ifp, m); IFQ_DEQUEUE_NOLOCK(ifq, m); } IFQ_UNLOCK(ifq); } static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m) { int err; - if (ALTQ_IS_ENABLED(&ifp->if_snd)) { - IFQ_ENQUEUE(&ifp->if_snd, m, err); + if (ALTQ_IS_ENABLED(&ifp->if_snd)) { /* XXX - DRVAPI */ + IFQ_ENQUEUE(&ifp->if_snd, m, err); /* XXX - DRVAPI */ if (err == 0) iflib_altq_if_start(ifp); } else err = iflib_if_transmit(ifp, m); return (err); } #endif /* ALTQ */ static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; STATE_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; STATE_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); STATE_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; STATE_UNLOCK(ctx); /* * When ALTQ is enabled, this will also take care of purging the * ALTQ queue(s). */ if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = false; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = true; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = true; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
*/ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { STATE_LOCK(ctx); if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; STATE_UNLOCK(ctx); err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { CTX_UNLOCK(ctx); err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); CTX_LOCK(ctx); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* FALLTHROUGH */ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask, oldmask; oldmask = if_getcapenable(ifp); mask = ifr->ifr_reqcap ^ oldmask; mask &= ctx->ifc_softc_ctx.isc_capabilities | IFCAP_MEXTPG; setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); setmask |= (mask & IFCAP_WOL); /* * If any RX csum has changed, change all the ones that * are supported by the driver. 
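 *
 * Worked example (hypothetical request): if only IFCAP_RXCSUM is
 * toggled by the caller, then mask = ifr_reqcap ^ oldmask contains
 * IFCAP_RXCSUM alone, and the statement below widens it:
 *
 *	setmask |= isc_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6);
 *
 * so both RX checksum flags stay in lockstep on hardware that
 * supports them.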
*/ if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { setmask |= ctx->ifc_softc_ctx.isc_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); } /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_stop(ctx); STATE_LOCK(ctx); if_togglecapenable(ifp, setmask); ctx->ifc_softc_ctx.isc_capenable ^= setmask; STATE_UNLOCK(ctx); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); } if_vlancap(ifp); break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; if (iflib_in_detach(ctx)) return; CTX_LOCK(ctx); /* Driver may need all untagged packets to be flushed */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_stop(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes, if required */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); /* Driver may need all tagged packets to be flushed */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_stop(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes, if required */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { const pci_vendor_info_t *ent; if_shared_ctx_t sctx; uint16_t pci_device_id, pci_rev_id, pci_subdevice_id, pci_subvendor_id; uint16_t pci_vendor_id; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 
0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_probe_vendor(device_t dev) { int probe; probe = iflib_device_probe(dev); if (probe == BUS_PROBE_DEFAULT) return (BUS_PROBE_VENDOR); else return (probe); } static void iflib_reset_qvalues(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; int i; if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } if (!powerof2(scctx->isc_nrxd[i])) { device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "ntxd%d: %d is not a power of 2 - using default value of %d\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } } } static void iflib_add_pfil(if_ctx_t ctx) { struct pfil_head *pfil; struct pfil_head_args pa; iflib_rxq_t rxq; int i; pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; - pa.pa_headname = ctx->ifc_ifp->if_xname; + pa.pa_headname = if_name(ctx->ifc_ifp); pfil = pfil_head_register(&pa); for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = pfil; } } static void iflib_rem_pfil(if_ctx_t ctx) { struct pfil_head *pfil; iflib_rxq_t rxq; int i; rxq = ctx->ifc_rxqs; pfil = rxq->pfil; for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = NULL; } pfil_head_unregister(pfil); } /* * Advance forward by n members of the cpuset ctx->ifc_cpus starting from * cpuid and wrapping as necessary. 
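 *
 * Worked example (hypothetical set): with ifc_cpus = {2, 3, 6, 7},
 * cpuid_advance(ctx, 6, 3) reduces n to 3 % 4 = 3 and then steps
 *
 *	6 -> 7 -> (8 wraps to 2) -> 3
 *
 * skipping IDs outside the set and wrapping past the last valid CPU.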
*/ static unsigned int cpuid_advance(if_ctx_t ctx, unsigned int cpuid, unsigned int n) { unsigned int first_valid; unsigned int last_valid; /* cpuid should always be in the valid set */ MPASS(CPU_ISSET(cpuid, &ctx->ifc_cpus)); /* valid set should never be empty */ MPASS(!CPU_EMPTY(&ctx->ifc_cpus)); first_valid = CPU_FFS(&ctx->ifc_cpus) - 1; last_valid = CPU_FLS(&ctx->ifc_cpus) - 1; n = n % CPU_COUNT(&ctx->ifc_cpus); while (n > 0) { do { cpuid++; if (cpuid > last_valid) cpuid = first_valid; } while (!CPU_ISSET(cpuid, &ctx->ifc_cpus)); n--; } return (cpuid); } #if defined(SMP) && defined(SCHED_ULE) extern struct cpu_group *cpu_top; /* CPU topology */ static int find_child_with_core(int cpu, struct cpu_group *grp) { int i; if (grp->cg_children == 0) return -1; MPASS(grp->cg_child); for (i = 0; i < grp->cg_children; i++) { if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) return i; } return -1; } /* * Find an L2 neighbor of the given CPU or return -1 if none found. This * does not distinguish among multiple L2 neighbors if the given CPU has * more than one (it will always return the same result in that case). */ static int find_l2_neighbor(int cpu) { struct cpu_group *grp; int i; grp = cpu_top; if (grp == NULL) return -1; /* * Find the smallest CPU group that contains the given core. */ i = 0; while ((i = find_child_with_core(cpu, grp)) != -1) { /* * If the smallest group containing the given CPU has less * than two members, we conclude the given CPU has no * L2 neighbor. */ if (grp->cg_child[i].cg_count <= 1) return (-1); grp = &grp->cg_child[i]; } /* Must share L2. */ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) return -1; /* * Select the first member of the set that isn't the reference * CPU, which at this point is guaranteed to exist. 
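 *
 * For illustration (hypothetical SMT topology): if CPUs 4 and 5 are
 * the two hardware threads of one core sharing an L2, the walk above
 * narrows cpu_top to that CG_SHARE_L2 group and find_l2_neighbor(4)
 * returns 5.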
*/ for (i = 0; i < CPU_SETSIZE; i++) { if (CPU_ISSET(i, &grp->cg_mask) && i != cpu) return (i); } /* Should never be reached */ return (-1); } #else static int find_l2_neighbor(int cpu) { return (-1); } #endif /* * CPU mapping behaviors * --------------------- * 'separate txrx' refers to the separate_txrx sysctl * 'use logical' refers to the use_logical_cores sysctl * 'INTR CPUS' indicates whether bus_get_cpus(INTR_CPUS) succeeded * * separate use INTR * txrx logical CPUS result * ---------- --------- ------ ------------------------------------------------ * - - X RX and TX queues mapped to consecutive physical * cores with RX/TX pairs on same core and excess * of either following * - X X RX and TX queues mapped to consecutive cores * of any type with RX/TX pairs on same core and * excess of either following * X - X RX and TX queues mapped to consecutive physical * cores; all RX then all TX * X X X RX queues mapped to consecutive physical cores * first, then TX queues mapped to L2 neighbor of * the corresponding RX queue if one exists, * otherwise to consecutive physical cores * - n/a - RX and TX queues mapped to consecutive cores of * any type with RX/TX pairs on same core and excess * of either following * X n/a - RX and TX queues mapped to consecutive cores of * any type; all RX then all TX */ static unsigned int get_cpuid_for_queue(if_ctx_t ctx, unsigned int base_cpuid, unsigned int qid, bool is_tx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; unsigned int core_index; if (ctx->ifc_sysctl_separate_txrx) { /* * When using separate CPUs for TX and RX, the assignment * will always be of a consecutive CPU out of the set of * context CPUs, except for the specific case where the * context CPUs are physical cores, the use of logical cores * has been enabled, the assignment is for TX, the TX qid * corresponds to an RX qid, and the CPU assigned to the * corresponding RX queue has an L2 neighbor. */ if (ctx->ifc_sysctl_use_logical_cores && ctx->ifc_cpus_are_physical_cores && is_tx && qid < scctx->isc_nrxqsets) { int l2_neighbor; unsigned int rx_cpuid; rx_cpuid = cpuid_advance(ctx, base_cpuid, qid); l2_neighbor = find_l2_neighbor(rx_cpuid); if (l2_neighbor != -1) { return (l2_neighbor); } /* * ... else fall through to the normal * consecutive-after-RX assignment scheme. * * Note that we are assuming that all RX queue CPUs * have an L2 neighbor, or all do not. If a mixed * scenario is possible, we will have to keep track * separately of how many queues prior to this one * were not able to be assigned to an L2 neighbor. */ } if (is_tx) core_index = scctx->isc_nrxqsets + qid; else core_index = qid; } else { core_index = qid; } return (cpuid_advance(ctx, base_cpuid, core_index)); } static uint16_t get_ctx_core_offset(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; struct cpu_offset *op; cpuset_t assigned_cpus; unsigned int cores_consumed; unsigned int base_cpuid = ctx->ifc_sysctl_core_offset; unsigned int first_valid; unsigned int last_valid; unsigned int i; first_valid = CPU_FFS(&ctx->ifc_cpus) - 1; last_valid = CPU_FLS(&ctx->ifc_cpus) - 1; if (base_cpuid != CORE_OFFSET_UNSPECIFIED) { /* * Align the user-chosen base CPU ID to the next valid CPU * for this device.
If the chosen base CPU ID is smaller * than the first valid CPU or larger than the last valid * CPU, we assume the user does not know what the valid * range is for this device and is thinking in terms of a * zero-based reference frame, and so we shift the given * value into the valid range (and wrap accordingly) so the * intent is translated to the proper frame of reference. * If the base CPU ID is within the valid first/last, but * does not correspond to a valid CPU, it is advanced to the * next valid CPU (wrapping if necessary). */ if (base_cpuid < first_valid || base_cpuid > last_valid) { /* shift from zero-based to first_valid-based */ base_cpuid += first_valid; /* wrap to range [first_valid, last_valid] */ base_cpuid = (base_cpuid - first_valid) % (last_valid - first_valid + 1); } if (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus)) { /* * base_cpuid is in [first_valid, last_valid], but * not a member of the valid set. In this case, * there will always be a member of the valid set * with a CPU ID that is greater than base_cpuid, * and we simply advance to it. */ while (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus)) base_cpuid++; } return (base_cpuid); } /* * Determine how many cores will be consumed by performing the CPU * assignments and counting how many of the assigned CPUs correspond * to CPUs in the set of context CPUs. This is done using the CPU * ID first_valid as the base CPU ID, as the base CPU must be within * the set of context CPUs. * * Note that not all assigned CPUs will be in the set of context CPUs * when separate CPUs are being allocated to TX and RX queues, * assignment to logical cores has been enabled, the set of context * CPUs contains only physical CPUs, and TX queues are mapped to L2 * neighbors of CPUs that RX queues have been mapped to - in this * case we only want to count how many CPUs in the set of context * CPUs have been consumed, as that determines the next CPU in that * set to start allocating at for the next device for which * core_offset is not set.
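 *
 * Worked example (hypothetical): with 4 RX and 4 TX queues paired on
 * the same cores out of a 16-CPU context set, the 8 assignments land
 * on 4 distinct context CPUs, so cores_consumed = 4 and the next
 * device without an explicit core_offset starts 4 valid CPUs further
 * on.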
*/ CPU_ZERO(&assigned_cpus); for (i = 0; i < scctx->isc_ntxqsets; i++) CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, true), &assigned_cpus); for (i = 0; i < scctx->isc_nrxqsets; i++) CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, false), &assigned_cpus); CPU_AND(&assigned_cpus, &assigned_cpus, &ctx->ifc_cpus); cores_consumed = CPU_COUNT(&assigned_cpus); mtx_lock(&cpu_offset_mtx); SLIST_FOREACH(op, &cpu_offsets, entries) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { base_cpuid = op->next_cpuid; op->next_cpuid = cpuid_advance(ctx, op->next_cpuid, cores_consumed); MPASS(op->refcount < UINT_MAX); op->refcount++; break; } } if (base_cpuid == CORE_OFFSET_UNSPECIFIED) { base_cpuid = first_valid; op = malloc(sizeof(struct cpu_offset), M_IFLIB, M_NOWAIT | M_ZERO); if (op == NULL) { device_printf(ctx->ifc_dev, "allocation for cpu offset failed.\n"); } else { op->next_cpuid = cpuid_advance(ctx, base_cpuid, cores_consumed); op->refcount = 1; CPU_COPY(&ctx->ifc_cpus, &op->set); SLIST_INSERT_HEAD(&cpu_offsets, op, entries); } } mtx_unlock(&cpu_offset_mtx); return (base_cpuid); } static void unref_ctx_core_offset(if_ctx_t ctx) { struct cpu_offset *op, *top; mtx_lock(&cpu_offset_mtx); SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { MPASS(op->refcount > 0); op->refcount--; if (op->refcount == 0) { SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); free(op, M_IFLIB); } break; } } mtx_unlock(&cpu_offset_mtx); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; kobjop_desc_t kobj_desc; kobj_method_t *kobj_method; int err, msix, rid; int num_txd, num_rxd; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; iflib_reset_qvalues(ctx); IFNET_WLOCK(); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; MPASS(scctx->isc_dma_width <= flsll(BUS_SPACE_MAXADDR)); if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) ctx->ifc_mediap = scctx->isc_media; #ifdef INVARIANTS if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_MEXTPG); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_MEXTPG); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; num_txd = iflib_num_tx_descs(ctx); num_rxd = iflib_num_rx_descs(ctx); /* XXX change for per-queue sizes */ device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", num_txd, num_rxd); if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, num_txd / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > num_txd / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = 
max(1, num_txd / MAX_SINGLE_PACKET_FRACTION); /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ if (if_getcapabilities(ifp) & IFCAP_TSO) { /* * The stack can't handle a TSO size larger than IP_MAXPACKET, * but some MACs do. */ if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, IP_MAXPACKET)); /* * Take maximum number of m_pullup(9)'s in iflib_parse_header() * into account. In the worst case, each of these calls will * add another mbuf and, thus, the requirement for another DMA * segment. So for best performance, it doesn't make sense to * advertize a maximum of TSO segments that typically will * require defragmentation in iflib_encap(). */ if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); } if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, NULL, NULL, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. */ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { device_printf(dev, "Unable to fetch CPU list\n"); CPU_COPY(&all_cpus, &ctx->ifc_cpus); ctx->ifc_cpus_are_physical_cores = false; } else ctx->ifc_cpus_are_physical_cores = true; MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* ** Now set up MSI or MSI-X, should return us the number of supported ** vectors (will be 1 for a legacy interrupt and MSI). */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail_intr_free; } if ((err = iflib_qset_structures_setup(ctx))) goto fail_queues; /* * Now that we know how many queues there are, get the core offset. */ ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); if (msix > 1) { /* * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable * aren't the default NULL implementation. */ kobj_desc = &ifdi_rx_queue_intr_enable_desc; kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) { device_printf(dev, "MSI-X requires ifdi_rx_queue_intr_enable method"); err = EOPNOTSUPP; goto fail_queues; } kobj_desc = &ifdi_tx_queue_intr_enable_desc; kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) { device_printf(dev, "MSI-X requires ifdi_tx_queue_intr_enable method"); err = EOPNOTSUPP; goto fail_queues; } /* * Assign the MSI-X vectors. * Note that the default NULL ifdi_msix_intr_assign method will * fail here, too.
*/ err = IFDI_MSIX_INTR_ASSIGN(ctx, msix); if (err != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_queues; } } else if (scctx->isc_intr != IFLIB_INTR_MSIX) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_queues; } } else { device_printf(dev, "Cannot use iflib with only 1 MSI-X interrupt!\n"); err = ENODEV; goto fail_queues; } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. */ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; DEBUGNET_SET(ctx->ifc_ifp, iflib); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); IFNET_WUNLOCK(); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_queues: iflib_tqg_detach(ctx); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); IFDI_DETACH(ctx); IFDI_QUEUES_FREE(ctx); fail_intr_free: iflib_free_intr_mem(ctx); fail_unlock: CTX_UNLOCK(ctx); IFNET_WUNLOCK(); iflib_deregister(ctx); fail_ctx_free: device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (err); } int iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, struct iflib_cloneattach_ctx *clctx) { int num_txd, num_rxd; int err; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; void *sc; ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); ctx->ifc_flags |= IFC_SC_ALLOCATED; if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) ctx->ifc_flags |= IFC_PSEUDO; ctx->ifc_sctx = sctx; ctx->ifc_softc = sc; ctx->ifc_dev = dev; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } if (sctx->isc_flags & IFLIB_GEN_MAC) ether_gen_addr(ifp, &ctx->ifc_mac); if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, clctx->cc_params)) != 0) { device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); goto fail_unlock; } #ifdef INVARIANTS if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); - ifp->if_flags |= IFF_NOGROUP; + if_setflagbits(ifp, IFF_NOGROUP, 0); if (sctx->isc_flags & IFLIB_PSEUDO) { ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) { ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); } else { 
			if_attach(ctx->ifc_ifp);
			bpfattach(ctx->ifc_ifp, DLT_NULL, sizeof(u_int32_t));
		}

		if ((err = IFDI_ATTACH_POST(ctx)) != 0) {
			device_printf(dev, "IFDI_ATTACH_POST failed %d\n",
			    err);
			goto fail_detach;
		}

		*ctxp = ctx;

		/*
		 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported.
		 * This must appear after the call to ether_ifattach() because
		 * ether_ifattach() sets if_hdrlen to the default value.
		 */
		if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU)
			if_setifheaderlen(ifp,
			    sizeof(struct ether_vlan_header));

		if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
		iflib_add_device_sysctl_post(ctx);
		ctx->ifc_flags |= IFC_INIT_DONE;
		CTX_UNLOCK(ctx);

		return (0);
	}
	ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
	ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO);

	_iflib_pre_assert(scctx);
	ctx->ifc_txrx = *scctx->isc_txrx;

	if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max &&
	    scctx->isc_ntxqsets_max < scctx->isc_ntxqsets))
		scctx->isc_ntxqsets = scctx->isc_ntxqsets_max;
	if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max &&
	    scctx->isc_nrxqsets_max < scctx->isc_nrxqsets))
		scctx->isc_nrxqsets = scctx->isc_nrxqsets_max;

	num_txd = iflib_num_tx_descs(ctx);
	num_rxd = iflib_num_rx_descs(ctx);

	/* XXX change for per-queue sizes */
	device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n",
	    num_txd, num_rxd);

	if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION)
		scctx->isc_tx_nsegments = max(1, num_txd /
		    MAX_SINGLE_PACKET_FRACTION);
	if (scctx->isc_tx_tso_segments_max > num_txd /
	    MAX_SINGLE_PACKET_FRACTION)
		scctx->isc_tx_tso_segments_max = max(1,
		    num_txd / MAX_SINGLE_PACKET_FRACTION);

	/* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */
	if (if_getcapabilities(ifp) & IFCAP_TSO) {
		/*
		 * The stack can't handle a TSO size larger than IP_MAXPACKET,
		 * but some MACs do.
		 */
		if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max,
		    IP_MAXPACKET));
		/*
		 * Take maximum number of m_pullup(9)'s in iflib_parse_header()
		 * into account.  In the worst case, each of these calls will
		 * add another mbuf and, thus, the requirement for another DMA
		 * segment.  So for best performance, it doesn't make sense to
		 * advertise a maximum of TSO segments that typically will
		 * require defragmentation in iflib_encap().
		 */
		if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3);
		if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max);
	}
	if (scctx->isc_rss_table_size == 0)
		scctx->isc_rss_table_size = 64;
	scctx->isc_rss_table_mask = scctx->isc_rss_table_size - 1;

	GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
	/* XXX format name */
	taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx,
	    NULL, NULL, "admin");

	/* XXX --- can support > 1 -- but keep it simple for now */
	scctx->isc_intr = IFLIB_INTR_LEGACY;

	/* Get memory for the station queues */
	if ((err = iflib_queues_alloc(ctx))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		goto fail_iflib_detach;
	}

	if ((err = iflib_qset_structures_setup(ctx))) {
		device_printf(dev, "qset structure setup failed %d\n", err);
		goto fail_queues;
	}

	/*
	 * XXX What if anything do we want to do about interrupts?
	 */
	ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet);
	if ((err = IFDI_ATTACH_POST(ctx)) != 0) {
		device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err);
		goto fail_detach;
	}

	/*
	 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported.
	 * This must appear after the call to ether_ifattach() because
	 * ether_ifattach() sets if_hdrlen to the default value.
	 */
	if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU)
		if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));

	/* XXX handle more than one queue */
	for (i = 0; i < scctx->isc_nrxqsets; i++)
		IFDI_RX_CLSET(ctx, 0, i,
		    ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl);

	*ctxp = ctx;

	if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
	iflib_add_device_sysctl_post(ctx);
	ctx->ifc_flags |= IFC_INIT_DONE;
	CTX_UNLOCK(ctx);

	return (0);
fail_detach:
	ether_ifdetach(ctx->ifc_ifp);
fail_queues:
	iflib_tqg_detach(ctx);
	iflib_tx_structures_free(ctx);
	iflib_rx_structures_free(ctx);
fail_iflib_detach:
	IFDI_DETACH(ctx);
	IFDI_QUEUES_FREE(ctx);
fail_unlock:
	CTX_UNLOCK(ctx);
	iflib_deregister(ctx);
fail_ctx_free:
	free(ctx->ifc_softc, M_IFLIB);
	free(ctx, M_IFLIB);
	return (err);
}

int
iflib_pseudo_deregister(if_ctx_t ctx)
{
	if_t ifp = ctx->ifc_ifp;
	if_shared_ctx_t sctx = ctx->ifc_sctx;

	/* Unregister VLAN event handlers early */
	iflib_unregister_vlan_handlers(ctx);

	if ((sctx->isc_flags & IFLIB_PSEUDO) &&
	    (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) {
		bpfdetach(ifp);
		if_detach(ifp);
	} else {
		ether_ifdetach(ifp);
	}

	iflib_tqg_detach(ctx);
	iflib_tx_structures_free(ctx);
	iflib_rx_structures_free(ctx);
	IFDI_DETACH(ctx);
	IFDI_QUEUES_FREE(ctx);

	iflib_deregister(ctx);
	if (ctx->ifc_flags & IFC_SC_ALLOCATED)
		free(ctx->ifc_softc, M_IFLIB);
	free(ctx, M_IFLIB);
	return (0);
}

int
iflib_device_attach(device_t dev)
{
	if_ctx_t ctx;
	if_shared_ctx_t sctx;

	if ((sctx = DEVICE_REGISTER(dev)) == NULL ||
	    sctx->isc_magic != IFLIB_MAGIC)
		return (ENOTSUP);

	pci_enable_busmaster(dev);

	return (iflib_device_register(dev, NULL, sctx, &ctx));
}

int
iflib_device_deregister(if_ctx_t ctx)
{
	if_t ifp = ctx->ifc_ifp;
	device_t dev = ctx->ifc_dev;

	/* Make sure VLANs are not using the driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}
#ifdef PCI_IOV
	if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) {
		device_printf(dev, "SR-IOV in use; detach first.\n");
		return (EBUSY);
	}
#endif

	STATE_LOCK(ctx);
	ctx->ifc_flags |= IFC_IN_DETACH;
	STATE_UNLOCK(ctx);

	/* Unregister VLAN handlers before calling iflib_stop() */
	iflib_unregister_vlan_handlers(ctx);

	iflib_netmap_detach(ifp);
	ether_ifdetach(ifp);

	CTX_LOCK(ctx);
	iflib_stop(ctx);
	CTX_UNLOCK(ctx);

	iflib_rem_pfil(ctx);
	if (ctx->ifc_led_dev != NULL)
		led_destroy(ctx->ifc_led_dev);

	iflib_tqg_detach(ctx);
	iflib_tx_structures_free(ctx);
	iflib_rx_structures_free(ctx);

	CTX_LOCK(ctx);
	IFDI_DETACH(ctx);
	IFDI_QUEUES_FREE(ctx);
	CTX_UNLOCK(ctx);

	/* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */
	iflib_free_intr_mem(ctx);

	bus_generic_detach(dev);

	iflib_deregister(ctx);

	device_set_softc(ctx->ifc_dev, NULL);
	if (ctx->ifc_flags & IFC_SC_ALLOCATED)
		free(ctx->ifc_softc, M_IFLIB);
	unref_ctx_core_offset(ctx);
	free(ctx, M_IFLIB);
	return (0);
}

static void
iflib_tqg_detach(if_ctx_t ctx)
{
	iflib_txq_t txq;
	iflib_rxq_t rxq;
	int i;
	struct taskqgroup *tqg;

	/* XXX drain any dependent tasks */
	tqg = qgroup_if_io_tqg;
	for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
		callout_drain(&txq->ift_timer);
#ifdef DEV_NETMAP
		callout_drain(&txq->ift_netmap_timer);
#endif /* DEV_NETMAP */
		if (txq->ift_task.gt_uniq != NULL)
			taskqgroup_detach(tqg, &txq->ift_task);
	}
	for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
		if (rxq->ifr_task.gt_uniq != NULL)
			taskqgroup_detach(tqg, &rxq->ifr_task);
	}
	tqg = qgroup_if_config_tqg;
	if (ctx->ifc_admin_task.gt_uniq !=
NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); } static void iflib_free_intr_mem(if_ctx_t ctx) { if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(ctx->ifc_dev); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { iflib_timer_default = hz / 2; return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { int i; MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8); for (i = 0; i < sctx->isc_nrxqs; i++) { MPASS(sctx->isc_nrxd_min[i]); MPASS(powerof2(sctx->isc_nrxd_min[i])); MPASS(sctx->isc_nrxd_max[i]); MPASS(powerof2(sctx->isc_nrxd_max[i])); MPASS(sctx->isc_nrxd_default[i]); MPASS(powerof2(sctx->isc_nrxd_default[i])); } MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8); for (i = 0; i < sctx->isc_ntxqs; i++) { MPASS(sctx->isc_ntxd_min[i]); MPASS(powerof2(sctx->isc_ntxd_min[i])); MPASS(sctx->isc_ntxd_max[i]); MPASS(powerof2(sctx->isc_ntxd_max[i])); MPASS(sctx->isc_ntxd_default[i]); MPASS(powerof2(sctx->isc_ntxd_default[i])); 
	}
}

static void
_iflib_pre_assert(if_softc_ctx_t scctx)
{

	MPASS(scctx->isc_txrx->ift_txd_encap);
	MPASS(scctx->isc_txrx->ift_txd_flush);
	MPASS(scctx->isc_txrx->ift_txd_credits_update);
	MPASS(scctx->isc_txrx->ift_rxd_available);
	MPASS(scctx->isc_txrx->ift_rxd_pkt_get);
	MPASS(scctx->isc_txrx->ift_rxd_refill);
	MPASS(scctx->isc_txrx->ift_rxd_flush);
}

static int
iflib_register(if_ctx_t ctx)
{
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	driver_t *driver = sctx->isc_driver;
	device_t dev = ctx->ifc_dev;
	if_t ifp;
	u_char type;
	int iflags;

	if ((sctx->isc_flags & IFLIB_PSEUDO) == 0)
		_iflib_assert(sctx);

	CTX_LOCK_INIT(ctx);
	STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));
	if (sctx->isc_flags & IFLIB_PSEUDO) {
		if (sctx->isc_flags & IFLIB_PSEUDO_ETHER)
			type = IFT_ETHER;
		else
			type = IFT_PPP;
	} else
		type = IFT_ETHER;
	ifp = ctx->ifc_ifp = if_alloc(type);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
		return (ENOMEM);
	}

	/*
	 * Initialize our context's device specific methods
	 */
	kobj_init((kobj_t) ctx, (kobj_class_t) driver);
	kobj_class_compile((kobj_class_t) driver);

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setsoftc(ifp, ctx);
	if_setdev(ifp, dev);
	if_setinitfn(ifp, iflib_if_init);
	if_setioctlfn(ifp, iflib_if_ioctl);
#ifdef ALTQ
	if_setstartfn(ifp, iflib_altq_if_start);
	if_settransmitfn(ifp, iflib_altq_if_transmit);
	if_setsendqready(ifp);
#else
	if_settransmitfn(ifp, iflib_if_transmit);
#endif
	if_setqflushfn(ifp, iflib_if_qflush);

	iflags = IFF_MULTICAST | IFF_KNOWSEPOCH;
	if ((sctx->isc_flags & IFLIB_PSEUDO) &&
	    (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0)
		iflags |= IFF_POINTOPOINT;
	else
		iflags |= IFF_BROADCAST | IFF_SIMPLEX;
	if_setflags(ifp, iflags);
	ctx->ifc_vlan_attach_event =
	    EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
	    EVENTHANDLER_PRI_FIRST);
	ctx->ifc_vlan_detach_event =
	    EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx,
	    EVENTHANDLER_PRI_FIRST);

	if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) {
		ctx->ifc_mediap = &ctx->ifc_media;
		ifmedia_init(ctx->ifc_mediap, IFM_IMASK,
		    iflib_media_change, iflib_media_status);
	}
	return (0);
}

static void
iflib_unregister_vlan_handlers(if_ctx_t ctx)
{
	/* Unregister VLAN events */
	if (ctx->ifc_vlan_attach_event != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config,
		    ctx->ifc_vlan_attach_event);
		ctx->ifc_vlan_attach_event = NULL;
	}
	if (ctx->ifc_vlan_detach_event != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_unconfig,
		    ctx->ifc_vlan_detach_event);
		ctx->ifc_vlan_detach_event = NULL;
	}
}

static void
iflib_deregister(if_ctx_t ctx)
{
	if_t ifp = ctx->ifc_ifp;

	/* Remove all media */
	ifmedia_removeall(&ctx->ifc_media);

	/* Ensure that VLAN event handlers are unregistered */
	iflib_unregister_vlan_handlers(ctx);

	/* Release kobject reference */
	kobj_delete((kobj_t) ctx, NULL);

	/* Free the ifnet structure */
	if_free(ifp);

	STATE_LOCK_DESTROY(ctx);

	/* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */
	CTX_LOCK_DESTROY(ctx);
}

static int
iflib_queues_alloc(if_ctx_t ctx)
{
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
	device_t dev = ctx->ifc_dev;
	int nrxqsets = scctx->isc_nrxqsets;
	int ntxqsets = scctx->isc_ntxqsets;
	iflib_txq_t txq;
	iflib_rxq_t rxq;
	iflib_fl_t fl = NULL;
	int i, j, cpu, err, txconf, rxconf;
	iflib_dma_info_t ifdip;
	uint32_t *rxqsizes = scctx->isc_rxqsizes;
	uint32_t *txqsizes = scctx->isc_txqsizes;
	uint8_t nrxqs = sctx->isc_nrxqs;
	uint8_t ntxqs = sctx->isc_ntxqs;
	int nfree_lists = sctx->isc_nfl ?
sctx->isc_nfl : 1; int fl_offset = (sctx->isc_flags & IFLIB_HAS_RXCQ ? 1 : 0); caddr_t *vaddrs; uint64_t *paddrs; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs >= fl_offset + nfree_lists, ("there must be at least a rxq for each free list")); /* Allocate the TX ring struct memory */ if (!(ctx->ifc_txqs = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(ctx->ifc_rxqs = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } txq = ctx->ifc_txqs; rxq = ctx->ifc_rxqs; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate TX descriptors\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); txq->ift_timer.c_cpu = cpu; #ifdef DEV_NETMAP callout_init_mtx(&txq->ift_netmap_timer, &txq->ift_mtx, 0); txq->ift_netmap_timer.c_cpu = cpu; #endif /* DEV_NETMAP */ err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ callout_init(&rxq->ifr_watchdog, 1); if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate RX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate RX descriptors\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; rxq->ifr_fl_offset = fl_offset; rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; 
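			/*
			 * Note: each free list's DMA info was taken from
			 * ifr_ifdi[fl_offset + j] above; slot 0 is reserved
			 * for the completion queue when IFLIB_HAS_RXCQ is set.
			 */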
			fl[j].ifl_rxd_size = scctx->isc_rxd_size[j];
		}
		/* Allocate receive buffers for the ring */
		if (iflib_rxsd_alloc(rxq)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			err = ENOMEM;
			goto err_rx_desc;
		}

		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
			fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB,
			    M_WAITOK);
	}

	/* TXQs */
	vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
	paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
	for (i = 0; i < ntxqsets; i++) {
		iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi;

		for (j = 0; j < ntxqs; j++, di++) {
			vaddrs[i*ntxqs + j] = di->idi_vaddr;
			paddrs[i*ntxqs + j] = di->idi_paddr;
		}
	}
	if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs,
	    ntxqsets)) != 0) {
		device_printf(ctx->ifc_dev,
		    "Unable to allocate device TX queue\n");
		iflib_tx_structures_free(ctx);
		free(vaddrs, M_IFLIB);
		free(paddrs, M_IFLIB);
		goto err_rx_desc;
	}
	free(vaddrs, M_IFLIB);
	free(paddrs, M_IFLIB);

	/* RXQs */
	vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
	paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
	for (i = 0; i < nrxqsets; i++) {
		iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi;

		for (j = 0; j < nrxqs; j++, di++) {
			vaddrs[i*nrxqs + j] = di->idi_vaddr;
			paddrs[i*nrxqs + j] = di->idi_paddr;
		}
	}
	if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs,
	    nrxqsets)) != 0) {
		device_printf(ctx->ifc_dev,
		    "Unable to allocate device RX queue\n");
		iflib_tx_structures_free(ctx);
		free(vaddrs, M_IFLIB);
		free(paddrs, M_IFLIB);
		goto err_rx_desc;
	}
	free(vaddrs, M_IFLIB);
	free(paddrs, M_IFLIB);

	return (0);

/* XXX handle allocation failure changes */
err_rx_desc:
err_tx_desc:
rx_fail:
	if (ctx->ifc_rxqs != NULL)
		free(ctx->ifc_rxqs, M_IFLIB);
	ctx->ifc_rxqs = NULL;
	if (ctx->ifc_txqs != NULL)
		free(ctx->ifc_txqs, M_IFLIB);
	ctx->ifc_txqs = NULL;
fail:
	return (err);
}

static int
iflib_tx_structures_setup(if_ctx_t ctx)
{
	iflib_txq_t txq = ctx->ifc_txqs;
	int i;

	for (i = 0; i < NTXQSETS(ctx); i++, txq++)
		iflib_txq_setup(txq);

	return (0);
}

static void
iflib_tx_structures_free(if_ctx_t ctx)
{
	iflib_txq_t txq = ctx->ifc_txqs;
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	int i, j;

	for (i = 0; i < NTXQSETS(ctx); i++, txq++) {
		for (j = 0; j < sctx->isc_ntxqs; j++)
			iflib_dma_free(&txq->ift_ifdi[j]);
		iflib_txq_destroy(txq);
	}
	free(ctx->ifc_txqs, M_IFLIB);
	ctx->ifc_txqs = NULL;
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
static int
iflib_rx_structures_setup(if_ctx_t ctx)
{
	iflib_rxq_t rxq = ctx->ifc_rxqs;
	int q;
#if defined(INET6) || defined(INET)
	int err, i;
#endif

	for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
#if defined(INET6) || defined(INET)
		err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
		    TCP_LRO_ENTRIES, min(1024,
		    ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]));
		if (err != 0) {
			device_printf(ctx->ifc_dev,
			    "LRO Initialization failed!\n");
			goto fail;
		}
#endif
		IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
	}
	return (0);
#if defined(INET6) || defined(INET)
fail:
	/*
	 * Free LRO resources allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself.  'q' failed, so it's the terminus.
	 */
	rxq = ctx->ifc_rxqs;
	for (i = 0; i < q; ++i, rxq++) {
		tcp_lro_free(&rxq->ifr_lc);
	}
	return (err);
#endif
}

/*********************************************************************
 *
 *  Free all receive rings.
* **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; if_shared_ctx_t sctx = ctx->ifc_sctx; int i, j; for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { for (j = 0; j < sctx->isc_nrxqs; j++) iflib_dma_free(&rxq->ifr_ifdi[j]); iflib_rx_sds_free(rxq); #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); #endif } free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; /* * It is expected that the caller takes care of freeing queues if this * fails. */ if ((err = iflib_tx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err); return (err); } if ((err = iflib_rx_structures_setup(ctx)) != 0) device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } /* Just to avoid copy/paste */ static inline int iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, const char *name) { device_t dev; unsigned int base_cpuid, cpuid; int err; dev = ctx->ifc_dev; base_cpuid = ctx->ifc_sysctl_core_offset; cpuid = get_cpuid_for_queue(ctx, base_cpuid, qid, type == IFLIB_INTR_TX); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq ? irq->ii_res : NULL, name); if (err) { device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); return (err); } #ifdef notyet if (cpuid > ctx->ifc_cpuid_highest) ctx->ifc_cpuid_highest = cpuid; #endif return (0); } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name) { device_t dev; struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; gtask_fn_t *fn; int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? 
*/ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); ctx->ifc_flags |= IFC_NETMAP_TX_IRQ; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: device_printf(ctx->ifc_dev, "%s: unknown net intr type\n", __func__); return (EINVAL); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; dev = ctx->ifc_dev; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) return (err); } else { taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name) { device_t dev; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; fn = _task_fn_iov; GROUPTASK_INIT(gtask, 0, fn, q); break; default: panic("unknown net intr type"); } err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) { dev = ctx->ifc_dev; taskqgroup_attach(tqg, gtask, q, dev, irq ? irq->ii_res : NULL, name); } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, rman_get_rid(irq->ii_res), irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; device_t dev; struct grouptask *gtask; struct resource *res; struct taskqgroup *tqg; void *q; int err, tqrid; bool rx_only; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = *rid; rx_only = (ctx->ifc_sctx->isc_flags & IFLIB_SINGLE_IRQ_RX_ONLY) != 0; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = rx_only ? 
ctx : q; dev = ctx->ifc_dev; /* We allocate a single interrupt resource */ err = _iflib_irq_alloc(ctx, irq, tqrid, rx_only ? iflib_fast_intr_ctx : iflib_fast_intr_rxtx, NULL, info, name); if (err != 0) return (err); NET_GROUPTASK_INIT(gtask, 0, _task_fn_rx, q); res = irq->ii_res; taskqgroup_attach(tqg, gtask, q, dev, res, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { MPASS(ctx->ifc_admin_task.gt_taskqueue != NULL); GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, name); } void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_PREFETCH; STATE_UNLOCK(ctx); } /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { iflib_fl_t fl; u_int i; for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, info, 0, iflib_sysctl_int_delay, "I", description); } struct sx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_ctx_sx); } static int 
iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues; int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; if (bootverbose) device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* First try MSI-X */ if ((msgs = pci_msix_count(dev)) == 0) { if (bootverbose) device_printf(dev, "MSI-X not supported or disabled\n"); goto msi; } bar = ctx->ifc_softc_ctx.isc_msix_bar; /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use MSI-X in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { device_printf(dev, "Unable to map MSI-X table\n"); goto msi; } } admincnt = sctx->isc_admin_intrcnt; #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); if (bootverbose) device_printf(dev, "intr CPUs: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; if (rx_queues > scctx->isc_nrxqsets) rx_queues = scctx->isc_nrxqsets; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (tx_queues > scctx->isc_ntxqsets) tx_queues = scctx->isc_ntxqsets; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } vectors = rx_queues + admincnt; if (msgs < vectors) { device_printf(dev, "insufficient number of MSI-X vectors " "(supported %d, need %d)\n", msgs, vectors); goto msi; } device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues, tx_queues); msgs = vectors; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { if (vectors != msgs) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors " "(got %d, need %d)\n", vectors, msgs); pci_release_msi(dev); if (bar != -1) { bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } goto msi; } device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d MSI-X vectors, err: %d\n", vectors, err); if (bar != -1) { bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); 
ctx->ifc_msix_mem = NULL; } } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { scctx->isc_vectors = 1; device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; const char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; default: printf("%s: unhandled type\n", __func__); return (EINVAL); } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSI-X (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the RX budget"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, "cause TX to abdicate instead of running to completion"); 
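	/*
	 * Core placement tunables; when core_offset is left unspecified,
	 * iflib derives a starting CPU itself in get_ctx_core_offset().
	 */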
ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, "offset to start using cores at"); SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, "use separate cores for TX and RX"); SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "use_logical_cores", CTLFLAG_RDTUN, &ctx->ifc_sysctl_use_logical_cores, 0, "try to make use of logical cores for TX and RX"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of TX descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of RX descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu", CTLFLAG_RD, &txq->ift_task.gt_cpu, 0, "cpu this queue is bound to"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times DMA map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, 
"txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu", CTLFLAG_RD, &rxq->ifr_task.gt_cpu, 0, "cpu this queue is bound to"); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "buf_size", CTLFLAG_RD, &fl->ifl_buf_size, 1, "buffer size"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } void iflib_request_reset(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags |= 
IFC_DO_RESET; STATE_UNLOCK(ctx); } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif #ifdef DEBUGNET static void iflib_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) { if_ctx_t ctx; ctx = if_getsoftc(ifp); CTX_LOCK(ctx); *nrxr = NRXQSETS(ctx); *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; CTX_UNLOCK(ctx); } static void iflib_debugnet_event(if_t ifp, enum debugnet_ev event) { if_ctx_t ctx; if_softc_ctx_t scctx; iflib_fl_t fl; iflib_rxq_t rxq; int i, j; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; switch (event) { case DEBUGNET_START: for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &ctx->ifc_rxqs[i]; for (j = 0; j < rxq->ifr_nfl; j++) { fl = rxq->ifr_fl; fl->ifl_zone = m_getzone(fl->ifl_buf_size); } } iflib_no_tx_batch = 1; break; default: break; } } static int iflib_debugnet_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx; iflib_txq_t txq; int error; ctx = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; error = iflib_encap(txq, &m); if (error == 0) (void)iflib_txd_db_check(txq, true); return (error); } static int iflib_debugnet_poll(if_t ifp, int count) { struct epoch_tracker et; if_ctx_t ctx; if_softc_ctx_t scctx; iflib_txq_t txq; int i; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); NET_EPOCH_ENTER(et); for (i = 0; i < scctx->isc_nrxqsets; i++) (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); NET_EPOCH_EXIT(et); return (0); } #endif /* DEBUGNET */
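/*
 * Illustrative sketch, not part of this change: how a leaf PCI driver
 * typically wires up the iflib entry points defined above.  The foo_*
 * names are hypothetical; the iflib_device_* functions and a
 * device_register method returning the driver's if_shared_ctx_t follow
 * the pattern used by existing iflib consumers.
 */
#ifdef notdef
static device_method_t foo_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register, foo_register),	/* returns if_shared_ctx_t */
	DEVMETHOD(device_probe, iflib_device_probe),
	DEVMETHOD(device_attach, iflib_device_attach),
	DEVMETHOD(device_detach, iflib_device_detach),
	DEVMETHOD(device_shutdown, iflib_device_shutdown),
	DEVMETHOD(device_suspend, iflib_device_suspend),
	DEVMETHOD(device_resume, iflib_device_resume),
	DEVMETHOD_END
};
#endif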