Index: user/markj/netdump/sys/dev/alc/if_alc.c =================================================================== --- user/markj/netdump/sys/dev/alc/if_alc.c (revision 332406) +++ user/markj/netdump/sys/dev/alc/if_alc.c (revision 332407) @@ -1,4707 +1,4708 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009, Pyun YongHyeon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Driver for Atheros AR813x/AR815x PCIe Ethernet. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #undef ALC_USE_CUSTOM_CSUM #ifdef ALC_USE_CUSTOM_CSUM #define ALC_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #else #define ALC_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) #endif MODULE_DEPEND(alc, pci, 1, 1, 1); MODULE_DEPEND(alc, ether, 1, 1, 1); MODULE_DEPEND(alc, miibus, 1, 1, 1); /* Tunables. */ static int msi_disable = 0; static int msix_disable = 0; TUNABLE_INT("hw.alc.msi_disable", &msi_disable); TUNABLE_INT("hw.alc.msix_disable", &msix_disable); /* * Devices supported by this driver. */ static struct alc_ident alc_ident_table[] = { { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8131, 9 * 1024, "Atheros AR8131 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8132, 9 * 1024, "Atheros AR8132 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8151, 6 * 1024, "Atheros AR8151 v1.0 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8151_V2, 6 * 1024, "Atheros AR8151 v2.0 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8152_B, 6 * 1024, "Atheros AR8152 v1.1 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8152_B2, 6 * 1024, "Atheros AR8152 v2.0 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8161, 9 * 1024, "Atheros AR8161 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8162, 9 * 1024, "Atheros AR8162 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8171, 9 * 1024, "Atheros AR8171 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8172, 9 * 1024, "Atheros AR8172 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_E2200, 9 * 1024, "Killer E2200 Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_E2400, 9 * 1024, "Killer E2400 Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_E2500, 9 * 1024, "Killer E2500 Gigabit Ethernet" }, { 0, 0, 0, NULL} }; static void alc_aspm(struct alc_softc *, int, int); static void alc_aspm_813x(struct alc_softc *, int); static void alc_aspm_816x(struct alc_softc *, int); static int alc_attach(device_t); static int alc_check_boundary(struct alc_softc *); static void alc_config_msi(struct alc_softc *); static int alc_detach(device_t); static void alc_disable_l0s_l1(struct alc_softc *); static int alc_dma_alloc(struct alc_softc *); static void alc_dma_free(struct alc_softc *); static void alc_dmamap_cb(void *, bus_dma_segment_t *, int, int); static void alc_dsp_fixup(struct alc_softc *, int); static int alc_encap(struct alc_softc *, struct mbuf **); static struct alc_ident * alc_find_ident(device_t); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * alc_fixup_rx(struct ifnet *, struct mbuf *); #endif static void alc_get_macaddr(struct alc_softc *); static void alc_get_macaddr_813x(struct alc_softc *); static void alc_get_macaddr_816x(struct alc_softc *); static void alc_get_macaddr_par(struct alc_softc *); static void alc_init(void *); static void alc_init_cmb(struct alc_softc *); static void alc_init_locked(struct alc_softc *); static void alc_init_rr_ring(struct alc_softc *); static int alc_init_rx_ring(struct alc_softc *); static void alc_init_smb(struct alc_softc *); static void alc_init_tx_ring(struct alc_softc *); static void alc_int_task(void *, int); static int alc_intr(void *); static int alc_ioctl(struct ifnet *, u_long, caddr_t); static void alc_mac_config(struct alc_softc *); static uint32_t alc_mii_readreg_813x(struct alc_softc *, int, int); static uint32_t alc_mii_readreg_816x(struct alc_softc *, int, int); static uint32_t alc_mii_writereg_813x(struct alc_softc *, int, int, int); static uint32_t alc_mii_writereg_816x(struct alc_softc *, int, int, int); static int alc_miibus_readreg(device_t, int, int); static void alc_miibus_statchg(device_t); static int alc_miibus_writereg(device_t, int, int, int); static uint32_t alc_miidbg_readreg(struct alc_softc *, int); static uint32_t alc_miidbg_writereg(struct alc_softc *, int, int); static uint32_t alc_miiext_readreg(struct alc_softc *, int, int); static uint32_t alc_miiext_writereg(struct alc_softc *, int, int, int); static int alc_mediachange(struct ifnet *); static int alc_mediachange_locked(struct alc_softc *); static void alc_mediastatus(struct ifnet *, struct ifmediareq *); static int alc_newbuf(struct alc_softc *, struct alc_rxdesc *); static void alc_osc_reset(struct alc_softc *); static void alc_phy_down(struct alc_softc *); static void alc_phy_reset(struct alc_softc *); static void alc_phy_reset_813x(struct alc_softc *); static void alc_phy_reset_816x(struct alc_softc *); static int alc_probe(device_t); static void alc_reset(struct alc_softc *); static int alc_resume(device_t); static void alc_rxeof(struct alc_softc *, struct rx_rdesc *); static int alc_rxintr(struct alc_softc *, int); static void alc_rxfilter(struct alc_softc *); static void alc_rxvlan(struct alc_softc *); static void alc_setlinkspeed(struct alc_softc *); static void alc_setwol(struct alc_softc *); static void alc_setwol_813x(struct alc_softc *); static void alc_setwol_816x(struct alc_softc *); static int alc_shutdown(device_t); static void alc_start(struct ifnet *); static void alc_start_locked(struct ifnet *); static void alc_start_queue(struct alc_softc *); static void alc_start_tx(struct alc_softc *); static void alc_stats_clear(struct alc_softc *); static void alc_stats_update(struct alc_softc *); static void alc_stop(struct alc_softc *); static void alc_stop_mac(struct alc_softc *); static void alc_stop_queue(struct alc_softc *); static int alc_suspend(device_t); static void alc_sysctl_node(struct alc_softc *); static void alc_tick(void *); static void alc_txeof(struct alc_softc *); static void alc_watchdog(struct alc_softc *); static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_alc_proc_limit(SYSCTL_HANDLER_ARGS); static int sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS); NETDUMP_DEFINE(alc); static device_method_t alc_methods[] = { /* Device interface. */ DEVMETHOD(device_probe, alc_probe), DEVMETHOD(device_attach, alc_attach), DEVMETHOD(device_detach, alc_detach), DEVMETHOD(device_shutdown, alc_shutdown), DEVMETHOD(device_suspend, alc_suspend), DEVMETHOD(device_resume, alc_resume), /* MII interface. */ DEVMETHOD(miibus_readreg, alc_miibus_readreg), DEVMETHOD(miibus_writereg, alc_miibus_writereg), DEVMETHOD(miibus_statchg, alc_miibus_statchg), DEVMETHOD_END }; static driver_t alc_driver = { "alc", alc_methods, sizeof(struct alc_softc) }; static devclass_t alc_devclass; DRIVER_MODULE(alc, pci, alc_driver, alc_devclass, 0, 0); DRIVER_MODULE(miibus, alc, miibus_driver, miibus_devclass, 0, 0); static struct resource_spec alc_res_spec_mem[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec alc_irq_spec_legacy[] = { { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0, 0 } }; static struct resource_spec alc_irq_spec_msi[] = { { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec alc_irq_spec_msix[] = { { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0, 0 } }; static uint32_t alc_dma_burst[] = { 128, 256, 512, 1024, 2048, 4096, 0, 0 }; static int alc_miibus_readreg(device_t dev, int phy, int reg) { struct alc_softc *sc; int v; sc = device_get_softc(dev); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) v = alc_mii_readreg_816x(sc, phy, reg); else v = alc_mii_readreg_813x(sc, phy, reg); return (v); } static uint32_t alc_mii_readreg_813x(struct alc_softc *sc, int phy, int reg) { uint32_t v; int i; /* * For AR8132 fast ethernet controller, do not report 1000baseT * capability to mii(4). Even though AR8132 uses the same * model/revision number of F1 gigabit PHY, the PHY has no * ability to establish 1000baseT link. */ if ((sc->alc_flags & ALC_FLAG_FASTETHER) != 0 && reg == MII_EXTSR) return (0); CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_READ | MDIO_SUP_PREAMBLE | MDIO_CLK_25_4 | MDIO_REG_ADDR(reg)); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & (MDIO_OP_EXECUTE | MDIO_OP_BUSY)) == 0) break; } if (i == 0) { device_printf(sc->alc_dev, "phy read timeout : %d\n", reg); return (0); } return ((v & MDIO_DATA_MASK) >> MDIO_DATA_SHIFT); } static uint32_t alc_mii_readreg_816x(struct alc_softc *sc, int phy, int reg) { uint32_t clk, v; int i; if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_READ | MDIO_SUP_PREAMBLE | clk | MDIO_REG_ADDR(reg)); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) { device_printf(sc->alc_dev, "phy read timeout : %d\n", reg); return (0); } return ((v & MDIO_DATA_MASK) >> MDIO_DATA_SHIFT); } static int alc_miibus_writereg(device_t dev, int phy, int reg, int val) { struct alc_softc *sc; int v; sc = device_get_softc(dev); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) v = alc_mii_writereg_816x(sc, phy, reg, val); else v = alc_mii_writereg_813x(sc, phy, reg, val); return (v); } static uint32_t alc_mii_writereg_813x(struct alc_softc *sc, int phy, int reg, int val) { uint32_t v; int i; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_WRITE | (val & MDIO_DATA_MASK) << MDIO_DATA_SHIFT | MDIO_SUP_PREAMBLE | MDIO_CLK_25_4 | MDIO_REG_ADDR(reg)); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & (MDIO_OP_EXECUTE | MDIO_OP_BUSY)) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "phy write timeout : %d\n", reg); return (0); } static uint32_t alc_mii_writereg_816x(struct alc_softc *sc, int phy, int reg, int val) { uint32_t clk, v; int i; if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_WRITE | ((val & MDIO_DATA_MASK) << MDIO_DATA_SHIFT) | MDIO_REG_ADDR(reg) | MDIO_SUP_PREAMBLE | clk); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "phy write timeout : %d\n", reg); return (0); } static void alc_miibus_statchg(device_t dev) { struct alc_softc *sc; struct mii_data *mii; struct ifnet *ifp; uint32_t reg; sc = device_get_softc(dev); mii = device_get_softc(sc->alc_miibus); ifp = sc->alc_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc->alc_flags &= ~ALC_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->alc_flags |= ALC_FLAG_LINK; break; case IFM_1000_T: if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) sc->alc_flags |= ALC_FLAG_LINK; break; default: break; } } /* Stop Rx/Tx MACs. */ alc_stop_mac(sc); /* Program MACs with resolved speed/duplex/flow-control. */ if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { alc_start_queue(sc); alc_mac_config(sc); /* Re-enable Tx/Rx MACs. */ reg = CSR_READ_4(sc, ALC_MAC_CFG); reg |= MAC_CFG_TX_ENB | MAC_CFG_RX_ENB; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } alc_aspm(sc, 0, IFM_SUBTYPE(mii->mii_media_active)); alc_dsp_fixup(sc, IFM_SUBTYPE(mii->mii_media_active)); } static uint32_t alc_miidbg_readreg(struct alc_softc *sc, int reg) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, reg); return (alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA)); } static uint32_t alc_miidbg_writereg(struct alc_softc *sc, int reg, int val) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, reg); return (alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val)); } static uint32_t alc_miiext_readreg(struct alc_softc *sc, int devaddr, int reg) { uint32_t clk, v; int i; CSR_WRITE_4(sc, ALC_EXT_MDIO, EXT_MDIO_REG(reg) | EXT_MDIO_DEVADDR(devaddr)); if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_READ | MDIO_SUP_PREAMBLE | clk | MDIO_MODE_EXT); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) { device_printf(sc->alc_dev, "phy ext read timeout : %d, %d\n", devaddr, reg); return (0); } return ((v & MDIO_DATA_MASK) >> MDIO_DATA_SHIFT); } static uint32_t alc_miiext_writereg(struct alc_softc *sc, int devaddr, int reg, int val) { uint32_t clk, v; int i; CSR_WRITE_4(sc, ALC_EXT_MDIO, EXT_MDIO_REG(reg) | EXT_MDIO_DEVADDR(devaddr)); if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_WRITE | ((val & MDIO_DATA_MASK) << MDIO_DATA_SHIFT) | MDIO_SUP_PREAMBLE | clk | MDIO_MODE_EXT); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "phy ext write timeout : %d, %d\n", devaddr, reg); return (0); } static void alc_dsp_fixup(struct alc_softc *sc, int media) { uint16_t agc, len, val; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) return; if (AR816X_REV(sc->alc_rev) >= AR816X_REV_C0) return; /* * Vendor PHY magic. * 1000BT/AZ, wrong cable length */ if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { len = alc_miiext_readreg(sc, MII_EXT_PCS, MII_EXT_CLDCTL6); len = (len >> EXT_CLDCTL6_CAB_LEN_SHIFT) & EXT_CLDCTL6_CAB_LEN_MASK; agc = alc_miidbg_readreg(sc, MII_DBG_AGC); agc = (agc >> DBG_AGC_2_VGA_SHIFT) & DBG_AGC_2_VGA_MASK; if ((media == IFM_1000_T && len > EXT_CLDCTL6_CAB_LEN_SHORT1G && agc > DBG_AGC_LONG1G_LIMT) || (media == IFM_100_TX && len > DBG_AGC_LONG100M_LIMT && agc > DBG_AGC_LONG1G_LIMT)) { alc_miidbg_writereg(sc, MII_DBG_AZ_ANADECT, DBG_AZ_ANADECT_LONG); val = alc_miiext_readreg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE); val |= ANEG_AFEE_10BT_100M_TH; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE, val); } else { alc_miidbg_writereg(sc, MII_DBG_AZ_ANADECT, DBG_AZ_ANADECT_DEFAULT); val = alc_miiext_readreg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE); val &= ~ANEG_AFEE_10BT_100M_TH; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE, val); } if ((sc->alc_flags & ALC_FLAG_LINK_WAR) != 0 && AR816X_REV(sc->alc_rev) == AR816X_REV_B0) { if (media == IFM_1000_T) { /* * Giga link threshold, raise the tolerance of * noise 50%. */ val = alc_miidbg_readreg(sc, MII_DBG_MSE20DB); val &= ~DBG_MSE20DB_TH_MASK; val |= (DBG_MSE20DB_TH_HI << DBG_MSE20DB_TH_SHIFT); alc_miidbg_writereg(sc, MII_DBG_MSE20DB, val); } else if (media == IFM_100_TX) alc_miidbg_writereg(sc, MII_DBG_MSE16DB, DBG_MSE16DB_UP); } } else { val = alc_miiext_readreg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE); val &= ~ANEG_AFEE_10BT_100M_TH; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE, val); if ((sc->alc_flags & ALC_FLAG_LINK_WAR) != 0 && AR816X_REV(sc->alc_rev) == AR816X_REV_B0) { alc_miidbg_writereg(sc, MII_DBG_MSE16DB, DBG_MSE16DB_DOWN); val = alc_miidbg_readreg(sc, MII_DBG_MSE20DB); val &= ~DBG_MSE20DB_TH_MASK; val |= (DBG_MSE20DB_TH_DEFAULT << DBG_MSE20DB_TH_SHIFT); alc_miidbg_writereg(sc, MII_DBG_MSE20DB, val); } } } static void alc_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct alc_softc *sc; struct mii_data *mii; sc = ifp->if_softc; ALC_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { ALC_UNLOCK(sc); return; } mii = device_get_softc(sc->alc_miibus); mii_pollstat(mii); ifmr->ifm_status = mii->mii_media_status; ifmr->ifm_active = mii->mii_media_active; ALC_UNLOCK(sc); } static int alc_mediachange(struct ifnet *ifp) { struct alc_softc *sc; int error; sc = ifp->if_softc; ALC_LOCK(sc); error = alc_mediachange_locked(sc); ALC_UNLOCK(sc); return (error); } static int alc_mediachange_locked(struct alc_softc *sc) { struct mii_data *mii; struct mii_softc *miisc; int error; ALC_LOCK_ASSERT(sc); mii = device_get_softc(sc->alc_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); error = mii_mediachg(mii); return (error); } static struct alc_ident * alc_find_ident(device_t dev) { struct alc_ident *ident; uint16_t vendor, devid; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); for (ident = alc_ident_table; ident->name != NULL; ident++) { if (vendor == ident->vendorid && devid == ident->deviceid) return (ident); } return (NULL); } static int alc_probe(device_t dev) { struct alc_ident *ident; ident = alc_find_ident(dev); if (ident != NULL) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static void alc_get_macaddr(struct alc_softc *sc) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_get_macaddr_816x(sc); else alc_get_macaddr_813x(sc); } static void alc_get_macaddr_813x(struct alc_softc *sc) { uint32_t opt; uint16_t val; int eeprom, i; eeprom = 0; opt = CSR_READ_4(sc, ALC_OPT_CFG); if ((CSR_READ_4(sc, ALC_MASTER_CFG) & MASTER_OTP_SEL) != 0 && (CSR_READ_4(sc, ALC_TWSI_DEBUG) & TWSI_DEBUG_DEV_EXIST) != 0) { /* * EEPROM found, let TWSI reload EEPROM configuration. * This will set ethernet address of controller. */ eeprom++; switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8131: case DEVICEID_ATHEROS_AR8132: if ((opt & OPT_CFG_CLK_ENB) == 0) { opt |= OPT_CFG_CLK_ENB; CSR_WRITE_4(sc, ALC_OPT_CFG, opt); CSR_READ_4(sc, ALC_OPT_CFG); DELAY(1000); } break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x00); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val & 0xFF7F); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x3B); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val | 0x0008); DELAY(20); break; } CSR_WRITE_4(sc, ALC_LTSSM_ID_CFG, CSR_READ_4(sc, ALC_LTSSM_ID_CFG) & ~LTSSM_ID_WRO_ENB); CSR_WRITE_4(sc, ALC_WOL_CFG, 0); CSR_READ_4(sc, ALC_WOL_CFG); CSR_WRITE_4(sc, ALC_TWSI_CFG, CSR_READ_4(sc, ALC_TWSI_CFG) | TWSI_CFG_SW_LD_START); for (i = 100; i > 0; i--) { DELAY(1000); if ((CSR_READ_4(sc, ALC_TWSI_CFG) & TWSI_CFG_SW_LD_START) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "reloading EEPROM timeout!\n"); } else { if (bootverbose) device_printf(sc->alc_dev, "EEPROM not found!\n"); } if (eeprom != 0) { switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8131: case DEVICEID_ATHEROS_AR8132: if ((opt & OPT_CFG_CLK_ENB) != 0) { opt &= ~OPT_CFG_CLK_ENB; CSR_WRITE_4(sc, ALC_OPT_CFG, opt); CSR_READ_4(sc, ALC_OPT_CFG); DELAY(1000); } break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x00); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val | 0x0080); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x3B); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val & 0xFFF7); DELAY(20); break; } } alc_get_macaddr_par(sc); } static void alc_get_macaddr_816x(struct alc_softc *sc) { uint32_t reg; int i, reloaded; reloaded = 0; /* Try to reload station address via TWSI. */ for (i = 100; i > 0; i--) { reg = CSR_READ_4(sc, ALC_SLD); if ((reg & (SLD_PROGRESS | SLD_START)) == 0) break; DELAY(1000); } if (i != 0) { CSR_WRITE_4(sc, ALC_SLD, reg | SLD_START); for (i = 100; i > 0; i--) { DELAY(1000); reg = CSR_READ_4(sc, ALC_SLD); if ((reg & SLD_START) == 0) break; } if (i != 0) reloaded++; else if (bootverbose) device_printf(sc->alc_dev, "reloading station address via TWSI timed out!\n"); } /* Try to reload station address from EEPROM or FLASH. */ if (reloaded == 0) { reg = CSR_READ_4(sc, ALC_EEPROM_LD); if ((reg & (EEPROM_LD_EEPROM_EXIST | EEPROM_LD_FLASH_EXIST)) != 0) { for (i = 100; i > 0; i--) { reg = CSR_READ_4(sc, ALC_EEPROM_LD); if ((reg & (EEPROM_LD_PROGRESS | EEPROM_LD_START)) == 0) break; DELAY(1000); } if (i != 0) { CSR_WRITE_4(sc, ALC_EEPROM_LD, reg | EEPROM_LD_START); for (i = 100; i > 0; i--) { DELAY(1000); reg = CSR_READ_4(sc, ALC_EEPROM_LD); if ((reg & EEPROM_LD_START) == 0) break; } } else if (bootverbose) device_printf(sc->alc_dev, "reloading EEPROM/FLASH timed out!\n"); } } alc_get_macaddr_par(sc); } static void alc_get_macaddr_par(struct alc_softc *sc) { uint32_t ea[2]; ea[0] = CSR_READ_4(sc, ALC_PAR0); ea[1] = CSR_READ_4(sc, ALC_PAR1); sc->alc_eaddr[0] = (ea[1] >> 8) & 0xFF; sc->alc_eaddr[1] = (ea[1] >> 0) & 0xFF; sc->alc_eaddr[2] = (ea[0] >> 24) & 0xFF; sc->alc_eaddr[3] = (ea[0] >> 16) & 0xFF; sc->alc_eaddr[4] = (ea[0] >> 8) & 0xFF; sc->alc_eaddr[5] = (ea[0] >> 0) & 0xFF; } static void alc_disable_l0s_l1(struct alc_softc *sc) { uint32_t pmcfg; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* Another magic from vendor. */ pmcfg = CSR_READ_4(sc, ALC_PM_CFG); pmcfg &= ~(PM_CFG_L1_ENTRY_TIMER_MASK | PM_CFG_CLK_SWH_L1 | PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK | PM_CFG_SERDES_PD_EX_L1); pmcfg |= PM_CFG_SERDES_BUDS_RX_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_L1_ENB; CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } } static void alc_phy_reset(struct alc_softc *sc) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_phy_reset_816x(sc); else alc_phy_reset_813x(sc); } static void alc_phy_reset_813x(struct alc_softc *sc) { uint16_t data; /* Reset magic from Linux. */ CSR_WRITE_2(sc, ALC_GPHY_CFG, GPHY_CFG_SEL_ANA_RESET); CSR_READ_2(sc, ALC_GPHY_CFG); DELAY(10 * 1000); CSR_WRITE_2(sc, ALC_GPHY_CFG, GPHY_CFG_EXT_RESET | GPHY_CFG_SEL_ANA_RESET); CSR_READ_2(sc, ALC_GPHY_CFG); DELAY(10 * 1000); /* DSP fixup, Vendor magic. */ if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x000A); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data & 0xDFFF); } if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x003B); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data & 0xFFF7); DELAY(20 * 1000); } if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x0029); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, 0x929D); } if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8131 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8132 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x0029); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, 0xB6DD); } /* Load DSP codes, vendor magic. */ data = ANA_LOOP_SEL_10BT | ANA_EN_MASK_TB | ANA_EN_10BT_IDLE | ((1 << ANA_INTERVAL_SEL_TIMER_SHIFT) & ANA_INTERVAL_SEL_TIMER_MASK); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG18); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((2 << ANA_SERDES_CDR_BW_SHIFT) & ANA_SERDES_CDR_BW_MASK) | ANA_SERDES_EN_DEEM | ANA_SERDES_SEL_HSP | ANA_SERDES_EN_PLL | ANA_SERDES_EN_LCKDT; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG5); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((44 << ANA_LONG_CABLE_TH_100_SHIFT) & ANA_LONG_CABLE_TH_100_MASK) | ((33 << ANA_SHORT_CABLE_TH_100_SHIFT) & ANA_SHORT_CABLE_TH_100_SHIFT) | ANA_BP_BAD_LINK_ACCUM | ANA_BP_SMALL_BW; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG54); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((11 << ANA_IECHO_ADJ_3_SHIFT) & ANA_IECHO_ADJ_3_MASK) | ((11 << ANA_IECHO_ADJ_2_SHIFT) & ANA_IECHO_ADJ_2_MASK) | ((8 << ANA_IECHO_ADJ_1_SHIFT) & ANA_IECHO_ADJ_1_MASK) | ((8 << ANA_IECHO_ADJ_0_SHIFT) & ANA_IECHO_ADJ_0_MASK); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG4); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((7 & ANA_MANUL_SWICH_ON_SHIFT) & ANA_MANUL_SWICH_ON_MASK) | ANA_RESTART_CAL | ANA_MAN_ENABLE | ANA_SEL_HSP | ANA_EN_HB | ANA_OEN_125M; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG0); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); DELAY(1000); /* Disable hibernation. */ alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x0029); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); data &= ~0x8000; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x000B); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); data &= ~0x8000; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); } static void alc_phy_reset_816x(struct alc_softc *sc) { uint32_t val; val = CSR_READ_4(sc, ALC_GPHY_CFG); val &= ~(GPHY_CFG_EXT_RESET | GPHY_CFG_LED_MODE | GPHY_CFG_GATE_25M_ENB | GPHY_CFG_PHY_IDDQ | GPHY_CFG_PHY_PLL_ON | GPHY_CFG_PWDOWN_HW | GPHY_CFG_100AB_ENB); val |= GPHY_CFG_SEL_ANA_RESET; #ifdef notyet val |= GPHY_CFG_HIB_PULSE | GPHY_CFG_HIB_EN | GPHY_CFG_SEL_ANA_RESET; #else /* Disable PHY hibernation. */ val &= ~(GPHY_CFG_HIB_PULSE | GPHY_CFG_HIB_EN); #endif CSR_WRITE_4(sc, ALC_GPHY_CFG, val); DELAY(10); CSR_WRITE_4(sc, ALC_GPHY_CFG, val | GPHY_CFG_EXT_RESET); DELAY(800); /* Vendor PHY magic. */ #ifdef notyet alc_miidbg_writereg(sc, MII_DBG_LEGCYPS, DBG_LEGCYPS_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_SYSMODCTL, DBG_SYSMODCTL_DEFAULT); alc_miiext_writereg(sc, MII_EXT_PCS, MII_EXT_VDRVBIAS, EXT_VDRVBIAS_DEFAULT); #else /* Disable PHY hibernation. */ alc_miidbg_writereg(sc, MII_DBG_LEGCYPS, DBG_LEGCYPS_DEFAULT & ~DBG_LEGCYPS_ENB); alc_miidbg_writereg(sc, MII_DBG_HIBNEG, DBG_HIBNEG_DEFAULT & ~(DBG_HIBNEG_PSHIB_EN | DBG_HIBNEG_HIB_PULSE)); alc_miidbg_writereg(sc, MII_DBG_GREENCFG, DBG_GREENCFG_DEFAULT); #endif /* XXX Disable EEE. */ val = CSR_READ_4(sc, ALC_LPI_CTL); val &= ~LPI_CTL_ENB; CSR_WRITE_4(sc, ALC_LPI_CTL, val); alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_LOCAL_EEEADV, 0); /* PHY power saving. */ alc_miidbg_writereg(sc, MII_DBG_TST10BTCFG, DBG_TST10BTCFG_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_SRDSYSMOD, DBG_SRDSYSMOD_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_TST100BTCFG, DBG_TST100BTCFG_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_ANACTL, DBG_ANACTL_DEFAULT); val = alc_miidbg_readreg(sc, MII_DBG_GREENCFG2); val &= ~DBG_GREENCFG2_GATE_DFSE_EN; alc_miidbg_writereg(sc, MII_DBG_GREENCFG2, val); /* RTL8139C, 120m issue. */ alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_NLP78, ANEG_NLP78_120M_DEFAULT); alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_S3DIG10, ANEG_S3DIG10_DEFAULT); if ((sc->alc_flags & ALC_FLAG_LINK_WAR) != 0) { /* Turn off half amplitude. */ val = alc_miiext_readreg(sc, MII_EXT_PCS, MII_EXT_CLDCTL3); val |= EXT_CLDCTL3_BP_CABLE1TH_DET_GT; alc_miiext_writereg(sc, MII_EXT_PCS, MII_EXT_CLDCTL3, val); /* Turn off Green feature. */ val = alc_miidbg_readreg(sc, MII_DBG_GREENCFG2); val |= DBG_GREENCFG2_BP_GREEN; alc_miidbg_writereg(sc, MII_DBG_GREENCFG2, val); /* Turn off half bias. */ val = alc_miiext_readreg(sc, MII_EXT_PCS, MII_EXT_CLDCTL5); val |= EXT_CLDCTL5_BP_VD_HLFBIAS; alc_miiext_writereg(sc, MII_EXT_PCS, MII_EXT_CLDCTL5, val); } } static void alc_phy_down(struct alc_softc *sc) { uint32_t gphy; switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8161: case DEVICEID_ATHEROS_E2200: case DEVICEID_ATHEROS_E2400: case DEVICEID_ATHEROS_E2500: case DEVICEID_ATHEROS_AR8162: case DEVICEID_ATHEROS_AR8171: case DEVICEID_ATHEROS_AR8172: gphy = CSR_READ_4(sc, ALC_GPHY_CFG); gphy &= ~(GPHY_CFG_EXT_RESET | GPHY_CFG_LED_MODE | GPHY_CFG_100AB_ENB | GPHY_CFG_PHY_PLL_ON); gphy |= GPHY_CFG_HIB_EN | GPHY_CFG_HIB_PULSE | GPHY_CFG_SEL_ANA_RESET; gphy |= GPHY_CFG_PHY_IDDQ | GPHY_CFG_PWDOWN_HW; CSR_WRITE_4(sc, ALC_GPHY_CFG, gphy); break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: /* * GPHY power down caused more problems on AR8151 v2.0. * When driver is reloaded after GPHY power down, * accesses to PHY/MAC registers hung the system. Only * cold boot recovered from it. I'm not sure whether * AR8151 v1.0 also requires this one though. I don't * have AR8151 v1.0 controller in hand. * The only option left is to isolate the PHY and * initiates power down the PHY which in turn saves * more power when driver is unloaded. */ alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_BMCR, BMCR_ISO | BMCR_PDOWN); break; default: /* Force PHY down. */ CSR_WRITE_2(sc, ALC_GPHY_CFG, GPHY_CFG_EXT_RESET | GPHY_CFG_SEL_ANA_RESET | GPHY_CFG_PHY_IDDQ | GPHY_CFG_PWDOWN_HW); DELAY(1000); break; } } static void alc_aspm(struct alc_softc *sc, int init, int media) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_aspm_816x(sc, init); else alc_aspm_813x(sc, media); } static void alc_aspm_813x(struct alc_softc *sc, int media) { uint32_t pmcfg; uint16_t linkcfg; if ((sc->alc_flags & ALC_FLAG_LINK) == 0) return; pmcfg = CSR_READ_4(sc, ALC_PM_CFG); if ((sc->alc_flags & (ALC_FLAG_APS | ALC_FLAG_PCIE)) == (ALC_FLAG_APS | ALC_FLAG_PCIE)) linkcfg = CSR_READ_2(sc, sc->alc_expcap + PCIER_LINK_CTL); else linkcfg = 0; pmcfg &= ~PM_CFG_SERDES_PD_EX_L1; pmcfg &= ~(PM_CFG_L1_ENTRY_TIMER_MASK | PM_CFG_LCKDET_TIMER_MASK); pmcfg |= PM_CFG_MAC_ASPM_CHK; pmcfg |= (PM_CFG_LCKDET_TIMER_DEFAULT << PM_CFG_LCKDET_TIMER_SHIFT); pmcfg &= ~(PM_CFG_ASPM_L1_ENB | PM_CFG_ASPM_L0S_ENB); if ((sc->alc_flags & ALC_FLAG_APS) != 0) { /* Disable extended sync except AR8152 B v1.0 */ linkcfg &= ~PCIEM_LINK_CTL_EXTENDED_SYNC; if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B && sc->alc_rev == ATHEROS_AR8152_B_V10) linkcfg |= PCIEM_LINK_CTL_EXTENDED_SYNC; CSR_WRITE_2(sc, sc->alc_expcap + PCIER_LINK_CTL, linkcfg); pmcfg &= ~(PM_CFG_EN_BUFS_RX_L0S | PM_CFG_SA_DLY_ENB | PM_CFG_HOTRST); pmcfg |= (PM_CFG_L1_ENTRY_TIMER_DEFAULT << PM_CFG_L1_ENTRY_TIMER_SHIFT); pmcfg &= ~PM_CFG_PM_REQ_TIMER_MASK; pmcfg |= (PM_CFG_PM_REQ_TIMER_DEFAULT << PM_CFG_PM_REQ_TIMER_SHIFT); pmcfg |= PM_CFG_SERDES_PD_EX_L1 | PM_CFG_PCIE_RECV; } if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { if ((sc->alc_flags & ALC_FLAG_L0S) != 0) pmcfg |= PM_CFG_ASPM_L0S_ENB; if ((sc->alc_flags & ALC_FLAG_L1S) != 0) pmcfg |= PM_CFG_ASPM_L1_ENB; if ((sc->alc_flags & ALC_FLAG_APS) != 0) { if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B) pmcfg &= ~PM_CFG_ASPM_L0S_ENB; pmcfg &= ~(PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_BUDS_RX_L1_ENB); pmcfg |= PM_CFG_CLK_SWH_L1; if (media == IFM_100_TX || media == IFM_1000_T) { pmcfg &= ~PM_CFG_L1_ENTRY_TIMER_MASK; switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8152_B: pmcfg |= (7 << PM_CFG_L1_ENTRY_TIMER_SHIFT); break; case DEVICEID_ATHEROS_AR8152_B2: case DEVICEID_ATHEROS_AR8151_V2: pmcfg |= (4 << PM_CFG_L1_ENTRY_TIMER_SHIFT); break; default: pmcfg |= (15 << PM_CFG_L1_ENTRY_TIMER_SHIFT); break; } } } else { pmcfg |= PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_BUDS_RX_L1_ENB; pmcfg &= ~(PM_CFG_CLK_SWH_L1 | PM_CFG_ASPM_L1_ENB | PM_CFG_ASPM_L0S_ENB); } } else { pmcfg &= ~(PM_CFG_SERDES_BUDS_RX_L1_ENB | PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB); pmcfg |= PM_CFG_CLK_SWH_L1; if ((sc->alc_flags & ALC_FLAG_L1S) != 0) pmcfg |= PM_CFG_ASPM_L1_ENB; } CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } static void alc_aspm_816x(struct alc_softc *sc, int init) { uint32_t pmcfg; pmcfg = CSR_READ_4(sc, ALC_PM_CFG); pmcfg &= ~PM_CFG_L1_ENTRY_TIMER_816X_MASK; pmcfg |= PM_CFG_L1_ENTRY_TIMER_816X_DEFAULT; pmcfg &= ~PM_CFG_PM_REQ_TIMER_MASK; pmcfg |= PM_CFG_PM_REQ_TIMER_816X_DEFAULT; pmcfg &= ~PM_CFG_LCKDET_TIMER_MASK; pmcfg |= PM_CFG_LCKDET_TIMER_DEFAULT; pmcfg |= PM_CFG_SERDES_PD_EX_L1 | PM_CFG_CLK_SWH_L1 | PM_CFG_PCIE_RECV; pmcfg &= ~(PM_CFG_RX_L1_AFTER_L0S | PM_CFG_TX_L1_AFTER_L0S | PM_CFG_ASPM_L1_ENB | PM_CFG_ASPM_L0S_ENB | PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_BUDS_RX_L1_ENB | PM_CFG_SA_DLY_ENB | PM_CFG_MAC_ASPM_CHK | PM_CFG_HOTRST); if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) pmcfg |= PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB; if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { /* Link up, enable both L0s, L1s. */ pmcfg |= PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK; } else { if (init != 0) pmcfg |= PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK; else if ((sc->alc_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) pmcfg |= PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK; } CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } static void alc_init_pcie(struct alc_softc *sc) { const char *aspm_state[] = { "L0s/L1", "L0s", "L1", "L0s/L1" }; uint32_t cap, ctl, val; int state; /* Clear data link and flow-control protocol error. */ val = CSR_READ_4(sc, ALC_PEX_UNC_ERR_SEV); val &= ~(PEX_UNC_ERR_SEV_DLP | PEX_UNC_ERR_SEV_FCP); CSR_WRITE_4(sc, ALC_PEX_UNC_ERR_SEV, val); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { CSR_WRITE_4(sc, ALC_LTSSM_ID_CFG, CSR_READ_4(sc, ALC_LTSSM_ID_CFG) & ~LTSSM_ID_WRO_ENB); CSR_WRITE_4(sc, ALC_PCIE_PHYMISC, CSR_READ_4(sc, ALC_PCIE_PHYMISC) | PCIE_PHYMISC_FORCE_RCV_DET); if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B && sc->alc_rev == ATHEROS_AR8152_B_V10) { val = CSR_READ_4(sc, ALC_PCIE_PHYMISC2); val &= ~(PCIE_PHYMISC2_SERDES_CDR_MASK | PCIE_PHYMISC2_SERDES_TH_MASK); val |= 3 << PCIE_PHYMISC2_SERDES_CDR_SHIFT; val |= 3 << PCIE_PHYMISC2_SERDES_TH_SHIFT; CSR_WRITE_4(sc, ALC_PCIE_PHYMISC2, val); } /* Disable ASPM L0S and L1. */ cap = CSR_READ_2(sc, sc->alc_expcap + PCIER_LINK_CAP); if ((cap & PCIEM_LINK_CAP_ASPM) != 0) { ctl = CSR_READ_2(sc, sc->alc_expcap + PCIER_LINK_CTL); if ((ctl & PCIEM_LINK_CTL_RCB) != 0) sc->alc_rcb = DMA_CFG_RCB_128; if (bootverbose) device_printf(sc->alc_dev, "RCB %u bytes\n", sc->alc_rcb == DMA_CFG_RCB_64 ? 64 : 128); state = ctl & PCIEM_LINK_CTL_ASPMC; if (state & PCIEM_LINK_CTL_ASPMC_L0S) sc->alc_flags |= ALC_FLAG_L0S; if (state & PCIEM_LINK_CTL_ASPMC_L1) sc->alc_flags |= ALC_FLAG_L1S; if (bootverbose) device_printf(sc->alc_dev, "ASPM %s %s\n", aspm_state[state], state == 0 ? "disabled" : "enabled"); alc_disable_l0s_l1(sc); } else { if (bootverbose) device_printf(sc->alc_dev, "no ASPM support\n"); } } else { val = CSR_READ_4(sc, ALC_PDLL_TRNS1); val &= ~PDLL_TRNS1_D3PLLOFF_ENB; CSR_WRITE_4(sc, ALC_PDLL_TRNS1, val); val = CSR_READ_4(sc, ALC_MASTER_CFG); if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) { if ((val & MASTER_WAKEN_25M) == 0 || (val & MASTER_CLK_SEL_DIS) == 0) { val |= MASTER_WAKEN_25M | MASTER_CLK_SEL_DIS; CSR_WRITE_4(sc, ALC_MASTER_CFG, val); } } else { if ((val & MASTER_WAKEN_25M) == 0 || (val & MASTER_CLK_SEL_DIS) != 0) { val |= MASTER_WAKEN_25M; val &= ~MASTER_CLK_SEL_DIS; CSR_WRITE_4(sc, ALC_MASTER_CFG, val); } } } alc_aspm(sc, 1, IFM_UNKNOWN); } static void alc_config_msi(struct alc_softc *sc) { uint32_t ctl, mod; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { /* * It seems interrupt moderation is controlled by * ALC_MSI_RETRANS_TIMER register if MSI/MSIX is active. * Driver uses RX interrupt moderation parameter to * program ALC_MSI_RETRANS_TIMER register. */ ctl = CSR_READ_4(sc, ALC_MSI_RETRANS_TIMER); ctl &= ~MSI_RETRANS_TIMER_MASK; ctl &= ~MSI_RETRANS_MASK_SEL_LINE; mod = ALC_USECS(sc->alc_int_rx_mod); if (mod == 0) mod = 1; ctl |= mod; if ((sc->alc_flags & ALC_FLAG_MSIX) != 0) CSR_WRITE_4(sc, ALC_MSI_RETRANS_TIMER, ctl | MSI_RETRANS_MASK_SEL_STD); else if ((sc->alc_flags & ALC_FLAG_MSI) != 0) CSR_WRITE_4(sc, ALC_MSI_RETRANS_TIMER, ctl | MSI_RETRANS_MASK_SEL_LINE); else CSR_WRITE_4(sc, ALC_MSI_RETRANS_TIMER, 0); } } static int alc_attach(device_t dev) { struct alc_softc *sc; struct ifnet *ifp; int base, error, i, msic, msixc; uint16_t burst; error = 0; sc = device_get_softc(dev); sc->alc_dev = dev; sc->alc_rev = pci_get_revid(dev); mtx_init(&sc->alc_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->alc_tick_ch, &sc->alc_mtx, 0); TASK_INIT(&sc->alc_int_task, 0, alc_int_task, sc); sc->alc_ident = alc_find_ident(dev); /* Map the device. */ pci_enable_busmaster(dev); sc->alc_res_spec = alc_res_spec_mem; sc->alc_irq_spec = alc_irq_spec_legacy; error = bus_alloc_resources(dev, sc->alc_res_spec, sc->alc_res); if (error != 0) { device_printf(dev, "cannot allocate memory resources.\n"); goto fail; } /* Set PHY address. */ sc->alc_phyaddr = ALC_PHY_ADDR; /* * One odd thing is AR8132 uses the same PHY hardware(F1 * gigabit PHY) of AR8131. So atphy(4) of AR8132 reports * the PHY supports 1000Mbps but that's not true. The PHY * used in AR8132 can't establish gigabit link even if it * shows the same PHY model/revision number of AR8131. */ switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_E2200: case DEVICEID_ATHEROS_E2400: case DEVICEID_ATHEROS_E2500: sc->alc_flags |= ALC_FLAG_E2X00; /* FALLTHROUGH */ case DEVICEID_ATHEROS_AR8161: if (pci_get_subvendor(dev) == VENDORID_ATHEROS && pci_get_subdevice(dev) == 0x0091 && sc->alc_rev == 0) sc->alc_flags |= ALC_FLAG_LINK_WAR; /* FALLTHROUGH */ case DEVICEID_ATHEROS_AR8171: sc->alc_flags |= ALC_FLAG_AR816X_FAMILY; break; case DEVICEID_ATHEROS_AR8162: case DEVICEID_ATHEROS_AR8172: sc->alc_flags |= ALC_FLAG_FASTETHER | ALC_FLAG_AR816X_FAMILY; break; case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: sc->alc_flags |= ALC_FLAG_APS; /* FALLTHROUGH */ case DEVICEID_ATHEROS_AR8132: sc->alc_flags |= ALC_FLAG_FASTETHER; break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: sc->alc_flags |= ALC_FLAG_APS; /* FALLTHROUGH */ default: break; } sc->alc_flags |= ALC_FLAG_JUMBO; /* * It seems that AR813x/AR815x has silicon bug for SMB. In * addition, Atheros said that enabling SMB wouldn't improve * performance. However I think it's bad to access lots of * registers to extract MAC statistics. */ sc->alc_flags |= ALC_FLAG_SMB_BUG; /* * Don't use Tx CMB. It is known to have silicon bug. */ sc->alc_flags |= ALC_FLAG_CMB_BUG; sc->alc_chip_rev = CSR_READ_4(sc, ALC_MASTER_CFG) >> MASTER_CHIP_REV_SHIFT; if (bootverbose) { device_printf(dev, "PCI device revision : 0x%04x\n", sc->alc_rev); device_printf(dev, "Chip id/revision : 0x%04x\n", sc->alc_chip_rev); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) device_printf(dev, "AR816x revision : 0x%x\n", AR816X_REV(sc->alc_rev)); } device_printf(dev, "%u Tx FIFO, %u Rx FIFO\n", CSR_READ_4(sc, ALC_SRAM_TX_FIFO_LEN) * 8, CSR_READ_4(sc, ALC_SRAM_RX_FIFO_LEN) * 8); /* Initialize DMA parameters. */ sc->alc_dma_rd_burst = 0; sc->alc_dma_wr_burst = 0; sc->alc_rcb = DMA_CFG_RCB_64; if (pci_find_cap(dev, PCIY_EXPRESS, &base) == 0) { sc->alc_flags |= ALC_FLAG_PCIE; sc->alc_expcap = base; burst = CSR_READ_2(sc, base + PCIER_DEVICE_CTL); sc->alc_dma_rd_burst = (burst & PCIEM_CTL_MAX_READ_REQUEST) >> 12; sc->alc_dma_wr_burst = (burst & PCIEM_CTL_MAX_PAYLOAD) >> 5; if (bootverbose) { device_printf(dev, "Read request size : %u bytes.\n", alc_dma_burst[sc->alc_dma_rd_burst]); device_printf(dev, "TLP payload size : %u bytes.\n", alc_dma_burst[sc->alc_dma_wr_burst]); } if (alc_dma_burst[sc->alc_dma_rd_burst] > 1024) sc->alc_dma_rd_burst = 3; if (alc_dma_burst[sc->alc_dma_wr_burst] > 1024) sc->alc_dma_wr_burst = 3; /* * Force maximum payload size to 128 bytes for * E2200/E2400/E2500. * Otherwise it triggers DMA write error. */ if ((sc->alc_flags & ALC_FLAG_E2X00) != 0) sc->alc_dma_wr_burst = 0; alc_init_pcie(sc); } /* Reset PHY. */ alc_phy_reset(sc); /* Reset the ethernet controller. */ alc_stop_mac(sc); alc_reset(sc); /* Allocate IRQ resources. */ msixc = pci_msix_count(dev); msic = pci_msi_count(dev); if (bootverbose) { device_printf(dev, "MSIX count : %d\n", msixc); device_printf(dev, "MSI count : %d\n", msic); } if (msixc > 1) msixc = 1; if (msic > 1) msic = 1; /* * Prefer MSIX over MSI. * AR816x controller has a silicon bug that MSI interrupt * does not assert if PCIM_CMD_INTxDIS bit of command * register is set. pci(4) was taught to handle that case. */ if (msix_disable == 0 || msi_disable == 0) { if (msix_disable == 0 && msixc > 0 && pci_alloc_msix(dev, &msixc) == 0) { if (msic == 1) { device_printf(dev, "Using %d MSIX message(s).\n", msixc); sc->alc_flags |= ALC_FLAG_MSIX; sc->alc_irq_spec = alc_irq_spec_msix; } else pci_release_msi(dev); } if (msi_disable == 0 && (sc->alc_flags & ALC_FLAG_MSIX) == 0 && msic > 0 && pci_alloc_msi(dev, &msic) == 0) { if (msic == 1) { device_printf(dev, "Using %d MSI message(s).\n", msic); sc->alc_flags |= ALC_FLAG_MSI; sc->alc_irq_spec = alc_irq_spec_msi; } else pci_release_msi(dev); } } error = bus_alloc_resources(dev, sc->alc_irq_spec, sc->alc_irq); if (error != 0) { device_printf(dev, "cannot allocate IRQ resources.\n"); goto fail; } /* Create device sysctl node. */ alc_sysctl_node(sc); if ((error = alc_dma_alloc(sc)) != 0) goto fail; /* Load station address. */ alc_get_macaddr(sc); ifp = sc->alc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure.\n"); error = ENXIO; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = alc_ioctl; ifp->if_start = alc_start; ifp->if_init = alc_init; ifp->if_snd.ifq_drv_maxlen = ALC_TX_RING_CNT - 1; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO4; ifp->if_hwassist = ALC_CSUM_FEATURES | CSUM_TSO; if (pci_find_cap(dev, PCIY_PMG, &base) == 0) { ifp->if_capabilities |= IFCAP_WOL_MAGIC | IFCAP_WOL_MCAST; sc->alc_flags |= ALC_FLAG_PM; sc->alc_pmcap = base; } ifp->if_capenable = ifp->if_capabilities; /* Set up MII bus. */ error = mii_attach(dev, &sc->alc_miibus, ifp, alc_mediachange, alc_mediastatus, BMSR_DEFCAPMASK, sc->alc_phyaddr, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } ether_ifattach(ifp, sc->alc_eaddr); /* VLAN capability setup. */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; ifp->if_capenable = ifp->if_capabilities; /* * XXX * It seems enabling Tx checksum offloading makes more trouble. * Sometimes the controller does not receive any frames when * Tx checksum offloading is enabled. I'm not sure whether this * is a bug in Tx checksum offloading logic or I got broken * sample boards. To safety, don't enable Tx checksum offloading * by default but give chance to users to toggle it if they know * their controllers work without problems. * Fortunately, Tx checksum offloading for AR816x family * seems to work. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { ifp->if_capenable &= ~IFCAP_TXCSUM; ifp->if_hwassist &= ~ALC_CSUM_FEATURES; } /* Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* Create local taskq. */ sc->alc_tq = taskqueue_create_fast("alc_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->alc_tq); if (sc->alc_tq == NULL) { device_printf(dev, "could not create taskqueue.\n"); ether_ifdetach(ifp); error = ENXIO; goto fail; } taskqueue_start_threads(&sc->alc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->alc_dev)); alc_config_msi(sc); if ((sc->alc_flags & ALC_FLAG_MSIX) != 0) msic = ALC_MSIX_MESSAGES; else if ((sc->alc_flags & ALC_FLAG_MSI) != 0) msic = ALC_MSI_MESSAGES; else msic = 1; for (i = 0; i < msic; i++) { error = bus_setup_intr(dev, sc->alc_irq[i], INTR_TYPE_NET | INTR_MPSAFE, alc_intr, NULL, sc, &sc->alc_intrhand[i]); if (error != 0) break; } if (error != 0) { device_printf(dev, "could not set up interrupt handler.\n"); taskqueue_free(sc->alc_tq); sc->alc_tq = NULL; ether_ifdetach(ifp); goto fail; } /* Attach driver netdump methods. */ NETDUMP_SET(ifp, alc); fail: if (error != 0) alc_detach(dev); return (error); } static int alc_detach(device_t dev) { struct alc_softc *sc; struct ifnet *ifp; int i, msic; sc = device_get_softc(dev); ifp = sc->alc_ifp; if (device_is_attached(dev)) { ether_ifdetach(ifp); ALC_LOCK(sc); alc_stop(sc); ALC_UNLOCK(sc); callout_drain(&sc->alc_tick_ch); taskqueue_drain(sc->alc_tq, &sc->alc_int_task); } if (sc->alc_tq != NULL) { taskqueue_drain(sc->alc_tq, &sc->alc_int_task); taskqueue_free(sc->alc_tq); sc->alc_tq = NULL; } if (sc->alc_miibus != NULL) { device_delete_child(dev, sc->alc_miibus); sc->alc_miibus = NULL; } bus_generic_detach(dev); alc_dma_free(sc); if (ifp != NULL) { if_free(ifp); sc->alc_ifp = NULL; } if ((sc->alc_flags & ALC_FLAG_MSIX) != 0) msic = ALC_MSIX_MESSAGES; else if ((sc->alc_flags & ALC_FLAG_MSI) != 0) msic = ALC_MSI_MESSAGES; else msic = 1; for (i = 0; i < msic; i++) { if (sc->alc_intrhand[i] != NULL) { bus_teardown_intr(dev, sc->alc_irq[i], sc->alc_intrhand[i]); sc->alc_intrhand[i] = NULL; } } if (sc->alc_res[0] != NULL) alc_phy_down(sc); bus_release_resources(dev, sc->alc_irq_spec, sc->alc_irq); if ((sc->alc_flags & (ALC_FLAG_MSI | ALC_FLAG_MSIX)) != 0) pci_release_msi(dev); bus_release_resources(dev, sc->alc_res_spec, sc->alc_res); mtx_destroy(&sc->alc_mtx); return (0); } #define ALC_SYSCTL_STAT_ADD32(c, h, n, p, d) \ SYSCTL_ADD_UINT(c, h, OID_AUTO, n, CTLFLAG_RD, p, 0, d) #define ALC_SYSCTL_STAT_ADD64(c, h, n, p, d) \ SYSCTL_ADD_UQUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d) static void alc_sysctl_node(struct alc_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child, *parent; struct sysctl_oid *tree; struct alc_hw_stats *stats; int error; stats = &sc->alc_stats; ctx = device_get_sysctl_ctx(sc->alc_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->alc_dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "int_rx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->alc_int_rx_mod, 0, sysctl_hw_alc_int_mod, "I", "alc Rx interrupt moderation"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "int_tx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->alc_int_tx_mod, 0, sysctl_hw_alc_int_mod, "I", "alc Tx interrupt moderation"); /* Pull in device tunables. */ sc->alc_int_rx_mod = ALC_IM_RX_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->alc_dev), device_get_unit(sc->alc_dev), "int_rx_mod", &sc->alc_int_rx_mod); if (error == 0) { if (sc->alc_int_rx_mod < ALC_IM_TIMER_MIN || sc->alc_int_rx_mod > ALC_IM_TIMER_MAX) { device_printf(sc->alc_dev, "int_rx_mod value out of " "range; using default: %d\n", ALC_IM_RX_TIMER_DEFAULT); sc->alc_int_rx_mod = ALC_IM_RX_TIMER_DEFAULT; } } sc->alc_int_tx_mod = ALC_IM_TX_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->alc_dev), device_get_unit(sc->alc_dev), "int_tx_mod", &sc->alc_int_tx_mod); if (error == 0) { if (sc->alc_int_tx_mod < ALC_IM_TIMER_MIN || sc->alc_int_tx_mod > ALC_IM_TIMER_MAX) { device_printf(sc->alc_dev, "int_tx_mod value out of " "range; using default: %d\n", ALC_IM_TX_TIMER_DEFAULT); sc->alc_int_tx_mod = ALC_IM_TX_TIMER_DEFAULT; } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "process_limit", CTLTYPE_INT | CTLFLAG_RW, &sc->alc_process_limit, 0, sysctl_hw_alc_proc_limit, "I", "max number of Rx events to process"); /* Pull in device tunables. */ sc->alc_process_limit = ALC_PROC_DEFAULT; error = resource_int_value(device_get_name(sc->alc_dev), device_get_unit(sc->alc_dev), "process_limit", &sc->alc_process_limit); if (error == 0) { if (sc->alc_process_limit < ALC_PROC_MIN || sc->alc_process_limit > ALC_PROC_MAX) { device_printf(sc->alc_dev, "process_limit value out of range; " "using default: %d\n", ALC_PROC_DEFAULT); sc->alc_process_limit = ALC_PROC_DEFAULT; } } tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "ALC statistics"); parent = SYSCTL_CHILDREN(tree); /* Rx statistics. */ tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx", CTLFLAG_RD, NULL, "Rx MAC statistics"); child = SYSCTL_CHILDREN(tree); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_frames", &stats->rx_frames, "Good frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_bcast_frames", &stats->rx_bcast_frames, "Good broadcast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_mcast_frames", &stats->rx_mcast_frames, "Good multicast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "pause_frames", &stats->rx_pause_frames, "Pause control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "control_frames", &stats->rx_control_frames, "Control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "crc_errs", &stats->rx_crcerrs, "CRC errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "len_errs", &stats->rx_lenerrs, "Frames with length mismatched"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_octets", &stats->rx_bytes, "Good octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_bcast_octets", &stats->rx_bcast_bytes, "Good broadcast octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_mcast_octets", &stats->rx_mcast_bytes, "Good multicast octets"); ALC_SYSCTL_STAT_ADD32(ctx, child, "runts", &stats->rx_runts, "Too short frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "fragments", &stats->rx_fragments, "Fragmented frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_64", &stats->rx_pkts_64, "64 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_65_127", &stats->rx_pkts_65_127, "65 to 127 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_128_255", &stats->rx_pkts_128_255, "128 to 255 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_256_511", &stats->rx_pkts_256_511, "256 to 511 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_512_1023", &stats->rx_pkts_512_1023, "512 to 1023 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1024_1518", &stats->rx_pkts_1024_1518, "1024 to 1518 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1519_max", &stats->rx_pkts_1519_max, "1519 to max frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "trunc_errs", &stats->rx_pkts_truncated, "Truncated frames due to MTU size"); ALC_SYSCTL_STAT_ADD32(ctx, child, "fifo_oflows", &stats->rx_fifo_oflows, "FIFO overflows"); ALC_SYSCTL_STAT_ADD32(ctx, child, "rrs_errs", &stats->rx_rrs_errs, "Return status write-back errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "align_errs", &stats->rx_alignerrs, "Alignment errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "filtered", &stats->rx_pkts_filtered, "Frames dropped due to address filtering"); /* Tx statistics. */ tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "tx", CTLFLAG_RD, NULL, "Tx MAC statistics"); child = SYSCTL_CHILDREN(tree); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_frames", &stats->tx_frames, "Good frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_bcast_frames", &stats->tx_bcast_frames, "Good broadcast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_mcast_frames", &stats->tx_mcast_frames, "Good multicast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "pause_frames", &stats->tx_pause_frames, "Pause control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "control_frames", &stats->tx_control_frames, "Control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "excess_defers", &stats->tx_excess_defer, "Frames with excessive derferrals"); ALC_SYSCTL_STAT_ADD32(ctx, child, "defers", &stats->tx_excess_defer, "Frames with derferrals"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_octets", &stats->tx_bytes, "Good octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_bcast_octets", &stats->tx_bcast_bytes, "Good broadcast octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_mcast_octets", &stats->tx_mcast_bytes, "Good multicast octets"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_64", &stats->tx_pkts_64, "64 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_65_127", &stats->tx_pkts_65_127, "65 to 127 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_128_255", &stats->tx_pkts_128_255, "128 to 255 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_256_511", &stats->tx_pkts_256_511, "256 to 511 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_512_1023", &stats->tx_pkts_512_1023, "512 to 1023 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1024_1518", &stats->tx_pkts_1024_1518, "1024 to 1518 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1519_max", &stats->tx_pkts_1519_max, "1519 to max frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "single_colls", &stats->tx_single_colls, "Single collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "multi_colls", &stats->tx_multi_colls, "Multiple collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "late_colls", &stats->tx_late_colls, "Late collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "excess_colls", &stats->tx_excess_colls, "Excessive collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "underruns", &stats->tx_underrun, "FIFO underruns"); ALC_SYSCTL_STAT_ADD32(ctx, child, "desc_underruns", &stats->tx_desc_underrun, "Descriptor write-back errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "len_errs", &stats->tx_lenerrs, "Frames with length mismatched"); ALC_SYSCTL_STAT_ADD32(ctx, child, "trunc_errs", &stats->tx_pkts_truncated, "Truncated frames due to MTU size"); } #undef ALC_SYSCTL_STAT_ADD32 #undef ALC_SYSCTL_STAT_ADD64 struct alc_dmamap_arg { bus_addr_t alc_busaddr; }; static void alc_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct alc_dmamap_arg *ctx; if (error != 0) return; KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); ctx = (struct alc_dmamap_arg *)arg; ctx->alc_busaddr = segs[0].ds_addr; } /* * Normal and high Tx descriptors shares single Tx high address. * Four Rx descriptor/return rings and CMB shares the same Rx * high address. */ static int alc_check_boundary(struct alc_softc *sc) { bus_addr_t cmb_end, rx_ring_end, rr_ring_end, tx_ring_end; rx_ring_end = sc->alc_rdata.alc_rx_ring_paddr + ALC_RX_RING_SZ; rr_ring_end = sc->alc_rdata.alc_rr_ring_paddr + ALC_RR_RING_SZ; cmb_end = sc->alc_rdata.alc_cmb_paddr + ALC_CMB_SZ; tx_ring_end = sc->alc_rdata.alc_tx_ring_paddr + ALC_TX_RING_SZ; /* 4GB boundary crossing is not allowed. */ if ((ALC_ADDR_HI(rx_ring_end) != ALC_ADDR_HI(sc->alc_rdata.alc_rx_ring_paddr)) || (ALC_ADDR_HI(rr_ring_end) != ALC_ADDR_HI(sc->alc_rdata.alc_rr_ring_paddr)) || (ALC_ADDR_HI(cmb_end) != ALC_ADDR_HI(sc->alc_rdata.alc_cmb_paddr)) || (ALC_ADDR_HI(tx_ring_end) != ALC_ADDR_HI(sc->alc_rdata.alc_tx_ring_paddr))) return (EFBIG); /* * Make sure Rx return descriptor/Rx descriptor/CMB use * the same high address. */ if ((ALC_ADDR_HI(rx_ring_end) != ALC_ADDR_HI(rr_ring_end)) || (ALC_ADDR_HI(rx_ring_end) != ALC_ADDR_HI(cmb_end))) return (EFBIG); return (0); } static int alc_dma_alloc(struct alc_softc *sc) { struct alc_txdesc *txd; struct alc_rxdesc *rxd; bus_addr_t lowaddr; struct alc_dmamap_arg ctx; int error, i; lowaddr = BUS_SPACE_MAXADDR; again: /* Create parent DMA tag. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->alc_dev), /* parent */ 1, 0, /* alignment, boundary */ lowaddr, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_parent_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create parent DMA tag.\n"); goto fail; } /* Create DMA tag for Tx descriptor ring. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_TX_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_TX_RING_SZ, /* maxsize */ 1, /* nsegments */ ALC_TX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_tx_ring_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Tx ring DMA tag.\n"); goto fail; } /* Create DMA tag for Rx free descriptor ring. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_RX_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ ALC_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_rx_ring_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx ring DMA tag.\n"); goto fail; } /* Create DMA tag for Rx return descriptor ring. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_RR_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_RR_RING_SZ, /* maxsize */ 1, /* nsegments */ ALC_RR_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_rr_ring_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx return ring DMA tag.\n"); goto fail; } /* Create DMA tag for coalescing message block. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_CMB_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_CMB_SZ, /* maxsize */ 1, /* nsegments */ ALC_CMB_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_cmb_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create CMB DMA tag.\n"); goto fail; } /* Create DMA tag for status message block. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_SMB_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_SMB_SZ, /* maxsize */ 1, /* nsegments */ ALC_SMB_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_smb_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create SMB DMA tag.\n"); goto fail; } /* Allocate DMA'able memory and load the DMA map for Tx ring. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_tx_ring_tag, (void **)&sc->alc_rdata.alc_tx_ring, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_tx_ring_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for Tx ring.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, sc->alc_rdata.alc_tx_ring, ALC_TX_RING_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for Tx ring.\n"); goto fail; } sc->alc_rdata.alc_tx_ring_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx ring. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_rx_ring_tag, (void **)&sc->alc_rdata.alc_rx_ring, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_rx_ring_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for Rx ring.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, sc->alc_rdata.alc_rx_ring, ALC_RX_RING_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for Rx ring.\n"); goto fail; } sc->alc_rdata.alc_rx_ring_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx return ring. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_rr_ring_tag, (void **)&sc->alc_rdata.alc_rr_ring, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_rr_ring_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for Rx return ring.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, sc->alc_rdata.alc_rr_ring, ALC_RR_RING_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for Tx ring.\n"); goto fail; } sc->alc_rdata.alc_rr_ring_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for CMB. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_cmb_tag, (void **)&sc->alc_rdata.alc_cmb, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_cmb_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for CMB.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, sc->alc_rdata.alc_cmb, ALC_CMB_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for CMB.\n"); goto fail; } sc->alc_rdata.alc_cmb_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for SMB. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_smb_tag, (void **)&sc->alc_rdata.alc_smb, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_smb_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for SMB.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, sc->alc_rdata.alc_smb, ALC_SMB_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for CMB.\n"); goto fail; } sc->alc_rdata.alc_smb_paddr = ctx.alc_busaddr; /* Make sure we've not crossed 4GB boundary. */ if (lowaddr != BUS_SPACE_MAXADDR_32BIT && (error = alc_check_boundary(sc)) != 0) { device_printf(sc->alc_dev, "4GB boundary crossed, " "switching to 32bit DMA addressing mode.\n"); alc_dma_free(sc); /* * Limit max allowable DMA address space to 32bit * and try again. */ lowaddr = BUS_SPACE_MAXADDR_32BIT; goto again; } /* * Create Tx buffer parent tag. * AR81[3567]x allows 64bit DMA addressing of Tx/Rx buffers * so it needs separate parent DMA tag as parent DMA address * space could be restricted to be within 32bit address space * by 4GB boundary crossing. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->alc_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_buffer_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create parent buffer DMA tag.\n"); goto fail; } /* Create DMA tag for Tx buffers. */ error = bus_dma_tag_create( sc->alc_cdata.alc_buffer_tag, /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_TSO_MAXSIZE, /* maxsize */ ALC_MAXTXSEGS, /* nsegments */ ALC_TSO_MAXSEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_tx_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Tx DMA tag.\n"); goto fail; } /* Create DMA tag for Rx buffers. */ error = bus_dma_tag_create( sc->alc_cdata.alc_buffer_tag, /* parent */ ALC_RX_BUF_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_rx_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx DMA tag.\n"); goto fail; } /* Create DMA maps for Tx buffers. */ for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = NULL; error = bus_dmamap_create(sc->alc_cdata.alc_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc->alc_dev, "could not create Tx dmamap.\n"); goto fail; } } /* Create DMA maps for Rx buffers. */ if ((error = bus_dmamap_create(sc->alc_cdata.alc_rx_tag, 0, &sc->alc_cdata.alc_rx_sparemap)) != 0) { device_printf(sc->alc_dev, "could not create spare Rx dmamap.\n"); goto fail; } for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = NULL; error = bus_dmamap_create(sc->alc_cdata.alc_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx dmamap.\n"); goto fail; } } fail: return (error); } static void alc_dma_free(struct alc_softc *sc) { struct alc_txdesc *txd; struct alc_rxdesc *rxd; int i; /* Tx buffers. */ if (sc->alc_cdata.alc_tx_tag != NULL) { for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; if (txd->tx_dmamap != NULL) { bus_dmamap_destroy(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap); txd->tx_dmamap = NULL; } } bus_dma_tag_destroy(sc->alc_cdata.alc_tx_tag); sc->alc_cdata.alc_tx_tag = NULL; } /* Rx buffers */ if (sc->alc_cdata.alc_rx_tag != NULL) { for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; if (rxd->rx_dmamap != NULL) { bus_dmamap_destroy(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = NULL; } } if (sc->alc_cdata.alc_rx_sparemap != NULL) { bus_dmamap_destroy(sc->alc_cdata.alc_rx_tag, sc->alc_cdata.alc_rx_sparemap); sc->alc_cdata.alc_rx_sparemap = NULL; } bus_dma_tag_destroy(sc->alc_cdata.alc_rx_tag); sc->alc_cdata.alc_rx_tag = NULL; } /* Tx descriptor ring. */ if (sc->alc_cdata.alc_tx_ring_tag != NULL) { if (sc->alc_rdata.alc_tx_ring_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map); if (sc->alc_rdata.alc_tx_ring != NULL) bus_dmamem_free(sc->alc_cdata.alc_tx_ring_tag, sc->alc_rdata.alc_tx_ring, sc->alc_cdata.alc_tx_ring_map); sc->alc_rdata.alc_tx_ring_paddr = 0; sc->alc_rdata.alc_tx_ring = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_tx_ring_tag); sc->alc_cdata.alc_tx_ring_tag = NULL; } /* Rx ring. */ if (sc->alc_cdata.alc_rx_ring_tag != NULL) { if (sc->alc_rdata.alc_rx_ring_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map); if (sc->alc_rdata.alc_rx_ring != NULL) bus_dmamem_free(sc->alc_cdata.alc_rx_ring_tag, sc->alc_rdata.alc_rx_ring, sc->alc_cdata.alc_rx_ring_map); sc->alc_rdata.alc_rx_ring_paddr = 0; sc->alc_rdata.alc_rx_ring = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_rx_ring_tag); sc->alc_cdata.alc_rx_ring_tag = NULL; } /* Rx return ring. */ if (sc->alc_cdata.alc_rr_ring_tag != NULL) { if (sc->alc_rdata.alc_rr_ring_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map); if (sc->alc_rdata.alc_rr_ring != NULL) bus_dmamem_free(sc->alc_cdata.alc_rr_ring_tag, sc->alc_rdata.alc_rr_ring, sc->alc_cdata.alc_rr_ring_map); sc->alc_rdata.alc_rr_ring_paddr = 0; sc->alc_rdata.alc_rr_ring = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_rr_ring_tag); sc->alc_cdata.alc_rr_ring_tag = NULL; } /* CMB block */ if (sc->alc_cdata.alc_cmb_tag != NULL) { if (sc->alc_rdata.alc_cmb_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map); if (sc->alc_rdata.alc_cmb != NULL) bus_dmamem_free(sc->alc_cdata.alc_cmb_tag, sc->alc_rdata.alc_cmb, sc->alc_cdata.alc_cmb_map); sc->alc_rdata.alc_cmb_paddr = 0; sc->alc_rdata.alc_cmb = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_cmb_tag); sc->alc_cdata.alc_cmb_tag = NULL; } /* SMB block */ if (sc->alc_cdata.alc_smb_tag != NULL) { if (sc->alc_rdata.alc_smb_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map); if (sc->alc_rdata.alc_smb != NULL) bus_dmamem_free(sc->alc_cdata.alc_smb_tag, sc->alc_rdata.alc_smb, sc->alc_cdata.alc_smb_map); sc->alc_rdata.alc_smb_paddr = 0; sc->alc_rdata.alc_smb = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_smb_tag); sc->alc_cdata.alc_smb_tag = NULL; } if (sc->alc_cdata.alc_buffer_tag != NULL) { bus_dma_tag_destroy(sc->alc_cdata.alc_buffer_tag); sc->alc_cdata.alc_buffer_tag = NULL; } if (sc->alc_cdata.alc_parent_tag != NULL) { bus_dma_tag_destroy(sc->alc_cdata.alc_parent_tag); sc->alc_cdata.alc_parent_tag = NULL; } } static int alc_shutdown(device_t dev) { return (alc_suspend(dev)); } /* * Note, this driver resets the link speed to 10/100Mbps by * restarting auto-negotiation in suspend/shutdown phase but we * don't know whether that auto-negotiation would succeed or not * as driver has no control after powering off/suspend operation. * If the renegotiation fail WOL may not work. Running at 1Gbps * will draw more power than 375mA at 3.3V which is specified in * PCI specification and that would result in complete * shutdowning power to ethernet controller. * * TODO * Save current negotiated media speed/duplex/flow-control to * softc and restore the same link again after resuming. PHY * handling such as power down/resetting to 100Mbps may be better * handled in suspend method in phy driver. */ static void alc_setlinkspeed(struct alc_softc *sc) { struct mii_data *mii; int aneg, i; mii = device_get_softc(sc->alc_miibus); mii_pollstat(mii); aneg = 0; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch IFM_SUBTYPE(mii->mii_media_active) { case IFM_10_T: case IFM_100_TX: return; case IFM_1000_T: aneg++; break; default: break; } } alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_100T2CR, 0); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_BMCR, BMCR_RESET | BMCR_AUTOEN | BMCR_STARTNEG); DELAY(1000); if (aneg != 0) { /* * Poll link state until alc(4) get a 10/100Mbps link. */ for (i = 0; i < MII_ANEGTICKS_GIGE; i++) { mii_pollstat(mii); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE( mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: alc_mac_config(sc); return; default: break; } } ALC_UNLOCK(sc); pause("alclnk", hz); ALC_LOCK(sc); } if (i == MII_ANEGTICKS_GIGE) device_printf(sc->alc_dev, "establishing a link failed, WOL may not work!"); } /* * No link, force MAC to have 100Mbps, full-duplex link. * This is the last resort and may/may not work. */ mii->mii_media_status = IFM_AVALID | IFM_ACTIVE; mii->mii_media_active = IFM_ETHER | IFM_100_TX | IFM_FDX; alc_mac_config(sc); } static void alc_setwol(struct alc_softc *sc) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_setwol_816x(sc); else alc_setwol_813x(sc); } static void alc_setwol_813x(struct alc_softc *sc) { struct ifnet *ifp; uint32_t reg, pmcs; uint16_t pmstat; ALC_LOCK_ASSERT(sc); alc_disable_l0s_l1(sc); ifp = sc->alc_ifp; if ((sc->alc_flags & ALC_FLAG_PM) == 0) { /* Disable WOL. */ CSR_WRITE_4(sc, ALC_WOL_CFG, 0); reg = CSR_READ_4(sc, ALC_PCIE_PHYMISC); reg |= PCIE_PHYMISC_FORCE_RCV_DET; CSR_WRITE_4(sc, ALC_PCIE_PHYMISC, reg); /* Force PHY power down. */ alc_phy_down(sc); CSR_WRITE_4(sc, ALC_MASTER_CFG, CSR_READ_4(sc, ALC_MASTER_CFG) | MASTER_CLK_SEL_DIS); return; } if ((ifp->if_capenable & IFCAP_WOL) != 0) { if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) alc_setlinkspeed(sc); CSR_WRITE_4(sc, ALC_MASTER_CFG, CSR_READ_4(sc, ALC_MASTER_CFG) & ~MASTER_CLK_SEL_DIS); } pmcs = 0; if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) pmcs |= WOL_CFG_MAGIC | WOL_CFG_MAGIC_ENB; CSR_WRITE_4(sc, ALC_WOL_CFG, pmcs); reg = CSR_READ_4(sc, ALC_MAC_CFG); reg &= ~(MAC_CFG_DBG | MAC_CFG_PROMISC | MAC_CFG_ALLMULTI | MAC_CFG_BCAST); if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) reg |= MAC_CFG_ALLMULTI | MAC_CFG_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) reg |= MAC_CFG_RX_ENB; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); reg = CSR_READ_4(sc, ALC_PCIE_PHYMISC); reg |= PCIE_PHYMISC_FORCE_RCV_DET; CSR_WRITE_4(sc, ALC_PCIE_PHYMISC, reg); if ((ifp->if_capenable & IFCAP_WOL) == 0) { /* WOL disabled, PHY power down. */ alc_phy_down(sc); CSR_WRITE_4(sc, ALC_MASTER_CFG, CSR_READ_4(sc, ALC_MASTER_CFG) | MASTER_CLK_SEL_DIS); } /* Request PME. */ pmstat = pci_read_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, pmstat, 2); } static void alc_setwol_816x(struct alc_softc *sc) { struct ifnet *ifp; uint32_t gphy, mac, master, pmcs, reg; uint16_t pmstat; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; master = CSR_READ_4(sc, ALC_MASTER_CFG); master &= ~MASTER_CLK_SEL_DIS; gphy = CSR_READ_4(sc, ALC_GPHY_CFG); gphy &= ~(GPHY_CFG_EXT_RESET | GPHY_CFG_LED_MODE | GPHY_CFG_100AB_ENB | GPHY_CFG_PHY_PLL_ON); gphy |= GPHY_CFG_HIB_EN | GPHY_CFG_HIB_PULSE | GPHY_CFG_SEL_ANA_RESET; if ((sc->alc_flags & ALC_FLAG_PM) == 0) { CSR_WRITE_4(sc, ALC_WOL_CFG, 0); gphy |= GPHY_CFG_PHY_IDDQ | GPHY_CFG_PWDOWN_HW; mac = CSR_READ_4(sc, ALC_MAC_CFG); } else { if ((ifp->if_capenable & IFCAP_WOL) != 0) { gphy |= GPHY_CFG_EXT_RESET; if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) alc_setlinkspeed(sc); } pmcs = 0; if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) pmcs |= WOL_CFG_MAGIC | WOL_CFG_MAGIC_ENB; CSR_WRITE_4(sc, ALC_WOL_CFG, pmcs); mac = CSR_READ_4(sc, ALC_MAC_CFG); mac &= ~(MAC_CFG_DBG | MAC_CFG_PROMISC | MAC_CFG_ALLMULTI | MAC_CFG_BCAST); if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) mac |= MAC_CFG_ALLMULTI | MAC_CFG_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) mac |= MAC_CFG_RX_ENB; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_S3DIG10, ANEG_S3DIG10_SL); } /* Enable OSC. */ reg = CSR_READ_4(sc, ALC_MISC); reg &= ~MISC_INTNLOSC_OPEN; CSR_WRITE_4(sc, ALC_MISC, reg); reg |= MISC_INTNLOSC_OPEN; CSR_WRITE_4(sc, ALC_MISC, reg); CSR_WRITE_4(sc, ALC_MASTER_CFG, master); CSR_WRITE_4(sc, ALC_MAC_CFG, mac); CSR_WRITE_4(sc, ALC_GPHY_CFG, gphy); reg = CSR_READ_4(sc, ALC_PDLL_TRNS1); reg |= PDLL_TRNS1_D3PLLOFF_ENB; CSR_WRITE_4(sc, ALC_PDLL_TRNS1, reg); if ((sc->alc_flags & ALC_FLAG_PM) != 0) { /* Request PME. */ pmstat = pci_read_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, pmstat, 2); } } static int alc_suspend(device_t dev) { struct alc_softc *sc; sc = device_get_softc(dev); ALC_LOCK(sc); alc_stop(sc); alc_setwol(sc); ALC_UNLOCK(sc); return (0); } static int alc_resume(device_t dev) { struct alc_softc *sc; struct ifnet *ifp; uint16_t pmstat; sc = device_get_softc(dev); ALC_LOCK(sc); if ((sc->alc_flags & ALC_FLAG_PM) != 0) { /* Disable PME and clear PME status. */ pmstat = pci_read_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, 2); if ((pmstat & PCIM_PSTAT_PMEENABLE) != 0) { pmstat &= ~PCIM_PSTAT_PMEENABLE; pci_write_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, pmstat, 2); } } /* Reset PHY. */ alc_phy_reset(sc); ifp = sc->alc_ifp; if ((ifp->if_flags & IFF_UP) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); } ALC_UNLOCK(sc); return (0); } static int alc_encap(struct alc_softc *sc, struct mbuf **m_head) { struct alc_txdesc *txd, *txd_last; struct tx_desc *desc; struct mbuf *m; struct ip *ip; struct tcphdr *tcp; bus_dma_segment_t txsegs[ALC_MAXTXSEGS]; bus_dmamap_t map; uint32_t cflags, hdrlen, ip_off, poff, vtag; int error, idx, nsegs, prod; ALC_LOCK_ASSERT(sc); M_ASSERTPKTHDR((*m_head)); m = *m_head; ip = NULL; tcp = NULL; ip_off = poff = 0; if ((m->m_pkthdr.csum_flags & (ALC_CSUM_FEATURES | CSUM_TSO)) != 0) { /* * AR81[3567]x requires offset of TCP/UDP header in its * Tx descriptor to perform Tx checksum offloading. TSO * also requires TCP header offset and modification of * IP/TCP header. This kind of operation takes many CPU * cycles on FreeBSD so fast host CPU is required to get * smooth TSO performance. */ struct ether_header *eh; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ m = m_dup(*m_head, M_NOWAIT); /* Release original mbufs. */ m_freem(*m_head); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } ip_off = sizeof(struct ether_header); m = m_pullup(m, ip_off); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } eh = mtod(m, struct ether_header *); /* * Check if hardware VLAN insertion is off. * Additional check for LLC/SNAP frame? */ if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); m = m_pullup(m, ip_off); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } } m = m_pullup(m, ip_off + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { m = m_pullup(m, poff + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } tcp = (struct tcphdr *)(mtod(m, char *) + poff); m = m_pullup(m, poff + (tcp->th_off << 2)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } /* * Due to strict adherence of Microsoft NDIS * Large Send specification, hardware expects * a pseudo TCP checksum inserted by upper * stack. Unfortunately the pseudo TCP * checksum that NDIS refers to does not include * TCP payload length so driver should recompute * the pseudo checksum here. Hopefully this * wouldn't be much burden on modern CPUs. * * Reset IP checksum and recompute TCP pseudo * checksum as NDIS specification said. */ ip = (struct ip *)(mtod(m, char *) + ip_off); tcp = (struct tcphdr *)(mtod(m, char *) + poff); ip->ip_sum = 0; tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } *m_head = m; } prod = sc->alc_cdata.alc_tx_prod; txd = &sc->alc_cdata.alc_txdesc[prod]; txd_last = txd; map = txd->tx_dmamap; error = bus_dmamap_load_mbuf_sg(sc->alc_cdata.alc_tx_tag, map, *m_head, txsegs, &nsegs, 0); if (error == EFBIG) { m = m_collapse(*m_head, M_NOWAIT, ALC_MAXTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->alc_cdata.alc_tx_tag, map, *m_head, txsegs, &nsegs, 0); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check descriptor overrun. */ if (sc->alc_cdata.alc_tx_cnt + nsegs >= ALC_TX_RING_CNT - 3) { bus_dmamap_unload(sc->alc_cdata.alc_tx_tag, map); return (ENOBUFS); } bus_dmamap_sync(sc->alc_cdata.alc_tx_tag, map, BUS_DMASYNC_PREWRITE); m = *m_head; cflags = TD_ETHERNET; vtag = 0; desc = NULL; idx = 0; /* Configure VLAN hardware tag insertion. */ if ((m->m_flags & M_VLANTAG) != 0) { vtag = htons(m->m_pkthdr.ether_vtag); vtag = (vtag << TD_VLAN_SHIFT) & TD_VLAN_MASK; cflags |= TD_INS_VLAN_TAG; } if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { /* Request TSO and set MSS. */ cflags |= TD_TSO | TD_TSO_DESCV1; cflags |= ((uint32_t)m->m_pkthdr.tso_segsz << TD_MSS_SHIFT) & TD_MSS_MASK; /* Set TCP header offset. */ cflags |= (poff << TD_TCPHDR_OFFSET_SHIFT) & TD_TCPHDR_OFFSET_MASK; /* * AR81[3567]x requires the first buffer should * only hold IP/TCP header data. Payload should * be handled in other descriptors. */ hdrlen = poff + (tcp->th_off << 2); desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->len = htole32(TX_BYTES(hdrlen | vtag)); desc->flags = htole32(cflags); desc->addr = htole64(txsegs[0].ds_addr); sc->alc_cdata.alc_tx_cnt++; ALC_DESC_INC(prod, ALC_TX_RING_CNT); if (m->m_len - hdrlen > 0) { /* Handle remaining payload of the first fragment. */ desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->len = htole32(TX_BYTES((m->m_len - hdrlen) | vtag)); desc->flags = htole32(cflags); desc->addr = htole64(txsegs[0].ds_addr + hdrlen); sc->alc_cdata.alc_tx_cnt++; ALC_DESC_INC(prod, ALC_TX_RING_CNT); } /* Handle remaining fragments. */ idx = 1; } else if ((m->m_pkthdr.csum_flags & ALC_CSUM_FEATURES) != 0) { /* Configure Tx checksum offload. */ #ifdef ALC_USE_CUSTOM_CSUM cflags |= TD_CUSTOM_CSUM; /* Set checksum start offset. */ cflags |= ((poff >> 1) << TD_PLOAD_OFFSET_SHIFT) & TD_PLOAD_OFFSET_MASK; /* Set checksum insertion position of TCP/UDP. */ cflags |= (((poff + m->m_pkthdr.csum_data) >> 1) << TD_CUSTOM_CSUM_OFFSET_SHIFT) & TD_CUSTOM_CSUM_OFFSET_MASK; #else if ((m->m_pkthdr.csum_flags & CSUM_IP) != 0) cflags |= TD_IPCSUM; if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0) cflags |= TD_TCPCSUM; if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) cflags |= TD_UDPCSUM; /* Set TCP/UDP header offset. */ cflags |= (poff << TD_L4HDR_OFFSET_SHIFT) & TD_L4HDR_OFFSET_MASK; #endif } for (; idx < nsegs; idx++) { desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->len = htole32(TX_BYTES(txsegs[idx].ds_len) | vtag); desc->flags = htole32(cflags); desc->addr = htole64(txsegs[idx].ds_addr); sc->alc_cdata.alc_tx_cnt++; ALC_DESC_INC(prod, ALC_TX_RING_CNT); } /* Update producer index. */ sc->alc_cdata.alc_tx_prod = prod; /* Finally set EOP on the last descriptor. */ prod = (prod + ALC_TX_RING_CNT - 1) % ALC_TX_RING_CNT; desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->flags |= htole32(TD_EOP); /* Swap dmamap of the first and the last. */ txd = &sc->alc_cdata.alc_txdesc[prod]; map = txd_last->tx_dmamap; txd_last->tx_dmamap = txd->tx_dmamap; txd->tx_dmamap = map; txd->tx_m = m; return (0); } static void alc_start(struct ifnet *ifp) { struct alc_softc *sc; sc = ifp->if_softc; ALC_LOCK(sc); alc_start_locked(ifp); ALC_UNLOCK(sc); } static void alc_start_locked(struct ifnet *ifp) { struct alc_softc *sc; struct mbuf *m_head; int enq; sc = ifp->if_softc; ALC_LOCK_ASSERT(sc); /* Reclaim transmitted frames. */ if (sc->alc_cdata.alc_tx_cnt >= ALC_TX_DESC_HIWAT) alc_txeof(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->alc_flags & ALC_FLAG_LINK) == 0) return; for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd); ) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (alc_encap(sc, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } if (enq > 0) alc_start_tx(sc); } static void alc_start_tx(struct alc_softc *sc) { /* Sync descriptors. */ bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Kick. Assume we're using normal Tx priority queue. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) CSR_WRITE_2(sc, ALC_MBOX_TD_PRI0_PROD_IDX, (uint16_t)sc->alc_cdata.alc_tx_prod); else CSR_WRITE_4(sc, ALC_MBOX_TD_PROD_IDX, (sc->alc_cdata.alc_tx_prod << MBOX_TD_PROD_LO_IDX_SHIFT) & MBOX_TD_PROD_LO_IDX_MASK); /* Set a timeout in case the chip goes out to lunch. */ sc->alc_watchdog_timer = ALC_TX_TIMEOUT; } static void alc_watchdog(struct alc_softc *sc) { struct ifnet *ifp; ALC_LOCK_ASSERT(sc); if (sc->alc_watchdog_timer == 0 || --sc->alc_watchdog_timer) return; ifp = sc->alc_ifp; if ((sc->alc_flags & ALC_FLAG_LINK) == 0) { if_printf(sc->alc_ifp, "watchdog timeout (lost link)\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); return; } if_printf(sc->alc_ifp, "watchdog timeout -- resetting\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) alc_start_locked(ifp); } static int alc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct alc_softc *sc; struct ifreq *ifr; struct mii_data *mii; int error, mask; sc = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > (sc->alc_ident->max_framelen - sizeof(struct ether_vlan_header) - ETHER_CRC_LEN) || ((sc->alc_flags & ALC_FLAG_JUMBO) == 0 && ifr->ifr_mtu > ETHERMTU)) error = EINVAL; else if (ifp->if_mtu != ifr->ifr_mtu) { ALC_LOCK(sc); ifp->if_mtu = ifr->ifr_mtu; /* AR81[3567]x has 13 bits MSS field. */ if (ifp->if_mtu > ALC_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; VLAN_CAPABILITIES(ifp); } ALC_UNLOCK(sc); } break; case SIOCSIFFLAGS: ALC_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc->alc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) alc_rxfilter(sc); else alc_init_locked(sc); } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) alc_stop(sc); sc->alc_if_flags = ifp->if_flags; ALC_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ALC_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) alc_rxfilter(sc); ALC_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: mii = device_get_softc(sc->alc_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd); break; case SIOCSIFCAP: ALC_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= ALC_CSUM_FEATURES; else ifp->if_hwassist &= ~ALC_CSUM_FEATURES; } if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((ifp->if_capenable & IFCAP_TSO4) != 0) { /* AR81[3567]x has 13 bits MSS field. */ if (ifp->if_mtu > ALC_TSO_MTU) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else ifp->if_hwassist |= CSUM_TSO; } else ifp->if_hwassist &= ~CSUM_TSO; } if ((mask & IFCAP_WOL_MCAST) != 0 && (ifp->if_capabilities & IFCAP_WOL_MCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_MCAST; if ((mask & IFCAP_WOL_MAGIC) != 0 && (ifp->if_capabilities & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; alc_rxvlan(sc); } if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; if ((mask & IFCAP_VLAN_HWTSO) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) ifp->if_capenable &= ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); ALC_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void alc_mac_config(struct alc_softc *sc) { struct mii_data *mii; uint32_t reg; ALC_LOCK_ASSERT(sc); mii = device_get_softc(sc->alc_miibus); reg = CSR_READ_4(sc, ALC_MAC_CFG); reg &= ~(MAC_CFG_FULL_DUPLEX | MAC_CFG_TX_FC | MAC_CFG_RX_FC | MAC_CFG_SPEED_MASK); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) reg |= MAC_CFG_HASH_ALG_CRC32 | MAC_CFG_SPEED_MODE_SW; /* Reprogram MAC with resolved speed/duplex. */ switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: reg |= MAC_CFG_SPEED_10_100; break; case IFM_1000_T: reg |= MAC_CFG_SPEED_1000; break; } if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { reg |= MAC_CFG_FULL_DUPLEX; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) reg |= MAC_CFG_TX_FC; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) reg |= MAC_CFG_RX_FC; } CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } static void alc_stats_clear(struct alc_softc *sc) { struct smb sb, *smb; uint32_t *reg; int i; if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) { bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); smb = sc->alc_rdata.alc_smb; /* Update done, clear. */ smb->updated = 0; bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } else { for (reg = &sb.rx_frames, i = 0; reg <= &sb.rx_pkts_filtered; reg++) { CSR_READ_4(sc, ALC_RX_MIB_BASE + i); i += sizeof(uint32_t); } /* Read Tx statistics. */ for (reg = &sb.tx_frames, i = 0; reg <= &sb.tx_mcast_bytes; reg++) { CSR_READ_4(sc, ALC_TX_MIB_BASE + i); i += sizeof(uint32_t); } } } static void alc_stats_update(struct alc_softc *sc) { struct alc_hw_stats *stat; struct smb sb, *smb; struct ifnet *ifp; uint32_t *reg; int i; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; stat = &sc->alc_stats; if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) { bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); smb = sc->alc_rdata.alc_smb; if (smb->updated == 0) return; } else { smb = &sb; /* Read Rx statistics. */ for (reg = &sb.rx_frames, i = 0; reg <= &sb.rx_pkts_filtered; reg++) { *reg = CSR_READ_4(sc, ALC_RX_MIB_BASE + i); i += sizeof(uint32_t); } /* Read Tx statistics. */ for (reg = &sb.tx_frames, i = 0; reg <= &sb.tx_mcast_bytes; reg++) { *reg = CSR_READ_4(sc, ALC_TX_MIB_BASE + i); i += sizeof(uint32_t); } } /* Rx stats. */ stat->rx_frames += smb->rx_frames; stat->rx_bcast_frames += smb->rx_bcast_frames; stat->rx_mcast_frames += smb->rx_mcast_frames; stat->rx_pause_frames += smb->rx_pause_frames; stat->rx_control_frames += smb->rx_control_frames; stat->rx_crcerrs += smb->rx_crcerrs; stat->rx_lenerrs += smb->rx_lenerrs; stat->rx_bytes += smb->rx_bytes; stat->rx_runts += smb->rx_runts; stat->rx_fragments += smb->rx_fragments; stat->rx_pkts_64 += smb->rx_pkts_64; stat->rx_pkts_65_127 += smb->rx_pkts_65_127; stat->rx_pkts_128_255 += smb->rx_pkts_128_255; stat->rx_pkts_256_511 += smb->rx_pkts_256_511; stat->rx_pkts_512_1023 += smb->rx_pkts_512_1023; stat->rx_pkts_1024_1518 += smb->rx_pkts_1024_1518; stat->rx_pkts_1519_max += smb->rx_pkts_1519_max; stat->rx_pkts_truncated += smb->rx_pkts_truncated; stat->rx_fifo_oflows += smb->rx_fifo_oflows; stat->rx_rrs_errs += smb->rx_rrs_errs; stat->rx_alignerrs += smb->rx_alignerrs; stat->rx_bcast_bytes += smb->rx_bcast_bytes; stat->rx_mcast_bytes += smb->rx_mcast_bytes; stat->rx_pkts_filtered += smb->rx_pkts_filtered; /* Tx stats. */ stat->tx_frames += smb->tx_frames; stat->tx_bcast_frames += smb->tx_bcast_frames; stat->tx_mcast_frames += smb->tx_mcast_frames; stat->tx_pause_frames += smb->tx_pause_frames; stat->tx_excess_defer += smb->tx_excess_defer; stat->tx_control_frames += smb->tx_control_frames; stat->tx_deferred += smb->tx_deferred; stat->tx_bytes += smb->tx_bytes; stat->tx_pkts_64 += smb->tx_pkts_64; stat->tx_pkts_65_127 += smb->tx_pkts_65_127; stat->tx_pkts_128_255 += smb->tx_pkts_128_255; stat->tx_pkts_256_511 += smb->tx_pkts_256_511; stat->tx_pkts_512_1023 += smb->tx_pkts_512_1023; stat->tx_pkts_1024_1518 += smb->tx_pkts_1024_1518; stat->tx_pkts_1519_max += smb->tx_pkts_1519_max; stat->tx_single_colls += smb->tx_single_colls; stat->tx_multi_colls += smb->tx_multi_colls; stat->tx_late_colls += smb->tx_late_colls; stat->tx_excess_colls += smb->tx_excess_colls; stat->tx_underrun += smb->tx_underrun; stat->tx_desc_underrun += smb->tx_desc_underrun; stat->tx_lenerrs += smb->tx_lenerrs; stat->tx_pkts_truncated += smb->tx_pkts_truncated; stat->tx_bcast_bytes += smb->tx_bcast_bytes; stat->tx_mcast_bytes += smb->tx_mcast_bytes; /* Update counters in ifnet. */ if_inc_counter(ifp, IFCOUNTER_OPACKETS, smb->tx_frames); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, smb->tx_single_colls + smb->tx_multi_colls * 2 + smb->tx_late_colls + smb->tx_excess_colls * HDPX_CFG_RETRY_DEFAULT); if_inc_counter(ifp, IFCOUNTER_OERRORS, smb->tx_late_colls + smb->tx_excess_colls + smb->tx_underrun + smb->tx_pkts_truncated); if_inc_counter(ifp, IFCOUNTER_IPACKETS, smb->rx_frames); if_inc_counter(ifp, IFCOUNTER_IERRORS, smb->rx_crcerrs + smb->rx_lenerrs + smb->rx_runts + smb->rx_pkts_truncated + smb->rx_fifo_oflows + smb->rx_rrs_errs + smb->rx_alignerrs); if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) { /* Update done, clear. */ smb->updated = 0; bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } } static int alc_intr(void *arg) { struct alc_softc *sc; uint32_t status; sc = (struct alc_softc *)arg; status = CSR_READ_4(sc, ALC_INTR_STATUS); if ((status & ALC_INTRS) == 0) return (FILTER_STRAY); /* Disable interrupts. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, INTR_DIS_INT); taskqueue_enqueue(sc->alc_tq, &sc->alc_int_task); return (FILTER_HANDLED); } static void alc_int_task(void *arg, int pending) { struct alc_softc *sc; struct ifnet *ifp; uint32_t status; int more; sc = (struct alc_softc *)arg; ifp = sc->alc_ifp; status = CSR_READ_4(sc, ALC_INTR_STATUS); ALC_LOCK(sc); if (sc->alc_morework != 0) { sc->alc_morework = 0; status |= INTR_RX_PKT; } if ((status & ALC_INTRS) == 0) goto done; /* Acknowledge interrupts but still disable interrupts. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, status | INTR_DIS_INT); more = 0; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if ((status & INTR_RX_PKT) != 0) { more = alc_rxintr(sc, sc->alc_process_limit); if (more == EAGAIN) sc->alc_morework = 1; else if (more == EIO) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); ALC_UNLOCK(sc); return; } } if ((status & (INTR_DMA_RD_TO_RST | INTR_DMA_WR_TO_RST | INTR_TXQ_TO_RST)) != 0) { if ((status & INTR_DMA_RD_TO_RST) != 0) device_printf(sc->alc_dev, "DMA read error! -- resetting\n"); if ((status & INTR_DMA_WR_TO_RST) != 0) device_printf(sc->alc_dev, "DMA write error! -- resetting\n"); if ((status & INTR_TXQ_TO_RST) != 0) device_printf(sc->alc_dev, "TxQ reset! -- resetting\n"); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); ALC_UNLOCK(sc); return; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) alc_start_locked(ifp); } if (more == EAGAIN || (CSR_READ_4(sc, ALC_INTR_STATUS) & ALC_INTRS) != 0) { ALC_UNLOCK(sc); taskqueue_enqueue(sc->alc_tq, &sc->alc_int_task); return; } done: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { /* Re-enable interrupts if we're running. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, 0x7FFFFFFF); } ALC_UNLOCK(sc); } static void alc_txeof(struct alc_softc *sc) { struct ifnet *ifp; struct alc_txdesc *txd; uint32_t cons, prod; int prog; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; if (sc->alc_cdata.alc_tx_cnt == 0) return; bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_POSTWRITE); if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) { bus_dmamap_sync(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, BUS_DMASYNC_POSTREAD); prod = sc->alc_rdata.alc_cmb->cons; } else { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) prod = CSR_READ_2(sc, ALC_MBOX_TD_PRI0_CONS_IDX); else { prod = CSR_READ_4(sc, ALC_MBOX_TD_CONS_IDX); /* Assume we're using normal Tx priority queue. */ prod = (prod & MBOX_TD_CONS_LO_IDX_MASK) >> MBOX_TD_CONS_LO_IDX_SHIFT; } } cons = sc->alc_cdata.alc_tx_cons; /* * Go through our Tx list and free mbufs for those * frames which have been transmitted. */ for (prog = 0; cons != prod; prog++, ALC_DESC_INC(cons, ALC_TX_RING_CNT)) { if (sc->alc_cdata.alc_tx_cnt <= 0) break; prog++; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->alc_cdata.alc_tx_cnt--; txd = &sc->alc_cdata.alc_txdesc[cons]; if (txd->tx_m != NULL) { /* Reclaim transmitted mbufs. */ bus_dmamap_sync(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) bus_dmamap_sync(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, BUS_DMASYNC_PREREAD); sc->alc_cdata.alc_tx_cons = cons; /* * Unarm watchdog timer only when there is no pending * frames in Tx queue. */ if (sc->alc_cdata.alc_tx_cnt == 0) sc->alc_watchdog_timer = 0; } static int alc_newbuf(struct alc_softc *sc, struct alc_rxdesc *rxd) { struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = RX_BUF_SIZE_MAX; #ifndef __NO_STRICT_ALIGNMENT m_adj(m, sizeof(uint64_t)); #endif if (bus_dmamap_load_mbuf_sg(sc->alc_cdata.alc_rx_tag, sc->alc_cdata.alc_rx_sparemap, m, segs, &nsegs, 0) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap); } map = rxd->rx_dmamap; rxd->rx_dmamap = sc->alc_cdata.alc_rx_sparemap; sc->alc_cdata.alc_rx_sparemap = map; bus_dmamap_sync(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rxd->rx_desc->addr = htole64(segs[0].ds_addr); return (0); } static int alc_rxintr(struct alc_softc *sc, int count) { struct ifnet *ifp; struct rx_rdesc *rrd; uint32_t nsegs, status; int rr_cons, prog; bus_dmamap_sync(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, BUS_DMASYNC_POSTWRITE); rr_cons = sc->alc_cdata.alc_rr_cons; ifp = sc->alc_ifp; for (prog = 0; (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;) { if (count-- <= 0) break; rrd = &sc->alc_rdata.alc_rr_ring[rr_cons]; status = le32toh(rrd->status); if ((status & RRD_VALID) == 0) break; nsegs = RRD_RD_CNT(le32toh(rrd->rdinfo)); if (nsegs == 0) { /* This should not happen! */ device_printf(sc->alc_dev, "unexpected segment count -- resetting\n"); return (EIO); } alc_rxeof(sc, rrd); /* Clear Rx return status. */ rrd->status = 0; ALC_DESC_INC(rr_cons, ALC_RR_RING_CNT); sc->alc_cdata.alc_rx_cons += nsegs; sc->alc_cdata.alc_rx_cons %= ALC_RR_RING_CNT; prog += nsegs; } if (prog > 0) { /* Update the consumer index. */ sc->alc_cdata.alc_rr_cons = rr_cons; /* Sync Rx return descriptors. */ bus_dmamap_sync(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Sync updated Rx descriptors such that controller see * modified buffer addresses. */ bus_dmamap_sync(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, BUS_DMASYNC_PREWRITE); /* * Let controller know availability of new Rx buffers. * Since alc(4) use RXQ_CFG_RD_BURST_DEFAULT descriptors * it may be possible to update ALC_MBOX_RD0_PROD_IDX * only when Rx buffer pre-fetching is required. In * addition we already set ALC_RX_RD_FREE_THRESH to * RX_RD_FREE_THRESH_LO_DEFAULT descriptors. However * it still seems that pre-fetching needs more * experimentation. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) CSR_WRITE_2(sc, ALC_MBOX_RD0_PROD_IDX, (uint16_t)sc->alc_cdata.alc_rx_cons); else CSR_WRITE_4(sc, ALC_MBOX_RD0_PROD_IDX, sc->alc_cdata.alc_rx_cons); } return (count > 0 ? 0 : EAGAIN); } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * alc_fixup_rx(struct ifnet *ifp, struct mbuf *m) { struct mbuf *n; int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - 3; if (m->m_next == NULL) { for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= 6; return (m); } /* * Append a new mbuf to received mbuf chain and copy ethernet * header from the mbuf chain. This can save lots of CPU * cycles for jumbo frame. */ MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; return (n); } #endif /* Receive a frame. */ static void alc_rxeof(struct alc_softc *sc, struct rx_rdesc *rrd) { struct alc_rxdesc *rxd; struct ifnet *ifp; struct mbuf *mp, *m; uint32_t rdinfo, status, vtag; int count, nsegs, rx_cons; ifp = sc->alc_ifp; status = le32toh(rrd->status); rdinfo = le32toh(rrd->rdinfo); rx_cons = RRD_RD_IDX(rdinfo); nsegs = RRD_RD_CNT(rdinfo); sc->alc_cdata.alc_rxlen = RRD_BYTES(status); if ((status & (RRD_ERR_SUM | RRD_ERR_LENGTH)) != 0) { /* * We want to pass the following frames to upper * layer regardless of error status of Rx return * ring. * * o IP/TCP/UDP checksum is bad. * o frame length and protocol specific length * does not match. * * Force network stack compute checksum for * errored frames. */ status |= RRD_TCP_UDPCSUM_NOK | RRD_IPCSUM_NOK; if ((status & (RRD_ERR_CRC | RRD_ERR_ALIGN | RRD_ERR_TRUNC | RRD_ERR_RUNT)) != 0) return; } for (count = 0; count < nsegs; count++, ALC_DESC_INC(rx_cons, ALC_RX_RING_CNT)) { rxd = &sc->alc_cdata.alc_rxdesc[rx_cons]; mp = rxd->rx_m; /* Add a new receive buffer to the ring. */ if (alc_newbuf(sc, rxd) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse Rx buffers. */ if (sc->alc_cdata.alc_rxhead != NULL) m_freem(sc->alc_cdata.alc_rxhead); break; } /* * Assume we've received a full sized frame. * Actual size is fixed when we encounter the end of * multi-segmented frame. */ mp->m_len = sc->alc_buf_size; /* Chain received mbufs. */ if (sc->alc_cdata.alc_rxhead == NULL) { sc->alc_cdata.alc_rxhead = mp; sc->alc_cdata.alc_rxtail = mp; } else { mp->m_flags &= ~M_PKTHDR; sc->alc_cdata.alc_rxprev_tail = sc->alc_cdata.alc_rxtail; sc->alc_cdata.alc_rxtail->m_next = mp; sc->alc_cdata.alc_rxtail = mp; } if (count == nsegs - 1) { /* Last desc. for this frame. */ m = sc->alc_cdata.alc_rxhead; m->m_flags |= M_PKTHDR; /* * It seems that L1C/L2C controller has no way * to tell hardware to strip CRC bytes. */ m->m_pkthdr.len = sc->alc_cdata.alc_rxlen - ETHER_CRC_LEN; if (nsegs > 1) { /* Set last mbuf size. */ mp->m_len = sc->alc_cdata.alc_rxlen - (nsegs - 1) * sc->alc_buf_size; /* Remove the CRC bytes in chained mbufs. */ if (mp->m_len <= ETHER_CRC_LEN) { sc->alc_cdata.alc_rxtail = sc->alc_cdata.alc_rxprev_tail; sc->alc_cdata.alc_rxtail->m_len -= (ETHER_CRC_LEN - mp->m_len); sc->alc_cdata.alc_rxtail->m_next = NULL; m_freem(mp); } else { mp->m_len -= ETHER_CRC_LEN; } } else m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; /* * Due to hardware bugs, Rx checksum offloading * was intentionally disabled. */ if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (status & RRD_VLAN_TAG) != 0) { vtag = RRD_VLAN(le32toh(rrd->vtag)); m->m_pkthdr.ether_vtag = ntohs(vtag); m->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT m = alc_fixup_rx(ifp, m); if (m != NULL) #endif { /* Pass it on. */ ALC_UNLOCK(sc); (*ifp->if_input)(ifp, m); ALC_LOCK(sc); } } } /* Reset mbuf chains. */ ALC_RXCHAIN_RESET(sc); } static void alc_tick(void *arg) { struct alc_softc *sc; struct mii_data *mii; sc = (struct alc_softc *)arg; ALC_LOCK_ASSERT(sc); mii = device_get_softc(sc->alc_miibus); mii_tick(mii); alc_stats_update(sc); /* * alc(4) does not rely on Tx completion interrupts to reclaim * transferred buffers. Instead Tx completion interrupts are * used to hint for scheduling Tx task. So it's necessary to * release transmitted buffers by kicking Tx completion * handler. This limits the maximum reclamation delay to a hz. */ alc_txeof(sc); alc_watchdog(sc); callout_reset(&sc->alc_tick_ch, hz, alc_tick, sc); } static void alc_osc_reset(struct alc_softc *sc) { uint32_t reg; reg = CSR_READ_4(sc, ALC_MISC3); reg &= ~MISC3_25M_BY_SW; reg |= MISC3_25M_NOTO_INTNL; CSR_WRITE_4(sc, ALC_MISC3, reg); reg = CSR_READ_4(sc, ALC_MISC); if (AR816X_REV(sc->alc_rev) >= AR816X_REV_B0) { /* * Restore over-current protection default value. * This value could be reset by MAC reset. */ reg &= ~MISC_PSW_OCP_MASK; reg |= (MISC_PSW_OCP_DEFAULT << MISC_PSW_OCP_SHIFT); reg &= ~MISC_INTNLOSC_OPEN; CSR_WRITE_4(sc, ALC_MISC, reg); CSR_WRITE_4(sc, ALC_MISC, reg | MISC_INTNLOSC_OPEN); reg = CSR_READ_4(sc, ALC_MISC2); reg &= ~MISC2_CALB_START; CSR_WRITE_4(sc, ALC_MISC2, reg); CSR_WRITE_4(sc, ALC_MISC2, reg | MISC2_CALB_START); } else { reg &= ~MISC_INTNLOSC_OPEN; /* Disable isolate for revision A devices. */ if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1) reg &= ~MISC_ISO_ENB; CSR_WRITE_4(sc, ALC_MISC, reg | MISC_INTNLOSC_OPEN); CSR_WRITE_4(sc, ALC_MISC, reg); } DELAY(20); } static void alc_reset(struct alc_softc *sc) { uint32_t pmcfg, reg; int i; pmcfg = 0; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { /* Reset workaround. */ CSR_WRITE_4(sc, ALC_MBOX_RD0_PROD_IDX, 1); if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) { /* Disable L0s/L1s before reset. */ pmcfg = CSR_READ_4(sc, ALC_PM_CFG); if ((pmcfg & (PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB)) != 0) { pmcfg &= ~(PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB); CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } } } reg = CSR_READ_4(sc, ALC_MASTER_CFG); reg |= MASTER_OOB_DIS_OFF | MASTER_RESET; CSR_WRITE_4(sc, ALC_MASTER_CFG, reg); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { for (i = ALC_RESET_TIMEOUT; i > 0; i--) { DELAY(10); if (CSR_READ_4(sc, ALC_MBOX_RD0_PROD_IDX) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "MAC reset timeout!\n"); } for (i = ALC_RESET_TIMEOUT; i > 0; i--) { DELAY(10); if ((CSR_READ_4(sc, ALC_MASTER_CFG) & MASTER_RESET) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "master reset timeout!\n"); for (i = ALC_RESET_TIMEOUT; i > 0; i--) { reg = CSR_READ_4(sc, ALC_IDLE_STATUS); if ((reg & (IDLE_STATUS_RXMAC | IDLE_STATUS_TXMAC | IDLE_STATUS_RXQ | IDLE_STATUS_TXQ)) == 0) break; DELAY(10); } if (i == 0) device_printf(sc->alc_dev, "reset timeout(0x%08x)!\n", reg); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) { reg = CSR_READ_4(sc, ALC_MASTER_CFG); reg |= MASTER_CLK_SEL_DIS; CSR_WRITE_4(sc, ALC_MASTER_CFG, reg); /* Restore L0s/L1s config. */ if ((pmcfg & (PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB)) != 0) CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } alc_osc_reset(sc); reg = CSR_READ_4(sc, ALC_MISC3); reg &= ~MISC3_25M_BY_SW; reg |= MISC3_25M_NOTO_INTNL; CSR_WRITE_4(sc, ALC_MISC3, reg); reg = CSR_READ_4(sc, ALC_MISC); reg &= ~MISC_INTNLOSC_OPEN; if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1) reg &= ~MISC_ISO_ENB; CSR_WRITE_4(sc, ALC_MISC, reg); DELAY(20); } if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2) CSR_WRITE_4(sc, ALC_SERDES_LOCK, CSR_READ_4(sc, ALC_SERDES_LOCK) | SERDES_MAC_CLK_SLOWDOWN | SERDES_PHY_CLK_SLOWDOWN); } static void alc_init(void *xsc) { struct alc_softc *sc; sc = (struct alc_softc *)xsc; ALC_LOCK(sc); alc_init_locked(sc); ALC_UNLOCK(sc); } static void alc_init_locked(struct alc_softc *sc) { struct ifnet *ifp; struct mii_data *mii; uint8_t eaddr[ETHER_ADDR_LEN]; bus_addr_t paddr; uint32_t reg, rxf_hi, rxf_lo; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; mii = device_get_softc(sc->alc_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel any pending I/O. */ alc_stop(sc); /* * Reset the chip to a known state. */ alc_reset(sc); /* Initialize Rx descriptors. */ if (alc_init_rx_ring(sc) != 0) { device_printf(sc->alc_dev, "no memory for Rx buffers.\n"); alc_stop(sc); return; } alc_init_rr_ring(sc); alc_init_tx_ring(sc); alc_init_cmb(sc); alc_init_smb(sc); /* Enable all clocks. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { CSR_WRITE_4(sc, ALC_CLK_GATING_CFG, CLK_GATING_DMAW_ENB | CLK_GATING_DMAR_ENB | CLK_GATING_TXQ_ENB | CLK_GATING_RXQ_ENB | CLK_GATING_TXMAC_ENB | CLK_GATING_RXMAC_ENB); if (AR816X_REV(sc->alc_rev) >= AR816X_REV_B0) CSR_WRITE_4(sc, ALC_IDLE_DECISN_TIMER, IDLE_DECISN_TIMER_DEFAULT_1MS); } else CSR_WRITE_4(sc, ALC_CLK_GATING_CFG, 0); /* Reprogram the station address. */ bcopy(IF_LLADDR(ifp), eaddr, ETHER_ADDR_LEN); CSR_WRITE_4(sc, ALC_PAR0, eaddr[2] << 24 | eaddr[3] << 16 | eaddr[4] << 8 | eaddr[5]); CSR_WRITE_4(sc, ALC_PAR1, eaddr[0] << 8 | eaddr[1]); /* * Clear WOL status and disable all WOL feature as WOL * would interfere Rx operation under normal environments. */ CSR_READ_4(sc, ALC_WOL_CFG); CSR_WRITE_4(sc, ALC_WOL_CFG, 0); /* Set Tx descriptor base addresses. */ paddr = sc->alc_rdata.alc_tx_ring_paddr; CSR_WRITE_4(sc, ALC_TX_BASE_ADDR_HI, ALC_ADDR_HI(paddr)); CSR_WRITE_4(sc, ALC_TDL_HEAD_ADDR_LO, ALC_ADDR_LO(paddr)); /* We don't use high priority ring. */ CSR_WRITE_4(sc, ALC_TDH_HEAD_ADDR_LO, 0); /* Set Tx descriptor counter. */ CSR_WRITE_4(sc, ALC_TD_RING_CNT, (ALC_TX_RING_CNT << TD_RING_CNT_SHIFT) & TD_RING_CNT_MASK); /* Set Rx descriptor base addresses. */ paddr = sc->alc_rdata.alc_rx_ring_paddr; CSR_WRITE_4(sc, ALC_RX_BASE_ADDR_HI, ALC_ADDR_HI(paddr)); CSR_WRITE_4(sc, ALC_RD0_HEAD_ADDR_LO, ALC_ADDR_LO(paddr)); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* We use one Rx ring. */ CSR_WRITE_4(sc, ALC_RD1_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RD2_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RD3_HEAD_ADDR_LO, 0); } /* Set Rx descriptor counter. */ CSR_WRITE_4(sc, ALC_RD_RING_CNT, (ALC_RX_RING_CNT << RD_RING_CNT_SHIFT) & RD_RING_CNT_MASK); /* * Let hardware split jumbo frames into alc_max_buf_sized chunks. * if it do not fit the buffer size. Rx return descriptor holds * a counter that indicates how many fragments were made by the * hardware. The buffer size should be multiple of 8 bytes. * Since hardware has limit on the size of buffer size, always * use the maximum value. * For strict-alignment architectures make sure to reduce buffer * size by 8 bytes to make room for alignment fixup. */ #ifndef __NO_STRICT_ALIGNMENT sc->alc_buf_size = RX_BUF_SIZE_MAX - sizeof(uint64_t); #else sc->alc_buf_size = RX_BUF_SIZE_MAX; #endif CSR_WRITE_4(sc, ALC_RX_BUF_SIZE, sc->alc_buf_size); paddr = sc->alc_rdata.alc_rr_ring_paddr; /* Set Rx return descriptor base addresses. */ CSR_WRITE_4(sc, ALC_RRD0_HEAD_ADDR_LO, ALC_ADDR_LO(paddr)); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* We use one Rx return ring. */ CSR_WRITE_4(sc, ALC_RRD1_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RRD2_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RRD3_HEAD_ADDR_LO, 0); } /* Set Rx return descriptor counter. */ CSR_WRITE_4(sc, ALC_RRD_RING_CNT, (ALC_RR_RING_CNT << RRD_RING_CNT_SHIFT) & RRD_RING_CNT_MASK); paddr = sc->alc_rdata.alc_cmb_paddr; CSR_WRITE_4(sc, ALC_CMB_BASE_ADDR_LO, ALC_ADDR_LO(paddr)); paddr = sc->alc_rdata.alc_smb_paddr; CSR_WRITE_4(sc, ALC_SMB_BASE_ADDR_HI, ALC_ADDR_HI(paddr)); CSR_WRITE_4(sc, ALC_SMB_BASE_ADDR_LO, ALC_ADDR_LO(paddr)); if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B) { /* Reconfigure SRAM - Vendor magic. */ CSR_WRITE_4(sc, ALC_SRAM_RX_FIFO_LEN, 0x000002A0); CSR_WRITE_4(sc, ALC_SRAM_TX_FIFO_LEN, 0x00000100); CSR_WRITE_4(sc, ALC_SRAM_RX_FIFO_ADDR, 0x029F0000); CSR_WRITE_4(sc, ALC_SRAM_RD0_ADDR, 0x02BF02A0); CSR_WRITE_4(sc, ALC_SRAM_TX_FIFO_ADDR, 0x03BF02C0); CSR_WRITE_4(sc, ALC_SRAM_TD_ADDR, 0x03DF03C0); CSR_WRITE_4(sc, ALC_TXF_WATER_MARK, 0x00000000); CSR_WRITE_4(sc, ALC_RD_DMA_CFG, 0x00000000); } /* Tell hardware that we're ready to load DMA blocks. */ CSR_WRITE_4(sc, ALC_DMA_BLOCK, DMA_BLOCK_LOAD); /* Configure interrupt moderation timer. */ reg = ALC_USECS(sc->alc_int_rx_mod) << IM_TIMER_RX_SHIFT; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) reg |= ALC_USECS(sc->alc_int_tx_mod) << IM_TIMER_TX_SHIFT; CSR_WRITE_4(sc, ALC_IM_TIMER, reg); /* * We don't want to automatic interrupt clear as task queue * for the interrupt should know interrupt status. */ reg = CSR_READ_4(sc, ALC_MASTER_CFG); reg &= ~(MASTER_IM_RX_TIMER_ENB | MASTER_IM_TX_TIMER_ENB); reg |= MASTER_SA_TIMER_ENB; if (ALC_USECS(sc->alc_int_rx_mod) != 0) reg |= MASTER_IM_RX_TIMER_ENB; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0 && ALC_USECS(sc->alc_int_tx_mod) != 0) reg |= MASTER_IM_TX_TIMER_ENB; CSR_WRITE_4(sc, ALC_MASTER_CFG, reg); /* * Disable interrupt re-trigger timer. We don't want automatic * re-triggering of un-ACKed interrupts. */ CSR_WRITE_4(sc, ALC_INTR_RETRIG_TIMER, ALC_USECS(0)); /* Configure CMB. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { CSR_WRITE_4(sc, ALC_CMB_TD_THRESH, ALC_TX_RING_CNT / 3); CSR_WRITE_4(sc, ALC_CMB_TX_TIMER, ALC_USECS(sc->alc_int_tx_mod)); } else { if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) { CSR_WRITE_4(sc, ALC_CMB_TD_THRESH, 4); CSR_WRITE_4(sc, ALC_CMB_TX_TIMER, ALC_USECS(5000)); } else CSR_WRITE_4(sc, ALC_CMB_TX_TIMER, ALC_USECS(0)); } /* * Hardware can be configured to issue SMB interrupt based * on programmed interval. Since there is a callout that is * invoked for every hz in driver we use that instead of * relying on periodic SMB interrupt. */ CSR_WRITE_4(sc, ALC_SMB_STAT_TIMER, ALC_USECS(0)); /* Clear MAC statistics. */ alc_stats_clear(sc); /* * Always use maximum frame size that controller can support. * Otherwise received frames that has larger frame length * than alc(4) MTU would be silently dropped in hardware. This * would make path-MTU discovery hard as sender wouldn't get * any responses from receiver. alc(4) supports * multi-fragmented frames on Rx path so it has no issue on * assembling fragmented frames. Using maximum frame size also * removes the need to reinitialize hardware when interface * MTU configuration was changed. * * Be conservative in what you do, be liberal in what you * accept from others - RFC 793. */ CSR_WRITE_4(sc, ALC_FRAME_SIZE, sc->alc_ident->max_framelen); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* Disable header split(?) */ CSR_WRITE_4(sc, ALC_HDS_CFG, 0); /* Configure IPG/IFG parameters. */ CSR_WRITE_4(sc, ALC_IPG_IFG_CFG, ((IPG_IFG_IPGT_DEFAULT << IPG_IFG_IPGT_SHIFT) & IPG_IFG_IPGT_MASK) | ((IPG_IFG_MIFG_DEFAULT << IPG_IFG_MIFG_SHIFT) & IPG_IFG_MIFG_MASK) | ((IPG_IFG_IPG1_DEFAULT << IPG_IFG_IPG1_SHIFT) & IPG_IFG_IPG1_MASK) | ((IPG_IFG_IPG2_DEFAULT << IPG_IFG_IPG2_SHIFT) & IPG_IFG_IPG2_MASK)); /* Set parameters for half-duplex media. */ CSR_WRITE_4(sc, ALC_HDPX_CFG, ((HDPX_CFG_LCOL_DEFAULT << HDPX_CFG_LCOL_SHIFT) & HDPX_CFG_LCOL_MASK) | ((HDPX_CFG_RETRY_DEFAULT << HDPX_CFG_RETRY_SHIFT) & HDPX_CFG_RETRY_MASK) | HDPX_CFG_EXC_DEF_EN | ((HDPX_CFG_ABEBT_DEFAULT << HDPX_CFG_ABEBT_SHIFT) & HDPX_CFG_ABEBT_MASK) | ((HDPX_CFG_JAMIPG_DEFAULT << HDPX_CFG_JAMIPG_SHIFT) & HDPX_CFG_JAMIPG_MASK)); } /* * Set TSO/checksum offload threshold. For frames that is * larger than this threshold, hardware wouldn't do * TSO/checksum offloading. */ reg = (sc->alc_ident->max_framelen >> TSO_OFFLOAD_THRESH_UNIT_SHIFT) & TSO_OFFLOAD_THRESH_MASK; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) reg |= TSO_OFFLOAD_ERRLGPKT_DROP_ENB; CSR_WRITE_4(sc, ALC_TSO_OFFLOAD_THRESH, reg); /* Configure TxQ. */ reg = (alc_dma_burst[sc->alc_dma_rd_burst] << TXQ_CFG_TX_FIFO_BURST_SHIFT) & TXQ_CFG_TX_FIFO_BURST_MASK; if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) reg >>= 1; reg |= (TXQ_CFG_TD_BURST_DEFAULT << TXQ_CFG_TD_BURST_SHIFT) & TXQ_CFG_TD_BURST_MASK; reg |= TXQ_CFG_IP_OPTION_ENB | TXQ_CFG_8023_ENB; CSR_WRITE_4(sc, ALC_TXQ_CFG, reg | TXQ_CFG_ENHANCED_MODE); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { reg = (TXQ_CFG_TD_BURST_DEFAULT << HQTD_CFG_Q1_BURST_SHIFT | TXQ_CFG_TD_BURST_DEFAULT << HQTD_CFG_Q2_BURST_SHIFT | TXQ_CFG_TD_BURST_DEFAULT << HQTD_CFG_Q3_BURST_SHIFT | HQTD_CFG_BURST_ENB); CSR_WRITE_4(sc, ALC_HQTD_CFG, reg); reg = WRR_PRI_RESTRICT_NONE; reg |= (WRR_PRI_DEFAULT << WRR_PRI0_SHIFT | WRR_PRI_DEFAULT << WRR_PRI1_SHIFT | WRR_PRI_DEFAULT << WRR_PRI2_SHIFT | WRR_PRI_DEFAULT << WRR_PRI3_SHIFT); CSR_WRITE_4(sc, ALC_WRR, reg); } else { /* Configure Rx free descriptor pre-fetching. */ CSR_WRITE_4(sc, ALC_RX_RD_FREE_THRESH, ((RX_RD_FREE_THRESH_HI_DEFAULT << RX_RD_FREE_THRESH_HI_SHIFT) & RX_RD_FREE_THRESH_HI_MASK) | ((RX_RD_FREE_THRESH_LO_DEFAULT << RX_RD_FREE_THRESH_LO_SHIFT) & RX_RD_FREE_THRESH_LO_MASK)); } /* * Configure flow control parameters. * XON : 80% of Rx FIFO * XOFF : 30% of Rx FIFO */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { reg = CSR_READ_4(sc, ALC_SRAM_RX_FIFO_LEN); reg &= SRAM_RX_FIFO_LEN_MASK; reg *= 8; if (reg > 8 * 1024) reg -= RX_FIFO_PAUSE_816X_RSVD; else reg -= RX_BUF_SIZE_MAX; reg /= 8; CSR_WRITE_4(sc, ALC_RX_FIFO_PAUSE_THRESH, ((reg << RX_FIFO_PAUSE_THRESH_LO_SHIFT) & RX_FIFO_PAUSE_THRESH_LO_MASK) | (((RX_FIFO_PAUSE_816X_RSVD / 8) << RX_FIFO_PAUSE_THRESH_HI_SHIFT) & RX_FIFO_PAUSE_THRESH_HI_MASK)); } else if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8131 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8132) { reg = CSR_READ_4(sc, ALC_SRAM_RX_FIFO_LEN); rxf_hi = (reg * 8) / 10; rxf_lo = (reg * 3) / 10; CSR_WRITE_4(sc, ALC_RX_FIFO_PAUSE_THRESH, ((rxf_lo << RX_FIFO_PAUSE_THRESH_LO_SHIFT) & RX_FIFO_PAUSE_THRESH_LO_MASK) | ((rxf_hi << RX_FIFO_PAUSE_THRESH_HI_SHIFT) & RX_FIFO_PAUSE_THRESH_HI_MASK)); } if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* Disable RSS until I understand L1C/L2C's RSS logic. */ CSR_WRITE_4(sc, ALC_RSS_IDT_TABLE0, 0); CSR_WRITE_4(sc, ALC_RSS_CPU, 0); } /* Configure RxQ. */ reg = (RXQ_CFG_RD_BURST_DEFAULT << RXQ_CFG_RD_BURST_SHIFT) & RXQ_CFG_RD_BURST_MASK; reg |= RXQ_CFG_RSS_MODE_DIS; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { reg |= (RXQ_CFG_816X_IDT_TBL_SIZE_DEFAULT << RXQ_CFG_816X_IDT_TBL_SIZE_SHIFT) & RXQ_CFG_816X_IDT_TBL_SIZE_MASK; if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) reg |= RXQ_CFG_ASPM_THROUGHPUT_LIMIT_100M; } else { if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0 && sc->alc_ident->deviceid != DEVICEID_ATHEROS_AR8151_V2) reg |= RXQ_CFG_ASPM_THROUGHPUT_LIMIT_100M; } CSR_WRITE_4(sc, ALC_RXQ_CFG, reg); /* Configure DMA parameters. */ reg = DMA_CFG_OUT_ORDER | DMA_CFG_RD_REQ_PRI; reg |= sc->alc_rcb; if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) reg |= DMA_CFG_CMB_ENB; if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) reg |= DMA_CFG_SMB_ENB; else reg |= DMA_CFG_SMB_DIS; reg |= (sc->alc_dma_rd_burst & DMA_CFG_RD_BURST_MASK) << DMA_CFG_RD_BURST_SHIFT; reg |= (sc->alc_dma_wr_burst & DMA_CFG_WR_BURST_MASK) << DMA_CFG_WR_BURST_SHIFT; reg |= (DMA_CFG_RD_DELAY_CNT_DEFAULT << DMA_CFG_RD_DELAY_CNT_SHIFT) & DMA_CFG_RD_DELAY_CNT_MASK; reg |= (DMA_CFG_WR_DELAY_CNT_DEFAULT << DMA_CFG_WR_DELAY_CNT_SHIFT) & DMA_CFG_WR_DELAY_CNT_MASK; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { switch (AR816X_REV(sc->alc_rev)) { case AR816X_REV_A0: case AR816X_REV_A1: reg |= DMA_CFG_RD_CHNL_SEL_2; break; case AR816X_REV_B0: /* FALLTHROUGH */ default: reg |= DMA_CFG_RD_CHNL_SEL_4; break; } } CSR_WRITE_4(sc, ALC_DMA_CFG, reg); /* * Configure Tx/Rx MACs. * - Auto-padding for short frames. * - Enable CRC generation. * Actual reconfiguration of MAC for resolved speed/duplex * is followed after detection of link establishment. * AR813x/AR815x always does checksum computation regardless * of MAC_CFG_RXCSUM_ENB bit. Also the controller is known to * have bug in protocol field in Rx return structure so * these controllers can't handle fragmented frames. Disable * Rx checksum offloading until there is a newer controller * that has sane implementation. */ reg = MAC_CFG_TX_CRC_ENB | MAC_CFG_TX_AUTO_PAD | MAC_CFG_FULL_DUPLEX | ((MAC_CFG_PREAMBLE_DEFAULT << MAC_CFG_PREAMBLE_SHIFT) & MAC_CFG_PREAMBLE_MASK); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) reg |= MAC_CFG_HASH_ALG_CRC32 | MAC_CFG_SPEED_MODE_SW; if ((sc->alc_flags & ALC_FLAG_FASTETHER) != 0) reg |= MAC_CFG_SPEED_10_100; else reg |= MAC_CFG_SPEED_1000; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); /* Set up the receive filter. */ alc_rxfilter(sc); alc_rxvlan(sc); /* Acknowledge all pending interrupts and clear it. */ CSR_WRITE_4(sc, ALC_INTR_MASK, ALC_INTRS); CSR_WRITE_4(sc, ALC_INTR_STATUS, 0xFFFFFFFF); CSR_WRITE_4(sc, ALC_INTR_STATUS, 0); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->alc_flags &= ~ALC_FLAG_LINK; /* Switch to the current media. */ alc_mediachange_locked(sc); callout_reset(&sc->alc_tick_ch, hz, alc_tick, sc); } static void alc_stop(struct alc_softc *sc) { struct ifnet *ifp; struct alc_txdesc *txd; struct alc_rxdesc *rxd; uint32_t reg; int i; ALC_LOCK_ASSERT(sc); /* * Mark the interface down and cancel the watchdog timer. */ ifp = sc->alc_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->alc_flags &= ~ALC_FLAG_LINK; callout_stop(&sc->alc_tick_ch); sc->alc_watchdog_timer = 0; alc_stats_update(sc); /* Disable interrupts. */ CSR_WRITE_4(sc, ALC_INTR_MASK, 0); CSR_WRITE_4(sc, ALC_INTR_STATUS, 0xFFFFFFFF); /* Disable DMA. */ reg = CSR_READ_4(sc, ALC_DMA_CFG); reg &= ~(DMA_CFG_CMB_ENB | DMA_CFG_SMB_ENB); reg |= DMA_CFG_SMB_DIS; CSR_WRITE_4(sc, ALC_DMA_CFG, reg); DELAY(1000); /* Stop Rx/Tx MACs. */ alc_stop_mac(sc); /* Disable interrupts which might be touched in taskq handler. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, 0xFFFFFFFF); /* Disable L0s/L1s */ alc_aspm(sc, 0, IFM_UNKNOWN); /* Reclaim Rx buffers that have been processed. */ if (sc->alc_cdata.alc_rxhead != NULL) m_freem(sc->alc_cdata.alc_rxhead); ALC_RXCHAIN_RESET(sc); /* * Free Tx/Rx mbufs still in the queues. */ for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } } static void alc_stop_mac(struct alc_softc *sc) { uint32_t reg; int i; alc_stop_queue(sc); /* Disable Rx/Tx MAC. */ reg = CSR_READ_4(sc, ALC_MAC_CFG); if ((reg & (MAC_CFG_TX_ENB | MAC_CFG_RX_ENB)) != 0) { reg &= ~(MAC_CFG_TX_ENB | MAC_CFG_RX_ENB); CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } for (i = ALC_TIMEOUT; i > 0; i--) { reg = CSR_READ_4(sc, ALC_IDLE_STATUS); if ((reg & (IDLE_STATUS_RXMAC | IDLE_STATUS_TXMAC)) == 0) break; DELAY(10); } if (i == 0) device_printf(sc->alc_dev, "could not disable Rx/Tx MAC(0x%08x)!\n", reg); } static void alc_start_queue(struct alc_softc *sc) { uint32_t qcfg[] = { 0, RXQ_CFG_QUEUE0_ENB, RXQ_CFG_QUEUE0_ENB | RXQ_CFG_QUEUE1_ENB, RXQ_CFG_QUEUE0_ENB | RXQ_CFG_QUEUE1_ENB | RXQ_CFG_QUEUE2_ENB, RXQ_CFG_ENB }; uint32_t cfg; ALC_LOCK_ASSERT(sc); /* Enable RxQ. */ cfg = CSR_READ_4(sc, ALC_RXQ_CFG); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { cfg &= ~RXQ_CFG_ENB; cfg |= qcfg[1]; } else cfg |= RXQ_CFG_QUEUE0_ENB; CSR_WRITE_4(sc, ALC_RXQ_CFG, cfg); /* Enable TxQ. */ cfg = CSR_READ_4(sc, ALC_TXQ_CFG); cfg |= TXQ_CFG_ENB; CSR_WRITE_4(sc, ALC_TXQ_CFG, cfg); } static void alc_stop_queue(struct alc_softc *sc) { uint32_t reg; int i; /* Disable RxQ. */ reg = CSR_READ_4(sc, ALC_RXQ_CFG); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { if ((reg & RXQ_CFG_ENB) != 0) { reg &= ~RXQ_CFG_ENB; CSR_WRITE_4(sc, ALC_RXQ_CFG, reg); } } else { if ((reg & RXQ_CFG_QUEUE0_ENB) != 0) { reg &= ~RXQ_CFG_QUEUE0_ENB; CSR_WRITE_4(sc, ALC_RXQ_CFG, reg); } } /* Disable TxQ. */ reg = CSR_READ_4(sc, ALC_TXQ_CFG); if ((reg & TXQ_CFG_ENB) != 0) { reg &= ~TXQ_CFG_ENB; CSR_WRITE_4(sc, ALC_TXQ_CFG, reg); } DELAY(40); for (i = ALC_TIMEOUT; i > 0; i--) { reg = CSR_READ_4(sc, ALC_IDLE_STATUS); if ((reg & (IDLE_STATUS_RXQ | IDLE_STATUS_TXQ)) == 0) break; DELAY(10); } if (i == 0) device_printf(sc->alc_dev, "could not disable RxQ/TxQ (0x%08x)!\n", reg); } static void alc_init_tx_ring(struct alc_softc *sc) { struct alc_ring_data *rd; struct alc_txdesc *txd; int i; ALC_LOCK_ASSERT(sc); sc->alc_cdata.alc_tx_prod = 0; sc->alc_cdata.alc_tx_cons = 0; sc->alc_cdata.alc_tx_cnt = 0; rd = &sc->alc_rdata; bzero(rd->alc_tx_ring, ALC_TX_RING_SZ); for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; txd->tx_m = NULL; } bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_PREWRITE); } static int alc_init_rx_ring(struct alc_softc *sc) { struct alc_ring_data *rd; struct alc_rxdesc *rxd; int i; ALC_LOCK_ASSERT(sc); sc->alc_cdata.alc_rx_cons = ALC_RX_RING_CNT - 1; sc->alc_morework = 0; rd = &sc->alc_rdata; bzero(rd->alc_rx_ring, ALC_RX_RING_SZ); for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_desc = &rd->alc_rx_ring[i]; if (alc_newbuf(sc, rxd) != 0) return (ENOBUFS); } /* * Since controller does not update Rx descriptors, driver * does have to read Rx descriptors back so BUS_DMASYNC_PREWRITE * is enough to ensure coherence. */ bus_dmamap_sync(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, BUS_DMASYNC_PREWRITE); /* Let controller know availability of new Rx buffers. */ CSR_WRITE_4(sc, ALC_MBOX_RD0_PROD_IDX, sc->alc_cdata.alc_rx_cons); return (0); } static void alc_init_rr_ring(struct alc_softc *sc) { struct alc_ring_data *rd; ALC_LOCK_ASSERT(sc); sc->alc_cdata.alc_rr_cons = 0; ALC_RXCHAIN_RESET(sc); rd = &sc->alc_rdata; bzero(rd->alc_rr_ring, ALC_RR_RING_SZ); bus_dmamap_sync(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void alc_init_cmb(struct alc_softc *sc) { struct alc_ring_data *rd; ALC_LOCK_ASSERT(sc); rd = &sc->alc_rdata; bzero(rd->alc_cmb, ALC_CMB_SZ); bus_dmamap_sync(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void alc_init_smb(struct alc_softc *sc) { struct alc_ring_data *rd; ALC_LOCK_ASSERT(sc); rd = &sc->alc_rdata; bzero(rd->alc_smb, ALC_SMB_SZ); bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void alc_rxvlan(struct alc_softc *sc) { struct ifnet *ifp; uint32_t reg; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; reg = CSR_READ_4(sc, ALC_MAC_CFG); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) reg |= MAC_CFG_VLAN_TAG_STRIP; else reg &= ~MAC_CFG_VLAN_TAG_STRIP; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } static void alc_rxfilter(struct alc_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; uint32_t crc; uint32_t mchash[2]; uint32_t rxcfg; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; bzero(mchash, sizeof(mchash)); rxcfg = CSR_READ_4(sc, ALC_MAC_CFG); rxcfg &= ~(MAC_CFG_ALLMULTI | MAC_CFG_BCAST | MAC_CFG_PROMISC); if ((ifp->if_flags & IFF_BROADCAST) != 0) rxcfg |= MAC_CFG_BCAST; if ((ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI)) != 0) { if ((ifp->if_flags & IFF_PROMISC) != 0) rxcfg |= MAC_CFG_PROMISC; if ((ifp->if_flags & IFF_ALLMULTI) != 0) rxcfg |= MAC_CFG_ALLMULTI; mchash[0] = 0xFFFFFFFF; mchash[1] = 0xFFFFFFFF; goto chipit; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &sc->alc_ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; crc = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN); mchash[crc >> 31] |= 1 << ((crc >> 26) & 0x1f); } if_maddr_runlock(ifp); chipit: CSR_WRITE_4(sc, ALC_MAR0, mchash[0]); CSR_WRITE_4(sc, ALC_MAR1, mchash[1]); CSR_WRITE_4(sc, ALC_MAC_CFG, rxcfg); } static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (arg1 == NULL) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_alc_proc_limit(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, ALC_PROC_MIN, ALC_PROC_MAX)); } static int sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, ALC_IM_TIMER_MIN, ALC_IM_TIMER_MAX)); } #ifdef NETDUMP static void -alc_netdump_init(struct ifnet *ifp, int *nrxr, int *clsize) +alc_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct alc_softc *sc; sc = if_getsoftc(ifp); KASSERT(sc->alc_buf_size <= MCLBYTES, ("incorrect cluster size")); *nrxr = ALC_RX_RING_CNT; + *ncl = NETDUMP_MAX_IN_FLIGHT; *clsize = MCLBYTES; } static void alc_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) { } static int alc_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct alc_softc *sc; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); error = alc_encap(sc, &m); if (error == 0) alc_start_tx(sc); return (error); } static int alc_netdump_poll(struct ifnet *ifp, int count) { struct alc_softc *sc; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); alc_txeof(sc); return (alc_rxintr(sc, count)); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/bge/if_bge.c =================================================================== --- user/markj/netdump/sys/dev/bge/if_bge.c (revision 332406) +++ user/markj/netdump/sys/dev/bge/if_bge.c (revision 332407) @@ -1,6880 +1,6881 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2001 Wind River Systems * Copyright (c) 1997, 1998, 1999, 2001 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Broadcom BCM57xx(x)/BCM590x NetXtreme and NetLink family Ethernet driver * * The Broadcom BCM5700 is based on technology originally developed by * Alteon Networks as part of the Tigon I and Tigon II Gigabit Ethernet * MAC chips. The BCM5700, sometimes referred to as the Tigon III, has * two on-board MIPS R4000 CPUs and can have as much as 16MB of external * SSRAM. The BCM5700 supports TCP, UDP and IP checksum offload, jumbo * frames, highly configurable RX filtering, and 16 RX and TX queues * (which, along with RX filter rules, can be used for QOS applications). * Other features, such as TCP segmentation, may be available as part * of value-added firmware updates. Unlike the Tigon I and Tigon II, * firmware images can be stored in hardware and need not be compiled * into the driver. * * The BCM5700 supports the PCI v2.2 and PCI-X v1.0 standards, and will * function in a 32-bit/64-bit 33/66Mhz bus, or a 64-bit/133Mhz bus. * * The BCM5701 is a single-chip solution incorporating both the BCM5700 * MAC and a BCM5401 10/100/1000 PHY. Unlike the BCM5700, the BCM5701 * does not support external SSRAM. * * Broadcom also produces a variation of the BCM5700 under the "Altima" * brand name, which is functionally similar but lacks PCI-X support. * * Without external SSRAM, you can only have at most 4 TX rings, * and the use of the mini RX ring is disabled. This seems to imply * that these features are simply not available on the BCM5701. As a * result, this driver does not implement any support for the mini RX * ring. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #ifdef __sparc64__ #include #include #include #include #endif #include #include #include #define BGE_CSUM_FEATURES (CSUM_IP | CSUM_TCP) #define ETHER_MIN_NOPAD (ETHER_MIN_LEN - ETHER_CRC_LEN) /* i.e., 60 */ MODULE_DEPEND(bge, pci, 1, 1, 1); MODULE_DEPEND(bge, ether, 1, 1, 1); MODULE_DEPEND(bge, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* * Various supported device vendors/types and their names. Note: the * spec seems to indicate that the hardware still has Alteon's vendor * ID burned into it, though it will always be overriden by the vendor * ID in the EEPROM. Just to be safe, we cover all possibilities. */ static const struct bge_type { uint16_t bge_vid; uint16_t bge_did; } bge_devs[] = { { ALTEON_VENDORID, ALTEON_DEVICEID_BCM5700 }, { ALTEON_VENDORID, ALTEON_DEVICEID_BCM5701 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC1000 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC1002 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC9100 }, { APPLE_VENDORID, APPLE_DEVICE_BCM5701 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5700 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5701 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702X }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703X }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704S_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705K }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705M_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5714C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5714S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5715 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5715S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5717 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5717C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5718 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5719 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5720 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5721 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5722 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5723 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5725 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5727 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5750 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5750M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5752 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5752M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5754 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5754M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5755 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5755M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5756 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761E }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761SE }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5762 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5764 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5780 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5780S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5781 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5782 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5784 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5785F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5785G }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5786 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5788 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5789 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5901 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5901A2 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5903M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5906 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5906M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57760 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57761 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57762 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57764 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57765 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57766 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57767 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57780 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57781 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57782 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57785 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57786 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57787 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57788 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57790 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57791 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57795 }, { SK_VENDORID, SK_DEVICEID_ALTIMA }, { TC_VENDORID, TC_DEVICEID_3C996 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PW008GE4 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PW008GE5 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PP250450 }, { 0, 0 } }; static const struct bge_vendor { uint16_t v_id; const char *v_name; } bge_vendors[] = { { ALTEON_VENDORID, "Alteon" }, { ALTIMA_VENDORID, "Altima" }, { APPLE_VENDORID, "Apple" }, { BCOM_VENDORID, "Broadcom" }, { SK_VENDORID, "SysKonnect" }, { TC_VENDORID, "3Com" }, { FJTSU_VENDORID, "Fujitsu" }, { 0, NULL } }; static const struct bge_revision { uint32_t br_chipid; const char *br_name; } bge_revisions[] = { { BGE_CHIPID_BCM5700_A0, "BCM5700 A0" }, { BGE_CHIPID_BCM5700_A1, "BCM5700 A1" }, { BGE_CHIPID_BCM5700_B0, "BCM5700 B0" }, { BGE_CHIPID_BCM5700_B1, "BCM5700 B1" }, { BGE_CHIPID_BCM5700_B2, "BCM5700 B2" }, { BGE_CHIPID_BCM5700_B3, "BCM5700 B3" }, { BGE_CHIPID_BCM5700_ALTIMA, "BCM5700 Altima" }, { BGE_CHIPID_BCM5700_C0, "BCM5700 C0" }, { BGE_CHIPID_BCM5701_A0, "BCM5701 A0" }, { BGE_CHIPID_BCM5701_B0, "BCM5701 B0" }, { BGE_CHIPID_BCM5701_B2, "BCM5701 B2" }, { BGE_CHIPID_BCM5701_B5, "BCM5701 B5" }, { BGE_CHIPID_BCM5703_A0, "BCM5703 A0" }, { BGE_CHIPID_BCM5703_A1, "BCM5703 A1" }, { BGE_CHIPID_BCM5703_A2, "BCM5703 A2" }, { BGE_CHIPID_BCM5703_A3, "BCM5703 A3" }, { BGE_CHIPID_BCM5703_B0, "BCM5703 B0" }, { BGE_CHIPID_BCM5704_A0, "BCM5704 A0" }, { BGE_CHIPID_BCM5704_A1, "BCM5704 A1" }, { BGE_CHIPID_BCM5704_A2, "BCM5704 A2" }, { BGE_CHIPID_BCM5704_A3, "BCM5704 A3" }, { BGE_CHIPID_BCM5704_B0, "BCM5704 B0" }, { BGE_CHIPID_BCM5705_A0, "BCM5705 A0" }, { BGE_CHIPID_BCM5705_A1, "BCM5705 A1" }, { BGE_CHIPID_BCM5705_A2, "BCM5705 A2" }, { BGE_CHIPID_BCM5705_A3, "BCM5705 A3" }, { BGE_CHIPID_BCM5750_A0, "BCM5750 A0" }, { BGE_CHIPID_BCM5750_A1, "BCM5750 A1" }, { BGE_CHIPID_BCM5750_A3, "BCM5750 A3" }, { BGE_CHIPID_BCM5750_B0, "BCM5750 B0" }, { BGE_CHIPID_BCM5750_B1, "BCM5750 B1" }, { BGE_CHIPID_BCM5750_C0, "BCM5750 C0" }, { BGE_CHIPID_BCM5750_C1, "BCM5750 C1" }, { BGE_CHIPID_BCM5750_C2, "BCM5750 C2" }, { BGE_CHIPID_BCM5714_A0, "BCM5714 A0" }, { BGE_CHIPID_BCM5752_A0, "BCM5752 A0" }, { BGE_CHIPID_BCM5752_A1, "BCM5752 A1" }, { BGE_CHIPID_BCM5752_A2, "BCM5752 A2" }, { BGE_CHIPID_BCM5714_B0, "BCM5714 B0" }, { BGE_CHIPID_BCM5714_B3, "BCM5714 B3" }, { BGE_CHIPID_BCM5715_A0, "BCM5715 A0" }, { BGE_CHIPID_BCM5715_A1, "BCM5715 A1" }, { BGE_CHIPID_BCM5715_A3, "BCM5715 A3" }, { BGE_CHIPID_BCM5717_A0, "BCM5717 A0" }, { BGE_CHIPID_BCM5717_B0, "BCM5717 B0" }, { BGE_CHIPID_BCM5717_C0, "BCM5717 C0" }, { BGE_CHIPID_BCM5719_A0, "BCM5719 A0" }, { BGE_CHIPID_BCM5720_A0, "BCM5720 A0" }, { BGE_CHIPID_BCM5755_A0, "BCM5755 A0" }, { BGE_CHIPID_BCM5755_A1, "BCM5755 A1" }, { BGE_CHIPID_BCM5755_A2, "BCM5755 A2" }, { BGE_CHIPID_BCM5722_A0, "BCM5722 A0" }, { BGE_CHIPID_BCM5761_A0, "BCM5761 A0" }, { BGE_CHIPID_BCM5761_A1, "BCM5761 A1" }, { BGE_CHIPID_BCM5762_A0, "BCM5762 A0" }, { BGE_CHIPID_BCM5784_A0, "BCM5784 A0" }, { BGE_CHIPID_BCM5784_A1, "BCM5784 A1" }, /* 5754 and 5787 share the same ASIC ID */ { BGE_CHIPID_BCM5787_A0, "BCM5754/5787 A0" }, { BGE_CHIPID_BCM5787_A1, "BCM5754/5787 A1" }, { BGE_CHIPID_BCM5787_A2, "BCM5754/5787 A2" }, { BGE_CHIPID_BCM5906_A1, "BCM5906 A1" }, { BGE_CHIPID_BCM5906_A2, "BCM5906 A2" }, { BGE_CHIPID_BCM57765_A0, "BCM57765 A0" }, { BGE_CHIPID_BCM57765_B0, "BCM57765 B0" }, { BGE_CHIPID_BCM57780_A0, "BCM57780 A0" }, { BGE_CHIPID_BCM57780_A1, "BCM57780 A1" }, { 0, NULL } }; /* * Some defaults for major revisions, so that newer steppings * that we don't know about have a shot at working. */ static const struct bge_revision bge_majorrevs[] = { { BGE_ASICREV_BCM5700, "unknown BCM5700" }, { BGE_ASICREV_BCM5701, "unknown BCM5701" }, { BGE_ASICREV_BCM5703, "unknown BCM5703" }, { BGE_ASICREV_BCM5704, "unknown BCM5704" }, { BGE_ASICREV_BCM5705, "unknown BCM5705" }, { BGE_ASICREV_BCM5750, "unknown BCM5750" }, { BGE_ASICREV_BCM5714_A0, "unknown BCM5714" }, { BGE_ASICREV_BCM5752, "unknown BCM5752" }, { BGE_ASICREV_BCM5780, "unknown BCM5780" }, { BGE_ASICREV_BCM5714, "unknown BCM5714" }, { BGE_ASICREV_BCM5755, "unknown BCM5755" }, { BGE_ASICREV_BCM5761, "unknown BCM5761" }, { BGE_ASICREV_BCM5784, "unknown BCM5784" }, { BGE_ASICREV_BCM5785, "unknown BCM5785" }, /* 5754 and 5787 share the same ASIC ID */ { BGE_ASICREV_BCM5787, "unknown BCM5754/5787" }, { BGE_ASICREV_BCM5906, "unknown BCM5906" }, { BGE_ASICREV_BCM57765, "unknown BCM57765" }, { BGE_ASICREV_BCM57766, "unknown BCM57766" }, { BGE_ASICREV_BCM57780, "unknown BCM57780" }, { BGE_ASICREV_BCM5717, "unknown BCM5717" }, { BGE_ASICREV_BCM5719, "unknown BCM5719" }, { BGE_ASICREV_BCM5720, "unknown BCM5720" }, { BGE_ASICREV_BCM5762, "unknown BCM5762" }, { 0, NULL } }; #define BGE_IS_JUMBO_CAPABLE(sc) ((sc)->bge_flags & BGE_FLAG_JUMBO) #define BGE_IS_5700_FAMILY(sc) ((sc)->bge_flags & BGE_FLAG_5700_FAMILY) #define BGE_IS_5705_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5705_PLUS) #define BGE_IS_5714_FAMILY(sc) ((sc)->bge_flags & BGE_FLAG_5714_FAMILY) #define BGE_IS_575X_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_575X_PLUS) #define BGE_IS_5755_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5755_PLUS) #define BGE_IS_5717_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5717_PLUS) #define BGE_IS_57765_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_57765_PLUS) static uint32_t bge_chipid(device_t); static const struct bge_vendor * bge_lookup_vendor(uint16_t); static const struct bge_revision * bge_lookup_rev(uint32_t); typedef int (*bge_eaddr_fcn_t)(struct bge_softc *, uint8_t[]); static int bge_probe(device_t); static int bge_attach(device_t); static int bge_detach(device_t); static int bge_suspend(device_t); static int bge_resume(device_t); static void bge_release_resources(struct bge_softc *); static void bge_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int bge_dma_alloc(struct bge_softc *); static void bge_dma_free(struct bge_softc *); static int bge_dma_ring_alloc(struct bge_softc *, bus_size_t, bus_size_t, bus_dma_tag_t *, uint8_t **, bus_dmamap_t *, bus_addr_t *, const char *); static void bge_devinfo(struct bge_softc *); static int bge_mbox_reorder(struct bge_softc *); static int bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[]); static int bge_get_eaddr_mem(struct bge_softc *, uint8_t[]); static int bge_get_eaddr_nvram(struct bge_softc *, uint8_t[]); static int bge_get_eaddr_eeprom(struct bge_softc *, uint8_t[]); static int bge_get_eaddr(struct bge_softc *, uint8_t[]); static void bge_txeof(struct bge_softc *, uint16_t); static void bge_rxcsum(struct bge_softc *, struct bge_rx_bd *, struct mbuf *); static int bge_rxeof(struct bge_softc *, uint16_t, int); static void bge_asf_driver_up (struct bge_softc *); static void bge_tick(void *); static void bge_stats_clear_regs(struct bge_softc *); static void bge_stats_update(struct bge_softc *); static void bge_stats_update_regs(struct bge_softc *); static struct mbuf *bge_check_short_dma(struct mbuf *); static struct mbuf *bge_setup_tso(struct bge_softc *, struct mbuf *, uint16_t *, uint16_t *); static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *); static void bge_intr(void *); static int bge_msi_intr(void *); static void bge_intr_task(void *, int); static void bge_start(if_t); static void bge_start_locked(if_t); static void bge_start_tx(struct bge_softc *, uint32_t); static int bge_ioctl(if_t, u_long, caddr_t); static void bge_init_locked(struct bge_softc *); static void bge_init(void *); static void bge_stop_block(struct bge_softc *, bus_size_t, uint32_t); static void bge_stop(struct bge_softc *); static void bge_watchdog(struct bge_softc *); static int bge_shutdown(device_t); static int bge_ifmedia_upd_locked(if_t); static int bge_ifmedia_upd(if_t); static void bge_ifmedia_sts(if_t, struct ifmediareq *); static uint64_t bge_get_counter(if_t, ift_counter); static uint8_t bge_nvram_getbyte(struct bge_softc *, int, uint8_t *); static int bge_read_nvram(struct bge_softc *, caddr_t, int, int); static uint8_t bge_eeprom_getbyte(struct bge_softc *, int, uint8_t *); static int bge_read_eeprom(struct bge_softc *, caddr_t, int, int); static void bge_setpromisc(struct bge_softc *); static void bge_setmulti(struct bge_softc *); static void bge_setvlan(struct bge_softc *); static __inline void bge_rxreuse_std(struct bge_softc *, int); static __inline void bge_rxreuse_jumbo(struct bge_softc *, int); static int bge_newbuf_std(struct bge_softc *, int); static int bge_newbuf_jumbo(struct bge_softc *, int); static int bge_init_rx_ring_std(struct bge_softc *); static void bge_free_rx_ring_std(struct bge_softc *); static int bge_init_rx_ring_jumbo(struct bge_softc *); static void bge_free_rx_ring_jumbo(struct bge_softc *); static void bge_free_tx_ring(struct bge_softc *); static int bge_init_tx_ring(struct bge_softc *); static int bge_chipinit(struct bge_softc *); static int bge_blockinit(struct bge_softc *); static uint32_t bge_dma_swap_options(struct bge_softc *); static int bge_has_eaddr(struct bge_softc *); static uint32_t bge_readmem_ind(struct bge_softc *, int); static void bge_writemem_ind(struct bge_softc *, int, int); static void bge_writembx(struct bge_softc *, int, int); #ifdef notdef static uint32_t bge_readreg_ind(struct bge_softc *, int); #endif static void bge_writemem_direct(struct bge_softc *, int, int); static void bge_writereg_ind(struct bge_softc *, int, int); static int bge_miibus_readreg(device_t, int, int); static int bge_miibus_writereg(device_t, int, int, int); static void bge_miibus_statchg(device_t); #ifdef DEVICE_POLLING static int bge_poll(if_t ifp, enum poll_cmd cmd, int count); #endif #define BGE_RESET_SHUTDOWN 0 #define BGE_RESET_START 1 #define BGE_RESET_SUSPEND 2 static void bge_sig_post_reset(struct bge_softc *, int); static void bge_sig_legacy(struct bge_softc *, int); static void bge_sig_pre_reset(struct bge_softc *, int); static void bge_stop_fw(struct bge_softc *); static int bge_reset(struct bge_softc *); static void bge_link_upd(struct bge_softc *); static void bge_ape_lock_init(struct bge_softc *); static void bge_ape_read_fw_ver(struct bge_softc *); static int bge_ape_lock(struct bge_softc *, int); static void bge_ape_unlock(struct bge_softc *, int); static void bge_ape_send_event(struct bge_softc *, uint32_t); static void bge_ape_driver_state_change(struct bge_softc *, int); /* * The BGE_REGISTER_DEBUG option is only for low-level debugging. It may * leak information to untrusted users. It is also known to cause alignment * traps on certain architectures. */ #ifdef BGE_REGISTER_DEBUG static int bge_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int bge_sysctl_reg_read(SYSCTL_HANDLER_ARGS); static int bge_sysctl_ape_read(SYSCTL_HANDLER_ARGS); static int bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS); #endif static void bge_add_sysctls(struct bge_softc *); static void bge_add_sysctl_stats_regs(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void bge_add_sysctl_stats(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS); NETDUMP_DEFINE(bge); static device_method_t bge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, bge_probe), DEVMETHOD(device_attach, bge_attach), DEVMETHOD(device_detach, bge_detach), DEVMETHOD(device_shutdown, bge_shutdown), DEVMETHOD(device_suspend, bge_suspend), DEVMETHOD(device_resume, bge_resume), /* MII interface */ DEVMETHOD(miibus_readreg, bge_miibus_readreg), DEVMETHOD(miibus_writereg, bge_miibus_writereg), DEVMETHOD(miibus_statchg, bge_miibus_statchg), DEVMETHOD_END }; static driver_t bge_driver = { "bge", bge_methods, sizeof(struct bge_softc) }; static devclass_t bge_devclass; DRIVER_MODULE(bge, pci, bge_driver, bge_devclass, 0, 0); DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0); static int bge_allow_asf = 1; static SYSCTL_NODE(_hw, OID_AUTO, bge, CTLFLAG_RD, 0, "BGE driver parameters"); SYSCTL_INT(_hw_bge, OID_AUTO, allow_asf, CTLFLAG_RDTUN, &bge_allow_asf, 0, "Allow ASF mode if available"); #define SPARC64_BLADE_1500_MODEL "SUNW,Sun-Blade-1500" #define SPARC64_BLADE_1500_PATH_BGE "/pci@1f,700000/network@2" #define SPARC64_BLADE_2500_MODEL "SUNW,Sun-Blade-2500" #define SPARC64_BLADE_2500_PATH_BGE "/pci@1c,600000/network@3" #define SPARC64_OFW_SUBVENDOR "subsystem-vendor-id" static int bge_has_eaddr(struct bge_softc *sc) { #ifdef __sparc64__ char buf[sizeof(SPARC64_BLADE_1500_PATH_BGE)]; device_t dev; uint32_t subvendor; dev = sc->bge_dev; /* * The on-board BGEs found in sun4u machines aren't fitted with * an EEPROM which means that we have to obtain the MAC address * via OFW and that some tests will always fail. We distinguish * such BGEs by the subvendor ID, which also has to be obtained * from OFW instead of the PCI configuration space as the latter * indicates Broadcom as the subvendor of the netboot interface. * For early Blade 1500 and 2500 we even have to check the OFW * device path as the subvendor ID always defaults to Broadcom * there. */ if (OF_getprop(ofw_bus_get_node(dev), SPARC64_OFW_SUBVENDOR, &subvendor, sizeof(subvendor)) == sizeof(subvendor) && (subvendor == FJTSU_VENDORID || subvendor == SUN_VENDORID)) return (0); memset(buf, 0, sizeof(buf)); if (OF_package_to_path(ofw_bus_get_node(dev), buf, sizeof(buf)) > 0) { if (strcmp(sparc64_model, SPARC64_BLADE_1500_MODEL) == 0 && strcmp(buf, SPARC64_BLADE_1500_PATH_BGE) == 0) return (0); if (strcmp(sparc64_model, SPARC64_BLADE_2500_MODEL) == 0 && strcmp(buf, SPARC64_BLADE_2500_PATH_BGE) == 0) return (0); } #endif return (1); } static uint32_t bge_readmem_ind(struct bge_softc *sc, int off) { device_t dev; uint32_t val; if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) return (0); dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); val = pci_read_config(dev, BGE_PCI_MEMWIN_DATA, 4); pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4); return (val); } static void bge_writemem_ind(struct bge_softc *sc, int off, int val) { device_t dev; if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) return; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); pci_write_config(dev, BGE_PCI_MEMWIN_DATA, val, 4); pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4); } #ifdef notdef static uint32_t bge_readreg_ind(struct bge_softc *sc, int off) { device_t dev; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_REG_BASEADDR, off, 4); return (pci_read_config(dev, BGE_PCI_REG_DATA, 4)); } #endif static void bge_writereg_ind(struct bge_softc *sc, int off, int val) { device_t dev; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_REG_BASEADDR, off, 4); pci_write_config(dev, BGE_PCI_REG_DATA, val, 4); } static void bge_writemem_direct(struct bge_softc *sc, int off, int val) { CSR_WRITE_4(sc, off, val); } static void bge_writembx(struct bge_softc *sc, int off, int val) { if (sc->bge_asicrev == BGE_ASICREV_BCM5906) off += BGE_LPMBX_IRQ0_HI - BGE_MBX_IRQ0_HI; CSR_WRITE_4(sc, off, val); if ((sc->bge_flags & BGE_FLAG_MBOX_REORDER) != 0) CSR_READ_4(sc, off); } /* * Clear all stale locks and select the lock for this driver instance. */ static void bge_ape_lock_init(struct bge_softc *sc) { uint32_t bit, regbase; int i; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) regbase = BGE_APE_LOCK_GRANT; else regbase = BGE_APE_PER_LOCK_GRANT; /* Clear any stale locks. */ for (i = BGE_APE_LOCK_PHY0; i <= BGE_APE_LOCK_GPIO; i++) { switch (i) { case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: bit = BGE_APE_LOCK_GRANT_DRIVER0; break; default: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); } APE_WRITE_4(sc, regbase + 4 * i, bit); } /* Select the PHY lock based on the device's function number. */ switch (sc->bge_func_addr) { case 0: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY0; break; case 1: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY1; break; case 2: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY2; break; case 3: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY3; break; default: device_printf(sc->bge_dev, "PHY lock not supported on this function\n"); } } /* * Check for APE firmware, set flags, and print version info. */ static void bge_ape_read_fw_ver(struct bge_softc *sc) { const char *fwtype; uint32_t apedata, features; /* Check for a valid APE signature in shared memory. */ apedata = APE_READ_4(sc, BGE_APE_SEG_SIG); if (apedata != BGE_APE_SEG_SIG_MAGIC) { sc->bge_mfw_flags &= ~ BGE_MFW_ON_APE; return; } /* Check if APE firmware is running. */ apedata = APE_READ_4(sc, BGE_APE_FW_STATUS); if ((apedata & BGE_APE_FW_STATUS_READY) == 0) { device_printf(sc->bge_dev, "APE signature found " "but FW status not ready! 0x%08x\n", apedata); return; } sc->bge_mfw_flags |= BGE_MFW_ON_APE; /* Fetch the APE firwmare type and version. */ apedata = APE_READ_4(sc, BGE_APE_FW_VERSION); features = APE_READ_4(sc, BGE_APE_FW_FEATURES); if ((features & BGE_APE_FW_FEATURE_NCSI) != 0) { sc->bge_mfw_flags |= BGE_MFW_TYPE_NCSI; fwtype = "NCSI"; } else if ((features & BGE_APE_FW_FEATURE_DASH) != 0) { sc->bge_mfw_flags |= BGE_MFW_TYPE_DASH; fwtype = "DASH"; } else fwtype = "UNKN"; /* Print the APE firmware version. */ device_printf(sc->bge_dev, "APE FW version: %s v%d.%d.%d.%d\n", fwtype, (apedata & BGE_APE_FW_VERSION_MAJMSK) >> BGE_APE_FW_VERSION_MAJSFT, (apedata & BGE_APE_FW_VERSION_MINMSK) >> BGE_APE_FW_VERSION_MINSFT, (apedata & BGE_APE_FW_VERSION_REVMSK) >> BGE_APE_FW_VERSION_REVSFT, (apedata & BGE_APE_FW_VERSION_BLDMSK)); } static int bge_ape_lock(struct bge_softc *sc, int locknum) { uint32_t bit, gnt, req, status; int i, off; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return (0); /* Lock request/grant registers have different bases. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5761) { req = BGE_APE_LOCK_REQ; gnt = BGE_APE_LOCK_GRANT; } else { req = BGE_APE_PER_LOCK_REQ; gnt = BGE_APE_PER_LOCK_GRANT; } off = 4 * locknum; switch (locknum) { case BGE_APE_LOCK_GPIO: /* Lock required when using GPIO. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5761) return (0); if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_GRC: /* Lock required to reset the device. */ if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_MEM: /* Lock required when accessing certain APE memory. */ if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: /* Lock required when accessing PHYs. */ bit = BGE_APE_LOCK_REQ_DRIVER0; break; default: return (EINVAL); } /* Request a lock. */ APE_WRITE_4(sc, req + off, bit); /* Wait up to 1 second to acquire lock. */ for (i = 0; i < 20000; i++) { status = APE_READ_4(sc, gnt + off); if (status == bit) break; DELAY(50); } /* Handle any errors. */ if (status != bit) { device_printf(sc->bge_dev, "APE lock %d request failed! " "request = 0x%04x[0x%04x], status = 0x%04x[0x%04x]\n", locknum, req + off, bit & 0xFFFF, gnt + off, status & 0xFFFF); /* Revoke the lock request. */ APE_WRITE_4(sc, gnt + off, bit); return (EBUSY); } return (0); } static void bge_ape_unlock(struct bge_softc *sc, int locknum) { uint32_t bit, gnt; int off; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) gnt = BGE_APE_LOCK_GRANT; else gnt = BGE_APE_PER_LOCK_GRANT; off = 4 * locknum; switch (locknum) { case BGE_APE_LOCK_GPIO: if (sc->bge_asicrev == BGE_ASICREV_BCM5761) return; if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_GRC: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_MEM: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: bit = BGE_APE_LOCK_GRANT_DRIVER0; break; default: return; } APE_WRITE_4(sc, gnt + off, bit); } /* * Send an event to the APE firmware. */ static void bge_ape_send_event(struct bge_softc *sc, uint32_t event) { uint32_t apedata; int i; /* NCSI does not support APE events. */ if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; /* Wait up to 1ms for APE to service previous event. */ for (i = 10; i > 0; i--) { if (bge_ape_lock(sc, BGE_APE_LOCK_MEM) != 0) break; apedata = APE_READ_4(sc, BGE_APE_EVENT_STATUS); if ((apedata & BGE_APE_EVENT_STATUS_EVENT_PENDING) == 0) { APE_WRITE_4(sc, BGE_APE_EVENT_STATUS, event | BGE_APE_EVENT_STATUS_EVENT_PENDING); bge_ape_unlock(sc, BGE_APE_LOCK_MEM); APE_WRITE_4(sc, BGE_APE_EVENT, BGE_APE_EVENT_1); break; } bge_ape_unlock(sc, BGE_APE_LOCK_MEM); DELAY(100); } if (i == 0) device_printf(sc->bge_dev, "APE event 0x%08x send timed out\n", event); } static void bge_ape_driver_state_change(struct bge_softc *sc, int kind) { uint32_t apedata, event; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; switch (kind) { case BGE_RESET_START: /* If this is the first load, clear the load counter. */ apedata = APE_READ_4(sc, BGE_APE_HOST_SEG_SIG); if (apedata != BGE_APE_HOST_SEG_SIG_MAGIC) APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, 0); else { apedata = APE_READ_4(sc, BGE_APE_HOST_INIT_COUNT); APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, ++apedata); } APE_WRITE_4(sc, BGE_APE_HOST_SEG_SIG, BGE_APE_HOST_SEG_SIG_MAGIC); APE_WRITE_4(sc, BGE_APE_HOST_SEG_LEN, BGE_APE_HOST_SEG_LEN_MAGIC); /* Add some version info if bge(4) supports it. */ APE_WRITE_4(sc, BGE_APE_HOST_DRIVER_ID, BGE_APE_HOST_DRIVER_ID_MAGIC(1, 0)); APE_WRITE_4(sc, BGE_APE_HOST_BEHAVIOR, BGE_APE_HOST_BEHAV_NO_PHYLOCK); APE_WRITE_4(sc, BGE_APE_HOST_HEARTBEAT_INT_MS, BGE_APE_HOST_HEARTBEAT_INT_DISABLE); APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE, BGE_APE_HOST_DRVR_STATE_START); event = BGE_APE_EVENT_STATUS_STATE_START; break; case BGE_RESET_SHUTDOWN: APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE, BGE_APE_HOST_DRVR_STATE_UNLOAD); event = BGE_APE_EVENT_STATUS_STATE_UNLOAD; break; case BGE_RESET_SUSPEND: event = BGE_APE_EVENT_STATUS_STATE_SUSPEND; break; default: return; } bge_ape_send_event(sc, event | BGE_APE_EVENT_STATUS_DRIVER_EVNT | BGE_APE_EVENT_STATUS_STATE_CHNGE); } /* * Map a single buffer address. */ static void bge_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct bge_dmamap_arg *ctx; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); ctx = arg; ctx->bge_busaddr = segs->ds_addr; } static uint8_t bge_nvram_getbyte(struct bge_softc *sc, int addr, uint8_t *dest) { uint32_t access, byte = 0; int i; /* Lock. */ CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1); for (i = 0; i < 8000; i++) { if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1) break; DELAY(20); } if (i == 8000) return (1); /* Enable access. */ access = CSR_READ_4(sc, BGE_NVRAM_ACCESS); CSR_WRITE_4(sc, BGE_NVRAM_ACCESS, access | BGE_NVRAMACC_ENABLE); CSR_WRITE_4(sc, BGE_NVRAM_ADDR, addr & 0xfffffffc); CSR_WRITE_4(sc, BGE_NVRAM_CMD, BGE_NVRAM_READCMD); for (i = 0; i < BGE_TIMEOUT * 10; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_NVRAM_CMD) & BGE_NVRAMCMD_DONE) { DELAY(10); break; } } if (i == BGE_TIMEOUT * 10) { if_printf(sc->bge_ifp, "nvram read timed out\n"); return (1); } /* Get result. */ byte = CSR_READ_4(sc, BGE_NVRAM_RDDATA); *dest = (bswap32(byte) >> ((addr % 4) * 8)) & 0xFF; /* Disable access. */ CSR_WRITE_4(sc, BGE_NVRAM_ACCESS, access); /* Unlock. */ CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_CLR1); CSR_READ_4(sc, BGE_NVRAM_SWARB); return (0); } /* * Read a sequence of bytes from NVRAM. */ static int bge_read_nvram(struct bge_softc *sc, caddr_t dest, int off, int cnt) { int err = 0, i; uint8_t byte = 0; if (sc->bge_asicrev != BGE_ASICREV_BCM5906) return (1); for (i = 0; i < cnt; i++) { err = bge_nvram_getbyte(sc, off + i, &byte); if (err) break; *(dest + i) = byte; } return (err ? 1 : 0); } /* * Read a byte of data stored in the EEPROM at address 'addr.' The * BCM570x supports both the traditional bitbang interface and an * auto access interface for reading the EEPROM. We use the auto * access method. */ static uint8_t bge_eeprom_getbyte(struct bge_softc *sc, int addr, uint8_t *dest) { int i; uint32_t byte = 0; /* * Enable use of auto EEPROM access so we can avoid * having to use the bitbang method. */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_AUTO_EEPROM); /* Reset the EEPROM, load the clock period. */ CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EEADDR_RESET | BGE_EEHALFCLK(BGE_HALFCLK_384SCL)); DELAY(20); /* Issue the read EEPROM command. */ CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EE_READCMD | addr); /* Wait for completion */ for(i = 0; i < BGE_TIMEOUT * 10; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_EE_ADDR) & BGE_EEADDR_DONE) break; } if (i == BGE_TIMEOUT * 10) { device_printf(sc->bge_dev, "EEPROM read timed out\n"); return (1); } /* Get result. */ byte = CSR_READ_4(sc, BGE_EE_DATA); *dest = (byte >> ((addr % 4) * 8)) & 0xFF; return (0); } /* * Read a sequence of bytes from the EEPROM. */ static int bge_read_eeprom(struct bge_softc *sc, caddr_t dest, int off, int cnt) { int i, error = 0; uint8_t byte = 0; for (i = 0; i < cnt; i++) { error = bge_eeprom_getbyte(sc, off + i, &byte); if (error) break; *(dest + i) = byte; } return (error ? 1 : 0); } static int bge_miibus_readreg(device_t dev, int phy, int reg) { struct bge_softc *sc; uint32_t val; int i; sc = device_get_softc(dev); if (bge_ape_lock(sc, sc->bge_phy_ape_lock) != 0) return (0); /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY | BGE_MIPHY(phy) | BGE_MIREG(reg)); /* Poll for the PHY register access to complete. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); val = CSR_READ_4(sc, BGE_MI_COMM); if ((val & BGE_MICOMM_BUSY) == 0) { DELAY(5); val = CSR_READ_4(sc, BGE_MI_COMM); break; } } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "PHY read timed out (phy %d, reg %d, val 0x%08x)\n", phy, reg, val); val = 0; } /* Restore the autopoll bit if necessary. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } bge_ape_unlock(sc, sc->bge_phy_ape_lock); if (val & BGE_MICOMM_READFAIL) return (0); return (val & 0xFFFF); } static int bge_miibus_writereg(device_t dev, int phy, int reg, int val) { struct bge_softc *sc; int i; sc = device_get_softc(dev); if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && (reg == BRGPHY_MII_1000CTL || reg == BRGPHY_MII_AUXCTL)) return (0); if (bge_ape_lock(sc, sc->bge_phy_ape_lock) != 0) return (0); /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY | BGE_MIPHY(phy) | BGE_MIREG(reg) | val); for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, BGE_MI_COMM) & BGE_MICOMM_BUSY)) { DELAY(5); CSR_READ_4(sc, BGE_MI_COMM); /* dummy read */ break; } } /* Restore the autopoll bit if necessary. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } bge_ape_unlock(sc, sc->bge_phy_ape_lock); if (i == BGE_TIMEOUT) device_printf(sc->bge_dev, "PHY write timed out (phy %d, reg %d, val 0x%04x)\n", phy, reg, val); return (0); } static void bge_miibus_statchg(device_t dev) { struct bge_softc *sc; struct mii_data *mii; uint32_t mac_mode, rx_mode, tx_mode; sc = device_get_softc(dev); if ((if_getdrvflags(sc->bge_ifp) & IFF_DRV_RUNNING) == 0) return; mii = device_get_softc(sc->bge_miibus); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->bge_link = 1; break; case IFM_1000_T: case IFM_1000_SX: case IFM_2500_SX: if (sc->bge_asicrev != BGE_ASICREV_BCM5906) sc->bge_link = 1; else sc->bge_link = 0; break; default: sc->bge_link = 0; break; } } else sc->bge_link = 0; if (sc->bge_link == 0) return; /* * APE firmware touches these registers to keep the MAC * connected to the outside world. Try to keep the * accesses atomic. */ /* Set the port mode (MII/GMII) to match the link speed. */ mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & ~(BGE_MACMODE_PORTMODE | BGE_MACMODE_HALF_DUPLEX); tx_mode = CSR_READ_4(sc, BGE_TX_MODE); rx_mode = CSR_READ_4(sc, BGE_RX_MODE); if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T || IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX) mac_mode |= BGE_PORTMODE_GMII; else mac_mode |= BGE_PORTMODE_MII; /* Set MAC flow control behavior to match link flow control settings. */ tx_mode &= ~BGE_TXMODE_FLOWCTL_ENABLE; rx_mode &= ~BGE_RXMODE_FLOWCTL_ENABLE; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) tx_mode |= BGE_TXMODE_FLOWCTL_ENABLE; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) rx_mode |= BGE_RXMODE_FLOWCTL_ENABLE; } else mac_mode |= BGE_MACMODE_HALF_DUPLEX; CSR_WRITE_4(sc, BGE_MAC_MODE, mac_mode); DELAY(40); CSR_WRITE_4(sc, BGE_TX_MODE, tx_mode); CSR_WRITE_4(sc, BGE_RX_MODE, rx_mode); } /* * Intialize a standard receive ring descriptor. */ static int bge_newbuf_std(struct bge_softc *sc, int i) { struct mbuf *m; struct bge_rx_bd *r; bus_dma_segment_t segs[1]; bus_dmamap_t map; int error, nsegs; if (sc->bge_flags & BGE_FLAG_JUMBO_STD && (if_getmtu(sc->bge_ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) { m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; } else { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; } if ((sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) == 0) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } if (sc->bge_cdata.bge_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); } map = sc->bge_cdata.bge_rx_std_dmamap[i]; sc->bge_cdata.bge_rx_std_dmamap[i] = sc->bge_cdata.bge_rx_std_sparemap; sc->bge_cdata.bge_rx_std_sparemap = map; sc->bge_cdata.bge_rx_std_chain[i] = m; sc->bge_cdata.bge_rx_std_seglen[i] = segs[0].ds_len; r = &sc->bge_ldata.bge_rx_std_ring[sc->bge_std]; r->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[0].ds_addr); r->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[0].ds_addr); r->bge_flags = BGE_RXBDFLAG_END; r->bge_len = segs[0].ds_len; r->bge_idx = i; bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_PREREAD); return (0); } /* * Initialize a jumbo receive ring descriptor. This allocates * a jumbo buffer from the pool managed internally by the driver. */ static int bge_newbuf_jumbo(struct bge_softc *sc, int i) { bus_dma_segment_t segs[BGE_NSEG_JUMBO]; bus_dmamap_t map; struct bge_extrx_bd *r; struct mbuf *m; int error, nsegs; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); if (m_cljget(m, M_NOWAIT, MJUM9BYTES) == NULL) { m_freem(m); return (ENOBUFS); } m->m_len = m->m_pkthdr.len = MJUM9BYTES; if ((sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) == 0) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } if (sc->bge_cdata.bge_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); } map = sc->bge_cdata.bge_rx_jumbo_dmamap[i]; sc->bge_cdata.bge_rx_jumbo_dmamap[i] = sc->bge_cdata.bge_rx_jumbo_sparemap; sc->bge_cdata.bge_rx_jumbo_sparemap = map; sc->bge_cdata.bge_rx_jumbo_chain[i] = m; sc->bge_cdata.bge_rx_jumbo_seglen[i][0] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][1] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][2] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][3] = 0; /* * Fill in the extended RX buffer descriptor. */ r = &sc->bge_ldata.bge_rx_jumbo_ring[sc->bge_jumbo]; r->bge_flags = BGE_RXBDFLAG_JUMBO_RING | BGE_RXBDFLAG_END; r->bge_idx = i; r->bge_len3 = r->bge_len2 = r->bge_len1 = 0; switch (nsegs) { case 4: r->bge_addr3.bge_addr_lo = BGE_ADDR_LO(segs[3].ds_addr); r->bge_addr3.bge_addr_hi = BGE_ADDR_HI(segs[3].ds_addr); r->bge_len3 = segs[3].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][3] = segs[3].ds_len; case 3: r->bge_addr2.bge_addr_lo = BGE_ADDR_LO(segs[2].ds_addr); r->bge_addr2.bge_addr_hi = BGE_ADDR_HI(segs[2].ds_addr); r->bge_len2 = segs[2].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][2] = segs[2].ds_len; case 2: r->bge_addr1.bge_addr_lo = BGE_ADDR_LO(segs[1].ds_addr); r->bge_addr1.bge_addr_hi = BGE_ADDR_HI(segs[1].ds_addr); r->bge_len1 = segs[1].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][1] = segs[1].ds_len; case 1: r->bge_addr0.bge_addr_lo = BGE_ADDR_LO(segs[0].ds_addr); r->bge_addr0.bge_addr_hi = BGE_ADDR_HI(segs[0].ds_addr); r->bge_len0 = segs[0].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][0] = segs[0].ds_len; break; default: panic("%s: %d segments\n", __func__, nsegs); } bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_PREREAD); return (0); } static int bge_init_rx_ring_std(struct bge_softc *sc) { int error, i; bzero(sc->bge_ldata.bge_rx_std_ring, BGE_STD_RX_RING_SZ); sc->bge_std = 0; for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if ((error = bge_newbuf_std(sc, i)) != 0) return (error); BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_std = 0; bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, BGE_STD_RX_RING_CNT - 1); return (0); } static void bge_free_rx_ring_std(struct bge_softc *sc) { int i; for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); m_freem(sc->bge_cdata.bge_rx_std_chain[i]); sc->bge_cdata.bge_rx_std_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_rx_std_ring[i], sizeof(struct bge_rx_bd)); } } static int bge_init_rx_ring_jumbo(struct bge_softc *sc) { struct bge_rcb *rcb; int error, i; bzero(sc->bge_ldata.bge_rx_jumbo_ring, BGE_JUMBO_RX_RING_SZ); sc->bge_jumbo = 0; for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if ((error = bge_newbuf_jumbo(sc, i)) != 0) return (error); BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_jumbo = 0; /* Enable the jumbo receive producer ring. */ rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_USE_EXT_RX_BD); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, BGE_JUMBO_RX_RING_CNT - 1); return (0); } static void bge_free_rx_ring_jumbo(struct bge_softc *sc) { int i; for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); m_freem(sc->bge_cdata.bge_rx_jumbo_chain[i]); sc->bge_cdata.bge_rx_jumbo_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_rx_jumbo_ring[i], sizeof(struct bge_extrx_bd)); } } static void bge_free_tx_ring(struct bge_softc *sc) { int i; if (sc->bge_ldata.bge_tx_ring == NULL) return; for (i = 0; i < BGE_TX_RING_CNT; i++) { if (sc->bge_cdata.bge_tx_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i]); m_freem(sc->bge_cdata.bge_tx_chain[i]); sc->bge_cdata.bge_tx_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_tx_ring[i], sizeof(struct bge_tx_bd)); } } static int bge_init_tx_ring(struct bge_softc *sc) { sc->bge_txcnt = 0; sc->bge_tx_saved_considx = 0; bzero(sc->bge_ldata.bge_tx_ring, BGE_TX_RING_SZ); bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Initialize transmit producer index for host-memory send ring. */ sc->bge_tx_prodidx = 0; bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx); /* NIC-memory send ring not used; initialize to zero. */ bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0); return (0); } static void bge_setpromisc(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; /* Enable or disable promiscuous mode as needed. */ if (if_getflags(ifp) & IFF_PROMISC) BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC); else BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC); } static void bge_setmulti(struct bge_softc *sc) { if_t ifp; int mc_count = 0; uint32_t hashes[4] = { 0, 0, 0, 0 }; int h, i, mcnt; unsigned char *mta; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; mc_count = if_multiaddr_count(ifp, -1); mta = malloc(sizeof(unsigned char) * ETHER_ADDR_LEN * mc_count, M_DEVBUF, M_NOWAIT); if(mta == NULL) { device_printf(sc->bge_dev, "Failed to allocated temp mcast list\n"); return; } if (if_getflags(ifp) & IFF_ALLMULTI || if_getflags(ifp) & IFF_PROMISC) { for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF); free(mta, M_DEVBUF); return; } /* First, zot all the existing filters. */ for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0); if_multiaddr_array(ifp, mta, &mcnt, mc_count); for(i = 0; i < mcnt; i++) { h = ether_crc32_le(mta + (i * ETHER_ADDR_LEN), ETHER_ADDR_LEN) & 0x7F; hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F); } for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]); free(mta, M_DEVBUF); } static void bge_setvlan(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; /* Enable or disable VLAN tag stripping as needed. */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG); else BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG); } static void bge_sig_pre_reset(struct bge_softc *sc, int type) { /* * Some chips don't like this so only do this if ASF is enabled */ if (sc->bge_asf_mode) bge_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC); if (sc->bge_asf_mode & ASF_NEW_HANDSHAKE) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START); break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD); break; case BGE_RESET_SUSPEND: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_SUSPEND); break; } } if (type == BGE_RESET_START || type == BGE_RESET_SUSPEND) bge_ape_driver_state_change(sc, type); } static void bge_sig_post_reset(struct bge_softc *sc, int type) { if (sc->bge_asf_mode & ASF_NEW_HANDSHAKE) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START_DONE); /* START DONE */ break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD_DONE); break; } } if (type == BGE_RESET_SHUTDOWN) bge_ape_driver_state_change(sc, type); } static void bge_sig_legacy(struct bge_softc *sc, int type) { if (sc->bge_asf_mode) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START); break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD); break; } } } static void bge_stop_fw(struct bge_softc *sc) { int i; if (sc->bge_asf_mode) { bge_writemem_ind(sc, BGE_SRAM_FW_CMD_MB, BGE_FW_CMD_PAUSE); CSR_WRITE_4(sc, BGE_RX_CPU_EVENT, CSR_READ_4(sc, BGE_RX_CPU_EVENT) | BGE_RX_CPU_DRV_EVENT); for (i = 0; i < 100; i++ ) { if (!(CSR_READ_4(sc, BGE_RX_CPU_EVENT) & BGE_RX_CPU_DRV_EVENT)) break; DELAY(10); } } } static uint32_t bge_dma_swap_options(struct bge_softc *sc) { uint32_t dma_options; dma_options = BGE_MODECTL_WORDSWAP_NONFRAME | BGE_MODECTL_BYTESWAP_DATA | BGE_MODECTL_WORDSWAP_DATA; #if BYTE_ORDER == BIG_ENDIAN dma_options |= BGE_MODECTL_BYTESWAP_NONFRAME; #endif return (dma_options); } /* * Do endian, PCI and DMA initialization. */ static int bge_chipinit(struct bge_softc *sc) { uint32_t dma_rw_ctl, misc_ctl, mode_ctl; uint16_t val; int i; /* Set endianness before we access any non-PCI registers. */ misc_ctl = BGE_INIT; if (sc->bge_flags & BGE_FLAG_TAGGED_STATUS) misc_ctl |= BGE_PCIMISCCTL_TAGGED_STATUS; pci_write_config(sc->bge_dev, BGE_PCI_MISC_CTL, misc_ctl, 4); /* * Clear the MAC statistics block in the NIC's * internal memory. */ for (i = BGE_STATS_BLOCK; i < BGE_STATS_BLOCK_END + 1; i += sizeof(uint32_t)) BGE_MEMWIN_WRITE(sc, i, 0); for (i = BGE_STATUS_BLOCK; i < BGE_STATUS_BLOCK_END + 1; i += sizeof(uint32_t)) BGE_MEMWIN_WRITE(sc, i, 0); if (sc->bge_chiprev == BGE_CHIPREV_5704_BX) { /* * Fix data corruption caused by non-qword write with WB. * Fix master abort in PCI mode. * Fix PCI latency timer. */ val = pci_read_config(sc->bge_dev, BGE_PCI_MSI_DATA + 2, 2); val |= (1 << 10) | (1 << 12) | (1 << 13); pci_write_config(sc->bge_dev, BGE_PCI_MSI_DATA + 2, val, 2); } if (sc->bge_asicrev == BGE_ASICREV_BCM57765 || sc->bge_asicrev == BGE_ASICREV_BCM57766) { /* * For the 57766 and non Ax versions of 57765, bootcode * needs to setup the PCIE Fast Training Sequence (FTS) * value to prevent transmit hangs. */ if (sc->bge_chiprev != BGE_CHIPREV_57765_AX) { CSR_WRITE_4(sc, BGE_CPMU_PADRNG_CTL, CSR_READ_4(sc, BGE_CPMU_PADRNG_CTL) | BGE_CPMU_PADRNG_CTL_RDIV2); } } /* * Set up the PCI DMA control register. */ dma_rw_ctl = BGE_PCIDMARWCTL_RD_CMD_SHIFT(6) | BGE_PCIDMARWCTL_WR_CMD_SHIFT(7); if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_mps >= 256) dma_rw_ctl |= BGE_PCIDMARWCTL_WR_WAT_SHIFT(7); else dma_rw_ctl |= BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else if (sc->bge_flags & BGE_FLAG_PCIX) { if (BGE_IS_5714_FAMILY(sc)) { /* 256 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(2) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(2); dma_rw_ctl |= (sc->bge_asicrev == BGE_ASICREV_BCM5780) ? BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL : BGE_PCIDMARWCTL_ONEDMA_ATONCE_LOCAL; } else if (sc->bge_asicrev == BGE_ASICREV_BCM5703) { /* * In the BCM5703, the DMA read watermark should * be set to less than or equal to the maximum * memory read byte count of the PCI-X command * register. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(4) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { /* 1536 bytes for read, 384 bytes for write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(7) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else { /* 384 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(3) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3) | 0x0F; } if (sc->bge_asicrev == BGE_ASICREV_BCM5703 || sc->bge_asicrev == BGE_ASICREV_BCM5704) { uint32_t tmp; /* Set ONE_DMA_AT_ONCE for hardware workaround. */ tmp = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1F; if (tmp == 6 || tmp == 7) dma_rw_ctl |= BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL; /* Set PCI-X DMA write workaround. */ dma_rw_ctl |= BGE_PCIDMARWCTL_ASRT_ALL_BE; } } else { /* Conventional PCI bus: 256 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(7) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(7); if (sc->bge_asicrev != BGE_ASICREV_BCM5705 && sc->bge_asicrev != BGE_ASICREV_BCM5750) dma_rw_ctl |= 0x0F; } if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_asicrev == BGE_ASICREV_BCM5701) dma_rw_ctl |= BGE_PCIDMARWCTL_USE_MRM | BGE_PCIDMARWCTL_ASRT_ALL_BE; if (sc->bge_asicrev == BGE_ASICREV_BCM5703 || sc->bge_asicrev == BGE_ASICREV_BCM5704) dma_rw_ctl &= ~BGE_PCIDMARWCTL_MINDMA; if (BGE_IS_5717_PLUS(sc)) { dma_rw_ctl &= ~BGE_PCIDMARWCTL_DIS_CACHE_ALIGNMENT; if (sc->bge_chipid == BGE_CHIPID_BCM57765_A0) dma_rw_ctl &= ~BGE_PCIDMARWCTL_CRDRDR_RDMA_MRRS_MSK; /* * Enable HW workaround for controllers that misinterpret * a status tag update and leave interrupts permanently * disabled. */ if (!BGE_IS_57765_PLUS(sc) && sc->bge_asicrev != BGE_ASICREV_BCM5717 && sc->bge_asicrev != BGE_ASICREV_BCM5762) dma_rw_ctl |= BGE_PCIDMARWCTL_TAGGED_STATUS_WA; } pci_write_config(sc->bge_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4); /* * Set up general mode register. */ mode_ctl = bge_dma_swap_options(sc); if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { /* Retain Host-2-BMC settings written by APE firmware. */ mode_ctl |= CSR_READ_4(sc, BGE_MODE_CTL) & (BGE_MODECTL_BYTESWAP_B2HRX_DATA | BGE_MODECTL_WORDSWAP_B2HRX_DATA | BGE_MODECTL_B2HRX_ENABLE | BGE_MODECTL_HTX2B_ENABLE); } mode_ctl |= BGE_MODECTL_MAC_ATTN_INTR | BGE_MODECTL_HOST_SEND_BDS | BGE_MODECTL_TX_NO_PHDR_CSUM; /* * BCM5701 B5 have a bug causing data corruption when using * 64-bit DMA reads, which can be terminated early and then * completed later as 32-bit accesses, in combination with * certain bridges. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5701 && sc->bge_chipid == BGE_CHIPID_BCM5701_B5) mode_ctl |= BGE_MODECTL_FORCE_PCI32; /* * Tell the firmware the driver is running */ if (sc->bge_asf_mode & ASF_STACKUP) mode_ctl |= BGE_MODECTL_STACKUP; CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl); /* * Disable memory write invalidate. Apparently it is not supported * properly by these devices. */ PCI_CLRBIT(sc->bge_dev, BGE_PCI_CMD, PCIM_CMD_MWIEN, 4); /* Set the timer prescaler (always 66 MHz). */ CSR_WRITE_4(sc, BGE_MISC_CFG, BGE_32BITTIME_66MHZ); /* XXX: The Linux tg3 driver does this at the start of brgphy_reset. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { DELAY(40); /* XXX */ /* Put PHY into ready state */ BGE_CLRBIT(sc, BGE_MISC_CFG, BGE_MISCCFG_EPHY_IDDQ); CSR_READ_4(sc, BGE_MISC_CFG); /* Flush */ DELAY(40); } return (0); } static int bge_blockinit(struct bge_softc *sc) { struct bge_rcb *rcb; bus_size_t vrcb; bge_hostaddr taddr; uint32_t dmactl, rdmareg, val; int i, limit; /* * Initialize the memory window pointer register so that * we can access the first 32K of internal NIC RAM. This will * allow us to set up the TX send ring RCBs and the RX return * ring RCBs, plus other things which live in NIC memory. */ CSR_WRITE_4(sc, BGE_PCI_MEMWIN_BASEADDR, 0); /* Note: the BCM5704 has a smaller mbuf space than other chips. */ if (!(BGE_IS_5705_PLUS(sc))) { /* Configure mbuf memory pool */ CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_BASEADDR, BGE_BUFFPOOL_1); if (sc->bge_asicrev == BGE_ASICREV_BCM5704) CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_LEN, 0x10000); else CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_LEN, 0x18000); /* Configure DMA resource pool */ CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_BASEADDR, BGE_DMA_DESCRIPTORS); CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LEN, 0x2000); } /* Configure mbuf pool watermarks */ if (BGE_IS_5717_PLUS(sc)) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); if (if_getmtu(sc->bge_ifp) > ETHERMTU) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea); } else { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x2a); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xa0); } } else if (!BGE_IS_5705_PLUS(sc)) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x50); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x20); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x04); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x10); } else { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x10); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60); } /* Configure DMA resource watermarks */ CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LOWAT, 5); CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10); /* Enable buffer manager */ val = BGE_BMANMODE_ENABLE | BGE_BMANMODE_LOMBUF_ATTN; /* * Change the arbitration algorithm of TXMBUF read request to * round-robin instead of priority based for BCM5719. When * TXFIFO is almost empty, RDMA will hold its request until * TXFIFO is not almost empty. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val |= BGE_BMANMODE_NO_TX_UNDERRUN; CSR_WRITE_4(sc, BGE_BMAN_MODE, val); /* Poll for buffer manager start indication */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "buffer manager failed to start\n"); return (ENXIO); } /* Enable flow-through queues */ CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0); /* Wait until queue initialization is complete */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_FTQ_RESET) == 0) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "flow-through queue init failed\n"); return (ENXIO); } /* * Summary of rings supported by the controller: * * Standard Receive Producer Ring * - This ring is used to feed receive buffers for "standard" * sized frames (typically 1536 bytes) to the controller. * * Jumbo Receive Producer Ring * - This ring is used to feed receive buffers for jumbo sized * frames (i.e. anything bigger than the "standard" frames) * to the controller. * * Mini Receive Producer Ring * - This ring is used to feed receive buffers for "mini" * sized frames to the controller. * - This feature required external memory for the controller * but was never used in a production system. Should always * be disabled. * * Receive Return Ring * - After the controller has placed an incoming frame into a * receive buffer that buffer is moved into a receive return * ring. The driver is then responsible to passing the * buffer up to the stack. Many versions of the controller * support multiple RR rings. * * Send Ring * - This ring is used for outgoing frames. Many versions of * the controller support multiple send rings. */ /* Initialize the standard receive producer ring control block. */ rcb = &sc->bge_ldata.bge_info.bge_std_rx_rcb; rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_std_ring_paddr); rcb->bge_hostaddr.bge_addr_hi = BGE_ADDR_HI(sc->bge_ldata.bge_rx_std_ring_paddr); bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREREAD); if (BGE_IS_5717_PLUS(sc)) { /* * Bits 31-16: Programmable ring size (2048, 1024, 512, .., 32) * Bits 15-2 : Maximum RX frame size * Bit 1 : 1 = Ring Disabled, 0 = Ring ENabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, BGE_MAX_FRAMELEN << 2); } else if (BGE_IS_5705_PLUS(sc)) { /* * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32) * Bits 15-2 : Reserved (should be 0) * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0); } else { /* * Ring size is always XXX entries * Bits 31-16: Maximum RX frame size * Bits 15-2 : Reserved (should be 0) * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN, 0); } if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) rcb->bge_nicaddr = BGE_STD_RX_RINGS_5717; else rcb->bge_nicaddr = BGE_STD_RX_RINGS; /* Write the standard receive producer ring control block. */ CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr); /* Reset the standard receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0); /* * Initialize the jumbo RX producer ring control * block. We set the 'ring disabled' bit in the * flags field until we're actually ready to start * using this ring (i.e. once we set the MTU * high enough to require it). */ if (BGE_IS_JUMBO_CAPABLE(sc)) { rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; /* Get the jumbo receive producer ring RCB parameters. */ rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_jumbo_ring_paddr); rcb->bge_hostaddr.bge_addr_hi = BGE_ADDR_HI(sc->bge_ldata.bge_rx_jumbo_ring_paddr); bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREREAD); rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_USE_EXT_RX_BD | BGE_RCB_FLAG_RING_DISABLED); if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS_5717; else rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS; CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); /* Program the jumbo receive producer ring RCB parameters. */ CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr); /* Reset the jumbo receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0); } /* Disable the mini receive producer ring RCB. */ if (BGE_IS_5700_FAMILY(sc)) { rcb = &sc->bge_ldata.bge_info.bge_mini_rx_rcb; rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED); CSR_WRITE_4(sc, BGE_RX_MINI_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); /* Reset the mini receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0); } /* Choose de-pipeline mode for BCM5906 A0, A1 and A2. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { if (sc->bge_chipid == BGE_CHIPID_BCM5906_A0 || sc->bge_chipid == BGE_CHIPID_BCM5906_A1 || sc->bge_chipid == BGE_CHIPID_BCM5906_A2) CSR_WRITE_4(sc, BGE_ISO_PKT_TX, (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2); } /* * The BD ring replenish thresholds control how often the * hardware fetches new BD's from the producer rings in host * memory. Setting the value too low on a busy system can * starve the hardware and recue the throughpout. * * Set the BD ring replentish thresholds. The recommended * values are 1/8th the number of descriptors allocated to * each ring. * XXX The 5754 requires a lower threshold, so it might be a * requirement of all 575x family chips. The Linux driver sets * the lower threshold for all 5705 family chips as well, but there * are reports that it might not need to be so strict. * * XXX Linux does some extra fiddling here for the 5906 parts as * well. */ if (BGE_IS_5705_PLUS(sc)) val = 8; else val = BGE_STD_RX_RING_CNT / 8; CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val); if (BGE_IS_JUMBO_CAPABLE(sc)) CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH, BGE_JUMBO_RX_RING_CNT/8); if (BGE_IS_5717_PLUS(sc)) { CSR_WRITE_4(sc, BGE_STD_REPLENISH_LWM, 32); CSR_WRITE_4(sc, BGE_JMB_REPLENISH_LWM, 16); } /* * Disable all send rings by setting the 'ring disabled' bit * in the flags field of all the TX send ring control blocks, * located in NIC memory. */ if (!BGE_IS_5705_PLUS(sc)) /* 5700 to 5704 had 16 send rings. */ limit = BGE_TX_RINGS_EXTSSRAM_MAX; else if (BGE_IS_57765_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5762) limit = 2; else if (BGE_IS_5717_PLUS(sc)) limit = 4; else limit = 1; vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED)); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); vrcb += sizeof(struct bge_rcb); } /* Configure send ring RCB 0 (we use only the first ring) */ vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_tx_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_SEND_RING_5717); else RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_NIC_TXRING_ADDR(0, BGE_TX_RING_CNT)); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0)); /* * Disable all receive return rings by setting the * 'ring diabled' bit in the flags field of all the receive * return ring control blocks, located in NIC memory. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* Should be 17, use 16 until we get an SRAM map. */ limit = 16; } else if (!BGE_IS_5705_PLUS(sc)) limit = BGE_RX_RINGS_MAX; else if (sc->bge_asicrev == BGE_ASICREV_BCM5755 || sc->bge_asicrev == BGE_ASICREV_BCM5762 || BGE_IS_57765_PLUS(sc)) limit = 4; else limit = 1; /* Disable all receive return rings. */ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_FLAG_RING_DISABLED); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); bge_writembx(sc, BGE_MBX_RX_CONS0_LO + (i * (sizeof(uint64_t))), 0); vrcb += sizeof(struct bge_rcb); } /* * Set up receive return ring 0. Note that the NIC address * for RX return rings is 0x0. The return rings live entirely * within the host, so the nicaddr field in the RCB isn't used. */ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_rx_return_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(sc->bge_return_ring_cnt, 0)); /* Set random backoff seed for TX */ CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF, (IF_LLADDR(sc->bge_ifp)[0] + IF_LLADDR(sc->bge_ifp)[1] + IF_LLADDR(sc->bge_ifp)[2] + IF_LLADDR(sc->bge_ifp)[3] + IF_LLADDR(sc->bge_ifp)[4] + IF_LLADDR(sc->bge_ifp)[5]) & BGE_TX_BACKOFF_SEED_MASK); /* Set inter-packet gap */ val = 0x2620; if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) val |= CSR_READ_4(sc, BGE_TX_LENGTHS) & (BGE_TXLEN_JMB_FRM_LEN_MSK | BGE_TXLEN_CNT_DN_VAL_MSK); CSR_WRITE_4(sc, BGE_TX_LENGTHS, val); /* * Specify which ring to use for packets that don't match * any RX rules. */ CSR_WRITE_4(sc, BGE_RX_RULES_CFG, 0x08); /* * Configure number of RX lists. One interrupt distribution * list, sixteen active lists, one bad frames class. */ CSR_WRITE_4(sc, BGE_RXLP_CFG, 0x181); /* Inialize RX list placement stats mask. */ CSR_WRITE_4(sc, BGE_RXLP_STATS_ENABLE_MASK, 0x007FFFFF); CSR_WRITE_4(sc, BGE_RXLP_STATS_CTL, 0x1); /* Disable host coalescing until we get it set up */ CSR_WRITE_4(sc, BGE_HCC_MODE, 0x00000000); /* Poll to make sure it's shut down. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, BGE_HCC_MODE) & BGE_HCCMODE_ENABLE)) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "host coalescing engine failed to idle\n"); return (ENXIO); } /* Set up host coalescing defaults */ CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, sc->bge_rx_coal_ticks); CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, sc->bge_tx_coal_ticks); CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, sc->bge_rx_max_coal_bds); CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, sc->bge_tx_max_coal_bds); if (!(BGE_IS_5705_PLUS(sc))) { CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS_INT, 0); CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS_INT, 0); } CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, 1); CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, 1); /* Set up address of statistics block */ if (!(BGE_IS_5705_PLUS(sc))) { CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_HI, BGE_ADDR_HI(sc->bge_ldata.bge_stats_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_LO, BGE_ADDR_LO(sc->bge_ldata.bge_stats_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATS_BASEADDR, BGE_STATS_BLOCK); CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_BASEADDR, BGE_STATUS_BLOCK); CSR_WRITE_4(sc, BGE_HCC_STATS_TICKS, sc->bge_stat_ticks); } /* Set up address of status block */ CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI, BGE_ADDR_HI(sc->bge_ldata.bge_status_block_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO, BGE_ADDR_LO(sc->bge_ldata.bge_status_block_paddr)); /* Set up status block size. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) { val = BGE_STATBLKSZ_FULL; bzero(sc->bge_ldata.bge_status_block, BGE_STATUS_BLK_SZ); } else { val = BGE_STATBLKSZ_32BYTE; bzero(sc->bge_ldata.bge_status_block, 32); } bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Turn on host coalescing state machine */ CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE); /* Turn on RX BD completion state machine and enable attentions */ CSR_WRITE_4(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE | BGE_RBDCMODE_ATTN); /* Turn on RX list placement state machine */ CSR_WRITE_4(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE); /* Turn on RX list selector state machine. */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE); /* Turn on DMA, clear stats. */ val = BGE_MACMODE_TXDMA_ENB | BGE_MACMODE_RXDMA_ENB | BGE_MACMODE_RX_STATS_CLEAR | BGE_MACMODE_TX_STATS_CLEAR | BGE_MACMODE_RX_STATS_ENB | BGE_MACMODE_TX_STATS_ENB | BGE_MACMODE_FRMHDR_DMA_ENB; if (sc->bge_flags & BGE_FLAG_TBI) val |= BGE_PORTMODE_TBI; else if (sc->bge_flags & BGE_FLAG_MII_SERDES) val |= BGE_PORTMODE_GMII; else val |= BGE_PORTMODE_MII; /* Allow APE to send/receive frames. */ if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) val |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN; CSR_WRITE_4(sc, BGE_MAC_MODE, val); DELAY(40); /* Set misc. local control, enable interrupts on attentions */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN); #ifdef notdef /* Assert GPIO pins for PHY reset */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUT0 | BGE_MLC_MISCIO_OUT1 | BGE_MLC_MISCIO_OUT2); BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUTEN0 | BGE_MLC_MISCIO_OUTEN1 | BGE_MLC_MISCIO_OUTEN2); #endif /* Turn on DMA completion state machine */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE); val = BGE_WDMAMODE_ENABLE | BGE_WDMAMODE_ALL_ATTNS; /* Enable host coalescing bug fix. */ if (BGE_IS_5755_PLUS(sc)) val |= BGE_WDMAMODE_STATUS_TAG_FIX; /* Request larger DMA burst size to get better performance. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5785) val |= BGE_WDMAMODE_BURST_ALL_DATA; /* Turn on write DMA state machine */ CSR_WRITE_4(sc, BGE_WDMA_MODE, val); DELAY(40); /* Turn on read DMA state machine */ val = BGE_RDMAMODE_ENABLE | BGE_RDMAMODE_ALL_ATTNS; if (sc->bge_asicrev == BGE_ASICREV_BCM5717) val |= BGE_RDMAMODE_MULT_DMA_RD_DIS; if (sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) val |= BGE_RDMAMODE_BD_SBD_CRPT_ATTN | BGE_RDMAMODE_MBUF_RBD_CRPT_ATTN | BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN; if (sc->bge_flags & BGE_FLAG_PCIE) val |= BGE_RDMAMODE_FIFO_LONG_BURST; if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) { val |= BGE_RDMAMODE_TSO4_ENABLE; if (sc->bge_flags & BGE_FLAG_TSO3 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) val |= BGE_RDMAMODE_TSO6_ENABLE; } if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { val |= CSR_READ_4(sc, BGE_RDMA_MODE) & BGE_RDMAMODE_H2BNC_VLAN_DET; /* * Allow multiple outstanding read requests from * non-LSO read DMA engine. */ val &= ~BGE_RDMAMODE_MULT_DMA_RD_DIS; } if (sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780 || BGE_IS_5717_PLUS(sc) || BGE_IS_57765_PLUS(sc)) { if (sc->bge_asicrev == BGE_ASICREV_BCM5762) rdmareg = BGE_RDMA_RSRVCTRL_REG2; else rdmareg = BGE_RDMA_RSRVCTRL; dmactl = CSR_READ_4(sc, rdmareg); /* * Adjust tx margin to prevent TX data corruption and * fix internal FIFO overflow. */ if (sc->bge_chipid == BGE_CHIPID_BCM5719_A0 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { dmactl &= ~(BGE_RDMA_RSRVCTRL_FIFO_LWM_MASK | BGE_RDMA_RSRVCTRL_FIFO_HWM_MASK | BGE_RDMA_RSRVCTRL_TXMRGN_MASK); dmactl |= BGE_RDMA_RSRVCTRL_FIFO_LWM_1_5K | BGE_RDMA_RSRVCTRL_FIFO_HWM_1_5K | BGE_RDMA_RSRVCTRL_TXMRGN_320B; } /* * Enable fix for read DMA FIFO overruns. * The fix is to limit the number of RX BDs * the hardware would fetch at a fime. */ CSR_WRITE_4(sc, rdmareg, dmactl | BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX); } if (sc->bge_asicrev == BGE_ASICREV_BCM5719) { CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* * Allow 4KB burst length reads for non-LSO frames. * Enable 512B burst length reads for buffer descriptors. */ CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_512 | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5762) { CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL_REG2, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL_REG2) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } CSR_WRITE_4(sc, BGE_RDMA_MODE, val); DELAY(40); if (sc->bge_flags & BGE_FLAG_RDMA_BUG) { for (i = 0; i < BGE_NUM_RDMA_CHANNELS / 2; i++) { val = CSR_READ_4(sc, BGE_RDMA_LENGTH + i * 4); if ((val & 0xFFFF) > BGE_FRAMELEN) break; if (((val >> 16) & 0xFFFF) > BGE_FRAMELEN) break; } if (i != BGE_NUM_RDMA_CHANNELS / 2) { val = CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL); if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val |= BGE_RDMA_TX_LENGTH_WA_5719; else val |= BGE_RDMA_TX_LENGTH_WA_5720; CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, val); } } /* Turn on RX data completion state machine */ CSR_WRITE_4(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE); /* Turn on RX BD initiator state machine */ CSR_WRITE_4(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE); /* Turn on RX data and RX BD initiator state machine */ CSR_WRITE_4(sc, BGE_RDBDI_MODE, BGE_RDBDIMODE_ENABLE); /* Turn on Mbuf cluster free state machine */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE); /* Turn on send BD completion state machine */ CSR_WRITE_4(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE); /* Turn on send data completion state machine */ val = BGE_SDCMODE_ENABLE; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) val |= BGE_SDCMODE_CDELAY; CSR_WRITE_4(sc, BGE_SDC_MODE, val); /* Turn on send data initiator state machine */ if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE | BGE_SDIMODE_HW_LSO_PRE_DMA); else CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE); /* Turn on send BD initiator state machine */ CSR_WRITE_4(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE); /* Turn on send BD selector state machine */ CSR_WRITE_4(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE); CSR_WRITE_4(sc, BGE_SDI_STATS_ENABLE_MASK, 0x007FFFFF); CSR_WRITE_4(sc, BGE_SDI_STATS_CTL, BGE_SDISTATSCTL_ENABLE | BGE_SDISTATSCTL_FASTER); /* ack/clear link change events */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); CSR_WRITE_4(sc, BGE_MI_STS, 0); /* * Enable attention when the link has changed state for * devices that use auto polling. */ if (sc->bge_flags & BGE_FLAG_TBI) { CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK); } else { if (sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) CSR_WRITE_4(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_MI_INTERRUPT); } /* * Clear any pending link state attention. * Otherwise some link state change events may be lost until attention * is cleared by bge_intr() -> bge_link_upd() sequence. * It's not necessary on newer BCM chips - perhaps enabling link * state change attentions implies clearing pending attention. */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); /* Enable link state change attentions. */ BGE_SETBIT(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_LINK_CHANGED); return (0); } static const struct bge_revision * bge_lookup_rev(uint32_t chipid) { const struct bge_revision *br; for (br = bge_revisions; br->br_name != NULL; br++) { if (br->br_chipid == chipid) return (br); } for (br = bge_majorrevs; br->br_name != NULL; br++) { if (br->br_chipid == BGE_ASICREV(chipid)) return (br); } return (NULL); } static const struct bge_vendor * bge_lookup_vendor(uint16_t vid) { const struct bge_vendor *v; for (v = bge_vendors; v->v_name != NULL; v++) if (v->v_id == vid) return (v); return (NULL); } static uint32_t bge_chipid(device_t dev) { uint32_t id; id = pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >> BGE_PCIMISCCTL_ASICREV_SHIFT; if (BGE_ASICREV(id) == BGE_ASICREV_USE_PRODID_REG) { /* * Find the ASCI revision. Different chips use different * registers. */ switch (pci_get_device(dev)) { case BCOM_DEVICEID_BCM5717C: /* 5717 C0 seems to belong to 5720 line. */ id = BGE_CHIPID_BCM5720_A0; break; case BCOM_DEVICEID_BCM5717: case BCOM_DEVICEID_BCM5718: case BCOM_DEVICEID_BCM5719: case BCOM_DEVICEID_BCM5720: case BCOM_DEVICEID_BCM5725: case BCOM_DEVICEID_BCM5727: case BCOM_DEVICEID_BCM5762: case BCOM_DEVICEID_BCM57764: case BCOM_DEVICEID_BCM57767: case BCOM_DEVICEID_BCM57787: id = pci_read_config(dev, BGE_PCI_GEN2_PRODID_ASICREV, 4); break; case BCOM_DEVICEID_BCM57761: case BCOM_DEVICEID_BCM57762: case BCOM_DEVICEID_BCM57765: case BCOM_DEVICEID_BCM57766: case BCOM_DEVICEID_BCM57781: case BCOM_DEVICEID_BCM57782: case BCOM_DEVICEID_BCM57785: case BCOM_DEVICEID_BCM57786: case BCOM_DEVICEID_BCM57791: case BCOM_DEVICEID_BCM57795: id = pci_read_config(dev, BGE_PCI_GEN15_PRODID_ASICREV, 4); break; default: id = pci_read_config(dev, BGE_PCI_PRODID_ASICREV, 4); } } return (id); } /* * Probe for a Broadcom chip. Check the PCI vendor and device IDs * against our list and return its name if we find a match. * * Note that since the Broadcom controller contains VPD support, we * try to get the device name string from the controller itself instead * of the compiled-in string. It guarantees we'll always announce the * right product name. We fall back to the compiled-in string when * VPD is unavailable or corrupt. */ static int bge_probe(device_t dev) { char buf[96]; char model[64]; const struct bge_revision *br; const char *pname; struct bge_softc *sc; const struct bge_type *t = bge_devs; const struct bge_vendor *v; uint32_t id; uint16_t did, vid; sc = device_get_softc(dev); sc->bge_dev = dev; vid = pci_get_vendor(dev); did = pci_get_device(dev); while(t->bge_vid != 0) { if ((vid == t->bge_vid) && (did == t->bge_did)) { id = bge_chipid(dev); br = bge_lookup_rev(id); if (bge_has_eaddr(sc) && pci_get_vpd_ident(dev, &pname) == 0) snprintf(model, sizeof(model), "%s", pname); else { v = bge_lookup_vendor(vid); snprintf(model, sizeof(model), "%s %s", v != NULL ? v->v_name : "Unknown", br != NULL ? br->br_name : "NetXtreme/NetLink Ethernet Controller"); } snprintf(buf, sizeof(buf), "%s, %sASIC rev. %#08x", model, br != NULL ? "" : "unknown ", id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static void bge_dma_free(struct bge_softc *sc) { int i; /* Destroy DMA maps for RX buffers. */ for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_std_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); } if (sc->bge_cdata.bge_rx_std_sparemap) bus_dmamap_destroy(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_sparemap); /* Destroy DMA maps for jumbo RX buffers. */ for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_jumbo_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); } if (sc->bge_cdata.bge_rx_jumbo_sparemap) bus_dmamap_destroy(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_sparemap); /* Destroy DMA maps for TX buffers. */ for (i = 0; i < BGE_TX_RING_CNT; i++) { if (sc->bge_cdata.bge_tx_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i]); } if (sc->bge_cdata.bge_rx_mtag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_mtag); if (sc->bge_cdata.bge_mtag_jumbo) bus_dma_tag_destroy(sc->bge_cdata.bge_mtag_jumbo); if (sc->bge_cdata.bge_tx_mtag) bus_dma_tag_destroy(sc->bge_cdata.bge_tx_mtag); /* Destroy standard RX ring. */ if (sc->bge_ldata.bge_rx_std_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map); if (sc->bge_ldata.bge_rx_std_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_ldata.bge_rx_std_ring, sc->bge_cdata.bge_rx_std_ring_map); if (sc->bge_cdata.bge_rx_std_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_std_ring_tag); /* Destroy jumbo RX ring. */ if (sc->bge_ldata.bge_rx_jumbo_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map); if (sc->bge_ldata.bge_rx_jumbo_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_ldata.bge_rx_jumbo_ring, sc->bge_cdata.bge_rx_jumbo_ring_map); if (sc->bge_cdata.bge_rx_jumbo_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_jumbo_ring_tag); /* Destroy RX return ring. */ if (sc->bge_ldata.bge_rx_return_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map); if (sc->bge_ldata.bge_rx_return_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_ldata.bge_rx_return_ring, sc->bge_cdata.bge_rx_return_ring_map); if (sc->bge_cdata.bge_rx_return_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_return_ring_tag); /* Destroy TX ring. */ if (sc->bge_ldata.bge_tx_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map); if (sc->bge_ldata.bge_tx_ring) bus_dmamem_free(sc->bge_cdata.bge_tx_ring_tag, sc->bge_ldata.bge_tx_ring, sc->bge_cdata.bge_tx_ring_map); if (sc->bge_cdata.bge_tx_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_tx_ring_tag); /* Destroy status block. */ if (sc->bge_ldata.bge_status_block_paddr) bus_dmamap_unload(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map); if (sc->bge_ldata.bge_status_block) bus_dmamem_free(sc->bge_cdata.bge_status_tag, sc->bge_ldata.bge_status_block, sc->bge_cdata.bge_status_map); if (sc->bge_cdata.bge_status_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_status_tag); /* Destroy statistics block. */ if (sc->bge_ldata.bge_stats_paddr) bus_dmamap_unload(sc->bge_cdata.bge_stats_tag, sc->bge_cdata.bge_stats_map); if (sc->bge_ldata.bge_stats) bus_dmamem_free(sc->bge_cdata.bge_stats_tag, sc->bge_ldata.bge_stats, sc->bge_cdata.bge_stats_map); if (sc->bge_cdata.bge_stats_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_stats_tag); if (sc->bge_cdata.bge_buffer_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_buffer_tag); /* Destroy the parent tag. */ if (sc->bge_cdata.bge_parent_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_parent_tag); } static int bge_dma_ring_alloc(struct bge_softc *sc, bus_size_t alignment, bus_size_t maxsize, bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t *map, bus_addr_t *paddr, const char *msg) { struct bge_dmamap_arg ctx; int error; error = bus_dma_tag_create(sc->bge_cdata.bge_parent_tag, alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag); if (error != 0) { device_printf(sc->bge_dev, "could not create %s dma tag\n", msg); return (ENOMEM); } /* Allocate DMA'able memory for ring. */ error = bus_dmamem_alloc(*tag, (void **)ring, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, map); if (error != 0) { device_printf(sc->bge_dev, "could not allocate DMA'able memory for %s\n", msg); return (ENOMEM); } /* Load the address of the ring. */ ctx.bge_busaddr = 0; error = bus_dmamap_load(*tag, *map, *ring, maxsize, bge_dma_map_addr, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->bge_dev, "could not load DMA'able memory for %s\n", msg); return (ENOMEM); } *paddr = ctx.bge_busaddr; return (0); } static int bge_dma_alloc(struct bge_softc *sc) { bus_addr_t lowaddr; bus_size_t rxmaxsegsz, sbsz, txsegsz, txmaxsegsz; int i, error; lowaddr = BUS_SPACE_MAXADDR; if ((sc->bge_flags & BGE_FLAG_40BIT_BUG) != 0) lowaddr = BGE_DMA_MAXADDR; /* * Allocate the parent bus DMA tag appropriate for PCI. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->bge_cdata.bge_parent_tag); if (error != 0) { device_printf(sc->bge_dev, "could not allocate parent dma tag\n"); return (ENOMEM); } /* Create tag for standard RX ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_STD_RX_RING_SZ, &sc->bge_cdata.bge_rx_std_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_std_ring, &sc->bge_cdata.bge_rx_std_ring_map, &sc->bge_ldata.bge_rx_std_ring_paddr, "RX ring"); if (error) return (error); /* Create tag for RX return ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_RX_RTN_RING_SZ(sc), &sc->bge_cdata.bge_rx_return_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_return_ring, &sc->bge_cdata.bge_rx_return_ring_map, &sc->bge_ldata.bge_rx_return_ring_paddr, "RX return ring"); if (error) return (error); /* Create tag for TX ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_TX_RING_SZ, &sc->bge_cdata.bge_tx_ring_tag, (uint8_t **)&sc->bge_ldata.bge_tx_ring, &sc->bge_cdata.bge_tx_ring_map, &sc->bge_ldata.bge_tx_ring_paddr, "TX ring"); if (error) return (error); /* * Create tag for status block. * Because we only use single Tx/Rx/Rx return ring, use * minimum status block size except BCM5700 AX/BX which * seems to want to see full status block size regardless * of configured number of ring. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) sbsz = BGE_STATUS_BLK_SZ; else sbsz = 32; error = bge_dma_ring_alloc(sc, PAGE_SIZE, sbsz, &sc->bge_cdata.bge_status_tag, (uint8_t **)&sc->bge_ldata.bge_status_block, &sc->bge_cdata.bge_status_map, &sc->bge_ldata.bge_status_block_paddr, "status block"); if (error) return (error); /* Create tag for statistics block. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_STATS_SZ, &sc->bge_cdata.bge_stats_tag, (uint8_t **)&sc->bge_ldata.bge_stats, &sc->bge_cdata.bge_stats_map, &sc->bge_ldata.bge_stats_paddr, "statistics block"); if (error) return (error); /* Create tag for jumbo RX ring. */ if (BGE_IS_JUMBO_CAPABLE(sc)) { error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_JUMBO_RX_RING_SZ, &sc->bge_cdata.bge_rx_jumbo_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_jumbo_ring, &sc->bge_cdata.bge_rx_jumbo_ring_map, &sc->bge_ldata.bge_rx_jumbo_ring_paddr, "jumbo RX ring"); if (error) return (error); } /* Create parent tag for buffers. */ if ((sc->bge_flags & BGE_FLAG_4G_BNDRY_BUG) != 0) { /* * XXX * watchdog timeout issue was observed on BCM5704 which * lives behind PCI-X bridge(e.g AMD 8131 PCI-X bridge). * Both limiting DMA address space to 32bits and flushing * mailbox write seem to address the issue. */ if (sc->bge_pcixcap != 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; } error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->bge_cdata.bge_buffer_tag); if (error != 0) { device_printf(sc->bge_dev, "could not allocate buffer dma tag\n"); return (ENOMEM); } /* Create tag for Tx mbufs. */ if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) { txsegsz = BGE_TSOSEG_SZ; txmaxsegsz = 65535 + sizeof(struct ether_vlan_header); } else { txsegsz = MCLBYTES; txmaxsegsz = MCLBYTES * BGE_NSEG_NEW; } error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, txmaxsegsz, BGE_NSEG_NEW, txsegsz, 0, NULL, NULL, &sc->bge_cdata.bge_tx_mtag); if (error) { device_printf(sc->bge_dev, "could not allocate TX dma tag\n"); return (ENOMEM); } /* Create tag for Rx mbufs. */ if (sc->bge_flags & BGE_FLAG_JUMBO_STD) rxmaxsegsz = MJUM9BYTES; else rxmaxsegsz = MCLBYTES; error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, rxmaxsegsz, 1, rxmaxsegsz, 0, NULL, NULL, &sc->bge_cdata.bge_rx_mtag); if (error) { device_printf(sc->bge_dev, "could not allocate RX dma tag\n"); return (ENOMEM); } /* Create DMA maps for RX buffers. */ error = bus_dmamap_create(sc->bge_cdata.bge_rx_mtag, 0, &sc->bge_cdata.bge_rx_std_sparemap); if (error) { device_printf(sc->bge_dev, "can't create spare DMA map for RX\n"); return (ENOMEM); } for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_rx_mtag, 0, &sc->bge_cdata.bge_rx_std_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for RX\n"); return (ENOMEM); } } /* Create DMA maps for TX buffers. */ for (i = 0; i < BGE_TX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_tx_mtag, 0, &sc->bge_cdata.bge_tx_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for TX\n"); return (ENOMEM); } } /* Create tags for jumbo RX buffers. */ if (BGE_IS_JUMBO_CAPABLE(sc)) { error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, BGE_NSEG_JUMBO, PAGE_SIZE, 0, NULL, NULL, &sc->bge_cdata.bge_mtag_jumbo); if (error) { device_printf(sc->bge_dev, "could not allocate jumbo dma tag\n"); return (ENOMEM); } /* Create DMA maps for jumbo RX buffers. */ error = bus_dmamap_create(sc->bge_cdata.bge_mtag_jumbo, 0, &sc->bge_cdata.bge_rx_jumbo_sparemap); if (error) { device_printf(sc->bge_dev, "can't create spare DMA map for jumbo RX\n"); return (ENOMEM); } for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_mtag_jumbo, 0, &sc->bge_cdata.bge_rx_jumbo_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for jumbo RX\n"); return (ENOMEM); } } } return (0); } /* * Return true if this device has more than one port. */ static int bge_has_multiple_ports(struct bge_softc *sc) { device_t dev = sc->bge_dev; u_int b, d, f, fscan, s; d = pci_get_domain(dev); b = pci_get_bus(dev); s = pci_get_slot(dev); f = pci_get_function(dev); for (fscan = 0; fscan <= PCI_FUNCMAX; fscan++) if (fscan != f && pci_find_dbsf(d, b, s, fscan) != NULL) return (1); return (0); } /* * Return true if MSI can be used with this device. */ static int bge_can_use_msi(struct bge_softc *sc) { int can_use_msi = 0; if (sc->bge_msi == 0) return (0); /* Disable MSI for polling(4). */ #ifdef DEVICE_POLLING return (0); #endif switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5714_A0: case BGE_ASICREV_BCM5714: /* * Apparently, MSI doesn't work when these chips are * configured in single-port mode. */ if (bge_has_multiple_ports(sc)) can_use_msi = 1; break; case BGE_ASICREV_BCM5750: if (sc->bge_chiprev != BGE_CHIPREV_5750_AX && sc->bge_chiprev != BGE_CHIPREV_5750_BX) can_use_msi = 1; break; default: if (BGE_IS_575X_PLUS(sc)) can_use_msi = 1; } return (can_use_msi); } static int bge_mbox_reorder(struct bge_softc *sc) { /* Lists of PCI bridges that are known to reorder mailbox writes. */ static const struct mbox_reorder { const uint16_t vendor; const uint16_t device; const char *desc; } mbox_reorder_lists[] = { { 0x1022, 0x7450, "AMD-8131 PCI-X Bridge" }, }; devclass_t pci, pcib; device_t bus, dev; int i; pci = devclass_find("pci"); pcib = devclass_find("pcib"); dev = sc->bge_dev; bus = device_get_parent(dev); for (;;) { dev = device_get_parent(bus); bus = device_get_parent(dev); if (device_get_devclass(dev) != pcib) break; for (i = 0; i < nitems(mbox_reorder_lists); i++) { if (pci_get_vendor(dev) == mbox_reorder_lists[i].vendor && pci_get_device(dev) == mbox_reorder_lists[i].device) { device_printf(sc->bge_dev, "enabling MBOX workaround for %s\n", mbox_reorder_lists[i].desc); return (1); } } if (device_get_devclass(bus) != pci) break; } return (0); } static void bge_devinfo(struct bge_softc *sc) { uint32_t cfg, clk; device_printf(sc->bge_dev, "CHIP ID 0x%08x; ASIC REV 0x%02x; CHIP REV 0x%02x; ", sc->bge_chipid, sc->bge_asicrev, sc->bge_chiprev); if (sc->bge_flags & BGE_FLAG_PCIE) printf("PCI-E\n"); else if (sc->bge_flags & BGE_FLAG_PCIX) { printf("PCI-X "); cfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID_MASK; if (cfg == BGE_MISCCFG_BOARD_ID_5704CIOBE) clk = 133; else { clk = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1F; switch (clk) { case 0: clk = 33; break; case 2: clk = 50; break; case 4: clk = 66; break; case 6: clk = 100; break; case 7: clk = 133; break; } } printf("%u MHz\n", clk); } else { if (sc->bge_pcixcap != 0) printf("PCI on PCI-X "); else printf("PCI "); cfg = pci_read_config(sc->bge_dev, BGE_PCI_PCISTATE, 4); if (cfg & BGE_PCISTATE_PCI_BUSSPEED) clk = 66; else clk = 33; if (cfg & BGE_PCISTATE_32BIT_BUS) printf("%u MHz; 32bit\n", clk); else printf("%u MHz; 64bit\n", clk); } } static int bge_attach(device_t dev) { if_t ifp; struct bge_softc *sc; uint32_t hwcfg = 0, misccfg, pcistate; u_char eaddr[ETHER_ADDR_LEN]; int capmask, error, reg, rid, trys; sc = device_get_softc(dev); sc->bge_dev = dev; BGE_LOCK_INIT(sc, device_get_nameunit(dev)); TASK_INIT(&sc->bge_intr_task, 0, bge_intr_task, sc); callout_init_mtx(&sc->bge_stat_ch, &sc->bge_mtx, 0); pci_enable_busmaster(dev); /* * Allocate control/status registers. */ rid = PCIR_BAR(0); sc->bge_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bge_res == NULL) { device_printf (sc->bge_dev, "couldn't map BAR0 memory\n"); error = ENXIO; goto fail; } /* Save various chip information. */ sc->bge_func_addr = pci_get_function(dev); sc->bge_chipid = bge_chipid(dev); sc->bge_asicrev = BGE_ASICREV(sc->bge_chipid); sc->bge_chiprev = BGE_CHIPREV(sc->bge_chipid); /* Set default PHY address. */ sc->bge_phy_addr = 1; /* * PHY address mapping for various devices. * * | F0 Cu | F0 Sr | F1 Cu | F1 Sr | * ---------+-------+-------+-------+-------+ * BCM57XX | 1 | X | X | X | * BCM5704 | 1 | X | 1 | X | * BCM5717 | 1 | 8 | 2 | 9 | * BCM5719 | 1 | 8 | 2 | 9 | * BCM5720 | 1 | 8 | 2 | 9 | * * | F2 Cu | F2 Sr | F3 Cu | F3 Sr | * ---------+-------+-------+-------+-------+ * BCM57XX | X | X | X | X | * BCM5704 | X | X | X | X | * BCM5717 | X | X | X | X | * BCM5719 | 3 | 10 | 4 | 11 | * BCM5720 | X | X | X | X | * * Other addresses may respond but they are not * IEEE compliant PHYs and should be ignored. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { if (sc->bge_chipid != BGE_CHIPID_BCM5717_A0) { if (CSR_READ_4(sc, BGE_SGDIG_STS) & BGE_SGDIGSTS_IS_SERDES) sc->bge_phy_addr = sc->bge_func_addr + 8; else sc->bge_phy_addr = sc->bge_func_addr + 1; } else { if (CSR_READ_4(sc, BGE_CPMU_PHY_STRAP) & BGE_CPMU_PHY_STRAP_IS_SERDES) sc->bge_phy_addr = sc->bge_func_addr + 8; else sc->bge_phy_addr = sc->bge_func_addr + 1; } } if (bge_has_eaddr(sc)) sc->bge_flags |= BGE_FLAG_EADDR; /* Save chipset family. */ switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5762: case BGE_ASICREV_BCM57765: case BGE_ASICREV_BCM57766: sc->bge_flags |= BGE_FLAG_57765_PLUS; /* FALLTHROUGH */ case BGE_ASICREV_BCM5717: case BGE_ASICREV_BCM5719: case BGE_ASICREV_BCM5720: sc->bge_flags |= BGE_FLAG_5717_PLUS | BGE_FLAG_5755_PLUS | BGE_FLAG_575X_PLUS | BGE_FLAG_5705_PLUS | BGE_FLAG_JUMBO | BGE_FLAG_JUMBO_FRAME; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* * Enable work around for DMA engine miscalculation * of TXMBUF available space. */ sc->bge_flags |= BGE_FLAG_RDMA_BUG; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 && sc->bge_chipid == BGE_CHIPID_BCM5719_A0) { /* Jumbo frame on BCM5719 A0 does not work. */ sc->bge_flags &= ~BGE_FLAG_JUMBO; } } break; case BGE_ASICREV_BCM5755: case BGE_ASICREV_BCM5761: case BGE_ASICREV_BCM5784: case BGE_ASICREV_BCM5785: case BGE_ASICREV_BCM5787: case BGE_ASICREV_BCM57780: sc->bge_flags |= BGE_FLAG_5755_PLUS | BGE_FLAG_575X_PLUS | BGE_FLAG_5705_PLUS; break; case BGE_ASICREV_BCM5700: case BGE_ASICREV_BCM5701: case BGE_ASICREV_BCM5703: case BGE_ASICREV_BCM5704: sc->bge_flags |= BGE_FLAG_5700_FAMILY | BGE_FLAG_JUMBO; break; case BGE_ASICREV_BCM5714_A0: case BGE_ASICREV_BCM5780: case BGE_ASICREV_BCM5714: sc->bge_flags |= BGE_FLAG_5714_FAMILY | BGE_FLAG_JUMBO_STD; /* FALLTHROUGH */ case BGE_ASICREV_BCM5750: case BGE_ASICREV_BCM5752: case BGE_ASICREV_BCM5906: sc->bge_flags |= BGE_FLAG_575X_PLUS; /* FALLTHROUGH */ case BGE_ASICREV_BCM5705: sc->bge_flags |= BGE_FLAG_5705_PLUS; break; } /* Identify chips with APE processor. */ switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5717: case BGE_ASICREV_BCM5719: case BGE_ASICREV_BCM5720: case BGE_ASICREV_BCM5761: case BGE_ASICREV_BCM5762: sc->bge_flags |= BGE_FLAG_APE; break; } /* Chips with APE need BAR2 access for APE registers/memory. */ if ((sc->bge_flags & BGE_FLAG_APE) != 0) { rid = PCIR_BAR(2); sc->bge_res2 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bge_res2 == NULL) { device_printf (sc->bge_dev, "couldn't map BAR2 memory\n"); error = ENXIO; goto fail; } /* Enable APE register/memory access by host driver. */ pcistate = pci_read_config(dev, BGE_PCI_PCISTATE, 4); pcistate |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR | BGE_PCISTATE_ALLOW_APE_SHMEM_WR | BGE_PCISTATE_ALLOW_APE_PSPACE_WR; pci_write_config(dev, BGE_PCI_PCISTATE, pcistate, 4); bge_ape_lock_init(sc); bge_ape_read_fw_ver(sc); } /* Add SYSCTLs, requires the chipset family to be set. */ bge_add_sysctls(sc); /* Identify the chips that use an CPMU. */ if (BGE_IS_5717_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) sc->bge_flags |= BGE_FLAG_CPMU_PRESENT; if ((sc->bge_flags & BGE_FLAG_CPMU_PRESENT) != 0) sc->bge_mi_mode = BGE_MIMODE_500KHZ_CONST; else sc->bge_mi_mode = BGE_MIMODE_BASE; /* Enable auto polling for BCM570[0-5]. */ if (BGE_IS_5700_FAMILY(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5705) sc->bge_mi_mode |= BGE_MIMODE_AUTOPOLL; /* * All Broadcom controllers have 4GB boundary DMA bug. * Whenever an address crosses a multiple of the 4GB boundary * (including 4GB, 8Gb, 12Gb, etc.) and makes the transition * from 0xX_FFFF_FFFF to 0x(X+1)_0000_0000 an internal DMA * state machine will lockup and cause the device to hang. */ sc->bge_flags |= BGE_FLAG_4G_BNDRY_BUG; /* BCM5755 or higher and BCM5906 have short DMA bug. */ if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906) sc->bge_flags |= BGE_FLAG_SHORT_DMA_BUG; /* * BCM5719 cannot handle DMA requests for DMA segments that * have larger than 4KB in size. However the maximum DMA * segment size created in DMA tag is 4KB for TSO, so we * wouldn't encounter the issue here. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5719) sc->bge_flags |= BGE_FLAG_4K_RDMA_BUG; misccfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID_MASK; if (sc->bge_asicrev == BGE_ASICREV_BCM5705) { if (misccfg == BGE_MISCCFG_BOARD_ID_5788 || misccfg == BGE_MISCCFG_BOARD_ID_5788M) sc->bge_flags |= BGE_FLAG_5788; } capmask = BMSR_DEFCAPMASK; if ((sc->bge_asicrev == BGE_ASICREV_BCM5703 && (misccfg == 0x4000 || misccfg == 0x8000)) || (sc->bge_asicrev == BGE_ASICREV_BCM5705 && pci_get_vendor(dev) == BCOM_VENDORID && (pci_get_device(dev) == BCOM_DEVICEID_BCM5901 || pci_get_device(dev) == BCOM_DEVICEID_BCM5901A2 || pci_get_device(dev) == BCOM_DEVICEID_BCM5705F)) || (pci_get_vendor(dev) == BCOM_VENDORID && (pci_get_device(dev) == BCOM_DEVICEID_BCM5751F || pci_get_device(dev) == BCOM_DEVICEID_BCM5753F || pci_get_device(dev) == BCOM_DEVICEID_BCM5787F)) || pci_get_device(dev) == BCOM_DEVICEID_BCM57790 || pci_get_device(dev) == BCOM_DEVICEID_BCM57791 || pci_get_device(dev) == BCOM_DEVICEID_BCM57795 || sc->bge_asicrev == BGE_ASICREV_BCM5906) { /* These chips are 10/100 only. */ capmask &= ~BMSR_EXTSTAT; sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; } /* * Some controllers seem to require a special firmware to use * TSO. But the firmware is not available to FreeBSD and Linux * claims that the TSO performed by the firmware is slower than * hardware based TSO. Moreover the firmware based TSO has one * known bug which can't handle TSO if Ethernet header + IP/TCP * header is greater than 80 bytes. A workaround for the TSO * bug exist but it seems it's too expensive than not using * TSO at all. Some hardwares also have the TSO bug so limit * the TSO to the controllers that are not affected TSO issues * (e.g. 5755 or higher). */ if (BGE_IS_5717_PLUS(sc)) { /* BCM5717 requires different TSO configuration. */ sc->bge_flags |= BGE_FLAG_TSO3; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 && sc->bge_chipid == BGE_CHIPID_BCM5719_A0) { /* TSO on BCM5719 A0 does not work. */ sc->bge_flags &= ~BGE_FLAG_TSO3; } } else if (BGE_IS_5755_PLUS(sc)) { /* * BCM5754 and BCM5787 shares the same ASIC id so * explicit device id check is required. * Due to unknown reason TSO does not work on BCM5755M. */ if (pci_get_device(dev) != BCOM_DEVICEID_BCM5754 && pci_get_device(dev) != BCOM_DEVICEID_BCM5754M && pci_get_device(dev) != BCOM_DEVICEID_BCM5755M) sc->bge_flags |= BGE_FLAG_TSO; } /* * Check if this is a PCI-X or PCI Express device. */ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { /* * Found a PCI Express capabilities register, this * must be a PCI Express device. */ sc->bge_flags |= BGE_FLAG_PCIE; sc->bge_expcap = reg; /* Extract supported maximum payload size. */ sc->bge_mps = pci_read_config(dev, sc->bge_expcap + PCIER_DEVICE_CAP, 2); sc->bge_mps = 128 << (sc->bge_mps & PCIEM_CAP_MAX_PAYLOAD); if (sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) sc->bge_expmrq = 2048; else sc->bge_expmrq = 4096; pci_set_max_read_req(dev, sc->bge_expmrq); } else { /* * Check if the device is in PCI-X Mode. * (This bit is not valid on PCI Express controllers.) */ if (pci_find_cap(dev, PCIY_PCIX, ®) == 0) sc->bge_pcixcap = reg; if ((pci_read_config(dev, BGE_PCI_PCISTATE, 4) & BGE_PCISTATE_PCI_BUSMODE) == 0) sc->bge_flags |= BGE_FLAG_PCIX; } /* * The 40bit DMA bug applies to the 5714/5715 controllers and is * not actually a MAC controller bug but an issue with the embedded * PCIe to PCI-X bridge in the device. Use 40bit DMA workaround. */ if (BGE_IS_5714_FAMILY(sc) && (sc->bge_flags & BGE_FLAG_PCIX)) sc->bge_flags |= BGE_FLAG_40BIT_BUG; /* * Some PCI-X bridges are known to trigger write reordering to * the mailbox registers. Typical phenomena is watchdog timeouts * caused by out-of-order TX completions. Enable workaround for * PCI-X devices that live behind these bridges. * Note, PCI-X controllers can run in PCI mode so we can't use * BGE_FLAG_PCIX flag to detect PCI-X controllers. */ if (sc->bge_pcixcap != 0 && bge_mbox_reorder(sc) != 0) sc->bge_flags |= BGE_FLAG_MBOX_REORDER; /* * Allocate the interrupt, using MSI if possible. These devices * support 8 MSI messages, but only the first one is used in * normal operation. */ rid = 0; if (pci_find_cap(sc->bge_dev, PCIY_MSI, ®) == 0) { sc->bge_msicap = reg; reg = 1; if (bge_can_use_msi(sc) && pci_alloc_msi(dev, ®) == 0) { rid = 1; sc->bge_flags |= BGE_FLAG_MSI; } } /* * All controllers except BCM5700 supports tagged status but * we use tagged status only for MSI case on BCM5717. Otherwise * MSI on BCM5717 does not work. */ #ifndef DEVICE_POLLING if (sc->bge_flags & BGE_FLAG_MSI && BGE_IS_5717_PLUS(sc)) sc->bge_flags |= BGE_FLAG_TAGGED_STATUS; #endif sc->bge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->bge_irq == NULL) { device_printf(sc->bge_dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } bge_devinfo(sc); sc->bge_asf_mode = 0; /* No ASF if APE present. */ if ((sc->bge_flags & BGE_FLAG_APE) == 0) { if (bge_allow_asf && (bge_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC)) { if (bge_readmem_ind(sc, BGE_SRAM_DATA_CFG) & BGE_HWCFG_ASF) { sc->bge_asf_mode |= ASF_ENABLE; sc->bge_asf_mode |= ASF_STACKUP; if (BGE_IS_575X_PLUS(sc)) sc->bge_asf_mode |= ASF_NEW_HANDSHAKE; } } } bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_SHUTDOWN); if (bge_reset(sc)) { device_printf(sc->bge_dev, "chip reset failed\n"); error = ENXIO; goto fail; } bge_sig_legacy(sc, BGE_RESET_SHUTDOWN); bge_sig_post_reset(sc, BGE_RESET_SHUTDOWN); if (bge_chipinit(sc)) { device_printf(sc->bge_dev, "chip initialization failed\n"); error = ENXIO; goto fail; } error = bge_get_eaddr(sc, eaddr); if (error) { device_printf(sc->bge_dev, "failed to read station address\n"); error = ENXIO; goto fail; } /* 5705 limits RX return ring to 512 entries. */ if (BGE_IS_5717_PLUS(sc)) sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT; else if (BGE_IS_5705_PLUS(sc)) sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT_5705; else sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT; if (bge_dma_alloc(sc)) { device_printf(sc->bge_dev, "failed to allocate DMA resources\n"); error = ENXIO; goto fail; } /* Set default tuneable values. */ sc->bge_stat_ticks = BGE_TICKS_PER_SEC; sc->bge_rx_coal_ticks = 150; sc->bge_tx_coal_ticks = 150; sc->bge_rx_max_coal_bds = 10; sc->bge_tx_max_coal_bds = 10; /* Initialize checksum features to use. */ sc->bge_csum_features = BGE_CSUM_FEATURES; if (sc->bge_forced_udpcsum != 0) sc->bge_csum_features |= CSUM_UDP; /* Set up ifnet structure */ ifp = sc->bge_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->bge_dev, "failed to if_alloc()\n"); error = ENXIO; goto fail; } if_setsoftc(ifp, sc); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, bge_ioctl); if_setstartfn(ifp, bge_start); if_setinitfn(ifp, bge_init); if_setgetcounterfn(ifp, bge_get_counter); if_setsendqlen(ifp, BGE_TX_RING_CNT - 1); if_setsendqready(ifp); if_sethwassist(ifp, sc->bge_csum_features); if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU); if ((sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) != 0) { if_sethwassistbits(ifp, CSUM_TSO, 0); if_setcapabilitiesbit(ifp, IFCAP_TSO4 | IFCAP_VLAN_HWTSO, 0); } #ifdef IFCAP_VLAN_HWCSUM if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWCSUM, 0); #endif if_setcapenable(ifp, if_getcapabilities(ifp)); #ifdef DEVICE_POLLING if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0); #endif /* * 5700 B0 chips do not support checksumming correctly due * to hardware bugs. */ if (sc->bge_chipid == BGE_CHIPID_BCM5700_B0) { if_setcapabilitiesbit(ifp, 0, IFCAP_HWCSUM); if_setcapenablebit(ifp, 0, IFCAP_HWCSUM); if_sethwassist(ifp, 0); } /* * Figure out what sort of media we have by checking the * hardware config word in the first 32k of NIC internal memory, * or fall back to examining the EEPROM if necessary. * Note: on some BCM5700 cards, this value appears to be unset. * If that's the case, we have to rely on identifying the NIC * by its PCI subsystem ID, as we do below for the SysKonnect * SK-9D41. */ if (bge_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC) hwcfg = bge_readmem_ind(sc, BGE_SRAM_DATA_CFG); else if ((sc->bge_flags & BGE_FLAG_EADDR) && (sc->bge_asicrev != BGE_ASICREV_BCM5906)) { if (bge_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET, sizeof(hwcfg))) { device_printf(sc->bge_dev, "failed to read EEPROM\n"); error = ENXIO; goto fail; } hwcfg = ntohl(hwcfg); } /* The SysKonnect SK-9D41 is a 1000baseSX card. */ if ((pci_read_config(dev, BGE_PCI_SUBSYS, 4) >> 16) == SK_SUBSYSID_9D41 || (hwcfg & BGE_HWCFG_MEDIA) == BGE_MEDIA_FIBER) { if (BGE_IS_5705_PLUS(sc)) { sc->bge_flags |= BGE_FLAG_MII_SERDES; sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; } else sc->bge_flags |= BGE_FLAG_TBI; } /* Set various PHY bug flags. */ if (sc->bge_chipid == BGE_CHIPID_BCM5701_A0 || sc->bge_chipid == BGE_CHIPID_BCM5701_B0) sc->bge_phy_flags |= BGE_PHY_CRC_BUG; if (sc->bge_chiprev == BGE_CHIPREV_5703_AX || sc->bge_chiprev == BGE_CHIPREV_5704_AX) sc->bge_phy_flags |= BGE_PHY_ADC_BUG; if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0) sc->bge_phy_flags |= BGE_PHY_5704_A0_BUG; if (pci_get_subvendor(dev) == DELL_VENDORID) sc->bge_phy_flags |= BGE_PHY_NO_3LED; if ((BGE_IS_5705_PLUS(sc)) && sc->bge_asicrev != BGE_ASICREV_BCM5906 && sc->bge_asicrev != BGE_ASICREV_BCM5785 && sc->bge_asicrev != BGE_ASICREV_BCM57780 && !BGE_IS_5717_PLUS(sc)) { if (sc->bge_asicrev == BGE_ASICREV_BCM5755 || sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5787) { if (pci_get_device(dev) != BCOM_DEVICEID_BCM5722 && pci_get_device(dev) != BCOM_DEVICEID_BCM5756) sc->bge_phy_flags |= BGE_PHY_JITTER_BUG; if (pci_get_device(dev) == BCOM_DEVICEID_BCM5755M) sc->bge_phy_flags |= BGE_PHY_ADJUST_TRIM; } else sc->bge_phy_flags |= BGE_PHY_BER_BUG; } /* * Don't enable Ethernet@WireSpeed for the 5700 or the * 5705 A0 and A1 chips. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || (sc->bge_asicrev == BGE_ASICREV_BCM5705 && (sc->bge_chipid != BGE_CHIPID_BCM5705_A0 && sc->bge_chipid != BGE_CHIPID_BCM5705_A1))) sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; if (sc->bge_flags & BGE_FLAG_TBI) { ifmedia_init(&sc->bge_ifmedia, IFM_IMASK, bge_ifmedia_upd, bge_ifmedia_sts); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->bge_ifmedia, IFM_ETHER | IFM_AUTO); sc->bge_ifmedia.ifm_media = sc->bge_ifmedia.ifm_cur->ifm_media; } else { /* * Do transceiver setup and tell the firmware the * driver is down so we can try to get access the * probe if ASF is running. Retry a couple of times * if we get a conflict with the ASF firmware accessing * the PHY. */ trys = 0; BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); again: bge_asf_driver_up(sc); error = mii_attach(dev, &sc->bge_miibus, ifp, (ifm_change_cb_t)bge_ifmedia_upd, (ifm_stat_cb_t)bge_ifmedia_sts, capmask, sc->bge_phy_addr, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { if (trys++ < 4) { device_printf(sc->bge_dev, "Try again\n"); bge_miibus_writereg(sc->bge_dev, sc->bge_phy_addr, MII_BMCR, BMCR_RESET); goto again; } device_printf(sc->bge_dev, "attaching PHYs failed\n"); goto fail; } /* * Now tell the firmware we are going up after probing the PHY */ if (sc->bge_asf_mode & ASF_STACKUP) BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); } /* * When using the BCM5701 in PCI-X mode, data corruption has * been observed in the first few bytes of some received packets. * Aligning the packet buffer in memory eliminates the corruption. * Unfortunately, this misaligns the packet payloads. On platforms * which do not support unaligned accesses, we will realign the * payloads by copying the received packets. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5701 && sc->bge_flags & BGE_FLAG_PCIX) sc->bge_flags |= BGE_FLAG_RX_ALIGNBUG; /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* Tell upper layer we support long frames. */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); /* * Hookup IRQ last. */ if (BGE_IS_5755_PLUS(sc) && sc->bge_flags & BGE_FLAG_MSI) { /* Take advantage of single-shot MSI. */ CSR_WRITE_4(sc, BGE_MSI_MODE, CSR_READ_4(sc, BGE_MSI_MODE) & ~BGE_MSIMODE_ONE_SHOT_DISABLE); sc->bge_tq = taskqueue_create_fast("bge_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->bge_tq); if (sc->bge_tq == NULL) { device_printf(dev, "could not create taskqueue.\n"); ether_ifdetach(ifp); error = ENOMEM; goto fail; } error = taskqueue_start_threads(&sc->bge_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->bge_dev)); if (error != 0) { device_printf(dev, "could not start threads.\n"); ether_ifdetach(ifp); goto fail; } error = bus_setup_intr(dev, sc->bge_irq, INTR_TYPE_NET | INTR_MPSAFE, bge_msi_intr, NULL, sc, &sc->bge_intrhand); } else error = bus_setup_intr(dev, sc->bge_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, bge_intr, sc, &sc->bge_intrhand); if (error) { ether_ifdetach(ifp); device_printf(sc->bge_dev, "couldn't set up irq\n"); goto fail; } /* Attach driver netdump methods. */ NETDUMP_SET(ifp, bge); fail: if (error) bge_detach(dev); return (error); } static int bge_detach(device_t dev) { struct bge_softc *sc; if_t ifp; sc = device_get_softc(dev); ifp = sc->bge_ifp; #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (device_is_attached(dev)) { ether_ifdetach(ifp); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); callout_drain(&sc->bge_stat_ch); } if (sc->bge_tq) taskqueue_drain(sc->bge_tq, &sc->bge_intr_task); if (sc->bge_flags & BGE_FLAG_TBI) ifmedia_removeall(&sc->bge_ifmedia); else if (sc->bge_miibus != NULL) { bus_generic_detach(dev); device_delete_child(dev, sc->bge_miibus); } bge_release_resources(sc); return (0); } static void bge_release_resources(struct bge_softc *sc) { device_t dev; dev = sc->bge_dev; if (sc->bge_tq != NULL) taskqueue_free(sc->bge_tq); if (sc->bge_intrhand != NULL) bus_teardown_intr(dev, sc->bge_irq, sc->bge_intrhand); if (sc->bge_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->bge_irq), sc->bge_irq); pci_release_msi(dev); } if (sc->bge_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->bge_res), sc->bge_res); if (sc->bge_res2 != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->bge_res2), sc->bge_res2); if (sc->bge_ifp != NULL) if_free(sc->bge_ifp); bge_dma_free(sc); if (mtx_initialized(&sc->bge_mtx)) /* XXX */ BGE_LOCK_DESTROY(sc); } static int bge_reset(struct bge_softc *sc) { device_t dev; uint32_t cachesize, command, mac_mode, mac_mode_mask, reset, val; void (*write_op)(struct bge_softc *, int, int); uint16_t devctl; int i; dev = sc->bge_dev; mac_mode_mask = BGE_MACMODE_HALF_DUPLEX | BGE_MACMODE_PORTMODE; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) mac_mode_mask |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN; mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & mac_mode_mask; if (BGE_IS_575X_PLUS(sc) && !BGE_IS_5714_FAMILY(sc) && (sc->bge_asicrev != BGE_ASICREV_BCM5906)) { if (sc->bge_flags & BGE_FLAG_PCIE) write_op = bge_writemem_direct; else write_op = bge_writemem_ind; } else write_op = bge_writereg_ind; if (sc->bge_asicrev != BGE_ASICREV_BCM5700 && sc->bge_asicrev != BGE_ASICREV_BCM5701) { CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1); for (i = 0; i < 8000; i++) { if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1) break; DELAY(20); } if (i == 8000) { if (bootverbose) device_printf(dev, "NVRAM lock timedout!\n"); } } /* Take APE lock when performing reset. */ bge_ape_lock(sc, BGE_APE_LOCK_GRC); /* Save some important PCI state. */ cachesize = pci_read_config(dev, BGE_PCI_CACHESZ, 4); command = pci_read_config(dev, BGE_PCI_CMD, 4); pci_write_config(dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_INDIRECT_ACCESS | BGE_PCIMISCCTL_MASK_PCI_INTR | BGE_HIF_SWAP_OPTIONS | BGE_PCIMISCCTL_PCISTATE_RW, 4); /* Disable fastboot on controllers that support it. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5752 || BGE_IS_5755_PLUS(sc)) { if (bootverbose) device_printf(dev, "Disabling fastboot\n"); CSR_WRITE_4(sc, BGE_FASTBOOT_PC, 0x0); } /* * Write the magic number to SRAM at offset 0xB50. * When firmware finishes its initialization it will * write ~BGE_SRAM_FW_MB_MAGIC to the same location. */ bge_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC); reset = BGE_MISCCFG_RESET_CORE_CLOCKS | BGE_32BITTIME_66MHZ; /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_asicrev != BGE_ASICREV_BCM5785 && (sc->bge_flags & BGE_FLAG_5717_PLUS) == 0) { if (CSR_READ_4(sc, 0x7E2C) == 0x60) /* PCIE 1.0 */ CSR_WRITE_4(sc, 0x7E2C, 0x20); } if (sc->bge_chipid != BGE_CHIPID_BCM5750_A0) { /* Prevent PCIE link training during global reset */ CSR_WRITE_4(sc, BGE_MISC_CFG, 1 << 29); reset |= 1 << 29; } } if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { val = CSR_READ_4(sc, BGE_VCPU_STATUS); CSR_WRITE_4(sc, BGE_VCPU_STATUS, val | BGE_VCPU_STATUS_DRV_RESET); val = CSR_READ_4(sc, BGE_VCPU_EXT_CTRL); CSR_WRITE_4(sc, BGE_VCPU_EXT_CTRL, val & ~BGE_VCPU_EXT_CTRL_HALT_CPU); } /* * Set GPHY Power Down Override to leave GPHY * powered up in D0 uninitialized. */ if (BGE_IS_5705_PLUS(sc) && (sc->bge_flags & BGE_FLAG_CPMU_PRESENT) == 0) reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE; /* Issue global reset */ write_op(sc, BGE_MISC_CFG, reset); if (sc->bge_flags & BGE_FLAG_PCIE) DELAY(100 * 1000); else DELAY(1000); /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_chipid == BGE_CHIPID_BCM5750_A0) { DELAY(500000); /* wait for link training to complete */ val = pci_read_config(dev, 0xC4, 4); pci_write_config(dev, 0xC4, val | (1 << 15), 4); } devctl = pci_read_config(dev, sc->bge_expcap + PCIER_DEVICE_CTL, 2); /* Clear enable no snoop and disable relaxed ordering. */ devctl &= ~(PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE); pci_write_config(dev, sc->bge_expcap + PCIER_DEVICE_CTL, devctl, 2); pci_set_max_read_req(dev, sc->bge_expmrq); /* Clear error status. */ pci_write_config(dev, sc->bge_expcap + PCIER_DEVICE_STA, PCIEM_STA_CORRECTABLE_ERROR | PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR | PCIEM_STA_UNSUPPORTED_REQ, 2); } /* Reset some of the PCI state that got zapped by reset. */ pci_write_config(dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_INDIRECT_ACCESS | BGE_PCIMISCCTL_MASK_PCI_INTR | BGE_HIF_SWAP_OPTIONS | BGE_PCIMISCCTL_PCISTATE_RW, 4); val = BGE_PCISTATE_ROM_ENABLE | BGE_PCISTATE_ROM_RETRY_ENABLE; if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0 && (sc->bge_flags & BGE_FLAG_PCIX) != 0) val |= BGE_PCISTATE_RETRY_SAME_DMA; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) val |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR | BGE_PCISTATE_ALLOW_APE_SHMEM_WR | BGE_PCISTATE_ALLOW_APE_PSPACE_WR; pci_write_config(dev, BGE_PCI_PCISTATE, val, 4); pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4); pci_write_config(dev, BGE_PCI_CMD, command, 4); /* * Disable PCI-X relaxed ordering to ensure status block update * comes first then packet buffer DMA. Otherwise driver may * read stale status block. */ if (sc->bge_flags & BGE_FLAG_PCIX) { devctl = pci_read_config(dev, sc->bge_pcixcap + PCIXR_COMMAND, 2); devctl &= ~PCIXM_COMMAND_ERO; if (sc->bge_asicrev == BGE_ASICREV_BCM5703) { devctl &= ~PCIXM_COMMAND_MAX_READ; devctl |= PCIXM_COMMAND_MAX_READ_2048; } else if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { devctl &= ~(PCIXM_COMMAND_MAX_SPLITS | PCIXM_COMMAND_MAX_READ); devctl |= PCIXM_COMMAND_MAX_READ_2048; } pci_write_config(dev, sc->bge_pcixcap + PCIXR_COMMAND, devctl, 2); } /* Re-enable MSI, if necessary, and enable the memory arbiter. */ if (BGE_IS_5714_FAMILY(sc)) { /* This chip disables MSI on reset. */ if (sc->bge_flags & BGE_FLAG_MSI) { val = pci_read_config(dev, sc->bge_msicap + PCIR_MSI_CTRL, 2); pci_write_config(dev, sc->bge_msicap + PCIR_MSI_CTRL, val | PCIM_MSICTRL_MSI_ENABLE, 2); val = CSR_READ_4(sc, BGE_MSI_MODE); CSR_WRITE_4(sc, BGE_MSI_MODE, val | BGE_MSIMODE_ENABLE); } val = CSR_READ_4(sc, BGE_MARB_MODE); CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE | val); } else CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE); /* Fix up byte swapping. */ CSR_WRITE_4(sc, BGE_MODE_CTL, bge_dma_swap_options(sc)); val = CSR_READ_4(sc, BGE_MAC_MODE); val = (val & ~mac_mode_mask) | mac_mode; CSR_WRITE_4(sc, BGE_MAC_MODE, val); DELAY(40); bge_ape_unlock(sc, BGE_APE_LOCK_GRC); if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { for (i = 0; i < BGE_TIMEOUT; i++) { val = CSR_READ_4(sc, BGE_VCPU_STATUS); if (val & BGE_VCPU_STATUS_INIT_DONE) break; DELAY(100); } if (i == BGE_TIMEOUT) { device_printf(dev, "reset timed out\n"); return (1); } } else { /* * Poll until we see the 1's complement of the magic number. * This indicates that the firmware initialization is complete. * We expect this to fail if no chip containing the Ethernet * address is fitted though. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); val = bge_readmem_ind(sc, BGE_SRAM_FW_MB); if (val == ~BGE_SRAM_FW_MB_MAGIC) break; } if ((sc->bge_flags & BGE_FLAG_EADDR) && i == BGE_TIMEOUT) device_printf(dev, "firmware handshake timed out, found 0x%08x\n", val); /* BCM57765 A0 needs additional time before accessing. */ if (sc->bge_chipid == BGE_CHIPID_BCM57765_A0) DELAY(10 * 1000); /* XXX */ } /* * The 5704 in TBI mode apparently needs some special * adjustment to insure the SERDES drive level is set * to 1.2V. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5704 && sc->bge_flags & BGE_FLAG_TBI) { val = CSR_READ_4(sc, BGE_SERDES_CFG); val = (val & ~0xFFF) | 0x880; CSR_WRITE_4(sc, BGE_SERDES_CFG, val); } /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE && !BGE_IS_5717_PLUS(sc) && sc->bge_chipid != BGE_CHIPID_BCM5750_A0 && sc->bge_asicrev != BGE_ASICREV_BCM5785) { /* Enable Data FIFO protection. */ val = CSR_READ_4(sc, 0x7C00); CSR_WRITE_4(sc, 0x7C00, val | (1 << 25)); } if (sc->bge_asicrev == BGE_ASICREV_BCM5720) BGE_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE, CPMU_CLCK_ORIDE_MAC_ORIDE_EN); return (0); } static __inline void bge_rxreuse_std(struct bge_softc *sc, int i) { struct bge_rx_bd *r; r = &sc->bge_ldata.bge_rx_std_ring[sc->bge_std]; r->bge_flags = BGE_RXBDFLAG_END; r->bge_len = sc->bge_cdata.bge_rx_std_seglen[i]; r->bge_idx = i; BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } static __inline void bge_rxreuse_jumbo(struct bge_softc *sc, int i) { struct bge_extrx_bd *r; r = &sc->bge_ldata.bge_rx_jumbo_ring[sc->bge_jumbo]; r->bge_flags = BGE_RXBDFLAG_JUMBO_RING | BGE_RXBDFLAG_END; r->bge_len0 = sc->bge_cdata.bge_rx_jumbo_seglen[i][0]; r->bge_len1 = sc->bge_cdata.bge_rx_jumbo_seglen[i][1]; r->bge_len2 = sc->bge_cdata.bge_rx_jumbo_seglen[i][2]; r->bge_len3 = sc->bge_cdata.bge_rx_jumbo_seglen[i][3]; r->bge_idx = i; BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } /* * Frame reception handling. This is called if there's a frame * on the receive return list. * * Note: we have to be able to handle two possibilities here: * 1) the frame is from the jumbo receive ring * 2) the frame is from the standard receive ring */ static int bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck) { if_t ifp; int rx_npkts = 0, stdcnt = 0, jumbocnt = 0; uint16_t rx_cons; rx_cons = sc->bge_rx_saved_considx; /* Nothing to do. */ if (rx_cons == rx_prod) return (rx_npkts); ifp = sc->bge_ifp; bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_POSTWRITE); if (BGE_IS_JUMBO_CAPABLE(sc) && if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN)) bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_POSTWRITE); while (rx_cons != rx_prod) { struct bge_rx_bd *cur_rx; uint32_t rxidx; struct mbuf *m = NULL; uint16_t vlan_tag = 0; int have_tag = 0; #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) { if (sc->rxcycles <= 0) break; sc->rxcycles--; } #endif cur_rx = &sc->bge_ldata.bge_rx_return_ring[rx_cons]; rxidx = cur_rx->bge_idx; BGE_INC(rx_cons, sc->bge_return_ring_cnt); if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING && cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) { have_tag = 1; vlan_tag = cur_rx->bge_vlan_tag; } if (cur_rx->bge_flags & BGE_RXBDFLAG_JUMBO_RING) { jumbocnt++; m = sc->bge_cdata.bge_rx_jumbo_chain[rxidx]; if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { bge_rxreuse_jumbo(sc, rxidx); continue; } if (bge_newbuf_jumbo(sc, rxidx) != 0) { bge_rxreuse_jumbo(sc, rxidx); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } else { stdcnt++; m = sc->bge_cdata.bge_rx_std_chain[rxidx]; if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { bge_rxreuse_std(sc, rxidx); continue; } if (bge_newbuf_std(sc, rxidx) != 0) { bge_rxreuse_std(sc, rxidx); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); #ifndef __NO_STRICT_ALIGNMENT /* * For architectures with strict alignment we must make sure * the payload is aligned. */ if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) { bcopy(m->m_data, m->m_data + ETHER_ALIGN, cur_rx->bge_len); m->m_data += ETHER_ALIGN; } #endif m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN; m->m_pkthdr.rcvif = ifp; if (if_getcapenable(ifp) & IFCAP_RXCSUM) bge_rxcsum(sc, cur_rx, m); /* * If we received a packet with a vlan tag, * attach that information to the packet. */ if (have_tag) { m->m_pkthdr.ether_vtag = vlan_tag; m->m_flags |= M_VLANTAG; } if (holdlck != 0) { BGE_UNLOCK(sc); if_input(ifp, m); BGE_LOCK(sc); } else if_input(ifp, m); rx_npkts++; if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return (rx_npkts); } bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_PREREAD); if (stdcnt > 0) bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); if (jumbocnt > 0) bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_rx_saved_considx = rx_cons; bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); if (stdcnt) bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, (sc->bge_std + BGE_STD_RX_RING_CNT - 1) % BGE_STD_RX_RING_CNT); if (jumbocnt) bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, (sc->bge_jumbo + BGE_JUMBO_RX_RING_CNT - 1) % BGE_JUMBO_RX_RING_CNT); #ifdef notyet /* * This register wraps very quickly under heavy packet drops. * If you need correct statistics, you can enable this check. */ if (BGE_IS_5705_PLUS(sc)) if_incierrors(ifp, CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS)); #endif return (rx_npkts); } static void bge_rxcsum(struct bge_softc *sc, struct bge_rx_bd *cur_rx, struct mbuf *m) { if (BGE_IS_5717_PLUS(sc)) { if ((cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) { if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->bge_error_flag & BGE_RXERRFLAG_IP_CSUM_NOK) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) { m->m_pkthdr.csum_data = cur_rx->bge_tcp_udp_csum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; } } } else { if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->bge_ip_csum ^ 0xFFFF) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM && m->m_pkthdr.len >= ETHER_MIN_NOPAD) { m->m_pkthdr.csum_data = cur_rx->bge_tcp_udp_csum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; } } } static void bge_txeof(struct bge_softc *sc, uint16_t tx_cons) { struct bge_tx_bd *cur_tx; if_t ifp; BGE_LOCK_ASSERT(sc); /* Nothing to do. */ if (sc->bge_tx_saved_considx == tx_cons) return; ifp = sc->bge_ifp; bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ while (sc->bge_tx_saved_considx != tx_cons) { uint32_t idx; idx = sc->bge_tx_saved_considx; cur_tx = &sc->bge_ldata.bge_tx_ring[idx]; if (cur_tx->bge_flags & BGE_TXBDFLAG_END) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (sc->bge_cdata.bge_tx_chain[idx] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[idx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[idx]); m_freem(sc->bge_cdata.bge_tx_chain[idx]); sc->bge_cdata.bge_tx_chain[idx] = NULL; } sc->bge_txcnt--; BGE_INC(sc->bge_tx_saved_considx, BGE_TX_RING_CNT); } if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); if (sc->bge_txcnt == 0) sc->bge_timer = 0; } #ifdef DEVICE_POLLING static int bge_poll(if_t ifp, enum poll_cmd cmd, int count) { struct bge_softc *sc = if_getsoftc(ifp); uint16_t rx_prod, tx_cons; uint32_t statusword; int rx_npkts = 0; BGE_LOCK(sc); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { BGE_UNLOCK(sc); return (rx_npkts); } bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Fetch updates from the status block. */ rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; statusword = sc->bge_ldata.bge_status_block->bge_status; /* Clear the status so the next pass only sees the changes. */ sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Note link event. It will be processed by POLL_AND_CHECK_STATUS. */ if (statusword & BGE_STATFLAG_LINKSTATE_CHANGED) sc->bge_link_evt++; if (cmd == POLL_AND_CHECK_STATUS) if ((sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) || sc->bge_link_evt || (sc->bge_flags & BGE_FLAG_TBI)) bge_link_upd(sc); sc->rxcycles = count; rx_npkts = bge_rxeof(sc, rx_prod, 1); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { BGE_UNLOCK(sc); return (rx_npkts); } bge_txeof(sc, tx_cons); if (!if_sendq_empty(ifp)) bge_start_locked(ifp); BGE_UNLOCK(sc); return (rx_npkts); } #endif /* DEVICE_POLLING */ static int bge_msi_intr(void *arg) { struct bge_softc *sc; sc = (struct bge_softc *)arg; /* * This interrupt is not shared and controller already * disabled further interrupt. */ taskqueue_enqueue(sc->bge_tq, &sc->bge_intr_task); return (FILTER_HANDLED); } static void bge_intr_task(void *arg, int pending) { struct bge_softc *sc; if_t ifp; uint32_t status, status_tag; uint16_t rx_prod, tx_cons; sc = (struct bge_softc *)arg; ifp = sc->bge_ifp; BGE_LOCK(sc); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { BGE_UNLOCK(sc); return; } /* Get updated status block. */ bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Save producer/consumer indices. */ rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; status = sc->bge_ldata.bge_status_block->bge_status; status_tag = sc->bge_ldata.bge_status_block->bge_status_tag << 24; /* Dirty the status flag. */ sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if ((sc->bge_flags & BGE_FLAG_TAGGED_STATUS) == 0) status_tag = 0; if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) != 0) bge_link_upd(sc); /* Let controller work. */ bge_writembx(sc, BGE_MBX_IRQ0_LO, status_tag); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING && sc->bge_rx_saved_considx != rx_prod) { /* Check RX return ring producer/consumer. */ BGE_UNLOCK(sc); bge_rxeof(sc, rx_prod, 0); BGE_LOCK(sc); } if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* Check TX ring producer/consumer. */ bge_txeof(sc, tx_cons); if (!if_sendq_empty(ifp)) bge_start_locked(ifp); } BGE_UNLOCK(sc); } static void bge_intr(void *xsc) { struct bge_softc *sc; if_t ifp; uint32_t statusword; uint16_t rx_prod, tx_cons; sc = xsc; BGE_LOCK(sc); ifp = sc->bge_ifp; #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) { BGE_UNLOCK(sc); return; } #endif /* * Ack the interrupt by writing something to BGE_MBX_IRQ0_LO. Don't * disable interrupts by writing nonzero like we used to, since with * our current organization this just gives complications and * pessimizations for re-enabling interrupts. We used to have races * instead of the necessary complications. Disabling interrupts * would just reduce the chance of a status update while we are * running (by switching to the interrupt-mode coalescence * parameters), but this chance is already very low so it is more * efficient to get another interrupt than prevent it. * * We do the ack first to ensure another interrupt if there is a * status update after the ack. We don't check for the status * changing later because it is more efficient to get another * interrupt than prevent it, not quite as above (not checking is * a smaller optimization than not toggling the interrupt enable, * since checking doesn't involve PCI accesses and toggling require * the status check). So toggling would probably be a pessimization * even with MSI. It would only be needed for using a task queue. */ bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); /* * Do the mandatory PCI flush as well as get the link status. */ statusword = CSR_READ_4(sc, BGE_MAC_STS) & BGE_MACSTAT_LINK_CHANGED; /* Make sure the descriptor ring indexes are coherent. */ bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if ((sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) || statusword || sc->bge_link_evt) bge_link_upd(sc); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* Check RX return ring producer/consumer. */ bge_rxeof(sc, rx_prod, 1); } if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* Check TX ring producer/consumer. */ bge_txeof(sc, tx_cons); } if (if_getdrvflags(ifp) & IFF_DRV_RUNNING && !if_sendq_empty(ifp)) bge_start_locked(ifp); BGE_UNLOCK(sc); } static void bge_asf_driver_up(struct bge_softc *sc) { if (sc->bge_asf_mode & ASF_STACKUP) { /* Send ASF heartbeat aprox. every 2s */ if (sc->bge_asf_count) sc->bge_asf_count --; else { sc->bge_asf_count = 2; bge_writemem_ind(sc, BGE_SRAM_FW_CMD_MB, BGE_FW_CMD_DRV_ALIVE); bge_writemem_ind(sc, BGE_SRAM_FW_CMD_LEN_MB, 4); bge_writemem_ind(sc, BGE_SRAM_FW_CMD_DATA_MB, BGE_FW_HB_TIMEOUT_SEC); CSR_WRITE_4(sc, BGE_RX_CPU_EVENT, CSR_READ_4(sc, BGE_RX_CPU_EVENT) | BGE_RX_CPU_DRV_EVENT); } } } static void bge_tick(void *xsc) { struct bge_softc *sc = xsc; struct mii_data *mii = NULL; BGE_LOCK_ASSERT(sc); /* Synchronize with possible callout reset/stop. */ if (callout_pending(&sc->bge_stat_ch) || !callout_active(&sc->bge_stat_ch)) return; if (BGE_IS_5705_PLUS(sc)) bge_stats_update_regs(sc); else bge_stats_update(sc); /* XXX Add APE heartbeat check here? */ if ((sc->bge_flags & BGE_FLAG_TBI) == 0) { mii = device_get_softc(sc->bge_miibus); /* * Do not touch PHY if we have link up. This could break * IPMI/ASF mode or produce extra input errors * (extra errors was reported for bcm5701 & bcm5704). */ if (!sc->bge_link) mii_tick(mii); } else { /* * Since in TBI mode auto-polling can't be used we should poll * link status manually. Here we register pending link event * and trigger interrupt. */ #ifdef DEVICE_POLLING /* In polling mode we poll link state in bge_poll(). */ if (!(if_getcapenable(sc->bge_ifp) & IFCAP_POLLING)) #endif { sc->bge_link_evt++; if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_flags & BGE_FLAG_5788) BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET); else BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW); } } bge_asf_driver_up(sc); bge_watchdog(sc); callout_reset(&sc->bge_stat_ch, hz, bge_tick, sc); } static void bge_stats_update_regs(struct bge_softc *sc) { if_t ifp; struct bge_mac_stats *stats; uint32_t val; ifp = sc->bge_ifp; stats = &sc->bge_mac_stats; stats->ifHCOutOctets += CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); stats->etherStatsCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); stats->outXonSent += CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); stats->outXoffSent += CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); stats->dot3StatsInternalMacTransmitErrors += CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); stats->dot3StatsSingleCollisionFrames += CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); stats->dot3StatsMultipleCollisionFrames += CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); stats->dot3StatsDeferredTransmissions += CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); stats->dot3StatsExcessiveCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); stats->dot3StatsLateCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); stats->ifHCOutUcastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); stats->ifHCOutMulticastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); stats->ifHCOutBroadcastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); stats->ifHCInOctets += CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); stats->etherStatsFragments += CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); stats->ifHCInUcastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); stats->ifHCInMulticastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); stats->ifHCInBroadcastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); stats->dot3StatsFCSErrors += CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); stats->dot3StatsAlignmentErrors += CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); stats->xonPauseFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); stats->xoffPauseFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); stats->macControlFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); stats->xoffStateEntered += CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); stats->dot3StatsFramesTooLong += CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); stats->etherStatsJabbers += CSR_READ_4(sc, BGE_RX_MAC_STATS_JABBERS); stats->etherStatsUndersizePkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); stats->FramesDroppedDueToFilters += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); stats->DmaWriteQueueFull += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); stats->DmaWriteHighPriQueueFull += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); stats->NoMoreRxBDs += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); /* * XXX * Unlike other controllers, BGE_RXLP_LOCSTAT_IFIN_DROPS * counter of BCM5717, BCM5718, BCM5719 A0 and BCM5720 A0 * includes number of unwanted multicast frames. This comes * from silicon bug and known workaround to get rough(not * exact) counter is to enable interrupt on MBUF low water * attention. This can be accomplished by setting * BGE_HCCMODE_ATTN bit of BGE_HCC_MODE, * BGE_BMANMODE_LOMBUF_ATTN bit of BGE_BMAN_MODE and * BGE_MODECTL_FLOWCTL_ATTN_INTR bit of BGE_MODE_CTL. * However that change would generate more interrupts and * there are still possibilities of losing multiple frames * during BGE_MODECTL_FLOWCTL_ATTN_INTR interrupt handling. * Given that the workaround still would not get correct * counter I don't think it's worth to implement it. So * ignore reading the counter on controllers that have the * silicon bug. */ if (sc->bge_asicrev != BGE_ASICREV_BCM5717 && sc->bge_chipid != BGE_CHIPID_BCM5719_A0 && sc->bge_chipid != BGE_CHIPID_BCM5720_A0) stats->InputDiscards += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); stats->InputErrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); stats->RecvThresholdHit += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); if (sc->bge_flags & BGE_FLAG_RDMA_BUG) { /* * If controller transmitted more than BGE_NUM_RDMA_CHANNELS * frames, it's safe to disable workaround for DMA engine's * miscalculation of TXMBUF space. */ if (stats->ifHCOutUcastPkts + stats->ifHCOutMulticastPkts + stats->ifHCOutBroadcastPkts > BGE_NUM_RDMA_CHANNELS) { val = CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL); if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val &= ~BGE_RDMA_TX_LENGTH_WA_5719; else val &= ~BGE_RDMA_TX_LENGTH_WA_5720; CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, val); sc->bge_flags &= ~BGE_FLAG_RDMA_BUG; } } } static void bge_stats_clear_regs(struct bge_softc *sc) { CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); CSR_READ_4(sc, BGE_RX_MAC_STATS_JABBERS); CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); } static void bge_stats_update(struct bge_softc *sc) { if_t ifp; bus_size_t stats; uint32_t cnt; /* current register value */ ifp = sc->bge_ifp; stats = BGE_MEMWIN_START + BGE_STATS_BLOCK; #define READ_STAT(sc, stats, stat) \ CSR_READ_4(sc, stats + offsetof(struct bge_stats, stat)) cnt = READ_STAT(sc, stats, txstats.etherStatsCollisions.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, cnt - sc->bge_tx_collisions); sc->bge_tx_collisions = cnt; cnt = READ_STAT(sc, stats, nicNoMoreRxBDs.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_nobds); sc->bge_rx_nobds = cnt; cnt = READ_STAT(sc, stats, ifInErrors.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_inerrs); sc->bge_rx_inerrs = cnt; cnt = READ_STAT(sc, stats, ifInDiscards.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_discards); sc->bge_rx_discards = cnt; cnt = READ_STAT(sc, stats, txstats.ifOutDiscards.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_OERRORS, cnt - sc->bge_tx_discards); sc->bge_tx_discards = cnt; #undef READ_STAT } /* * Pad outbound frame to ETHER_MIN_NOPAD for an unusual reason. * The bge hardware will pad out Tx runts to ETHER_MIN_NOPAD, * but when such padded frames employ the bge IP/TCP checksum offload, * the hardware checksum assist gives incorrect results (possibly * from incorporating its own padding into the UDP/TCP checksum; who knows). * If we pad such runts with zeros, the onboard checksum comes out correct. */ static __inline int bge_cksum_pad(struct mbuf *m) { int padlen = ETHER_MIN_NOPAD - m->m_pkthdr.len; struct mbuf *last; /* If there's only the packet-header and we can pad there, use it. */ if (m->m_pkthdr.len == m->m_len && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= padlen) { last = m; } else { /* * Walk packet chain to find last mbuf. We will either * pad there, or append a new mbuf and pad it. */ for (last = m; last->m_next != NULL; last = last->m_next); if (!(M_WRITABLE(last) && M_TRAILINGSPACE(last) >= padlen)) { /* Allocate new empty mbuf, pad it. Compact later. */ struct mbuf *n; MGET(n, M_NOWAIT, MT_DATA); if (n == NULL) return (ENOBUFS); n->m_len = 0; last->m_next = n; last = n; } } /* Now zero the pad area, to avoid the bge cksum-assist bug. */ memset(mtod(last, caddr_t) + last->m_len, 0, padlen); last->m_len += padlen; m->m_pkthdr.len += padlen; return (0); } static struct mbuf * bge_check_short_dma(struct mbuf *m) { struct mbuf *n; int found; /* * If device receive two back-to-back send BDs with less than * or equal to 8 total bytes then the device may hang. The two * back-to-back send BDs must in the same frame for this failure * to occur. Scan mbuf chains and see whether two back-to-back * send BDs are there. If this is the case, allocate new mbuf * and copy the frame to workaround the silicon bug. */ for (n = m, found = 0; n != NULL; n = n->m_next) { if (n->m_len < 8) { found++; if (found > 1) break; continue; } found = 0; } if (found > 1) { n = m_defrag(m, M_NOWAIT); if (n == NULL) m_freem(m); } else n = m; return (n); } static struct mbuf * bge_setup_tso(struct bge_softc *sc, struct mbuf *m, uint16_t *mss, uint16_t *flags) { struct ip *ip; struct tcphdr *tcp; struct mbuf *n; uint16_t hlen; uint32_t poff; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ n = m_dup(m, M_NOWAIT); m_freem(m); if (n == NULL) return (NULL); m = n; } m = m_pullup(m, sizeof(struct ether_header) + sizeof(struct ip)); if (m == NULL) return (NULL); ip = (struct ip *)(mtod(m, char *) + sizeof(struct ether_header)); poff = sizeof(struct ether_header) + (ip->ip_hl << 2); m = m_pullup(m, poff + sizeof(struct tcphdr)); if (m == NULL) return (NULL); tcp = (struct tcphdr *)(mtod(m, char *) + poff); m = m_pullup(m, poff + (tcp->th_off << 2)); if (m == NULL) return (NULL); /* * It seems controller doesn't modify IP length and TCP pseudo * checksum. These checksum computed by upper stack should be 0. */ *mss = m->m_pkthdr.tso_segsz; ip = (struct ip *)(mtod(m, char *) + sizeof(struct ether_header)); ip->ip_sum = 0; ip->ip_len = htons(*mss + (ip->ip_hl << 2) + (tcp->th_off << 2)); /* Clear pseudo checksum computed by TCP stack. */ tcp = (struct tcphdr *)(mtod(m, char *) + poff); tcp->th_sum = 0; /* * Broadcom controllers uses different descriptor format for * TSO depending on ASIC revision. Due to TSO-capable firmware * license issue and lower performance of firmware based TSO * we only support hardware based TSO. */ /* Calculate header length, incl. TCP/IP options, in 32 bit units. */ hlen = ((ip->ip_hl << 2) + (tcp->th_off << 2)) >> 2; if (sc->bge_flags & BGE_FLAG_TSO3) { /* * For BCM5717 and newer controllers, hardware based TSO * uses the 14 lower bits of the bge_mss field to store the * MSS and the upper 2 bits to store the lowest 2 bits of * the IP/TCP header length. The upper 6 bits of the header * length are stored in the bge_flags[14:10,4] field. Jumbo * frames are supported. */ *mss |= ((hlen & 0x3) << 14); *flags |= ((hlen & 0xF8) << 7) | ((hlen & 0x4) << 2); } else { /* * For BCM5755 and newer controllers, hardware based TSO uses * the lower 11 bits to store the MSS and the upper 5 bits to * store the IP/TCP header length. Jumbo frames are not * supported. */ *mss |= (hlen << 11); } return (m); } /* * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data * pointers to descriptors. */ static int bge_encap(struct bge_softc *sc, struct mbuf **m_head, uint32_t *txidx) { bus_dma_segment_t segs[BGE_NSEG_NEW]; bus_dmamap_t map; struct bge_tx_bd *d; struct mbuf *m = *m_head; uint32_t idx = *txidx; uint16_t csum_flags, mss, vlan_tag; int nsegs, i, error; csum_flags = 0; mss = 0; vlan_tag = 0; if ((sc->bge_flags & BGE_FLAG_SHORT_DMA_BUG) != 0 && m->m_next != NULL) { *m_head = bge_check_short_dma(m); if (*m_head == NULL) return (ENOBUFS); m = *m_head; } if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { *m_head = m = bge_setup_tso(sc, m, &mss, &csum_flags); if (*m_head == NULL) return (ENOBUFS); csum_flags |= BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA; } else if ((m->m_pkthdr.csum_flags & sc->bge_csum_features) != 0) { if (m->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= BGE_TXBDFLAG_IP_CSUM; if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)) { csum_flags |= BGE_TXBDFLAG_TCP_UDP_CSUM; if (m->m_pkthdr.len < ETHER_MIN_NOPAD && (error = bge_cksum_pad(m)) != 0) { m_freem(m); *m_head = NULL; return (error); } } } if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { if (sc->bge_flags & BGE_FLAG_JUMBO_FRAME && m->m_pkthdr.len > ETHER_MAX_LEN) csum_flags |= BGE_TXBDFLAG_JUMBO_FRAME; if (sc->bge_forced_collapse > 0 && (sc->bge_flags & BGE_FLAG_PCIE) != 0 && m->m_next != NULL) { /* * Forcedly collapse mbuf chains to overcome hardware * limitation which only support a single outstanding * DMA read operation. */ if (sc->bge_forced_collapse == 1) m = m_defrag(m, M_NOWAIT); else m = m_collapse(m, M_NOWAIT, sc->bge_forced_collapse); if (m == NULL) m = *m_head; *m_head = m; } } map = sc->bge_cdata.bge_tx_dmamap[idx]; error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(m, M_NOWAIT, BGE_NSEG_NEW); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { m_freem(m); *m_head = NULL; return (error); } } else if (error != 0) return (error); /* Check if we have enough free send BDs. */ if (sc->bge_txcnt + nsegs >= BGE_TX_RING_CNT) { bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, map); return (ENOBUFS); } bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, map, BUS_DMASYNC_PREWRITE); if (m->m_flags & M_VLANTAG) { csum_flags |= BGE_TXBDFLAG_VLAN_TAG; vlan_tag = m->m_pkthdr.ether_vtag; } if (sc->bge_asicrev == BGE_ASICREV_BCM5762 && (m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { /* * 5725 family of devices corrupts TSO packets when TSO DMA * buffers cross into regions which are within MSS bytes of * a 4GB boundary. If we encounter the condition, drop the * packet. */ for (i = 0; ; i++) { d = &sc->bge_ldata.bge_tx_ring[idx]; d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr); d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr); d->bge_len = segs[i].ds_len; if (d->bge_addr.bge_addr_lo + segs[i].ds_len + mss < d->bge_addr.bge_addr_lo) break; d->bge_flags = csum_flags; d->bge_vlan_tag = vlan_tag; d->bge_mss = mss; if (i == nsegs - 1) break; BGE_INC(idx, BGE_TX_RING_CNT); } if (i != nsegs - 1) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, map); m_freem(*m_head); *m_head = NULL; return (EIO); } } else { for (i = 0; ; i++) { d = &sc->bge_ldata.bge_tx_ring[idx]; d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr); d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr); d->bge_len = segs[i].ds_len; d->bge_flags = csum_flags; d->bge_vlan_tag = vlan_tag; d->bge_mss = mss; if (i == nsegs - 1) break; BGE_INC(idx, BGE_TX_RING_CNT); } } /* Mark the last segment as end of packet... */ d->bge_flags |= BGE_TXBDFLAG_END; /* * Insure that the map for this transmission * is placed at the array index of the last descriptor * in this chain. */ sc->bge_cdata.bge_tx_dmamap[*txidx] = sc->bge_cdata.bge_tx_dmamap[idx]; sc->bge_cdata.bge_tx_dmamap[idx] = map; sc->bge_cdata.bge_tx_chain[idx] = m; sc->bge_txcnt += nsegs; BGE_INC(idx, BGE_TX_RING_CNT); *txidx = idx; return (0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. */ static void bge_start_locked(if_t ifp) { struct bge_softc *sc; struct mbuf *m_head; uint32_t prodidx; int count; sc = if_getsoftc(ifp); BGE_LOCK_ASSERT(sc); if (!sc->bge_link || (if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; prodidx = sc->bge_tx_prodidx; for (count = 0; !if_sendq_empty(ifp);) { if (sc->bge_txcnt > BGE_TX_RING_CNT - 16) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); break; } m_head = if_dequeue(ifp); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (bge_encap(sc, &m_head, &prodidx)) { if (m_head == NULL) break; if_sendq_prepend(ifp, m_head); if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); break; } ++count; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if_bpfmtap(ifp, m_head); } if (count > 0) bge_start_tx(sc, prodidx); } static void bge_start_tx(struct bge_softc *sc, uint32_t prodidx) { bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Transmit. */ bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); sc->bge_tx_prodidx = prodidx; /* Set a timeout in case the chip goes out to lunch. */ sc->bge_timer = BGE_TX_TIMEOUT; } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. */ static void bge_start(if_t ifp) { struct bge_softc *sc; sc = if_getsoftc(ifp); BGE_LOCK(sc); bge_start_locked(ifp); BGE_UNLOCK(sc); } static void bge_init_locked(struct bge_softc *sc) { if_t ifp; uint16_t *m; uint32_t mode; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) return; /* Cancel pending I/O and flush buffers. */ bge_stop(sc); bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_START); bge_reset(sc); bge_sig_legacy(sc, BGE_RESET_START); bge_sig_post_reset(sc, BGE_RESET_START); bge_chipinit(sc); /* * Init the various state machines, ring * control blocks and firmware. */ if (bge_blockinit(sc)) { device_printf(sc->bge_dev, "initialization failure\n"); return; } ifp = sc->bge_ifp; /* Specify MTU. */ CSR_WRITE_4(sc, BGE_RX_MTU, if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + (if_getcapenable(ifp) & IFCAP_VLAN_MTU ? ETHER_VLAN_ENCAP_LEN : 0)); /* Load our MAC address. */ m = (uint16_t *)IF_LLADDR(sc->bge_ifp); CSR_WRITE_4(sc, BGE_MAC_ADDR1_LO, htons(m[0])); CSR_WRITE_4(sc, BGE_MAC_ADDR1_HI, (htons(m[1]) << 16) | htons(m[2])); /* Program promiscuous mode. */ bge_setpromisc(sc); /* Program multicast filter. */ bge_setmulti(sc); /* Program VLAN tag stripping. */ bge_setvlan(sc); /* Override UDP checksum offloading. */ if (sc->bge_forced_udpcsum == 0) sc->bge_csum_features &= ~CSUM_UDP; else sc->bge_csum_features |= CSUM_UDP; if (if_getcapabilities(ifp) & IFCAP_TXCSUM && if_getcapenable(ifp) & IFCAP_TXCSUM) { if_sethwassistbits(ifp, 0, (BGE_CSUM_FEATURES | CSUM_UDP)); if_sethwassistbits(ifp, sc->bge_csum_features, 0); } /* Init RX ring. */ if (bge_init_rx_ring_std(sc) != 0) { device_printf(sc->bge_dev, "no memory for std Rx buffers.\n"); bge_stop(sc); return; } /* * Workaround for a bug in 5705 ASIC rev A0. Poll the NIC's * memory to insure that the chip has in fact read the first * entry of the ring. */ if (sc->bge_chipid == BGE_CHIPID_BCM5705_A0) { uint32_t v, i; for (i = 0; i < 10; i++) { DELAY(20); v = bge_readmem_ind(sc, BGE_STD_RX_RINGS + 8); if (v == (MCLBYTES - ETHER_ALIGN)) break; } if (i == 10) device_printf (sc->bge_dev, "5705 A0 chip failed to load RX ring\n"); } /* Init jumbo RX ring. */ if (BGE_IS_JUMBO_CAPABLE(sc) && if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN)) { if (bge_init_rx_ring_jumbo(sc) != 0) { device_printf(sc->bge_dev, "no memory for jumbo Rx buffers.\n"); bge_stop(sc); return; } } /* Init our RX return ring index. */ sc->bge_rx_saved_considx = 0; /* Init our RX/TX stat counters. */ sc->bge_rx_discards = sc->bge_tx_discards = sc->bge_tx_collisions = 0; /* Init TX ring. */ bge_init_tx_ring(sc); /* Enable TX MAC state machine lockup fix. */ mode = CSR_READ_4(sc, BGE_TX_MODE); if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906) mode |= BGE_TXMODE_MBUF_LOCKUP_FIX; if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { mode &= ~(BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE); mode |= CSR_READ_4(sc, BGE_TX_MODE) & (BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE); } /* Turn on transmitter. */ CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE); DELAY(100); /* Turn on receiver. */ mode = CSR_READ_4(sc, BGE_RX_MODE); if (BGE_IS_5755_PLUS(sc)) mode |= BGE_RXMODE_IPV6_ENABLE; if (sc->bge_asicrev == BGE_ASICREV_BCM5762) mode |= BGE_RXMODE_IPV4_FRAG_FIX; CSR_WRITE_4(sc,BGE_RX_MODE, mode | BGE_RXMODE_ENABLE); DELAY(10); /* * Set the number of good frames to receive after RX MBUF * Low Watermark has been reached. After the RX MAC receives * this number of frames, it will drop subsequent incoming * frames until the MBUF High Watermark is reached. */ if (BGE_IS_57765_PLUS(sc)) CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1); else CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2); /* Clear MAC statistics. */ if (BGE_IS_5705_PLUS(sc)) bge_stats_clear_regs(sc); /* Tell firmware we're alive. */ BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); #ifdef DEVICE_POLLING /* Disable interrupts if we are polling. */ if (if_getcapenable(ifp) & IFCAP_POLLING) { BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); } else #endif /* Enable host interrupts. */ { BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA); BGE_CLRBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); } if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); bge_ifmedia_upd_locked(ifp); callout_reset(&sc->bge_stat_ch, hz, bge_tick, sc); } static void bge_init(void *xsc) { struct bge_softc *sc = xsc; BGE_LOCK(sc); bge_init_locked(sc); BGE_UNLOCK(sc); } /* * Set media options. */ static int bge_ifmedia_upd(if_t ifp) { struct bge_softc *sc = if_getsoftc(ifp); int res; BGE_LOCK(sc); res = bge_ifmedia_upd_locked(ifp); BGE_UNLOCK(sc); return (res); } static int bge_ifmedia_upd_locked(if_t ifp) { struct bge_softc *sc = if_getsoftc(ifp); struct mii_data *mii; struct mii_softc *miisc; struct ifmedia *ifm; BGE_LOCK_ASSERT(sc); ifm = &sc->bge_ifmedia; /* If this is a 1000baseX NIC, enable the TBI port. */ if (sc->bge_flags & BGE_FLAG_TBI) { if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch(IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: /* * The BCM5704 ASIC appears to have a special * mechanism for programming the autoneg * advertisement registers in TBI mode. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { uint32_t sgdig; sgdig = CSR_READ_4(sc, BGE_SGDIG_STS); if (sgdig & BGE_SGDIGSTS_DONE) { CSR_WRITE_4(sc, BGE_TX_TBI_AUTONEG, 0); sgdig = CSR_READ_4(sc, BGE_SGDIG_CFG); sgdig |= BGE_SGDIGCFG_AUTO | BGE_SGDIGCFG_PAUSE_CAP | BGE_SGDIGCFG_ASYM_PAUSE; CSR_WRITE_4(sc, BGE_SGDIG_CFG, sgdig | BGE_SGDIGCFG_SEND); DELAY(5); CSR_WRITE_4(sc, BGE_SGDIG_CFG, sgdig); } } break; case IFM_1000_SX: if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); } else { BGE_SETBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); } DELAY(40); break; default: return (EINVAL); } return (0); } sc->bge_link_evt++; mii = device_get_softc(sc->bge_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); mii_mediachg(mii); /* * Force an interrupt so that we will call bge_link_upd * if needed and clear any pending link state attention. * Without this we are not getting any further interrupts * for link state changes and thus will not UP the link and * not be able to send in bge_start_locked. The only * way to get things working was to receive a packet and * get an RX intr. * bge_tick should help for fiber cards and we might not * need to do this here if BGE_FLAG_TBI is set but as * we poll for fiber anyway it should not harm. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_flags & BGE_FLAG_5788) BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET); else BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW); return (0); } /* * Report current media status. */ static void bge_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct bge_softc *sc = if_getsoftc(ifp); struct mii_data *mii; BGE_LOCK(sc); if ((if_getflags(ifp) & IFF_UP) == 0) { BGE_UNLOCK(sc); return; } if (sc->bge_flags & BGE_FLAG_TBI) { ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (CSR_READ_4(sc, BGE_MAC_STS) & BGE_MACSTAT_TBI_PCS_SYNCHED) ifmr->ifm_status |= IFM_ACTIVE; else { ifmr->ifm_active |= IFM_NONE; BGE_UNLOCK(sc); return; } ifmr->ifm_active |= IFM_1000_SX; if (CSR_READ_4(sc, BGE_MAC_MODE) & BGE_MACMODE_HALF_DUPLEX) ifmr->ifm_active |= IFM_HDX; else ifmr->ifm_active |= IFM_FDX; BGE_UNLOCK(sc); return; } mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; BGE_UNLOCK(sc); } static int bge_ioctl(if_t ifp, u_long command, caddr_t data) { struct bge_softc *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int flags, mask, error = 0; switch (command) { case SIOCSIFMTU: if (BGE_IS_JUMBO_CAPABLE(sc) || (sc->bge_flags & BGE_FLAG_JUMBO_STD)) { if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > BGE_JUMBO_MTU) { error = EINVAL; break; } } else if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > ETHERMTU) { error = EINVAL; break; } BGE_LOCK(sc); if (if_getmtu(ifp) != ifr->ifr_mtu) { if_setmtu(ifp, ifr->ifr_mtu); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); bge_init_locked(sc); } } BGE_UNLOCK(sc); break; case SIOCSIFFLAGS: BGE_LOCK(sc); if (if_getflags(ifp) & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. Similarly for ALLMULTI. */ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { flags = if_getflags(ifp) ^ sc->bge_if_flags; if (flags & IFF_PROMISC) bge_setpromisc(sc); if (flags & IFF_ALLMULTI) bge_setmulti(sc); } else bge_init_locked(sc); } else { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bge_stop(sc); } } sc->bge_if_flags = if_getflags(ifp); BGE_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { BGE_LOCK(sc); bge_setmulti(sc); BGE_UNLOCK(sc); error = 0; } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: if (sc->bge_flags & BGE_FLAG_TBI) { error = ifmedia_ioctl(ifp, ifr, &sc->bge_ifmedia, command); } else { mii = device_get_softc(sc->bge_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(bge_poll, ifp); if (error) return (error); BGE_LOCK(sc); BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); if_setcapenablebit(ifp, IFCAP_POLLING, 0); BGE_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ BGE_LOCK(sc); BGE_CLRBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); if_setcapenablebit(ifp, 0, IFCAP_POLLING); BGE_UNLOCK(sc); } } #endif if ((mask & IFCAP_TXCSUM) != 0 && (if_getcapabilities(ifp) & IFCAP_TXCSUM) != 0) { if_togglecapenable(ifp, IFCAP_TXCSUM); if ((if_getcapenable(ifp) & IFCAP_TXCSUM) != 0) if_sethwassistbits(ifp, sc->bge_csum_features, 0); else if_sethwassistbits(ifp, 0, sc->bge_csum_features); } if ((mask & IFCAP_RXCSUM) != 0 && (if_getcapabilities(ifp) & IFCAP_RXCSUM) != 0) if_togglecapenable(ifp, IFCAP_RXCSUM); if ((mask & IFCAP_TSO4) != 0 && (if_getcapabilities(ifp) & IFCAP_TSO4) != 0) { if_togglecapenable(ifp, IFCAP_TSO4); if ((if_getcapenable(ifp) & IFCAP_TSO4) != 0) if_sethwassistbits(ifp, CSUM_TSO, 0); else if_sethwassistbits(ifp, 0, CSUM_TSO); } if (mask & IFCAP_VLAN_MTU) { if_togglecapenable(ifp, IFCAP_VLAN_MTU); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); bge_init(sc); } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) != 0) if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (if_getcapabilities(ifp) & IFCAP_VLAN_HWTAGGING) != 0) { if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING); if ((if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) == 0) if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO); BGE_LOCK(sc); bge_setvlan(sc); BGE_UNLOCK(sc); } #ifdef VLAN_CAPABILITIES if_vlancap(ifp); #endif break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void bge_watchdog(struct bge_softc *sc) { if_t ifp; uint32_t status; BGE_LOCK_ASSERT(sc); if (sc->bge_timer == 0 || --sc->bge_timer) return; /* If pause frames are active then don't reset the hardware. */ if ((CSR_READ_4(sc, BGE_RX_MODE) & BGE_RXMODE_FLOWCTL_ENABLE) != 0) { status = CSR_READ_4(sc, BGE_RX_STS); if ((status & BGE_RXSTAT_REMOTE_XOFFED) != 0) { /* * If link partner has us in XOFF state then wait for * the condition to clear. */ CSR_WRITE_4(sc, BGE_RX_STS, status); sc->bge_timer = BGE_TX_TIMEOUT; return; } else if ((status & BGE_RXSTAT_RCVD_XOFF) != 0 && (status & BGE_RXSTAT_RCVD_XON) != 0) { /* * If link partner has us in XOFF state then wait for * the condition to clear. */ CSR_WRITE_4(sc, BGE_RX_STS, status); sc->bge_timer = BGE_TX_TIMEOUT; return; } /* * Any other condition is unexpected and the controller * should be reset. */ } ifp = sc->bge_ifp; if_printf(ifp, "watchdog timeout -- resetting\n"); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); bge_init_locked(sc); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } static void bge_stop_block(struct bge_softc *sc, bus_size_t reg, uint32_t bit) { int i; BGE_CLRBIT(sc, reg, bit); for (i = 0; i < BGE_TIMEOUT; i++) { if ((CSR_READ_4(sc, reg) & bit) == 0) return; DELAY(100); } } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void bge_stop(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; callout_stop(&sc->bge_stat_ch); /* Disable host interrupts. */ BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); /* * Tell firmware we're shutting down. */ bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_SHUTDOWN); /* * Disable all of the receiver blocks. */ bge_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE); bge_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE); bge_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE); bge_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE); bge_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE); bge_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE); /* * Disable all of the transmit blocks. */ bge_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE); bge_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE); bge_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE); bge_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE); bge_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE); bge_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE); /* * Shut down all of the memory managers and related * state machines. */ bge_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE); bge_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0); if (!(BGE_IS_5705_PLUS(sc))) { BGE_CLRBIT(sc, BGE_BMAN_MODE, BGE_BMANMODE_ENABLE); BGE_CLRBIT(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE); } /* Update MAC statistics. */ if (BGE_IS_5705_PLUS(sc)) bge_stats_update_regs(sc); bge_reset(sc); bge_sig_legacy(sc, BGE_RESET_SHUTDOWN); bge_sig_post_reset(sc, BGE_RESET_SHUTDOWN); /* * Keep the ASF firmware running if up. */ if (sc->bge_asf_mode & ASF_STACKUP) BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); else BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); /* Free the RX lists. */ bge_free_rx_ring_std(sc); /* Free jumbo RX list. */ if (BGE_IS_JUMBO_CAPABLE(sc)) bge_free_rx_ring_jumbo(sc); /* Free TX buffers. */ bge_free_tx_ring(sc); sc->bge_tx_saved_considx = BGE_TXCONS_UNSET; /* Clear MAC's link state (PHY may still have link UP). */ if (bootverbose && sc->bge_link) if_printf(sc->bge_ifp, "link DOWN\n"); sc->bge_link = 0; if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int bge_shutdown(device_t dev) { struct bge_softc *sc; sc = device_get_softc(dev); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); return (0); } static int bge_suspend(device_t dev) { struct bge_softc *sc; sc = device_get_softc(dev); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); return (0); } static int bge_resume(device_t dev) { struct bge_softc *sc; if_t ifp; sc = device_get_softc(dev); BGE_LOCK(sc); ifp = sc->bge_ifp; if (if_getflags(ifp) & IFF_UP) { bge_init_locked(sc); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) bge_start_locked(ifp); } BGE_UNLOCK(sc); return (0); } static void bge_link_upd(struct bge_softc *sc) { struct mii_data *mii; uint32_t link, status; BGE_LOCK_ASSERT(sc); /* Clear 'pending link event' flag. */ sc->bge_link_evt = 0; /* * Process link state changes. * Grrr. The link status word in the status block does * not work correctly on the BCM5700 rev AX and BX chips, * according to all available information. Hence, we have * to enable MII interrupts in order to properly obtain * async link changes. Unfortunately, this also means that * we have to read the MAC status register to detect link * changes, thereby adding an additional register access to * the interrupt handler. * * XXX: perhaps link state detection procedure used for * BGE_CHIPID_BCM5700_B2 can be used for others BCM5700 revisions. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) { status = CSR_READ_4(sc, BGE_MAC_STS); if (status & BGE_MACSTAT_MI_INTERRUPT) { mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->bge_link++; if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); } else if (sc->bge_link && (!(mii->mii_media_status & IFM_ACTIVE) || IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); } /* Clear the interrupt. */ CSR_WRITE_4(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_MI_INTERRUPT); bge_miibus_readreg(sc->bge_dev, sc->bge_phy_addr, BRGPHY_MII_ISR); bge_miibus_writereg(sc->bge_dev, sc->bge_phy_addr, BRGPHY_MII_IMR, BRGPHY_INTRS); } return; } if (sc->bge_flags & BGE_FLAG_TBI) { status = CSR_READ_4(sc, BGE_MAC_STS); if (status & BGE_MACSTAT_TBI_PCS_SYNCHED) { if (!sc->bge_link) { sc->bge_link++; if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_TBI_SEND_CFGS); DELAY(40); } CSR_WRITE_4(sc, BGE_MAC_STS, 0xFFFFFFFF); if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); if_link_state_change(sc->bge_ifp, LINK_STATE_UP); } } else if (sc->bge_link) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); if_link_state_change(sc->bge_ifp, LINK_STATE_DOWN); } } else if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { /* * Some broken BCM chips have BGE_STATFLAG_LINKSTATE_CHANGED bit * in status word always set. Workaround this bug by reading * PHY link status directly. */ link = (CSR_READ_4(sc, BGE_MI_STS) & BGE_MISTS_LINK) ? 1 : 0; if (link != sc->bge_link || sc->bge_asicrev == BGE_ASICREV_BCM5700) { mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->bge_link++; if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); } else if (sc->bge_link && (!(mii->mii_media_status & IFM_ACTIVE) || IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); } } } else { /* * For controllers that call mii_tick, we have to poll * link status. */ mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); bge_miibus_statchg(sc->bge_dev); } /* Disable MAC attention when link is up. */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); } static void bge_add_sysctls(struct bge_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int unit; ctx = device_get_sysctl_ctx(sc->bge_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->bge_dev)); #ifdef BGE_REGISTER_DEBUG SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "debug_info", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "reg_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_reg_read, "I", "MAC Register Read"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ape_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_ape_read, "I", "APE Register Read"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mem_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_mem_read, "I", "Memory Read"); #endif unit = device_get_unit(sc->bge_dev); /* * A common design characteristic for many Broadcom client controllers * is that they only support a single outstanding DMA read operation * on the PCIe bus. This means that it will take twice as long to fetch * a TX frame that is split into header and payload buffers as it does * to fetch a single, contiguous TX frame (2 reads vs. 1 read). For * these controllers, coalescing buffers to reduce the number of memory * reads is effective way to get maximum performance(about 940Mbps). * Without collapsing TX buffers the maximum TCP bulk transfer * performance is about 850Mbps. However forcing coalescing mbufs * consumes a lot of CPU cycles, so leave it off by default. */ sc->bge_forced_collapse = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_collapse", CTLFLAG_RWTUN, &sc->bge_forced_collapse, 0, "Number of fragmented TX buffers of a frame allowed before " "forced collapsing"); sc->bge_msi = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "msi", CTLFLAG_RDTUN, &sc->bge_msi, 0, "Enable MSI"); /* * It seems all Broadcom controllers have a bug that can generate UDP * datagrams with checksum value 0 when TX UDP checksum offloading is * enabled. Generating UDP checksum value 0 is RFC 768 violation. * Even though the probability of generating such UDP datagrams is * low, I don't want to see FreeBSD boxes to inject such datagrams * into network so disable UDP checksum offloading by default. Users * still override this behavior by setting a sysctl variable, * dev.bge.0.forced_udpcsum. */ sc->bge_forced_udpcsum = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_udpcsum", CTLFLAG_RWTUN, &sc->bge_forced_udpcsum, 0, "Enable UDP checksum offloading even if controller can " "generate UDP checksum value 0"); if (BGE_IS_5705_PLUS(sc)) bge_add_sysctl_stats_regs(sc, ctx, children); else bge_add_sysctl_stats(sc, ctx, children); } #define BGE_SYSCTL_STAT(sc, ctx, desc, parent, node, oid) \ SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, oid, CTLTYPE_UINT|CTLFLAG_RD, \ sc, offsetof(struct bge_stats, node), bge_sysctl_stats, "IU", \ desc) static void bge_add_sysctl_stats(struct bge_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent) { struct sysctl_oid *tree; struct sysctl_oid_list *children, *schildren; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, NULL, "BGE Statistics"); schildren = children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Frames Dropped Due To Filters", children, COSFramesDroppedDueToFilters, "FramesDroppedDueToFilters"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Write Queue Full", children, nicDmaWriteQueueFull, "DmaWriteQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Write High Priority Queue Full", children, nicDmaWriteHighPriQueueFull, "DmaWriteHighPriQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC No More RX Buffer Descriptors", children, nicNoMoreRxBDs, "NoMoreRxBDs"); BGE_SYSCTL_STAT(sc, ctx, "Discarded Input Frames", children, ifInDiscards, "InputDiscards"); BGE_SYSCTL_STAT(sc, ctx, "Input Errors", children, ifInErrors, "InputErrors"); BGE_SYSCTL_STAT(sc, ctx, "NIC Recv Threshold Hit", children, nicRecvThresholdHit, "RecvThresholdHit"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Read Queue Full", children, nicDmaReadQueueFull, "DmaReadQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Read High Priority Queue Full", children, nicDmaReadHighPriQueueFull, "DmaReadHighPriQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC Send Data Complete Queue Full", children, nicSendDataCompQueueFull, "SendDataCompQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC Ring Set Send Producer Index", children, nicRingSetSendProdIndex, "RingSetSendProdIndex"); BGE_SYSCTL_STAT(sc, ctx, "NIC Ring Status Update", children, nicRingStatusUpdate, "RingStatusUpdate"); BGE_SYSCTL_STAT(sc, ctx, "NIC Interrupts", children, nicInterrupts, "Interrupts"); BGE_SYSCTL_STAT(sc, ctx, "NIC Avoided Interrupts", children, nicAvoidedInterrupts, "AvoidedInterrupts"); BGE_SYSCTL_STAT(sc, ctx, "NIC Send Threshold Hit", children, nicSendThresholdHit, "SendThresholdHit"); tree = SYSCTL_ADD_NODE(ctx, schildren, OID_AUTO, "rx", CTLFLAG_RD, NULL, "BGE RX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Inbound Octets", children, rxstats.ifHCInOctets, "ifHCInOctets"); BGE_SYSCTL_STAT(sc, ctx, "Fragments", children, rxstats.etherStatsFragments, "Fragments"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Unicast Packets", children, rxstats.ifHCInUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Multicast Packets", children, rxstats.ifHCInMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "FCS Errors", children, rxstats.dot3StatsFCSErrors, "FCSErrors"); BGE_SYSCTL_STAT(sc, ctx, "Alignment Errors", children, rxstats.dot3StatsAlignmentErrors, "AlignmentErrors"); BGE_SYSCTL_STAT(sc, ctx, "XON Pause Frames Received", children, rxstats.xonPauseFramesReceived, "xonPauseFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "XOFF Pause Frames Received", children, rxstats.xoffPauseFramesReceived, "xoffPauseFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "MAC Control Frames Received", children, rxstats.macControlFramesReceived, "ControlFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "XOFF State Entered", children, rxstats.xoffStateEntered, "xoffStateEntered"); BGE_SYSCTL_STAT(sc, ctx, "Frames Too Long", children, rxstats.dot3StatsFramesTooLong, "FramesTooLong"); BGE_SYSCTL_STAT(sc, ctx, "Jabbers", children, rxstats.etherStatsJabbers, "Jabbers"); BGE_SYSCTL_STAT(sc, ctx, "Undersized Packets", children, rxstats.etherStatsUndersizePkts, "UndersizePkts"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Range Length Errors", children, rxstats.inRangeLengthError, "inRangeLengthError"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Range Length Errors", children, rxstats.outRangeLengthError, "outRangeLengthError"); tree = SYSCTL_ADD_NODE(ctx, schildren, OID_AUTO, "tx", CTLFLAG_RD, NULL, "BGE TX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Outbound Octets", children, txstats.ifHCOutOctets, "ifHCOutOctets"); BGE_SYSCTL_STAT(sc, ctx, "TX Collisions", children, txstats.etherStatsCollisions, "Collisions"); BGE_SYSCTL_STAT(sc, ctx, "XON Sent", children, txstats.outXonSent, "XonSent"); BGE_SYSCTL_STAT(sc, ctx, "XOFF Sent", children, txstats.outXoffSent, "XoffSent"); BGE_SYSCTL_STAT(sc, ctx, "Flow Control Done", children, txstats.flowControlDone, "flowControlDone"); BGE_SYSCTL_STAT(sc, ctx, "Internal MAC TX errors", children, txstats.dot3StatsInternalMacTransmitErrors, "InternalMacTransmitErrors"); BGE_SYSCTL_STAT(sc, ctx, "Single Collision Frames", children, txstats.dot3StatsSingleCollisionFrames, "SingleCollisionFrames"); BGE_SYSCTL_STAT(sc, ctx, "Multiple Collision Frames", children, txstats.dot3StatsMultipleCollisionFrames, "MultipleCollisionFrames"); BGE_SYSCTL_STAT(sc, ctx, "Deferred Transmissions", children, txstats.dot3StatsDeferredTransmissions, "DeferredTransmissions"); BGE_SYSCTL_STAT(sc, ctx, "Excessive Collisions", children, txstats.dot3StatsExcessiveCollisions, "ExcessiveCollisions"); BGE_SYSCTL_STAT(sc, ctx, "Late Collisions", children, txstats.dot3StatsLateCollisions, "LateCollisions"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Unicast Packets", children, txstats.ifHCOutUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Multicast Packets", children, txstats.ifHCOutMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Broadcast Packets", children, txstats.ifHCOutBroadcastPkts, "BroadcastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Carrier Sense Errors", children, txstats.dot3StatsCarrierSenseErrors, "CarrierSenseErrors"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Discards", children, txstats.ifOutDiscards, "Discards"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Errors", children, txstats.ifOutErrors, "Errors"); } #undef BGE_SYSCTL_STAT #define BGE_SYSCTL_STAT_ADD64(c, h, n, p, d) \ SYSCTL_ADD_UQUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d) static void bge_add_sysctl_stats_regs(struct bge_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent) { struct sysctl_oid *tree; struct sysctl_oid_list *child, *schild; struct bge_mac_stats *stats; stats = &sc->bge_mac_stats; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, NULL, "BGE Statistics"); schild = child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesDroppedDueToFilters", &stats->FramesDroppedDueToFilters, "Frames Dropped Due to Filters"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteQueueFull", &stats->DmaWriteQueueFull, "NIC DMA Write Queue Full"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteHighPriQueueFull", &stats->DmaWriteHighPriQueueFull, "NIC DMA Write High Priority Queue Full"); BGE_SYSCTL_STAT_ADD64(ctx, child, "NoMoreRxBDs", &stats->NoMoreRxBDs, "NIC No More RX Buffer Descriptors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InputDiscards", &stats->InputDiscards, "Discarded Input Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InputErrors", &stats->InputErrors, "Input Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "RecvThresholdHit", &stats->RecvThresholdHit, "NIC Recv Threshold Hit"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "rx", CTLFLAG_RD, NULL, "BGE RX Statistics"); child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCInOctets", &stats->ifHCInOctets, "Inbound Octets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Fragments", &stats->etherStatsFragments, "Fragments"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", &stats->ifHCInUcastPkts, "Inbound Unicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", &stats->ifHCInMulticastPkts, "Inbound Multicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", &stats->ifHCInBroadcastPkts, "Inbound Broadcast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "FCSErrors", &stats->dot3StatsFCSErrors, "FCS Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "AlignmentErrors", &stats->dot3StatsAlignmentErrors, "Alignment Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xonPauseFramesReceived", &stats->xonPauseFramesReceived, "XON Pause Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffPauseFramesReceived", &stats->xoffPauseFramesReceived, "XOFF Pause Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "ControlFramesReceived", &stats->macControlFramesReceived, "MAC Control Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffStateEntered", &stats->xoffStateEntered, "XOFF State Entered"); BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesTooLong", &stats->dot3StatsFramesTooLong, "Frames Too Long"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Jabbers", &stats->etherStatsJabbers, "Jabbers"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UndersizePkts", &stats->etherStatsUndersizePkts, "Undersized Packets"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "tx", CTLFLAG_RD, NULL, "BGE TX Statistics"); child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCOutOctets", &stats->ifHCOutOctets, "Outbound Octets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Collisions", &stats->etherStatsCollisions, "TX Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "XonSent", &stats->outXonSent, "XON Sent"); BGE_SYSCTL_STAT_ADD64(ctx, child, "XoffSent", &stats->outXoffSent, "XOFF Sent"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InternalMacTransmitErrors", &stats->dot3StatsInternalMacTransmitErrors, "Internal MAC TX Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "SingleCollisionFrames", &stats->dot3StatsSingleCollisionFrames, "Single Collision Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MultipleCollisionFrames", &stats->dot3StatsMultipleCollisionFrames, "Multiple Collision Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DeferredTransmissions", &stats->dot3StatsDeferredTransmissions, "Deferred Transmissions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "ExcessiveCollisions", &stats->dot3StatsExcessiveCollisions, "Excessive Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "LateCollisions", &stats->dot3StatsLateCollisions, "Late Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", &stats->ifHCOutUcastPkts, "Outbound Unicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", &stats->ifHCOutMulticastPkts, "Outbound Multicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", &stats->ifHCOutBroadcastPkts, "Outbound Broadcast Packets"); } #undef BGE_SYSCTL_STAT_ADD64 static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; uint32_t result; int offset; sc = (struct bge_softc *)arg1; offset = arg2; result = CSR_READ_4(sc, BGE_MEMWIN_START + BGE_STATS_BLOCK + offset + offsetof(bge_hostaddr, bge_addr_lo)); return (sysctl_handle_int(oidp, &result, 0, req)); } #ifdef BGE_REGISTER_DEBUG static int bge_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; uint16_t *sbdata; int error, result, sbsz; int i, j; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result == 1) { sc = (struct bge_softc *)arg1; if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) sbsz = BGE_STATUS_BLK_SZ; else sbsz = 32; sbdata = (uint16_t *)sc->bge_ldata.bge_status_block; printf("Status Block:\n"); BGE_LOCK(sc); bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = 0x0; i < sbsz / sizeof(uint16_t); ) { printf("%06x:", i); for (j = 0; j < 8; j++) printf(" %04x", sbdata[i++]); printf("\n"); } printf("Registers:\n"); for (i = 0x800; i < 0xA00; ) { printf("%06x:", i); for (j = 0; j < 8; j++) { printf(" %08x", CSR_READ_4(sc, i)); i += 4; } printf("\n"); } BGE_UNLOCK(sc); printf("Hardware Flags:\n"); if (BGE_IS_5717_PLUS(sc)) printf(" - 5717 Plus\n"); if (BGE_IS_5755_PLUS(sc)) printf(" - 5755 Plus\n"); if (BGE_IS_575X_PLUS(sc)) printf(" - 575X Plus\n"); if (BGE_IS_5705_PLUS(sc)) printf(" - 5705 Plus\n"); if (BGE_IS_5714_FAMILY(sc)) printf(" - 5714 Family\n"); if (BGE_IS_5700_FAMILY(sc)) printf(" - 5700 Family\n"); if (sc->bge_flags & BGE_FLAG_JUMBO) printf(" - Supports Jumbo Frames\n"); if (sc->bge_flags & BGE_FLAG_PCIX) printf(" - PCI-X Bus\n"); if (sc->bge_flags & BGE_FLAG_PCIE) printf(" - PCI Express Bus\n"); if (sc->bge_phy_flags & BGE_PHY_NO_3LED) printf(" - No 3 LEDs\n"); if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) printf(" - RX Alignment Bug\n"); } return (error); } static int bge_sysctl_reg_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = CSR_READ_4(sc, result); printf("reg 0x%06X = 0x%08X\n", result, val); } return (error); } static int bge_sysctl_ape_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = APE_READ_4(sc, result); printf("reg 0x%06X = 0x%08X\n", result, val); } return (error); } static int bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = bge_readmem_ind(sc, result); printf("mem 0x%06X = 0x%08X\n", result, val); } return (error); } #endif static int bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[]) { if (sc->bge_flags & BGE_FLAG_EADDR) return (1); #ifdef __sparc64__ OF_getetheraddr(sc->bge_dev, ether_addr); return (0); #endif return (1); } static int bge_get_eaddr_mem(struct bge_softc *sc, uint8_t ether_addr[]) { uint32_t mac_addr; mac_addr = bge_readmem_ind(sc, BGE_SRAM_MAC_ADDR_HIGH_MB); if ((mac_addr >> 16) == 0x484b) { ether_addr[0] = (uint8_t)(mac_addr >> 8); ether_addr[1] = (uint8_t)mac_addr; mac_addr = bge_readmem_ind(sc, BGE_SRAM_MAC_ADDR_LOW_MB); ether_addr[2] = (uint8_t)(mac_addr >> 24); ether_addr[3] = (uint8_t)(mac_addr >> 16); ether_addr[4] = (uint8_t)(mac_addr >> 8); ether_addr[5] = (uint8_t)mac_addr; return (0); } return (1); } static int bge_get_eaddr_nvram(struct bge_softc *sc, uint8_t ether_addr[]) { int mac_offset = BGE_EE_MAC_OFFSET; if (sc->bge_asicrev == BGE_ASICREV_BCM5906) mac_offset = BGE_EE_MAC_OFFSET_5906; return (bge_read_nvram(sc, ether_addr, mac_offset + 2, ETHER_ADDR_LEN)); } static int bge_get_eaddr_eeprom(struct bge_softc *sc, uint8_t ether_addr[]) { if (sc->bge_asicrev == BGE_ASICREV_BCM5906) return (1); return (bge_read_eeprom(sc, ether_addr, BGE_EE_MAC_OFFSET + 2, ETHER_ADDR_LEN)); } static int bge_get_eaddr(struct bge_softc *sc, uint8_t eaddr[]) { static const bge_eaddr_fcn_t bge_eaddr_funcs[] = { /* NOTE: Order is critical */ bge_get_eaddr_fw, bge_get_eaddr_mem, bge_get_eaddr_nvram, bge_get_eaddr_eeprom, NULL }; const bge_eaddr_fcn_t *func; for (func = bge_eaddr_funcs; *func != NULL; ++func) { if ((*func)(sc, eaddr) == 0) break; } return (*func == NULL ? ENXIO : 0); } static uint64_t bge_get_counter(if_t ifp, ift_counter cnt) { struct bge_softc *sc; struct bge_mac_stats *stats; sc = if_getsoftc(ifp); if (!BGE_IS_5705_PLUS(sc)) return (if_get_counter_default(ifp, cnt)); stats = &sc->bge_mac_stats; switch (cnt) { case IFCOUNTER_IERRORS: return (stats->NoMoreRxBDs + stats->InputDiscards + stats->InputErrors); case IFCOUNTER_COLLISIONS: return (stats->etherStatsCollisions); default: return (if_get_counter_default(ifp, cnt)); } } #ifdef NETDUMP static void -bge_netdump_init(if_t ifp, int *nrxr, int *clsize) +bge_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize) { struct bge_softc *sc; sc = if_getsoftc(ifp); BGE_LOCK(sc); *nrxr = sc->bge_return_ring_cnt; + *ncl = NETDUMP_MAX_IN_FLIGHT; if ((sc->bge_flags & BGE_FLAG_JUMBO_STD) != 0 && (if_getmtu(sc->bge_ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) *clsize = MJUM9BYTES; else *clsize = MCLBYTES; BGE_UNLOCK(sc); } static void bge_netdump_event(if_t ifp __unused, enum netdump_ev event __unused) { } static int bge_netdump_transmit(if_t ifp, struct mbuf *m) { struct bge_softc *sc; uint32_t prodidx; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (1); prodidx = sc->bge_tx_prodidx; error = bge_encap(sc, &m, &prodidx); if (error == 0) bge_start_tx(sc, prodidx); return (error); } static int bge_netdump_poll(if_t ifp, int count) { struct bge_softc *sc; uint32_t rx_prod, tx_cons; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (1); bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); (void)bge_rxeof(sc, rx_prod, 0); bge_txeof(sc, tx_cons); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/bxe/bxe.c =================================================================== --- user/markj/netdump/sys/dev/bxe/bxe.c (revision 332406) +++ user/markj/netdump/sys/dev/bxe/bxe.c (revision 332407) @@ -1,19224 +1,19225 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007-2014 QLogic Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define BXE_DRIVER_VERSION "1.78.90" #include "bxe.h" #include "ecore_sp.h" #include "ecore_init.h" #include "ecore_init_ops.h" #include "57710_int_offsets.h" #include "57711_int_offsets.h" #include "57712_int_offsets.h" /* * CTLTYPE_U64 and sysctl_handle_64 were added in r217616. Define these * explicitly here for older kernels that don't include this changeset. */ #ifndef CTLTYPE_U64 #define CTLTYPE_U64 CTLTYPE_QUAD #define sysctl_handle_64 sysctl_handle_quad #endif /* * CSUM_TCP_IPV6 and CSUM_UDP_IPV6 were added in r236170. Define these * here as zero(0) for older kernels that don't include this changeset * thereby masking the functionality. */ #ifndef CSUM_TCP_IPV6 #define CSUM_TCP_IPV6 0 #define CSUM_UDP_IPV6 0 #endif /* * pci_find_cap was added in r219865. Re-define this at pci_find_extcap * for older kernels that don't include this changeset. */ #if __FreeBSD_version < 900035 #define pci_find_cap pci_find_extcap #endif #define BXE_DEF_SB_ATT_IDX 0x0001 #define BXE_DEF_SB_IDX 0x0002 /* * FLR Support - bxe_pf_flr_clnup() is called during nic_load in the per * function HW initialization. */ #define FLR_WAIT_USEC 10000 /* 10 msecs */ #define FLR_WAIT_INTERVAL 50 /* usecs */ #define FLR_POLL_CNT (FLR_WAIT_USEC / FLR_WAIT_INTERVAL) /* 200 */ struct pbf_pN_buf_regs { int pN; uint32_t init_crd; uint32_t crd; uint32_t crd_freed; }; struct pbf_pN_cmd_regs { int pN; uint32_t lines_occup; uint32_t lines_freed; }; /* * PCI Device ID Table used by bxe_probe(). */ #define BXE_DEVDESC_MAX 64 static struct bxe_device_type bxe_devs[] = { { BRCM_VENDORID, CHIP_NUM_57710, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57710 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57711, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57711 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57711E, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57711E 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57712, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57712 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57712_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57712 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57800, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57800 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57800_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57800 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57810, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57810 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57810_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57810 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57811, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57811 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57811_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57811 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57840_4_10, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 4x10GbE" }, { QLOGIC_VENDORID, CHIP_NUM_57840_4_10, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 4x10GbE" }, { BRCM_VENDORID, CHIP_NUM_57840_2_20, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 2x20GbE" }, { BRCM_VENDORID, CHIP_NUM_57840_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 MF 10GbE" }, { 0, 0, 0, 0, NULL } }; MALLOC_DECLARE(M_BXE_ILT); MALLOC_DEFINE(M_BXE_ILT, "bxe_ilt", "bxe ILT pointer"); /* * FreeBSD device entry points. */ static int bxe_probe(device_t); static int bxe_attach(device_t); static int bxe_detach(device_t); static int bxe_shutdown(device_t); /* * FreeBSD KLD module/device interface event handler method. */ static device_method_t bxe_methods[] = { /* Device interface (device_if.h) */ DEVMETHOD(device_probe, bxe_probe), DEVMETHOD(device_attach, bxe_attach), DEVMETHOD(device_detach, bxe_detach), DEVMETHOD(device_shutdown, bxe_shutdown), /* Bus interface (bus_if.h) */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), KOBJMETHOD_END }; /* * FreeBSD KLD Module data declaration */ static driver_t bxe_driver = { "bxe", /* module name */ bxe_methods, /* event handler */ sizeof(struct bxe_softc) /* extra data */ }; /* * FreeBSD dev class is needed to manage dev instances and * to associate with a bus type */ static devclass_t bxe_devclass; MODULE_DEPEND(bxe, pci, 1, 1, 1); MODULE_DEPEND(bxe, ether, 1, 1, 1); DRIVER_MODULE(bxe, pci, bxe_driver, bxe_devclass, 0, 0); NETDUMP_DEFINE(bxe); /* resources needed for unloading a previously loaded device */ #define BXE_PREV_WAIT_NEEDED 1 struct mtx bxe_prev_mtx; MTX_SYSINIT(bxe_prev_mtx, &bxe_prev_mtx, "bxe_prev_lock", MTX_DEF); struct bxe_prev_list_node { LIST_ENTRY(bxe_prev_list_node) node; uint8_t bus; uint8_t slot; uint8_t path; uint8_t aer; /* XXX automatic error recovery */ uint8_t undi; }; static LIST_HEAD(, bxe_prev_list_node) bxe_prev_list = LIST_HEAD_INITIALIZER(bxe_prev_list); static int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */ /* Tunable device values... */ SYSCTL_NODE(_hw, OID_AUTO, bxe, CTLFLAG_RD, 0, "bxe driver parameters"); /* Debug */ unsigned long bxe_debug = 0; SYSCTL_ULONG(_hw_bxe, OID_AUTO, debug, CTLFLAG_RDTUN, &bxe_debug, 0, "Debug logging mode"); /* Interrupt Mode: 0 (IRQ), 1 (MSI/IRQ), and 2 (MSI-X/MSI/IRQ) */ static int bxe_interrupt_mode = INTR_MODE_MSIX; SYSCTL_INT(_hw_bxe, OID_AUTO, interrupt_mode, CTLFLAG_RDTUN, &bxe_interrupt_mode, 0, "Interrupt (MSI-X/MSI/INTx) mode"); /* Number of Queues: 0 (Auto) or 1 to 16 (fixed queue number) */ static int bxe_queue_count = 4; SYSCTL_INT(_hw_bxe, OID_AUTO, queue_count, CTLFLAG_RDTUN, &bxe_queue_count, 0, "Multi-Queue queue count"); /* max number of buffers per queue (default RX_BD_USABLE) */ static int bxe_max_rx_bufs = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, max_rx_bufs, CTLFLAG_RDTUN, &bxe_max_rx_bufs, 0, "Maximum Number of Rx Buffers Per Queue"); /* Host interrupt coalescing RX tick timer (usecs) */ static int bxe_hc_rx_ticks = 25; SYSCTL_INT(_hw_bxe, OID_AUTO, hc_rx_ticks, CTLFLAG_RDTUN, &bxe_hc_rx_ticks, 0, "Host Coalescing Rx ticks"); /* Host interrupt coalescing TX tick timer (usecs) */ static int bxe_hc_tx_ticks = 50; SYSCTL_INT(_hw_bxe, OID_AUTO, hc_tx_ticks, CTLFLAG_RDTUN, &bxe_hc_tx_ticks, 0, "Host Coalescing Tx ticks"); /* Maximum number of Rx packets to process at a time */ static int bxe_rx_budget = 0xffffffff; SYSCTL_INT(_hw_bxe, OID_AUTO, rx_budget, CTLFLAG_TUN, &bxe_rx_budget, 0, "Rx processing budget"); /* Maximum LRO aggregation size */ static int bxe_max_aggregation_size = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, max_aggregation_size, CTLFLAG_TUN, &bxe_max_aggregation_size, 0, "max aggregation size"); /* PCI MRRS: -1 (Auto), 0 (128B), 1 (256B), 2 (512B), 3 (1KB) */ static int bxe_mrrs = -1; SYSCTL_INT(_hw_bxe, OID_AUTO, mrrs, CTLFLAG_RDTUN, &bxe_mrrs, 0, "PCIe maximum read request size"); /* AutoGrEEEn: 0 (hardware default), 1 (force on), 2 (force off) */ static int bxe_autogreeen = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, autogreeen, CTLFLAG_RDTUN, &bxe_autogreeen, 0, "AutoGrEEEn support"); /* 4-tuple RSS support for UDP: 0 (disabled), 1 (enabled) */ static int bxe_udp_rss = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, udp_rss, CTLFLAG_RDTUN, &bxe_udp_rss, 0, "UDP RSS support"); #define STAT_NAME_LEN 32 /* no stat names below can be longer than this */ #define STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_stats, stat_name) / 4) #define Q_STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_q_stats, stat_name) / 4) static const struct { uint32_t offset; uint32_t size; uint32_t flags; #define STATS_FLAGS_PORT 1 #define STATS_FLAGS_FUNC 2 /* MF only cares about function stats */ #define STATS_FLAGS_BOTH (STATS_FLAGS_FUNC | STATS_FLAGS_PORT) char string[STAT_NAME_LEN]; } bxe_eth_stats_arr[] = { { STATS_OFFSET32(total_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_bytes" }, { STATS_OFFSET32(error_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_bcast_packets" }, { STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi), 8, STATS_FLAGS_PORT, "rx_crc_errors" }, { STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi), 8, STATS_FLAGS_PORT, "rx_align_errors" }, { STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi), 8, STATS_FLAGS_PORT, "rx_undersize_packets" }, { STATS_OFFSET32(etherstatsoverrsizepkts_hi), 8, STATS_FLAGS_PORT, "rx_oversize_packets" }, { STATS_OFFSET32(rx_stat_etherstatsfragments_hi), 8, STATS_FLAGS_PORT, "rx_fragments" }, { STATS_OFFSET32(rx_stat_etherstatsjabbers_hi), 8, STATS_FLAGS_PORT, "rx_jabbers" }, { STATS_OFFSET32(no_buff_discard_hi), 8, STATS_FLAGS_BOTH, "rx_discards" }, { STATS_OFFSET32(mac_filter_discard), 4, STATS_FLAGS_PORT, "rx_filtered_packets" }, { STATS_OFFSET32(mf_tag_discard), 4, STATS_FLAGS_PORT, "rx_mf_tag_discard" }, { STATS_OFFSET32(pfc_frames_received_hi), 8, STATS_FLAGS_PORT, "pfc_frames_received" }, { STATS_OFFSET32(pfc_frames_sent_hi), 8, STATS_FLAGS_PORT, "pfc_frames_sent" }, { STATS_OFFSET32(brb_drop_hi), 8, STATS_FLAGS_PORT, "rx_brb_discard" }, { STATS_OFFSET32(brb_truncate_hi), 8, STATS_FLAGS_PORT, "rx_brb_truncate" }, { STATS_OFFSET32(pause_frames_received_hi), 8, STATS_FLAGS_PORT, "rx_pause_frames" }, { STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi), 8, STATS_FLAGS_PORT, "rx_mac_ctrl_frames" }, { STATS_OFFSET32(nig_timer_max), 4, STATS_FLAGS_PORT, "rx_constant_pause_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), 8, STATS_FLAGS_PORT, "tx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bcast_packets" }, { STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi), 8, STATS_FLAGS_PORT, "tx_mac_errors" }, { STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi), 8, STATS_FLAGS_PORT, "tx_carrier_errors" }, { STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_single_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_multi_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi), 8, STATS_FLAGS_PORT, "tx_deferred" }, { STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi), 8, STATS_FLAGS_PORT, "tx_excess_collisions" }, { STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi), 8, STATS_FLAGS_PORT, "tx_late_collisions" }, { STATS_OFFSET32(tx_stat_etherstatscollisions_hi), 8, STATS_FLAGS_PORT, "tx_total_collisions" }, { STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi), 8, STATS_FLAGS_PORT, "tx_64_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi), 8, STATS_FLAGS_PORT, "tx_65_to_127_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi), 8, STATS_FLAGS_PORT, "tx_128_to_255_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi), 8, STATS_FLAGS_PORT, "tx_256_to_511_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi), 8, STATS_FLAGS_PORT, "tx_512_to_1023_byte_packets" }, { STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1024_to_1522_byte_packets" }, { STATS_OFFSET32(etherstatspktsover1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1523_to_9022_byte_packets" }, { STATS_OFFSET32(pause_frames_sent_hi), 8, STATS_FLAGS_PORT, "tx_pause_frames" }, { STATS_OFFSET32(total_tpa_aggregations_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregations" }, { STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregated_frames"}, { STATS_OFFSET32(total_tpa_bytes_hi), 8, STATS_FLAGS_FUNC, "tpa_bytes"}, { STATS_OFFSET32(eee_tx_lpi), 4, STATS_FLAGS_PORT, "eee_tx_lpi"}, { STATS_OFFSET32(rx_calls), 4, STATS_FLAGS_FUNC, "rx_calls"}, { STATS_OFFSET32(rx_pkts), 4, STATS_FLAGS_FUNC, "rx_pkts"}, { STATS_OFFSET32(rx_tpa_pkts), 4, STATS_FLAGS_FUNC, "rx_tpa_pkts"}, { STATS_OFFSET32(rx_erroneous_jumbo_sge_pkts), 4, STATS_FLAGS_FUNC, "rx_erroneous_jumbo_sge_pkts"}, { STATS_OFFSET32(rx_bxe_service_rxsgl), 4, STATS_FLAGS_FUNC, "rx_bxe_service_rxsgl"}, { STATS_OFFSET32(rx_jumbo_sge_pkts), 4, STATS_FLAGS_FUNC, "rx_jumbo_sge_pkts"}, { STATS_OFFSET32(rx_soft_errors), 4, STATS_FLAGS_FUNC, "rx_soft_errors"}, { STATS_OFFSET32(rx_hw_csum_errors), 4, STATS_FLAGS_FUNC, "rx_hw_csum_errors"}, { STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_ip"}, { STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_tcp_udp"}, { STATS_OFFSET32(rx_budget_reached), 4, STATS_FLAGS_FUNC, "rx_budget_reached"}, { STATS_OFFSET32(tx_pkts), 4, STATS_FLAGS_FUNC, "tx_pkts"}, { STATS_OFFSET32(tx_soft_errors), 4, STATS_FLAGS_FUNC, "tx_soft_errors"}, { STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_ip"}, { STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_tcp"}, { STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_udp"}, { STATS_OFFSET32(tx_ofld_frames_lso), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso"}, { STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso_hdr_splits"}, { STATS_OFFSET32(tx_encap_failures), 4, STATS_FLAGS_FUNC, "tx_encap_failures"}, { STATS_OFFSET32(tx_hw_queue_full), 4, STATS_FLAGS_FUNC, "tx_hw_queue_full"}, { STATS_OFFSET32(tx_hw_max_queue_depth), 4, STATS_FLAGS_FUNC, "tx_hw_max_queue_depth"}, { STATS_OFFSET32(tx_dma_mapping_failure), 4, STATS_FLAGS_FUNC, "tx_dma_mapping_failure"}, { STATS_OFFSET32(tx_max_drbr_queue_depth), 4, STATS_FLAGS_FUNC, "tx_max_drbr_queue_depth"}, { STATS_OFFSET32(tx_window_violation_std), 4, STATS_FLAGS_FUNC, "tx_window_violation_std"}, { STATS_OFFSET32(tx_window_violation_tso), 4, STATS_FLAGS_FUNC, "tx_window_violation_tso"}, { STATS_OFFSET32(tx_chain_lost_mbuf), 4, STATS_FLAGS_FUNC, "tx_chain_lost_mbuf"}, { STATS_OFFSET32(tx_frames_deferred), 4, STATS_FLAGS_FUNC, "tx_frames_deferred"}, { STATS_OFFSET32(tx_queue_xoff), 4, STATS_FLAGS_FUNC, "tx_queue_xoff"}, { STATS_OFFSET32(mbuf_defrag_attempts), 4, STATS_FLAGS_FUNC, "mbuf_defrag_attempts"}, { STATS_OFFSET32(mbuf_defrag_failures), 4, STATS_FLAGS_FUNC, "mbuf_defrag_failures"}, { STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_mapping_failed"}, { STATS_OFFSET32(mbuf_alloc_tx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tx"}, { STATS_OFFSET32(mbuf_alloc_rx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_rx"}, { STATS_OFFSET32(mbuf_alloc_sge), 4, STATS_FLAGS_FUNC, "mbuf_alloc_sge"}, { STATS_OFFSET32(mbuf_alloc_tpa), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tpa"}, { STATS_OFFSET32(tx_queue_full_return), 4, STATS_FLAGS_FUNC, "tx_queue_full_return"}, { STATS_OFFSET32(bxe_tx_mq_sc_state_failures), 4, STATS_FLAGS_FUNC, "bxe_tx_mq_sc_state_failures"}, { STATS_OFFSET32(tx_request_link_down_failures), 4, STATS_FLAGS_FUNC, "tx_request_link_down_failures"}, { STATS_OFFSET32(bd_avail_too_less_failures), 4, STATS_FLAGS_FUNC, "bd_avail_too_less_failures"}, { STATS_OFFSET32(tx_mq_not_empty), 4, STATS_FLAGS_FUNC, "tx_mq_not_empty"}, { STATS_OFFSET32(nsegs_path1_errors), 4, STATS_FLAGS_FUNC, "nsegs_path1_errors"}, { STATS_OFFSET32(nsegs_path2_errors), 4, STATS_FLAGS_FUNC, "nsegs_path2_errors"} }; static const struct { uint32_t offset; uint32_t size; char string[STAT_NAME_LEN]; } bxe_eth_q_stats_arr[] = { { Q_STATS_OFFSET32(total_bytes_received_hi), 8, "rx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_received_hi), 8, "rx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_received_hi), 8, "rx_mcast_packets" }, { Q_STATS_OFFSET32(total_broadcast_packets_received_hi), 8, "rx_bcast_packets" }, { Q_STATS_OFFSET32(no_buff_discard_hi), 8, "rx_discards" }, { Q_STATS_OFFSET32(total_bytes_transmitted_hi), 8, "tx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, "tx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, "tx_mcast_packets" }, { Q_STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, "tx_bcast_packets" }, { Q_STATS_OFFSET32(total_tpa_aggregations_hi), 8, "tpa_aggregations" }, { Q_STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, "tpa_aggregated_frames"}, { Q_STATS_OFFSET32(total_tpa_bytes_hi), 8, "tpa_bytes"}, { Q_STATS_OFFSET32(rx_calls), 4, "rx_calls"}, { Q_STATS_OFFSET32(rx_pkts), 4, "rx_pkts"}, { Q_STATS_OFFSET32(rx_tpa_pkts), 4, "rx_tpa_pkts"}, { Q_STATS_OFFSET32(rx_erroneous_jumbo_sge_pkts), 4, "rx_erroneous_jumbo_sge_pkts"}, { Q_STATS_OFFSET32(rx_bxe_service_rxsgl), 4, "rx_bxe_service_rxsgl"}, { Q_STATS_OFFSET32(rx_jumbo_sge_pkts), 4, "rx_jumbo_sge_pkts"}, { Q_STATS_OFFSET32(rx_soft_errors), 4, "rx_soft_errors"}, { Q_STATS_OFFSET32(rx_hw_csum_errors), 4, "rx_hw_csum_errors"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, "rx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, "rx_ofld_frames_csum_tcp_udp"}, { Q_STATS_OFFSET32(rx_budget_reached), 4, "rx_budget_reached"}, { Q_STATS_OFFSET32(tx_pkts), 4, "tx_pkts"}, { Q_STATS_OFFSET32(tx_soft_errors), 4, "tx_soft_errors"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, "tx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, "tx_ofld_frames_csum_tcp"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, "tx_ofld_frames_csum_udp"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso), 4, "tx_ofld_frames_lso"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, "tx_ofld_frames_lso_hdr_splits"}, { Q_STATS_OFFSET32(tx_encap_failures), 4, "tx_encap_failures"}, { Q_STATS_OFFSET32(tx_hw_queue_full), 4, "tx_hw_queue_full"}, { Q_STATS_OFFSET32(tx_hw_max_queue_depth), 4, "tx_hw_max_queue_depth"}, { Q_STATS_OFFSET32(tx_dma_mapping_failure), 4, "tx_dma_mapping_failure"}, { Q_STATS_OFFSET32(tx_max_drbr_queue_depth), 4, "tx_max_drbr_queue_depth"}, { Q_STATS_OFFSET32(tx_window_violation_std), 4, "tx_window_violation_std"}, { Q_STATS_OFFSET32(tx_window_violation_tso), 4, "tx_window_violation_tso"}, { Q_STATS_OFFSET32(tx_chain_lost_mbuf), 4, "tx_chain_lost_mbuf"}, { Q_STATS_OFFSET32(tx_frames_deferred), 4, "tx_frames_deferred"}, { Q_STATS_OFFSET32(tx_queue_xoff), 4, "tx_queue_xoff"}, { Q_STATS_OFFSET32(mbuf_defrag_attempts), 4, "mbuf_defrag_attempts"}, { Q_STATS_OFFSET32(mbuf_defrag_failures), 4, "mbuf_defrag_failures"}, { Q_STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, "mbuf_rx_bd_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, "mbuf_rx_bd_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, "mbuf_rx_tpa_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, "mbuf_rx_tpa_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, "mbuf_rx_sge_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, "mbuf_rx_sge_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_alloc_tx), 4, "mbuf_alloc_tx"}, { Q_STATS_OFFSET32(mbuf_alloc_rx), 4, "mbuf_alloc_rx"}, { Q_STATS_OFFSET32(mbuf_alloc_sge), 4, "mbuf_alloc_sge"}, { Q_STATS_OFFSET32(mbuf_alloc_tpa), 4, "mbuf_alloc_tpa"}, { Q_STATS_OFFSET32(tx_queue_full_return), 4, "tx_queue_full_return"}, { Q_STATS_OFFSET32(bxe_tx_mq_sc_state_failures), 4, "bxe_tx_mq_sc_state_failures"}, { Q_STATS_OFFSET32(tx_request_link_down_failures), 4, "tx_request_link_down_failures"}, { Q_STATS_OFFSET32(bd_avail_too_less_failures), 4, "bd_avail_too_less_failures"}, { Q_STATS_OFFSET32(tx_mq_not_empty), 4, "tx_mq_not_empty"}, { Q_STATS_OFFSET32(nsegs_path1_errors), 4, "nsegs_path1_errors"}, { Q_STATS_OFFSET32(nsegs_path2_errors), 4, "nsegs_path2_errors"} }; #define BXE_NUM_ETH_STATS ARRAY_SIZE(bxe_eth_stats_arr) #define BXE_NUM_ETH_Q_STATS ARRAY_SIZE(bxe_eth_q_stats_arr) static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type); static int bxe_get_cmng_fns_mode(struct bxe_softc *sc); static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port); static void bxe_set_reset_global(struct bxe_softc *sc); static void bxe_set_reset_in_progress(struct bxe_softc *sc); static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine); static uint8_t bxe_clear_pf_load(struct bxe_softc *sc); static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print); static void bxe_int_disable(struct bxe_softc *sc); static int bxe_release_leader_lock(struct bxe_softc *sc); static void bxe_pf_disable(struct bxe_softc *sc); static void bxe_free_fp_buffers(struct bxe_softc *sc); static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod); static void bxe_link_report_locked(struct bxe_softc *sc); static void bxe_link_report(struct bxe_softc *sc); static void bxe_link_status_update(struct bxe_softc *sc); static void bxe_periodic_callout_func(void *xsc); static void bxe_periodic_start(struct bxe_softc *sc); static void bxe_periodic_stop(struct bxe_softc *sc); static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index); static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue); static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index); static uint8_t bxe_txeof(struct bxe_softc *sc, struct bxe_fastpath *fp); static void bxe_task_fp(struct bxe_fastpath *fp); static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents); static int bxe_alloc_mem(struct bxe_softc *sc); static void bxe_free_mem(struct bxe_softc *sc); static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc); static void bxe_free_fw_stats_mem(struct bxe_softc *sc); static int bxe_interrupt_attach(struct bxe_softc *sc); static void bxe_interrupt_detach(struct bxe_softc *sc); static void bxe_set_rx_mode(struct bxe_softc *sc); static int bxe_init_locked(struct bxe_softc *sc); static int bxe_stop_locked(struct bxe_softc *sc); static __noinline int bxe_nic_load(struct bxe_softc *sc, int load_mode); static __noinline int bxe_nic_unload(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link); static void bxe_handle_sp_tq(void *context, int pending); static void bxe_handle_fp_tq(void *context, int pending); static int bxe_add_cdev(struct bxe_softc *sc); static void bxe_del_cdev(struct bxe_softc *sc); int bxe_grc_dump(struct bxe_softc *sc); static int bxe_alloc_buf_rings(struct bxe_softc *sc); static void bxe_free_buf_rings(struct bxe_softc *sc); /* calculate crc32 on a buffer (NOTE: crc32_length MUST be aligned to 8) */ uint32_t calc_crc32(uint8_t *crc32_packet, uint32_t crc32_length, uint32_t crc32_seed, uint8_t complement) { uint32_t byte = 0; uint32_t bit = 0; uint8_t msb = 0; uint32_t temp = 0; uint32_t shft = 0; uint8_t current_byte = 0; uint32_t crc32_result = crc32_seed; const uint32_t CRC32_POLY = 0x1edc6f41; if ((crc32_packet == NULL) || (crc32_length == 0) || ((crc32_length % 8) != 0)) { return (crc32_result); } for (byte = 0; byte < crc32_length; byte = byte + 1) { current_byte = crc32_packet[byte]; for (bit = 0; bit < 8; bit = bit + 1) { /* msb = crc32_result[31]; */ msb = (uint8_t)(crc32_result >> 31); crc32_result = crc32_result << 1; /* it (msb != current_byte[bit]) */ if (msb != (0x1 & (current_byte >> bit))) { crc32_result = crc32_result ^ CRC32_POLY; /* crc32_result[0] = 1 */ crc32_result |= 1; } } } /* Last step is to: * 1. "mirror" every bit * 2. swap the 4 bytes * 3. complement each bit */ /* Mirror */ temp = crc32_result; shft = sizeof(crc32_result) * 8 - 1; for (crc32_result >>= 1; crc32_result; crc32_result >>= 1) { temp <<= 1; temp |= crc32_result & 1; shft-- ; } /* temp[31-bit] = crc32_result[bit] */ temp <<= shft; /* Swap */ /* crc32_result = {temp[7:0], temp[15:8], temp[23:16], temp[31:24]} */ { uint32_t t0, t1, t2, t3; t0 = (0x000000ff & (temp >> 24)); t1 = (0x0000ff00 & (temp >> 8)); t2 = (0x00ff0000 & (temp << 8)); t3 = (0xff000000 & (temp << 24)); crc32_result = t0 | t1 | t2 | t3; } /* Complement */ if (complement) { crc32_result = ~crc32_result; } return (crc32_result); } int bxe_test_bit(int nr, volatile unsigned long *addr) { return ((atomic_load_acq_long(addr) & (1 << nr)) != 0); } void bxe_set_bit(unsigned int nr, volatile unsigned long *addr) { atomic_set_acq_long(addr, (1 << nr)); } void bxe_clear_bit(int nr, volatile unsigned long *addr) { atomic_clear_acq_long(addr, (1 << nr)); } int bxe_test_and_set_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while (atomic_cmpset_acq_long(addr, x, x | nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_test_and_clear_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while (atomic_cmpset_acq_long(addr, x, x & ~nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_cmpxchg(volatile int *addr, int old, int new) { int x; do { x = *addr; } while (atomic_cmpset_acq_int(addr, old, new) == 0); return (x); } /* * Get DMA memory from the OS. * * Validates that the OS has provided DMA buffers in response to a * bus_dmamap_load call and saves the physical address of those buffers. * When the callback is used the OS will return 0 for the mapping function * (bus_dmamap_load) so we use the value of map_arg->maxsegs to pass any * failures back to the caller. * * Returns: * Nothing. */ static void bxe_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct bxe_dma *dma = arg; if (error) { dma->paddr = 0; dma->nseg = 0; BLOGE(dma->sc, "Failed DMA alloc '%s' (%d)!\n", dma->msg, error); } else { dma->paddr = segs->ds_addr; dma->nseg = nseg; } } /* * Allocate a block of memory and map it for DMA. No partial completions * allowed and release any resources acquired if we can't acquire all * resources. * * Returns: * 0 = Success, !0 = Failure */ int bxe_dma_alloc(struct bxe_softc *sc, bus_size_t size, struct bxe_dma *dma, const char *msg) { int rc; if (dma->size > 0) { BLOGE(sc, "dma block '%s' already has size %lu\n", msg, (unsigned long)dma->size); return (1); } memset(dma, 0, sizeof(*dma)); /* sanity */ dma->sc = sc; dma->size = size; snprintf(dma->msg, sizeof(dma->msg), "%s", msg); rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ BCM_PAGE_SIZE, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ size, /* max map size */ 1, /* num discontinuous */ size, /* max seg size */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &dma->tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to create dma tag for '%s' (%d)\n", msg, rc); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, (BUS_DMA_NOWAIT | BUS_DMA_ZERO), &dma->map); if (rc != 0) { BLOGE(sc, "Failed to alloc dma mem for '%s' (%d)\n", msg, rc); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, bxe_dma_map_addr, /* BLOGD in here */ dma, BUS_DMA_NOWAIT); if (rc != 0) { BLOGE(sc, "Failed to load dma map for '%s' (%d)\n", msg, rc); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } return (0); } void bxe_dma_free(struct bxe_softc *sc, struct bxe_dma *dma) { if (dma->size > 0) { DBASSERT(sc, (dma->tag != NULL), ("dma tag is NULL")); bus_dmamap_sync(dma->tag, dma->map, (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); } memset(dma, 0, sizeof(*dma)); } /* * These indirect read and write routines are only during init. * The locking is handled by the MCP. */ void bxe_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); pci_write_config(sc->dev, PCICFG_GRC_DATA, val, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); } uint32_t bxe_reg_rd_ind(struct bxe_softc *sc, uint32_t addr) { uint32_t val; pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); val = pci_read_config(sc->dev, PCICFG_GRC_DATA, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); return (val); } static int bxe_acquire_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; int cnt; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "(resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE)" " resource_bit 0x%x\n", resource, resource_bit); return (-1); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is not already taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { BLOGE(sc, "resource (0x%x) in use (status 0x%x bit 0x%x)\n", resource, lock_status, resource_bit); return (-1); } /* try every 5ms for 5 seconds */ for (cnt = 0; cnt < 1000; cnt++) { REG_WR(sc, (hw_lock_control_reg + 4), resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (0); } DELAY(5000); } BLOGE(sc, "Resource 0x%x resource_bit 0x%x lock timeout!\n", resource, resource_bit); return (-1); } static int bxe_release_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "(resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE)" " resource_bit 0x%x\n", resource, resource_bit); return (-1); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is currently taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (!(lock_status & resource_bit)) { BLOGE(sc, "resource (0x%x) not in use (status 0x%x bit 0x%x)\n", resource, lock_status, resource_bit); return (-1); } REG_WR(sc, hw_lock_control_reg, resource_bit); return (0); } static void bxe_acquire_phy_lock(struct bxe_softc *sc) { BXE_PHY_LOCK(sc); bxe_acquire_hw_lock(sc,HW_LOCK_RESOURCE_MDIO); } static void bxe_release_phy_lock(struct bxe_softc *sc) { bxe_release_hw_lock(sc,HW_LOCK_RESOURCE_MDIO); BXE_PHY_UNLOCK(sc); } /* * Per pf misc lock must be acquired before the per port mcp lock. Otherwise, * had we done things the other way around, if two pfs from the same port * would attempt to access nvram at the same time, we could run into a * scenario such as: * pf A takes the port lock. * pf B succeeds in taking the same lock since they are from the same port. * pf A takes the per pf misc lock. Performs eeprom access. * pf A finishes. Unlocks the per pf misc lock. * Pf B takes the lock and proceeds to perform it's own access. * pf A unlocks the per port lock, while pf B is still working (!). * mcp takes the per port lock and corrupts pf B's access (and/or has it's own * access corrupted by pf B).* */ static int bxe_acquire_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* acquire HW lock: protect against other PFs in PF Direct Assignment */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* request access to nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_SET1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { break; } DELAY(5); } if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { BLOGE(sc, "Cannot get access to nvram interface " "port %d val 0x%x (MCPR_NVM_SW_ARB_ARB_ARB1 << port)\n", port, val); return (-1); } return (0); } static int bxe_release_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* relinquish nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { break; } DELAY(5); } if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { BLOGE(sc, "Cannot free access to nvram interface " "port %d val 0x%x (MCPR_NVM_SW_ARB_ARB_ARB1 << port)\n", port, val); return (-1); } /* release HW lock: protect against other PFs in PF Direct Assignment */ bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); return (0); } static void bxe_enable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* enable both bits, even on read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val | MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN)); } static void bxe_disable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* disable both bits, even after read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val & ~(MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN))); } static int bxe_nvram_read_dword(struct bxe_softc *sc, uint32_t offset, uint32_t *ret_val, uint32_t cmd_flags) { int count, i, rc; uint32_t val; /* build the command word */ cmd_flags |= MCPR_NVM_COMMAND_DOIT; /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* address of the NVRAM to read from */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue a read command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ *ret_val = 0; rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { val = REG_RD(sc, MCP_REG_MCPR_NVM_READ); /* we read nvram data in cpu order * but ethtool sees it as an array of bytes * converting to big-endian will do the work */ *ret_val = htobe32(val); rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram read timeout expired " "(offset 0x%x cmd_flags 0x%x val 0x%x)\n", offset, cmd_flags, val); } return (rc); } static int bxe_nvram_read(struct bxe_softc *sc, uint32_t offset, uint8_t *ret_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; int rc; if ((offset & 0x03) || (buf_size & 0x03) || (buf_size == 0)) { BLOGE(sc, "Invalid parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); /* read the first word(s) */ cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((buf_size > sizeof(uint32_t)) && (rc == 0)) { rc = bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); /* advance to the next dword */ offset += sizeof(uint32_t); ret_buf += sizeof(uint32_t); buf_size -= sizeof(uint32_t); cmd_flags = 0; } if (rc == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; rc = bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write_dword(struct bxe_softc *sc, uint32_t offset, uint32_t val, uint32_t cmd_flags) { int count, i, rc; /* build the command word */ cmd_flags |= (MCPR_NVM_COMMAND_DOIT | MCPR_NVM_COMMAND_WR); /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* write the data */ REG_WR(sc, MCP_REG_MCPR_NVM_WRITE, val); /* address of the NVRAM to write to */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue the write command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram write timeout expired " "(offset 0x%x cmd_flags 0x%x val 0x%x)\n", offset, cmd_flags, val); } return (rc); } #define BYTE_OFFSET(offset) (8 * (offset & 0x03)) static int bxe_nvram_write1(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t align_offset; uint32_t val; int rc; if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); cmd_flags = (MCPR_NVM_COMMAND_FIRST | MCPR_NVM_COMMAND_LAST); align_offset = (offset & ~0x03); rc = bxe_nvram_read_dword(sc, align_offset, &val, cmd_flags); if (rc == 0) { val &= ~(0xff << BYTE_OFFSET(offset)); val |= (*data_buf << BYTE_OFFSET(offset)); /* nvram data is returned as an array of bytes * convert it back to cpu order */ val = be32toh(val); rc = bxe_nvram_write_dword(sc, align_offset, val, cmd_flags); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; uint32_t written_so_far; int rc; if (buf_size == 1) { return (bxe_nvram_write1(sc, offset, data_buf, buf_size)); } if ((offset & 0x03) || (buf_size & 0x03) /* || (buf_size == 0) */) { BLOGE(sc, "Invalid parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if (buf_size == 0) { return (0); /* nothing to do */ } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); written_so_far = 0; cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((written_so_far < buf_size) && (rc == 0)) { if (written_so_far == (buf_size - sizeof(uint32_t))) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if (((offset + 4) % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if ((offset % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_FIRST; } memcpy(&val, data_buf, 4); rc = bxe_nvram_write_dword(sc, offset, val, cmd_flags); /* advance to the next dword */ offset += sizeof(uint32_t); data_buf += sizeof(uint32_t); written_so_far += sizeof(uint32_t); cmd_flags = 0; } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } /* copy command into DMAE command memory and set DMAE command Go */ void bxe_post_dmae(struct bxe_softc *sc, struct dmae_cmd *dmae, int idx) { uint32_t cmd_offset; int i; cmd_offset = (DMAE_REG_CMD_MEM + (sizeof(struct dmae_cmd) * idx)); for (i = 0; i < ((sizeof(struct dmae_cmd) / 4)); i++) { REG_WR(sc, (cmd_offset + (i * 4)), *(((uint32_t *)dmae) + i)); } REG_WR(sc, dmae_reg_go_c[idx], 1); } uint32_t bxe_dmae_opcode_add_comp(uint32_t opcode, uint8_t comp_type) { return (opcode | ((comp_type << DMAE_CMD_C_DST_SHIFT) | DMAE_CMD_C_TYPE_ENABLE)); } uint32_t bxe_dmae_opcode_clr_src_reset(uint32_t opcode) { return (opcode & ~DMAE_CMD_SRC_RESET); } uint32_t bxe_dmae_opcode(struct bxe_softc *sc, uint8_t src_type, uint8_t dst_type, uint8_t with_comp, uint8_t comp_type) { uint32_t opcode = 0; opcode |= ((src_type << DMAE_CMD_SRC_SHIFT) | (dst_type << DMAE_CMD_DST_SHIFT)); opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET); opcode |= (SC_PORT(sc) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0); opcode |= ((SC_VN(sc) << DMAE_CMD_E1HVN_SHIFT) | (SC_VN(sc) << DMAE_CMD_DST_VN_SHIFT)); opcode |= (DMAE_COM_SET_ERR << DMAE_CMD_ERR_POLICY_SHIFT); #ifdef __BIG_ENDIAN opcode |= DMAE_CMD_ENDIANITY_B_DW_SWAP; #else opcode |= DMAE_CMD_ENDIANITY_DW_SWAP; #endif if (with_comp) { opcode = bxe_dmae_opcode_add_comp(opcode, comp_type); } return (opcode); } static void bxe_prep_dmae_with_comp(struct bxe_softc *sc, struct dmae_cmd *dmae, uint8_t src_type, uint8_t dst_type) { memset(dmae, 0, sizeof(struct dmae_cmd)); /* set the opcode */ dmae->opcode = bxe_dmae_opcode(sc, src_type, dst_type, TRUE, DMAE_COMP_PCI); /* fill in the completion parameters */ dmae->comp_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_val = DMAE_COMP_VAL; } /* issue a DMAE command over the init channel and wait for completion */ static int bxe_issue_dmae_with_comp(struct bxe_softc *sc, struct dmae_cmd *dmae) { uint32_t *wb_comp = BXE_SP(sc, wb_comp); int timeout = CHIP_REV_IS_SLOW(sc) ? 400000 : 4000; BXE_DMAE_LOCK(sc); /* reset completion */ *wb_comp = 0; /* post the command on the channel used for initializations */ bxe_post_dmae(sc, dmae, INIT_DMAE_C(sc)); /* wait for completion */ DELAY(5); while ((*wb_comp & ~DMAE_PCI_ERR_FLAG) != DMAE_COMP_VAL) { if (!timeout || (sc->recovery_state != BXE_RECOVERY_DONE && sc->recovery_state != BXE_RECOVERY_NIC_LOADING)) { BLOGE(sc, "DMAE timeout! *wb_comp 0x%x recovery_state 0x%x\n", *wb_comp, sc->recovery_state); BXE_DMAE_UNLOCK(sc); return (DMAE_TIMEOUT); } timeout--; DELAY(50); } if (*wb_comp & DMAE_PCI_ERR_FLAG) { BLOGE(sc, "DMAE PCI error! *wb_comp 0x%x recovery_state 0x%x\n", *wb_comp, sc->recovery_state); BXE_DMAE_UNLOCK(sc); return (DMAE_PCI_ERROR); } BXE_DMAE_UNLOCK(sc); return (0); } void bxe_read_dmae(struct bxe_softc *sc, uint32_t src_addr, uint32_t len32) { struct dmae_cmd dmae; uint32_t *data; int i, rc; DBASSERT(sc, (len32 <= 4), ("DMAE read length is %d", len32)); if (!sc->dmae_ready) { data = BXE_SP(sc, wb_data[0]); for (i = 0; i < len32; i++) { data[i] = (CHIP_IS_E1(sc)) ? bxe_reg_rd_ind(sc, (src_addr + (i * 4))) : REG_RD(sc, (src_addr + (i * 4))); } return; } /* set opcode and fixed command fields */ bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_GRC, DMAE_DST_PCI); /* fill in addresses and len */ dmae.src_addr_lo = (src_addr >> 2); /* GRC addr has dword resolution */ dmae.src_addr_hi = 0; dmae.dst_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_data)); dmae.dst_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_data)); dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); } } void bxe_write_dmae(struct bxe_softc *sc, bus_addr_t dma_addr, uint32_t dst_addr, uint32_t len32) { struct dmae_cmd dmae; int rc; if (!sc->dmae_ready) { DBASSERT(sc, (len32 <= 4), ("DMAE not ready and length is %d", len32)); if (CHIP_IS_E1(sc)) { ecore_init_ind_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } else { ecore_init_str_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } return; } /* set opcode and fixed command fields */ bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_PCI, DMAE_DST_GRC); /* fill in addresses and len */ dmae.src_addr_lo = U64_LO(dma_addr); dmae.src_addr_hi = U64_HI(dma_addr); dmae.dst_addr_lo = (dst_addr >> 2); /* GRC addr has dword resolution */ dmae.dst_addr_hi = 0; dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); } } void bxe_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { int dmae_wr_max = DMAE_LEN32_WR_MAX(sc); int offset = 0; while (len > dmae_wr_max) { bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ dmae_wr_max); offset += (dmae_wr_max * 4); len -= dmae_wr_max; } bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ len); } void bxe_set_ctx_validation(struct bxe_softc *sc, struct eth_context *cxt, uint32_t cid) { /* ustorm cxt validation */ cxt->ustorm_ag_context.cdu_usage = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE); /* xcontext validation */ cxt->xstorm_ag_context.cdu_reserved = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE); } static void bxe_storm_memset_hc_timeout(struct bxe_softc *sc, uint8_t port, uint8_t fw_sb_id, uint8_t sb_index, uint8_t ticks) { uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index)); REG_WR8(sc, addr, ticks); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d ticks %d\n", port, fw_sb_id, sb_index, ticks); } static void bxe_storm_memset_hc_disable(struct bxe_softc *sc, uint8_t port, uint16_t fw_sb_id, uint8_t sb_index, uint8_t disable) { uint32_t enable_flag = (disable) ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT); uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index)); uint8_t flags; /* clear and set */ flags = REG_RD8(sc, addr); flags &= ~HC_INDEX_DATA_HC_ENABLED; flags |= enable_flag; REG_WR8(sc, addr, flags); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d disable %d\n", port, fw_sb_id, sb_index, disable); } void bxe_update_coalesce_sb_index(struct bxe_softc *sc, uint8_t fw_sb_id, uint8_t sb_index, uint8_t disable, uint16_t usec) { int port = SC_PORT(sc); uint8_t ticks = (usec / 4); /* XXX ??? */ bxe_storm_memset_hc_timeout(sc, port, fw_sb_id, sb_index, ticks); disable = (disable) ? 1 : ((usec) ? 0 : 1); bxe_storm_memset_hc_disable(sc, port, fw_sb_id, sb_index, disable); } void elink_cb_udelay(struct bxe_softc *sc, uint32_t usecs) { DELAY(usecs); } uint32_t elink_cb_reg_read(struct bxe_softc *sc, uint32_t reg_addr) { return (REG_RD(sc, reg_addr)); } void elink_cb_reg_write(struct bxe_softc *sc, uint32_t reg_addr, uint32_t val) { REG_WR(sc, reg_addr, val); } void elink_cb_reg_wb_write(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_WR_DMAE(sc, offset, wb_write, len); } void elink_cb_reg_wb_read(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_RD_DMAE(sc, offset, wb_write, len); } uint8_t elink_cb_path_id(struct bxe_softc *sc) { return (SC_PATH(sc)); } void elink_cb_event_log(struct bxe_softc *sc, const elink_log_id_t elink_log_id, ...) { /* XXX */ BLOGI(sc, "ELINK EVENT LOG (%d)\n", elink_log_id); } static int bxe_set_spio(struct bxe_softc *sc, int spio, uint32_t mode) { uint32_t spio_reg; /* Only 2 SPIOs are configurable */ if ((spio != MISC_SPIO_SPIO4) && (spio != MISC_SPIO_SPIO5)) { BLOGE(sc, "Invalid SPIO 0x%x mode 0x%x\n", spio, mode); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); /* read SPIO and mask except the float bits */ spio_reg = (REG_RD(sc, MISC_REG_SPIO) & MISC_SPIO_FLOAT); switch (mode) { case MISC_SPIO_OUTPUT_LOW: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output low\n", spio); /* clear FLOAT and set CLR */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_CLR_POS); break; case MISC_SPIO_OUTPUT_HIGH: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output high\n", spio); /* clear FLOAT and set SET */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_SET_POS); break; case MISC_SPIO_INPUT_HI_Z: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> input\n", spio); /* set FLOAT */ spio_reg |= (spio << MISC_SPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_SPIO, spio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); return (0); } static int bxe_gpio_read(struct bxe_softc *sc, int gpio_num, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d port 0x%x gpio_port %d gpio_shift %d" " gpio_mask 0x%x\n", gpio_num, port, gpio_port, gpio_shift, gpio_mask); return (-1); } /* read GPIO value */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); /* get the requested pin value */ return ((gpio_reg & gpio_mask) == gpio_mask) ? 1 : 0; } static int bxe_gpio_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d mode 0x%x port 0x%x gpio_port %d" " gpio_shift %d gpio_mask 0x%x\n", gpio_num, mode, port, gpio_port, gpio_shift, gpio_mask); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = (REG_RD(sc, MISC_REG_GPIO) & MISC_REGISTERS_GPIO_FLOAT); switch (mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear FLOAT and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear FLOAT and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> input\n", gpio_num, gpio_shift); /* set FLOAT */ gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint32_t mode) { uint32_t gpio_reg; /* any port swapping should be handled by caller */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_CLR_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_SET_POS); switch (mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output low\n", pins); /* set CLR */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output high\n", pins); /* set SET */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> input\n", pins); /* set FLOAT */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: BLOGE(sc, "Invalid GPIO mode assignment pins 0x%x mode 0x%x" " gpio_reg 0x%x\n", pins, mode, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (-1); } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_int_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d mode 0x%x port 0x%x gpio_port %d" " gpio_shift %d gpio_mask 0x%x\n", gpio_num, mode, port, gpio_port, gpio_shift, gpio_mask); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO int */ gpio_reg = REG_RD(sc, MISC_REG_GPIO_INT); switch (mode) { case MISC_REGISTERS_GPIO_INT_OUTPUT_CLR: BLOGD(sc, DBG_PHY, "Clear GPIO INT %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear SET and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); break; case MISC_REGISTERS_GPIO_INT_OUTPUT_SET: BLOGD(sc, DBG_PHY, "Set GPIO INT %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear CLR and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO_INT, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } uint32_t elink_cb_gpio_read(struct bxe_softc *sc, uint16_t gpio_num, uint8_t port) { return (bxe_gpio_read(sc, gpio_num, port)); } uint8_t elink_cb_gpio_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_write(sc, gpio_num, mode, port)); } uint8_t elink_cb_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint8_t mode) /* 0=low 1=high */ { return (bxe_gpio_mult_write(sc, pins, mode)); } uint8_t elink_cb_gpio_int_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_int_write(sc, gpio_num, mode, port)); } void elink_cb_notify_link_changed(struct bxe_softc *sc) { REG_WR(sc, (MISC_REG_AEU_GENERAL_ATTN_12 + (SC_FUNC(sc) * sizeof(uint32_t))), 1); } /* send the MCP a request, block until there is a reply */ uint32_t elink_cb_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { int mb_idx = SC_FW_MB_IDX(sc); uint32_t seq; uint32_t rc = 0; uint32_t cnt = 1; uint8_t delay = CHIP_REV_IS_SLOW(sc) ? 100 : 10; BXE_FWMB_LOCK(sc); seq = ++sc->fw_seq; SHMEM_WR(sc, func_mb[mb_idx].drv_mb_param, param); SHMEM_WR(sc, func_mb[mb_idx].drv_mb_header, (command | seq)); BLOGD(sc, DBG_PHY, "wrote command 0x%08x to FW MB param 0x%08x\n", (command | seq), param); /* Let the FW do it's magic. GIve it up to 5 seconds... */ do { DELAY(delay * 1000); rc = SHMEM_RD(sc, func_mb[mb_idx].fw_mb_header); } while ((seq != (rc & FW_MSG_SEQ_NUMBER_MASK)) && (cnt++ < 500)); BLOGD(sc, DBG_PHY, "[after %d ms] read 0x%x seq 0x%x from FW MB\n", cnt*delay, rc, seq); /* is this a reply to our command? */ if (seq == (rc & FW_MSG_SEQ_NUMBER_MASK)) { rc &= FW_MSG_CODE_MASK; } else { /* Ruh-roh! */ BLOGE(sc, "FW failed to respond!\n"); // XXX bxe_fw_dump(sc); rc = 0; } BXE_FWMB_UNLOCK(sc); return (rc); } static uint32_t bxe_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { return (elink_cb_fw_command(sc, command, param)); } static void __storm_memset_dma_mapping(struct bxe_softc *sc, uint32_t addr, bus_addr_t mapping) { REG_WR(sc, addr, U64_LO(mapping)); REG_WR(sc, (addr + 4), U64_HI(mapping)); } static void storm_memset_spq_addr(struct bxe_softc *sc, bus_addr_t mapping, uint16_t abs_fid) { uint32_t addr = (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PAGE_BASE_OFFSET(abs_fid)); __storm_memset_dma_mapping(sc, addr, mapping); } static void storm_memset_vf_to_pf(struct bxe_softc *sc, uint16_t abs_fid, uint16_t pf_id) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); } static void storm_memset_func_en(struct bxe_softc *sc, uint16_t abs_fid, uint8_t enable) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(abs_fid)), enable); } static void storm_memset_eq_data(struct bxe_softc *sc, struct event_ring_data *eq_data, uint16_t pfid) { uint32_t addr; size_t size; addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_DATA_OFFSET(pfid)); size = sizeof(struct event_ring_data); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)eq_data); } static void storm_memset_eq_prod(struct bxe_softc *sc, uint16_t eq_prod, uint16_t pfid) { uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_PROD_OFFSET(pfid)); REG_WR16(sc, addr, eq_prod); } /* * Post a slowpath command. * * A slowpath command is used to propagate a configuration change through * the controller in a controlled manner, allowing each STORM processor and * other H/W blocks to phase in the change. The commands sent on the * slowpath are referred to as ramrods. Depending on the ramrod used the * completion of the ramrod will occur in different ways. Here's a * breakdown of ramrods and how they complete: * * RAMROD_CMD_ID_ETH_PORT_SETUP * Used to setup the leading connection on a port. Completes on the * Receive Completion Queue (RCQ) of that port (typically fp[0]). * * RAMROD_CMD_ID_ETH_CLIENT_SETUP * Used to setup an additional connection on a port. Completes on the * RCQ of the multi-queue/RSS connection being initialized. * * RAMROD_CMD_ID_ETH_STAT_QUERY * Used to force the storm processors to update the statistics database * in host memory. This ramrod is send on the leading connection CID and * completes as an index increment of the CSTORM on the default status * block. * * RAMROD_CMD_ID_ETH_UPDATE * Used to update the state of the leading connection, usually to udpate * the RSS indirection table. Completes on the RCQ of the leading * connection. (Not currently used under FreeBSD until OS support becomes * available.) * * RAMROD_CMD_ID_ETH_HALT * Used when tearing down a connection prior to driver unload. Completes * on the RCQ of the multi-queue/RSS connection being torn down. Don't * use this on the leading connection. * * RAMROD_CMD_ID_ETH_SET_MAC * Sets the Unicast/Broadcast/Multicast used by the port. Completes on * the RCQ of the leading connection. * * RAMROD_CMD_ID_ETH_CFC_DEL * Used when tearing down a conneciton prior to driver unload. Completes * on the RCQ of the leading connection (since the current connection * has been completely removed from controller memory). * * RAMROD_CMD_ID_ETH_PORT_DEL * Used to tear down the leading connection prior to driver unload, * typically fp[0]. Completes as an index increment of the CSTORM on the * default status block. * * RAMROD_CMD_ID_ETH_FORWARD_SETUP * Used for connection offload. Completes on the RCQ of the multi-queue * RSS connection that is being offloaded. (Not currently used under * FreeBSD.) * * There can only be one command pending per function. * * Returns: * 0 = Success, !0 = Failure. */ /* must be called under the spq lock */ static inline struct eth_spe *bxe_sp_get_next(struct bxe_softc *sc) { struct eth_spe *next_spe = sc->spq_prod_bd; if (sc->spq_prod_bd == sc->spq_last_bd) { /* wrap back to the first eth_spq */ sc->spq_prod_bd = sc->spq; sc->spq_prod_idx = 0; } else { sc->spq_prod_bd++; sc->spq_prod_idx++; } return (next_spe); } /* must be called under the spq lock */ static inline void bxe_sp_prod_update(struct bxe_softc *sc) { int func = SC_FUNC(sc); /* * Make sure that BD data is updated before writing the producer. * BD data is written to the memory, the producer is read from the * memory, thus we need a full memory barrier to ensure the ordering. */ mb(); REG_WR16(sc, (BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func)), sc->spq_prod_idx); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); } /** * bxe_is_contextless_ramrod - check if the current command ends on EQ * * @cmd: command to check * @cmd_type: command type */ static inline int bxe_is_contextless_ramrod(int cmd, int cmd_type) { if ((cmd_type == NONE_CONNECTION_TYPE) || (cmd == RAMROD_CMD_ID_ETH_FORWARD_SETUP) || (cmd == RAMROD_CMD_ID_ETH_CLASSIFICATION_RULES) || (cmd == RAMROD_CMD_ID_ETH_FILTER_RULES) || (cmd == RAMROD_CMD_ID_ETH_MULTICAST_RULES) || (cmd == RAMROD_CMD_ID_ETH_SET_MAC) || (cmd == RAMROD_CMD_ID_ETH_RSS_UPDATE)) { return (TRUE); } else { return (FALSE); } } /** * bxe_sp_post - place a single command on an SP ring * * @sc: driver handle * @command: command to place (e.g. SETUP, FILTER_RULES, etc.) * @cid: SW CID the command is related to * @data_hi: command private data address (high 32 bits) * @data_lo: command private data address (low 32 bits) * @cmd_type: command type (e.g. NONE, ETH) * * SP data is handled as if it's always an address pair, thus data fields are * not swapped to little endian in upper functions. Instead this function swaps * data as if it's two uint32 fields. */ int bxe_sp_post(struct bxe_softc *sc, int command, int cid, uint32_t data_hi, uint32_t data_lo, int cmd_type) { struct eth_spe *spe; uint16_t type; int common; common = bxe_is_contextless_ramrod(command, cmd_type); BXE_SP_LOCK(sc); if (common) { if (!atomic_load_acq_long(&sc->eq_spq_left)) { BLOGE(sc, "EQ ring is full!\n"); BXE_SP_UNLOCK(sc); return (-1); } } else { if (!atomic_load_acq_long(&sc->cq_spq_left)) { BLOGE(sc, "SPQ ring is full!\n"); BXE_SP_UNLOCK(sc); return (-1); } } spe = bxe_sp_get_next(sc); /* CID needs port number to be encoded int it */ spe->hdr.conn_and_cmd_data = htole32((command << SPE_HDR_T_CMD_ID_SHIFT) | HW_CID(sc, cid)); type = (cmd_type << SPE_HDR_T_CONN_TYPE_SHIFT) & SPE_HDR_T_CONN_TYPE; /* TBD: Check if it works for VFs */ type |= ((SC_FUNC(sc) << SPE_HDR_T_FUNCTION_ID_SHIFT) & SPE_HDR_T_FUNCTION_ID); spe->hdr.type = htole16(type); spe->data.update_data_addr.hi = htole32(data_hi); spe->data.update_data_addr.lo = htole32(data_lo); /* * It's ok if the actual decrement is issued towards the memory * somewhere between the lock and unlock. Thus no more explict * memory barrier is needed. */ if (common) { atomic_subtract_acq_long(&sc->eq_spq_left, 1); } else { atomic_subtract_acq_long(&sc->cq_spq_left, 1); } BLOGD(sc, DBG_SP, "SPQE -> %#jx\n", (uintmax_t)sc->spq_dma.paddr); BLOGD(sc, DBG_SP, "FUNC_RDATA -> %p / %#jx\n", BXE_SP(sc, func_rdata), (uintmax_t)BXE_SP_MAPPING(sc, func_rdata)); BLOGD(sc, DBG_SP, "SPQE[%x] (%x:%x) (cmd, common?) (%d,%d) hw_cid %x data (%x:%x) type(0x%x) left (CQ, EQ) (%lx,%lx)\n", sc->spq_prod_idx, (uint32_t)U64_HI(sc->spq_dma.paddr), (uint32_t)(U64_LO(sc->spq_dma.paddr) + (uint8_t *)sc->spq_prod_bd - (uint8_t *)sc->spq), command, common, HW_CID(sc, cid), data_hi, data_lo, type, atomic_load_acq_long(&sc->cq_spq_left), atomic_load_acq_long(&sc->eq_spq_left)); bxe_sp_prod_update(sc); BXE_SP_UNLOCK(sc); return (0); } /** * bxe_debug_print_ind_table - prints the indirection table configuration. * * @sc: driver hanlde * @p: pointer to rss configuration */ /* * FreeBSD Device probe function. * * Compares the device found to the driver's list of supported devices and * reports back to the bsd loader whether this is the right driver for the device. * This is the driver entry function called from the "kldload" command. * * Returns: * BUS_PROBE_DEFAULT on success, positive value on failure. */ static int bxe_probe(device_t dev) { struct bxe_device_type *t; char *descbuf; uint16_t did, sdid, svid, vid; /* Find our device structure */ t = bxe_devs; /* Get the data for the device to be probed. */ vid = pci_get_vendor(dev); did = pci_get_device(dev); svid = pci_get_subvendor(dev); sdid = pci_get_subdevice(dev); /* Look through the list of known devices for a match. */ while (t->bxe_name != NULL) { if ((vid == t->bxe_vid) && (did == t->bxe_did) && ((svid == t->bxe_svid) || (t->bxe_svid == PCI_ANY_ID)) && ((sdid == t->bxe_sdid) || (t->bxe_sdid == PCI_ANY_ID))) { descbuf = malloc(BXE_DEVDESC_MAX, M_TEMP, M_NOWAIT); if (descbuf == NULL) return (ENOMEM); /* Print out the device identity. */ snprintf(descbuf, BXE_DEVDESC_MAX, "%s (%c%d) BXE v:%s\n", t->bxe_name, (((pci_read_config(dev, PCIR_REVID, 4) & 0xf0) >> 4) + 'A'), (pci_read_config(dev, PCIR_REVID, 4) & 0xf), BXE_DRIVER_VERSION); device_set_desc_copy(dev, descbuf); free(descbuf, M_TEMP); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static void bxe_init_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX snprintf(sc->core_sx_name, sizeof(sc->core_sx_name), "bxe%d_core_lock", sc->unit); sx_init(&sc->core_sx, sc->core_sx_name); #else snprintf(sc->core_mtx_name, sizeof(sc->core_mtx_name), "bxe%d_core_lock", sc->unit); mtx_init(&sc->core_mtx, sc->core_mtx_name, NULL, MTX_DEF); #endif snprintf(sc->sp_mtx_name, sizeof(sc->sp_mtx_name), "bxe%d_sp_lock", sc->unit); mtx_init(&sc->sp_mtx, sc->sp_mtx_name, NULL, MTX_DEF); snprintf(sc->dmae_mtx_name, sizeof(sc->dmae_mtx_name), "bxe%d_dmae_lock", sc->unit); mtx_init(&sc->dmae_mtx, sc->dmae_mtx_name, NULL, MTX_DEF); snprintf(sc->port.phy_mtx_name, sizeof(sc->port.phy_mtx_name), "bxe%d_phy_lock", sc->unit); mtx_init(&sc->port.phy_mtx, sc->port.phy_mtx_name, NULL, MTX_DEF); snprintf(sc->fwmb_mtx_name, sizeof(sc->fwmb_mtx_name), "bxe%d_fwmb_lock", sc->unit); mtx_init(&sc->fwmb_mtx, sc->fwmb_mtx_name, NULL, MTX_DEF); snprintf(sc->print_mtx_name, sizeof(sc->print_mtx_name), "bxe%d_print_lock", sc->unit); mtx_init(&(sc->print_mtx), sc->print_mtx_name, NULL, MTX_DEF); snprintf(sc->stats_mtx_name, sizeof(sc->stats_mtx_name), "bxe%d_stats_lock", sc->unit); mtx_init(&(sc->stats_mtx), sc->stats_mtx_name, NULL, MTX_DEF); snprintf(sc->mcast_mtx_name, sizeof(sc->mcast_mtx_name), "bxe%d_mcast_lock", sc->unit); mtx_init(&(sc->mcast_mtx), sc->mcast_mtx_name, NULL, MTX_DEF); } static void bxe_release_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX sx_destroy(&sc->core_sx); #else if (mtx_initialized(&sc->core_mtx)) { mtx_destroy(&sc->core_mtx); } #endif if (mtx_initialized(&sc->sp_mtx)) { mtx_destroy(&sc->sp_mtx); } if (mtx_initialized(&sc->dmae_mtx)) { mtx_destroy(&sc->dmae_mtx); } if (mtx_initialized(&sc->port.phy_mtx)) { mtx_destroy(&sc->port.phy_mtx); } if (mtx_initialized(&sc->fwmb_mtx)) { mtx_destroy(&sc->fwmb_mtx); } if (mtx_initialized(&sc->print_mtx)) { mtx_destroy(&sc->print_mtx); } if (mtx_initialized(&sc->stats_mtx)) { mtx_destroy(&sc->stats_mtx); } if (mtx_initialized(&sc->mcast_mtx)) { mtx_destroy(&sc->mcast_mtx); } } static void bxe_tx_disable(struct bxe_softc* sc) { if_t ifp = sc->ifp; /* tell the stack the driver is stopped and TX queue is full */ if (ifp != NULL) { if_setdrvflags(ifp, 0); } } static void bxe_drv_pulse(struct bxe_softc *sc) { SHMEM_WR(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb, sc->fw_drv_pulse_wr_seq); } static inline uint16_t bxe_tx_avail(struct bxe_softc *sc, struct bxe_fastpath *fp) { int16_t used; uint16_t prod; uint16_t cons; prod = fp->tx_bd_prod; cons = fp->tx_bd_cons; used = SUB_S16(prod, cons); return (int16_t)(sc->tx_ring_size) - used; } static inline int bxe_tx_queue_has_work(struct bxe_fastpath *fp) { uint16_t hw_cons; mb(); /* status block fields can change */ hw_cons = le16toh(*fp->tx_cons_sb); return (hw_cons != fp->tx_pkt_cons); } static inline uint8_t bxe_has_tx_work(struct bxe_fastpath *fp) { /* expand this for multi-cos if ever supported */ return (bxe_tx_queue_has_work(fp)) ? TRUE : FALSE; } static inline int bxe_has_rx_work(struct bxe_fastpath *fp) { uint16_t rx_cq_cons_sb; mb(); /* status block fields can change */ rx_cq_cons_sb = le16toh(*fp->rx_cq_cons_sb); if ((rx_cq_cons_sb & RCQ_MAX) == RCQ_MAX) rx_cq_cons_sb++; return (fp->rx_cq_cons != rx_cq_cons_sb); } static void bxe_sp_event(struct bxe_softc *sc, struct bxe_fastpath *fp, union eth_rx_cqe *rr_cqe) { int cid = SW_CID(rr_cqe->ramrod_cqe.conn_and_cmd_data); int command = CQE_CMD(rr_cqe->ramrod_cqe.conn_and_cmd_data); enum ecore_queue_cmd drv_cmd = ECORE_Q_CMD_MAX; struct ecore_queue_sp_obj *q_obj = &BXE_SP_OBJ(sc, fp).q_obj; BLOGD(sc, DBG_SP, "fp=%d cid=%d got ramrod #%d state is %x type is %d\n", fp->index, cid, command, sc->state, rr_cqe->ramrod_cqe.ramrod_type); switch (command) { case (RAMROD_CMD_ID_ETH_CLIENT_UPDATE): BLOGD(sc, DBG_SP, "got UPDATE ramrod. CID %d\n", cid); drv_cmd = ECORE_Q_CMD_UPDATE; break; case (RAMROD_CMD_ID_ETH_CLIENT_SETUP): BLOGD(sc, DBG_SP, "got MULTI[%d] setup ramrod\n", cid); drv_cmd = ECORE_Q_CMD_SETUP; break; case (RAMROD_CMD_ID_ETH_TX_QUEUE_SETUP): BLOGD(sc, DBG_SP, "got MULTI[%d] tx-only setup ramrod\n", cid); drv_cmd = ECORE_Q_CMD_SETUP_TX_ONLY; break; case (RAMROD_CMD_ID_ETH_HALT): BLOGD(sc, DBG_SP, "got MULTI[%d] halt ramrod\n", cid); drv_cmd = ECORE_Q_CMD_HALT; break; case (RAMROD_CMD_ID_ETH_TERMINATE): BLOGD(sc, DBG_SP, "got MULTI[%d] teminate ramrod\n", cid); drv_cmd = ECORE_Q_CMD_TERMINATE; break; case (RAMROD_CMD_ID_ETH_EMPTY): BLOGD(sc, DBG_SP, "got MULTI[%d] empty ramrod\n", cid); drv_cmd = ECORE_Q_CMD_EMPTY; break; default: BLOGD(sc, DBG_SP, "ERROR: unexpected MC reply (%d) on fp[%d]\n", command, fp->index); return; } if ((drv_cmd != ECORE_Q_CMD_MAX) && q_obj->complete_cmd(sc, q_obj, drv_cmd)) { /* * q_obj->complete_cmd() failure means that this was * an unexpected completion. * * In this case we don't want to increase the sc->spq_left * because apparently we haven't sent this command the first * place. */ // bxe_panic(sc, ("Unexpected SP completion\n")); return; } atomic_add_acq_long(&sc->cq_spq_left, 1); BLOGD(sc, DBG_SP, "sc->cq_spq_left 0x%lx\n", atomic_load_acq_long(&sc->cq_spq_left)); } /* * The current mbuf is part of an aggregation. Move the mbuf into the TPA * aggregation queue, put an empty mbuf back onto the receive chain, and mark * the current aggregation queue as in-progress. */ static void bxe_tpa_start(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t queue, uint16_t cons, uint16_t prod, struct eth_fast_path_rx_cqe *cqe) { struct bxe_sw_rx_bd tmp_bd; struct bxe_sw_rx_bd *rx_buf; struct eth_rx_bd *rx_bd; int max_agg_queues; struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue]; uint16_t index; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA START " "cons=%d prod=%d\n", fp->index, queue, cons, prod); max_agg_queues = MAX_AGG_QS(sc); KASSERT((queue < max_agg_queues), ("fp[%02d] invalid aggr queue (%d >= %d)!", fp->index, queue, max_agg_queues)); KASSERT((tpa_info->state == BXE_TPA_STATE_STOP), ("fp[%02d].tpa[%02d] starting aggr on queue not stopped!", fp->index, queue)); /* copy the existing mbuf and mapping from the TPA pool */ tmp_bd = tpa_info->bd; if (tmp_bd.m == NULL) { uint32_t *tmp; tmp = (uint32_t *)cqe; BLOGE(sc, "fp[%02d].tpa[%02d] cons[%d] prod[%d]mbuf not allocated!\n", fp->index, queue, cons, prod); BLOGE(sc, "cqe [0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x]\n", *tmp, *(tmp+1), *(tmp+2), *(tmp+3), *(tmp+4), *(tmp+5), *(tmp+6), *(tmp+7)); /* XXX Error handling? */ return; } /* change the TPA queue to the start state */ tpa_info->state = BXE_TPA_STATE_START; tpa_info->placement_offset = cqe->placement_offset; tpa_info->parsing_flags = le16toh(cqe->pars_flags.flags); tpa_info->vlan_tag = le16toh(cqe->vlan_tag); tpa_info->len_on_bd = le16toh(cqe->len_on_bd); fp->rx_tpa_queue_used |= (1 << queue); /* * If all the buffer descriptors are filled with mbufs then fill in * the current consumer index with a new BD. Else if a maximum Rx * buffer limit is imposed then fill in the next producer index. */ index = (sc->max_rx_bufs != RX_BD_USABLE) ? prod : cons; /* move the received mbuf and mapping to TPA pool */ tpa_info->bd = fp->rx_mbuf_chain[cons]; /* release any existing RX BD mbuf mappings */ if (cons != index) { rx_buf = &fp->rx_mbuf_chain[cons]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* * We get here when the maximum number of rx buffers is less than * RX_BD_USABLE. The mbuf is already saved above so it's OK to NULL * it out here without concern of a memory leak. */ fp->rx_mbuf_chain[cons].m = NULL; } /* update the Rx SW BD with the mbuf info from the TPA pool */ fp->rx_mbuf_chain[index] = tmp_bd; /* update the Rx BD with the empty mbuf phys address from the TPA pool */ rx_bd = &fp->rx_chain[index]; rx_bd->addr_hi = htole32(U64_HI(tpa_info->seg.ds_addr)); rx_bd->addr_lo = htole32(U64_LO(tpa_info->seg.ds_addr)); } /* * When a TPA aggregation is completed, loop through the individual mbufs * of the aggregation, combining them into a single mbuf which will be sent * up the stack. Refill all freed SGEs with mbufs as we go along. */ static int bxe_fill_frag_mbuf(struct bxe_softc *sc, struct bxe_fastpath *fp, struct bxe_sw_tpa_info *tpa_info, uint16_t queue, uint16_t pages, struct mbuf *m, struct eth_end_agg_rx_cqe *cqe, uint16_t cqe_idx) { struct mbuf *m_frag; uint32_t frag_len, frag_size, i; uint16_t sge_idx; int rc = 0; int j; frag_size = le16toh(cqe->pkt_len) - tpa_info->len_on_bd; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill len_on_bd=%d frag_size=%d pages=%d\n", fp->index, queue, tpa_info->len_on_bd, frag_size, pages); /* make sure the aggregated frame is not too big to handle */ if (pages > 8 * PAGES_PER_SGE) { uint32_t *tmp = (uint32_t *)cqe; BLOGE(sc, "fp[%02d].sge[0x%04x] has too many pages (%d)! " "pkt_len=%d len_on_bd=%d frag_size=%d\n", fp->index, cqe_idx, pages, le16toh(cqe->pkt_len), tpa_info->len_on_bd, frag_size); BLOGE(sc, "cqe [0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x]\n", *tmp, *(tmp+1), *(tmp+2), *(tmp+3), *(tmp+4), *(tmp+5), *(tmp+6), *(tmp+7)); bxe_panic(sc, ("sge page count error\n")); return (EINVAL); } /* * Scan through the scatter gather list pulling individual mbufs into a * single mbuf for the host stack. */ for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) { sge_idx = RX_SGE(le16toh(cqe->sgl_or_raw_data.sgl[j])); /* * Firmware gives the indices of the SGE as if the ring is an array * (meaning that the "next" element will consume 2 indices). */ frag_len = min(frag_size, (uint32_t)(SGE_PAGES)); BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill i=%d j=%d " "sge_idx=%d frag_size=%d frag_len=%d\n", fp->index, queue, i, j, sge_idx, frag_size, frag_len); m_frag = fp->rx_sge_mbuf_chain[sge_idx].m; /* allocate a new mbuf for the SGE */ rc = bxe_alloc_rx_sge_mbuf(fp, sge_idx); if (rc) { /* Leave all remaining SGEs in the ring! */ return (rc); } /* update the fragment length */ m_frag->m_len = frag_len; /* concatenate the fragment to the head mbuf */ m_cat(m, m_frag); fp->eth_q_stats.mbuf_alloc_sge--; /* update the TPA mbuf size and remaining fragment size */ m->m_pkthdr.len += frag_len; frag_size -= frag_len; } BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill done frag_size=%d\n", fp->index, queue, frag_size); return (rc); } static inline void bxe_clear_sge_mask_next_elems(struct bxe_fastpath *fp) { int i, j; for (i = 1; i <= RX_SGE_NUM_PAGES; i++) { int idx = RX_SGE_TOTAL_PER_PAGE * i - 1; for (j = 0; j < 2; j++) { BIT_VEC64_CLEAR_BIT(fp->sge_mask, idx); idx--; } } } static inline void bxe_init_sge_ring_bit_mask(struct bxe_fastpath *fp) { /* set the mask to all 1's, it's faster to compare to 0 than to 0xf's */ memset(fp->sge_mask, 0xff, sizeof(fp->sge_mask)); /* * Clear the two last indices in the page to 1. These are the indices that * correspond to the "next" element, hence will never be indicated and * should be removed from the calculations. */ bxe_clear_sge_mask_next_elems(fp); } static inline void bxe_update_last_max_sge(struct bxe_fastpath *fp, uint16_t idx) { uint16_t last_max = fp->last_max_sge; if (SUB_S16(idx, last_max) > 0) { fp->last_max_sge = idx; } } static inline void bxe_update_sge_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t sge_len, union eth_sgl_or_raw_data *cqe) { uint16_t last_max, last_elem, first_elem; uint16_t delta = 0; uint16_t i; if (!sge_len) { return; } /* first mark all used pages */ for (i = 0; i < sge_len; i++) { BIT_VEC64_CLEAR_BIT(fp->sge_mask, RX_SGE(le16toh(cqe->sgl[i]))); } BLOGD(sc, DBG_LRO, "fp[%02d] fp_cqe->sgl[%d] = %d\n", fp->index, sge_len - 1, le16toh(cqe->sgl[sge_len - 1])); /* assume that the last SGE index is the biggest */ bxe_update_last_max_sge(fp, le16toh(cqe->sgl[sge_len - 1])); last_max = RX_SGE(fp->last_max_sge); last_elem = last_max >> BIT_VEC64_ELEM_SHIFT; first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT; /* if ring is not full */ if (last_elem + 1 != first_elem) { last_elem++; } /* now update the prod */ for (i = first_elem; i != last_elem; i = RX_SGE_NEXT_MASK_ELEM(i)) { if (__predict_true(fp->sge_mask[i])) { break; } fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK; delta += BIT_VEC64_ELEM_SZ; } if (delta > 0) { fp->rx_sge_prod += delta; /* clear page-end entries */ bxe_clear_sge_mask_next_elems(fp); } BLOGD(sc, DBG_LRO, "fp[%02d] fp->last_max_sge=%d fp->rx_sge_prod=%d\n", fp->index, fp->last_max_sge, fp->rx_sge_prod); } /* * The aggregation on the current TPA queue has completed. Pull the individual * mbuf fragments together into a single mbuf, perform all necessary checksum * calculations, and send the resuting mbuf to the stack. */ static void bxe_tpa_stop(struct bxe_softc *sc, struct bxe_fastpath *fp, struct bxe_sw_tpa_info *tpa_info, uint16_t queue, uint16_t pages, struct eth_end_agg_rx_cqe *cqe, uint16_t cqe_idx) { if_t ifp = sc->ifp; struct mbuf *m; int rc = 0; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] pad=%d pkt_len=%d pages=%d vlan=%d\n", fp->index, queue, tpa_info->placement_offset, le16toh(cqe->pkt_len), pages, tpa_info->vlan_tag); m = tpa_info->bd.m; /* allocate a replacement before modifying existing mbuf */ rc = bxe_alloc_rx_tpa_mbuf(fp, queue); if (rc) { /* drop the frame and log an error */ fp->eth_q_stats.rx_soft_errors++; goto bxe_tpa_stop_exit; } /* we have a replacement, fixup the current mbuf */ m_adj(m, tpa_info->placement_offset); m->m_pkthdr.len = m->m_len = tpa_info->len_on_bd; /* mark the checksums valid (taken care of by the firmware) */ fp->eth_q_stats.rx_ofld_frames_csum_ip++; fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++; m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); /* aggregate all of the SGEs into a single mbuf */ rc = bxe_fill_frag_mbuf(sc, fp, tpa_info, queue, pages, m, cqe, cqe_idx); if (rc) { /* drop the packet and log an error */ fp->eth_q_stats.rx_soft_errors++; m_freem(m); } else { if (tpa_info->parsing_flags & PARSING_FLAGS_INNER_VLAN_EXIST) { m->m_pkthdr.ether_vtag = tpa_info->vlan_tag; m->m_flags |= M_VLANTAG; } /* assign packet to this interface interface */ if_setrcvif(m, ifp); #if __FreeBSD_version >= 800000 /* specify what RSS queue was used for this flow */ m->m_pkthdr.flowid = fp->index; BXE_SET_FLOWID(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); fp->eth_q_stats.rx_tpa_pkts++; /* pass the frame to the stack */ if_input(ifp, m); } /* we passed an mbuf up the stack or dropped the frame */ fp->eth_q_stats.mbuf_alloc_tpa--; bxe_tpa_stop_exit: fp->rx_tpa_info[queue].state = BXE_TPA_STATE_STOP; fp->rx_tpa_queue_used &= ~(1 << queue); } static uint8_t bxe_service_rxsgl( struct bxe_fastpath *fp, uint16_t len, uint16_t lenonbd, struct mbuf *m, struct eth_fast_path_rx_cqe *cqe_fp) { struct mbuf *m_frag; uint16_t frags, frag_len; uint16_t sge_idx = 0; uint16_t j; uint8_t i, rc = 0; uint32_t frag_size; /* adjust the mbuf */ m->m_len = lenonbd; frag_size = len - lenonbd; frags = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT; for (i = 0, j = 0; i < frags; i += PAGES_PER_SGE, j++) { sge_idx = RX_SGE(le16toh(cqe_fp->sgl_or_raw_data.sgl[j])); m_frag = fp->rx_sge_mbuf_chain[sge_idx].m; frag_len = min(frag_size, (uint32_t)(SGE_PAGE_SIZE)); m_frag->m_len = frag_len; /* allocate a new mbuf for the SGE */ rc = bxe_alloc_rx_sge_mbuf(fp, sge_idx); if (rc) { /* Leave all remaining SGEs in the ring! */ return (rc); } fp->eth_q_stats.mbuf_alloc_sge--; /* concatenate the fragment to the head mbuf */ m_cat(m, m_frag); frag_size -= frag_len; } bxe_update_sge_prod(fp->sc, fp, frags, &cqe_fp->sgl_or_raw_data); return rc; } static uint8_t bxe_rxeof(struct bxe_softc *sc, struct bxe_fastpath *fp) { if_t ifp = sc->ifp; uint16_t bd_cons, bd_prod, bd_prod_fw, comp_ring_cons; uint16_t hw_cq_cons, sw_cq_cons, sw_cq_prod; int rx_pkts = 0; int rc = 0; BXE_FP_RX_LOCK(fp); /* CQ "next element" is of the size of the regular element */ hw_cq_cons = le16toh(*fp->rx_cq_cons_sb); if ((hw_cq_cons & RCQ_USABLE_PER_PAGE) == RCQ_USABLE_PER_PAGE) { hw_cq_cons++; } bd_cons = fp->rx_bd_cons; bd_prod = fp->rx_bd_prod; bd_prod_fw = bd_prod; sw_cq_cons = fp->rx_cq_cons; sw_cq_prod = fp->rx_cq_prod; /* * Memory barrier necessary as speculative reads of the rx * buffer can be ahead of the index in the status block */ rmb(); BLOGD(sc, DBG_RX, "fp[%02d] Rx START hw_cq_cons=%u sw_cq_cons=%u\n", fp->index, hw_cq_cons, sw_cq_cons); while (sw_cq_cons != hw_cq_cons) { struct bxe_sw_rx_bd *rx_buf = NULL; union eth_rx_cqe *cqe; struct eth_fast_path_rx_cqe *cqe_fp; uint8_t cqe_fp_flags; enum eth_rx_cqe_type cqe_fp_type; uint16_t len, lenonbd, pad; struct mbuf *m = NULL; comp_ring_cons = RCQ(sw_cq_cons); bd_prod = RX_BD(bd_prod); bd_cons = RX_BD(bd_cons); cqe = &fp->rcq_chain[comp_ring_cons]; cqe_fp = &cqe->fast_path_cqe; cqe_fp_flags = cqe_fp->type_error_flags; cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE; BLOGD(sc, DBG_RX, "fp[%02d] Rx hw_cq_cons=%d hw_sw_cons=%d " "BD prod=%d cons=%d CQE type=0x%x err=0x%x " "status=0x%x rss_hash=0x%x vlan=0x%x len=%u lenonbd=%u\n", fp->index, hw_cq_cons, sw_cq_cons, bd_prod, bd_cons, CQE_TYPE(cqe_fp_flags), cqe_fp_flags, cqe_fp->status_flags, le32toh(cqe_fp->rss_hash_result), le16toh(cqe_fp->vlan_tag), le16toh(cqe_fp->pkt_len_or_gro_seg_len), le16toh(cqe_fp->len_on_bd)); /* is this a slowpath msg? */ if (__predict_false(CQE_TYPE_SLOW(cqe_fp_type))) { bxe_sp_event(sc, fp, cqe); goto next_cqe; } rx_buf = &fp->rx_mbuf_chain[bd_cons]; if (!CQE_TYPE_FAST(cqe_fp_type)) { struct bxe_sw_tpa_info *tpa_info; uint16_t frag_size, pages; uint8_t queue; if (CQE_TYPE_START(cqe_fp_type)) { bxe_tpa_start(sc, fp, cqe_fp->queue_index, bd_cons, bd_prod, cqe_fp); m = NULL; /* packet not ready yet */ goto next_rx; } KASSERT(CQE_TYPE_STOP(cqe_fp_type), ("CQE type is not STOP! (0x%x)\n", cqe_fp_type)); queue = cqe->end_agg_cqe.queue_index; tpa_info = &fp->rx_tpa_info[queue]; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA STOP\n", fp->index, queue); frag_size = (le16toh(cqe->end_agg_cqe.pkt_len) - tpa_info->len_on_bd); pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT; bxe_tpa_stop(sc, fp, tpa_info, queue, pages, &cqe->end_agg_cqe, comp_ring_cons); bxe_update_sge_prod(sc, fp, pages, &cqe->end_agg_cqe.sgl_or_raw_data); goto next_cqe; } /* non TPA */ /* is this an error packet? */ if (__predict_false(cqe_fp_flags & ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG)) { BLOGE(sc, "flags 0x%x rx packet %u\n", cqe_fp_flags, sw_cq_cons); fp->eth_q_stats.rx_soft_errors++; goto next_rx; } len = le16toh(cqe_fp->pkt_len_or_gro_seg_len); lenonbd = le16toh(cqe_fp->len_on_bd); pad = cqe_fp->placement_offset; m = rx_buf->m; if (__predict_false(m == NULL)) { BLOGE(sc, "No mbuf in rx chain descriptor %d for fp[%02d]\n", bd_cons, fp->index); goto next_rx; } /* XXX double copy if packet length under a threshold */ /* * If all the buffer descriptors are filled with mbufs then fill in * the current consumer index with a new BD. Else if a maximum Rx * buffer limit is imposed then fill in the next producer index. */ rc = bxe_alloc_rx_bd_mbuf(fp, bd_cons, (sc->max_rx_bufs != RX_BD_USABLE) ? bd_prod : bd_cons); if (rc != 0) { /* we simply reuse the received mbuf and don't post it to the stack */ m = NULL; BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n", fp->index, rc); fp->eth_q_stats.rx_soft_errors++; if (sc->max_rx_bufs != RX_BD_USABLE) { /* copy this consumer index to the producer index */ memcpy(&fp->rx_mbuf_chain[bd_prod], rx_buf, sizeof(struct bxe_sw_rx_bd)); memset(rx_buf, 0, sizeof(struct bxe_sw_rx_bd)); } goto next_rx; } /* current mbuf was detached from the bd */ fp->eth_q_stats.mbuf_alloc_rx--; /* we allocated a replacement mbuf, fixup the current one */ m_adj(m, pad); m->m_pkthdr.len = m->m_len = len; if ((len > 60) && (len > lenonbd)) { fp->eth_q_stats.rx_bxe_service_rxsgl++; rc = bxe_service_rxsgl(fp, len, lenonbd, m, cqe_fp); if (rc) break; fp->eth_q_stats.rx_jumbo_sge_pkts++; } else if (lenonbd < len) { fp->eth_q_stats.rx_erroneous_jumbo_sge_pkts++; } /* assign packet to this interface interface */ if_setrcvif(m, ifp); /* assume no hardware checksum has complated */ m->m_pkthdr.csum_flags = 0; /* validate checksum if offload enabled */ if (if_getcapenable(ifp) & IFCAP_RXCSUM) { /* check for a valid IP frame */ if (!(cqe->fast_path_cqe.status_flags & ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG)) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (__predict_false(cqe_fp_flags & ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG)) { fp->eth_q_stats.rx_hw_csum_errors++; } else { fp->eth_q_stats.rx_ofld_frames_csum_ip++; m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } } /* check for a valid TCP/UDP frame */ if (!(cqe->fast_path_cqe.status_flags & ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) { if (__predict_false(cqe_fp_flags & ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) { fp->eth_q_stats.rx_hw_csum_errors++; } else { fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++; m->m_pkthdr.csum_data = 0xFFFF; m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } } } /* if there is a VLAN tag then flag that info */ if (cqe->fast_path_cqe.pars_flags.flags & PARSING_FLAGS_INNER_VLAN_EXIST) { m->m_pkthdr.ether_vtag = cqe->fast_path_cqe.vlan_tag; m->m_flags |= M_VLANTAG; } #if __FreeBSD_version >= 800000 /* specify what RSS queue was used for this flow */ m->m_pkthdr.flowid = fp->index; BXE_SET_FLOWID(m); #endif next_rx: bd_cons = RX_BD_NEXT(bd_cons); bd_prod = RX_BD_NEXT(bd_prod); bd_prod_fw = RX_BD_NEXT(bd_prod_fw); /* pass the frame to the stack */ if (__predict_true(m != NULL)) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rx_pkts++; if_input(ifp, m); } next_cqe: sw_cq_prod = RCQ_NEXT(sw_cq_prod); sw_cq_cons = RCQ_NEXT(sw_cq_cons); /* limit spinning on the queue */ if (rc != 0) break; if (rx_pkts == sc->rx_budget) { fp->eth_q_stats.rx_budget_reached++; break; } } /* while work to do */ fp->rx_bd_cons = bd_cons; fp->rx_bd_prod = bd_prod_fw; fp->rx_cq_cons = sw_cq_cons; fp->rx_cq_prod = sw_cq_prod; /* Update producers */ bxe_update_rx_prod(sc, fp, bd_prod_fw, sw_cq_prod, fp->rx_sge_prod); fp->eth_q_stats.rx_pkts += rx_pkts; fp->eth_q_stats.rx_calls++; BXE_FP_RX_UNLOCK(fp); return (sw_cq_cons != hw_cq_cons); } static uint16_t bxe_free_tx_pkt(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t idx) { struct bxe_sw_tx_bd *tx_buf = &fp->tx_mbuf_chain[idx]; struct eth_tx_start_bd *tx_start_bd; uint16_t bd_idx = TX_BD(tx_buf->first_bd); uint16_t new_cons; int nbd; /* unmap the mbuf from non-paged memory */ bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); tx_start_bd = &fp->tx_chain[bd_idx].start_bd; nbd = le16toh(tx_start_bd->nbd) - 1; new_cons = (tx_buf->first_bd + nbd); /* free the mbuf */ if (__predict_true(tx_buf->m != NULL)) { m_freem(tx_buf->m); fp->eth_q_stats.mbuf_alloc_tx--; } else { fp->eth_q_stats.tx_chain_lost_mbuf++; } tx_buf->m = NULL; tx_buf->first_bd = 0; return (new_cons); } /* transmit timeout watchdog */ static int bxe_watchdog(struct bxe_softc *sc, struct bxe_fastpath *fp) { BXE_FP_TX_LOCK(fp); if ((fp->watchdog_timer == 0) || (--fp->watchdog_timer)) { BXE_FP_TX_UNLOCK(fp); return (0); } BLOGE(sc, "TX watchdog timeout on fp[%02d], resetting!\n", fp->index); if(sc->trigger_grcdump) { /* taking grcdump */ bxe_grc_dump(sc); } BXE_FP_TX_UNLOCK(fp); atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_REINIT); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); return (-1); } /* processes transmit completions */ static uint8_t bxe_txeof(struct bxe_softc *sc, struct bxe_fastpath *fp) { if_t ifp = sc->ifp; uint16_t bd_cons, hw_cons, sw_cons, pkt_cons; uint16_t tx_bd_avail; BXE_FP_TX_LOCK_ASSERT(fp); bd_cons = fp->tx_bd_cons; hw_cons = le16toh(*fp->tx_cons_sb); sw_cons = fp->tx_pkt_cons; while (sw_cons != hw_cons) { pkt_cons = TX_BD(sw_cons); BLOGD(sc, DBG_TX, "TX: fp[%d]: hw_cons=%u sw_cons=%u pkt_cons=%u\n", fp->index, hw_cons, sw_cons, pkt_cons); bd_cons = bxe_free_tx_pkt(sc, fp, pkt_cons); sw_cons++; } fp->tx_pkt_cons = sw_cons; fp->tx_bd_cons = bd_cons; BLOGD(sc, DBG_TX, "TX done: fp[%d]: hw_cons=%u sw_cons=%u sw_prod=%u\n", fp->index, hw_cons, fp->tx_pkt_cons, fp->tx_pkt_prod); mb(); tx_bd_avail = bxe_tx_avail(sc, fp); if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); } else { if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); } if (fp->tx_pkt_prod != fp->tx_pkt_cons) { /* reset the watchdog timer if there are pending transmits */ fp->watchdog_timer = BXE_TX_TIMEOUT; return (TRUE); } else { /* clear watchdog when there are no pending transmits */ fp->watchdog_timer = 0; return (FALSE); } } static void bxe_drain_tx_queues(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, count; /* wait until all TX fastpath tasks have completed */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; count = 1000; while (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); if (count == 0) { BLOGE(sc, "Timeout waiting for fp[%d] " "transmits to complete!\n", i); bxe_panic(sc, ("tx drain failure\n")); return; } count--; DELAY(1000); rmb(); } } return; } static int bxe_del_all_macs(struct bxe_softc *sc, struct ecore_vlan_mac_obj *mac_obj, int mac_type, uint8_t wait_for_comp) { unsigned long ramrod_flags = 0, vlan_mac_flags = 0; int rc; /* wait for completion of requested */ if (wait_for_comp) { bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); } /* Set the mac type of addresses we want to clear */ bxe_set_bit(mac_type, &vlan_mac_flags); rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc < 0) { BLOGE(sc, "Failed to delete MACs (%d) mac_type %d wait_for_comp 0x%x\n", rc, mac_type, wait_for_comp); } return (rc); } static int bxe_fill_accept_flags(struct bxe_softc *sc, uint32_t rx_mode, unsigned long *rx_accept_flags, unsigned long *tx_accept_flags) { /* Clear the flags first */ *rx_accept_flags = 0; *tx_accept_flags = 0; switch (rx_mode) { case BXE_RX_MODE_NONE: /* * 'drop all' supersedes any accept flags that may have been * passed to the function. */ break; case BXE_RX_MODE_NORMAL: bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags); /* internal switching mode */ bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags); break; case BXE_RX_MODE_ALLMULTI: bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags); /* internal switching mode */ bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags); break; case BXE_RX_MODE_PROMISC: /* * According to deffinition of SI mode, iface in promisc mode * should receive matched and unmatched (in resolution of port) * unicast packets. */ bxe_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags); /* internal switching mode */ bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags); if (IS_MF_SI(sc)) { bxe_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags); } else { bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags); } break; default: BLOGE(sc, "Unknown rx_mode (0x%x)\n", rx_mode); return (-1); } /* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */ if (rx_mode != BXE_RX_MODE_NONE) { bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags); } return (0); } static int bxe_set_q_rx_mode(struct bxe_softc *sc, uint8_t cl_id, unsigned long rx_mode_flags, unsigned long rx_accept_flags, unsigned long tx_accept_flags, unsigned long ramrod_flags) { struct ecore_rx_mode_ramrod_params ramrod_param; int rc; memset(&ramrod_param, 0, sizeof(ramrod_param)); /* Prepare ramrod parameters */ ramrod_param.cid = 0; ramrod_param.cl_id = cl_id; ramrod_param.rx_mode_obj = &sc->rx_mode_obj; ramrod_param.func_id = SC_FUNC(sc); ramrod_param.pstate = &sc->sp_state; ramrod_param.state = ECORE_FILTER_RX_MODE_PENDING; ramrod_param.rdata = BXE_SP(sc, rx_mode_rdata); ramrod_param.rdata_mapping = BXE_SP_MAPPING(sc, rx_mode_rdata); bxe_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state); ramrod_param.ramrod_flags = ramrod_flags; ramrod_param.rx_mode_flags = rx_mode_flags; ramrod_param.rx_accept_flags = rx_accept_flags; ramrod_param.tx_accept_flags = tx_accept_flags; rc = ecore_config_rx_mode(sc, &ramrod_param); if (rc < 0) { BLOGE(sc, "Set rx_mode %d cli_id 0x%x rx_mode_flags 0x%x " "rx_accept_flags 0x%x tx_accept_flags 0x%x " "ramrod_flags 0x%x rc %d failed\n", sc->rx_mode, cl_id, (uint32_t)rx_mode_flags, (uint32_t)rx_accept_flags, (uint32_t)tx_accept_flags, (uint32_t)ramrod_flags, rc); return (rc); } return (0); } static int bxe_set_storm_rx_mode(struct bxe_softc *sc) { unsigned long rx_mode_flags = 0, ramrod_flags = 0; unsigned long rx_accept_flags = 0, tx_accept_flags = 0; int rc; rc = bxe_fill_accept_flags(sc, sc->rx_mode, &rx_accept_flags, &tx_accept_flags); if (rc) { return (rc); } bxe_set_bit(RAMROD_RX, &ramrod_flags); bxe_set_bit(RAMROD_TX, &ramrod_flags); /* XXX ensure all fastpath have same cl_id and/or move it to bxe_softc */ return (bxe_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags, rx_accept_flags, tx_accept_flags, ramrod_flags)); } /* returns the "mcp load_code" according to global load_count array */ static int bxe_nic_load_no_mcp(struct bxe_softc *sc) { int path = SC_PATH(sc); int port = SC_PORT(sc); BLOGI(sc, "NO MCP - load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); load_count[path][0]++; load_count[path][1 + port]++; BLOGI(sc, "NO MCP - new load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); if (load_count[path][0] == 1) { return (FW_MSG_CODE_DRV_LOAD_COMMON); } else if (load_count[path][1 + port] == 1) { return (FW_MSG_CODE_DRV_LOAD_PORT); } else { return (FW_MSG_CODE_DRV_LOAD_FUNCTION); } } /* returns the "mcp load_code" according to global load_count array */ static int bxe_nic_unload_no_mcp(struct bxe_softc *sc) { int port = SC_PORT(sc); int path = SC_PATH(sc); BLOGI(sc, "NO MCP - load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); load_count[path][0]--; load_count[path][1 + port]--; BLOGI(sc, "NO MCP - new load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); if (load_count[path][0] == 0) { return (FW_MSG_CODE_DRV_UNLOAD_COMMON); } else if (load_count[path][1 + port] == 0) { return (FW_MSG_CODE_DRV_UNLOAD_PORT); } else { return (FW_MSG_CODE_DRV_UNLOAD_FUNCTION); } } /* request unload mode from the MCP: COMMON, PORT or FUNCTION */ static uint32_t bxe_send_unload_req(struct bxe_softc *sc, int unload_mode) { uint32_t reset_code = 0; /* Select the UNLOAD request mode */ if (unload_mode == UNLOAD_NORMAL) { reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; } else { reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; } /* Send the request to the MCP */ if (!BXE_NOMCP(sc)) { reset_code = bxe_fw_command(sc, reset_code, 0); } else { reset_code = bxe_nic_unload_no_mcp(sc); } return (reset_code); } /* send UNLOAD_DONE command to the MCP */ static void bxe_send_unload_done(struct bxe_softc *sc, uint8_t keep_link) { uint32_t reset_param = keep_link ? DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET : 0; /* Report UNLOAD_DONE to MCP */ if (!BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, reset_param); } } static int bxe_func_wait_started(struct bxe_softc *sc) { int tout = 50; if (!sc->port.pmf) { return (0); } /* * (assumption: No Attention from MCP at this stage) * PMF probably in the middle of TX disable/enable transaction * 1. Sync IRS for default SB * 2. Sync SP queue - this guarantees us that attention handling started * 3. Wait, that TX disable/enable transaction completes * * 1+2 guarantee that if DCBX attention was scheduled it already changed * pending bit of transaction from STARTED-->TX_STOPPED, if we already * received completion for the transaction the state is TX_STOPPED. * State will return to STARTED after completion of TX_STOPPED-->STARTED * transaction. */ /* XXX make sure default SB ISR is done */ /* need a way to synchronize an irq (intr_mtx?) */ /* XXX flush any work queues */ while (ecore_func_get_state(sc, &sc->func_obj) != ECORE_F_STATE_STARTED && tout--) { DELAY(20000); } if (ecore_func_get_state(sc, &sc->func_obj) != ECORE_F_STATE_STARTED) { /* * Failed to complete the transaction in a "good way" * Force both transactions with CLR bit. */ struct ecore_func_state_params func_params = { NULL }; BLOGE(sc, "Unexpected function state! " "Forcing STARTED-->TX_STOPPED-->STARTED\n"); func_params.f_obj = &sc->func_obj; bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags); /* STARTED-->TX_STOPPED */ func_params.cmd = ECORE_F_CMD_TX_STOP; ecore_func_state_change(sc, &func_params); /* TX_STOPPED-->STARTED */ func_params.cmd = ECORE_F_CMD_TX_START; return (ecore_func_state_change(sc, &func_params)); } return (0); } static int bxe_stop_queue(struct bxe_softc *sc, int index) { struct bxe_fastpath *fp = &sc->fp[index]; struct ecore_queue_state_params q_params = { NULL }; int rc; BLOGD(sc, DBG_LOAD, "stopping queue %d cid %d\n", index, fp->index); q_params.q_obj = &sc->sp_objs[fp->index].q_obj; /* We want to wait for completion in this context */ bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); /* Stop the primary connection: */ /* ...halt the connection */ q_params.cmd = ECORE_Q_CMD_HALT; rc = ecore_queue_state_change(sc, &q_params); if (rc) { return (rc); } /* ...terminate the connection */ q_params.cmd = ECORE_Q_CMD_TERMINATE; memset(&q_params.params.terminate, 0, sizeof(q_params.params.terminate)); q_params.params.terminate.cid_index = FIRST_TX_COS_INDEX; rc = ecore_queue_state_change(sc, &q_params); if (rc) { return (rc); } /* ...delete cfc entry */ q_params.cmd = ECORE_Q_CMD_CFC_DEL; memset(&q_params.params.cfc_del, 0, sizeof(q_params.params.cfc_del)); q_params.params.cfc_del.cid_index = FIRST_TX_COS_INDEX; return (ecore_queue_state_change(sc, &q_params)); } /* wait for the outstanding SP commands */ static inline uint8_t bxe_wait_sp_comp(struct bxe_softc *sc, unsigned long mask) { unsigned long tmp; int tout = 5000; /* wait for 5 secs tops */ while (tout--) { mb(); if (!(atomic_load_acq_long(&sc->sp_state) & mask)) { return (TRUE); } DELAY(1000); } mb(); tmp = atomic_load_acq_long(&sc->sp_state); if (tmp & mask) { BLOGE(sc, "Filtering completion timed out: " "sp_state 0x%lx, mask 0x%lx\n", tmp, mask); return (FALSE); } return (FALSE); } static int bxe_func_stop(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_STOP; /* * Try to stop the function the 'good way'. If it fails (in case * of a parity error during bxe_chip_cleanup()) and we are * not in a debug mode, perform a state transaction in order to * enable further HW_RESET transaction. */ rc = ecore_func_state_change(sc, &func_params); if (rc) { BLOGE(sc, "FUNC_STOP ramrod failed. " "Running a dry transaction (%d)\n", rc); bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags); return (ecore_func_state_change(sc, &func_params)); } return (0); } static int bxe_reset_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; /* Prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_RESET; func_params.params.hw_init.load_phase = load_code; return (ecore_func_state_change(sc, &func_params)); } static void bxe_int_disable_sync(struct bxe_softc *sc, int disable_hw) { if (disable_hw) { /* prevent the HW from sending interrupts */ bxe_int_disable(sc); } /* XXX need a way to synchronize ALL irqs (intr_mtx?) */ /* make sure all ISRs are done */ /* XXX make sure sp_task is not running */ /* cancel and flush work queues */ } static void bxe_chip_cleanup(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link) { int port = SC_PORT(sc); struct ecore_mcast_ramrod_params rparam = { NULL }; uint32_t reset_code; int i, rc = 0; bxe_drain_tx_queues(sc); /* give HW time to discard old tx messages */ DELAY(1000); /* Clean all ETH MACs */ rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_ETH_MAC, FALSE); if (rc < 0) { BLOGE(sc, "Failed to delete all ETH MACs (%d)\n", rc); } /* Clean up UC list */ rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_UC_LIST_MAC, TRUE); if (rc < 0) { BLOGE(sc, "Failed to delete UC MACs list (%d)\n", rc); } /* Disable LLH */ if (!CHIP_IS_E1(sc)) { REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0); } /* Set "drop all" to stop Rx */ /* * We need to take the BXE_MCAST_LOCK() here in order to prevent * a race between the completion code and this code. */ BXE_MCAST_LOCK(sc); if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) { bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state); } else { bxe_set_storm_rx_mode(sc); } /* Clean up multicast configuration */ rparam.mcast_obj = &sc->mcast_obj; rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc); } BXE_MCAST_UNLOCK(sc); // XXX bxe_iov_chip_cleanup(sc); /* * Send the UNLOAD_REQUEST to the MCP. This will return if * this function should perform FUNCTION, PORT, or COMMON HW * reset. */ reset_code = bxe_send_unload_req(sc, unload_mode); /* * (assumption: No Attention from MCP at this stage) * PMF probably in the middle of TX disable/enable transaction */ rc = bxe_func_wait_started(sc); if (rc) { BLOGE(sc, "bxe_func_wait_started failed (%d)\n", rc); } /* * Close multi and leading connections * Completions for ramrods are collected in a synchronous way */ for (i = 0; i < sc->num_queues; i++) { if (bxe_stop_queue(sc, i)) { goto unload_error; } } /* * If SP settings didn't get completed so far - something * very wrong has happen. */ if (!bxe_wait_sp_comp(sc, ~0x0UL)) { BLOGE(sc, "Common slow path ramrods got stuck!(%d)\n", rc); } unload_error: rc = bxe_func_stop(sc); if (rc) { BLOGE(sc, "Function stop failed!(%d)\n", rc); } /* disable HW interrupts */ bxe_int_disable_sync(sc, TRUE); /* detach interrupts */ bxe_interrupt_detach(sc); /* Reset the chip */ rc = bxe_reset_hw(sc, reset_code); if (rc) { BLOGE(sc, "Hardware reset failed(%d)\n", rc); } /* Report UNLOAD_DONE to MCP */ bxe_send_unload_done(sc, keep_link); } static void bxe_disable_close_the_gate(struct bxe_softc *sc) { uint32_t val; int port = SC_PORT(sc); BLOGD(sc, DBG_LOAD, "Disabling 'close the gates'\n"); if (CHIP_IS_E1(sc)) { uint32_t addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; val = REG_RD(sc, addr); val &= ~(0x300); REG_WR(sc, addr, val); } else { val = REG_RD(sc, MISC_REG_AEU_GENERAL_MASK); val &= ~(MISC_AEU_GENERAL_MASK_REG_AEU_PXP_CLOSE_MASK | MISC_AEU_GENERAL_MASK_REG_AEU_NIG_CLOSE_MASK); REG_WR(sc, MISC_REG_AEU_GENERAL_MASK, val); } } /* * Cleans the object that have internal lists without sending * ramrods. Should be run when interrutps are disabled. */ static void bxe_squeeze_objects(struct bxe_softc *sc) { unsigned long ramrod_flags = 0, vlan_mac_flags = 0; struct ecore_mcast_ramrod_params rparam = { NULL }; struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj; int rc; /* Cleanup MACs' object first... */ /* Wait for completion of requested */ bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Perform a dry cleanup */ bxe_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags); /* Clean ETH primary MAC */ bxe_set_bit(ECORE_ETH_MAC, &vlan_mac_flags); rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc != 0) { BLOGE(sc, "Failed to clean ETH MACs (%d)\n", rc); } /* Cleanup UC list */ vlan_mac_flags = 0; bxe_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags); rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc != 0) { BLOGE(sc, "Failed to clean UC list MACs (%d)\n", rc); } /* Now clean mcast object... */ rparam.mcast_obj = &sc->mcast_obj; bxe_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags); /* Add a DEL command... */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc); } /* now wait until all pending commands are cleared */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); while (rc != 0) { if (rc < 0) { BLOGE(sc, "Failed to clean MCAST object (%d)\n", rc); return; } rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); } } /* stop the controller */ static __noinline int bxe_nic_unload(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link) { uint8_t global = FALSE; uint32_t val; int i; BXE_CORE_LOCK_ASSERT(sc); if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING); for (i = 0; i < sc->num_queues; i++) { struct bxe_fastpath *fp; fp = &sc->fp[i]; BXE_FP_TX_LOCK(fp); BXE_FP_TX_UNLOCK(fp); } BLOGD(sc, DBG_LOAD, "Starting NIC unload...\n"); /* mark driver as unloaded in shmem2 */ if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) { val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]); SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)], val & ~DRV_FLAGS_CAPABILITIES_LOADED_L2); } if (IS_PF(sc) && sc->recovery_state != BXE_RECOVERY_DONE && (sc->state == BXE_STATE_CLOSED || sc->state == BXE_STATE_ERROR)) { /* * We can get here if the driver has been unloaded * during parity error recovery and is either waiting for a * leader to complete or for other functions to unload and * then ifconfig down has been issued. In this case we want to * unload and let other functions to complete a recovery * process. */ sc->recovery_state = BXE_RECOVERY_DONE; sc->is_leader = 0; bxe_release_leader_lock(sc); mb(); BLOGD(sc, DBG_LOAD, "Releasing a leadership...\n"); BLOGE(sc, "Can't unload in closed or error state recover_state 0x%x" " state = 0x%x\n", sc->recovery_state, sc->state); return (-1); } /* * Nothing to do during unload if previous bxe_nic_load() * did not completed successfully - all resourses are released. */ if ((sc->state == BXE_STATE_CLOSED) || (sc->state == BXE_STATE_ERROR)) { return (0); } sc->state = BXE_STATE_CLOSING_WAITING_HALT; mb(); /* stop tx */ bxe_tx_disable(sc); sc->rx_mode = BXE_RX_MODE_NONE; /* XXX set rx mode ??? */ if (IS_PF(sc) && !sc->grcdump_done) { /* set ALWAYS_ALIVE bit in shmem */ sc->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bxe_drv_pulse(sc); bxe_stats_handle(sc, STATS_EVENT_STOP); bxe_save_statistics(sc); } /* wait till consumers catch up with producers in all queues */ bxe_drain_tx_queues(sc); /* if VF indicate to PF this function is going down (PF will delete sp * elements and clear initializations */ if (IS_VF(sc)) { ; /* bxe_vfpf_close_vf(sc); */ } else if (unload_mode != UNLOAD_RECOVERY) { /* if this is a normal/close unload need to clean up chip */ if (!sc->grcdump_done) bxe_chip_cleanup(sc, unload_mode, keep_link); } else { /* Send the UNLOAD_REQUEST to the MCP */ bxe_send_unload_req(sc, unload_mode); /* * Prevent transactions to host from the functions on the * engine that doesn't reset global blocks in case of global * attention once gloabl blocks are reset and gates are opened * (the engine which leader will perform the recovery * last). */ if (!CHIP_IS_E1x(sc)) { bxe_pf_disable(sc); } /* disable HW interrupts */ bxe_int_disable_sync(sc, TRUE); /* detach interrupts */ bxe_interrupt_detach(sc); /* Report UNLOAD_DONE to MCP */ bxe_send_unload_done(sc, FALSE); } /* * At this stage no more interrupts will arrive so we may safely clean * the queue'able objects here in case they failed to get cleaned so far. */ if (IS_PF(sc)) { bxe_squeeze_objects(sc); } /* There should be no more pending SP commands at this stage */ sc->sp_state = 0; sc->port.pmf = 0; bxe_free_fp_buffers(sc); if (IS_PF(sc)) { bxe_free_mem(sc); } bxe_free_fw_stats_mem(sc); sc->state = BXE_STATE_CLOSED; /* * Check if there are pending parity attentions. If there are - set * RECOVERY_IN_PROGRESS. */ if (IS_PF(sc) && bxe_chk_parity_attn(sc, &global, FALSE)) { bxe_set_reset_in_progress(sc); /* Set RESET_IS_GLOBAL if needed */ if (global) { bxe_set_reset_global(sc); } } /* * The last driver must disable a "close the gate" if there is no * parity attention or "process kill" pending. */ if (IS_PF(sc) && !bxe_clear_pf_load(sc) && bxe_reset_is_done(sc, SC_PATH(sc))) { bxe_disable_close_the_gate(sc); } BLOGD(sc, DBG_LOAD, "Ended NIC unload\n"); return (0); } /* * Called by the OS to set various media options (i.e. link, speed, etc.) when * the user runs "ifconfig bxe media ..." or "ifconfig bxe mediaopt ...". */ static int bxe_ifmedia_update(struct ifnet *ifp) { struct bxe_softc *sc = (struct bxe_softc *)if_getsoftc(ifp); struct ifmedia *ifm; ifm = &sc->ifmedia; /* We only support Ethernet media type. */ if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) { return (EINVAL); } switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_CX4: case IFM_10G_SR: case IFM_10G_T: case IFM_10G_TWINAX: default: /* We don't support changing the media type. */ BLOGD(sc, DBG_LOAD, "Invalid media type (%d)\n", IFM_SUBTYPE(ifm->ifm_media)); return (EINVAL); } return (0); } /* * Called by the OS to get the current media status (i.e. link, speed, etc.). */ static void bxe_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct bxe_softc *sc = if_getsoftc(ifp); /* Report link down if the driver isn't running. */ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { ifmr->ifm_active |= IFM_NONE; return; } /* Setup the default interface info. */ ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->link_vars.link_up) { ifmr->ifm_status |= IFM_ACTIVE; } else { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_active |= sc->media; if (sc->link_vars.duplex == DUPLEX_FULL) { ifmr->ifm_active |= IFM_FDX; } else { ifmr->ifm_active |= IFM_HDX; } } static void bxe_handle_chip_tq(void *context, int pending) { struct bxe_softc *sc = (struct bxe_softc *)context; long work = atomic_load_acq_long(&sc->chip_tq_flags); switch (work) { case CHIP_TQ_REINIT: if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) { /* restart the interface */ BLOGD(sc, DBG_LOAD, "Restarting the interface...\n"); bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_stop_locked(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } break; default: break; } } /* * Handles any IOCTL calls from the operating system. * * Returns: * 0 = Success, >0 Failure */ static int bxe_ioctl(if_t ifp, u_long command, caddr_t data) { struct bxe_softc *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; int mask = 0; int reinit = 0; int error = 0; int mtu_min = (ETH_MIN_PACKET_SIZE - ETH_HLEN); int mtu_max = (MJUM9BYTES - ETH_OVERHEAD - IP_HEADER_ALIGNMENT_PADDING); switch (command) { case SIOCSIFMTU: BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMTU ioctl (mtu=%d)\n", ifr->ifr_mtu); if (sc->mtu == ifr->ifr_mtu) { /* nothing to change */ break; } if ((ifr->ifr_mtu < mtu_min) || (ifr->ifr_mtu > mtu_max)) { BLOGE(sc, "Unsupported MTU size %d (range is %d-%d)\n", ifr->ifr_mtu, mtu_min, mtu_max); error = EINVAL; break; } atomic_store_rel_int((volatile unsigned int *)&sc->mtu, (unsigned long)ifr->ifr_mtu); /* atomic_store_rel_long((volatile unsigned long *)&if_getmtu(ifp), (unsigned long)ifr->ifr_mtu); XXX - Not sure why it needs to be atomic */ if_setmtu(ifp, ifr->ifr_mtu); reinit = 1; break; case SIOCSIFFLAGS: /* toggle the interface state up or down */ BLOGD(sc, DBG_IOCTL, "Received SIOCSIFFLAGS ioctl\n"); BXE_CORE_LOCK(sc); /* check if the interface is up */ if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* set the receive mode flags */ bxe_set_rx_mode(sc); } else if(sc->state != BXE_STATE_DISABLED) { bxe_init_locked(sc); } } else { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bxe_periodic_stop(sc); bxe_stop_locked(sc); } } BXE_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* add/delete multicast addresses */ BLOGD(sc, DBG_IOCTL, "Received SIOCADDMULTI/SIOCDELMULTI ioctl\n"); /* check if the interface is up */ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* set the receive mode flags */ BXE_CORE_LOCK(sc); bxe_set_rx_mode(sc); BXE_CORE_UNLOCK(sc); } break; case SIOCSIFCAP: /* find out which capabilities have changed */ mask = (ifr->ifr_reqcap ^ if_getcapenable(ifp)); BLOGD(sc, DBG_IOCTL, "Received SIOCSIFCAP ioctl (mask=0x%08x)\n", mask); /* toggle the LRO capabilites enable flag */ if (mask & IFCAP_LRO) { if_togglecapenable(ifp, IFCAP_LRO); BLOGD(sc, DBG_IOCTL, "Turning LRO %s\n", (if_getcapenable(ifp) & IFCAP_LRO) ? "ON" : "OFF"); reinit = 1; } /* toggle the TXCSUM checksum capabilites enable flag */ if (mask & IFCAP_TXCSUM) { if_togglecapenable(ifp, IFCAP_TXCSUM); BLOGD(sc, DBG_IOCTL, "Turning TXCSUM %s\n", (if_getcapenable(ifp) & IFCAP_TXCSUM) ? "ON" : "OFF"); if (if_getcapenable(ifp) & IFCAP_TXCSUM) { if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0); } else { if_clearhwassist(ifp); /* XXX */ } } /* toggle the RXCSUM checksum capabilities enable flag */ if (mask & IFCAP_RXCSUM) { if_togglecapenable(ifp, IFCAP_RXCSUM); BLOGD(sc, DBG_IOCTL, "Turning RXCSUM %s\n", (if_getcapenable(ifp) & IFCAP_RXCSUM) ? "ON" : "OFF"); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0); } else { if_clearhwassist(ifp); /* XXX */ } } /* toggle TSO4 capabilities enabled flag */ if (mask & IFCAP_TSO4) { if_togglecapenable(ifp, IFCAP_TSO4); BLOGD(sc, DBG_IOCTL, "Turning TSO4 %s\n", (if_getcapenable(ifp) & IFCAP_TSO4) ? "ON" : "OFF"); } /* toggle TSO6 capabilities enabled flag */ if (mask & IFCAP_TSO6) { if_togglecapenable(ifp, IFCAP_TSO6); BLOGD(sc, DBG_IOCTL, "Turning TSO6 %s\n", (if_getcapenable(ifp) & IFCAP_TSO6) ? "ON" : "OFF"); } /* toggle VLAN_HWTSO capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTSO) { if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); BLOGD(sc, DBG_IOCTL, "Turning VLAN_HWTSO %s\n", (if_getcapenable(ifp) & IFCAP_VLAN_HWTSO) ? "ON" : "OFF"); } /* toggle VLAN_HWCSUM capabilities enabled flag */ if (mask & IFCAP_VLAN_HWCSUM) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWCSUM is not supported!\n"); error = EINVAL; } /* toggle VLAN_MTU capabilities enable flag */ if (mask & IFCAP_VLAN_MTU) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_MTU is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWTAGGING capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTAGGING) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWTAGGING is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWFILTER capabilities enabled flag */ if (mask & IFCAP_VLAN_HWFILTER) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWFILTER is not supported!\n"); error = EINVAL; } /* XXX not yet... * IFCAP_WOL_MAGIC */ break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: /* set/get interface media */ BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMEDIA/SIOCGIFMEDIA ioctl (cmd=%lu)\n", (command & 0xff)); error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; default: BLOGD(sc, DBG_IOCTL, "Received Unknown Ioctl (cmd=%lu)\n", (command & 0xff)); error = ether_ioctl(ifp, command, data); break; } if (reinit && (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)) { BLOGD(sc, DBG_LOAD | DBG_IOCTL, "Re-initializing hardware from IOCTL change\n"); bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_stop_locked(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } return (error); } static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents) { char * type; int i = 0; if (!(sc->debug & DBG_MBUF)) { return; } if (m == NULL) { BLOGD(sc, DBG_MBUF, "mbuf: null pointer\n"); return; } while (m) { #if __FreeBSD_version >= 1000000 BLOGD(sc, DBG_MBUF, "%02d: mbuf=%p m_len=%d m_flags=0x%b m_data=%p\n", i, m, m->m_len, m->m_flags, M_FLAG_BITS, m->m_data); if (m->m_flags & M_PKTHDR) { BLOGD(sc, DBG_MBUF, "%02d: - m_pkthdr: tot_len=%d flags=0x%b csum_flags=%b\n", i, m->m_pkthdr.len, m->m_flags, M_FLAG_BITS, (int)m->m_pkthdr.csum_flags, CSUM_BITS); } #else BLOGD(sc, DBG_MBUF, "%02d: mbuf=%p m_len=%d m_flags=0x%b m_data=%p\n", i, m, m->m_len, m->m_flags, "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY", m->m_data); if (m->m_flags & M_PKTHDR) { BLOGD(sc, DBG_MBUF, "%02d: - m_pkthdr: tot_len=%d flags=0x%b csum_flags=%b\n", i, m->m_pkthdr.len, m->m_flags, "\20\12M_BCAST\13M_MCAST\14M_FRAG" "\15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG" "\22M_PROMISC\23M_NOFREE", (int)m->m_pkthdr.csum_flags, "\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP\4CSUM_IP_FRAGS" "\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED" "\12CSUM_IP_VALID\13CSUM_DATA_VALID" "\14CSUM_PSEUDO_HDR"); } #endif /* #if __FreeBSD_version >= 1000000 */ if (m->m_flags & M_EXT) { switch (m->m_ext.ext_type) { case EXT_CLUSTER: type = "EXT_CLUSTER"; break; case EXT_SFBUF: type = "EXT_SFBUF"; break; case EXT_JUMBOP: type = "EXT_JUMBOP"; break; case EXT_JUMBO9: type = "EXT_JUMBO9"; break; case EXT_JUMBO16: type = "EXT_JUMBO16"; break; case EXT_PACKET: type = "EXT_PACKET"; break; case EXT_MBUF: type = "EXT_MBUF"; break; case EXT_NET_DRV: type = "EXT_NET_DRV"; break; case EXT_MOD_TYPE: type = "EXT_MOD_TYPE"; break; case EXT_DISPOSABLE: type = "EXT_DISPOSABLE"; break; case EXT_EXTREF: type = "EXT_EXTREF"; break; default: type = "UNKNOWN"; break; } BLOGD(sc, DBG_MBUF, "%02d: - m_ext: %p ext_size=%d type=%s\n", i, m->m_ext.ext_buf, m->m_ext.ext_size, type); } if (contents) { bxe_dump_mbuf_data(sc, "mbuf data", m, TRUE); } m = m->m_next; i++; } } /* * Checks to ensure the 13 bd sliding window is >= MSS for TSO. * Check that (13 total bds - 3 bds) = 10 bd window >= MSS. * The window: 3 bds are = 1 for headers BD + 2 for parse BD and last BD * The headers comes in a separate bd in FreeBSD so 13-3=10. * Returns: 0 if OK to send, 1 if packet needs further defragmentation */ static int bxe_chktso_window(struct bxe_softc *sc, int nsegs, bus_dma_segment_t *segs, struct mbuf *m) { uint32_t num_wnds, wnd_size, wnd_sum; int32_t frag_idx, wnd_idx; unsigned short lso_mss; int defrag; defrag = 0; wnd_sum = 0; wnd_size = 10; num_wnds = nsegs - wnd_size; lso_mss = htole16(m->m_pkthdr.tso_segsz); /* * Total header lengths Eth+IP+TCP in first FreeBSD mbuf so calculate the * first window sum of data while skipping the first assuming it is the * header in FreeBSD. */ for (frag_idx = 1; (frag_idx <= wnd_size); frag_idx++) { wnd_sum += htole16(segs[frag_idx].ds_len); } /* check the first 10 bd window size */ if (wnd_sum < lso_mss) { return (1); } /* run through the windows */ for (wnd_idx = 0; wnd_idx < num_wnds; wnd_idx++, frag_idx++) { /* subtract the first mbuf->m_len of the last wndw(-header) */ wnd_sum -= htole16(segs[wnd_idx+1].ds_len); /* add the next mbuf len to the len of our new window */ wnd_sum += htole16(segs[frag_idx].ds_len); if (wnd_sum < lso_mss) { return (1); } } return (0); } static uint8_t bxe_set_pbd_csum_e2(struct bxe_fastpath *fp, struct mbuf *m, uint32_t *parsing_data) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; int e_hlen, ip_hlen, l4_off; uint16_t proto; if (m->m_pkthdr.csum_flags == CSUM_IP) { /* no L4 checksum offload needed */ return (0); } /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? (struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 2); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = sizeof(struct ip6_hdr); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? */ return (0); } /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ l4_off = (e_hlen + ip_hlen); *parsing_data |= (((l4_off >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; th = (struct tcphdr *)(ip + ip_hlen); /* th_off is number of 32-bit words */ *parsing_data |= ((th->th_off << ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) & ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW); return (l4_off + (th->th_off << 2)); /* entire header length */ } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; return (l4_off + sizeof(struct udphdr)); /* entire header length */ } else { /* XXX error stat ??? */ return (0); } } static uint8_t bxe_set_pbd_csum(struct bxe_fastpath *fp, struct mbuf *m, struct eth_tx_parse_bd_e1x *pbd) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; struct udphdr *uh = NULL; int e_hlen, ip_hlen; uint16_t proto; uint8_t hlen; uint16_t tmp_csum; uint32_t *tmp_uh; /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? (struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 1); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = (sizeof(struct ip6_hdr) >> 1); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? */ return (0); } hlen = (e_hlen >> 1); /* note that rest of global_data is indirectly zeroed here */ if (m->m_flags & M_VLANTAG) { pbd->global_data = htole16(hlen | (1 << ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT)); } else { pbd->global_data = htole16(hlen); } pbd->ip_hlen_w = ip_hlen; hlen += pbd->ip_hlen_w; /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { th = (struct tcphdr *)(ip + (ip_hlen << 1)); /* th_off is number of 32-bit words */ hlen += (uint16_t)(th->th_off << 1); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { uh = (struct udphdr *)(ip + (ip_hlen << 1)); hlen += (sizeof(struct udphdr) / 2); } else { /* valid case as only CSUM_IP was set */ return (0); } pbd->total_hlen_w = htole16(hlen); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; pbd->tcp_pseudo_csum = ntohs(th->th_sum); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; /* * Everest1 (i.e. 57710, 57711, 57711E) does not natively support UDP * checksums and does not know anything about the UDP header and where * the checksum field is located. It only knows about TCP. Therefore * we "lie" to the hardware for outgoing UDP packets w/ checksum * offload. Since the checksum field offset for TCP is 16 bytes and * for UDP it is 6 bytes we pass a pointer to the hardware that is 10 * bytes less than the start of the UDP header. This allows the * hardware to write the checksum in the correct spot. But the * hardware will compute a checksum which includes the last 10 bytes * of the IP header. To correct this we tweak the stack computed * pseudo checksum by folding in the calculation of the inverse * checksum for those final 10 bytes of the IP header. This allows * the correct checksum to be computed by the hardware. */ /* set pointer 10 bytes before UDP header */ tmp_uh = (uint32_t *)((uint8_t *)uh - 10); /* calculate a pseudo header checksum over the first 10 bytes */ tmp_csum = in_pseudo(*tmp_uh, *(tmp_uh + 1), *(uint16_t *)(tmp_uh + 2)); pbd->tcp_pseudo_csum = ntohs(in_addword(uh->uh_sum, ~tmp_csum)); } return (hlen * 2); /* entire header length, number of bytes */ } static void bxe_set_pbd_lso_e2(struct mbuf *m, uint32_t *parsing_data) { *parsing_data |= ((m->m_pkthdr.tso_segsz << ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) & ETH_TX_PARSE_BD_E2_LSO_MSS); /* XXX test for IPv6 with extension header... */ } static void bxe_set_pbd_lso(struct mbuf *m, struct eth_tx_parse_bd_e1x *pbd) { struct ether_vlan_header *eh = NULL; struct ip *ip = NULL; struct tcphdr *th = NULL; int e_hlen; /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ e_hlen = (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ? (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) : ETHER_HDR_LEN; /* get the IP and TCP header, with LSO entire header in first mbuf */ /* XXX assuming IPv4 */ ip = (struct ip *)(m->m_data + e_hlen); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); pbd->lso_mss = htole16(m->m_pkthdr.tso_segsz); pbd->tcp_send_seq = ntohl(th->th_seq); pbd->tcp_flags = ((ntohl(((uint32_t *)th)[3]) >> 16) & 0xff); #if 1 /* XXX IPv4 */ pbd->ip_id = ntohs(ip->ip_id); pbd->tcp_pseudo_csum = ntohs(in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP))); #else /* XXX IPv6 */ pbd->tcp_pseudo_csum = ntohs(in_pseudo(&ip6->ip6_src, &ip6->ip6_dst, htons(IPPROTO_TCP))); #endif pbd->global_data |= htole16(ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN); } /* * Encapsulte an mbuf cluster into the tx bd chain and makes the memory * visible to the controller. * * If an mbuf is submitted to this routine and cannot be given to the * controller (e.g. it has too many fragments) then the function may free * the mbuf and return to the caller. * * Returns: * 0 = Success, !0 = Failure * Note the side effect that an mbuf may be freed if it causes a problem. */ static int bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) { bus_dma_segment_t segs[32]; struct mbuf *m0; struct bxe_sw_tx_bd *tx_buf; struct eth_tx_parse_bd_e1x *pbd_e1x = NULL; struct eth_tx_parse_bd_e2 *pbd_e2 = NULL; /* struct eth_tx_parse_2nd_bd *pbd2 = NULL; */ struct eth_tx_bd *tx_data_bd; struct eth_tx_bd *tx_total_pkt_size_bd; struct eth_tx_start_bd *tx_start_bd; uint16_t bd_prod, pkt_prod, total_pkt_size; uint8_t mac_type; int defragged, error, nsegs, rc, nbds, vlan_off, ovlan; struct bxe_softc *sc; uint16_t tx_bd_avail; struct ether_vlan_header *eh; uint32_t pbd_e2_parsing_data = 0; uint8_t hlen = 0; int tmp_bd; int i; sc = fp->sc; #if __FreeBSD_version >= 800000 M_ASSERTPKTHDR(*m_head); #endif /* #if __FreeBSD_version >= 800000 */ m0 = *m_head; rc = defragged = nbds = ovlan = vlan_off = total_pkt_size = 0; tx_start_bd = NULL; tx_data_bd = NULL; tx_total_pkt_size_bd = NULL; /* get the H/W pointer for packets and BDs */ pkt_prod = fp->tx_pkt_prod; bd_prod = fp->tx_bd_prod; mac_type = UNICAST_ADDRESS; /* map the mbuf into the next open DMAable memory */ tx_buf = &fp->tx_mbuf_chain[TX_BD(pkt_prod)]; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); /* mapping errors */ if(__predict_false(error != 0)) { fp->eth_q_stats.tx_dma_mapping_failure++; if (error == ENOMEM) { /* resource issue, try again later */ rc = ENOMEM; } else if (error == EFBIG) { /* possibly recoverable with defragmentation */ fp->eth_q_stats.mbuf_defrag_attempts++; m0 = m_defrag(*m_head, M_NOWAIT); if (m0 == NULL) { fp->eth_q_stats.mbuf_defrag_failures++; rc = ENOBUFS; } else { /* defrag successful, try mapping again */ *m_head = m0; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { fp->eth_q_stats.tx_dma_mapping_failure++; rc = error; } } } else { /* unknown, unrecoverable mapping error */ BLOGE(sc, "Unknown TX mapping error rc=%d\n", error); bxe_dump_mbuf(sc, m0, FALSE); rc = error; } goto bxe_tx_encap_continue; } tx_bd_avail = bxe_tx_avail(sc, fp); /* make sure there is enough room in the send queue */ if (__predict_false(tx_bd_avail < (nsegs + 2))) { /* Recoverable, try again later. */ fp->eth_q_stats.tx_hw_queue_full++; bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); rc = ENOMEM; goto bxe_tx_encap_continue; } /* capture the current H/W TX chain high watermark */ if (__predict_false(fp->eth_q_stats.tx_hw_max_queue_depth < (TX_BD_USABLE - tx_bd_avail))) { fp->eth_q_stats.tx_hw_max_queue_depth = (TX_BD_USABLE - tx_bd_avail); } /* make sure it fits in the packet window */ if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) { /* * The mbuf may be to big for the controller to handle. If the frame * is a TSO frame we'll need to do an additional check. */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { if (bxe_chktso_window(sc, nsegs, segs, m0) == 0) { goto bxe_tx_encap_continue; /* OK to send */ } else { fp->eth_q_stats.tx_window_violation_tso++; } } else { fp->eth_q_stats.tx_window_violation_std++; } /* lets try to defragment this mbuf and remap it */ fp->eth_q_stats.mbuf_defrag_attempts++; bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); m0 = m_defrag(*m_head, M_NOWAIT); if (m0 == NULL) { fp->eth_q_stats.mbuf_defrag_failures++; /* Ugh, just drop the frame... :( */ rc = ENOBUFS; } else { /* defrag successful, try mapping again */ *m_head = m0; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { fp->eth_q_stats.tx_dma_mapping_failure++; /* No sense in trying to defrag/copy chain, drop it. :( */ rc = error; } else { /* if the chain is still too long then drop it */ if(m0->m_pkthdr.csum_flags & CSUM_TSO) { /* * in case TSO is enabled nsegs should be checked against * BXE_TSO_MAX_SEGMENTS */ if (__predict_false(nsegs > BXE_TSO_MAX_SEGMENTS)) { bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); fp->eth_q_stats.nsegs_path1_errors++; rc = ENODEV; } } else { if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) { bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); fp->eth_q_stats.nsegs_path2_errors++; rc = ENODEV; } } } } } bxe_tx_encap_continue: /* Check for errors */ if (rc) { if (rc == ENOMEM) { /* recoverable try again later */ } else { fp->eth_q_stats.tx_soft_errors++; fp->eth_q_stats.mbuf_alloc_tx--; m_freem(*m_head); *m_head = NULL; } return (rc); } /* set flag according to packet type (UNICAST_ADDRESS is default) */ if (m0->m_flags & M_BCAST) { mac_type = BROADCAST_ADDRESS; } else if (m0->m_flags & M_MCAST) { mac_type = MULTICAST_ADDRESS; } /* store the mbuf into the mbuf ring */ tx_buf->m = m0; tx_buf->first_bd = fp->tx_bd_prod; tx_buf->flags = 0; /* prepare the first transmit (start) BD for the mbuf */ tx_start_bd = &fp->tx_chain[TX_BD(bd_prod)].start_bd; BLOGD(sc, DBG_TX, "sending pkt_prod=%u tx_buf=%p next_idx=%u bd=%u tx_start_bd=%p\n", pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_start_bd); tx_start_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); tx_start_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); tx_start_bd->nbytes = htole16(segs[0].ds_len); total_pkt_size += tx_start_bd->nbytes; tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; tx_start_bd->general_data = (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); /* all frames have at least Start BD + Parsing BD */ nbds = nsegs + 1; tx_start_bd->nbd = htole16(nbds); if (m0->m_flags & M_VLANTAG) { tx_start_bd->vlan_or_ethertype = htole16(m0->m_pkthdr.ether_vtag); tx_start_bd->bd_flags.as_bitfield |= (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); } else { /* vf tx, start bd must hold the ethertype for fw to enforce it */ if (IS_VF(sc)) { /* map ethernet header to find type and header length */ eh = mtod(m0, struct ether_vlan_header *); tx_start_bd->vlan_or_ethertype = eh->evl_encap_proto; } else { /* used by FW for packet accounting */ tx_start_bd->vlan_or_ethertype = htole16(fp->tx_pkt_prod); } } /* * add a parsing BD from the chain. The parsing BD is always added * though it is only used for TSO and chksum */ bd_prod = TX_BD_NEXT(bd_prod); if (m0->m_pkthdr.csum_flags) { if (m0->m_pkthdr.csum_flags & CSUM_IP) { fp->eth_q_stats.tx_ofld_frames_csum_ip++; tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IP_CSUM; } if (m0->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6 | ETH_TX_BD_FLAGS_L4_CSUM); } else if (m0->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6 | ETH_TX_BD_FLAGS_IS_UDP | ETH_TX_BD_FLAGS_L4_CSUM); } else if ((m0->m_pkthdr.csum_flags & CSUM_TCP) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM; } else if (m0->m_pkthdr.csum_flags & CSUM_UDP) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_L4_CSUM | ETH_TX_BD_FLAGS_IS_UDP); } } if (!CHIP_IS_E1x(sc)) { pbd_e2 = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e2; memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2)); if (m0->m_pkthdr.csum_flags) { hlen = bxe_set_pbd_csum_e2(fp, m0, &pbd_e2_parsing_data); } SET_FLAG(pbd_e2_parsing_data, ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, mac_type); } else { uint16_t global_data = 0; pbd_e1x = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e1x; memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x)); if (m0->m_pkthdr.csum_flags) { hlen = bxe_set_pbd_csum(fp, m0, pbd_e1x); } SET_FLAG(global_data, ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, mac_type); pbd_e1x->global_data |= htole16(global_data); } /* setup the parsing BD with TSO specific info */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { fp->eth_q_stats.tx_ofld_frames_lso++; tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO; if (__predict_false(tx_start_bd->nbytes > hlen)) { fp->eth_q_stats.tx_ofld_frames_lso_hdr_splits++; /* split the first BD into header/data making the fw job easy */ nbds++; tx_start_bd->nbd = htole16(nbds); tx_start_bd->nbytes = htole16(hlen); bd_prod = TX_BD_NEXT(bd_prod); /* new transmit BD after the tx_parse_bd */ tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd; tx_data_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr + hlen)); tx_data_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr + hlen)); tx_data_bd->nbytes = htole16(segs[0].ds_len - hlen); if (tx_total_pkt_size_bd == NULL) { tx_total_pkt_size_bd = tx_data_bd; } BLOGD(sc, DBG_TX, "TSO split header size is %d (%x:%x) nbds %d\n", le16toh(tx_start_bd->nbytes), le32toh(tx_start_bd->addr_hi), le32toh(tx_start_bd->addr_lo), nbds); } if (!CHIP_IS_E1x(sc)) { bxe_set_pbd_lso_e2(m0, &pbd_e2_parsing_data); } else { bxe_set_pbd_lso(m0, pbd_e1x); } } if (pbd_e2_parsing_data) { pbd_e2->parsing_data = htole32(pbd_e2_parsing_data); } /* prepare remaining BDs, start tx bd contains first seg/frag */ for (i = 1; i < nsegs ; i++) { bd_prod = TX_BD_NEXT(bd_prod); tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd; tx_data_bd->addr_lo = htole32(U64_LO(segs[i].ds_addr)); tx_data_bd->addr_hi = htole32(U64_HI(segs[i].ds_addr)); tx_data_bd->nbytes = htole16(segs[i].ds_len); if (tx_total_pkt_size_bd == NULL) { tx_total_pkt_size_bd = tx_data_bd; } total_pkt_size += tx_data_bd->nbytes; } BLOGD(sc, DBG_TX, "last bd %p\n", tx_data_bd); if (tx_total_pkt_size_bd != NULL) { tx_total_pkt_size_bd->total_pkt_bytes = total_pkt_size; } if (__predict_false(sc->debug & DBG_TX)) { tmp_bd = tx_buf->first_bd; for (i = 0; i < nbds; i++) { if (i == 0) { BLOGD(sc, DBG_TX, "TX Strt: %p bd=%d nbd=%d vlan=0x%x " "bd_flags=0x%x hdr_nbds=%d\n", tx_start_bd, tmp_bd, le16toh(tx_start_bd->nbd), le16toh(tx_start_bd->vlan_or_ethertype), tx_start_bd->bd_flags.as_bitfield, (tx_start_bd->general_data & ETH_TX_START_BD_HDR_NBDS)); } else if (i == 1) { if (pbd_e1x) { BLOGD(sc, DBG_TX, "-> Prse: %p bd=%d global=0x%x ip_hlen_w=%u " "ip_id=%u lso_mss=%u tcp_flags=0x%x csum=0x%x " "tcp_seq=%u total_hlen_w=%u\n", pbd_e1x, tmp_bd, pbd_e1x->global_data, pbd_e1x->ip_hlen_w, pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags, pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq, le16toh(pbd_e1x->total_hlen_w)); } else { /* if (pbd_e2) */ BLOGD(sc, DBG_TX, "-> Parse: %p bd=%d dst=%02x:%02x:%02x " "src=%02x:%02x:%02x parsing_data=0x%x\n", pbd_e2, tmp_bd, pbd_e2->data.mac_addr.dst_hi, pbd_e2->data.mac_addr.dst_mid, pbd_e2->data.mac_addr.dst_lo, pbd_e2->data.mac_addr.src_hi, pbd_e2->data.mac_addr.src_mid, pbd_e2->data.mac_addr.src_lo, pbd_e2->parsing_data); } } if (i != 1) { /* skip parse db as it doesn't hold data */ tx_data_bd = &fp->tx_chain[TX_BD(tmp_bd)].reg_bd; BLOGD(sc, DBG_TX, "-> Frag: %p bd=%d nbytes=%d hi=0x%x lo: 0x%x\n", tx_data_bd, tmp_bd, le16toh(tx_data_bd->nbytes), le32toh(tx_data_bd->addr_hi), le32toh(tx_data_bd->addr_lo)); } tmp_bd = TX_BD_NEXT(tmp_bd); } } BLOGD(sc, DBG_TX, "doorbell: nbds=%d bd=%u\n", nbds, bd_prod); /* update TX BD producer index value for next TX */ bd_prod = TX_BD_NEXT(bd_prod); /* * If the chain of tx_bd's describing this frame is adjacent to or spans * an eth_tx_next_bd element then we need to increment the nbds value. */ if (TX_BD_IDX(bd_prod) < nbds) { nbds++; } /* don't allow reordering of writes for nbd and packets */ mb(); fp->tx_db.data.prod += nbds; /* producer points to the next free tx_bd at this point */ fp->tx_pkt_prod++; fp->tx_bd_prod = bd_prod; DOORBELL(sc, fp->index, fp->tx_db.raw); fp->eth_q_stats.tx_pkts++; /* Prevent speculative reads from getting ahead of the status block. */ bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_READ); /* Prevent speculative reads from getting ahead of the doorbell. */ bus_space_barrier(sc->bar[BAR2].tag, sc->bar[BAR2].handle, 0, 0, BUS_SPACE_BARRIER_READ); return (0); } static void bxe_tx_start_locked(struct bxe_softc *sc, if_t ifp, struct bxe_fastpath *fp) { struct mbuf *m = NULL; int tx_count = 0; uint16_t tx_bd_avail; BXE_FP_TX_LOCK_ASSERT(fp); /* keep adding entries while there are frames to send */ while (!if_sendq_empty(ifp)) { /* * check for any frames to send * dequeue can still be NULL even if queue is not empty */ m = if_dequeue(ifp); if (__predict_false(m == NULL)) { break; } /* the mbuf now belongs to us */ fp->eth_q_stats.mbuf_alloc_tx++; /* * Put the frame into the transmit ring. If we don't have room, * place the mbuf back at the head of the TX queue, set the * OACTIVE flag, and wait for the NIC to drain the chain. */ if (__predict_false(bxe_tx_encap(fp, &m))) { fp->eth_q_stats.tx_encap_failures++; if (m != NULL) { /* mark the TX queue as full and return the frame */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); if_sendq_prepend(ifp, m); fp->eth_q_stats.mbuf_alloc_tx--; fp->eth_q_stats.tx_queue_xoff++; } /* stop looking for more work */ break; } /* the frame was enqueued successfully */ tx_count++; /* send a copy of the frame to any BPF listeners. */ if_etherbpfmtap(ifp, m); tx_bd_avail = bxe_tx_avail(sc, fp); /* handle any completions if we're running low */ if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ bxe_txeof(sc, fp); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) { break; } } } /* all TX packets were dequeued and/or the tx ring is full */ if (tx_count > 0) { /* reset the TX watchdog timeout timer */ fp->watchdog_timer = BXE_TX_TIMEOUT; } } /* Legacy (non-RSS) dispatch routine */ static void bxe_tx_start(if_t ifp) { struct bxe_softc *sc; struct bxe_fastpath *fp; sc = if_getsoftc(ifp); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { BLOGW(sc, "Interface not running, ignoring transmit request\n"); return; } if (!sc->link_vars.link_up) { BLOGW(sc, "Interface link is down, ignoring transmit request\n"); return; } fp = &sc->fp[0]; if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) { fp->eth_q_stats.tx_queue_full_return++; return; } BXE_FP_TX_LOCK(fp); bxe_tx_start_locked(sc, ifp, fp); BXE_FP_TX_UNLOCK(fp); } #if __FreeBSD_version >= 901504 static int bxe_tx_mq_start_locked(struct bxe_softc *sc, if_t ifp, struct bxe_fastpath *fp, struct mbuf *m) { struct buf_ring *tx_br = fp->tx_br; struct mbuf *next; int depth, rc, tx_count; uint16_t tx_bd_avail; rc = tx_count = 0; BXE_FP_TX_LOCK_ASSERT(fp); if (sc->state != BXE_STATE_OPEN) { fp->eth_q_stats.bxe_tx_mq_sc_state_failures++; return ENETDOWN; } if (!tx_br) { BLOGE(sc, "Multiqueue TX and no buf_ring!\n"); return (EINVAL); } if (m != NULL) { rc = drbr_enqueue(ifp, tx_br, m); if (rc != 0) { fp->eth_q_stats.tx_soft_errors++; goto bxe_tx_mq_start_locked_exit; } } if (!sc->link_vars.link_up || !(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { fp->eth_q_stats.tx_request_link_down_failures++; goto bxe_tx_mq_start_locked_exit; } /* fetch the depth of the driver queue */ depth = drbr_inuse_drv(ifp, tx_br); if (depth > fp->eth_q_stats.tx_max_drbr_queue_depth) { fp->eth_q_stats.tx_max_drbr_queue_depth = depth; } /* keep adding entries while there are frames to send */ while ((next = drbr_peek(ifp, tx_br)) != NULL) { /* handle any completions if we're running low */ tx_bd_avail = bxe_tx_avail(sc, fp); if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ bxe_txeof(sc, fp); tx_bd_avail = bxe_tx_avail(sc, fp); if (tx_bd_avail < (BXE_TSO_MAX_SEGMENTS + 1)) { fp->eth_q_stats.bd_avail_too_less_failures++; m_freem(next); drbr_advance(ifp, tx_br); rc = ENOBUFS; break; } } /* the mbuf now belongs to us */ fp->eth_q_stats.mbuf_alloc_tx++; /* * Put the frame into the transmit ring. If we don't have room, * place the mbuf back at the head of the TX queue, set the * OACTIVE flag, and wait for the NIC to drain the chain. */ rc = bxe_tx_encap(fp, &next); if (__predict_false(rc != 0)) { fp->eth_q_stats.tx_encap_failures++; if (next != NULL) { /* mark the TX queue as full and save the frame */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); drbr_putback(ifp, tx_br, next); fp->eth_q_stats.mbuf_alloc_tx--; fp->eth_q_stats.tx_frames_deferred++; } else drbr_advance(ifp, tx_br); /* stop looking for more work */ break; } /* the transmit frame was enqueued successfully */ tx_count++; /* send a copy of the frame to any BPF listeners */ if_etherbpfmtap(ifp, next); drbr_advance(ifp, tx_br); } /* all TX packets were dequeued and/or the tx ring is full */ if (tx_count > 0) { /* reset the TX watchdog timeout timer */ fp->watchdog_timer = BXE_TX_TIMEOUT; } bxe_tx_mq_start_locked_exit: /* If we didn't drain the drbr, enqueue a task in the future to do it. */ if (!drbr_empty(ifp, tx_br)) { fp->eth_q_stats.tx_mq_not_empty++; taskqueue_enqueue_timeout(fp->tq, &fp->tx_timeout_task, 1); } return (rc); } static void bxe_tx_mq_start_deferred(void *arg, int pending) { struct bxe_fastpath *fp = (struct bxe_fastpath *)arg; struct bxe_softc *sc = fp->sc; if_t ifp = sc->ifp; BXE_FP_TX_LOCK(fp); bxe_tx_mq_start_locked(sc, ifp, fp, NULL); BXE_FP_TX_UNLOCK(fp); } /* Multiqueue (TSS) dispatch routine. */ static int bxe_tx_mq_start(struct ifnet *ifp, struct mbuf *m) { struct bxe_softc *sc = if_getsoftc(ifp); struct bxe_fastpath *fp; int fp_index, rc; fp_index = 0; /* default is the first queue */ /* check if flowid is set */ if (BXE_VALID_FLOWID(m)) fp_index = (m->m_pkthdr.flowid % sc->num_queues); fp = &sc->fp[fp_index]; if (sc->state != BXE_STATE_OPEN) { fp->eth_q_stats.bxe_tx_mq_sc_state_failures++; return ENETDOWN; } if (BXE_FP_TX_TRYLOCK(fp)) { rc = bxe_tx_mq_start_locked(sc, ifp, fp, m); BXE_FP_TX_UNLOCK(fp); } else { rc = drbr_enqueue(ifp, fp->tx_br, m); taskqueue_enqueue(fp->tq, &fp->tx_task); } return (rc); } static void bxe_mq_flush(struct ifnet *ifp) { struct bxe_softc *sc = if_getsoftc(ifp); struct bxe_fastpath *fp; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->state != BXE_FP_STATE_IRQ) { BLOGD(sc, DBG_LOAD, "Not clearing fp[%02d] buf_ring (state=%d)\n", fp->index, fp->state); continue; } if (fp->tx_br != NULL) { BLOGD(sc, DBG_LOAD, "Clearing fp[%02d] buf_ring\n", fp->index); BXE_FP_TX_LOCK(fp); while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) { m_freem(m); } BXE_FP_TX_UNLOCK(fp); } } if_qflush(ifp); } #endif /* FreeBSD_version >= 901504 */ static uint16_t bxe_cid_ilt_lines(struct bxe_softc *sc) { if (IS_SRIOV(sc)) { return ((BXE_FIRST_VF_CID + BXE_VF_CIDS) / ILT_PAGE_CIDS); } return (L2_ILT_LINES(sc)); } static void bxe_ilt_set_info(struct bxe_softc *sc) { struct ilt_client_info *ilt_client; struct ecore_ilt *ilt = sc->ilt; uint16_t line = 0; ilt->start_line = FUNC_ILT_BASE(SC_FUNC(sc)); BLOGD(sc, DBG_LOAD, "ilt starts at line %d\n", ilt->start_line); /* CDU */ ilt_client = &ilt->clients[ILT_CLIENT_CDU]; ilt_client->client_num = ILT_CLIENT_CDU; ilt_client->page_size = CDU_ILT_PAGE_SZ; ilt_client->flags = ILT_CLIENT_SKIP_MEM; ilt_client->start = line; line += bxe_cid_ilt_lines(sc); if (CNIC_SUPPORT(sc)) { line += CNIC_ILT_LINES; } ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[CDU]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* QM */ if (QM_INIT(sc->qm_cid_count)) { ilt_client = &ilt->clients[ILT_CLIENT_QM]; ilt_client->client_num = ILT_CLIENT_QM; ilt_client->page_size = QM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; /* 4 bytes for each cid */ line += DIV_ROUND_UP(sc->qm_cid_count * QM_QUEUES_PER_FUNC * 4, QM_ILT_PAGE_SZ); ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[QM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } if (CNIC_SUPPORT(sc)) { /* SRC */ ilt_client = &ilt->clients[ILT_CLIENT_SRC]; ilt_client->client_num = ILT_CLIENT_SRC; ilt_client->page_size = SRC_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += SRC_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[SRC]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* TM */ ilt_client = &ilt->clients[ILT_CLIENT_TM]; ilt_client->client_num = ILT_CLIENT_TM; ilt_client->page_size = TM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += TM_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[TM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } KASSERT((line <= ILT_MAX_LINES), ("Invalid number of ILT lines!")); } static void bxe_set_fp_rx_buf_size(struct bxe_softc *sc) { int i; uint32_t rx_buf_size; rx_buf_size = (IP_HEADER_ALIGNMENT_PADDING + ETH_OVERHEAD + sc->mtu); for (i = 0; i < sc->num_queues; i++) { if(rx_buf_size <= MCLBYTES){ sc->fp[i].rx_buf_size = rx_buf_size; sc->fp[i].mbuf_alloc_size = MCLBYTES; }else if (rx_buf_size <= MJUMPAGESIZE){ sc->fp[i].rx_buf_size = rx_buf_size; sc->fp[i].mbuf_alloc_size = MJUMPAGESIZE; }else if (rx_buf_size <= (MJUMPAGESIZE + MCLBYTES)){ sc->fp[i].rx_buf_size = MCLBYTES; sc->fp[i].mbuf_alloc_size = MCLBYTES; }else if (rx_buf_size <= (2 * MJUMPAGESIZE)){ sc->fp[i].rx_buf_size = MJUMPAGESIZE; sc->fp[i].mbuf_alloc_size = MJUMPAGESIZE; }else { sc->fp[i].rx_buf_size = MCLBYTES; sc->fp[i].mbuf_alloc_size = MCLBYTES; } } } static int bxe_alloc_ilt_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt = (struct ecore_ilt *)malloc(sizeof(struct ecore_ilt), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static int bxe_alloc_ilt_lines_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt->lines = (struct ilt_line *)malloc((sizeof(struct ilt_line) * ILT_MAX_LINES), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static void bxe_free_ilt_mem(struct bxe_softc *sc) { if (sc->ilt != NULL) { free(sc->ilt, M_BXE_ILT); sc->ilt = NULL; } } static void bxe_free_ilt_lines_mem(struct bxe_softc *sc) { if (sc->ilt->lines != NULL) { free(sc->ilt->lines, M_BXE_ILT); sc->ilt->lines = NULL; } } static void bxe_free_mem(struct bxe_softc *sc) { int i; for (i = 0; i < L2_ILT_LINES(sc); i++) { bxe_dma_free(sc, &sc->context[i].vcxt_dma); sc->context[i].vcxt = NULL; sc->context[i].size = 0; } ecore_ilt_mem_op(sc, ILT_MEMOP_FREE); bxe_free_ilt_lines_mem(sc); } static int bxe_alloc_mem(struct bxe_softc *sc) { int context_size; int allocated; int i; /* * Allocate memory for CDU context: * This memory is allocated separately and not in the generic ILT * functions because CDU differs in few aspects: * 1. There can be multiple entities allocating memory for context - * regular L2, CNIC, and SRIOV drivers. Each separately controls * its own ILT lines. * 2. Since CDU page-size is not a single 4KB page (which is the case * for the other ILT clients), to be efficient we want to support * allocation of sub-page-size in the last entry. * 3. Context pointers are used by the driver to pass to FW / update * the context (for the other ILT clients the pointers are used just to * free the memory during unload). */ context_size = (sizeof(union cdu_context) * BXE_L2_CID_COUNT(sc)); for (i = 0, allocated = 0; allocated < context_size; i++) { sc->context[i].size = min(CDU_ILT_PAGE_SZ, (context_size - allocated)); if (bxe_dma_alloc(sc, sc->context[i].size, &sc->context[i].vcxt_dma, "cdu context") != 0) { bxe_free_mem(sc); return (-1); } sc->context[i].vcxt = (union cdu_context *)sc->context[i].vcxt_dma.vaddr; allocated += sc->context[i].size; } bxe_alloc_ilt_lines_mem(sc); BLOGD(sc, DBG_LOAD, "ilt=%p start_line=%u lines=%p\n", sc->ilt, sc->ilt->start_line, sc->ilt->lines); { for (i = 0; i < 4; i++) { BLOGD(sc, DBG_LOAD, "c%d page_size=%u start=%u end=%u num=%u flags=0x%x\n", i, sc->ilt->clients[i].page_size, sc->ilt->clients[i].start, sc->ilt->clients[i].end, sc->ilt->clients[i].client_num, sc->ilt->clients[i].flags); } } if (ecore_ilt_mem_op(sc, ILT_MEMOP_ALLOC)) { BLOGE(sc, "ecore_ilt_mem_op ILT_MEMOP_ALLOC failed\n"); bxe_free_mem(sc); return (-1); } return (0); } static void bxe_free_rx_bd_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } /* free all mbufs and unload all maps */ for (i = 0; i < RX_BD_TOTAL; i++) { if (fp->rx_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map); } if (fp->rx_mbuf_chain[i].m != NULL) { m_freem(fp->rx_mbuf_chain[i].m); fp->rx_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_rx--; } } } static void bxe_free_tpa_pool(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i, max_agg_queues; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } max_agg_queues = MAX_AGG_QS(sc); /* release all mbufs and unload all DMA maps in the TPA pool */ for (i = 0; i < max_agg_queues; i++) { if (fp->rx_tpa_info[i].bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map); } if (fp->rx_tpa_info[i].bd.m != NULL) { m_freem(fp->rx_tpa_info[i].bd.m); fp->rx_tpa_info[i].bd.m = NULL; fp->eth_q_stats.mbuf_alloc_tpa--; } } } static void bxe_free_sge_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_sge_mbuf_tag == NULL) { return; } /* rree all mbufs and unload all maps */ for (i = 0; i < RX_SGE_TOTAL; i++) { if (fp->rx_sge_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map); } if (fp->rx_sge_mbuf_chain[i].m != NULL) { m_freem(fp->rx_sge_mbuf_chain[i].m); fp->rx_sge_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_sge--; } } } static void bxe_free_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; #if __FreeBSD_version >= 901504 if (fp->tx_br != NULL) { /* just in case bxe_mq_flush() wasn't called */ if (mtx_initialized(&fp->tx_mtx)) { struct mbuf *m; BXE_FP_TX_LOCK(fp); while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) m_freem(m); BXE_FP_TX_UNLOCK(fp); } } #endif /* free all RX buffers */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); bxe_free_sge_chain(fp); if (fp->eth_q_stats.mbuf_alloc_rx != 0) { BLOGE(sc, "failed to claim all rx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_rx); } if (fp->eth_q_stats.mbuf_alloc_sge != 0) { BLOGE(sc, "failed to claim all sge mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_sge); } if (fp->eth_q_stats.mbuf_alloc_tpa != 0) { BLOGE(sc, "failed to claim all sge mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tpa); } if (fp->eth_q_stats.mbuf_alloc_tx != 0) { BLOGE(sc, "failed to release tx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tx); } /* XXX verify all mbufs were reclaimed */ } } static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index) { struct bxe_sw_rx_bd *rx_buf; struct eth_rx_bd *rx_bd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs, rc; rc = 0; /* allocate the new RX BD mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_bd_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_rx++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_bd_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_rx--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing RX BD mbuf mappings */ if (prev_index != index) { rx_buf = &fp->rx_mbuf_chain[prev_index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* * We only get here from bxe_rxeof() when the maximum number * of rx buffers is less than RX_BD_USABLE. bxe_rxeof() already * holds the mbuf in the prev_index so it's OK to NULL it out * here without concern of a memory leak. */ fp->rx_mbuf_chain[prev_index].m = NULL; } rx_buf = &fp->rx_mbuf_chain[index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = (prev_index != index) ? fp->rx_mbuf_chain[prev_index].m_map : rx_buf->m_map; rx_buf->m_map = fp->rx_mbuf_spare_map; fp->rx_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_PREREAD); rx_buf->m = m; rx_bd = &fp->rx_chain[index]; rx_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); rx_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue) { struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue]; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate the new TPA mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_tpa_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_tpa++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_tpa_mapping_failed++; m_free(m); fp->eth_q_stats.mbuf_alloc_tpa--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing TPA mbuf mapping */ if (tpa_info->bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, tpa_info->bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, tpa_info->bd.m_map); } /* save the mbuf and mapping info for the TPA mbuf */ map = tpa_info->bd.m_map; tpa_info->bd.m_map = fp->rx_tpa_info_mbuf_spare_map; fp->rx_tpa_info_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, tpa_info->bd.m_map, BUS_DMASYNC_PREREAD); tpa_info->bd.m = m; tpa_info->seg = segs[0]; return (rc); } /* * Allocate an mbuf and assign it to the receive scatter gather chain. The * caller must take care to save a copy of the existing mbuf in the SG mbuf * chain. */ static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index) { struct bxe_sw_rx_bd *sge_buf; struct eth_rx_sge *sge; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate a new SGE mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, SGE_PAGE_SIZE); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_sge_alloc_failed++; return (ENOMEM); } fp->eth_q_stats.mbuf_alloc_sge++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = SGE_PAGE_SIZE; /* map the SGE mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_sge_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_sge--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); sge_buf = &fp->rx_sge_mbuf_chain[index]; /* release any existing SGE mbuf mapping */ if (sge_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, sge_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = sge_buf->m_map; sge_buf->m_map = fp->rx_sge_mbuf_spare_map; fp->rx_sge_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_PREREAD); sge_buf->m = m; sge = &fp->rx_sge_chain[index]; sge->addr_hi = htole32(U64_HI(segs[0].ds_addr)); sge->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static __noinline int bxe_alloc_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, j, rc = 0; int ring_prod, cqe_ring_prod; int max_agg_queues; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; ring_prod = cqe_ring_prod = 0; fp->rx_bd_cons = 0; fp->rx_cq_cons = 0; /* allocate buffers for the RX BDs in RX BD chain */ for (j = 0; j < sc->max_rx_bufs; j++) { rc = bxe_alloc_rx_bd_mbuf(fp, ring_prod, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n", i, rc); goto bxe_alloc_fp_buffers_error; } ring_prod = RX_BD_NEXT(ring_prod); cqe_ring_prod = RCQ_NEXT(cqe_ring_prod); } fp->rx_bd_prod = ring_prod; fp->rx_cq_prod = cqe_ring_prod; fp->eth_q_stats.rx_calls = fp->eth_q_stats.rx_pkts = 0; max_agg_queues = MAX_AGG_QS(sc); fp->tpa_enable = TRUE; /* fill the TPA pool */ for (j = 0; j < max_agg_queues; j++) { rc = bxe_alloc_rx_tpa_mbuf(fp, j); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] TPA queue %d\n", i, j); fp->tpa_enable = FALSE; goto bxe_alloc_fp_buffers_error; } fp->rx_tpa_info[j].state = BXE_TPA_STATE_STOP; } if (fp->tpa_enable) { /* fill the RX SGE chain */ ring_prod = 0; for (j = 0; j < RX_SGE_USABLE; j++) { rc = bxe_alloc_rx_sge_mbuf(fp, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] SGE %d\n", i, ring_prod); fp->tpa_enable = FALSE; ring_prod = 0; goto bxe_alloc_fp_buffers_error; } ring_prod = RX_SGE_NEXT(ring_prod); } fp->rx_sge_prod = ring_prod; } } return (0); bxe_alloc_fp_buffers_error: /* unwind what was already allocated */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); bxe_free_sge_chain(fp); return (ENOBUFS); } static void bxe_free_fw_stats_mem(struct bxe_softc *sc) { bxe_dma_free(sc, &sc->fw_stats_dma); sc->fw_stats_num = 0; sc->fw_stats_req_size = 0; sc->fw_stats_req = NULL; sc->fw_stats_req_mapping = 0; sc->fw_stats_data_size = 0; sc->fw_stats_data = NULL; sc->fw_stats_data_mapping = 0; } static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc) { uint8_t num_queue_stats; int num_groups; /* number of queues for statistics is number of eth queues */ num_queue_stats = BXE_NUM_ETH_QUEUES(sc); /* * Total number of FW statistics requests = * 1 for port stats + 1 for PF stats + num of queues */ sc->fw_stats_num = (2 + num_queue_stats); /* * Request is built from stats_query_header and an array of * stats_query_cmd_group each of which contains STATS_QUERY_CMD_COUNT * rules. The real number or requests is configured in the * stats_query_header. */ num_groups = ((sc->fw_stats_num / STATS_QUERY_CMD_COUNT) + ((sc->fw_stats_num % STATS_QUERY_CMD_COUNT) ? 1 : 0)); BLOGD(sc, DBG_LOAD, "stats fw_stats_num %d num_groups %d\n", sc->fw_stats_num, num_groups); sc->fw_stats_req_size = (sizeof(struct stats_query_header) + (num_groups * sizeof(struct stats_query_cmd_group))); /* * Data for statistics requests + stats_counter. * stats_counter holds per-STORM counters that are incremented when * STORM has finished with the current request. Memory for FCoE * offloaded statistics are counted anyway, even if they will not be sent. * VF stats are not accounted for here as the data of VF stats is stored * in memory allocated by the VF, not here. */ sc->fw_stats_data_size = (sizeof(struct stats_counter) + sizeof(struct per_port_stats) + sizeof(struct per_pf_stats) + /* sizeof(struct fcoe_statistics_params) + */ (sizeof(struct per_queue_stats) * num_queue_stats)); if (bxe_dma_alloc(sc, (sc->fw_stats_req_size + sc->fw_stats_data_size), &sc->fw_stats_dma, "fw stats") != 0) { bxe_free_fw_stats_mem(sc); return (-1); } /* set up the shortcuts */ sc->fw_stats_req = (struct bxe_fw_stats_req *)sc->fw_stats_dma.vaddr; sc->fw_stats_req_mapping = sc->fw_stats_dma.paddr; sc->fw_stats_data = (struct bxe_fw_stats_data *)((uint8_t *)sc->fw_stats_dma.vaddr + sc->fw_stats_req_size); sc->fw_stats_data_mapping = (sc->fw_stats_dma.paddr + sc->fw_stats_req_size); BLOGD(sc, DBG_LOAD, "statistics request base address set to %#jx\n", (uintmax_t)sc->fw_stats_req_mapping); BLOGD(sc, DBG_LOAD, "statistics data base address set to %#jx\n", (uintmax_t)sc->fw_stats_data_mapping); return (0); } /* * Bits map: * 0-7 - Engine0 load counter. * 8-15 - Engine1 load counter. * 16 - Engine0 RESET_IN_PROGRESS bit. * 17 - Engine1 RESET_IN_PROGRESS bit. * 18 - Engine0 ONE_IS_LOADED. Set when there is at least one active * function on the engine * 19 - Engine1 ONE_IS_LOADED. * 20 - Chip reset flow bit. When set none-leader must wait for both engines * leader to complete (check for both RESET_IN_PROGRESS bits and not * for just the one belonging to its engine). */ #define BXE_RECOVERY_GLOB_REG MISC_REG_GENERIC_POR_1 #define BXE_PATH0_LOAD_CNT_MASK 0x000000ff #define BXE_PATH0_LOAD_CNT_SHIFT 0 #define BXE_PATH1_LOAD_CNT_MASK 0x0000ff00 #define BXE_PATH1_LOAD_CNT_SHIFT 8 #define BXE_PATH0_RST_IN_PROG_BIT 0x00010000 #define BXE_PATH1_RST_IN_PROG_BIT 0x00020000 #define BXE_GLOBAL_RESET_BIT 0x00040000 /* set the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_set_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val | BXE_GLOBAL_RESET_BIT); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_clear_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val & (~BXE_GLOBAL_RESET_BIT)); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* checks the GLOBAL_RESET bit, should be run under rtnl lock */ static uint8_t bxe_reset_is_global(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "GLOB_REG=0x%08x\n", val); return (val & BXE_GLOBAL_RESET_BIT) ? TRUE : FALSE; } /* clear RESET_IN_PROGRESS bit for the engine, should be run under rtnl lock */ static void bxe_set_reset_done(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Clear the bit */ val &= ~bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* set RESET_IN_PROGRESS for the engine, should be run under rtnl lock */ static void bxe_set_reset_in_progress(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Set the bit */ val |= bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* check RESET_IN_PROGRESS bit for an engine, should be run under rtnl lock */ static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); uint32_t bit = engine ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; /* return false if bit is set */ return (val & bit) ? FALSE : TRUE; } /* get the load status for an engine, should be run under rtnl lock */ static uint8_t bxe_get_load_status(struct bxe_softc *sc, int engine) { uint32_t mask = engine ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = engine ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); val = ((val & mask) >> shift); BLOGD(sc, DBG_LOAD, "Load mask engine %d = 0x%08x\n", engine, val); return (val != 0); } /* set pf load mark */ /* XXX needs to be under rtnl lock */ static void bxe_set_pf_load(struct bxe_softc *sc) { uint32_t val; uint32_t val1; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); /* get the current counter value */ val1 = ((val & mask) >> shift); /* set bit of this PF */ val1 |= (1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear pf load mark */ /* XXX needs to be under rtnl lock */ static uint8_t bxe_clear_pf_load(struct bxe_softc *sc) { uint32_t val1, val; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old GEN_REG_VAL=0x%08x\n", val); /* get the current counter value */ val1 = (val & mask) >> shift; /* clear bit of that PF */ val1 &= ~(1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); return (val1 != 0); } /* send load requrest to mcp and analyze response */ static int bxe_nic_load_request(struct bxe_softc *sc, uint32_t *load_code) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "initial fw_seq 0x%04x\n", sc->fw_seq); /* get the current FW pulse sequence */ sc->fw_drv_pulse_wr_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb) & DRV_PULSE_SEQ_MASK); BLOGD(sc, DBG_LOAD, "initial drv_pulse 0x%04x\n", sc->fw_drv_pulse_wr_seq); /* load request */ (*load_code) = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); /* if the MCP fails to respond we must abort */ if (!(*load_code)) { BLOGE(sc, "MCP response failure!\n"); return (-1); } /* if MCP refused then must abort */ if ((*load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED) { BLOGE(sc, "MCP refused load request\n"); return (-1); } return (0); } /* * Check whether another PF has already loaded FW to chip. In virtualized * environments a pf from anoth VM may have already initialized the device * including loading FW. */ static int bxe_nic_load_analyze_req(struct bxe_softc *sc, uint32_t load_code) { uint32_t my_fw, loaded_fw; /* is another pf loaded on this engine? */ if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { /* build my FW version dword */ my_fw = (BCM_5710_FW_MAJOR_VERSION + (BCM_5710_FW_MINOR_VERSION << 8 ) + (BCM_5710_FW_REVISION_VERSION << 16) + (BCM_5710_FW_ENGINEERING_VERSION << 24)); /* read loaded FW from chip */ loaded_fw = REG_RD(sc, XSEM_REG_PRAM); BLOGD(sc, DBG_LOAD, "loaded FW 0x%08x / my FW 0x%08x\n", loaded_fw, my_fw); /* abort nic load if version mismatch */ if (my_fw != loaded_fw) { BLOGE(sc, "FW 0x%08x already loaded (mine is 0x%08x)", loaded_fw, my_fw); return (-1); } } return (0); } /* mark PMF if applicable */ static void bxe_nic_load_pmf(struct bxe_softc *sc, uint32_t load_code) { uint32_t ncsi_oem_data_addr; if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) || (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) || (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) { /* * Barrier here for ordering between the writing to sc->port.pmf here * and reading it from the periodic task. */ sc->port.pmf = 1; mb(); } else { sc->port.pmf = 0; } BLOGD(sc, DBG_LOAD, "pmf %d\n", sc->port.pmf); /* XXX needed? */ if (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) { if (SHMEM2_HAS(sc, ncsi_oem_data_addr)) { ncsi_oem_data_addr = SHMEM2_RD(sc, ncsi_oem_data_addr); if (ncsi_oem_data_addr) { REG_WR(sc, (ncsi_oem_data_addr + offsetof(struct glob_ncsi_oem_data, driver_version)), 0); } } } } static void bxe_read_mf_cfg(struct bxe_softc *sc) { int n = (CHIP_IS_MODE_4_PORT(sc) ? 2 : 1); int abs_func; int vn; if (BXE_NOMCP(sc)) { return; /* what should be the default bvalue in this case */ } /* * The formula for computing the absolute function number is... * For 2 port configuration (4 functions per port): * abs_func = 2 * vn + SC_PORT + SC_PATH * For 4 port configuration (2 functions per port): * abs_func = 4 * vn + 2 * SC_PORT + SC_PATH */ for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { abs_func = (n * (2 * vn + SC_PORT(sc)) + SC_PATH(sc)); if (abs_func >= E1H_FUNC_MAX) { break; } sc->devinfo.mf_info.mf_config[vn] = MFCFG_RD(sc, func_mf_config[abs_func].config); } if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_LOAD, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; } else { BLOGD(sc, DBG_LOAD, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; } } /* acquire split MCP access lock register */ static int bxe_acquire_alr(struct bxe_softc *sc) { uint32_t j, val; for (j = 0; j < 1000; j++) { val = (1UL << 31); REG_WR(sc, GRCBASE_MCP + 0x9c, val); val = REG_RD(sc, GRCBASE_MCP + 0x9c); if (val & (1L << 31)) break; DELAY(5000); } if (!(val & (1L << 31))) { BLOGE(sc, "Cannot acquire MCP access lock register\n"); return (-1); } return (0); } /* release split MCP access lock register */ static void bxe_release_alr(struct bxe_softc *sc) { REG_WR(sc, GRCBASE_MCP + 0x9c, 0); } static void bxe_fan_failure(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t ext_phy_config; /* mark the failure */ ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); ext_phy_config &= ~PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK; ext_phy_config |= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE; SHMEM_WR(sc, dev_info.port_hw_config[port].external_phy_config, ext_phy_config); /* log the failure */ BLOGW(sc, "Fan Failure has caused the driver to shutdown " "the card to prevent permanent damage. " "Please contact OEM Support for assistance\n"); /* XXX */ #if 1 bxe_panic(sc, ("Schedule task to handle fan failure\n")); #else /* * Schedule device reset (unload) * This is due to some boards consuming sufficient power when driver is * up to overheat if fan fails. */ bxe_set_bit(BXE_SP_RTNL_FAN_FAILURE, &sc->sp_rtnl_state); schedule_delayed_work(&sc->sp_rtnl_task, 0); #endif } /* this function is called upon a link interrupt */ static void bxe_link_attn(struct bxe_softc *sc) { uint32_t pause_enabled = 0; struct host_port_stats *pstats; int cmng_fns; struct bxe_fastpath *fp; int i; /* Make sure that we are synced with the current statistics */ bxe_stats_handle(sc, STATS_EVENT_STOP); BLOGI(sc, "link_vars phy_flags : %x\n", sc->link_vars.phy_flags); elink_link_update(&sc->link_params, &sc->link_vars); if (sc->link_vars.link_up) { /* dropless flow control */ if (!CHIP_IS_E1(sc) && sc->dropless_fc) { pause_enabled = 0; if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) { pause_enabled = 1; } REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_ETH_PAUSE_ENABLED_OFFSET(SC_PORT(sc))), pause_enabled); } if (sc->link_vars.mac_type != ELINK_MAC_TYPE_EMAC) { pstats = BXE_SP(sc, port_stats); /* reset old mac stats */ memset(&(pstats->mac_stx[0]), 0, sizeof(struct mac_stx)); } if (sc->state == BXE_STATE_OPEN) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } /* Restart tx when the link comes back. */ FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; taskqueue_enqueue(fp->tq, &fp->tx_task); } } if (sc->link_vars.link_up && sc->link_vars.line_speed) { cmng_fns = bxe_get_cmng_fns_mode(sc); if (cmng_fns != CMNG_FNS_NONE) { bxe_cmng_fns_init(sc, FALSE, cmng_fns); storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } else { /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "single function mode without fairness\n"); } } bxe_link_report_locked(sc); if (IS_MF(sc)) { ; // XXX bxe_link_sync_notify(sc); } } static void bxe_attn_int_asserted(struct bxe_softc *sc, uint32_t asserted) { int port = SC_PORT(sc); uint32_t aeu_addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; uint32_t nig_int_mask_addr = port ? NIG_REG_MASK_INTERRUPT_PORT1 : NIG_REG_MASK_INTERRUPT_PORT0; uint32_t aeu_mask; uint32_t nig_mask = 0; uint32_t reg_addr; uint32_t igu_acked; uint32_t cnt; if (sc->attn_state & asserted) { BLOGE(sc, "IGU ERROR attn=0x%08x\n", asserted); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, aeu_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly asserted 0x%08x\n", aeu_mask, asserted); aeu_mask &= ~(asserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, aeu_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state |= asserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); if (asserted & ATTN_HARD_WIRED_MASK) { if (asserted & ATTN_NIG_FOR_FUNC) { bxe_acquire_phy_lock(sc); /* save nig interrupt mask */ nig_mask = REG_RD(sc, nig_int_mask_addr); /* If nig_mask is not set, no need to call the update function */ if (nig_mask) { REG_WR(sc, nig_int_mask_addr, 0); bxe_link_attn(sc); } /* handle unicore attn? */ } if (asserted & ATTN_SW_TIMER_4_FUNC) { BLOGD(sc, DBG_INTR, "ATTN_SW_TIMER_4_FUNC!\n"); } if (asserted & GPIO_2_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_2_FUNC!\n"); } if (asserted & GPIO_3_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_3_FUNC!\n"); } if (asserted & GPIO_4_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_4_FUNC!\n"); } if (port == 0) { if (asserted & ATTN_GENERAL_ATTN_1) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_1!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_1, 0x0); } if (asserted & ATTN_GENERAL_ATTN_2) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_2!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_2, 0x0); } if (asserted & ATTN_GENERAL_ATTN_3) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_3!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_3, 0x0); } } else { if (asserted & ATTN_GENERAL_ATTN_4) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_4!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_4, 0x0); } if (asserted & ATTN_GENERAL_ATTN_5) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_5!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_5, 0x0); } if (asserted & ATTN_GENERAL_ATTN_6) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_6!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_6, 0x0); } } } /* hardwired */ if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_SET); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_SET_UPPER*8); } BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", asserted, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, asserted); /* now set back the mask */ if (asserted & ATTN_NIG_FOR_FUNC) { /* * Verify that IGU ack through BAR was written before restoring * NIG mask. This loop should exit after 2-3 iterations max. */ if (sc->devinfo.int_block != INT_BLOCK_HC) { cnt = 0; do { igu_acked = REG_RD(sc, IGU_REG_ATTENTION_ACK_BITS); } while (((igu_acked & ATTN_NIG_FOR_FUNC) == 0) && (++cnt < MAX_IGU_ATTN_ACK_TO)); if (!igu_acked) { BLOGE(sc, "Failed to verify IGU ack on time\n"); } mb(); } REG_WR(sc, nig_int_mask_addr, nig_mask); bxe_release_phy_lock(sc); } } static void bxe_print_next_block(struct bxe_softc *sc, int idx, const char *blk) { BLOGI(sc, "%s%s", idx ? ", " : "", blk); } static int bxe_check_blocks_with_parity0(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "BRB"); break; case AEU_INPUTS_ATTN_BITS_PARSER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PARSER"); break; case AEU_INPUTS_ATTN_BITS_TSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSDM"); break; case AEU_INPUTS_ATTN_BITS_SEARCHER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "SEARCHER"); break; case AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TCM"); break; case AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSEMI"); break; case AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XPB"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity1(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { int i = 0; uint32_t cur_bit = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PBF_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PBF"); break; case AEU_INPUTS_ATTN_BITS_QM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "QM"); break; case AEU_INPUTS_ATTN_BITS_TIMERS_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TM"); break; case AEU_INPUTS_ATTN_BITS_XSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSDM"); break; case AEU_INPUTS_ATTN_BITS_XCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XCM"); break; case AEU_INPUTS_ATTN_BITS_XSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSEMI"); break; case AEU_INPUTS_ATTN_BITS_DOORBELLQ_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DOORBELLQ"); break; case AEU_INPUTS_ATTN_BITS_NIG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "NIG"); break; case AEU_INPUTS_ATTN_BITS_VAUX_PCI_CORE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "VAUX PCI CORE"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_DEBUG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DEBUG"); break; case AEU_INPUTS_ATTN_BITS_USDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USDM"); break; case AEU_INPUTS_ATTN_BITS_UCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UCM"); break; case AEU_INPUTS_ATTN_BITS_USEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USEMI"); break; case AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UPB"); break; case AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSDM"); break; case AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CCM"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity2(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_CSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSEMI"); break; case AEU_INPUTS_ATTN_BITS_PXP_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PXP"); break; case AEU_IN_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PXPPCICLOCKCLIENT"); break; case AEU_INPUTS_ATTN_BITS_CFC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CFC"); break; case AEU_INPUTS_ATTN_BITS_CDU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CDU"); break; case AEU_INPUTS_ATTN_BITS_DMAE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DMAE"); break; case AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "IGU"); break; case AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "MISC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity3(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP ROM"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP RX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP TX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP SCPAD"); *global = TRUE; break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity4(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PGLUE_B"); break; case AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "ATC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static uint8_t bxe_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print, uint32_t *sig) { int par_num = 0; if ((sig[0] & HW_PRTY_ASSERT_SET_0) || (sig[1] & HW_PRTY_ASSERT_SET_1) || (sig[2] & HW_PRTY_ASSERT_SET_2) || (sig[3] & HW_PRTY_ASSERT_SET_3) || (sig[4] & HW_PRTY_ASSERT_SET_4)) { BLOGE(sc, "Parity error: HW block parity attention:\n" "[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n", (uint32_t)(sig[0] & HW_PRTY_ASSERT_SET_0), (uint32_t)(sig[1] & HW_PRTY_ASSERT_SET_1), (uint32_t)(sig[2] & HW_PRTY_ASSERT_SET_2), (uint32_t)(sig[3] & HW_PRTY_ASSERT_SET_3), (uint32_t)(sig[4] & HW_PRTY_ASSERT_SET_4)); if (print) BLOGI(sc, "Parity errors detected in blocks: "); par_num = bxe_check_blocks_with_parity0(sc, sig[0] & HW_PRTY_ASSERT_SET_0, par_num, print); par_num = bxe_check_blocks_with_parity1(sc, sig[1] & HW_PRTY_ASSERT_SET_1, par_num, global, print); par_num = bxe_check_blocks_with_parity2(sc, sig[2] & HW_PRTY_ASSERT_SET_2, par_num, print); par_num = bxe_check_blocks_with_parity3(sc, sig[3] & HW_PRTY_ASSERT_SET_3, par_num, global, print); par_num = bxe_check_blocks_with_parity4(sc, sig[4] & HW_PRTY_ASSERT_SET_4, par_num, print); if (print) BLOGI(sc, "\n"); return (TRUE); } return (FALSE); } static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print) { struct attn_route attn = { {0} }; int port = SC_PORT(sc); attn.sig[0] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4); attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4); attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4); attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); /* * Since MCP attentions can't be disabled inside the block, we need to * read AEU registers to see whether they're currently disabled */ attn.sig[3] &= ((REG_RD(sc, (!port ? MISC_REG_AEU_ENABLE4_FUNC_0_OUT_0 : MISC_REG_AEU_ENABLE4_FUNC_1_OUT_0)) & MISC_AEU_ENABLE_MCP_PRTY_BITS) | ~MISC_AEU_ENABLE_MCP_PRTY_BITS); if (!CHIP_IS_E1x(sc)) attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4); return (bxe_parity_attn(sc, global, print, attn.sig)); } static void bxe_attn_int_deasserted4(struct bxe_softc *sc, uint32_t attn) { uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_PGLUE_HW_INTERRUPT) { val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS_CLR); BLOGE(sc, "PGLUE hw attention 0x%08x\n", val); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW\n"); } if (attn & AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT) { val = REG_RD(sc, ATC_REG_ATC_INT_STS_CLR); BLOGE(sc, "ATC hw attention 0x%08x\n", val); if (val & ATC_ATC_INT_STS_REG_ADDRESS_ERROR) BLOGE(sc, "ATC_ATC_INT_STS_REG_ADDRESS_ERROR\n"); if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND\n"); if (val & ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS\n"); if (val & ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT\n"); if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR\n"); if (val & ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU\n"); } if (attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)) { BLOGE(sc, "FATAL parity attention set4 0x%08x\n", (uint32_t)(attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR))); } } static void bxe_e1h_disable(struct bxe_softc *sc) { int port = SC_PORT(sc); bxe_tx_disable(sc); REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0); } static void bxe_e1h_enable(struct bxe_softc *sc) { int port = SC_PORT(sc); REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1); // XXX bxe_tx_enable(sc); } /* * called due to MCP event (on pmf): * reread new bandwidth configuration * configure FW * notify others function about the change */ static void bxe_config_mf_bw(struct bxe_softc *sc) { if (sc->link_vars.link_up) { bxe_cmng_fns_init(sc, TRUE, CMNG_FNS_MINMAX); // XXX bxe_link_sync_notify(sc); } storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } static void bxe_set_mf_bw(struct bxe_softc *sc) { bxe_config_mf_bw(sc); bxe_fw_command(sc, DRV_MSG_CODE_SET_MF_BW_ACK, 0); } static void bxe_handle_eee_event(struct bxe_softc *sc) { BLOGD(sc, DBG_INTR, "EEE - LLDP event\n"); bxe_fw_command(sc, DRV_MSG_CODE_EEE_RESULTS_ACK, 0); } #define DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED 3 static void bxe_drv_info_ether_stat(struct bxe_softc *sc) { struct eth_stats_info *ether_stat = &sc->sp->drv_info_to_mcp.ether_stat; strlcpy(ether_stat->version, BXE_DRIVER_VERSION, ETH_STAT_INFO_VERSION_LEN); /* XXX (+ MAC_PAD) taken from other driver... verify this is right */ sc->sp_objs[0].mac_obj.get_n_elements(sc, &sc->sp_objs[0].mac_obj, DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, ether_stat->mac_local + MAC_PAD, MAC_PAD, ETH_ALEN); ether_stat->mtu_size = sc->mtu; ether_stat->feature_flags |= FEATURE_ETH_CHKSUM_OFFLOAD_MASK; if (if_getcapenable(sc->ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) { ether_stat->feature_flags |= FEATURE_ETH_LSO_MASK; } // XXX ether_stat->feature_flags |= ???; ether_stat->promiscuous_mode = 0; // (flags & PROMISC) ? 1 : 0; ether_stat->txq_size = sc->tx_ring_size; ether_stat->rxq_size = sc->rx_ring_size; } static void bxe_handle_drv_info_req(struct bxe_softc *sc) { enum drv_info_opcode op_code; uint32_t drv_info_ctl = SHMEM2_RD(sc, drv_info_control); /* if drv_info version supported by MFW doesn't match - send NACK */ if ((drv_info_ctl & DRV_INFO_CONTROL_VER_MASK) != DRV_INFO_CUR_VER) { bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } op_code = ((drv_info_ctl & DRV_INFO_CONTROL_OP_CODE_MASK) >> DRV_INFO_CONTROL_OP_CODE_SHIFT); memset(&sc->sp->drv_info_to_mcp, 0, sizeof(union drv_info_to_mcp)); switch (op_code) { case ETH_STATS_OPCODE: bxe_drv_info_ether_stat(sc); break; case FCOE_STATS_OPCODE: case ISCSI_STATS_OPCODE: default: /* if op code isn't supported - send NACK */ bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } /* * If we got drv_info attn from MFW then these fields are defined in * shmem2 for sure */ SHMEM2_WR(sc, drv_info_host_addr_lo, U64_LO(BXE_SP_MAPPING(sc, drv_info_to_mcp))); SHMEM2_WR(sc, drv_info_host_addr_hi, U64_HI(BXE_SP_MAPPING(sc, drv_info_to_mcp))); bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_ACK, 0); } static void bxe_dcc_event(struct bxe_softc *sc, uint32_t dcc_event) { BLOGD(sc, DBG_INTR, "dcc_event 0x%08x\n", dcc_event); if (dcc_event & DRV_STATUS_DCC_DISABLE_ENABLE_PF) { /* * This is the only place besides the function initialization * where the sc->flags can change so it is done without any * locks */ if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_INTR, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; bxe_e1h_disable(sc); } else { BLOGD(sc, DBG_INTR, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; bxe_e1h_enable(sc); } dcc_event &= ~DRV_STATUS_DCC_DISABLE_ENABLE_PF; } if (dcc_event & DRV_STATUS_DCC_BANDWIDTH_ALLOCATION) { bxe_config_mf_bw(sc); dcc_event &= ~DRV_STATUS_DCC_BANDWIDTH_ALLOCATION; } /* Report results to MCP */ if (dcc_event) bxe_fw_command(sc, DRV_MSG_CODE_DCC_FAILURE, 0); else bxe_fw_command(sc, DRV_MSG_CODE_DCC_OK, 0); } static void bxe_pmf_update(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; sc->port.pmf = 1; BLOGD(sc, DBG_INTR, "pmf %d\n", sc->port.pmf); /* * We need the mb() to ensure the ordering between the writing to * sc->port.pmf here and reading it from the bxe_periodic_task(). */ mb(); /* queue a periodic task */ // XXX schedule task... // XXX bxe_dcbx_pmf_update(sc); /* enable nig attention */ val = (0xff0f | (1 << (SC_VN(sc) + 4))); if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, val); REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, val); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); } bxe_stats_handle(sc, STATS_EVENT_PMF); } static int bxe_mc_assert(struct bxe_softc *sc) { char last_idx; int i, rc = 0; uint32_t row0, row1, row2, row3; /* XSTORM */ last_idx = REG_RD8(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) BLOGE(sc, "XSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "XSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* TSTORM */ last_idx = REG_RD8(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "TSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "TSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* CSTORM */ last_idx = REG_RD8(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "CSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "CSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* USTORM */ last_idx = REG_RD8(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "USTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "USTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } return (rc); } static void bxe_attn_int_deasserted3(struct bxe_softc *sc, uint32_t attn) { int func = SC_FUNC(sc); uint32_t val; if (attn & EVEREST_GEN_ATTN_IN_USE_MASK) { if (attn & BXE_PMF_LINK_ASSERT(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); bxe_read_mf_cfg(sc); sc->devinfo.mf_info.mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); val = SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_status); if (val & DRV_STATUS_DCC_EVENT_MASK) bxe_dcc_event(sc, (val & DRV_STATUS_DCC_EVENT_MASK)); if (val & DRV_STATUS_SET_MF_BW) bxe_set_mf_bw(sc); if (val & DRV_STATUS_DRV_INFO_REQ) bxe_handle_drv_info_req(sc); if ((sc->port.pmf == 0) && (val & DRV_STATUS_PMF)) bxe_pmf_update(sc); if (val & DRV_STATUS_EEE_NEGOTIATION_RESULTS) bxe_handle_eee_event(sc); if (sc->link_vars.periodic_flags & ELINK_PERIODIC_FLAGS_LINK_EVENT) { /* sync with link */ bxe_acquire_phy_lock(sc); sc->link_vars.periodic_flags &= ~ELINK_PERIODIC_FLAGS_LINK_EVENT; bxe_release_phy_lock(sc); if (IS_MF(sc)) ; // XXX bxe_link_sync_notify(sc); bxe_link_report(sc); } /* * Always call it here: bxe_link_report() will * prevent the link indication duplication. */ bxe_link_status_update(sc); } else if (attn & BXE_MC_ASSERT_BITS) { BLOGE(sc, "MC assert!\n"); bxe_mc_assert(sc); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_10, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_9, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_8, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_7, 0); bxe_panic(sc, ("MC assert!\n")); } else if (attn & BXE_MCP_ASSERT) { BLOGE(sc, "MCP assert!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_11, 0); // XXX bxe_fw_dump(sc); } else { BLOGE(sc, "Unknown HW assert! (attn 0x%08x)\n", attn); } } if (attn & EVEREST_LATCHED_ATTN_IN_USE_MASK) { BLOGE(sc, "LATCHED attention 0x%08x (masked)\n", attn); if (attn & BXE_GRC_TIMEOUT) { val = CHIP_IS_E1(sc) ? 0 : REG_RD(sc, MISC_REG_GRC_TIMEOUT_ATTN); BLOGE(sc, "GRC time-out 0x%08x\n", val); } if (attn & BXE_GRC_RSV) { val = CHIP_IS_E1(sc) ? 0 : REG_RD(sc, MISC_REG_GRC_RSV_ATTN); BLOGE(sc, "GRC reserved 0x%08x\n", val); } REG_WR(sc, MISC_REG_AEU_CLR_LATCH_SIGNAL, 0x7ff); } } static void bxe_attn_int_deasserted2(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val0, mask0, val1, mask1; uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_CFC_HW_INTERRUPT) { val = REG_RD(sc, CFC_REG_CFC_INT_STS_CLR); BLOGE(sc, "CFC hw attention 0x%08x\n", val); /* CFC error attention */ if (val & 0x2) { BLOGE(sc, "FATAL error from CFC\n"); } } if (attn & AEU_INPUTS_ATTN_BITS_PXP_HW_INTERRUPT) { val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_0); BLOGE(sc, "PXP hw attention-0 0x%08x\n", val); /* RQ_USDMDP_FIFO_OVERFLOW */ if (val & 0x18000) { BLOGE(sc, "FATAL error from PXP\n"); } if (!CHIP_IS_E1x(sc)) { val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_1); BLOGE(sc, "PXP hw attention-1 0x%08x\n", val); } } #define PXP2_EOP_ERROR_BIT PXP2_PXP2_INT_STS_CLR_0_REG_WR_PGLUE_EOP_ERROR #define AEU_PXP2_HW_INT_BIT AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_HW_INTERRUPT if (attn & AEU_PXP2_HW_INT_BIT) { /* CQ47854 workaround do not panic on * PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR */ if (!CHIP_IS_E1x(sc)) { mask0 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_0); val1 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_1); mask1 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_1); val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_0); /* * If the only PXP2_EOP_ERROR_BIT is set in * STS0 and STS1 - clear it * * probably we lose additional attentions between * STS0 and STS_CLR0, in this case user will not * be notified about them */ if (val0 & mask0 & PXP2_EOP_ERROR_BIT && !(val1 & mask1)) val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0); /* print the register, since no one can restore it */ BLOGE(sc, "PXP2_REG_PXP2_INT_STS_CLR_0 0x%08x\n", val0); /* * if PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR * then notify */ if (val0 & PXP2_EOP_ERROR_BIT) { BLOGE(sc, "PXP2_WR_PGLUE_EOP_ERROR\n"); /* * if only PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR is * set then clear attention from PXP2 block without panic */ if (((val0 & mask0) == PXP2_EOP_ERROR_BIT) && ((val1 & mask1) == 0)) attn &= ~AEU_PXP2_HW_INT_BIT; } } } if (attn & HW_INTERRUT_ASSERT_SET_2) { reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_2 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2); val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_2); REG_WR(sc, reg_offset, val); BLOGE(sc, "FATAL HW block attention set2 0x%x\n", (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_2)); bxe_panic(sc, ("HW block attention set2\n")); } } static void bxe_attn_int_deasserted1(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_DOORBELLQ_HW_INTERRUPT) { val = REG_RD(sc, DORQ_REG_DORQ_INT_STS_CLR); BLOGE(sc, "DB hw attention 0x%08x\n", val); /* DORQ discard attention */ if (val & 0x2) { BLOGE(sc, "FATAL error from DORQ\n"); } } if (attn & HW_INTERRUT_ASSERT_SET_1) { reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_1 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1); val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_1); REG_WR(sc, reg_offset, val); BLOGE(sc, "FATAL HW block attention set1 0x%08x\n", (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_1)); bxe_panic(sc, ("HW block attention set1\n")); } } static void bxe_attn_int_deasserted0(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val; reg_offset = (port) ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; if (attn & AEU_INPUTS_ATTN_BITS_SPIO5) { val = REG_RD(sc, reg_offset); val &= ~AEU_INPUTS_ATTN_BITS_SPIO5; REG_WR(sc, reg_offset, val); BLOGW(sc, "SPIO5 hw attention\n"); /* Fan failure attention */ elink_hw_reset_phy(&sc->link_params); bxe_fan_failure(sc); } if ((attn & sc->link_vars.aeu_int_mask) && sc->port.pmf) { bxe_acquire_phy_lock(sc); elink_handle_module_detect_int(&sc->link_params); bxe_release_phy_lock(sc); } if (attn & HW_INTERRUT_ASSERT_SET_0) { val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_0); REG_WR(sc, reg_offset, val); bxe_panic(sc, ("FATAL HW block attention set0 0x%lx\n", (attn & HW_INTERRUT_ASSERT_SET_0))); } } static void bxe_attn_int_deasserted(struct bxe_softc *sc, uint32_t deasserted) { struct attn_route attn; struct attn_route *group_mask; int port = SC_PORT(sc); int index; uint32_t reg_addr; uint32_t val; uint32_t aeu_mask; uint8_t global = FALSE; /* * Need to take HW lock because MCP or other port might also * try to handle this event. */ bxe_acquire_alr(sc); if (bxe_chk_parity_attn(sc, &global, TRUE)) { /* XXX * In case of parity errors don't handle attentions so that * other function would "see" parity errors. */ sc->recovery_state = BXE_RECOVERY_INIT; // XXX schedule a recovery task... /* disable HW interrupts */ bxe_int_disable(sc); bxe_release_alr(sc); return; } attn.sig[0] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4); attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4); attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4); attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); if (!CHIP_IS_E1x(sc)) { attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4); } else { attn.sig[4] = 0; } BLOGD(sc, DBG_INTR, "attn: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", attn.sig[0], attn.sig[1], attn.sig[2], attn.sig[3], attn.sig[4]); for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { if (deasserted & (1 << index)) { group_mask = &sc->attn_group[index]; BLOGD(sc, DBG_INTR, "group[%d]: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", index, group_mask->sig[0], group_mask->sig[1], group_mask->sig[2], group_mask->sig[3], group_mask->sig[4]); bxe_attn_int_deasserted4(sc, attn.sig[4] & group_mask->sig[4]); bxe_attn_int_deasserted3(sc, attn.sig[3] & group_mask->sig[3]); bxe_attn_int_deasserted1(sc, attn.sig[1] & group_mask->sig[1]); bxe_attn_int_deasserted2(sc, attn.sig[2] & group_mask->sig[2]); bxe_attn_int_deasserted0(sc, attn.sig[0] & group_mask->sig[0]); } } bxe_release_alr(sc); if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_CLR); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_CLR_UPPER*8); } val = ~deasserted; BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", val, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, val); if (~sc->attn_state & deasserted) { BLOGE(sc, "IGU error\n"); } reg_addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, reg_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly deasserted 0x%08x\n", aeu_mask, deasserted); aeu_mask |= (deasserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, reg_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state &= ~deasserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); } static void bxe_attn_int(struct bxe_softc *sc) { /* read local copy of bits */ uint32_t attn_bits = le32toh(sc->def_sb->atten_status_block.attn_bits); uint32_t attn_ack = le32toh(sc->def_sb->atten_status_block.attn_bits_ack); uint32_t attn_state = sc->attn_state; /* look for changed bits */ uint32_t asserted = attn_bits & ~attn_ack & ~attn_state; uint32_t deasserted = ~attn_bits & attn_ack & attn_state; BLOGD(sc, DBG_INTR, "attn_bits 0x%08x attn_ack 0x%08x asserted 0x%08x deasserted 0x%08x\n", attn_bits, attn_ack, asserted, deasserted); if (~(attn_bits ^ attn_ack) & (attn_bits ^ attn_state)) { BLOGE(sc, "BAD attention state\n"); } /* handle bits that were raised */ if (asserted) { bxe_attn_int_asserted(sc, asserted); } if (deasserted) { bxe_attn_int_deasserted(sc, deasserted); } } static uint16_t bxe_update_dsb_idx(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; uint16_t rc = 0; mb(); /* status block is written to by the chip */ if (sc->def_att_idx != def_sb->atten_status_block.attn_bits_index) { sc->def_att_idx = def_sb->atten_status_block.attn_bits_index; rc |= BXE_DEF_SB_ATT_IDX; } if (sc->def_idx != def_sb->sp_sb.running_index) { sc->def_idx = def_sb->sp_sb.running_index; rc |= BXE_DEF_SB_IDX; } mb(); return (rc); } static inline struct ecore_queue_sp_obj * bxe_cid_to_q_obj(struct bxe_softc *sc, uint32_t cid) { BLOGD(sc, DBG_SP, "retrieving fp from cid %d\n", cid); return (&sc->sp_objs[CID_TO_FP(cid, sc)].q_obj); } static void bxe_handle_mcast_eqe(struct bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam; int rc; memset(&rparam, 0, sizeof(rparam)); rparam.mcast_obj = &sc->mcast_obj; BXE_MCAST_LOCK(sc); /* clear pending state for the last command */ sc->mcast_obj.raw.clear_pending(&sc->mcast_obj.raw); /* if there are pending mcast commands - send them */ if (sc->mcast_obj.check_pending(&sc->mcast_obj)) { rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); if (rc < 0) { BLOGD(sc, DBG_SP, "ERROR: Failed to send pending mcast commands (%d)\n", rc); } } BXE_MCAST_UNLOCK(sc); } static void bxe_handle_classification_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { unsigned long ramrod_flags = 0; int rc = 0; uint32_t cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; struct ecore_vlan_mac_obj *vlan_mac_obj; /* always push next commands out, don't wait here */ bit_set(&ramrod_flags, RAMROD_CONT); switch (le32toh(elem->message.data.eth_event.echo) >> BXE_SWCID_SHIFT) { case ECORE_FILTER_MAC_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MAC completions\n"); vlan_mac_obj = &sc->sp_objs[cid].mac_obj; break; case ECORE_FILTER_MCAST_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MCAST completions\n"); /* * This is only relevant for 57710 where multicast MACs are * configured as unicast MACs using the same ramrod. */ bxe_handle_mcast_eqe(sc); return; default: BLOGE(sc, "Unsupported classification command: %d\n", elem->message.data.eth_event.echo); return; } rc = vlan_mac_obj->complete(sc, vlan_mac_obj, elem, &ramrod_flags); if (rc < 0) { BLOGE(sc, "Failed to schedule new commands (%d)\n", rc); } else if (rc > 0) { BLOGD(sc, DBG_SP, "Scheduled next pending commands...\n"); } } static void bxe_handle_rx_mode_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { bxe_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state); /* send rx_mode command again if was requested */ if (bxe_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) { bxe_set_storm_rx_mode(sc); } } static void bxe_update_eq_prod(struct bxe_softc *sc, uint16_t prod) { storm_memset_eq_prod(sc, prod, SC_FUNC(sc)); wmb(); /* keep prod updates ordered */ } static void bxe_eq_int(struct bxe_softc *sc) { uint16_t hw_cons, sw_cons, sw_prod; union event_ring_elem *elem; uint8_t echo; uint32_t cid; uint8_t opcode; int spqe_cnt = 0; struct ecore_queue_sp_obj *q_obj; struct ecore_func_sp_obj *f_obj = &sc->func_obj; struct ecore_raw_obj *rss_raw = &sc->rss_conf_obj.raw; hw_cons = le16toh(*sc->eq_cons_sb); /* * The hw_cons range is 1-255, 257 - the sw_cons range is 0-254, 256. * when we get to the next-page we need to adjust so the loop * condition below will be met. The next element is the size of a * regular element and hence incrementing by 1 */ if ((hw_cons & EQ_DESC_MAX_PAGE) == EQ_DESC_MAX_PAGE) { hw_cons++; } /* * This function may never run in parallel with itself for a * specific sc and no need for a read memory barrier here. */ sw_cons = sc->eq_cons; sw_prod = sc->eq_prod; BLOGD(sc, DBG_SP,"EQ: hw_cons=%u sw_cons=%u eq_spq_left=0x%lx\n", hw_cons, sw_cons, atomic_load_acq_long(&sc->eq_spq_left)); for (; sw_cons != hw_cons; sw_prod = NEXT_EQ_IDX(sw_prod), sw_cons = NEXT_EQ_IDX(sw_cons)) { elem = &sc->eq[EQ_DESC(sw_cons)]; /* elem CID originates from FW, actually LE */ cid = SW_CID(elem->message.data.cfc_del_event.cid); opcode = elem->message.opcode; /* handle eq element */ switch (opcode) { case EVENT_RING_OPCODE_STAT_QUERY: BLOGD(sc, DBG_SP, "got statistics completion event %d\n", sc->stats_comp++); /* nothing to do with stats comp */ goto next_spqe; case EVENT_RING_OPCODE_CFC_DEL: /* handle according to cid range */ /* we may want to verify here that the sc state is HALTING */ BLOGD(sc, DBG_SP, "got delete ramrod for MULTI[%d]\n", cid); q_obj = bxe_cid_to_q_obj(sc, cid); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_CFC_DEL)) { break; } goto next_spqe; case EVENT_RING_OPCODE_STOP_TRAFFIC: BLOGD(sc, DBG_SP, "got STOP TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_STOP)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_PAUSED); goto next_spqe; case EVENT_RING_OPCODE_START_TRAFFIC: BLOGD(sc, DBG_SP, "got START TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_START)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_RELEASED); goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_UPDATE: echo = elem->message.data.function_update_event.echo; if (echo == SWITCH_UPDATE) { BLOGD(sc, DBG_SP, "got FUNC_SWITCH_UPDATE ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_SWITCH_UPDATE)) { break; } } else { BLOGD(sc, DBG_SP, "AFEX: ramrod completed FUNCTION_UPDATE\n"); } goto next_spqe; case EVENT_RING_OPCODE_FORWARD_SETUP: q_obj = &bxe_fwd_sp_obj(sc, q_obj); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_SETUP_TX_ONLY)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_START: BLOGD(sc, DBG_SP, "got FUNC_START ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_START)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_STOP: BLOGD(sc, DBG_SP, "got FUNC_STOP ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_STOP)) { break; } goto next_spqe; } switch (opcode | sc->state) { case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPENING_WAITING_PORT): cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; BLOGD(sc, DBG_SP, "got RSS_UPDATE ramrod. CID %d\n", cid); rss_raw->clear_pending(rss_raw); break; case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_CLOSING_WAITING_HALT): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got (un)set mac ramrod\n"); bxe_handle_classification_eqe(sc, elem); break; case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got mcast ramrod\n"); bxe_handle_mcast_eqe(sc); break; case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got rx_mode ramrod\n"); bxe_handle_rx_mode_eqe(sc, elem); break; default: /* unknown event log error and continue */ BLOGE(sc, "Unknown EQ event %d, sc->state 0x%x\n", elem->message.opcode, sc->state); } next_spqe: spqe_cnt++; } /* for */ mb(); atomic_add_acq_long(&sc->eq_spq_left, spqe_cnt); sc->eq_cons = sw_cons; sc->eq_prod = sw_prod; /* make sure that above mem writes were issued towards the memory */ wmb(); /* update producer */ bxe_update_eq_prod(sc, sc->eq_prod); } static void bxe_handle_sp_tq(void *context, int pending) { struct bxe_softc *sc = (struct bxe_softc *)context; uint16_t status; BLOGD(sc, DBG_SP, "---> SP TASK <---\n"); /* what work needs to be performed? */ status = bxe_update_dsb_idx(sc); BLOGD(sc, DBG_SP, "dsb status 0x%04x\n", status); /* HW attentions */ if (status & BXE_DEF_SB_ATT_IDX) { BLOGD(sc, DBG_SP, "---> ATTN INTR <---\n"); bxe_attn_int(sc); status &= ~BXE_DEF_SB_ATT_IDX; } /* SP events: STAT_QUERY and others */ if (status & BXE_DEF_SB_IDX) { /* handle EQ completions */ BLOGD(sc, DBG_SP, "---> EQ INTR <---\n"); bxe_eq_int(sc); bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, le16toh(sc->def_idx), IGU_INT_NOP, 1); status &= ~BXE_DEF_SB_IDX; } /* if status is non zero then something went wrong */ if (__predict_false(status)) { BLOGE(sc, "Got an unknown SP interrupt! (0x%04x)\n", status); } /* ack status block only if something was actually handled */ bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, le16toh(sc->def_att_idx), IGU_INT_ENABLE, 1); /* * Must be called after the EQ processing (since eq leads to sriov * ramrod completion flows). * This flow may have been scheduled by the arrival of a ramrod * completion, or by the sriov code rescheduling itself. */ // XXX bxe_iov_sp_task(sc); } static void bxe_handle_fp_tq(void *context, int pending) { struct bxe_fastpath *fp = (struct bxe_fastpath *)context; struct bxe_softc *sc = fp->sc; uint8_t more_tx = FALSE; uint8_t more_rx = FALSE; BLOGD(sc, DBG_INTR, "---> FP TASK QUEUE (%d) <---\n", fp->index); /* XXX * IFF_DRV_RUNNING state can't be checked here since we process * slowpath events on a client queue during setup. Instead * we need to add a "process/continue" flag here that the driver * can use to tell the task here not to do anything. */ #if 0 if (!(if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)) { return; } #endif /* update the fastpath index */ bxe_update_fp_sb_idx(fp); /* XXX add loop here if ever support multiple tx CoS */ /* fp->txdata[cos] */ if (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); more_tx = bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); } if (bxe_has_rx_work(fp)) { more_rx = bxe_rxeof(sc, fp); } if (more_rx /*|| more_tx*/) { /* still more work to do */ taskqueue_enqueue(fp->tq, &fp->tq_task); return; } bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1); } static void bxe_task_fp(struct bxe_fastpath *fp) { struct bxe_softc *sc = fp->sc; uint8_t more_tx = FALSE; uint8_t more_rx = FALSE; BLOGD(sc, DBG_INTR, "---> FP TASK ISR (%d) <---\n", fp->index); /* update the fastpath index */ bxe_update_fp_sb_idx(fp); /* XXX add loop here if ever support multiple tx CoS */ /* fp->txdata[cos] */ if (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); more_tx = bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); } if (bxe_has_rx_work(fp)) { more_rx = bxe_rxeof(sc, fp); } if (more_rx /*|| more_tx*/) { /* still more work to do, bail out if this ISR and process later */ taskqueue_enqueue(fp->tq, &fp->tq_task); return; } /* * Here we write the fastpath index taken before doing any tx or rx work. * It is very well possible other hw events occurred up to this point and * they were actually processed accordingly above. Since we're going to * write an older fastpath index, an interrupt is coming which we might * not do any work in. */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1); } /* * Legacy interrupt entry point. * * Verifies that the controller generated the interrupt and * then calls a separate routine to handle the various * interrupt causes: link, RX, and TX. */ static void bxe_intr_legacy(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; struct bxe_fastpath *fp; uint16_t status, mask; int i; BLOGD(sc, DBG_INTR, "---> BXE INTx <---\n"); /* * 0 for ustorm, 1 for cstorm * the bits returned from ack_int() are 0-15 * bit 0 = attention status block * bit 1 = fast path status block * a mask of 0x2 or more = tx/rx event * a mask of 1 = slow path event */ status = bxe_ack_int(sc); /* the interrupt is not for us */ if (__predict_false(status == 0)) { BLOGD(sc, DBG_INTR, "Not our interrupt!\n"); return; } BLOGD(sc, DBG_INTR, "Interrupt status 0x%04x\n", status); FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; mask = (0x2 << (fp->index + CNIC_SUPPORT(sc))); if (status & mask) { /* acknowledge and disable further fastpath interrupts */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); bxe_task_fp(fp); status &= ~mask; } } if (__predict_false(status & 0x1)) { /* acknowledge and disable further slowpath interrupts */ bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); /* schedule slowpath handler */ taskqueue_enqueue(sc->sp_tq, &sc->sp_tq_task); status &= ~0x1; } if (__predict_false(status)) { BLOGW(sc, "Unexpected fastpath status (0x%08x)!\n", status); } } /* slowpath interrupt entry point */ static void bxe_intr_sp(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; BLOGD(sc, (DBG_INTR | DBG_SP), "---> SP INTR <---\n"); /* acknowledge and disable further slowpath interrupts */ bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); /* schedule slowpath handler */ taskqueue_enqueue(sc->sp_tq, &sc->sp_tq_task); } /* fastpath interrupt entry point */ static void bxe_intr_fp(void *xfp) { struct bxe_fastpath *fp = (struct bxe_fastpath *)xfp; struct bxe_softc *sc = fp->sc; BLOGD(sc, DBG_INTR, "---> FP INTR %d <---\n", fp->index); BLOGD(sc, DBG_INTR, "(cpu=%d) MSI-X fp=%d fw_sb=%d igu_sb=%d\n", curcpu, fp->index, fp->fw_sb_id, fp->igu_sb_id); /* acknowledge and disable further fastpath interrupts */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); bxe_task_fp(fp); } /* Release all interrupts allocated by the driver. */ static void bxe_interrupt_free(struct bxe_softc *sc) { int i; switch (sc->interrupt_mode) { case INTR_MODE_INTX: BLOGD(sc, DBG_LOAD, "Releasing legacy INTx vector\n"); if (sc->intr[0].resource != NULL) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[0].rid, sc->intr[0].resource); } break; case INTR_MODE_MSI: for (i = 0; i < sc->intr_count; i++) { BLOGD(sc, DBG_LOAD, "Releasing MSI vector %d\n", i); if (sc->intr[i].resource && sc->intr[i].rid) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[i].rid, sc->intr[i].resource); } } pci_release_msi(sc->dev); break; case INTR_MODE_MSIX: for (i = 0; i < sc->intr_count; i++) { BLOGD(sc, DBG_LOAD, "Releasing MSI-X vector %d\n", i); if (sc->intr[i].resource && sc->intr[i].rid) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[i].rid, sc->intr[i].resource); } } pci_release_msi(sc->dev); break; default: /* nothing to do as initial allocation failed */ break; } } /* * This function determines and allocates the appropriate * interrupt based on system capabilites and user request. * * The user may force a particular interrupt mode, specify * the number of receive queues, specify the method for * distribuitng received frames to receive queues, or use * the default settings which will automatically select the * best supported combination. In addition, the OS may or * may not support certain combinations of these settings. * This routine attempts to reconcile the settings requested * by the user with the capabilites available from the system * to select the optimal combination of features. * * Returns: * 0 = Success, !0 = Failure. */ static int bxe_interrupt_alloc(struct bxe_softc *sc) { int msix_count = 0; int msi_count = 0; int num_requested = 0; int num_allocated = 0; int rid, i, j; int rc; /* get the number of available MSI/MSI-X interrupts from the OS */ if (sc->interrupt_mode > 0) { if (sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) { msix_count = pci_msix_count(sc->dev); } if (sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) { msi_count = pci_msi_count(sc->dev); } BLOGD(sc, DBG_LOAD, "%d MSI and %d MSI-X vectors available\n", msi_count, msix_count); } do { /* try allocating MSI-X interrupt resources (at least 2) */ if (sc->interrupt_mode != INTR_MODE_MSIX) { break; } if (((sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) == 0) || (msix_count < 2)) { sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ break; } /* ask for the necessary number of MSI-X vectors */ num_requested = min((sc->num_queues + 1), msix_count); BLOGD(sc, DBG_LOAD, "Requesting %d MSI-X vectors\n", num_requested); num_allocated = num_requested; if ((rc = pci_alloc_msix(sc->dev, &num_allocated)) != 0) { BLOGE(sc, "MSI-X alloc failed! (%d)\n", rc); sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ break; } if (num_allocated < 2) { /* possible? */ BLOGE(sc, "MSI-X allocation less than 2!\n"); sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ pci_release_msi(sc->dev); break; } BLOGI(sc, "MSI-X vectors Requested %d and Allocated %d\n", num_requested, num_allocated); /* best effort so use the number of vectors allocated to us */ sc->intr_count = num_allocated; sc->num_queues = num_allocated - 1; rid = 1; /* initial resource identifier */ /* allocate the MSI-X vectors */ for (i = 0; i < num_allocated; i++) { sc->intr[i].rid = (rid + i); if ((sc->intr[i].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[i].rid, RF_ACTIVE)) == NULL) { BLOGE(sc, "Failed to map MSI-X[%d] (rid=%d)!\n", i, (rid + i)); for (j = (i - 1); j >= 0; j--) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[j].rid, sc->intr[j].resource); } sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ pci_release_msi(sc->dev); break; } BLOGD(sc, DBG_LOAD, "Mapped MSI-X[%d] (rid=%d)\n", i, (rid + i)); } } while (0); do { /* try allocating MSI vector resources (at least 2) */ if (sc->interrupt_mode != INTR_MODE_MSI) { break; } if (((sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) == 0) || (msi_count < 1)) { sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ break; } /* ask for a single MSI vector */ num_requested = 1; BLOGD(sc, DBG_LOAD, "Requesting %d MSI vectors\n", num_requested); num_allocated = num_requested; if ((rc = pci_alloc_msi(sc->dev, &num_allocated)) != 0) { BLOGE(sc, "MSI alloc failed (%d)!\n", rc); sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ break; } if (num_allocated != 1) { /* possible? */ BLOGE(sc, "MSI allocation is not 1!\n"); sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ pci_release_msi(sc->dev); break; } BLOGI(sc, "MSI vectors Requested %d and Allocated %d\n", num_requested, num_allocated); /* best effort so use the number of vectors allocated to us */ sc->intr_count = num_allocated; sc->num_queues = num_allocated; rid = 1; /* initial resource identifier */ sc->intr[0].rid = rid; if ((sc->intr[0].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[0].rid, RF_ACTIVE)) == NULL) { BLOGE(sc, "Failed to map MSI[0] (rid=%d)!\n", rid); sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ pci_release_msi(sc->dev); break; } BLOGD(sc, DBG_LOAD, "Mapped MSI[0] (rid=%d)\n", rid); } while (0); do { /* try allocating INTx vector resources */ if (sc->interrupt_mode != INTR_MODE_INTX) { break; } BLOGD(sc, DBG_LOAD, "Requesting legacy INTx interrupt\n"); /* only one vector for INTx */ sc->intr_count = 1; sc->num_queues = 1; rid = 0; /* initial resource identifier */ sc->intr[0].rid = rid; if ((sc->intr[0].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[0].rid, (RF_ACTIVE | RF_SHAREABLE))) == NULL) { BLOGE(sc, "Failed to map INTx (rid=%d)!\n", rid); sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = -1; /* Failed! */ break; } BLOGD(sc, DBG_LOAD, "Mapped INTx (rid=%d)\n", rid); } while (0); if (sc->interrupt_mode == -1) { BLOGE(sc, "Interrupt Allocation: FAILED!!!\n"); rc = 1; } else { BLOGD(sc, DBG_LOAD, "Interrupt Allocation: interrupt_mode=%d, num_queues=%d\n", sc->interrupt_mode, sc->num_queues); rc = 0; } return (rc); } static void bxe_interrupt_detach(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; /* release interrupt resources */ for (i = 0; i < sc->intr_count; i++) { if (sc->intr[i].resource && sc->intr[i].tag) { BLOGD(sc, DBG_LOAD, "Disabling interrupt vector %d\n", i); bus_teardown_intr(sc->dev, sc->intr[i].resource, sc->intr[i].tag); } } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->tq) { taskqueue_drain(fp->tq, &fp->tq_task); taskqueue_drain(fp->tq, &fp->tx_task); while (taskqueue_cancel_timeout(fp->tq, &fp->tx_timeout_task, NULL)) taskqueue_drain_timeout(fp->tq, &fp->tx_timeout_task); taskqueue_free(fp->tq); fp->tq = NULL; } } if (sc->sp_tq) { taskqueue_drain(sc->sp_tq, &sc->sp_tq_task); taskqueue_free(sc->sp_tq); sc->sp_tq = NULL; } } /* * Enables interrupts and attach to the ISR. * * When using multiple MSI/MSI-X vectors the first vector * is used for slowpath operations while all remaining * vectors are used for fastpath operations. If only a * single MSI/MSI-X vector is used (SINGLE_ISR) then the * ISR must look for both slowpath and fastpath completions. */ static int bxe_interrupt_attach(struct bxe_softc *sc) { struct bxe_fastpath *fp; int rc = 0; int i; snprintf(sc->sp_tq_name, sizeof(sc->sp_tq_name), "bxe%d_sp_tq", sc->unit); TASK_INIT(&sc->sp_tq_task, 0, bxe_handle_sp_tq, sc); sc->sp_tq = taskqueue_create(sc->sp_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->sp_tq); taskqueue_start_threads(&sc->sp_tq, 1, PWAIT, /* lower priority */ "%s", sc->sp_tq_name); for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; snprintf(fp->tq_name, sizeof(fp->tq_name), "bxe%d_fp%d_tq", sc->unit, i); TASK_INIT(&fp->tq_task, 0, bxe_handle_fp_tq, fp); TASK_INIT(&fp->tx_task, 0, bxe_tx_mq_start_deferred, fp); fp->tq = taskqueue_create(fp->tq_name, M_NOWAIT, taskqueue_thread_enqueue, &fp->tq); TIMEOUT_TASK_INIT(fp->tq, &fp->tx_timeout_task, 0, bxe_tx_mq_start_deferred, fp); taskqueue_start_threads(&fp->tq, 1, PI_NET, /* higher priority */ "%s", fp->tq_name); } /* setup interrupt handlers */ if (sc->interrupt_mode == INTR_MODE_MSIX) { BLOGD(sc, DBG_LOAD, "Enabling slowpath MSI-X[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the driver instance * to the interrupt handler for the slowpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_sp, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[0].resource, sc->intr[0].tag, "sp"); /* bus_bind_intr(sc->dev, sc->intr[0].resource, 0); */ /* initialize the fastpath vectors (note the first was used for sp) */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; BLOGD(sc, DBG_LOAD, "Enabling MSI-X[%d] vector\n", (i + 1)); /* * Setup the interrupt handler. Note that we pass the * fastpath context to the interrupt handler in this * case. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[i + 1].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_fp, fp, &sc->intr[i + 1].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[%d] vector (%d)\n", (i + 1), rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[i + 1].resource, sc->intr[i + 1].tag, "fp%02d", i); /* bind the fastpath instance to a cpu */ if (sc->num_queues > 1) { bus_bind_intr(sc->dev, sc->intr[i + 1].resource, i); } fp->state = BXE_FP_STATE_IRQ; } } else if (sc->interrupt_mode == INTR_MODE_MSI) { BLOGD(sc, DBG_LOAD, "Enabling MSI[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } } else { /* (sc->interrupt_mode == INTR_MODE_INTX) */ BLOGD(sc, DBG_LOAD, "Enabling INTx interrupts\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate INTx interrupt (%d)\n", rc); goto bxe_interrupt_attach_exit; } } bxe_interrupt_attach_exit: return (rc); } static int bxe_init_hw_common_chip(struct bxe_softc *sc); static int bxe_init_hw_common(struct bxe_softc *sc); static int bxe_init_hw_port(struct bxe_softc *sc); static int bxe_init_hw_func(struct bxe_softc *sc); static void bxe_reset_common(struct bxe_softc *sc); static void bxe_reset_port(struct bxe_softc *sc); static void bxe_reset_func(struct bxe_softc *sc); static int bxe_gunzip_init(struct bxe_softc *sc); static void bxe_gunzip_end(struct bxe_softc *sc); static int bxe_init_firmware(struct bxe_softc *sc); static void bxe_release_firmware(struct bxe_softc *sc); static struct ecore_func_sp_drv_ops bxe_func_sp_drv = { .init_hw_cmn_chip = bxe_init_hw_common_chip, .init_hw_cmn = bxe_init_hw_common, .init_hw_port = bxe_init_hw_port, .init_hw_func = bxe_init_hw_func, .reset_hw_cmn = bxe_reset_common, .reset_hw_port = bxe_reset_port, .reset_hw_func = bxe_reset_func, .gunzip_init = bxe_gunzip_init, .gunzip_end = bxe_gunzip_end, .init_fw = bxe_init_firmware, .release_fw = bxe_release_firmware, }; static void bxe_init_func_obj(struct bxe_softc *sc) { sc->dmae_ready = 0; ecore_init_func_obj(sc, &sc->func_obj, BXE_SP(sc, func_rdata), BXE_SP_MAPPING(sc, func_rdata), BXE_SP(sc, func_afex_rdata), BXE_SP_MAPPING(sc, func_afex_rdata), &bxe_func_sp_drv); } static int bxe_init_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare the parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_INIT; func_params.params.hw_init.load_phase = load_code; /* * Via a plethora of function pointers, we will eventually reach * bxe_init_hw_common(), bxe_init_hw_port(), or bxe_init_hw_func(). */ rc = ecore_func_state_change(sc, &func_params); return (rc); } static void bxe_fill(struct bxe_softc *sc, uint32_t addr, int fill, uint32_t len) { uint32_t i; if (!(len % 4) && !(addr % 4)) { for (i = 0; i < len; i += 4) { REG_WR(sc, (addr + i), fill); } } else { for (i = 0; i < len; i++) { REG_WR8(sc, (addr + i), fill); } } } /* writes FP SP data to FW - data_size in dwords */ static void bxe_wr_fp_sb_data(struct bxe_softc *sc, int fw_sb_id, uint32_t *sb_data_p, uint32_t data_size) { int index; for (index = 0; index < data_size; index++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_OFFSET(fw_sb_id) + (sizeof(uint32_t) * index)), *(sb_data_p + index)); } } static void bxe_zero_fp_sb(struct bxe_softc *sc, int fw_sb_id) { struct hc_status_block_data_e2 sb_data_e2; struct hc_status_block_data_e1x sb_data_e1x; uint32_t *sb_data_p; uint32_t data_size = 0; if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_DISABLED; sb_data_e2.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_DISABLED; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); } bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SYNC_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_SYNC_BLOCK_SIZE); } static void bxe_wr_sp_sb_data(struct bxe_softc *sc, struct hc_sp_status_block_data *sp_sb_data) { int i; for (i = 0; i < (sizeof(struct hc_sp_status_block_data) / sizeof(uint32_t)); i++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(SC_FUNC(sc)) + (i * sizeof(uint32_t))), *((uint32_t *)sp_sb_data + i)); } } static void bxe_zero_sp_sb(struct bxe_softc *sc) { struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); sp_sb_data.state = SB_DISABLED; sp_sb_data.p_func.vf_valid = FALSE; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_SYNC_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_SYNC_BLOCK_SIZE); } static void bxe_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm, int igu_sb_id, int igu_seg_id) { hc_sm->igu_sb_id = igu_sb_id; hc_sm->igu_seg_id = igu_seg_id; hc_sm->timer_value = 0xFF; hc_sm->time_to_expire = 0xFFFFFFFF; } static void bxe_map_sb_state_machines(struct hc_index_data *index_data) { /* zero out state machine indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID; /* map indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |= (SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT); /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); } static void bxe_init_sb(struct bxe_softc *sc, bus_addr_t busaddr, int vfid, uint8_t vf_valid, int fw_sb_id, int igu_sb_id) { struct hc_status_block_data_e2 sb_data_e2; struct hc_status_block_data_e1x sb_data_e1x; struct hc_status_block_sm *hc_sm_p; uint32_t *sb_data_p; int igu_seg_id; int data_size; if (CHIP_INT_MODE_IS_BC(sc)) { igu_seg_id = HC_SEG_ACCESS_NORM; } else { igu_seg_id = IGU_SEG_ACCESS_NORM; } bxe_zero_fp_sb(sc, fw_sb_id); if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_ENABLED; sb_data_e2.common.p_func.pf_id = SC_FUNC(sc); sb_data_e2.common.p_func.vf_id = vfid; sb_data_e2.common.p_func.vf_valid = vf_valid; sb_data_e2.common.p_func.vnic_id = SC_VN(sc); sb_data_e2.common.same_igu_sb_1b = TRUE; sb_data_e2.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e2.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e2.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e2.index_data); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_ENABLED; sb_data_e1x.common.p_func.pf_id = SC_FUNC(sc); sb_data_e1x.common.p_func.vf_id = 0xff; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_e1x.common.p_func.vnic_id = SC_VN(sc); sb_data_e1x.common.same_igu_sb_1b = TRUE; sb_data_e1x.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e1x.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e1x.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e1x.index_data); } bxe_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID], igu_sb_id, igu_seg_id); bxe_setup_ndsb_state_machine(&hc_sm_p[SM_TX_ID], igu_sb_id, igu_seg_id); BLOGD(sc, DBG_LOAD, "Init FW SB %d\n", fw_sb_id); /* write indices to HW - PCI guarantees endianity of regpairs */ bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); } static inline uint8_t bxe_fp_qzone_id(struct bxe_fastpath *fp) { if (CHIP_IS_E1x(fp->sc)) { return (fp->cl_id + SC_PORT(fp->sc) * ETH_MAX_RX_CLIENTS_E1H); } else { return (fp->cl_id); } } static inline uint32_t bxe_rx_ustorm_prods_offset(struct bxe_softc *sc, struct bxe_fastpath *fp) { uint32_t offset = BAR_USTRORM_INTMEM; if (!CHIP_IS_E1x(sc)) { offset += USTORM_RX_PRODS_E2_OFFSET(fp->cl_qzone_id); } else { offset += USTORM_RX_PRODS_E1X_OFFSET(SC_PORT(sc), fp->cl_id); } return (offset); } static void bxe_init_eth_fp(struct bxe_softc *sc, int idx) { struct bxe_fastpath *fp = &sc->fp[idx]; uint32_t cids[ECORE_MULTI_TX_COS] = { 0 }; unsigned long q_type = 0; int cos; fp->sc = sc; fp->index = idx; fp->igu_sb_id = (sc->igu_base_sb + idx + CNIC_SUPPORT(sc)); fp->fw_sb_id = (sc->base_fw_ndsb + idx + CNIC_SUPPORT(sc)); fp->cl_id = (CHIP_IS_E1x(sc)) ? (SC_L_ID(sc) + idx) : /* want client ID same as IGU SB ID for non-E1 */ fp->igu_sb_id; fp->cl_qzone_id = bxe_fp_qzone_id(fp); /* setup sb indices */ if (!CHIP_IS_E1x(sc)) { fp->sb_index_values = fp->status_block.e2_sb->sb.index_values; fp->sb_running_index = fp->status_block.e2_sb->sb.running_index; } else { fp->sb_index_values = fp->status_block.e1x_sb->sb.index_values; fp->sb_running_index = fp->status_block.e1x_sb->sb.running_index; } /* init shortcut */ fp->ustorm_rx_prods_offset = bxe_rx_ustorm_prods_offset(sc, fp); fp->rx_cq_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS]; /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS. */ for (cos = 0; cos < sc->max_cos; cos++) { cids[cos] = idx; } fp->tx_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_TX_CQ_CONS_COS0]; /* nothing more for a VF to do */ if (IS_VF(sc)) { return; } bxe_init_sb(sc, fp->sb_dma.paddr, BXE_VF_ID_INVALID, FALSE, fp->fw_sb_id, fp->igu_sb_id); bxe_update_fp_sb_idx(fp); /* Configure Queue State object */ bit_set(&q_type, ECORE_Q_TYPE_HAS_RX); bit_set(&q_type, ECORE_Q_TYPE_HAS_TX); ecore_init_queue_obj(sc, &sc->sp_objs[idx].q_obj, fp->cl_id, cids, sc->max_cos, SC_FUNC(sc), BXE_SP(sc, q_rdata), BXE_SP_MAPPING(sc, q_rdata), q_type); /* configure classification DBs */ ecore_init_mac_obj(sc, &sc->sp_objs[idx].mac_obj, fp->cl_id, idx, SC_FUNC(sc), BXE_SP(sc, mac_rdata), BXE_SP_MAPPING(sc, mac_rdata), ECORE_FILTER_MAC_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX_TX, &sc->macs_pool); BLOGD(sc, DBG_LOAD, "fp[%d]: sb=%p cl_id=%d fw_sb=%d igu_sb=%d\n", idx, fp->status_block.e2_sb, fp->cl_id, fp->fw_sb_id, fp->igu_sb_id); } static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod) { struct ustorm_eth_rx_producers rx_prods = { 0 }; uint32_t i; /* update producers */ rx_prods.bd_prod = rx_bd_prod; rx_prods.cqe_prod = rx_cq_prod; rx_prods.sge_prod = rx_sge_prod; /* * Make sure that the BD and SGE data is updated before updating the * producers since FW might read the BD/SGE right after the producer * is updated. * This is only applicable for weak-ordered memory model archs such * as IA-64. The following barrier is also mandatory since FW will * assumes BDs must have buffers. */ wmb(); for (i = 0; i < (sizeof(rx_prods) / 4); i++) { REG_WR(sc, (fp->ustorm_rx_prods_offset + (i * 4)), ((uint32_t *)&rx_prods)[i]); } wmb(); /* keep prod updates ordered */ BLOGD(sc, DBG_RX, "RX fp[%d]: wrote prods bd_prod=%u cqe_prod=%u sge_prod=%u\n", fp->index, rx_bd_prod, rx_cq_prod, rx_sge_prod); } static void bxe_init_rx_rings(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->rx_bd_cons = 0; /* * Activate the BD ring... * Warning, this will generate an interrupt (to the TSTORM) * so this can only be done after the chip is initialized */ bxe_update_rx_prod(sc, fp, fp->rx_bd_prod, fp->rx_cq_prod, fp->rx_sge_prod); if (i != 0) { continue; } if (CHIP_IS_E1(sc)) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc))), U64_LO(fp->rcq_dma.paddr)); REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc)) + 4), U64_HI(fp->rcq_dma.paddr)); } } } static void bxe_init_tx_ring_one(struct bxe_fastpath *fp) { SET_FLAG(fp->tx_db.data.header.data, DOORBELL_HDR_T_DB_TYPE, 1); fp->tx_db.data.zero_fill1 = 0; fp->tx_db.data.prod = 0; fp->tx_pkt_prod = 0; fp->tx_pkt_cons = 0; fp->tx_bd_prod = 0; fp->tx_bd_cons = 0; fp->eth_q_stats.tx_pkts = 0; } static inline void bxe_init_tx_rings(struct bxe_softc *sc) { int i; for (i = 0; i < sc->num_queues; i++) { bxe_init_tx_ring_one(&sc->fp[i]); } } static void bxe_init_def_sb(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; bus_addr_t mapping = sc->def_sb_dma.paddr; int igu_sp_sb_index; int igu_seg_id; int port = SC_PORT(sc); int func = SC_FUNC(sc); int reg_offset, reg_offset_en5; uint64_t section; int index, sindex; struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); if (CHIP_INT_MODE_IS_BC(sc)) { igu_sp_sb_index = DEF_SB_IGU_ID; igu_seg_id = HC_SEG_ACCESS_DEF; } else { igu_sp_sb_index = sc->igu_dsb_id; igu_seg_id = IGU_SEG_ACCESS_DEF; } /* attentions */ section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, atten_status_block)); def_sb->atten_status_block.status_block_id = igu_sp_sb_index; sc->attn_state = 0; reg_offset = (port) ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; reg_offset_en5 = (port) ? MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0; for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { /* take care of sig[0]..sig[4] */ for (sindex = 0; sindex < 4; sindex++) { sc->attn_group[index].sig[sindex] = REG_RD(sc, (reg_offset + (sindex * 0x4) + (0x10 * index))); } if (!CHIP_IS_E1x(sc)) { /* * enable5 is separate from the rest of the registers, * and the address skip is 4 and not 16 between the * different groups */ sc->attn_group[index].sig[4] = REG_RD(sc, (reg_offset_en5 + (0x4 * index))); } else { sc->attn_group[index].sig[4] = 0; } } if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_offset = (port) ? HC_REG_ATTN_MSG1_ADDR_L : HC_REG_ATTN_MSG0_ADDR_L; REG_WR(sc, reg_offset, U64_LO(section)); REG_WR(sc, (reg_offset + 4), U64_HI(section)); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_L, U64_LO(section)); REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_H, U64_HI(section)); } section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, sp_sb)); bxe_zero_sp_sb(sc); /* PCI guarantees endianity of regpair */ sp_sb_data.state = SB_ENABLED; sp_sb_data.host_sb_addr.lo = U64_LO(section); sp_sb_data.host_sb_addr.hi = U64_HI(section); sp_sb_data.igu_sb_id = igu_sp_sb_index; sp_sb_data.igu_seg_id = igu_seg_id; sp_sb_data.p_func.pf_id = func; sp_sb_data.p_func.vnic_id = SC_VN(sc); sp_sb_data.p_func.vf_id = 0xff; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); } static void bxe_init_sp_ring(struct bxe_softc *sc) { atomic_store_rel_long(&sc->cq_spq_left, MAX_SPQ_PENDING); sc->spq_prod_idx = 0; sc->dsb_sp_prod = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_ETH_DEF_CONS]; sc->spq_prod_bd = sc->spq; sc->spq_last_bd = (sc->spq_prod_bd + MAX_SP_DESC_CNT); } static void bxe_init_eq_ring(struct bxe_softc *sc) { union event_ring_elem *elem; int i; for (i = 1; i <= NUM_EQ_PAGES; i++) { elem = &sc->eq[EQ_DESC_CNT_PAGE * i - 1]; elem->next_page.addr.hi = htole32(U64_HI(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); elem->next_page.addr.lo = htole32(U64_LO(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); } sc->eq_cons = 0; sc->eq_prod = NUM_EQ_DESC; sc->eq_cons_sb = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_EQ_CONS]; atomic_store_rel_long(&sc->eq_spq_left, (min((MAX_SP_DESC_CNT - MAX_SPQ_PENDING), NUM_EQ_DESC) - 1)); } static void bxe_init_internal_common(struct bxe_softc *sc) { int i; /* * Zero this manually as its initialization is currently missing * in the initTool. */ for (i = 0; i < (USTORM_AGG_DATA_SIZE >> 2); i++) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_AGG_DATA_OFFSET + (i * 4)), 0); } if (!CHIP_IS_E1x(sc)) { REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_IGU_MODE_OFFSET), CHIP_INT_MODE_IS_BC(sc) ? HC_IGU_BC_MODE : HC_IGU_NBC_MODE); } } static void bxe_init_internal(struct bxe_softc *sc, uint32_t load_code) { switch (load_code) { case FW_MSG_CODE_DRV_LOAD_COMMON: case FW_MSG_CODE_DRV_LOAD_COMMON_CHIP: bxe_init_internal_common(sc); /* no break */ case FW_MSG_CODE_DRV_LOAD_PORT: /* nothing to do */ /* no break */ case FW_MSG_CODE_DRV_LOAD_FUNCTION: /* internal memory per function is initialized inside bxe_pf_init */ break; default: BLOGE(sc, "Unknown load_code (0x%x) from MCP\n", load_code); break; } } static void storm_memset_func_cfg(struct bxe_softc *sc, struct tstorm_eth_function_common_config *tcfg, uint16_t abs_fid) { uint32_t addr; size_t size; addr = (BAR_TSTRORM_INTMEM + TSTORM_FUNCTION_COMMON_CONFIG_OFFSET(abs_fid)); size = sizeof(struct tstorm_eth_function_common_config); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)tcfg); } static void bxe_func_init(struct bxe_softc *sc, struct bxe_func_init_params *p) { struct tstorm_eth_function_common_config tcfg = { 0 }; if (CHIP_IS_E1x(sc)) { storm_memset_func_cfg(sc, &tcfg, p->func_id); } /* Enable the function in the FW */ storm_memset_vf_to_pf(sc, p->func_id, p->pf_id); storm_memset_func_en(sc, p->func_id, 1); /* spq */ if (p->func_flgs & FUNC_FLG_SPQ) { storm_memset_spq_addr(sc, p->spq_map, p->func_id); REG_WR(sc, (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PROD_OFFSET(p->func_id)), p->spq_prod); } } /* * Calculates the sum of vn_min_rates. * It's needed for further normalizing of the min_rates. * Returns: * sum of vn_min_rates. * or * 0 - if all the min_rates are 0. * In the later case fainess algorithm should be deactivated. * If all min rates are not zero then those that are zeroes will be set to 1. */ static void bxe_calc_vn_min(struct bxe_softc *sc, struct cmng_init_input *input) { uint32_t vn_cfg; uint32_t vn_min_rate; int all_zero = 1; int vn; for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { vn_cfg = sc->devinfo.mf_info.mf_config[vn]; vn_min_rate = (((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT) * 100); if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { /* skip hidden VNs */ vn_min_rate = 0; } else if (!vn_min_rate) { /* If min rate is zero - set it to 100 */ vn_min_rate = DEF_MIN_RATE; } else { all_zero = 0; } input->vnic_min_rate[vn] = vn_min_rate; } /* if ETS or all min rates are zeros - disable fairness */ if (BXE_IS_ETS_ENABLED(sc)) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fairness disabled (ETS)\n"); } else if (all_zero) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fariness disabled (all MIN values are zeroes)\n"); } else { input->flags.cmng_enables |= CMNG_FLAGS_PER_PORT_FAIRNESS_VN; } } static inline uint16_t bxe_extract_max_cfg(struct bxe_softc *sc, uint32_t mf_cfg) { uint16_t max_cfg = ((mf_cfg & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); if (!max_cfg) { BLOGD(sc, DBG_LOAD, "Max BW configured to 0 - using 100 instead\n"); max_cfg = 100; } return (max_cfg); } static void bxe_calc_vn_max(struct bxe_softc *sc, int vn, struct cmng_init_input *input) { uint16_t vn_max_rate; uint32_t vn_cfg = sc->devinfo.mf_info.mf_config[vn]; uint32_t max_cfg; if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { vn_max_rate = 0; } else { max_cfg = bxe_extract_max_cfg(sc, vn_cfg); if (IS_MF_SI(sc)) { /* max_cfg in percents of linkspeed */ vn_max_rate = ((sc->link_vars.line_speed * max_cfg) / 100); } else { /* SD modes */ /* max_cfg is absolute in 100Mb units */ vn_max_rate = (max_cfg * 100); } } BLOGD(sc, DBG_LOAD, "vn %d: vn_max_rate %d\n", vn, vn_max_rate); input->vnic_max_rate[vn] = vn_max_rate; } static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type) { struct cmng_init_input input; int vn; memset(&input, 0, sizeof(struct cmng_init_input)); input.port_rate = sc->link_vars.line_speed; if (cmng_type == CMNG_FNS_MINMAX) { /* read mf conf from shmem */ if (read_cfg) { bxe_read_mf_cfg(sc); } /* get VN min rate and enable fairness if not 0 */ bxe_calc_vn_min(sc, &input); /* get VN max rate */ if (sc->port.pmf) { for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { bxe_calc_vn_max(sc, vn, &input); } } /* always enable rate shaping and fairness */ input.flags.cmng_enables |= CMNG_FLAGS_PER_PORT_RATE_SHAPING_VN; ecore_init_cmng(&input, &sc->cmng); return; } /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "rate shaping and fairness have been disabled\n"); } static int bxe_get_cmng_fns_mode(struct bxe_softc *sc) { if (CHIP_REV_IS_SLOW(sc)) { return (CMNG_FNS_NONE); } if (IS_MF(sc)) { return (CMNG_FNS_MINMAX); } return (CMNG_FNS_NONE); } static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port) { int vn; int func; uint32_t addr; size_t size; addr = (BAR_XSTRORM_INTMEM + XSTORM_CMNG_PER_PORT_VARS_OFFSET(port)); size = sizeof(struct cmng_struct_per_port); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->port); for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { func = func_by_vn(sc, vn); addr = (BAR_XSTRORM_INTMEM + XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(func)); size = sizeof(struct rate_shaping_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_max_rate[vn]); addr = (BAR_XSTRORM_INTMEM + XSTORM_FAIRNESS_PER_VN_VARS_OFFSET(func)); size = sizeof(struct fairness_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_min_rate[vn]); } } static void bxe_pf_init(struct bxe_softc *sc) { struct bxe_func_init_params func_init = { 0 }; struct event_ring_data eq_data = { { 0 } }; uint16_t flags; if (!CHIP_IS_E1x(sc)) { /* reset IGU PF statistics: MSIX + ATTN */ /* PF */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); /* ATTN */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + (BXE_IGU_STAS_MSG_PF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); } /* function setup flags */ flags = (FUNC_FLG_STATS | FUNC_FLG_LEADING | FUNC_FLG_SPQ); /* * This flag is relevant for E1x only. * E2 doesn't have a TPA configuration in a function level. */ flags |= (if_getcapenable(sc->ifp) & IFCAP_LRO) ? FUNC_FLG_TPA : 0; func_init.func_flgs = flags; func_init.pf_id = SC_FUNC(sc); func_init.func_id = SC_FUNC(sc); func_init.spq_map = sc->spq_dma.paddr; func_init.spq_prod = sc->spq_prod_idx; bxe_func_init(sc, &func_init); memset(&sc->cmng, 0, sizeof(struct cmng_struct_per_port)); /* * Congestion management values depend on the link rate. * There is no active link so initial link rate is set to 10Gbps. * When the link comes up the congestion management values are * re-calculated according to the actual link rate. */ sc->link_vars.line_speed = SPEED_10000; bxe_cmng_fns_init(sc, TRUE, bxe_get_cmng_fns_mode(sc)); /* Only the PMF sets the HW */ if (sc->port.pmf) { storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } /* init Event Queue - PCI bus guarantees correct endainity */ eq_data.base_addr.hi = U64_HI(sc->eq_dma.paddr); eq_data.base_addr.lo = U64_LO(sc->eq_dma.paddr); eq_data.producer = sc->eq_prod; eq_data.index_id = HC_SP_INDEX_EQ_CONS; eq_data.sb_id = DEF_SB_ID; storm_memset_eq_data(sc, &eq_data, SC_FUNC(sc)); } static void bxe_hc_int_enable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; if (msix) { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0); val |= (HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (single_msix) { val |= HC_CONFIG_0_REG_SINGLE_ISR_EN_0; } } else if (msi) { val &= ~HC_CONFIG_0_REG_INT_LINE_EN_0; val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (!CHIP_IS_E1(sc)) { BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); REG_WR(sc, addr, val); val &= ~HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0; } } if (CHIP_IS_E1(sc)) { REG_WR(sc, (HC_REG_INT_MASK + port*4), 0x1FFFF); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x) mode %s\n", val, port, addr, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, addr, val); /* ensure that HC_CONFIG is written before leading/trailing edge config */ mb(); if (!CHIP_IS_E1(sc)) { /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, (HC_REG_TRAILING_EDGE_0 + port*8), val); REG_WR(sc, (HC_REG_LEADING_EDGE_0 + port*8), val); } /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_igu_int_enable(struct bxe_softc *sc) { uint32_t val; uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); if (msix) { val &= ~(IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_SINGLE_ISR_EN); val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN); if (single_msix) { val |= IGU_PF_CONF_SINGLE_ISR_EN; } } else if (msi) { val &= ~IGU_PF_CONF_INT_LINE_EN; val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } else { val &= ~IGU_PF_CONF_MSI_MSIX_EN; val |= (IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } /* clean previous status - need to configure igu prior to ack*/ if ((!msix) || single_msix) { REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); bxe_ack_int(sc); } val |= IGU_PF_CONF_FUNC_EN; BLOGD(sc, DBG_INTR, "write 0x%x to IGU mode %s\n", val, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); mb(); /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_int_enable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_enable(sc); } else { bxe_igu_int_enable(sc); } } static void bxe_hc_int_disable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); /* * In E1 we must use only PCI configuration space to disable MSI/MSIX * capablility. It's forbidden to disable IGU_PF_CONF_MSI_MSIX_EN in HC * block */ if (CHIP_IS_E1(sc)) { /* * Since IGU_PF_CONF_MSI_MSIX_EN still always on use mask register * to prevent from HC sending interrupts after we exit the function */ REG_WR(sc, (HC_REG_INT_MASK + port*4), 0); val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); /* flush all outstanding writes */ mb(); REG_WR(sc, addr, val); if (REG_RD(sc, addr) != val) { BLOGE(sc, "proper val not read from HC IGU!\n"); } } static void bxe_igu_int_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~(IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN); BLOGD(sc, DBG_INTR, "write %x to IGU\n", val); /* flush all outstanding writes */ mb(); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); if (REG_RD(sc, IGU_REG_PF_CONFIGURATION) != val) { BLOGE(sc, "proper val not read from IGU!\n"); } } static void bxe_int_disable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_disable(sc); } else { bxe_igu_int_disable(sc); } } static void bxe_nic_init(struct bxe_softc *sc, int load_code) { int i; for (i = 0; i < sc->num_queues; i++) { bxe_init_eth_fp(sc, i); } rmb(); /* ensure status block indices were read */ bxe_init_rx_rings(sc); bxe_init_tx_rings(sc); if (IS_VF(sc)) { return; } /* initialize MOD_ABS interrupts */ elink_init_mod_abs_int(sc, &sc->link_vars, sc->devinfo.chip_id, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, SC_PORT(sc)); bxe_init_def_sb(sc); bxe_update_dsb_idx(sc); bxe_init_sp_ring(sc); bxe_init_eq_ring(sc); bxe_init_internal(sc, load_code); bxe_pf_init(sc); bxe_stats_init(sc); /* flush all before enabling interrupts */ mb(); bxe_int_enable(sc); /* check for SPIO5 */ bxe_attn_int_deasserted0(sc, REG_RD(sc, (MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + SC_PORT(sc)*4)) & AEU_INPUTS_ATTN_BITS_SPIO5); } static inline void bxe_init_objs(struct bxe_softc *sc) { /* mcast rules must be added to tx if tx switching is enabled */ ecore_obj_type o_type = (sc->flags & BXE_TX_SWITCHING) ? ECORE_OBJ_TYPE_RX_TX : ECORE_OBJ_TYPE_RX; /* RX_MODE controlling object */ ecore_init_rx_mode_obj(sc, &sc->rx_mode_obj); /* multicast configuration controlling object */ ecore_init_mcast_obj(sc, &sc->mcast_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, mcast_rdata), BXE_SP_MAPPING(sc, mcast_rdata), ECORE_FILTER_MCAST_PENDING, &sc->sp_state, o_type); /* Setup CAM credit pools */ ecore_init_mac_credit_pool(sc, &sc->macs_pool, SC_FUNC(sc), CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); ecore_init_vlan_credit_pool(sc, &sc->vlans_pool, SC_ABS_FUNC(sc) >> 1, CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); /* RSS configuration object */ ecore_init_rss_config_obj(sc, &sc->rss_conf_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, rss_rdata), BXE_SP_MAPPING(sc, rss_rdata), ECORE_FILTER_RSS_CONF_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX); } /* * Initialize the function. This must be called before sending CLIENT_SETUP * for the first client. */ static inline int bxe_func_start(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; struct ecore_func_start_params *start_params = &func_params.params.start; /* Prepare parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_START; /* Function parameters */ start_params->mf_mode = sc->devinfo.mf_info.mf_mode; start_params->sd_vlan_tag = OVLAN(sc); if (CHIP_IS_E2(sc) || CHIP_IS_E3(sc)) { start_params->network_cos_mode = STATIC_COS; } else { /* CHIP_IS_E1X */ start_params->network_cos_mode = FW_WRR; } //start_params->gre_tunnel_mode = 0; //start_params->gre_tunnel_rss = 0; return (ecore_func_state_change(sc, &func_params)); } static int bxe_set_power_state(struct bxe_softc *sc, uint8_t state) { uint16_t pmcsr; /* If there is no power capability, silently succeed */ if (!(sc->devinfo.pcie_cap_flags & BXE_PM_CAPABLE_FLAG)) { BLOGW(sc, "No power capability\n"); return (0); } pmcsr = pci_read_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), 2); switch (state) { case PCI_PM_D0: pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), ((pmcsr & ~PCIM_PSTAT_DMASK) | PCIM_PSTAT_PME), 2); if (pmcsr & PCIM_PSTAT_DMASK) { /* delay required during transition out of D3hot */ DELAY(20000); } break; case PCI_PM_D3hot: /* XXX if there are other clients above don't shut down the power */ /* don't shut down the power for emulation and FPGA */ if (CHIP_REV_IS_SLOW(sc)) { return (0); } pmcsr &= ~PCIM_PSTAT_DMASK; pmcsr |= PCIM_PSTAT_D3; if (sc->wol) { pmcsr |= PCIM_PSTAT_PMEENABLE; } pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), pmcsr, 4); /* * No more memory access after this point until device is brought back * to D0 state. */ break; default: BLOGE(sc, "Can't support PCI power state = 0x%x pmcsr 0x%x\n", state, pmcsr); return (-1); } return (0); } /* return true if succeeded to acquire the lock */ static uint8_t bxe_trylock_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; BLOGD(sc, DBG_LOAD, "Trying to take a resource lock 0x%x\n", resource); /* Validating that the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGD(sc, DBG_LOAD, "resource(0x%x) > HW_LOCK_MAX_RESOURCE_VALUE(0x%x)\n", resource, HW_LOCK_MAX_RESOURCE_VALUE); return (FALSE); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + func*8); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + (func - 6)*8); } /* try to acquire the lock */ REG_WR(sc, hw_lock_control_reg + 4, resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (TRUE); } BLOGE(sc, "Failed to get a resource lock 0x%x func %d " "lock_status 0x%x resource_bit 0x%x\n", resource, func, lock_status, resource_bit); return (FALSE); } /* * Get the recovery leader resource id according to the engine this function * belongs to. Currently only only 2 engines is supported. */ static int bxe_get_leader_lock_resource(struct bxe_softc *sc) { if (SC_PATH(sc)) { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_1); } else { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_0); } } /* try to acquire a leader lock for current engine */ static uint8_t bxe_trylock_leader_lock(struct bxe_softc *sc) { return (bxe_trylock_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } static int bxe_release_leader_lock(struct bxe_softc *sc) { return (bxe_release_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } /* close gates #2, #3 and #4 */ static void bxe_set_234_gates(struct bxe_softc *sc, uint8_t close) { uint32_t val; /* gates #2 and #4a are closed/opened for "not E1" only */ if (!CHIP_IS_E1(sc)) { /* #4 */ REG_WR(sc, PXP_REG_HST_DISCARD_DOORBELLS, !!close); /* #2 */ REG_WR(sc, PXP_REG_HST_DISCARD_INTERNAL_WRITES, !!close); } /* #3 */ if (CHIP_IS_E1x(sc)) { /* prevent interrupts from HC on both ports */ val = REG_RD(sc, HC_REG_CONFIG_1); REG_WR(sc, HC_REG_CONFIG_1, (!close) ? (val | HC_CONFIG_1_REG_BLOCK_DISABLE_1) : (val & ~(uint32_t)HC_CONFIG_1_REG_BLOCK_DISABLE_1)); val = REG_RD(sc, HC_REG_CONFIG_0); REG_WR(sc, HC_REG_CONFIG_0, (!close) ? (val | HC_CONFIG_0_REG_BLOCK_DISABLE_0) : (val & ~(uint32_t)HC_CONFIG_0_REG_BLOCK_DISABLE_0)); } else { /* Prevent incoming interrupts in IGU */ val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, (!close) ? (val | IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE) : (val & ~(uint32_t)IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE)); } BLOGD(sc, DBG_LOAD, "%s gates #2, #3 and #4\n", close ? "closing" : "opening"); wmb(); } /* poll for pending writes bit, it should get cleared in no more than 1s */ static int bxe_er_poll_igu_vq(struct bxe_softc *sc) { uint32_t cnt = 1000; uint32_t pend_bits = 0; do { pend_bits = REG_RD(sc, IGU_REG_PENDING_BITS_STATUS); if (pend_bits == 0) { break; } DELAY(1000); } while (--cnt > 0); if (cnt == 0) { BLOGE(sc, "Still pending IGU requests bits=0x%08x!\n", pend_bits); return (-1); } return (0); } #define SHARED_MF_CLP_MAGIC 0x80000000 /* 'magic' bit */ static void bxe_clp_reset_prep(struct bxe_softc *sc, uint32_t *magic_val) { /* Do some magic... */ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); *magic_val = val & SHARED_MF_CLP_MAGIC; MFCFG_WR(sc, shared_mf_config.clp_mb, val | SHARED_MF_CLP_MAGIC); } /* restore the value of the 'magic' bit */ static void bxe_clp_reset_done(struct bxe_softc *sc, uint32_t magic_val) { /* Restore the 'magic' bit value... */ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); MFCFG_WR(sc, shared_mf_config.clp_mb, (val & (~SHARED_MF_CLP_MAGIC)) | magic_val); } /* prepare for MCP reset, takes care of CLP configurations */ static void bxe_reset_mcp_prep(struct bxe_softc *sc, uint32_t *magic_val) { uint32_t shmem; uint32_t validity_offset; /* set `magic' bit in order to save MF config */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_prep(sc, magic_val); } /* get shmem offset */ shmem = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); validity_offset = offsetof(struct shmem_region, validity_map[SC_PORT(sc)]); /* Clear validity map flags */ if (shmem > 0) { REG_WR(sc, shmem + validity_offset, 0); } } #define MCP_TIMEOUT 5000 /* 5 seconds (in ms) */ #define MCP_ONE_TIMEOUT 100 /* 100 ms */ static void bxe_mcp_wait_one(struct bxe_softc *sc) { /* special handling for emulation and FPGA (10 times longer) */ if (CHIP_REV_IS_SLOW(sc)) { DELAY((MCP_ONE_TIMEOUT*10) * 1000); } else { DELAY((MCP_ONE_TIMEOUT) * 1000); } } /* initialize shmem_base and waits for validity signature to appear */ static int bxe_init_shmem(struct bxe_softc *sc) { int cnt = 0; uint32_t val = 0; do { sc->devinfo.shmem_base = sc->link_params.shmem_base = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); if (sc->devinfo.shmem_base) { val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]); if (val & SHR_MEM_VALIDITY_MB) return (0); } bxe_mcp_wait_one(sc); } while (cnt++ < (MCP_TIMEOUT / MCP_ONE_TIMEOUT)); BLOGE(sc, "BAD MCP validity signature\n"); return (-1); } static int bxe_reset_mcp_comp(struct bxe_softc *sc, uint32_t magic_val) { int rc = bxe_init_shmem(sc); /* Restore the `magic' bit value */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_done(sc, magic_val); } return (rc); } static void bxe_pxp_prep(struct bxe_softc *sc) { if (!CHIP_IS_E1(sc)) { REG_WR(sc, PXP2_REG_RD_START_INIT, 0); REG_WR(sc, PXP2_REG_RQ_RBC_DONE, 0); wmb(); } } /* * Reset the whole chip except for: * - PCIE core * - PCI Glue, PSWHST, PXP/PXP2 RF (all controlled by one reset bit) * - IGU * - MISC (including AEU) * - GRC * - RBCN, RBCP */ static void bxe_process_kill_chip_reset(struct bxe_softc *sc, uint8_t global) { uint32_t not_reset_mask1, reset_mask1, not_reset_mask2, reset_mask2; uint32_t global_bits2, stay_reset2; /* * Bits that have to be set in reset_mask2 if we want to reset 'global' * (per chip) blocks. */ global_bits2 = MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CPU | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CORE; /* * Don't reset the following blocks. * Important: per port blocks (such as EMAC, BMAC, UMAC) can't be * reset, as in 4 port device they might still be owned * by the MCP (there is only one leader per path). */ not_reset_mask1 = MISC_REGISTERS_RESET_REG_1_RST_HC | MISC_REGISTERS_RESET_REG_1_RST_PXPV | MISC_REGISTERS_RESET_REG_1_RST_PXP; not_reset_mask2 = MISC_REGISTERS_RESET_REG_2_RST_PCI_MDIO | MISC_REGISTERS_RESET_REG_2_RST_EMAC0_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_EMAC1_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MISC_CORE | MISC_REGISTERS_RESET_REG_2_RST_RBCN | MISC_REGISTERS_RESET_REG_2_RST_GRC | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_REG_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_HARD_CORE_RST_B | MISC_REGISTERS_RESET_REG_2_RST_ATC | MISC_REGISTERS_RESET_REG_2_PGLC | MISC_REGISTERS_RESET_REG_2_RST_BMAC0 | MISC_REGISTERS_RESET_REG_2_RST_BMAC1 | MISC_REGISTERS_RESET_REG_2_RST_EMAC0 | MISC_REGISTERS_RESET_REG_2_RST_EMAC1 | MISC_REGISTERS_RESET_REG_2_UMAC0 | MISC_REGISTERS_RESET_REG_2_UMAC1; /* * Keep the following blocks in reset: * - all xxMACs are handled by the elink code. */ stay_reset2 = MISC_REGISTERS_RESET_REG_2_XMAC | MISC_REGISTERS_RESET_REG_2_XMAC_SOFT; /* Full reset masks according to the chip */ reset_mask1 = 0xffffffff; if (CHIP_IS_E1(sc)) reset_mask2 = 0xffff; else if (CHIP_IS_E1H(sc)) reset_mask2 = 0x1ffff; else if (CHIP_IS_E2(sc)) reset_mask2 = 0xfffff; else /* CHIP_IS_E3 */ reset_mask2 = 0x3ffffff; /* Don't reset global blocks unless we need to */ if (!global) reset_mask2 &= ~global_bits2; /* * In case of attention in the QM, we need to reset PXP * (MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR) before QM * because otherwise QM reset would release 'close the gates' shortly * before resetting the PXP, then the PSWRQ would send a write * request to PGLUE. Then when PXP is reset, PGLUE would try to * read the payload data from PSWWR, but PSWWR would not * respond. The write queue in PGLUE would stuck, dmae commands * would not return. Therefore it's important to reset the second * reset register (containing the * MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR bit) before the * first one (containing the MISC_REGISTERS_RESET_REG_1_RST_QM * bit). */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, reset_mask2 & (~not_reset_mask2)); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, reset_mask1 & (~not_reset_mask1)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, reset_mask2 & (~stay_reset2)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1); wmb(); } static int bxe_process_kill(struct bxe_softc *sc, uint8_t global) { int cnt = 1000; uint32_t val = 0; uint32_t sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2; uint32_t tags_63_32 = 0; /* Empty the Tetris buffer, wait for 1s */ do { sr_cnt = REG_RD(sc, PXP2_REG_RD_SR_CNT); blk_cnt = REG_RD(sc, PXP2_REG_RD_BLK_CNT); port_is_idle_0 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_0); port_is_idle_1 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_1); pgl_exp_rom2 = REG_RD(sc, PXP2_REG_PGL_EXP_ROM2); if (CHIP_IS_E3(sc)) { tags_63_32 = REG_RD(sc, PGLUE_B_REG_TAGS_63_32); } if ((sr_cnt == 0x7e) && (blk_cnt == 0xa0) && ((port_is_idle_0 & 0x1) == 0x1) && ((port_is_idle_1 & 0x1) == 0x1) && (pgl_exp_rom2 == 0xffffffff) && (!CHIP_IS_E3(sc) || (tags_63_32 == 0xffffffff))) break; DELAY(1000); } while (cnt-- > 0); if (cnt <= 0) { BLOGE(sc, "ERROR: Tetris buffer didn't get empty or there " "are still outstanding read requests after 1s! " "sr_cnt=0x%08x, blk_cnt=0x%08x, port_is_idle_0=0x%08x, " "port_is_idle_1=0x%08x, pgl_exp_rom2=0x%08x\n", sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2); return (-1); } mb(); /* Close gates #2, #3 and #4 */ bxe_set_234_gates(sc, TRUE); /* Poll for IGU VQs for 57712 and newer chips */ if (!CHIP_IS_E1x(sc) && bxe_er_poll_igu_vq(sc)) { return (-1); } /* XXX indicate that "process kill" is in progress to MCP */ /* clear "unprepared" bit */ REG_WR(sc, MISC_REG_UNPREPARED, 0); mb(); /* Make sure all is written to the chip before the reset */ wmb(); /* * Wait for 1ms to empty GLUE and PCI-E core queues, * PSWHST, GRC and PSWRD Tetris buffer. */ DELAY(1000); /* Prepare to chip reset: */ /* MCP */ if (global) { bxe_reset_mcp_prep(sc, &val); } /* PXP */ bxe_pxp_prep(sc); mb(); /* reset the chip */ bxe_process_kill_chip_reset(sc, global); mb(); /* clear errors in PGB */ if (!CHIP_IS_E1(sc)) REG_WR(sc, PGLUE_B_REG_LATCHED_ERRORS_CLR, 0x7f); /* Recover after reset: */ /* MCP */ if (global && bxe_reset_mcp_comp(sc, val)) { return (-1); } /* XXX add resetting the NO_MCP mode DB here */ /* Open the gates #2, #3 and #4 */ bxe_set_234_gates(sc, FALSE); /* XXX * IGU/AEU preparation bring back the AEU/IGU to a reset state * re-enable attentions */ return (0); } static int bxe_leader_reset(struct bxe_softc *sc) { int rc = 0; uint8_t global = bxe_reset_is_global(sc); uint32_t load_code; /* * If not going to reset MCP, load "fake" driver to reset HW while * driver is owner of the HW. */ if (!global && !BXE_NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset; } if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { BLOGE(sc, "MCP unexpected response, aborting\n"); rc = -1; goto exit_leader_reset2; } load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset2; } } /* try to recover after the failure */ if (bxe_process_kill(sc, global)) { BLOGE(sc, "Something bad occurred on engine %d!\n", SC_PATH(sc)); rc = -1; goto exit_leader_reset2; } /* * Clear the RESET_IN_PROGRESS and RESET_GLOBAL bits and update the driver * state. */ bxe_set_reset_done(sc); if (global) { bxe_clear_reset_global(sc); } exit_leader_reset2: /* unload "fake driver" if it was loaded */ if (!global && !BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0); } exit_leader_reset: sc->is_leader = 0; bxe_release_leader_lock(sc); mb(); return (rc); } /* * prepare INIT transition, parameters configured: * - HC configuration * - Queue's CDU context */ static void bxe_pf_q_prep_init(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_queue_init_params *init_params) { uint8_t cos; int cxt_index, cxt_offset; bxe_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags); /* HC rate */ init_params->rx.hc_rate = sc->hc_rx_ticks ? (1000000 / sc->hc_rx_ticks) : 0; init_params->tx.hc_rate = sc->hc_tx_ticks ? (1000000 / sc->hc_tx_ticks) : 0; /* FW SB ID */ init_params->rx.fw_sb_id = init_params->tx.fw_sb_id = fp->fw_sb_id; /* CQ index among the SB indices */ init_params->rx.sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; init_params->tx.sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS; /* set maximum number of COSs supported by this queue */ init_params->max_cos = sc->max_cos; BLOGD(sc, DBG_LOAD, "fp %d setting queue params max cos to %d\n", fp->index, init_params->max_cos); /* set the context pointers queue object */ for (cos = FIRST_TX_COS_INDEX; cos < init_params->max_cos; cos++) { /* XXX change index/cid here if ever support multiple tx CoS */ /* fp->txdata[cos]->cid */ cxt_index = fp->index / ILT_PAGE_CIDS; cxt_offset = fp->index - (cxt_index * ILT_PAGE_CIDS); init_params->cxts[cos] = &sc->context[cxt_index].vcxt[cxt_offset].eth; } } /* set flags that are common for the Tx-only and not normal connections */ static unsigned long bxe_get_common_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t zero_stats) { unsigned long flags = 0; /* PF driver will always initialize the Queue to an ACTIVE state */ bxe_set_bit(ECORE_Q_FLG_ACTIVE, &flags); /* * tx only connections collect statistics (on the same index as the * parent connection). The statistics are zeroed when the parent * connection is initialized. */ bxe_set_bit(ECORE_Q_FLG_STATS, &flags); if (zero_stats) { bxe_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags); } /* * tx only connections can support tx-switching, though their * CoS-ness doesn't survive the loopback */ if (sc->flags & BXE_TX_SWITCHING) { bxe_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags); } bxe_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags); return (flags); } static unsigned long bxe_get_q_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { unsigned long flags = 0; if (IS_MF_SD(sc)) { bxe_set_bit(ECORE_Q_FLG_OV, &flags); } if (if_getcapenable(sc->ifp) & IFCAP_LRO) { bxe_set_bit(ECORE_Q_FLG_TPA, &flags); #if __FreeBSD_version >= 800000 bxe_set_bit(ECORE_Q_FLG_TPA_IPV6, &flags); #endif } if (leading) { bxe_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags); bxe_set_bit(ECORE_Q_FLG_MCAST, &flags); } bxe_set_bit(ECORE_Q_FLG_VLAN, &flags); /* merge with common flags */ return (flags | bxe_get_common_flags(sc, fp, TRUE)); } static void bxe_pf_q_prep_general(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_general_setup_params *gen_init, uint8_t cos) { gen_init->stat_id = bxe_stats_id(fp); gen_init->spcl_id = fp->cl_id; gen_init->mtu = sc->mtu; gen_init->cos = cos; } static void bxe_pf_rx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct rxq_pause_params *pause, struct ecore_rxq_setup_params *rxq_init) { uint8_t max_sge = 0; uint16_t sge_sz = 0; uint16_t tpa_agg_size = 0; pause->sge_th_lo = SGE_TH_LO(sc); pause->sge_th_hi = SGE_TH_HI(sc); /* validate SGE ring has enough to cross high threshold */ if (sc->dropless_fc && (pause->sge_th_hi + FW_PREFETCH_CNT) > (RX_SGE_USABLE_PER_PAGE * RX_SGE_NUM_PAGES)) { BLOGW(sc, "sge ring threshold limit\n"); } /* minimum max_aggregation_size is 2*MTU (two full buffers) */ tpa_agg_size = (2 * sc->mtu); if (tpa_agg_size < sc->max_aggregation_size) { tpa_agg_size = sc->max_aggregation_size; } max_sge = SGE_PAGE_ALIGN(sc->mtu) >> SGE_PAGE_SHIFT; max_sge = ((max_sge + PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >> PAGES_PER_SGE_SHIFT; sge_sz = (uint16_t)min(SGE_PAGES, 0xffff); /* pause - not for e1 */ if (!CHIP_IS_E1(sc)) { pause->bd_th_lo = BD_TH_LO(sc); pause->bd_th_hi = BD_TH_HI(sc); pause->rcq_th_lo = RCQ_TH_LO(sc); pause->rcq_th_hi = RCQ_TH_HI(sc); /* validate rings have enough entries to cross high thresholds */ if (sc->dropless_fc && pause->bd_th_hi + FW_PREFETCH_CNT > sc->rx_ring_size) { BLOGW(sc, "rx bd ring threshold limit\n"); } if (sc->dropless_fc && pause->rcq_th_hi + FW_PREFETCH_CNT > RCQ_NUM_PAGES * RCQ_USABLE_PER_PAGE) { BLOGW(sc, "rcq ring threshold limit\n"); } pause->pri_map = 1; } /* rxq setup */ rxq_init->dscr_map = fp->rx_dma.paddr; rxq_init->sge_map = fp->rx_sge_dma.paddr; rxq_init->rcq_map = fp->rcq_dma.paddr; rxq_init->rcq_np_map = (fp->rcq_dma.paddr + BCM_PAGE_SIZE); /* * This should be a maximum number of data bytes that may be * placed on the BD (not including paddings). */ rxq_init->buf_sz = (fp->rx_buf_size - IP_HEADER_ALIGNMENT_PADDING); rxq_init->cl_qzone_id = fp->cl_qzone_id; rxq_init->tpa_agg_sz = tpa_agg_size; rxq_init->sge_buf_sz = sge_sz; rxq_init->max_sges_pkt = max_sge; rxq_init->rss_engine_id = SC_FUNC(sc); rxq_init->mcast_engine_id = SC_FUNC(sc); /* * Maximum number or simultaneous TPA aggregation for this Queue. * For PF Clients it should be the maximum available number. * VF driver(s) may want to define it to a smaller value. */ rxq_init->max_tpa_queues = MAX_AGG_QS(sc); rxq_init->cache_line_log = BXE_RX_ALIGN_SHIFT; rxq_init->fw_sb_id = fp->fw_sb_id; rxq_init->sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; /* * configure silent vlan removal * if multi function mode is afex, then mask default vlan */ if (IS_MF_AFEX(sc)) { rxq_init->silent_removal_value = sc->devinfo.mf_info.afex_def_vlan_tag; rxq_init->silent_removal_mask = EVL_VLID_MASK; } } static void bxe_pf_tx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_txq_setup_params *txq_init, uint8_t cos) { /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS. * fp->txdata[cos]->tx_dma.paddr; */ txq_init->dscr_map = fp->tx_dma.paddr; txq_init->sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS + cos; txq_init->traffic_type = LLFC_TRAFFIC_TYPE_NW; txq_init->fw_sb_id = fp->fw_sb_id; /* * set the TSS leading client id for TX classfication to the * leading RSS client id */ txq_init->tss_leading_cl_id = BXE_FP(sc, 0, cl_id); } /* * This function performs 2 steps in a queue state machine: * 1) RESET->INIT * 2) INIT->SETUP */ static int bxe_setup_queue(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { struct ecore_queue_state_params q_params = { NULL }; struct ecore_queue_setup_params *setup_params = &q_params.params.setup; int rc; BLOGD(sc, DBG_LOAD, "setting up queue %d\n", fp->index); bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); q_params.q_obj = &BXE_SP_OBJ(sc, fp).q_obj; /* we want to wait for completion in this context */ bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); /* prepare the INIT parameters */ bxe_pf_q_prep_init(sc, fp, &q_params.params.init); /* Set the command */ q_params.cmd = ECORE_Q_CMD_INIT; /* Change the state to INIT */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { BLOGE(sc, "Queue(%d) INIT failed rc = %d\n", fp->index, rc); return (rc); } BLOGD(sc, DBG_LOAD, "init complete\n"); /* now move the Queue to the SETUP state */ memset(setup_params, 0, sizeof(*setup_params)); /* set Queue flags */ setup_params->flags = bxe_get_q_flags(sc, fp, leading); /* set general SETUP parameters */ bxe_pf_q_prep_general(sc, fp, &setup_params->gen_params, FIRST_TX_COS_INDEX); bxe_pf_rx_q_prep(sc, fp, &setup_params->pause_params, &setup_params->rxq_params); bxe_pf_tx_q_prep(sc, fp, &setup_params->txq_params, FIRST_TX_COS_INDEX); /* Set the command */ q_params.cmd = ECORE_Q_CMD_SETUP; /* change the state to SETUP */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { BLOGE(sc, "Queue(%d) SETUP failed (rc = %d)\n", fp->index, rc); return (rc); } return (rc); } static int bxe_setup_leading(struct bxe_softc *sc) { return (bxe_setup_queue(sc, &sc->fp[0], TRUE)); } static int bxe_config_rss_pf(struct bxe_softc *sc, struct ecore_rss_config_obj *rss_obj, uint8_t config_hash) { struct ecore_config_rss_params params = { NULL }; int i; /* * Although RSS is meaningless when there is a single HW queue we * still need it enabled in order to have HW Rx hash generated. */ params.rss_obj = rss_obj; bxe_set_bit(RAMROD_COMP_WAIT, ¶ms.ramrod_flags); bxe_set_bit(ECORE_RSS_MODE_REGULAR, ¶ms.rss_flags); /* RSS configuration */ bxe_set_bit(ECORE_RSS_IPV4, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV4_TCP, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV6, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV6_TCP, ¶ms.rss_flags); if (rss_obj->udp_rss_v4) { bxe_set_bit(ECORE_RSS_IPV4_UDP, ¶ms.rss_flags); } if (rss_obj->udp_rss_v6) { bxe_set_bit(ECORE_RSS_IPV6_UDP, ¶ms.rss_flags); } /* Hash bits */ params.rss_result_mask = MULTI_MASK; memcpy(params.ind_table, rss_obj->ind_table, sizeof(params.ind_table)); if (config_hash) { /* RSS keys */ for (i = 0; i < sizeof(params.rss_key) / 4; i++) { params.rss_key[i] = arc4random(); } bxe_set_bit(ECORE_RSS_SET_SRCH, ¶ms.rss_flags); } return (ecore_config_rss(sc, ¶ms)); } static int bxe_config_rss_eth(struct bxe_softc *sc, uint8_t config_hash) { return (bxe_config_rss_pf(sc, &sc->rss_conf_obj, config_hash)); } static int bxe_init_rss_pf(struct bxe_softc *sc) { uint8_t num_eth_queues = BXE_NUM_ETH_QUEUES(sc); int i; /* * Prepare the initial contents of the indirection table if * RSS is enabled */ for (i = 0; i < sizeof(sc->rss_conf_obj.ind_table); i++) { sc->rss_conf_obj.ind_table[i] = (sc->fp->cl_id + (i % num_eth_queues)); } if (sc->udp_rss) { sc->rss_conf_obj.udp_rss_v4 = sc->rss_conf_obj.udp_rss_v6 = 1; } /* * For 57710 and 57711 SEARCHER configuration (rss_keys) is * per-port, so if explicit configuration is needed, do it only * for a PMF. * * For 57712 and newer it's a per-function configuration. */ return (bxe_config_rss_eth(sc, sc->port.pmf || !CHIP_IS_E1x(sc))); } static int bxe_set_mac_one(struct bxe_softc *sc, uint8_t *mac, struct ecore_vlan_mac_obj *obj, uint8_t set, int mac_type, unsigned long *ramrod_flags) { struct ecore_vlan_mac_ramrod_params ramrod_param; int rc; memset(&ramrod_param, 0, sizeof(ramrod_param)); /* fill in general parameters */ ramrod_param.vlan_mac_obj = obj; ramrod_param.ramrod_flags = *ramrod_flags; /* fill a user request section if needed */ if (!bxe_test_bit(RAMROD_CONT, ramrod_flags)) { memcpy(ramrod_param.user_req.u.mac.mac, mac, ETH_ALEN); bxe_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags); /* Set the command: ADD or DEL */ ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD : ECORE_VLAN_MAC_DEL; } rc = ecore_config_vlan_mac(sc, &ramrod_param); if (rc == ECORE_EXISTS) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "%s MAC failed (%d)\n", (set ? "Set" : "Delete"), rc); } return (rc); } static int bxe_set_eth_mac(struct bxe_softc *sc, uint8_t set) { unsigned long ramrod_flags = 0; BLOGD(sc, DBG_LOAD, "Adding Ethernet MAC\n"); bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Eth MAC is set on RSS leading client (fp[0]) */ return (bxe_set_mac_one(sc, sc->link_params.mac_addr, &sc->sp_objs->mac_obj, set, ECORE_ETH_MAC, &ramrod_flags)); } static int bxe_get_cur_phy_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = 0; if (sc->link_params.num_phys <= 1) { return (ELINK_INT_PHY); } if (sc->link_vars.link_up) { sel_phy_idx = ELINK_EXT_PHY1; /* In case link is SERDES, check if the ELINK_EXT_PHY2 is the one */ if ((sc->link_vars.link_status & LINK_STATUS_SERDES_LINK) && (sc->link_params.phy[ELINK_EXT_PHY2].supported & ELINK_SUPPORTED_FIBRE)) sel_phy_idx = ELINK_EXT_PHY2; } else { switch (elink_phy_selection(&sc->link_params)) { case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY1; break; case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY: case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY2; break; } } return (sel_phy_idx); } static int bxe_get_link_cfg_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = bxe_get_cur_phy_idx(sc); /* * The selected activated PHY is always after swapping (in case PHY * swapping is enabled). So when swapping is enabled, we need to reverse * the configuration */ if (sc->link_params.multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED) { if (sel_phy_idx == ELINK_EXT_PHY1) sel_phy_idx = ELINK_EXT_PHY2; else if (sel_phy_idx == ELINK_EXT_PHY2) sel_phy_idx = ELINK_EXT_PHY1; } return (ELINK_LINK_CONFIG_IDX(sel_phy_idx)); } static void bxe_set_requested_fc(struct bxe_softc *sc) { /* * Initialize link parameters structure variables * It is recommended to turn off RX FC for jumbo frames * for better performance */ if (CHIP_IS_E1x(sc) && (sc->mtu > 5000)) { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_TX; } else { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_BOTH; } } static void bxe_calc_fc_adv(struct bxe_softc *sc) { uint8_t cfg_idx = bxe_get_link_cfg_idx(sc); sc->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause | ADVERTISED_Pause); switch (sc->link_vars.ieee_fc & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) { case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH: sc->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause | ADVERTISED_Pause); break; case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC: sc->port.advertising[cfg_idx] |= ADVERTISED_Asym_Pause; break; default: break; } } static uint16_t bxe_get_mf_speed(struct bxe_softc *sc) { uint16_t line_speed = sc->link_vars.line_speed; if (IS_MF(sc)) { uint16_t maxCfg = bxe_extract_max_cfg(sc, sc->devinfo.mf_info.mf_config[SC_VN(sc)]); /* calculate the current MAX line speed limit for the MF devices */ if (IS_MF_SI(sc)) { line_speed = (line_speed * maxCfg) / 100; } else { /* SD mode */ uint16_t vn_max_rate = maxCfg * 100; if (vn_max_rate < line_speed) { line_speed = vn_max_rate; } } } return (line_speed); } static void bxe_fill_report_data(struct bxe_softc *sc, struct bxe_link_report_data *data) { uint16_t line_speed = bxe_get_mf_speed(sc); memset(data, 0, sizeof(*data)); /* fill the report data with the effective line speed */ data->line_speed = line_speed; /* Link is down */ if (!sc->link_vars.link_up || (sc->flags & BXE_MF_FUNC_DIS)) { bxe_set_bit(BXE_LINK_REPORT_LINK_DOWN, &data->link_report_flags); } /* Full DUPLEX */ if (sc->link_vars.duplex == DUPLEX_FULL) { bxe_set_bit(BXE_LINK_REPORT_FULL_DUPLEX, &data->link_report_flags); } /* Rx Flow Control is ON */ if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) { bxe_set_bit(BXE_LINK_REPORT_RX_FC_ON, &data->link_report_flags); } /* Tx Flow Control is ON */ if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) { bxe_set_bit(BXE_LINK_REPORT_TX_FC_ON, &data->link_report_flags); } } /* report link status to OS, should be called under phy_lock */ static void bxe_link_report_locked(struct bxe_softc *sc) { struct bxe_link_report_data cur_data; /* reread mf_cfg */ if (IS_PF(sc) && !CHIP_IS_E1(sc)) { bxe_read_mf_cfg(sc); } /* Read the current link report info */ bxe_fill_report_data(sc, &cur_data); /* Don't report link down or exactly the same link status twice */ if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) || (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &sc->last_reported_link.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags))) { return; } ELINK_DEBUG_P2(sc, "Change in link status : cur_data = %x, last_reported_link = %x\n", cur_data.link_report_flags, sc->last_reported_link.link_report_flags); sc->link_cnt++; ELINK_DEBUG_P1(sc, "link status change count = %x\n", sc->link_cnt); /* report new link params and remember the state for the next time */ memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data)); if (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags)) { if_link_state_change(sc->ifp, LINK_STATE_DOWN); } else { const char *duplex; const char *flow; if (bxe_test_and_clear_bit(BXE_LINK_REPORT_FULL_DUPLEX, &cur_data.link_report_flags)) { duplex = "full"; ELINK_DEBUG_P0(sc, "link set to full duplex\n"); } else { duplex = "half"; ELINK_DEBUG_P0(sc, "link set to half duplex\n"); } /* * Handle the FC at the end so that only these flags would be * possibly set. This way we may easily check if there is no FC * enabled. */ if (cur_data.link_report_flags) { if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - receive & transmit"; } else if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && !bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - receive"; } else if (!bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - transmit"; } else { flow = "none"; /* possible? */ } } else { flow = "none"; } if_link_state_change(sc->ifp, LINK_STATE_UP); BLOGI(sc, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n", cur_data.line_speed, duplex, flow); } } static void bxe_link_report(struct bxe_softc *sc) { bxe_acquire_phy_lock(sc); bxe_link_report_locked(sc); bxe_release_phy_lock(sc); } static void bxe_link_status_update(struct bxe_softc *sc) { if (sc->state != BXE_STATE_OPEN) { return; } if (IS_PF(sc) && !CHIP_REV_IS_SLOW(sc)) { elink_link_status_update(&sc->link_params, &sc->link_vars); } else { sc->port.supported[0] |= (ELINK_SUPPORTED_10baseT_Half | ELINK_SUPPORTED_10baseT_Full | ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full | ELINK_SUPPORTED_1000baseT_Full | ELINK_SUPPORTED_2500baseX_Full | ELINK_SUPPORTED_10000baseT_Full | ELINK_SUPPORTED_TP | ELINK_SUPPORTED_FIBRE | ELINK_SUPPORTED_Autoneg | ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause); sc->port.advertising[0] = sc->port.supported[0]; sc->link_params.sc = sc; sc->link_params.port = SC_PORT(sc); sc->link_params.req_duplex[0] = DUPLEX_FULL; sc->link_params.req_flow_ctrl[0] = ELINK_FLOW_CTRL_NONE; sc->link_params.req_line_speed[0] = SPEED_10000; sc->link_params.speed_cap_mask[0] = 0x7f0000; sc->link_params.switch_cfg = ELINK_SWITCH_CFG_10G; if (CHIP_REV_IS_FPGA(sc)) { sc->link_vars.mac_type = ELINK_MAC_TYPE_EMAC; sc->link_vars.line_speed = ELINK_SPEED_1000; sc->link_vars.link_status = (LINK_STATUS_LINK_UP | LINK_STATUS_SPEED_AND_DUPLEX_1000TFD); } else { sc->link_vars.mac_type = ELINK_MAC_TYPE_BMAC; sc->link_vars.line_speed = ELINK_SPEED_10000; sc->link_vars.link_status = (LINK_STATUS_LINK_UP | LINK_STATUS_SPEED_AND_DUPLEX_10GTFD); } sc->link_vars.link_up = 1; sc->link_vars.duplex = DUPLEX_FULL; sc->link_vars.flow_ctrl = ELINK_FLOW_CTRL_NONE; if (IS_PF(sc)) { REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + sc->link_params.port*4, 0); bxe_stats_handle(sc, STATS_EVENT_LINK_UP); bxe_link_report(sc); } } if (IS_PF(sc)) { if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } else { bxe_stats_handle(sc, STATS_EVENT_STOP); } bxe_link_report(sc); } else { bxe_link_report(sc); bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } } static int bxe_initial_phy_init(struct bxe_softc *sc, int load_mode) { int rc, cfg_idx = bxe_get_link_cfg_idx(sc); uint16_t req_line_speed = sc->link_params.req_line_speed[cfg_idx]; struct elink_params *lp = &sc->link_params; bxe_set_requested_fc(sc); if (CHIP_REV_IS_SLOW(sc)) { uint32_t bond = CHIP_BOND_ID(sc); uint32_t feat = 0; if (CHIP_IS_E2(sc) && CHIP_IS_MODE_4_PORT(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; } else if (bond & 0x4) { if (CHIP_IS_E3(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC; } else { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; } } else if (bond & 0x8) { if (CHIP_IS_E3(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC; } else { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; } } /* disable EMAC for E3 and above */ if (bond & 0x2) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; } sc->link_params.feature_config_flags |= feat; } bxe_acquire_phy_lock(sc); if (load_mode == LOAD_DIAG) { lp->loopback_mode = ELINK_LOOPBACK_XGXS; /* Prefer doing PHY loopback at 10G speed, if possible */ if (lp->req_line_speed[cfg_idx] < ELINK_SPEED_10000) { if (lp->speed_cap_mask[cfg_idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) { lp->req_line_speed[cfg_idx] = ELINK_SPEED_10000; } else { lp->req_line_speed[cfg_idx] = ELINK_SPEED_1000; } } } if (load_mode == LOAD_LOOPBACK_EXT) { lp->loopback_mode = ELINK_LOOPBACK_EXT; } rc = elink_phy_init(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); bxe_calc_fc_adv(sc); if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); bxe_link_report(sc); } if (!CHIP_REV_IS_SLOW(sc)) { bxe_periodic_start(sc); } sc->link_params.req_line_speed[cfg_idx] = req_line_speed; return (rc); } /* must be called under IF_ADDR_LOCK */ static int bxe_set_mc_list(struct bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam = { NULL }; int rc = 0; int mc_count = 0; int mcnt, i; struct ecore_mcast_list_elem *mc_mac, *mc_mac_start; unsigned char *mta; if_t ifp = sc->ifp; mc_count = if_multiaddr_count(ifp, -1);/* XXX they don't have a limit */ if (!mc_count) return (0); mta = malloc(sizeof(unsigned char) * ETHER_ADDR_LEN * mc_count, M_DEVBUF, M_NOWAIT); if(mta == NULL) { BLOGE(sc, "Failed to allocate temp mcast list\n"); return (-1); } bzero(mta, (sizeof(unsigned char) * ETHER_ADDR_LEN * mc_count)); mc_mac = mallocarray(mc_count, sizeof(*mc_mac), M_DEVBUF, (M_NOWAIT | M_ZERO)); mc_mac_start = mc_mac; if (!mc_mac) { free(mta, M_DEVBUF); BLOGE(sc, "Failed to allocate temp mcast list\n"); return (-1); } bzero(mc_mac, (sizeof(*mc_mac) * mc_count)); /* mta and mcnt not expected to be different */ if_multiaddr_array(ifp, mta, &mcnt, mc_count); rparam.mcast_obj = &sc->mcast_obj; ECORE_LIST_INIT(&rparam.mcast_list); for(i=0; i< mcnt; i++) { mc_mac->mac = (uint8_t *)(mta + (i * ETHER_ADDR_LEN)); ECORE_LIST_PUSH_TAIL(&mc_mac->link, &rparam.mcast_list); BLOGD(sc, DBG_LOAD, "Setting MCAST %02X:%02X:%02X:%02X:%02X:%02X\n", mc_mac->mac[0], mc_mac->mac[1], mc_mac->mac[2], mc_mac->mac[3], mc_mac->mac[4], mc_mac->mac[5]); mc_mac++; } rparam.mcast_list_len = mc_count; BXE_MCAST_LOCK(sc); /* first, clear all configured multicast MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to clear multicast configuration: %d\n", rc); BXE_MCAST_UNLOCK(sc); free(mc_mac_start, M_DEVBUF); free(mta, M_DEVBUF); return (rc); } /* Now add the new MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_ADD); if (rc < 0) { BLOGE(sc, "Failed to set new mcast config (%d)\n", rc); } BXE_MCAST_UNLOCK(sc); free(mc_mac_start, M_DEVBUF); free(mta, M_DEVBUF); return (rc); } static int bxe_set_uc_list(struct bxe_softc *sc) { if_t ifp = sc->ifp; struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj; struct ifaddr *ifa; unsigned long ramrod_flags = 0; int rc; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_addr_rlock(ifp); #endif /* first schedule a cleanup up of old configuration */ rc = bxe_del_all_macs(sc, mac_obj, ECORE_UC_LIST_MAC, FALSE); if (rc < 0) { BLOGE(sc, "Failed to schedule delete of all ETH MACs (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = if_getifaddr(ifp); /* XXX Is this structure */ while (ifa) { if (ifa->ifa_addr->sa_family != AF_LINK) { ifa = TAILQ_NEXT(ifa, ifa_link); continue; } rc = bxe_set_mac_one(sc, (uint8_t *)LLADDR((struct sockaddr_dl *)ifa->ifa_addr), mac_obj, TRUE, ECORE_UC_LIST_MAC, &ramrod_flags); if (rc == -EEXIST) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as an error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "Failed to schedule ADD operations (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = TAILQ_NEXT(ifa, ifa_link); } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif /* Execute the pending commands */ bit_set(&ramrod_flags, RAMROD_CONT); return (bxe_set_mac_one(sc, NULL, mac_obj, FALSE /* don't care */, ECORE_UC_LIST_MAC, &ramrod_flags)); } static void bxe_set_rx_mode(struct bxe_softc *sc) { if_t ifp = sc->ifp; uint32_t rx_mode = BXE_RX_MODE_NORMAL; if (sc->state != BXE_STATE_OPEN) { BLOGD(sc, DBG_SP, "state is %x, returning\n", sc->state); return; } BLOGD(sc, DBG_SP, "if_flags(ifp)=0x%x\n", if_getflags(sc->ifp)); if (if_getflags(ifp) & IFF_PROMISC) { rx_mode = BXE_RX_MODE_PROMISC; } else if ((if_getflags(ifp) & IFF_ALLMULTI) || ((if_getamcount(ifp) > BXE_MAX_MULTICAST) && CHIP_IS_E1(sc))) { rx_mode = BXE_RX_MODE_ALLMULTI; } else { if (IS_PF(sc)) { /* some multicasts */ if (bxe_set_mc_list(sc) < 0) { rx_mode = BXE_RX_MODE_ALLMULTI; } if (bxe_set_uc_list(sc) < 0) { rx_mode = BXE_RX_MODE_PROMISC; } } } sc->rx_mode = rx_mode; /* schedule the rx_mode command */ if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) { BLOGD(sc, DBG_LOAD, "Scheduled setting rx_mode with ECORE...\n"); bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state); return; } if (IS_PF(sc)) { bxe_set_storm_rx_mode(sc); } } /* update flags in shmem */ static void bxe_update_drv_flags(struct bxe_softc *sc, uint32_t flags, uint32_t set) { uint32_t drv_flags; if (SHMEM2_HAS(sc, drv_flags)) { bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS); drv_flags = SHMEM2_RD(sc, drv_flags); if (set) { SET_FLAGS(drv_flags, flags); } else { RESET_FLAGS(drv_flags, flags); } SHMEM2_WR(sc, drv_flags, drv_flags); BLOGD(sc, DBG_LOAD, "drv_flags 0x%08x\n", drv_flags); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS); } } /* periodic timer callout routine, only runs when the interface is up */ static void bxe_periodic_callout_func(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; int i; if (!BXE_CORE_TRYLOCK(sc)) { /* just bail and try again next time */ if ((sc->state == BXE_STATE_OPEN) && (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) { /* schedule the next periodic callout */ callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } return; } if ((sc->state != BXE_STATE_OPEN) || (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_STOP)) { BLOGW(sc, "periodic callout exit (state=0x%x)\n", sc->state); BXE_CORE_UNLOCK(sc); return; } /* Check for TX timeouts on any fastpath. */ FOR_EACH_QUEUE(sc, i) { if (bxe_watchdog(sc, &sc->fp[i]) != 0) { /* Ruh-Roh, chip was reset! */ break; } } if (!CHIP_REV_IS_SLOW(sc)) { /* * This barrier is needed to ensure the ordering between the writing * to the sc->port.pmf in the bxe_nic_load() or bxe_pmf_update() and * the reading here. */ mb(); if (sc->port.pmf) { bxe_acquire_phy_lock(sc); elink_period_func(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); } } if (IS_PF(sc) && !(sc->flags & BXE_NO_PULSE)) { int mb_idx = SC_FW_MB_IDX(sc); uint32_t drv_pulse; uint32_t mcp_pulse; ++sc->fw_drv_pulse_wr_seq; sc->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK; drv_pulse = sc->fw_drv_pulse_wr_seq; bxe_drv_pulse(sc); mcp_pulse = (SHMEM_RD(sc, func_mb[mb_idx].mcp_pulse_mb) & MCP_PULSE_SEQ_MASK); /* * The delta between driver pulse and mcp response should * be 1 (before mcp response) or 0 (after mcp response). */ if ((drv_pulse != mcp_pulse) && (drv_pulse != ((mcp_pulse + 1) & MCP_PULSE_SEQ_MASK))) { /* someone lost a heartbeat... */ BLOGE(sc, "drv_pulse (0x%x) != mcp_pulse (0x%x)\n", drv_pulse, mcp_pulse); } } /* state is BXE_STATE_OPEN */ bxe_stats_handle(sc, STATS_EVENT_UPDATE); BXE_CORE_UNLOCK(sc); if ((sc->state == BXE_STATE_OPEN) && (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) { /* schedule the next periodic callout */ callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } } static void bxe_periodic_start(struct bxe_softc *sc) { atomic_store_rel_long(&sc->periodic_flags, PERIODIC_GO); callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } static void bxe_periodic_stop(struct bxe_softc *sc) { atomic_store_rel_long(&sc->periodic_flags, PERIODIC_STOP); callout_drain(&sc->periodic_callout); } /* start the controller */ static __noinline int bxe_nic_load(struct bxe_softc *sc, int load_mode) { uint32_t val; int load_code = 0; int i, rc = 0; BXE_CORE_LOCK_ASSERT(sc); BLOGD(sc, DBG_LOAD, "Starting NIC load...\n"); sc->state = BXE_STATE_OPENING_WAITING_LOAD; if (IS_PF(sc)) { /* must be called before memory allocation and HW init */ bxe_ilt_set_info(sc); } sc->last_reported_link_state = LINK_STATE_UNKNOWN; bxe_set_fp_rx_buf_size(sc); if (bxe_alloc_fp_buffers(sc) != 0) { BLOGE(sc, "Failed to allocate fastpath memory\n"); sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (bxe_alloc_mem(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (bxe_alloc_fw_stats_mem(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (IS_PF(sc)) { /* set pf load just before approaching the MCP */ bxe_set_pf_load(sc); /* if MCP exists send load request and analyze response */ if (!BXE_NOMCP(sc)) { /* attempt to load pf */ if (bxe_nic_load_request(sc, &load_code) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error1; } /* what did the MCP say? */ if (bxe_nic_load_analyze_req(sc, load_code) != 0) { bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } } else { BLOGI(sc, "Device has no MCP!\n"); load_code = bxe_nic_load_no_mcp(sc); } /* mark PMF if applicable */ bxe_nic_load_pmf(sc, load_code); /* Init Function state controlling object */ bxe_init_func_obj(sc); /* Initialize HW */ if (bxe_init_hw(sc, load_code) != 0) { BLOGE(sc, "HW init failed\n"); bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } } /* set ALWAYS_ALIVE bit in shmem */ sc->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bxe_drv_pulse(sc); sc->flags |= BXE_NO_PULSE; /* attach interrupts */ if (bxe_interrupt_attach(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } bxe_nic_init(sc, load_code); /* Init per-function objects */ if (IS_PF(sc)) { bxe_init_objs(sc); // XXX bxe_iov_nic_init(sc); /* set AFEX default VLAN tag to an invalid value */ sc->devinfo.mf_info.afex_def_vlan_tag = -1; // XXX bxe_nic_load_afex_dcc(sc, load_code); sc->state = BXE_STATE_OPENING_WAITING_PORT; rc = bxe_func_start(sc); if (rc) { BLOGE(sc, "Function start failed! rc = %d\n", rc); bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } /* send LOAD_DONE command to MCP */ if (!BXE_NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); sc->state = BXE_STATE_ERROR; rc = ENXIO; goto bxe_nic_load_error3; } } rc = bxe_setup_leading(sc); if (rc) { BLOGE(sc, "Setup leading failed! rc = %d\n", rc); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } FOR_EACH_NONDEFAULT_ETH_QUEUE(sc, i) { rc = bxe_setup_queue(sc, &sc->fp[i], FALSE); if (rc) { BLOGE(sc, "Queue(%d) setup failed rc = %d\n", i, rc); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } rc = bxe_init_rss_pf(sc); if (rc) { BLOGE(sc, "PF RSS init failed\n"); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } /* XXX VF */ /* now when Clients are configured we are ready to work */ sc->state = BXE_STATE_OPEN; /* Configure a ucast MAC */ if (IS_PF(sc)) { rc = bxe_set_eth_mac(sc, TRUE); } if (rc) { BLOGE(sc, "Setting Ethernet MAC failed rc = %d\n", rc); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } if (sc->port.pmf) { rc = bxe_initial_phy_init(sc, /* XXX load_mode */LOAD_OPEN); if (rc) { sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_BOOT_FROM_SAN; /* start fast path */ /* Initialize Rx filter */ bxe_set_rx_mode(sc); /* start the Tx */ switch (/* XXX load_mode */LOAD_OPEN) { case LOAD_NORMAL: case LOAD_OPEN: break; case LOAD_DIAG: case LOAD_LOOPBACK_EXT: sc->state = BXE_STATE_DIAG; break; default: break; } if (sc->port.pmf) { bxe_update_drv_flags(sc, 1 << DRV_FLAGS_PORT_MASK, 0); } else { bxe_link_status_update(sc); } /* start the periodic timer callout */ bxe_periodic_start(sc); if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) { /* mark driver is loaded in shmem2 */ val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]); SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)], (val | DRV_FLAGS_CAPABILITIES_LOADED_SUPPORTED | DRV_FLAGS_CAPABILITIES_LOADED_L2)); } /* wait for all pending SP commands to complete */ if (IS_PF(sc) && !bxe_wait_sp_comp(sc, ~0x0UL)) { BLOGE(sc, "Timeout waiting for all SPs to complete!\n"); bxe_periodic_stop(sc); bxe_nic_unload(sc, UNLOAD_CLOSE, FALSE); return (ENXIO); } /* Tell the stack the driver is running! */ if_setdrvflags(sc->ifp, IFF_DRV_RUNNING); BLOGD(sc, DBG_LOAD, "NIC successfully loaded\n"); return (0); bxe_nic_load_error3: if (IS_PF(sc)) { bxe_int_disable_sync(sc, 1); /* clean out queued objects */ bxe_squeeze_objects(sc); } bxe_interrupt_detach(sc); bxe_nic_load_error2: if (IS_PF(sc) && !BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0); } sc->port.pmf = 0; bxe_nic_load_error1: /* clear pf_load status, as it was already set */ if (IS_PF(sc)) { bxe_clear_pf_load(sc); } bxe_nic_load_error0: bxe_free_fw_stats_mem(sc); bxe_free_fp_buffers(sc); bxe_free_mem(sc); return (rc); } static int bxe_init_locked(struct bxe_softc *sc) { int other_engine = SC_PATH(sc) ? 0 : 1; uint8_t other_load_status, load_status; uint8_t global = FALSE; int rc; BXE_CORE_LOCK_ASSERT(sc); /* check if the driver is already running */ if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) { BLOGD(sc, DBG_LOAD, "Init called while driver is running!\n"); return (0); } bxe_set_power_state(sc, PCI_PM_D0); /* * If parity occurred during the unload, then attentions and/or * RECOVERY_IN_PROGRES may still be set. If so we want the first function * loaded on the current engine to complete the recovery. Parity recovery * is only relevant for PF driver. */ if (IS_PF(sc)) { other_load_status = bxe_get_load_status(sc, other_engine); load_status = bxe_get_load_status(sc, SC_PATH(sc)); if (!bxe_reset_is_done(sc, SC_PATH(sc)) || bxe_chk_parity_attn(sc, &global, TRUE)) { do { /* * If there are attentions and they are in global blocks, set * the GLOBAL_RESET bit regardless whether it will be this * function that will complete the recovery or not. */ if (global) { bxe_set_reset_global(sc); } /* * Only the first function on the current engine should try * to recover in open. In case of attentions in global blocks * only the first in the chip should try to recover. */ if ((!load_status && (!global || !other_load_status)) && bxe_trylock_leader_lock(sc) && !bxe_leader_reset(sc)) { BLOGI(sc, "Recovered during init\n"); break; } /* recovery has failed... */ bxe_set_power_state(sc, PCI_PM_D3hot); sc->recovery_state = BXE_RECOVERY_FAILED; BLOGE(sc, "Recovery flow hasn't properly " "completed yet, try again later. " "If you still see this message after a " "few retries then power cycle is required.\n"); rc = ENXIO; goto bxe_init_locked_done; } while (0); } } sc->recovery_state = BXE_RECOVERY_DONE; rc = bxe_nic_load(sc, LOAD_OPEN); bxe_init_locked_done: if (rc) { /* Tell the stack the driver is NOT running! */ BLOGE(sc, "Initialization failed, " "stack notified driver is NOT running!\n"); if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING); } return (rc); } static int bxe_stop_locked(struct bxe_softc *sc) { BXE_CORE_LOCK_ASSERT(sc); return (bxe_nic_unload(sc, UNLOAD_NORMAL, TRUE)); } /* * Handles controller initialization when called from an unlocked routine. * ifconfig calls this function. * * Returns: * void */ static void bxe_init(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; BXE_CORE_LOCK(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } static int bxe_init_ifnet(struct bxe_softc *sc) { if_t ifp; int capabilities; /* ifconfig entrypoint for media type/status reporting */ ifmedia_init(&sc->ifmedia, IFM_IMASK, bxe_ifmedia_update, bxe_ifmedia_status); /* set the default interface values */ ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_FDX | sc->media), 0, NULL); ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&sc->ifmedia, (IFM_ETHER | IFM_AUTO)); sc->ifmedia.ifm_media = sc->ifmedia.ifm_cur->ifm_media; /* XXX ? */ BLOGI(sc, "IFMEDIA flags : %x\n", sc->ifmedia.ifm_media); /* allocate the ifnet structure */ if ((ifp = if_gethandle(IFT_ETHER)) == NULL) { BLOGE(sc, "Interface allocation failed!\n"); return (ENXIO); } if_setsoftc(ifp, sc); if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); if_setflags(ifp, (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST)); if_setioctlfn(ifp, bxe_ioctl); if_setstartfn(ifp, bxe_tx_start); if_setgetcounterfn(ifp, bxe_get_counter); #if __FreeBSD_version >= 901504 if_settransmitfn(ifp, bxe_tx_mq_start); if_setqflushfn(ifp, bxe_mq_flush); #endif #ifdef FreeBSD8_0 if_settimer(ifp, 0); #endif if_setinitfn(ifp, bxe_init); if_setmtu(ifp, sc->mtu); if_sethwassist(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6)); capabilities = #if __FreeBSD_version < 700000 (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_LRO); #else (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWCSUM | IFCAP_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_LRO | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_WOL_MAGIC); #endif if_setcapabilitiesbit(ifp, capabilities, 0); /* XXX */ if_setcapenable(ifp, if_getcapabilities(ifp)); if_setbaudrate(ifp, IF_Gbps(10)); /* XXX */ if_setsendqlen(ifp, sc->tx_ring_size); if_setsendqready(ifp); /* XXX */ sc->ifp = ifp; /* attach to the Ethernet interface list */ ether_ifattach(ifp, sc->link_params.mac_addr); /* Attach driver netdump methods. */ NETDUMP_SET(ifp, bxe); return (0); } static void bxe_deallocate_bars(struct bxe_softc *sc) { int i; for (i = 0; i < MAX_BARS; i++) { if (sc->bar[i].resource != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->bar[i].rid, sc->bar[i].resource); BLOGD(sc, DBG_LOAD, "Released PCI BAR%d [%02x] memory\n", i, PCIR_BAR(i)); } } } static int bxe_allocate_bars(struct bxe_softc *sc) { u_int flags; int i; memset(sc->bar, 0, sizeof(sc->bar)); for (i = 0; i < MAX_BARS; i++) { /* memory resources reside at BARs 0, 2, 4 */ /* Run `pciconf -lb` to see mappings */ if ((i != 0) && (i != 2) && (i != 4)) { continue; } sc->bar[i].rid = PCIR_BAR(i); flags = RF_ACTIVE; if (i == 0) { flags |= RF_SHAREABLE; } if ((sc->bar[i].resource = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->bar[i].rid, flags)) == NULL) { return (0); } sc->bar[i].tag = rman_get_bustag(sc->bar[i].resource); sc->bar[i].handle = rman_get_bushandle(sc->bar[i].resource); sc->bar[i].kva = (vm_offset_t)rman_get_virtual(sc->bar[i].resource); BLOGI(sc, "PCI BAR%d [%02x] memory allocated: %p-%p (%jd) -> %p\n", i, PCIR_BAR(i), (void *)rman_get_start(sc->bar[i].resource), (void *)rman_get_end(sc->bar[i].resource), rman_get_size(sc->bar[i].resource), (void *)sc->bar[i].kva); } return (0); } static void bxe_get_function_num(struct bxe_softc *sc) { uint32_t val = 0; /* * Read the ME register to get the function number. The ME register * holds the relative-function number and absolute-function number. The * absolute-function number appears only in E2 and above. Before that * these bits always contained zero, therefore we cannot blindly use them. */ val = REG_RD(sc, BAR_ME_REGISTER); sc->pfunc_rel = (uint8_t)((val & ME_REG_PF_NUM) >> ME_REG_PF_NUM_SHIFT); sc->path_id = (uint8_t)((val & ME_REG_ABS_PF_NUM) >> ME_REG_ABS_PF_NUM_SHIFT) & 1; if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { sc->pfunc_abs = ((sc->pfunc_rel << 1) | sc->path_id); } else { sc->pfunc_abs = (sc->pfunc_rel | sc->path_id); } BLOGD(sc, DBG_LOAD, "Relative function %d, Absolute function %d, Path %d\n", sc->pfunc_rel, sc->pfunc_abs, sc->path_id); } static uint32_t bxe_get_shmem_mf_cfg_base(struct bxe_softc *sc) { uint32_t shmem2_size; uint32_t offset; uint32_t mf_cfg_offset_value; /* Non 57712 */ offset = (SHMEM_RD(sc, func_mb) + (MAX_FUNC_NUM * sizeof(struct drv_func_mb))); /* 57712 plus */ if (sc->devinfo.shmem2_base != 0) { shmem2_size = SHMEM2_RD(sc, size); if (shmem2_size > offsetof(struct shmem2_region, mf_cfg_addr)) { mf_cfg_offset_value = SHMEM2_RD(sc, mf_cfg_addr); if (SHMEM_MF_CFG_ADDR_NONE != mf_cfg_offset_value) { offset = mf_cfg_offset_value; } } } return (offset); } static uint32_t bxe_pcie_capability_read(struct bxe_softc *sc, int reg, int width) { int pcie_reg; /* ensure PCIe capability is enabled */ if (pci_find_cap(sc->dev, PCIY_EXPRESS, &pcie_reg) == 0) { if (pcie_reg != 0) { BLOGD(sc, DBG_LOAD, "PCIe capability at 0x%04x\n", pcie_reg); return (pci_read_config(sc->dev, (pcie_reg + reg), width)); } } BLOGE(sc, "PCIe capability NOT FOUND!!!\n"); return (0); } static uint8_t bxe_is_pcie_pending(struct bxe_softc *sc) { return (bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_STA, 2) & PCIM_EXP_STA_TRANSACTION_PND); } /* * Walk the PCI capabiites list for the device to find what features are * supported. These capabilites may be enabled/disabled by firmware so it's * best to walk the list rather than make assumptions. */ static void bxe_probe_pci_caps(struct bxe_softc *sc) { uint16_t link_status; int reg; /* check if PCI Power Management is enabled */ if (pci_find_cap(sc->dev, PCIY_PMG, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found PM capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_PM_CAPABLE_FLAG; sc->devinfo.pcie_pm_cap_reg = (uint16_t)reg; } } link_status = bxe_pcie_capability_read(sc, PCIR_EXPRESS_LINK_STA, 2); /* handle PCIe 2.0 workarounds for 57710 */ if (CHIP_IS_E1(sc)) { /* workaround for 57710 errata E4_57710_27462 */ sc->devinfo.pcie_link_speed = (REG_RD(sc, 0x3d04) & (1 << 24)) ? 2 : 1; /* workaround for 57710 errata E4_57710_27488 */ sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); if (sc->devinfo.pcie_link_speed > 1) { sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4) >> 1; } } else { sc->devinfo.pcie_link_speed = (link_status & PCIM_LINK_STA_SPEED); sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); } BLOGD(sc, DBG_LOAD, "PCIe link speed=%d width=%d\n", sc->devinfo.pcie_link_speed, sc->devinfo.pcie_link_width); sc->devinfo.pcie_cap_flags |= BXE_PCIE_CAPABLE_FLAG; sc->devinfo.pcie_pcie_cap_reg = (uint16_t)reg; /* check if MSI capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSI, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSI_CAPABLE_FLAG; sc->devinfo.pcie_msi_cap_reg = (uint16_t)reg; } } /* check if MSI-X capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSIX, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI-X capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSIX_CAPABLE_FLAG; sc->devinfo.pcie_msix_cap_reg = (uint16_t)reg; } } } static int bxe_get_shmem_mf_cfg_info_sd(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val; /* get the outer vlan if we're in switch-dependent mode */ val = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); mf_info->ext_id = (uint16_t)val; mf_info->multi_vnics_mode = 1; if (!VALID_OVLAN(mf_info->ext_id)) { BLOGE(sc, "Invalid VLAN (%d)\n", mf_info->ext_id); return (1); } /* get the capabilities */ if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) == FUNC_MF_CFG_PROTOCOL_ISCSI) { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ISCSI; } else if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) == FUNC_MF_CFG_PROTOCOL_FCOE) { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_FCOE; } else { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ETHERNET; } mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static uint32_t bxe_get_shmem_ext_proto_support_flags(struct bxe_softc *sc) { uint32_t retval = 0; uint32_t val; val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg); if (val & MACP_FUNC_CFG_FLAGS_ENABLED) { if (val & MACP_FUNC_CFG_FLAGS_ETHERNET) { retval |= MF_PROTO_SUPPORT_ETHERNET; } if (val & MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD) { retval |= MF_PROTO_SUPPORT_ISCSI; } if (val & MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD) { retval |= MF_PROTO_SUPPORT_FCOE; } } return (retval); } static int bxe_get_shmem_mf_cfg_info_si(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val; /* * There is no outer vlan if we're in switch-independent mode. * If the mac is valid then assume multi-function. */ val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg); mf_info->multi_vnics_mode = ((val & MACP_FUNC_CFG_FLAGS_MASK) != 0); mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc); mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static int bxe_get_shmem_mf_cfg_info_niv(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t e1hov_tag; uint32_t func_config; uint32_t niv_config; mf_info->multi_vnics_mode = 1; e1hov_tag = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); func_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); niv_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].afex_config); mf_info->ext_id = (uint16_t)((e1hov_tag & FUNC_MF_CFG_E1HOV_TAG_MASK) >> FUNC_MF_CFG_E1HOV_TAG_SHIFT); mf_info->default_vlan = (uint16_t)((e1hov_tag & FUNC_MF_CFG_AFEX_VLAN_MASK) >> FUNC_MF_CFG_AFEX_VLAN_SHIFT); mf_info->niv_allowed_priorities = (uint8_t)((niv_config & FUNC_MF_CFG_AFEX_COS_FILTER_MASK) >> FUNC_MF_CFG_AFEX_COS_FILTER_SHIFT); mf_info->niv_default_cos = (uint8_t)((func_config & FUNC_MF_CFG_TRANSMIT_PRIORITY_MASK) >> FUNC_MF_CFG_TRANSMIT_PRIORITY_SHIFT); mf_info->afex_vlan_mode = ((niv_config & FUNC_MF_CFG_AFEX_VLAN_MODE_MASK) >> FUNC_MF_CFG_AFEX_VLAN_MODE_SHIFT); mf_info->niv_mba_enabled = ((niv_config & FUNC_MF_CFG_AFEX_MBA_ENABLED_MASK) >> FUNC_MF_CFG_AFEX_MBA_ENABLED_SHIFT); mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc); mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static int bxe_check_valid_mf_cfg(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t mf_cfg1; uint32_t mf_cfg2; uint32_t ovlan1; uint32_t ovlan2; uint8_t i, j; BLOGD(sc, DBG_LOAD, "MF config parameters for function %d\n", SC_PORT(sc)); BLOGD(sc, DBG_LOAD, "\tmf_config=0x%x\n", mf_info->mf_config[SC_VN(sc)]); BLOGD(sc, DBG_LOAD, "\tmulti_vnics_mode=%d\n", mf_info->multi_vnics_mode); BLOGD(sc, DBG_LOAD, "\tvnics_per_port=%d\n", mf_info->vnics_per_port); BLOGD(sc, DBG_LOAD, "\tovlan/vifid=%d\n", mf_info->ext_id); BLOGD(sc, DBG_LOAD, "\tmin_bw=%d/%d/%d/%d\n", mf_info->min_bw[0], mf_info->min_bw[1], mf_info->min_bw[2], mf_info->min_bw[3]); BLOGD(sc, DBG_LOAD, "\tmax_bw=%d/%d/%d/%d\n", mf_info->max_bw[0], mf_info->max_bw[1], mf_info->max_bw[2], mf_info->max_bw[3]); BLOGD(sc, DBG_LOAD, "\tmac_addr: %s\n", sc->mac_addr_str); /* various MF mode sanity checks... */ if (mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_HIDE) { BLOGE(sc, "Enumerated function %d is marked as hidden\n", SC_PORT(sc)); return (1); } if ((mf_info->vnics_per_port > 1) && !mf_info->multi_vnics_mode) { BLOGE(sc, "vnics_per_port=%d multi_vnics_mode=%d\n", mf_info->vnics_per_port, mf_info->multi_vnics_mode); return (1); } if (mf_info->mf_mode == MULTI_FUNCTION_SD) { /* vnic id > 0 must have valid ovlan in switch-dependent mode */ if ((SC_VN(sc) > 0) && !VALID_OVLAN(OVLAN(sc))) { BLOGE(sc, "mf_mode=SD vnic_id=%d ovlan=%d\n", SC_VN(sc), OVLAN(sc)); return (1); } if (!VALID_OVLAN(OVLAN(sc)) && mf_info->multi_vnics_mode) { BLOGE(sc, "mf_mode=SD multi_vnics_mode=%d ovlan=%d\n", mf_info->multi_vnics_mode, OVLAN(sc)); return (1); } /* * Verify all functions are either MF or SF mode. If MF, make sure * sure that all non-hidden functions have a valid ovlan. If SF, * make sure that all non-hidden functions have an invalid ovlan. */ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && (((mf_info->multi_vnics_mode) && !VALID_OVLAN(ovlan1)) || ((!mf_info->multi_vnics_mode) && VALID_OVLAN(ovlan1)))) { BLOGE(sc, "mf_mode=SD function %d MF config " "mismatch, multi_vnics_mode=%d ovlan=%d\n", i, mf_info->multi_vnics_mode, ovlan1); return (1); } } /* Verify all funcs on the same port each have a different ovlan. */ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); /* iterate from the next function on the port to the max func */ for (j = i + 2; j < MAX_FUNC_NUM; j += 2) { mf_cfg2 = MFCFG_RD(sc, func_mf_config[j].config); ovlan2 = MFCFG_RD(sc, func_mf_config[j].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan1) && !(mf_cfg2 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan2) && (ovlan1 == ovlan2)) { BLOGE(sc, "mf_mode=SD functions %d and %d " "have the same ovlan (%d)\n", i, j, ovlan1); return (1); } } } } /* MULTI_FUNCTION_SD */ return (0); } static int bxe_get_mf_cfg_info(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val, mac_upper; uint8_t i, vnic; /* initialize mf_info defaults */ mf_info->vnics_per_port = 1; mf_info->multi_vnics_mode = FALSE; mf_info->path_has_ovlan = FALSE; mf_info->mf_mode = SINGLE_FUNCTION; if (!CHIP_IS_MF_CAP(sc)) { return (0); } if (sc->devinfo.mf_cfg_base == SHMEM_MF_CFG_ADDR_NONE) { BLOGE(sc, "Invalid mf_cfg_base!\n"); return (1); } /* get the MF mode (switch dependent / independent / single-function) */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); switch (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK) { case SHARED_FEAT_CFG_FORCE_SF_MODE_SWITCH_INDEPT: mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); /* check for legal upper mac bytes */ if (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SI; } else { BLOGE(sc, "Invalid config for Switch Independent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_MF_ALLOWED: case SHARED_FEAT_CFG_FORCE_SF_MODE_SPIO4: /* get outer vlan configuration */ val = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); if ((val & FUNC_MF_CFG_E1HOV_TAG_MASK) != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SD; } else { BLOGE(sc, "Invalid config for Switch Dependent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_FORCED_SF: /* not in MF mode, vnics_per_port=1 and multi_vnics_mode=FALSE */ return (0); case SHARED_FEAT_CFG_FORCE_SF_MODE_AFEX_MODE: /* * Mark MF mode as NIV if MCP version includes NPAR-SD support * and the MAC address is valid. */ mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); if ((SHMEM2_HAS(sc, afex_driver_support)) && (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT)) { mf_info->mf_mode = MULTI_FUNCTION_AFEX; } else { BLOGE(sc, "Invalid config for AFEX mode\n"); } break; default: BLOGE(sc, "Unknown MF mode (0x%08x)\n", (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK)); return (1); } /* set path mf_mode (which could be different than function mf_mode) */ if (mf_info->mf_mode == MULTI_FUNCTION_SD) { mf_info->path_has_ovlan = TRUE; } else if (mf_info->mf_mode == SINGLE_FUNCTION) { /* * Decide on path multi vnics mode. If we're not in MF mode and in * 4-port mode, this is good enough to check vnic-0 of the other port * on the same path */ if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { uint8_t other_port = !(PORT_ID(sc) & 1); uint8_t abs_func_other_port = (SC_PATH(sc) + (2 * other_port)); val = MFCFG_RD(sc, func_mf_config[abs_func_other_port].e1hov_tag); mf_info->path_has_ovlan = VALID_OVLAN((uint16_t)val) ? 1 : 0; } } if (mf_info->mf_mode == SINGLE_FUNCTION) { /* invalid MF config */ if (SC_VN(sc) >= 1) { BLOGE(sc, "VNIC ID >= 1 in SF mode\n"); return (1); } return (0); } /* get the MF configuration */ mf_info->mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); switch(mf_info->mf_mode) { case MULTI_FUNCTION_SD: bxe_get_shmem_mf_cfg_info_sd(sc); break; case MULTI_FUNCTION_SI: bxe_get_shmem_mf_cfg_info_si(sc); break; case MULTI_FUNCTION_AFEX: bxe_get_shmem_mf_cfg_info_niv(sc); break; default: BLOGE(sc, "Get MF config failed (mf_mode=0x%08x)\n", mf_info->mf_mode); return (1); } /* get the congestion management parameters */ vnic = 0; FOREACH_ABS_FUNC_IN_PORT(sc, i) { /* get min/max bw */ val = MFCFG_RD(sc, func_mf_config[i].config); mf_info->min_bw[vnic] = ((val & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT); mf_info->max_bw[vnic] = ((val & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); vnic++; } return (bxe_check_valid_mf_cfg(sc)); } static int bxe_get_shmem_info(struct bxe_softc *sc) { int port; uint32_t mac_hi, mac_lo, val; port = SC_PORT(sc); mac_hi = mac_lo = 0; sc->link_params.sc = sc; sc->link_params.port = port; /* get the hardware config info */ sc->devinfo.hw_config = SHMEM_RD(sc, dev_info.shared_hw_config.config); sc->devinfo.hw_config2 = SHMEM_RD(sc, dev_info.shared_hw_config.config2); sc->link_params.hw_led_mode = ((sc->devinfo.hw_config & SHARED_HW_CFG_LED_MODE_MASK) >> SHARED_HW_CFG_LED_MODE_SHIFT); /* get the port feature config */ sc->port.config = SHMEM_RD(sc, dev_info.port_feature_config[port].config); /* get the link params */ sc->link_params.speed_cap_mask[0] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask); sc->link_params.speed_cap_mask[1] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask2); /* get the lane config */ sc->link_params.lane_config = SHMEM_RD(sc, dev_info.port_hw_config[port].lane_config); /* get the link config */ val = SHMEM_RD(sc, dev_info.port_feature_config[port].link_config); sc->port.link_config[ELINK_INT_PHY] = val; sc->link_params.switch_cfg = (val & PORT_FEATURE_CONNECTED_SWITCH_MASK); sc->port.link_config[ELINK_EXT_PHY1] = SHMEM_RD(sc, dev_info.port_feature_config[port].link_config2); /* get the override preemphasis flag and enable it or turn it off */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); if (val & SHARED_FEAT_CFG_OVERRIDE_PREEMPHASIS_CFG_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } else { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } /* get the initial value of the link params */ sc->link_params.multi_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].multi_phy_config); /* get external phy info */ sc->port.ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); /* get the multifunction configuration */ bxe_get_mf_cfg_info(sc); /* get the mac address */ if (IS_MF(sc)) { mac_hi = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); mac_lo = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_lower); } else { mac_hi = SHMEM_RD(sc, dev_info.port_hw_config[port].mac_upper); mac_lo = SHMEM_RD(sc, dev_info.port_hw_config[port].mac_lower); } if ((mac_lo == 0) && (mac_hi == 0)) { *sc->mac_addr_str = 0; BLOGE(sc, "No Ethernet address programmed!\n"); } else { sc->link_params.mac_addr[0] = (uint8_t)(mac_hi >> 8); sc->link_params.mac_addr[1] = (uint8_t)(mac_hi); sc->link_params.mac_addr[2] = (uint8_t)(mac_lo >> 24); sc->link_params.mac_addr[3] = (uint8_t)(mac_lo >> 16); sc->link_params.mac_addr[4] = (uint8_t)(mac_lo >> 8); sc->link_params.mac_addr[5] = (uint8_t)(mac_lo); snprintf(sc->mac_addr_str, sizeof(sc->mac_addr_str), "%02x:%02x:%02x:%02x:%02x:%02x", sc->link_params.mac_addr[0], sc->link_params.mac_addr[1], sc->link_params.mac_addr[2], sc->link_params.mac_addr[3], sc->link_params.mac_addr[4], sc->link_params.mac_addr[5]); BLOGD(sc, DBG_LOAD, "Ethernet address: %s\n", sc->mac_addr_str); } return (0); } static void bxe_get_tunable_params(struct bxe_softc *sc) { /* sanity checks */ if ((bxe_interrupt_mode != INTR_MODE_INTX) && (bxe_interrupt_mode != INTR_MODE_MSI) && (bxe_interrupt_mode != INTR_MODE_MSIX)) { BLOGW(sc, "invalid interrupt_mode value (%d)\n", bxe_interrupt_mode); bxe_interrupt_mode = INTR_MODE_MSIX; } if ((bxe_queue_count < 0) || (bxe_queue_count > MAX_RSS_CHAINS)) { BLOGW(sc, "invalid queue_count value (%d)\n", bxe_queue_count); bxe_queue_count = 0; } if ((bxe_max_rx_bufs < 1) || (bxe_max_rx_bufs > RX_BD_USABLE)) { if (bxe_max_rx_bufs == 0) { bxe_max_rx_bufs = RX_BD_USABLE; } else { BLOGW(sc, "invalid max_rx_bufs (%d)\n", bxe_max_rx_bufs); bxe_max_rx_bufs = 2048; } } if ((bxe_hc_rx_ticks < 1) || (bxe_hc_rx_ticks > 100)) { BLOGW(sc, "invalid hc_rx_ticks (%d)\n", bxe_hc_rx_ticks); bxe_hc_rx_ticks = 25; } if ((bxe_hc_tx_ticks < 1) || (bxe_hc_tx_ticks > 100)) { BLOGW(sc, "invalid hc_tx_ticks (%d)\n", bxe_hc_tx_ticks); bxe_hc_tx_ticks = 50; } if (bxe_max_aggregation_size == 0) { bxe_max_aggregation_size = TPA_AGG_SIZE; } if (bxe_max_aggregation_size > 0xffff) { BLOGW(sc, "invalid max_aggregation_size (%d)\n", bxe_max_aggregation_size); bxe_max_aggregation_size = TPA_AGG_SIZE; } if ((bxe_mrrs < -1) || (bxe_mrrs > 3)) { BLOGW(sc, "invalid mrrs (%d)\n", bxe_mrrs); bxe_mrrs = -1; } if ((bxe_autogreeen < 0) || (bxe_autogreeen > 2)) { BLOGW(sc, "invalid autogreeen (%d)\n", bxe_autogreeen); bxe_autogreeen = 0; } if ((bxe_udp_rss < 0) || (bxe_udp_rss > 1)) { BLOGW(sc, "invalid udp_rss (%d)\n", bxe_udp_rss); bxe_udp_rss = 0; } /* pull in user settings */ sc->interrupt_mode = bxe_interrupt_mode; sc->max_rx_bufs = bxe_max_rx_bufs; sc->hc_rx_ticks = bxe_hc_rx_ticks; sc->hc_tx_ticks = bxe_hc_tx_ticks; sc->max_aggregation_size = bxe_max_aggregation_size; sc->mrrs = bxe_mrrs; sc->autogreeen = bxe_autogreeen; sc->udp_rss = bxe_udp_rss; if (bxe_interrupt_mode == INTR_MODE_INTX) { sc->num_queues = 1; } else { /* INTR_MODE_MSI or INTR_MODE_MSIX */ sc->num_queues = min((bxe_queue_count ? bxe_queue_count : mp_ncpus), MAX_RSS_CHAINS); if (sc->num_queues > mp_ncpus) { sc->num_queues = mp_ncpus; } } BLOGD(sc, DBG_LOAD, "User Config: " "debug=0x%lx " "interrupt_mode=%d " "queue_count=%d " "hc_rx_ticks=%d " "hc_tx_ticks=%d " "rx_budget=%d " "max_aggregation_size=%d " "mrrs=%d " "autogreeen=%d " "udp_rss=%d\n", bxe_debug, sc->interrupt_mode, sc->num_queues, sc->hc_rx_ticks, sc->hc_tx_ticks, bxe_rx_budget, sc->max_aggregation_size, sc->mrrs, sc->autogreeen, sc->udp_rss); } static int bxe_media_detect(struct bxe_softc *sc) { int port_type; uint32_t phy_idx = bxe_get_cur_phy_idx(sc); switch (sc->link_params.phy[phy_idx].media_type) { case ELINK_ETH_PHY_SFPP_10G_FIBER: case ELINK_ETH_PHY_XFP_FIBER: BLOGI(sc, "Found 10Gb Fiber media.\n"); sc->media = IFM_10G_SR; port_type = PORT_FIBRE; break; case ELINK_ETH_PHY_SFP_1G_FIBER: BLOGI(sc, "Found 1Gb Fiber media.\n"); sc->media = IFM_1000_SX; port_type = PORT_FIBRE; break; case ELINK_ETH_PHY_KR: case ELINK_ETH_PHY_CX4: BLOGI(sc, "Found 10GBase-CX4 media.\n"); sc->media = IFM_10G_CX4; port_type = PORT_FIBRE; break; case ELINK_ETH_PHY_DA_TWINAX: BLOGI(sc, "Found 10Gb Twinax media.\n"); sc->media = IFM_10G_TWINAX; port_type = PORT_DA; break; case ELINK_ETH_PHY_BASE_T: if (sc->link_params.speed_cap_mask[0] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) { BLOGI(sc, "Found 10GBase-T media.\n"); sc->media = IFM_10G_T; port_type = PORT_TP; } else { BLOGI(sc, "Found 1000Base-T media.\n"); sc->media = IFM_1000_T; port_type = PORT_TP; } break; case ELINK_ETH_PHY_NOT_PRESENT: BLOGI(sc, "Media not present.\n"); sc->media = 0; port_type = PORT_OTHER; break; case ELINK_ETH_PHY_UNSPECIFIED: default: BLOGI(sc, "Unknown media!\n"); sc->media = 0; port_type = PORT_OTHER; break; } return port_type; } #define GET_FIELD(value, fname) \ (((value) & (fname##_MASK)) >> (fname##_SHIFT)) #define IGU_FID(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID) #define IGU_VEC(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_VECTOR) static int bxe_get_igu_cam_info(struct bxe_softc *sc) { int pfid = SC_FUNC(sc); int igu_sb_id; uint32_t val; uint8_t fid, igu_sb_cnt = 0; sc->igu_base_sb = 0xff; if (CHIP_INT_MODE_IS_BC(sc)) { int vn = SC_VN(sc); igu_sb_cnt = sc->igu_sb_cnt; sc->igu_base_sb = ((CHIP_IS_MODE_4_PORT(sc) ? pfid : vn) * FP_SB_MAX_E1x); sc->igu_dsb_id = (E1HVN_MAX * FP_SB_MAX_E1x + (CHIP_IS_MODE_4_PORT(sc) ? pfid : vn)); return (0); } /* IGU in normal mode - read CAM */ for (igu_sb_id = 0; igu_sb_id < IGU_REG_MAPPING_MEMORY_SIZE; igu_sb_id++) { val = REG_RD(sc, IGU_REG_MAPPING_MEMORY + igu_sb_id * 4); if (!(val & IGU_REG_MAPPING_MEMORY_VALID)) { continue; } fid = IGU_FID(val); if ((fid & IGU_FID_ENCODE_IS_PF)) { if ((fid & IGU_FID_PF_NUM_MASK) != pfid) { continue; } if (IGU_VEC(val) == 0) { /* default status block */ sc->igu_dsb_id = igu_sb_id; } else { if (sc->igu_base_sb == 0xff) { sc->igu_base_sb = igu_sb_id; } igu_sb_cnt++; } } } /* * Due to new PF resource allocation by MFW T7.4 and above, it's optional * that number of CAM entries will not be equal to the value advertised in * PCI. Driver should use the minimal value of both as the actual status * block count */ sc->igu_sb_cnt = min(sc->igu_sb_cnt, igu_sb_cnt); if (igu_sb_cnt == 0) { BLOGE(sc, "CAM configuration error\n"); return (-1); } return (0); } /* * Gather various information from the device config space, the device itself, * shmem, and the user input. */ static int bxe_get_device_info(struct bxe_softc *sc) { uint32_t val; int rc; /* Get the data for the device */ sc->devinfo.vendor_id = pci_get_vendor(sc->dev); sc->devinfo.device_id = pci_get_device(sc->dev); sc->devinfo.subvendor_id = pci_get_subvendor(sc->dev); sc->devinfo.subdevice_id = pci_get_subdevice(sc->dev); /* get the chip revision (chip metal comes from pci config space) */ sc->devinfo.chip_id = sc->link_params.chip_id = (((REG_RD(sc, MISC_REG_CHIP_NUM) & 0xffff) << 16) | ((REG_RD(sc, MISC_REG_CHIP_REV) & 0xf) << 12) | (((REG_RD(sc, PCICFG_OFFSET + PCI_ID_VAL3) >> 24) & 0xf) << 4) | ((REG_RD(sc, MISC_REG_BOND_ID) & 0xf) << 0)); /* force 57811 according to MISC register */ if (REG_RD(sc, MISC_REG_CHIP_TYPE) & MISC_REG_CHIP_TYPE_57811_MASK) { if (CHIP_IS_57810(sc)) { sc->devinfo.chip_id = ((CHIP_NUM_57811 << 16) | (sc->devinfo.chip_id & 0x0000ffff)); } else if (CHIP_IS_57810_MF(sc)) { sc->devinfo.chip_id = ((CHIP_NUM_57811_MF << 16) | (sc->devinfo.chip_id & 0x0000ffff)); } sc->devinfo.chip_id |= 0x1; } BLOGD(sc, DBG_LOAD, "chip_id=0x%08x (num=0x%04x rev=0x%01x metal=0x%02x bond=0x%01x)\n", sc->devinfo.chip_id, ((sc->devinfo.chip_id >> 16) & 0xffff), ((sc->devinfo.chip_id >> 12) & 0xf), ((sc->devinfo.chip_id >> 4) & 0xff), ((sc->devinfo.chip_id >> 0) & 0xf)); val = (REG_RD(sc, 0x2874) & 0x55); if ((sc->devinfo.chip_id & 0x1) || (CHIP_IS_E1(sc) && val) || (CHIP_IS_E1H(sc) && (val == 0x55))) { sc->flags |= BXE_ONE_PORT_FLAG; BLOGD(sc, DBG_LOAD, "single port device\n"); } /* set the doorbell size */ sc->doorbell_size = (1 << BXE_DB_SHIFT); /* determine whether the device is in 2 port or 4 port mode */ sc->devinfo.chip_port_mode = CHIP_PORT_MODE_NONE; /* E1 & E1h*/ if (CHIP_IS_E2E3(sc)) { /* * Read port4mode_en_ovwr[0]: * If 1, four port mode is in port4mode_en_ovwr[1]. * If 0, four port mode is in port4mode_en[0]. */ val = REG_RD(sc, MISC_REG_PORT4MODE_EN_OVWR); if (val & 1) { val = ((val >> 1) & 1); } else { val = REG_RD(sc, MISC_REG_PORT4MODE_EN); } sc->devinfo.chip_port_mode = (val) ? CHIP_4_PORT_MODE : CHIP_2_PORT_MODE; BLOGD(sc, DBG_LOAD, "Port mode = %s\n", (val) ? "4" : "2"); } /* get the function and path info for the device */ bxe_get_function_num(sc); /* get the shared memory base address */ sc->devinfo.shmem_base = sc->link_params.shmem_base = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); sc->devinfo.shmem2_base = REG_RD(sc, (SC_PATH(sc) ? MISC_REG_GENERIC_CR_1 : MISC_REG_GENERIC_CR_0)); BLOGD(sc, DBG_LOAD, "shmem_base=0x%08x, shmem2_base=0x%08x\n", sc->devinfo.shmem_base, sc->devinfo.shmem2_base); if (!sc->devinfo.shmem_base) { /* this should ONLY prevent upcoming shmem reads */ BLOGI(sc, "MCP not active\n"); sc->flags |= BXE_NO_MCP_FLAG; return (0); } /* make sure the shared memory contents are valid */ val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]); if ((val & (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) != (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) { BLOGE(sc, "Invalid SHMEM validity signature: 0x%08x\n", val); return (0); } BLOGD(sc, DBG_LOAD, "Valid SHMEM validity signature: 0x%08x\n", val); /* get the bootcode version */ sc->devinfo.bc_ver = SHMEM_RD(sc, dev_info.bc_rev); snprintf(sc->devinfo.bc_ver_str, sizeof(sc->devinfo.bc_ver_str), "%d.%d.%d", ((sc->devinfo.bc_ver >> 24) & 0xff), ((sc->devinfo.bc_ver >> 16) & 0xff), ((sc->devinfo.bc_ver >> 8) & 0xff)); BLOGD(sc, DBG_LOAD, "Bootcode version: %s\n", sc->devinfo.bc_ver_str); /* get the bootcode shmem address */ sc->devinfo.mf_cfg_base = bxe_get_shmem_mf_cfg_base(sc); BLOGD(sc, DBG_LOAD, "mf_cfg_base=0x08%x \n", sc->devinfo.mf_cfg_base); /* clean indirect addresses as they're not used */ pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); if (IS_PF(sc)) { REG_WR(sc, PXP2_REG_PGL_ADDR_88_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_90_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_94_F0, 0); if (CHIP_IS_E1x(sc)) { REG_WR(sc, PXP2_REG_PGL_ADDR_88_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_90_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_94_F1, 0); } /* * Enable internal target-read (in case we are probed after PF * FLR). Must be done prior to any BAR read access. Only for * 57712 and up */ if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); } } /* get the nvram size */ val = REG_RD(sc, MCP_REG_MCPR_NVM_CFG4); sc->devinfo.flash_size = (NVRAM_1MB_SIZE << (val & MCPR_NVM_CFG4_FLASH_SIZE)); BLOGD(sc, DBG_LOAD, "nvram flash size: %d\n", sc->devinfo.flash_size); /* get PCI capabilites */ bxe_probe_pci_caps(sc); bxe_set_power_state(sc, PCI_PM_D0); /* get various configuration parameters from shmem */ bxe_get_shmem_info(sc); if (sc->devinfo.pcie_msix_cap_reg != 0) { val = pci_read_config(sc->dev, (sc->devinfo.pcie_msix_cap_reg + PCIR_MSIX_CTRL), 2); sc->igu_sb_cnt = (val & PCIM_MSIXCTRL_TABLE_SIZE); } else { sc->igu_sb_cnt = 1; } sc->igu_base_addr = BAR_IGU_INTMEM; /* initialize IGU parameters */ if (CHIP_IS_E1x(sc)) { sc->devinfo.int_block = INT_BLOCK_HC; sc->igu_dsb_id = DEF_SB_IGU_ID; sc->igu_base_sb = 0; } else { sc->devinfo.int_block = INT_BLOCK_IGU; /* do not allow device reset during IGU info preocessing */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { int tout = 5000; BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode\n"); val &= ~(IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN); REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, val); REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x7f); while (tout && REG_RD(sc, IGU_REG_RESET_MEMORIES)) { tout--; DELAY(1000); } if (REG_RD(sc, IGU_REG_RESET_MEMORIES)) { BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode failed!!!\n"); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); return (-1); } } if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { BLOGD(sc, DBG_LOAD, "IGU Backward Compatible Mode\n"); sc->devinfo.int_block |= INT_BLOCK_MODE_BW_COMP; } else { BLOGD(sc, DBG_LOAD, "IGU Normal Mode\n"); } rc = bxe_get_igu_cam_info(sc); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); if (rc) { return (rc); } } /* * Get base FW non-default (fast path) status block ID. This value is * used to initialize the fw_sb_id saved on the fp/queue structure to * determine the id used by the FW. */ if (CHIP_IS_E1x(sc)) { sc->base_fw_ndsb = ((SC_PORT(sc) * FP_SB_MAX_E1x) + SC_L_ID(sc)); } else { /* * 57712+ - We currently use one FW SB per IGU SB (Rx and Tx of * the same queue are indicated on the same IGU SB). So we prefer * FW and IGU SBs to be the same value. */ sc->base_fw_ndsb = sc->igu_base_sb; } BLOGD(sc, DBG_LOAD, "igu_dsb_id=%d igu_base_sb=%d igu_sb_cnt=%d base_fw_ndsb=%d\n", sc->igu_dsb_id, sc->igu_base_sb, sc->igu_sb_cnt, sc->base_fw_ndsb); elink_phy_probe(&sc->link_params); return (0); } static void bxe_link_settings_supported(struct bxe_softc *sc, uint32_t switch_cfg) { uint32_t cfg_size = 0; uint32_t idx; uint8_t port = SC_PORT(sc); /* aggregation of supported attributes of all external phys */ sc->port.supported[0] = 0; sc->port.supported[1] = 0; switch (sc->link_params.num_phys) { case 1: sc->port.supported[0] = sc->link_params.phy[ELINK_INT_PHY].supported; cfg_size = 1; break; case 2: sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY1].supported; cfg_size = 1; break; case 3: if (sc->link_params.multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED) { sc->port.supported[1] = sc->link_params.phy[ELINK_EXT_PHY1].supported; sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY2].supported; } else { sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY1].supported; sc->port.supported[1] = sc->link_params.phy[ELINK_EXT_PHY2].supported; } cfg_size = 2; break; } if (!(sc->port.supported[0] || sc->port.supported[1])) { BLOGE(sc, "Invalid phy config in NVRAM (PHY1=0x%08x PHY2=0x%08x)\n", SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config), SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config2)); return; } if (CHIP_IS_E3(sc)) sc->port.phy_addr = REG_RD(sc, MISC_REG_WC0_CTRL_PHY_ADDR); else { switch (switch_cfg) { case ELINK_SWITCH_CFG_1G: sc->port.phy_addr = REG_RD(sc, NIG_REG_SERDES0_CTRL_PHY_ADDR + port*0x10); break; case ELINK_SWITCH_CFG_10G: sc->port.phy_addr = REG_RD(sc, NIG_REG_XGXS0_CTRL_PHY_ADDR + port*0x18); break; default: BLOGE(sc, "Invalid switch config in link_config=0x%08x\n", sc->port.link_config[0]); return; } } BLOGD(sc, DBG_LOAD, "PHY addr 0x%08x\n", sc->port.phy_addr); /* mask what we support according to speed_cap_mask per configuration */ for (idx = 0; idx < cfg_size; idx++) { if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_1000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_2500baseX_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_20000baseKR2_Full; } } BLOGD(sc, DBG_LOAD, "PHY supported 0=0x%08x 1=0x%08x\n", sc->port.supported[0], sc->port.supported[1]); ELINK_DEBUG_P2(sc, "PHY supported 0=0x%08x 1=0x%08x\n", sc->port.supported[0], sc->port.supported[1]); } static void bxe_link_settings_requested(struct bxe_softc *sc) { uint32_t link_config; uint32_t idx; uint32_t cfg_size = 0; sc->port.advertising[0] = 0; sc->port.advertising[1] = 0; switch (sc->link_params.num_phys) { case 1: case 2: cfg_size = 1; break; case 3: cfg_size = 2; break; } for (idx = 0; idx < cfg_size; idx++) { sc->link_params.req_duplex[idx] = DUPLEX_FULL; link_config = sc->port.link_config[idx]; switch (link_config & PORT_FEATURE_LINK_SPEED_MASK) { case PORT_FEATURE_LINK_SPEED_AUTO: if (sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] |= sc->port.supported[idx]; if (sc->link_params.phy[ELINK_EXT_PHY1].type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) sc->port.advertising[idx] |= (ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full); } else { /* force 10G, no AN */ sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); continue; } break; case PORT_FEATURE_LINK_SPEED_10M_FULL: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Half | ADVERTISED_TP); ELINK_DEBUG_P1(sc, "driver requesting DUPLEX_HALF req_duplex = %x!\n", sc->link_params.req_duplex[idx]); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_FULL: if (sc->port.supported[idx] & ELINK_SUPPORTED_100baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_100baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Half | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_1G: if (sc->port.supported[idx] & ELINK_SUPPORTED_1000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_1000; sc->port.advertising[idx] |= (ADVERTISED_1000baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_2_5G: if (sc->port.supported[idx] & ELINK_SUPPORTED_2500baseX_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_2500; sc->port.advertising[idx] |= (ADVERTISED_2500baseX_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10G_CX4: if (sc->port.supported[idx] & ELINK_SUPPORTED_10000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_20G: sc->link_params.req_line_speed[idx] = ELINK_SPEED_20000; break; default: BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] = sc->port.supported[idx]; break; } sc->link_params.req_flow_ctrl[idx] = (link_config & PORT_FEATURE_FLOW_CONTROL_MASK); if (sc->link_params.req_flow_ctrl[idx] == ELINK_FLOW_CTRL_AUTO) { if (!(sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg)) { sc->link_params.req_flow_ctrl[idx] = ELINK_FLOW_CTRL_NONE; } else { bxe_set_requested_fc(sc); } } BLOGD(sc, DBG_LOAD, "req_line_speed=%d req_duplex=%d " "req_flow_ctrl=0x%x advertising=0x%x\n", sc->link_params.req_line_speed[idx], sc->link_params.req_duplex[idx], sc->link_params.req_flow_ctrl[idx], sc->port.advertising[idx]); ELINK_DEBUG_P3(sc, "req_line_speed=%d req_duplex=%d " "advertising=0x%x\n", sc->link_params.req_line_speed[idx], sc->link_params.req_duplex[idx], sc->port.advertising[idx]); } } static void bxe_get_phy_info(struct bxe_softc *sc) { uint8_t port = SC_PORT(sc); uint32_t config = sc->port.config; uint32_t eee_mode; /* shmem data already read in bxe_get_shmem_info() */ ELINK_DEBUG_P3(sc, "lane_config=0x%08x speed_cap_mask0=0x%08x " "link_config0=0x%08x\n", sc->link_params.lane_config, sc->link_params.speed_cap_mask[0], sc->port.link_config[0]); bxe_link_settings_supported(sc, sc->link_params.switch_cfg); bxe_link_settings_requested(sc); if (sc->autogreeen == AUTO_GREEN_FORCE_ON) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (sc->autogreeen == AUTO_GREEN_FORCE_OFF) { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (config & PORT_FEAT_CFG_AUTOGREEEN_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } /* configure link feature according to nvram value */ eee_mode = (((SHMEM_RD(sc, dev_info.port_feature_config[port].eee_power_mode)) & PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >> PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT); if (eee_mode != PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED) { sc->link_params.eee_mode = (ELINK_EEE_MODE_ADV_LPI | ELINK_EEE_MODE_ENABLE_LPI | ELINK_EEE_MODE_OUTPUT_TIME); } else { sc->link_params.eee_mode = 0; } /* get the media type */ bxe_media_detect(sc); ELINK_DEBUG_P1(sc, "detected media type\n", sc->media); } static void bxe_get_params(struct bxe_softc *sc) { /* get user tunable params */ bxe_get_tunable_params(sc); /* select the RX and TX ring sizes */ sc->tx_ring_size = TX_BD_USABLE; sc->rx_ring_size = RX_BD_USABLE; /* XXX disable WoL */ sc->wol = 0; } static void bxe_set_modes_bitmap(struct bxe_softc *sc) { uint32_t flags = 0; if (CHIP_REV_IS_FPGA(sc)) { SET_FLAGS(flags, MODE_FPGA); } else if (CHIP_REV_IS_EMUL(sc)) { SET_FLAGS(flags, MODE_EMUL); } else { SET_FLAGS(flags, MODE_ASIC); } if (CHIP_IS_MODE_4_PORT(sc)) { SET_FLAGS(flags, MODE_PORT4); } else { SET_FLAGS(flags, MODE_PORT2); } if (CHIP_IS_E2(sc)) { SET_FLAGS(flags, MODE_E2); } else if (CHIP_IS_E3(sc)) { SET_FLAGS(flags, MODE_E3); if (CHIP_REV(sc) == CHIP_REV_Ax) { SET_FLAGS(flags, MODE_E3_A0); } else /*if (CHIP_REV(sc) == CHIP_REV_Bx)*/ { SET_FLAGS(flags, MODE_E3_B0 | MODE_COS3); } } if (IS_MF(sc)) { SET_FLAGS(flags, MODE_MF); switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: SET_FLAGS(flags, MODE_MF_SD); break; case MULTI_FUNCTION_SI: SET_FLAGS(flags, MODE_MF_SI); break; case MULTI_FUNCTION_AFEX: SET_FLAGS(flags, MODE_MF_AFEX); break; } } else { SET_FLAGS(flags, MODE_SF); } #if defined(__LITTLE_ENDIAN) SET_FLAGS(flags, MODE_LITTLE_ENDIAN); #else /* __BIG_ENDIAN */ SET_FLAGS(flags, MODE_BIG_ENDIAN); #endif INIT_MODE_FLAGS(sc) = flags; } static int bxe_alloc_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; bus_addr_t busaddr; int max_agg_queues; int max_segments; bus_size_t max_size; bus_size_t max_seg_size; char buf[32]; int rc; int i, j; /* XXX zero out all vars here and call bxe_alloc_hsi_mem on error */ /* allocate the parent bus DMA tag */ rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BUS_SPACE_MAXSIZE_32BIT, /* max map size */ BUS_SPACE_UNRESTRICTED, /* num discontinuous */ BUS_SPACE_MAXSIZE_32BIT, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &sc->parent_dma_tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to alloc parent DMA tag (%d)!\n", rc); return (1); } /************************/ /* DEFAULT STATUS BLOCK */ /************************/ if (bxe_dma_alloc(sc, sizeof(struct host_sp_status_block), &sc->def_sb_dma, "default status block") != 0) { /* XXX */ bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->def_sb = (struct host_sp_status_block *)sc->def_sb_dma.vaddr; /***************/ /* EVENT QUEUE */ /***************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->eq_dma, "event queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->eq = (union event_ring_elem * )sc->eq_dma.vaddr; /*************/ /* SLOW PATH */ /*************/ if (bxe_dma_alloc(sc, sizeof(struct bxe_slowpath), &sc->sp_dma, "slow path") != 0) { /* XXX */ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->sp = (struct bxe_slowpath *)sc->sp_dma.vaddr; /*******************/ /* SLOW PATH QUEUE */ /*******************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->spq_dma, "slow path queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->spq = (struct eth_spe *)sc->spq_dma.vaddr; /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ if (bxe_dma_alloc(sc, FW_BUF_SIZE, &sc->gz_buf_dma, "fw decompression buffer") != 0) { /* XXX */ bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->gz_buf = (void *)sc->gz_buf_dma.vaddr; if ((sc->gz_strm = malloc(sizeof(*sc->gz_strm), M_DEVBUF, M_NOWAIT)) == NULL) { /* XXX */ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } /*************/ /* FASTPATHS */ /*************/ /* allocate DMA memory for each fastpath structure */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->sc = sc; fp->index = i; /*******************/ /* FP STATUS BLOCK */ /*******************/ snprintf(buf, sizeof(buf), "fp %d status block", i); if (bxe_dma_alloc(sc, sizeof(union bxe_host_hc_status_block), &fp->sb_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { if (CHIP_IS_E2E3(sc)) { fp->status_block.e2_sb = (struct host_hc_status_block_e2 *)fp->sb_dma.vaddr; } else { fp->status_block.e1x_sb = (struct host_hc_status_block_e1x *)fp->sb_dma.vaddr; } } /******************/ /* FP TX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d tx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * TX_BD_NUM_PAGES), &fp->tx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->tx_chain = (union eth_tx_bd_types *)fp->tx_dma.vaddr; } /* link together the tx bd chain pages */ for (j = 1; j <= TX_BD_NUM_PAGES; j++) { /* index into the tx bd chain array to last entry per page */ struct eth_tx_next_bd *tx_next_bd = &fp->tx_chain[TX_BD_TOTAL_PER_PAGE * j - 1].next_bd; /* point to the next page and wrap from last page */ busaddr = (fp->tx_dma.paddr + (BCM_PAGE_SIZE * (j % TX_BD_NUM_PAGES))); tx_next_bd->addr_hi = htole32(U64_HI(busaddr)); tx_next_bd->addr_lo = htole32(U64_LO(busaddr)); } /******************/ /* FP RX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d rx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RX_BD_NUM_PAGES), &fp->rx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_chain = (struct eth_rx_bd *)fp->rx_dma.vaddr; } /* link together the rx bd chain pages */ for (j = 1; j <= RX_BD_NUM_PAGES; j++) { /* index into the rx bd chain array to last entry per page */ struct eth_rx_bd *rx_bd = &fp->rx_chain[RX_BD_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_dma.paddr + (BCM_PAGE_SIZE * (j % RX_BD_NUM_PAGES))); rx_bd->addr_hi = htole32(U64_HI(busaddr)); rx_bd->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX RCQ CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d rcq chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RCQ_NUM_PAGES), &fp->rcq_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rcq_chain = (union eth_rx_cqe *)fp->rcq_dma.vaddr; } /* link together the rcq chain pages */ for (j = 1; j <= RCQ_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct eth_rx_cqe_next_page *rx_cqe_next = (struct eth_rx_cqe_next_page *) &fp->rcq_chain[RCQ_TOTAL_PER_PAGE * j - 1]; /* point to the next page and wrap from last page */ busaddr = (fp->rcq_dma.paddr + (BCM_PAGE_SIZE * (j % RCQ_NUM_PAGES))); rx_cqe_next->addr_hi = htole32(U64_HI(busaddr)); rx_cqe_next->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX SGE CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d sge chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RX_SGE_NUM_PAGES), &fp->rx_sge_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_sge_chain = (struct eth_rx_sge *)fp->rx_sge_dma.vaddr; } /* link together the sge chain pages */ for (j = 1; j <= RX_SGE_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct eth_rx_sge *rx_sge = &fp->rx_sge_chain[RX_SGE_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_sge_dma.paddr + (BCM_PAGE_SIZE * (j % RX_SGE_NUM_PAGES))); rx_sge->addr_hi = htole32(U64_HI(busaddr)); rx_sge->addr_lo = htole32(U64_LO(busaddr)); } /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ /* set required sizes before mapping to conserve resources */ if (if_getcapenable(sc->ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) { max_size = BXE_TSO_MAX_SIZE; max_segments = BXE_TSO_MAX_SEGMENTS; max_seg_size = BXE_TSO_MAX_SEG_SIZE; } else { max_size = (MCLBYTES * BXE_MAX_SEGMENTS); max_segments = BXE_MAX_SEGMENTS; max_seg_size = MCLBYTES; } /* create a dma tag for the tx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ max_size, /* max map size */ max_segments, /* num discontinuous */ max_seg_size, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->tx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d tx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the tx mbuf clusters */ for (j = 0; j < TX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->tx_mbuf_tag, BUS_DMA_NOWAIT, &fp->tx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d tx mbuf %d' (%d)\n", i, j, rc); return (1); } } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ /* create a dma tag for the rx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ MJUM9BYTES, /* max map size */ 1, /* num discontinuous */ MJUM9BYTES, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the rx mbuf clusters */ for (j = 0; j < RX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ /* create a dma tag for the rx sge mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BCM_PAGE_SIZE, /* max map size */ 1, /* num discontinuous */ BCM_PAGE_SIZE, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_sge_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx sge mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for the rx sge mbuf clusters */ for (j = 0; j < RX_SGE_TOTAL; j++) { if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx sge mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx sge mbuf cluster */ if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx sge mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ /* create dma maps for the rx tpa mbuf clusters */ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info[j].bd.m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx tpa mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx tpa mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx tpa mbuf' (%d)\n", i, rc); return (1); } bxe_init_sge_ring_bit_mask(fp); } return (0); } static void bxe_free_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; int max_agg_queues; int i, j; if (sc->parent_dma_tag == NULL) { return; /* assume nothing was allocated */ } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; /*******************/ /* FP STATUS BLOCK */ /*******************/ bxe_dma_free(sc, &fp->sb_dma); memset(&fp->status_block, 0, sizeof(fp->status_block)); /******************/ /* FP TX BD CHAIN */ /******************/ bxe_dma_free(sc, &fp->tx_dma); fp->tx_chain = NULL; /******************/ /* FP RX BD CHAIN */ /******************/ bxe_dma_free(sc, &fp->rx_dma); fp->rx_chain = NULL; /*******************/ /* FP RX RCQ CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rcq_dma); fp->rcq_chain = NULL; /*******************/ /* FP RX SGE CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rx_sge_dma); fp->rx_sge_chain = NULL; /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ if (fp->tx_mbuf_tag != NULL) { for (j = 0; j < TX_BD_TOTAL; j++) { if (fp->tx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); } } bus_dma_tag_destroy(fp->tx_mbuf_tag); fp->tx_mbuf_tag = NULL; } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ if (fp->rx_mbuf_tag != NULL) { for (j = 0; j < RX_BD_TOTAL; j++) { if (fp->rx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); } } if (fp->rx_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (fp->rx_tpa_info[j].bd.m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); } } if (fp->rx_tpa_info_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_mbuf_tag); fp->rx_mbuf_tag = NULL; } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ if (fp->rx_sge_mbuf_tag != NULL) { for (j = 0; j < RX_SGE_TOTAL; j++) { if (fp->rx_sge_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); } } if (fp->rx_sge_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_sge_mbuf_tag); fp->rx_sge_mbuf_tag = NULL; } } /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; free(sc->gz_strm, M_DEVBUF); sc->gz_strm = NULL; /*******************/ /* SLOW PATH QUEUE */ /*******************/ bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; /*************/ /* SLOW PATH */ /*************/ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; /***************/ /* EVENT QUEUE */ /***************/ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; /************************/ /* DEFAULT STATUS BLOCK */ /************************/ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); sc->parent_dma_tag = NULL; } /* * Previous driver DMAE transaction may have occurred when pre-boot stage * ended and boot began. This would invalidate the addresses of the * transaction, resulting in was-error bit set in the PCI causing all * hw-to-host PCIe transactions to timeout. If this happened we want to clear * the interrupt which detected this from the pglueb and the was-done bit */ static void bxe_prev_interrupted_dmae(struct bxe_softc *sc) { uint32_t val; if (!CHIP_IS_E1x(sc)) { val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) { BLOGD(sc, DBG_LOAD, "Clearing 'was-error' bit that was set in pglueb"); REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, 1 << SC_FUNC(sc)); } } } static int bxe_prev_mcp_done(struct bxe_softc *sc) { uint32_t rc = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET); if (!rc) { BLOGE(sc, "MCP response failure, aborting\n"); return (-1); } return (0); } static struct bxe_prev_list_node * bxe_prev_path_get_entry(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; LIST_FOREACH(tmp, &bxe_prev_list, node) { if ((sc->pcie_bus == tmp->bus) && (sc->pcie_device == tmp->slot) && (SC_PATH(sc) == tmp->path)) { return (tmp); } } return (NULL); } static uint8_t bxe_prev_is_path_marked(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; int rc = FALSE; mtx_lock(&bxe_prev_mtx); tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (tmp->aer) { BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was marked by AER\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { rc = TRUE; BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was already cleaned from previous drivers\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } } mtx_unlock(&bxe_prev_mtx); return (rc); } static int bxe_prev_mark_path(struct bxe_softc *sc, uint8_t after_undi) { struct bxe_prev_list_node *tmp; mtx_lock(&bxe_prev_mtx); /* Check whether the entry for this path already exists */ tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (!tmp->aer) { BLOGD(sc, DBG_LOAD, "Re-marking AER in path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { BLOGD(sc, DBG_LOAD, "Removing AER indication from path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); tmp->aer = 0; } mtx_unlock(&bxe_prev_mtx); return (0); } mtx_unlock(&bxe_prev_mtx); /* Create an entry for this path and add it */ tmp = malloc(sizeof(struct bxe_prev_list_node), M_DEVBUF, (M_NOWAIT | M_ZERO)); if (!tmp) { BLOGE(sc, "Failed to allocate 'bxe_prev_list_node'\n"); return (-1); } tmp->bus = sc->pcie_bus; tmp->slot = sc->pcie_device; tmp->path = SC_PATH(sc); tmp->aer = 0; tmp->undi = after_undi ? (1 << SC_PORT(sc)) : 0; mtx_lock(&bxe_prev_mtx); BLOGD(sc, DBG_LOAD, "Marked path %d/%d/%d - finished previous unload\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); LIST_INSERT_HEAD(&bxe_prev_list, tmp, node); mtx_unlock(&bxe_prev_mtx); return (0); } static int bxe_do_flr(struct bxe_softc *sc) { int i; /* only E2 and onwards support FLR */ if (CHIP_IS_E1x(sc)) { BLOGD(sc, DBG_LOAD, "FLR not supported in E1/E1H\n"); return (-1); } /* only bootcode REQ_BC_VER_4_INITIATE_FLR and onwards support flr */ if (sc->devinfo.bc_ver < REQ_BC_VER_4_INITIATE_FLR) { BLOGD(sc, DBG_LOAD, "FLR not supported by BC_VER: 0x%08x\n", sc->devinfo.bc_ver); return (-1); } /* Wait for Transaction Pending bit clean */ for (i = 0; i < 4; i++) { if (i) { DELAY(((1 << (i - 1)) * 100) * 1000); } if (!bxe_is_pcie_pending(sc)) { goto clear; } } BLOGE(sc, "PCIE transaction is not cleared, " "proceeding with reset anyway\n"); clear: BLOGD(sc, DBG_LOAD, "Initiating FLR\n"); bxe_fw_command(sc, DRV_MSG_CODE_INITIATE_FLR, 0); return (0); } struct bxe_mac_vals { uint32_t xmac_addr; uint32_t xmac_val; uint32_t emac_addr; uint32_t emac_val; uint32_t umac_addr; uint32_t umac_val; uint32_t bmac_addr; uint32_t bmac_val[2]; }; static void bxe_prev_unload_close_mac(struct bxe_softc *sc, struct bxe_mac_vals *vals) { uint32_t val, base_addr, offset, mask, reset_reg; uint8_t mac_stopped = FALSE; uint8_t port = SC_PORT(sc); uint32_t wb_data[2]; /* reset addresses as they also mark which values were changed */ vals->bmac_addr = 0; vals->umac_addr = 0; vals->xmac_addr = 0; vals->emac_addr = 0; reset_reg = REG_RD(sc, MISC_REG_RESET_REG_2); if (!CHIP_IS_E3(sc)) { val = REG_RD(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4); mask = MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port; if ((mask & reset_reg) && val) { BLOGD(sc, DBG_LOAD, "Disable BMAC Rx\n"); base_addr = SC_PORT(sc) ? NIG_REG_INGRESS_BMAC1_MEM : NIG_REG_INGRESS_BMAC0_MEM; offset = CHIP_IS_E2(sc) ? BIGMAC2_REGISTER_BMAC_CONTROL : BIGMAC_REGISTER_BMAC_CONTROL; /* * use rd/wr since we cannot use dmae. This is safe * since MCP won't access the bus due to the request * to unload, and no function on the path can be * loaded at this time. */ wb_data[0] = REG_RD(sc, base_addr + offset); wb_data[1] = REG_RD(sc, base_addr + offset + 0x4); vals->bmac_addr = base_addr + offset; vals->bmac_val[0] = wb_data[0]; vals->bmac_val[1] = wb_data[1]; wb_data[0] &= ~ELINK_BMAC_CONTROL_RX_ENABLE; REG_WR(sc, vals->bmac_addr, wb_data[0]); REG_WR(sc, vals->bmac_addr + 0x4, wb_data[1]); } BLOGD(sc, DBG_LOAD, "Disable EMAC Rx\n"); vals->emac_addr = NIG_REG_NIG_EMAC0_EN + SC_PORT(sc)*4; vals->emac_val = REG_RD(sc, vals->emac_addr); REG_WR(sc, vals->emac_addr, 0); mac_stopped = TRUE; } else { if (reset_reg & MISC_REGISTERS_RESET_REG_2_XMAC) { BLOGD(sc, DBG_LOAD, "Disable XMAC Rx\n"); base_addr = SC_PORT(sc) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; val = REG_RD(sc, base_addr + XMAC_REG_PFC_CTRL_HI); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val & ~(1 << 1)); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val | (1 << 1)); vals->xmac_addr = base_addr + XMAC_REG_CTRL; vals->xmac_val = REG_RD(sc, vals->xmac_addr); REG_WR(sc, vals->xmac_addr, 0); mac_stopped = TRUE; } mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port; if (mask & reset_reg) { BLOGD(sc, DBG_LOAD, "Disable UMAC Rx\n"); base_addr = SC_PORT(sc) ? GRCBASE_UMAC1 : GRCBASE_UMAC0; vals->umac_addr = base_addr + UMAC_REG_COMMAND_CONFIG; vals->umac_val = REG_RD(sc, vals->umac_addr); REG_WR(sc, vals->umac_addr, 0); mac_stopped = TRUE; } } if (mac_stopped) { DELAY(20000); } } #define BXE_PREV_UNDI_PROD_ADDR(p) (BAR_TSTRORM_INTMEM + 0x1508 + ((p) << 4)) #define BXE_PREV_UNDI_RCQ(val) ((val) & 0xffff) #define BXE_PREV_UNDI_BD(val) ((val) >> 16 & 0xffff) #define BXE_PREV_UNDI_PROD(rcq, bd) ((bd) << 16 | (rcq)) static void bxe_prev_unload_undi_inc(struct bxe_softc *sc, uint8_t port, uint8_t inc) { uint16_t rcq, bd; uint32_t tmp_reg = REG_RD(sc, BXE_PREV_UNDI_PROD_ADDR(port)); rcq = BXE_PREV_UNDI_RCQ(tmp_reg) + inc; bd = BXE_PREV_UNDI_BD(tmp_reg) + inc; tmp_reg = BXE_PREV_UNDI_PROD(rcq, bd); REG_WR(sc, BXE_PREV_UNDI_PROD_ADDR(port), tmp_reg); BLOGD(sc, DBG_LOAD, "UNDI producer [%d] rings bd -> 0x%04x, rcq -> 0x%04x\n", port, bd, rcq); } static int bxe_prev_unload_common(struct bxe_softc *sc) { uint32_t reset_reg, tmp_reg = 0, rc; uint8_t prev_undi = FALSE; struct bxe_mac_vals mac_vals; uint32_t timer_count = 1000; uint32_t prev_brb; /* * It is possible a previous function received 'common' answer, * but hasn't loaded yet, therefore creating a scenario of * multiple functions receiving 'common' on the same path. */ BLOGD(sc, DBG_LOAD, "Common unload Flow\n"); memset(&mac_vals, 0, sizeof(mac_vals)); if (bxe_prev_is_path_marked(sc)) { return (bxe_prev_mcp_done(sc)); } reset_reg = REG_RD(sc, MISC_REG_RESET_REG_1); /* Reset should be performed after BRB is emptied */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_BRB1) { /* Close the MAC Rx to prevent BRB from filling up */ bxe_prev_unload_close_mac(sc, &mac_vals); /* close LLH filters towards the BRB */ elink_set_rx_filter(&sc->link_params, 0); /* * Check if the UNDI driver was previously loaded. * UNDI driver initializes CID offset for normal bell to 0x7 */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_DORQ) { tmp_reg = REG_RD(sc, DORQ_REG_NORM_CID_OFST); if (tmp_reg == 0x7) { BLOGD(sc, DBG_LOAD, "UNDI previously loaded\n"); prev_undi = TRUE; /* clear the UNDI indication */ REG_WR(sc, DORQ_REG_NORM_CID_OFST, 0); /* clear possible idle check errors */ REG_RD(sc, NIG_REG_NIG_INT_STS_CLR_0); } } /* wait until BRB is empty */ tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS); while (timer_count) { prev_brb = tmp_reg; tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS); if (!tmp_reg) { break; } BLOGD(sc, DBG_LOAD, "BRB still has 0x%08x\n", tmp_reg); /* reset timer as long as BRB actually gets emptied */ if (prev_brb > tmp_reg) { timer_count = 1000; } else { timer_count--; } /* If UNDI resides in memory, manually increment it */ if (prev_undi) { bxe_prev_unload_undi_inc(sc, SC_PORT(sc), 1); } DELAY(10); } if (!timer_count) { BLOGE(sc, "Failed to empty BRB\n"); } } /* No packets are in the pipeline, path is ready for reset */ bxe_reset_common(sc); if (mac_vals.xmac_addr) { REG_WR(sc, mac_vals.xmac_addr, mac_vals.xmac_val); } if (mac_vals.umac_addr) { REG_WR(sc, mac_vals.umac_addr, mac_vals.umac_val); } if (mac_vals.emac_addr) { REG_WR(sc, mac_vals.emac_addr, mac_vals.emac_val); } if (mac_vals.bmac_addr) { REG_WR(sc, mac_vals.bmac_addr, mac_vals.bmac_val[0]); REG_WR(sc, mac_vals.bmac_addr + 4, mac_vals.bmac_val[1]); } rc = bxe_prev_mark_path(sc, prev_undi); if (rc) { bxe_prev_mcp_done(sc); return (rc); } return (bxe_prev_mcp_done(sc)); } static int bxe_prev_unload_uncommon(struct bxe_softc *sc) { int rc; BLOGD(sc, DBG_LOAD, "Uncommon unload Flow\n"); /* Test if previous unload process was already finished for this path */ if (bxe_prev_is_path_marked(sc)) { return (bxe_prev_mcp_done(sc)); } BLOGD(sc, DBG_LOAD, "Path is unmarked\n"); /* * If function has FLR capabilities, and existing FW version matches * the one required, then FLR will be sufficient to clean any residue * left by previous driver */ rc = bxe_nic_load_analyze_req(sc, FW_MSG_CODE_DRV_LOAD_FUNCTION); if (!rc) { /* fw version is good */ BLOGD(sc, DBG_LOAD, "FW version matches our own, attempting FLR\n"); rc = bxe_do_flr(sc); } if (!rc) { /* FLR was performed */ BLOGD(sc, DBG_LOAD, "FLR successful\n"); return (0); } BLOGD(sc, DBG_LOAD, "Could not FLR\n"); /* Close the MCP request, return failure*/ rc = bxe_prev_mcp_done(sc); if (!rc) { rc = BXE_PREV_WAIT_NEEDED; } return (rc); } static int bxe_prev_unload(struct bxe_softc *sc) { int time_counter = 10; uint32_t fw, hw_lock_reg, hw_lock_val; uint32_t rc = 0; /* * Clear HW from errors which may have resulted from an interrupted * DMAE transaction. */ bxe_prev_interrupted_dmae(sc); /* Release previously held locks */ hw_lock_reg = (SC_FUNC(sc) <= 5) ? (MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8) : (MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8); hw_lock_val = (REG_RD(sc, hw_lock_reg)); if (hw_lock_val) { if (hw_lock_val & HW_LOCK_RESOURCE_NVRAM) { BLOGD(sc, DBG_LOAD, "Releasing previously held NVRAM lock\n"); REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << SC_PORT(sc))); } BLOGD(sc, DBG_LOAD, "Releasing previously held HW lock\n"); REG_WR(sc, hw_lock_reg, 0xffffffff); } else { BLOGD(sc, DBG_LOAD, "No need to release HW/NVRAM locks\n"); } if (MCPR_ACCESS_LOCK_LOCK & REG_RD(sc, MCP_REG_MCPR_ACCESS_LOCK)) { BLOGD(sc, DBG_LOAD, "Releasing previously held ALR\n"); REG_WR(sc, MCP_REG_MCPR_ACCESS_LOCK, 0); } do { /* Lock MCP using an unload request */ fw = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS, 0); if (!fw) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; break; } if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON) { rc = bxe_prev_unload_common(sc); break; } /* non-common reply from MCP night require looping */ rc = bxe_prev_unload_uncommon(sc); if (rc != BXE_PREV_WAIT_NEEDED) { break; } DELAY(20000); } while (--time_counter); if (!time_counter || rc) { BLOGE(sc, "Failed to unload previous driver!" " time_counter %d rc %d\n", time_counter, rc); rc = -1; } return (rc); } void bxe_dcbx_set_state(struct bxe_softc *sc, uint8_t dcb_on, uint32_t dcbx_enabled) { if (!CHIP_IS_E1x(sc)) { sc->dcb_state = dcb_on; sc->dcbx_enabled = dcbx_enabled; } else { sc->dcb_state = FALSE; sc->dcbx_enabled = BXE_DCBX_ENABLED_INVALID; } BLOGD(sc, DBG_LOAD, "DCB state [%s:%s]\n", dcb_on ? "ON" : "OFF", (dcbx_enabled == BXE_DCBX_ENABLED_OFF) ? "user-mode" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_OFF) ? "on-chip static" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_ON) ? "on-chip with negotiation" : "invalid"); } /* must be called after sriov-enable */ static int bxe_set_qm_cid_count(struct bxe_softc *sc) { int cid_count = BXE_L2_MAX_CID(sc); if (IS_SRIOV(sc)) { cid_count += BXE_VF_CIDS; } if (CNIC_SUPPORT(sc)) { cid_count += CNIC_CID_MAX; } return (roundup(cid_count, QM_CID_ROUND)); } static void bxe_init_multi_cos(struct bxe_softc *sc) { int pri, cos; uint32_t pri_map = 0; /* XXX change to user config */ for (pri = 0; pri < BXE_MAX_PRIORITY; pri++) { cos = ((pri_map & (0xf << (pri * 4))) >> (pri * 4)); if (cos < sc->max_cos) { sc->prio_to_cos[pri] = cos; } else { BLOGW(sc, "Invalid COS %d for priority %d " "(max COS is %d), setting to 0\n", cos, pri, (sc->max_cos - 1)); sc->prio_to_cos[pri] = 0; } } } static int bxe_sysctl_state(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc; int error, result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) { return (error); } if (result == 1) { uint32_t temp; sc = (struct bxe_softc *)arg1; BLOGI(sc, "... dumping driver state ...\n"); temp = SHMEM2_RD(sc, temperature_in_half_celsius); BLOGI(sc, "\t Device Temperature = %d Celsius\n", (temp/2)); } return (error); } static int bxe_sysctl_eth_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats = (uint32_t *)&sc->eth_stats; uint32_t *offset; uint64_t value = 0; int index = (int)arg2; if (index >= BXE_NUM_ETH_STATS) { BLOGE(sc, "bxe_eth_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_stats_arr[index].offset); switch (bxe_eth_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_stats size (index=%d size=%d)\n", index, bxe_eth_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static int bxe_sysctl_eth_q_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats; uint32_t *offset; uint64_t value = 0; uint32_t q_stat = (uint32_t)arg2; uint32_t fp_index = ((q_stat >> 16) & 0xffff); uint32_t index = (q_stat & 0xffff); eth_stats = (uint32_t *)&sc->fp[fp_index].eth_q_stats; if (index >= BXE_NUM_ETH_Q_STATS) { BLOGE(sc, "bxe_eth_q_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_q_stats_arr[index].offset); switch (bxe_eth_q_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_q_stats size (index=%d size=%d)\n", index, bxe_eth_q_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static void bxe_force_link_reset(struct bxe_softc *sc) { bxe_acquire_phy_lock(sc); elink_link_reset(&sc->link_params, &sc->link_vars, 1); bxe_release_phy_lock(sc); } static int bxe_sysctl_pauseparam(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1;; uint32_t cfg_idx = bxe_get_link_cfg_idx(sc); int rc = 0; int error; int result; error = sysctl_handle_int(oidp, &sc->bxe_pause_param, 0, req); if (error || !req->newptr) { return (error); } if ((sc->bxe_pause_param < 0) || (sc->bxe_pause_param > 8)) { BLOGW(sc, "invalid pause param (%d) - use intergers between 1 & 8\n",sc->bxe_pause_param); sc->bxe_pause_param = 8; } result = (sc->bxe_pause_param << PORT_FEATURE_FLOW_CONTROL_SHIFT); if((result & 0x400) && !(sc->port.supported[cfg_idx] & ELINK_SUPPORTED_Autoneg)) { BLOGW(sc, "Does not support Autoneg pause_param %d\n", sc->bxe_pause_param); return -EINVAL; } if(IS_MF(sc)) return 0; sc->link_params.req_flow_ctrl[cfg_idx] = ELINK_FLOW_CTRL_AUTO; if(result & ELINK_FLOW_CTRL_RX) sc->link_params.req_flow_ctrl[cfg_idx] |= ELINK_FLOW_CTRL_RX; if(result & ELINK_FLOW_CTRL_TX) sc->link_params.req_flow_ctrl[cfg_idx] |= ELINK_FLOW_CTRL_TX; if(sc->link_params.req_flow_ctrl[cfg_idx] == ELINK_FLOW_CTRL_AUTO) sc->link_params.req_flow_ctrl[cfg_idx] = ELINK_FLOW_CTRL_NONE; if(result & 0x400) { if (sc->link_params.req_line_speed[cfg_idx] == ELINK_SPEED_AUTO_NEG) { sc->link_params.req_flow_ctrl[cfg_idx] = ELINK_FLOW_CTRL_AUTO; } sc->link_params.req_fc_auto_adv = 0; if (result & ELINK_FLOW_CTRL_RX) sc->link_params.req_fc_auto_adv |= ELINK_FLOW_CTRL_RX; if (result & ELINK_FLOW_CTRL_TX) sc->link_params.req_fc_auto_adv |= ELINK_FLOW_CTRL_TX; if (!sc->link_params.req_fc_auto_adv) sc->link_params.req_fc_auto_adv |= ELINK_FLOW_CTRL_NONE; } if (IS_PF(sc)) { if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_STOP); } if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) { bxe_force_link_reset(sc); bxe_acquire_phy_lock(sc); rc = elink_phy_init(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); bxe_calc_fc_adv(sc); } } return rc; } static void bxe_add_sysctls(struct bxe_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *queue_top, *queue; struct sysctl_oid_list *queue_top_children, *queue_children; char queue_num_buf[32]; uint32_t q_stat; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "version", CTLFLAG_RD, BXE_DRIVER_VERSION, 0, "version"); snprintf(sc->fw_ver_str, sizeof(sc->fw_ver_str), "%d.%d.%d.%d", BCM_5710_FW_MAJOR_VERSION, BCM_5710_FW_MINOR_VERSION, BCM_5710_FW_REVISION_VERSION, BCM_5710_FW_ENGINEERING_VERSION); snprintf(sc->mf_mode_str, sizeof(sc->mf_mode_str), "%s", ((sc->devinfo.mf_info.mf_mode == SINGLE_FUNCTION) ? "Single" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SD) ? "MF-SD" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SI) ? "MF-SI" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_AFEX) ? "MF-AFEX" : "Unknown")); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "mf_vnics", CTLFLAG_RD, &sc->devinfo.mf_info.vnics_per_port, 0, "multifunction vnics per port"); snprintf(sc->pci_link_str, sizeof(sc->pci_link_str), "%s x%d", ((sc->devinfo.pcie_link_speed == 1) ? "2.5GT/s" : (sc->devinfo.pcie_link_speed == 2) ? "5.0GT/s" : (sc->devinfo.pcie_link_speed == 4) ? "8.0GT/s" : "???GT/s"), sc->devinfo.pcie_link_width); sc->debug = bxe_debug; #if __FreeBSD_version >= 900000 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bc_version", CTLFLAG_RD, sc->devinfo.bc_ver_str, 0, "bootcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "fw_version", CTLFLAG_RD, sc->fw_ver_str, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mf_mode", CTLFLAG_RD, sc->mf_mode_str, 0, "multifunction mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mac_addr", CTLFLAG_RD, sc->mac_addr_str, 0, "mac address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pci_link", CTLFLAG_RD, sc->pci_link_str, 0, "pci link status"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, "debug logging mode"); #else SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bc_version", CTLFLAG_RD, &sc->devinfo.bc_ver_str, 0, "bootcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "fw_version", CTLFLAG_RD, &sc->fw_ver_str, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mf_mode", CTLFLAG_RD, &sc->mf_mode_str, 0, "multifunction mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mac_addr", CTLFLAG_RD, &sc->mac_addr_str, 0, "mac address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pci_link", CTLFLAG_RD, &sc->pci_link_str, 0, "pci link status"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, 0, "debug logging mode"); #endif /* #if __FreeBSD_version >= 900000 */ sc->trigger_grcdump = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "trigger_grcdump", CTLFLAG_RW, &sc->trigger_grcdump, 0, "trigger grcdump should be invoked" " before collecting grcdump"); sc->grcdump_started = 0; sc->grcdump_done = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "grcdump_done", CTLFLAG_RD, &sc->grcdump_done, 0, "set by driver when grcdump is done"); sc->rx_budget = bxe_rx_budget; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_budget", CTLFLAG_RW, &sc->rx_budget, 0, "rx processing budget"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_param", CTLTYPE_UINT | CTLFLAG_RW, sc, 0, bxe_sysctl_pauseparam, "IU", "need pause frames- DEF:0/TX:1/RX:2/BOTH:3/AUTO:4/AUTOTX:5/AUTORX:6/AUTORXTX:7/NONE:8"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "state", CTLTYPE_UINT | CTLFLAG_RW, sc, 0, bxe_sysctl_state, "IU", "dump driver state"); for (i = 0; i < BXE_NUM_ETH_STATS; i++) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, bxe_eth_stats_arr[i].string, CTLTYPE_U64 | CTLFLAG_RD, sc, i, bxe_sysctl_eth_stat, "LU", bxe_eth_stats_arr[i].string); } /* add a new parent node for all queues "dev.bxe.#.queue" */ queue_top = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "queue", CTLFLAG_RD, NULL, "queue"); queue_top_children = SYSCTL_CHILDREN(queue_top); for (i = 0; i < sc->num_queues; i++) { /* add a new parent node for a single queue "dev.bxe.#.queue.#" */ snprintf(queue_num_buf, sizeof(queue_num_buf), "%d", i); queue = SYSCTL_ADD_NODE(ctx, queue_top_children, OID_AUTO, queue_num_buf, CTLFLAG_RD, NULL, "single queue"); queue_children = SYSCTL_CHILDREN(queue); for (j = 0; j < BXE_NUM_ETH_Q_STATS; j++) { q_stat = ((i << 16) | j); SYSCTL_ADD_PROC(ctx, queue_children, OID_AUTO, bxe_eth_q_stats_arr[j].string, CTLTYPE_U64 | CTLFLAG_RD, sc, q_stat, bxe_sysctl_eth_q_stat, "LU", bxe_eth_q_stats_arr[j].string); } } } static int bxe_alloc_buf_rings(struct bxe_softc *sc) { #if __FreeBSD_version >= 901504 int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->tx_br = buf_ring_alloc(BXE_BR_SIZE, M_DEVBUF, M_NOWAIT, &fp->tx_mtx); if (fp->tx_br == NULL) return (-1); } #endif return (0); } static void bxe_free_buf_rings(struct bxe_softc *sc) { #if __FreeBSD_version >= 901504 int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->tx_br) { buf_ring_free(fp->tx_br, M_DEVBUF); fp->tx_br = NULL; } } #endif } static void bxe_init_fp_mutexs(struct bxe_softc *sc) { int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; snprintf(fp->tx_mtx_name, sizeof(fp->tx_mtx_name), "bxe%d_fp%d_tx_lock", sc->unit, i); mtx_init(&fp->tx_mtx, fp->tx_mtx_name, NULL, MTX_DEF); snprintf(fp->rx_mtx_name, sizeof(fp->rx_mtx_name), "bxe%d_fp%d_rx_lock", sc->unit, i); mtx_init(&fp->rx_mtx, fp->rx_mtx_name, NULL, MTX_DEF); } } static void bxe_destroy_fp_mutexs(struct bxe_softc *sc) { int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (mtx_initialized(&fp->tx_mtx)) { mtx_destroy(&fp->tx_mtx); } if (mtx_initialized(&fp->rx_mtx)) { mtx_destroy(&fp->rx_mtx); } } } /* * Device attach function. * * Allocates device resources, performs secondary chip identification, and * initializes driver instance variables. This function is called from driver * load after a successful probe. * * Returns: * 0 = Success, >0 = Failure */ static int bxe_attach(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting attach...\n"); sc->state = BXE_STATE_CLOSED; sc->dev = dev; sc->unit = device_get_unit(dev); BLOGD(sc, DBG_LOAD, "softc = %p\n", sc); sc->pcie_bus = pci_get_bus(dev); sc->pcie_device = pci_get_slot(dev); sc->pcie_func = pci_get_function(dev); /* enable bus master capability */ pci_enable_busmaster(dev); /* get the BARs */ if (bxe_allocate_bars(sc) != 0) { return (ENXIO); } /* initialize the mutexes */ bxe_init_mutexes(sc); /* prepare the periodic callout */ callout_init(&sc->periodic_callout, 0); /* prepare the chip taskqueue */ sc->chip_tq_flags = CHIP_TQ_NONE; snprintf(sc->chip_tq_name, sizeof(sc->chip_tq_name), "bxe%d_chip_tq", sc->unit); TASK_INIT(&sc->chip_tq_task, 0, bxe_handle_chip_tq, sc); sc->chip_tq = taskqueue_create(sc->chip_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->chip_tq); taskqueue_start_threads(&sc->chip_tq, 1, PWAIT, /* lower priority */ "%s", sc->chip_tq_name); /* get device info and set params */ if (bxe_get_device_info(sc) != 0) { BLOGE(sc, "getting device info\n"); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* get final misc params */ bxe_get_params(sc); /* set the default MTU (changed via ifconfig) */ sc->mtu = ETHERMTU; bxe_set_modes_bitmap(sc); /* XXX * If in AFEX mode and the function is configured for FCoE * then bail... no L2 allowed. */ /* get phy settings from shmem and 'and' against admin settings */ bxe_get_phy_info(sc); /* initialize the FreeBSD ifnet interface */ if (bxe_init_ifnet(sc) != 0) { bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } if (bxe_add_cdev(sc) != 0) { if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate device interrupts */ if (bxe_interrupt_alloc(sc) != 0) { bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } bxe_init_fp_mutexs(sc); if (bxe_alloc_buf_rings(sc) != 0) { bxe_free_buf_rings(sc); bxe_interrupt_free(sc); bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate ilt */ if (bxe_alloc_ilt_mem(sc) != 0) { bxe_free_buf_rings(sc); bxe_interrupt_free(sc); bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate the host hardware/software hsi structures */ if (bxe_alloc_hsi_mem(sc) != 0) { bxe_free_ilt_mem(sc); bxe_free_buf_rings(sc); bxe_interrupt_free(sc); bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* need to reset chip if UNDI was active */ if (IS_PF(sc) && !BXE_NOMCP(sc)) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "prev unload fw_seq 0x%04x\n", sc->fw_seq); bxe_prev_unload(sc); } #if 1 /* XXX */ bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); #else if (SHMEM2_HAS(sc, dcbx_lldp_params_offset) && SHMEM2_HAS(sc, dcbx_lldp_dcbx_stat_offset) && SHMEM2_RD(sc, dcbx_lldp_params_offset) && SHMEM2_RD(sc, dcbx_lldp_dcbx_stat_offset)) { bxe_dcbx_set_state(sc, TRUE, BXE_DCBX_ENABLED_ON_NEG_ON); bxe_dcbx_init_params(sc); } else { bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); } #endif /* calculate qm_cid_count */ sc->qm_cid_count = bxe_set_qm_cid_count(sc); BLOGD(sc, DBG_LOAD, "qm_cid_count=%d\n", sc->qm_cid_count); sc->max_cos = 1; bxe_init_multi_cos(sc); bxe_add_sysctls(sc); return (0); } /* * Device detach function. * * Stops the controller, resets the controller, and releases resources. * * Returns: * 0 = Success, >0 = Failure */ static int bxe_detach(device_t dev) { struct bxe_softc *sc; if_t ifp; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting detach...\n"); ifp = sc->ifp; if (ifp != NULL && if_vlantrunkinuse(ifp)) { BLOGE(sc, "Cannot detach while VLANs are in use.\n"); return(EBUSY); } bxe_del_cdev(sc); /* stop the periodic callout */ bxe_periodic_stop(sc); /* stop the chip taskqueue */ atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_NONE); if (sc->chip_tq) { taskqueue_drain(sc->chip_tq, &sc->chip_tq_task); taskqueue_free(sc->chip_tq); sc->chip_tq = NULL; } /* stop and reset the controller if it was open */ if (sc->state != BXE_STATE_CLOSED) { BXE_CORE_LOCK(sc); bxe_nic_unload(sc, UNLOAD_CLOSE, TRUE); sc->state = BXE_STATE_DISABLED; BXE_CORE_UNLOCK(sc); } /* release the network interface */ if (ifp != NULL) { ether_ifdetach(ifp); } ifmedia_removeall(&sc->ifmedia); /* XXX do the following based on driver state... */ /* free the host hardware/software hsi structures */ bxe_free_hsi_mem(sc); /* free ilt */ bxe_free_ilt_mem(sc); bxe_free_buf_rings(sc); /* release the interrupts */ bxe_interrupt_free(sc); /* Release the mutexes*/ bxe_destroy_fp_mutexs(sc); bxe_release_mutexes(sc); /* Release the PCIe BAR mapped memory */ bxe_deallocate_bars(sc); /* Release the FreeBSD interface. */ if (sc->ifp != NULL) { if_free(sc->ifp); } pci_disable_busmaster(dev); return (0); } /* * Device shutdown function. * * Stops and resets the controller. * * Returns: * Nothing */ static int bxe_shutdown(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting shutdown...\n"); /* stop the periodic callout */ bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_nic_unload(sc, UNLOAD_NORMAL, FALSE); BXE_CORE_UNLOCK(sc); return (0); } void bxe_igu_ack_sb(struct bxe_softc *sc, uint8_t igu_sb_id, uint8_t segment, uint16_t index, uint8_t op, uint8_t update) { uint32_t igu_addr = sc->igu_base_addr; igu_addr += (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8; bxe_igu_ack_sb_gen(sc, igu_sb_id, segment, index, op, update, igu_addr); } static void bxe_igu_clear_sb_gen(struct bxe_softc *sc, uint8_t func, uint8_t idu_sb_id, uint8_t is_pf) { uint32_t data, ctl, cnt = 100; uint32_t igu_addr_data = IGU_REG_COMMAND_REG_32LSB_DATA; uint32_t igu_addr_ctl = IGU_REG_COMMAND_REG_CTRL; uint32_t igu_addr_ack = IGU_REG_CSTORM_TYPE_0_SB_CLEANUP + (idu_sb_id/32)*4; uint32_t sb_bit = 1 << (idu_sb_id%32); uint32_t func_encode = func | (is_pf ? 1 : 0) << IGU_FID_ENCODE_IS_PF_SHIFT; uint32_t addr_encode = IGU_CMD_E2_PROD_UPD_BASE + idu_sb_id; /* Not supported in BC mode */ if (CHIP_INT_MODE_IS_BC(sc)) { return; } data = ((IGU_USE_REGISTER_cstorm_type_0_sb_cleanup << IGU_REGULAR_CLEANUP_TYPE_SHIFT) | IGU_REGULAR_CLEANUP_SET | IGU_REGULAR_BCLEANUP); ctl = ((addr_encode << IGU_CTRL_REG_ADDRESS_SHIFT) | (func_encode << IGU_CTRL_REG_FID_SHIFT) | (IGU_CTRL_CMD_TYPE_WR << IGU_CTRL_REG_TYPE_SHIFT)); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", data, igu_addr_data); REG_WR(sc, igu_addr_data, data); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", ctl, igu_addr_ctl); REG_WR(sc, igu_addr_ctl, ctl); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); /* wait for clean up to finish */ while (!(REG_RD(sc, igu_addr_ack) & sb_bit) && --cnt) { DELAY(20000); } if (!(REG_RD(sc, igu_addr_ack) & sb_bit)) { BLOGD(sc, DBG_LOAD, "Unable to finish IGU cleanup: " "idu_sb_id %d offset %d bit %d (cnt %d)\n", idu_sb_id, idu_sb_id/32, idu_sb_id%32, cnt); } } static void bxe_igu_clear_sb(struct bxe_softc *sc, uint8_t idu_sb_id) { bxe_igu_clear_sb_gen(sc, SC_FUNC(sc), idu_sb_id, TRUE /*PF*/); } /*******************/ /* ECORE CALLBACKS */ /*******************/ static void bxe_reset_common(struct bxe_softc *sc) { uint32_t val = 0x1400; /* reset_common */ REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR), 0xd3ffff7f); if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR), val); } static void bxe_common_init_phy(struct bxe_softc *sc) { uint32_t shmem_base[2]; uint32_t shmem2_base[2]; /* Avoid common init in case MFW supports LFA */ if (SHMEM2_RD(sc, size) > (uint32_t)offsetof(struct shmem2_region, lfa_host_addr[SC_PORT(sc)])) { return; } shmem_base[0] = sc->devinfo.shmem_base; shmem2_base[0] = sc->devinfo.shmem2_base; if (!CHIP_IS_E1x(sc)) { shmem_base[1] = SHMEM2_RD(sc, other_shmem_base_addr); shmem2_base[1] = SHMEM2_RD(sc, other_shmem2_base_addr); } bxe_acquire_phy_lock(sc); elink_common_init_phy(sc, shmem_base, shmem2_base, sc->devinfo.chip_id, 0); bxe_release_phy_lock(sc); } static void bxe_pf_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~IGU_PF_CONF_FUNC_EN; REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 0); } static void bxe_init_pxp(struct bxe_softc *sc) { uint16_t devctl; int r_order, w_order; devctl = bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_CTL, 2); BLOGD(sc, DBG_LOAD, "read 0x%08x from devctl\n", devctl); w_order = ((devctl & PCIM_EXP_CTL_MAX_PAYLOAD) >> 5); if (sc->mrrs == -1) { r_order = ((devctl & PCIM_EXP_CTL_MAX_READ_REQUEST) >> 12); } else { BLOGD(sc, DBG_LOAD, "forcing read order to %d\n", sc->mrrs); r_order = sc->mrrs; } ecore_init_pxp_arb(sc, r_order, w_order); } static uint32_t bxe_get_pretend_reg(struct bxe_softc *sc) { uint32_t base = PXP2_REG_PGL_PRETEND_FUNC_F0; uint32_t stride = (PXP2_REG_PGL_PRETEND_FUNC_F1 - base); return (base + (SC_ABS_FUNC(sc)) * stride); } /* * Called only on E1H or E2. * When pretending to be PF, the pretend value is the function number 0..7. * When pretending to be VF, the pretend val is the PF-num:VF-valid:ABS-VFID * combination. */ static int bxe_pretend_func(struct bxe_softc *sc, uint16_t pretend_func_val) { uint32_t pretend_reg; if (CHIP_IS_E1H(sc) && (pretend_func_val > E1H_FUNC_MAX)) { return (-1); } /* get my own pretend register */ pretend_reg = bxe_get_pretend_reg(sc); REG_WR(sc, pretend_reg, pretend_func_val); REG_RD(sc, pretend_reg); return (0); } static void bxe_iov_init_dmae(struct bxe_softc *sc) { return; } static void bxe_iov_init_dq(struct bxe_softc *sc) { return; } /* send a NIG loopback debug packet */ static void bxe_lb_pckt(struct bxe_softc *sc) { uint32_t wb_write[3]; /* Ethernet source and destination addresses */ wb_write[0] = 0x55555555; wb_write[1] = 0x55555555; wb_write[2] = 0x20; /* SOP */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); /* NON-IP protocol */ wb_write[0] = 0x09000000; wb_write[1] = 0x55555555; wb_write[2] = 0x10; /* EOP, eop_bvalid = 0 */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); } /* * Some of the internal memories are not directly readable from the driver. * To test them we send debug packets. */ static int bxe_int_mem_test(struct bxe_softc *sc) { int factor; int count, i; uint32_t val = 0; if (CHIP_REV_IS_FPGA(sc)) { factor = 120; } else if (CHIP_REV_IS_EMUL(sc)) { factor = 200; } else { factor = 1; } /* disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send Ethernet packet */ bxe_lb_pckt(sc); /* TODO do i reset NIG statistic? */ /* Wait until NIG register shows 1 packet of size 0x10 */ count = 1000 * factor; while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); if (val == 0x10) { break; } DELAY(10000); count--; } if (val != 0x10) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-1); } /* wait until PRS register shows 1 packet */ count = (1000 * factor); while (count) { val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val == 1) { break; } DELAY(10000); count--; } if (val != 0x1) { BLOGE(sc, "PRS timeout val=0x%x\n", val); return (-2); } /* Reset and init BRB, PRS */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); /* Disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* Write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send 10 Ethernet packets */ for (i = 0; i < 10; i++) { bxe_lb_pckt(sc); } /* Wait until NIG register shows 10+1 packets of size 11*0x10 = 0xb0 */ count = (1000 * factor); while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); if (val == 0xb0) { break; } DELAY(10000); count--; } if (val != 0xb0) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-3); } /* Wait until PRS register shows 2 packets */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 2) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* Write 1 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x1); /* Wait until PRS register shows 3 packets */ DELAY(10000 * factor); /* Wait until NIG register shows 1 packet of size 0x10 */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 3) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* clear NIG EOP FIFO */ for (i = 0; i < 11; i++) { REG_RD(sc, NIG_REG_INGRESS_EOP_LB_FIFO); } val = REG_RD(sc, NIG_REG_INGRESS_EOP_LB_EMPTY); if (val != 1) { BLOGE(sc, "clear of NIG failed val=0x%x\n", val); return (-4); } /* Reset and init BRB, PRS, NIG */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); if (!CNIC_SUPPORT(sc)) { /* set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); } /* Enable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x7fffffff); REG_WR(sc, TCM_REG_PRS_IFEN, 0x1); REG_WR(sc, CFC_REG_DEBUG0, 0x0); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x1); return (0); } static void bxe_setup_fan_failure_detection(struct bxe_softc *sc) { int is_required; uint32_t val; int port; is_required = 0; val = (SHMEM_RD(sc, dev_info.shared_hw_config.config2) & SHARED_HW_CFG_FAN_FAILURE_MASK); if (val == SHARED_HW_CFG_FAN_FAILURE_ENABLED) { is_required = 1; } /* * The fan failure mechanism is usually related to the PHY type since * the power consumption of the board is affected by the PHY. Currently, * fan is required for most designs with SFX7101, BCM8727 and BCM8481. */ else if (val == SHARED_HW_CFG_FAN_FAILURE_PHY_TYPE) { for (port = PORT_0; port < PORT_MAX; port++) { is_required |= elink_fan_failure_det_req(sc, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, port); } } BLOGD(sc, DBG_LOAD, "fan detection setting: %d\n", is_required); if (is_required == 0) { return; } /* Fan failure is indicated by SPIO 5 */ bxe_set_spio(sc, MISC_SPIO_SPIO5, MISC_SPIO_INPUT_HI_Z); /* set to active low mode */ val = REG_RD(sc, MISC_REG_SPIO_INT); val |= (MISC_SPIO_SPIO5 << MISC_SPIO_INT_OLD_SET_POS); REG_WR(sc, MISC_REG_SPIO_INT, val); /* enable interrupt to signal the IGU */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); val |= MISC_SPIO_SPIO5; REG_WR(sc, MISC_REG_SPIO_EVENT_EN, val); } static void bxe_enable_blocks_attention(struct bxe_softc *sc) { uint32_t val; REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0x40); } else { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0); } REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* * mask read length error interrupts in brb for parser * (parsing unit and 'checksum and crc' unit) * these errors are legal (PU reads fixed length and CAC can cause * read length error on truncated packets) */ REG_WR(sc, BRB1_REG_BRB1_INT_MASK, 0xFC00); REG_WR(sc, QM_REG_QM_INT_MASK, 0); REG_WR(sc, TM_REG_TM_INT_MASK, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_0, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_1, 0); REG_WR(sc, XCM_REG_XCM_INT_MASK, 0); /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_0, 0); */ /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_1, 0); */ REG_WR(sc, USDM_REG_USDM_INT_MASK_0, 0); REG_WR(sc, USDM_REG_USDM_INT_MASK_1, 0); REG_WR(sc, UCM_REG_UCM_INT_MASK, 0); /* REG_WR(sc, USEM_REG_USEM_INT_MASK_0, 0); */ /* REG_WR(sc, USEM_REG_USEM_INT_MASK_1, 0); */ REG_WR(sc, GRCBASE_UPB + PB_REG_PB_INT_MASK, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_0, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_1, 0); REG_WR(sc, CCM_REG_CCM_INT_MASK, 0); /* REG_WR(sc, CSEM_REG_CSEM_INT_MASK_0, 0); */ /* REG_WR(sc, CSEM_REG_CSEM_INT_MASK_1, 0); */ val = (PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_AFT | PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_OF | PXP2_PXP2_INT_MASK_0_REG_PGL_PCIE_ATTN); if (!CHIP_IS_E1x(sc)) { val |= (PXP2_PXP2_INT_MASK_0_REG_PGL_READ_BLOCKED | PXP2_PXP2_INT_MASK_0_REG_PGL_WRITE_BLOCKED); } REG_WR(sc, PXP2_REG_PXP2_INT_MASK_0, val); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_0, 0); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_1, 0); REG_WR(sc, TCM_REG_TCM_INT_MASK, 0); /* REG_WR(sc, TSEM_REG_TSEM_INT_MASK_0, 0); */ if (!CHIP_IS_E1x(sc)) { /* enable VFC attentions: bits 11 and 12, bits 31:13 reserved */ REG_WR(sc, TSEM_REG_TSEM_INT_MASK_1, 0x07ff); } REG_WR(sc, CDU_REG_CDU_INT_MASK, 0); REG_WR(sc, DMAE_REG_DMAE_INT_MASK, 0); /* REG_WR(sc, MISC_REG_MISC_INT_MASK, 0); */ REG_WR(sc, PBF_REG_PBF_INT_MASK, 0x18); /* bit 3,4 masked */ } /** * bxe_init_hw_common - initialize the HW at the COMMON phase. * * @sc: driver handle */ static int bxe_init_hw_common(struct bxe_softc *sc) { uint8_t abs_func_id; uint32_t val; BLOGD(sc, DBG_LOAD, "starting common init for func %d\n", SC_ABS_FUNC(sc)); /* * take the RESET lock to protect undi_unload flow from accessing * registers while we are resetting the chip */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); bxe_reset_common(sc); REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET), 0xffffffff); val = 0xfffc; if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET), val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); ecore_init_block(sc, BLOCK_MISC, PHASE_COMMON); BLOGD(sc, DBG_LOAD, "after misc block init\n"); if (!CHIP_IS_E1x(sc)) { /* * 4-port mode or 2-port mode we need to turn off master-enable for * everyone. After that we turn it back on for self. So, we disregard * multi-function, and always disable all functions on the given path, * this means 0,2,4,6 for path 0 and 1,3,5,7 for path 1 */ for (abs_func_id = SC_PATH(sc); abs_func_id < (E2_FUNC_MAX * 2); abs_func_id += 2) { if (abs_func_id == SC_ABS_FUNC(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); continue; } bxe_pretend_func(sc, abs_func_id); /* clear pf enable */ bxe_pf_disable(sc); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); } } BLOGD(sc, DBG_LOAD, "after pf disable\n"); ecore_init_block(sc, BLOCK_PXP, PHASE_COMMON); if (CHIP_IS_E1(sc)) { /* * enable HW interrupt from PXP on USDM overflow * bit 16 on INT_MASK_0 */ REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); } ecore_init_block(sc, BLOCK_PXP2, PHASE_COMMON); bxe_init_pxp(sc); #ifdef __BIG_ENDIAN REG_WR(sc, PXP2_REG_RQ_QM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_TM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_SRC_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_CDU_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_DBG_ENDIAN_M, 1); /* make sure this value is 0 */ REG_WR(sc, PXP2_REG_RQ_HC_ENDIAN_M, 0); //REG_WR(sc, PXP2_REG_RD_PBF_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_QM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_TM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_SRC_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_CDURD_SWAP_MODE, 1); #endif ecore_ilt_init_page_size(sc, INITOP_SET); if (CHIP_REV_IS_FPGA(sc) && CHIP_IS_E1H(sc)) { REG_WR(sc, PXP2_REG_PGL_TAGS_LIMIT, 0x1); } /* let the HW do it's magic... */ DELAY(100000); /* finish PXP init */ val = REG_RD(sc, PXP2_REG_RQ_CFG_DONE); if (val != 1) { BLOGE(sc, "PXP2 CFG failed PXP2_REG_RQ_CFG_DONE val = 0x%x\n", val); return (-1); } val = REG_RD(sc, PXP2_REG_RD_INIT_DONE); if (val != 1) { BLOGE(sc, "PXP2 RD_INIT failed val = 0x%x\n", val); return (-1); } BLOGD(sc, DBG_LOAD, "after pxp init\n"); /* * Timer bug workaround for E2 only. We need to set the entire ILT to have * entries with value "0" and valid bit on. This needs to be done by the * first PF that is loaded in a path (i.e. common phase) */ if (!CHIP_IS_E1x(sc)) { /* * In E2 there is a bug in the timers block that can cause function 6 / 7 * (i.e. vnic3) to start even if it is marked as "scan-off". * This occurs when a different function (func2,3) is being marked * as "scan-off". Real-life scenario for example: if a driver is being * load-unloaded while func6,7 are down. This will cause the timer to access * the ilt, translate to a logical address and send a request to read/write. * Since the ilt for the function that is down is not valid, this will cause * a translation error which is unrecoverable. * The Workaround is intended to make sure that when this happens nothing * fatal will occur. The workaround: * 1. First PF driver which loads on a path will: * a. After taking the chip out of reset, by using pretend, * it will write "0" to the following registers of * the other vnics. * REG_WR(pdev, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); * REG_WR(pdev, CFC_REG_WEAK_ENABLE_PF,0); * REG_WR(pdev, CFC_REG_STRONG_ENABLE_PF,0); * And for itself it will write '1' to * PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER to enable * dmae-operations (writing to pram for example.) * note: can be done for only function 6,7 but cleaner this * way. * b. Write zero+valid to the entire ILT. * c. Init the first_timers_ilt_entry, last_timers_ilt_entry of * VNIC3 (of that port). The range allocated will be the * entire ILT. This is needed to prevent ILT range error. * 2. Any PF driver load flow: * a. ILT update with the physical addresses of the allocated * logical pages. * b. Wait 20msec. - note that this timeout is needed to make * sure there are no requests in one of the PXP internal * queues with "old" ILT addresses. * c. PF enable in the PGLC. * d. Clear the was_error of the PF in the PGLC. (could have * occurred while driver was down) * e. PF enable in the CFC (WEAK + STRONG) * f. Timers scan enable * 3. PF driver unload flow: * a. Clear the Timers scan_en. * b. Polling for scan_on=0 for that PF. * c. Clear the PF enable bit in the PXP. * d. Clear the PF enable in the CFC (WEAK + STRONG) * e. Write zero+valid to all ILT entries (The valid bit must * stay set) * f. If this is VNIC 3 of a port then also init * first_timers_ilt_entry to zero and last_timers_ilt_entry * to the last enrty in the ILT. * * Notes: * Currently the PF error in the PGLC is non recoverable. * In the future the there will be a recovery routine for this error. * Currently attention is masked. * Having an MCP lock on the load/unload process does not guarantee that * there is no Timer disable during Func6/7 enable. This is because the * Timers scan is currently being cleared by the MCP on FLR. * Step 2.d can be done only for PF6/7 and the driver can also check if * there is error before clearing it. But the flow above is simpler and * more general. * All ILT entries are written by zero+valid and not just PF6/7 * ILT entries since in the future the ILT entries allocation for * PF-s might be dynamic. */ struct ilt_client_info ilt_cli; struct ecore_ilt ilt; memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); memset(&ilt, 0, sizeof(struct ecore_ilt)); /* initialize dummy TM client */ ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; /* * Step 1: set zeroes to all ilt page entries with valid bit on * Step 2: set the timers first/last ilt entry to point * to the entire range to prevent ILT range error for 3rd/4th * vnic (this code assumes existence of the vnic) * * both steps performed by call to ecore_ilt_client_init_op() * with dummy TM client * * we must use pretend since PXP2_REG_RQ_##blk##_FIRST_ILT * and his brother are split registers */ bxe_pretend_func(sc, (SC_PATH(sc) + 6)); ecore_ilt_client_init_op_ilt(sc, &ilt, &ilt_cli, INITOP_CLEAR); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_RD, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_SEL, 1); } REG_WR(sc, PXP2_REG_RQ_DISABLE_INPUTS, 0); REG_WR(sc, PXP2_REG_RD_DISABLE_INPUTS, 0); if (!CHIP_IS_E1x(sc)) { int factor = CHIP_REV_IS_EMUL(sc) ? 1000 : (CHIP_REV_IS_FPGA(sc) ? 400 : 0); ecore_init_block(sc, BLOCK_PGLUE_B, PHASE_COMMON); ecore_init_block(sc, BLOCK_ATC, PHASE_COMMON); /* let the HW do it's magic... */ do { DELAY(200000); val = REG_RD(sc, ATC_REG_ATC_INIT_DONE); } while (factor-- && (val != 1)); if (val != 1) { BLOGE(sc, "ATC_INIT failed val = 0x%x\n", val); return (-1); } } BLOGD(sc, DBG_LOAD, "after pglue and atc init\n"); ecore_init_block(sc, BLOCK_DMAE, PHASE_COMMON); bxe_iov_init_dmae(sc); /* clean the DMAE memory */ sc->dmae_ready = 1; ecore_init_fill(sc, TSEM_REG_PRAM, 0, 8, 1); ecore_init_block(sc, BLOCK_TCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_UCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XCM, PHASE_COMMON); bxe_read_dmae(sc, XSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, CSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, TSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, USEM_REG_PASSIVE_BUFFER, 3); ecore_init_block(sc, BLOCK_QM, PHASE_COMMON); /* QM queues pointers table */ ecore_qm_init_ptr_table(sc, sc->qm_cid_count, INITOP_SET); /* soft reset pulse */ REG_WR(sc, QM_REG_SOFT_RESET, 1); REG_WR(sc, QM_REG_SOFT_RESET, 0); if (CNIC_SUPPORT(sc)) ecore_init_block(sc, BLOCK_TM, PHASE_COMMON); ecore_init_block(sc, BLOCK_DORQ, PHASE_COMMON); REG_WR(sc, DORQ_REG_DPM_CID_OFST, BXE_DB_SHIFT); if (!CHIP_REV_IS_SLOW(sc)) { /* enable hw interrupt from doorbell Q */ REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); } ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); REG_WR(sc, PRS_REG_A_PRSU_20, 0xf); if (!CHIP_IS_E1(sc)) { REG_WR(sc, PRS_REG_E1HOV_MODE, sc->devinfo.mf_info.path_has_ovlan); } if (!CHIP_IS_E1x(sc) && !CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * received in AFEX mode */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, PRS_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PRS_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PRS_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PRS_REG_TAG_LEN_0, 0x4); } else { /* * Bit-map indicating which L2 hdrs may appear * after the basic Ethernet header */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ? 7 : 6); } } ecore_init_block(sc, BLOCK_TSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSDM, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { /* reset VFC memories */ REG_WR(sc, TSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); REG_WR(sc, XSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); DELAY(20000); } ecore_init_block(sc, BLOCK_TSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSEM, PHASE_COMMON); /* sync semi rtc */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x80000000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x80000000); ecore_init_block(sc, BLOCK_UPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_XPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_PBF, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * sent in AFEX mode */ REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, PBF_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PBF_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PBF_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PBF_REG_TAG_LEN_0, 0x4); } else { REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ? 7 : 6); } } REG_WR(sc, SRC_REG_SOFT_RST, 1); ecore_init_block(sc, BLOCK_SRC, PHASE_COMMON); if (CNIC_SUPPORT(sc)) { REG_WR(sc, SRC_REG_KEYSEARCH_0, 0x63285672); REG_WR(sc, SRC_REG_KEYSEARCH_1, 0x24b8f2cc); REG_WR(sc, SRC_REG_KEYSEARCH_2, 0x223aef9b); REG_WR(sc, SRC_REG_KEYSEARCH_3, 0x26001e3a); REG_WR(sc, SRC_REG_KEYSEARCH_4, 0x7ae91116); REG_WR(sc, SRC_REG_KEYSEARCH_5, 0x5ce5230b); REG_WR(sc, SRC_REG_KEYSEARCH_6, 0x298d8adf); REG_WR(sc, SRC_REG_KEYSEARCH_7, 0x6eb0ff09); REG_WR(sc, SRC_REG_KEYSEARCH_8, 0x1830f82f); REG_WR(sc, SRC_REG_KEYSEARCH_9, 0x01e46be7); } REG_WR(sc, SRC_REG_SOFT_RST, 0); if (sizeof(union cdu_context) != 1024) { /* we currently assume that a context is 1024 bytes */ BLOGE(sc, "please adjust the size of cdu_context(%ld)\n", (long)sizeof(union cdu_context)); } ecore_init_block(sc, BLOCK_CDU, PHASE_COMMON); val = (4 << 24) + (0 << 12) + 1024; REG_WR(sc, CDU_REG_CDU_GLOBAL_PARAMS, val); ecore_init_block(sc, BLOCK_CFC, PHASE_COMMON); REG_WR(sc, CFC_REG_INIT_REG, 0x7FF); /* enable context validation interrupt from CFC */ REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* set the thresholds to prevent CFC/CDU race */ REG_WR(sc, CFC_REG_DEBUG0, 0x20020000); ecore_init_block(sc, BLOCK_HC, PHASE_COMMON); if (!CHIP_IS_E1x(sc) && BXE_NOMCP(sc)) { REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x36); } ecore_init_block(sc, BLOCK_IGU, PHASE_COMMON); ecore_init_block(sc, BLOCK_MISC_AEU, PHASE_COMMON); /* Reset PCIE errors for debug */ REG_WR(sc, 0x2814, 0xffffffff); REG_WR(sc, 0x3820, 0xffffffff); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_CONTROL_5, (PXPCS_TL_CONTROL_5_ERR_UNSPPORT1 | PXPCS_TL_CONTROL_5_ERR_UNSPPORT)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC345_STAT, (PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT4 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT3 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT2)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC678_STAT, (PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT7 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT6 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT5)); } ecore_init_block(sc, BLOCK_NIG, PHASE_COMMON); if (!CHIP_IS_E1(sc)) { /* in E3 this done in per-port section */ if (!CHIP_IS_E3(sc)) REG_WR(sc, NIG_REG_LLH_MF_MODE, IS_MF(sc)); } if (CHIP_IS_E1H(sc)) { /* not applicable for E2 (and above ...) */ REG_WR(sc, NIG_REG_LLH_E1HOV_MODE, IS_MF_SD(sc)); } if (CHIP_REV_IS_SLOW(sc)) { DELAY(200000); } /* finish CFC init */ val = reg_poll(sc, CFC_REG_LL_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC LL_INIT failed val=0x%x\n", val); return (-1); } val = reg_poll(sc, CFC_REG_AC_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC AC_INIT failed val=0x%x\n", val); return (-1); } val = reg_poll(sc, CFC_REG_CAM_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC CAM_INIT failed val=0x%x\n", val); return (-1); } REG_WR(sc, CFC_REG_DEBUG0, 0); if (CHIP_IS_E1(sc)) { /* read NIG statistic to see if this is our first up since powerup */ bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); /* do internal memory self test */ if ((val == 0) && bxe_int_mem_test(sc)) { BLOGE(sc, "internal mem self test failed val=0x%x\n", val); return (-1); } } bxe_setup_fan_failure_detection(sc); /* clear PXP2 attentions */ REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0); bxe_enable_blocks_attention(sc); if (!CHIP_REV_IS_SLOW(sc)) { ecore_enable_blocks_parity(sc); } if (!BXE_NOMCP(sc)) { if (CHIP_IS_E1x(sc)) { bxe_common_init_phy(sc); } } return (0); } /** * bxe_init_hw_common_chip - init HW at the COMMON_CHIP phase. * * @sc: driver handle */ static int bxe_init_hw_common_chip(struct bxe_softc *sc) { int rc = bxe_init_hw_common(sc); if (rc) { BLOGE(sc, "bxe_init_hw_common failed rc=%d\n", rc); return (rc); } /* In E2 2-PORT mode, same ext phy is used for the two paths */ if (!BXE_NOMCP(sc)) { bxe_common_init_phy(sc); } return (0); } static int bxe_init_hw_port(struct bxe_softc *sc) { int port = SC_PORT(sc); int init_phase = port ? PHASE_PORT1 : PHASE_PORT0; uint32_t low, high; uint32_t val; BLOGD(sc, DBG_LOAD, "starting port init for port %d\n", port); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); /* * Timers bug workaround: disables the pf_master bit in pglue at * common phase, we need to enable it here before any dmae access are * attempted. Therefore we manually added the enable-master to the * port phase (it also happens in the function phase) */ if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); } ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); /* QM cid (connection) count */ ecore_qm_init_cid_count(sc, sc->qm_cid_count, INITOP_SET); if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_TM, init_phase); REG_WR(sc, TM_REG_LIN0_SCAN_TIME + port*4, 20); REG_WR(sc, TM_REG_LIN0_MAX_ACTIVE_CID + port*4, 31); } ecore_init_block(sc, BLOCK_DORQ, init_phase); ecore_init_block(sc, BLOCK_BRB1, init_phase); if (CHIP_IS_E1(sc) || CHIP_IS_E1H(sc)) { if (IS_MF(sc)) { low = (BXE_ONE_PORT(sc) ? 160 : 246); } else if (sc->mtu > 4096) { if (BXE_ONE_PORT(sc)) { low = 160; } else { val = sc->mtu; /* (24*1024 + val*4)/256 */ low = (96 + (val / 64) + ((val % 64) ? 1 : 0)); } } else { low = (BXE_ONE_PORT(sc) ? 80 : 160); } high = (low + 56); /* 14*1024/256 */ REG_WR(sc, BRB1_REG_PAUSE_LOW_THRESHOLD_0 + port*4, low); REG_WR(sc, BRB1_REG_PAUSE_HIGH_THRESHOLD_0 + port*4, high); } if (CHIP_IS_MODE_4_PORT(sc)) { REG_WR(sc, SC_PORT(sc) ? BRB1_REG_MAC_GUARANTIED_1 : BRB1_REG_MAC_GUARANTIED_0, 40); } ecore_init_block(sc, BLOCK_PRS, init_phase); if (CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* configure headers for AFEX mode */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, 0xE); REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_TAG_0_PORT_1 : PRS_REG_HDRS_AFTER_TAG_0_PORT_0, 0x6); REG_WR(sc, SC_PORT(sc) ? PRS_REG_MUST_HAVE_HDRS_PORT_1 : PRS_REG_MUST_HAVE_HDRS_PORT_0, 0xA); } else { /* Ovlan exists only if we are in multi-function + * switch-dependent mode, in switch-independent there * is no ovlan headers */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, (sc->devinfo.mf_info.path_has_ovlan ? 7 : 6)); } } ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (CHIP_IS_E1x(sc)) { /* configure PBF to work without PAUSE mtu 9000 */ REG_WR(sc, PBF_REG_P0_PAUSE_ENABLE + port*4, 0); /* update threshold */ REG_WR(sc, PBF_REG_P0_ARB_THRSH + port*4, (9040/16)); /* update init credit */ REG_WR(sc, PBF_REG_P0_INIT_CRD + port*4, (9040/16) + 553 - 22); /* probe changes */ REG_WR(sc, PBF_REG_INIT_P0 + port*4, 1); DELAY(50); REG_WR(sc, PBF_REG_INIT_P0 + port*4, 0); } if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_SRC, init_phase); } ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (CHIP_IS_E1(sc)) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); ecore_init_block(sc, BLOCK_IGU, init_phase); ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* init aeu_mask_attn_func_0/1: * - SF mode: bits 3-7 are masked. only bits 0-2 are in use * - MF mode: bit 3 is masked. bits 0-2 are in use as in SF * bits 4-7 are used for "per vn group attention" */ val = IS_MF(sc) ? 0xF7 : 0x7; /* Enable DCBX attention for all but E1 */ val |= CHIP_IS_E1(sc) ? 0 : 0x10; REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, val); ecore_init_block(sc, BLOCK_NIG, init_phase); if (!CHIP_IS_E1x(sc)) { /* Bit-map indicating which L2 hdrs may appear after the * basic Ethernet header */ if (IS_MF_AFEX(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, 0xE); } else { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, IS_MF_SD(sc) ? 7 : 6); } if (CHIP_IS_E3(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_LLH1_MF_MODE : NIG_REG_LLH_MF_MODE, IS_MF(sc)); } } if (!CHIP_IS_E3(sc)) { REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port*4, 1); } if (!CHIP_IS_E1(sc)) { /* 0x2 disable mf_ov, 0x1 enable */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK_MF + port*4, (IS_MF_SD(sc) ? 0x1 : 0x2)); if (!CHIP_IS_E1x(sc)) { val = 0; switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: val = 1; break; case MULTI_FUNCTION_SI: case MULTI_FUNCTION_AFEX: val = 2; break; } REG_WR(sc, (SC_PORT(sc) ? NIG_REG_LLH1_CLS_TYPE : NIG_REG_LLH0_CLS_TYPE), val); } REG_WR(sc, NIG_REG_LLFC_ENABLE_0 + port*4, 0); REG_WR(sc, NIG_REG_LLFC_OUT_EN_0 + port*4, 0); REG_WR(sc, NIG_REG_PAUSE_ENABLE_0 + port*4, 1); } /* If SPIO5 is set to generate interrupts, enable it for this port */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); if (val & MISC_SPIO_SPIO5) { uint32_t reg_addr = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0); val = REG_RD(sc, reg_addr); val |= AEU_INPUTS_ATTN_BITS_SPIO5; REG_WR(sc, reg_addr, val); } return (0); } static uint32_t bxe_flr_clnup_reg_poll(struct bxe_softc *sc, uint32_t reg, uint32_t expected, uint32_t poll_count) { uint32_t cur_cnt = poll_count; uint32_t val; while ((val = REG_RD(sc, reg)) != expected && cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); } return (val); } static int bxe_flr_clnup_poll_hw_counter(struct bxe_softc *sc, uint32_t reg, char *msg, uint32_t poll_cnt) { uint32_t val = bxe_flr_clnup_reg_poll(sc, reg, 0, poll_cnt); if (val != 0) { BLOGE(sc, "%s usage count=%d\n", msg, val); return (1); } return (0); } /* Common routines with VF FLR cleanup */ static uint32_t bxe_flr_clnup_poll_count(struct bxe_softc *sc) { /* adjust polling timeout */ if (CHIP_REV_IS_EMUL(sc)) { return (FLR_POLL_CNT * 2000); } if (CHIP_REV_IS_FPGA(sc)) { return (FLR_POLL_CNT * 120); } return (FLR_POLL_CNT); } static int bxe_poll_hw_usage_counters(struct bxe_softc *sc, uint32_t poll_cnt) { /* wait for CFC PF usage-counter to zero (includes all the VFs) */ if (bxe_flr_clnup_poll_hw_counter(sc, CFC_REG_NUM_LCIDS_INSIDE_PF, "CFC PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for DQ PF usage-counter to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, DORQ_REG_PF_USAGE_CNT, "DQ PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for QM PF usage-counter to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, QM_REG_PF_USG_CNT_0 + 4*SC_FUNC(sc), "QM PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for Timer PF usage-counters to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, TM_REG_LIN0_VNIC_UC + 4*SC_PORT(sc), "Timers VNIC usage counter timed out", poll_cnt)) { return (1); } if (bxe_flr_clnup_poll_hw_counter(sc, TM_REG_LIN0_NUM_SCANS + 4*SC_PORT(sc), "Timers NUM_SCANS usage counter timed out", poll_cnt)) { return (1); } /* Wait DMAE PF usage counter to zero */ if (bxe_flr_clnup_poll_hw_counter(sc, dmae_reg_go_c[INIT_DMAE_C(sc)], "DMAE dommand register timed out", poll_cnt)) { return (1); } return (0); } #define OP_GEN_PARAM(param) \ (((param) << SDM_OP_GEN_COMP_PARAM_SHIFT) & SDM_OP_GEN_COMP_PARAM) #define OP_GEN_TYPE(type) \ (((type) << SDM_OP_GEN_COMP_TYPE_SHIFT) & SDM_OP_GEN_COMP_TYPE) #define OP_GEN_AGG_VECT(index) \ (((index) << SDM_OP_GEN_AGG_VECT_IDX_SHIFT) & SDM_OP_GEN_AGG_VECT_IDX) static int bxe_send_final_clnup(struct bxe_softc *sc, uint8_t clnup_func, uint32_t poll_cnt) { uint32_t op_gen_command = 0; uint32_t comp_addr = (BAR_CSTRORM_INTMEM + CSTORM_FINAL_CLEANUP_COMPLETE_OFFSET(clnup_func)); int ret = 0; if (REG_RD(sc, comp_addr)) { BLOGE(sc, "Cleanup complete was not 0 before sending\n"); return (1); } op_gen_command |= OP_GEN_PARAM(XSTORM_AGG_INT_FINAL_CLEANUP_INDEX); op_gen_command |= OP_GEN_TYPE(XSTORM_AGG_INT_FINAL_CLEANUP_COMP_TYPE); op_gen_command |= OP_GEN_AGG_VECT(clnup_func); op_gen_command |= 1 << SDM_OP_GEN_AGG_VECT_IDX_VALID_SHIFT; BLOGD(sc, DBG_LOAD, "sending FW Final cleanup\n"); REG_WR(sc, XSDM_REG_OPERATION_GEN, op_gen_command); if (bxe_flr_clnup_reg_poll(sc, comp_addr, 1, poll_cnt) != 1) { BLOGE(sc, "FW final cleanup did not succeed\n"); BLOGD(sc, DBG_LOAD, "At timeout completion address contained %x\n", (REG_RD(sc, comp_addr))); bxe_panic(sc, ("FLR cleanup failed\n")); return (1); } /* Zero completion for nxt FLR */ REG_WR(sc, comp_addr, 0); return (ret); } static void bxe_pbf_pN_buf_flushed(struct bxe_softc *sc, struct pbf_pN_buf_regs *regs, uint32_t poll_count) { uint32_t init_crd, crd, crd_start, crd_freed, crd_freed_start; uint32_t cur_cnt = poll_count; crd_freed = crd_freed_start = REG_RD(sc, regs->crd_freed); crd = crd_start = REG_RD(sc, regs->crd); init_crd = REG_RD(sc, regs->init_crd); BLOGD(sc, DBG_LOAD, "INIT CREDIT[%d] : %x\n", regs->pN, init_crd); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : s:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: s:%x\n", regs->pN, crd_freed); while ((crd != init_crd) && ((uint32_t)((int32_t)crd_freed - (int32_t)crd_freed_start) < (init_crd - crd_start))) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); crd = REG_RD(sc, regs->crd); crd_freed = REG_RD(sc, regs->crd_freed); } else { BLOGD(sc, DBG_LOAD, "PBF tx buffer[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : c:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: c:%x\n", regs->pN, crd_freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF tx buffer[%d]\n", poll_count-cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_pbf_pN_cmd_flushed(struct bxe_softc *sc, struct pbf_pN_cmd_regs *regs, uint32_t poll_count) { uint32_t occup, to_free, freed, freed_start; uint32_t cur_cnt = poll_count; occup = to_free = REG_RD(sc, regs->lines_occup); freed = freed_start = REG_RD(sc, regs->lines_freed); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); while (occup && ((uint32_t)((int32_t)freed - (int32_t)freed_start) < to_free)) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); occup = REG_RD(sc, regs->lines_occup); freed = REG_RD(sc, regs->lines_freed); } else { BLOGD(sc, DBG_LOAD, "PBF cmd queue[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF cmd queue[%d]\n", poll_count - cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_tx_hw_flushed(struct bxe_softc *sc, uint32_t poll_count) { struct pbf_pN_cmd_regs cmd_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q0 : PBF_REG_P0_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_Q0 : PBF_REG_P0_TQ_LINES_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q1 : PBF_REG_P1_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_Q1 : PBF_REG_P1_TQ_LINES_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_LB_Q : PBF_REG_P4_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_LB_Q : PBF_REG_P4_TQ_LINES_FREED_CNT} }; struct pbf_pN_buf_regs buf_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q0 : PBF_REG_P0_INIT_CRD , (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_Q0 : PBF_REG_P0_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q0 : PBF_REG_P0_INTERNAL_CRD_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q1 : PBF_REG_P1_INIT_CRD, (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_Q1 : PBF_REG_P1_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q1 : PBF_REG_P1_INTERNAL_CRD_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_LB_Q : PBF_REG_P4_INIT_CRD, (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_LB_Q : PBF_REG_P4_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_LB_Q : PBF_REG_P4_INTERNAL_CRD_FREED_CNT}, }; int i; /* Verify the command queues are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(cmd_regs); i++) { bxe_pbf_pN_cmd_flushed(sc, &cmd_regs[i], poll_count); } /* Verify the transmission buffers are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(buf_regs); i++) { bxe_pbf_pN_buf_flushed(sc, &buf_regs[i], poll_count); } } static void bxe_hw_enable_status(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, CFC_REG_WEAK_ENABLE_PF); BLOGD(sc, DBG_LOAD, "CFC_REG_WEAK_ENABLE_PF is 0x%x\n", val); val = REG_RD(sc, PBF_REG_DISABLE_PF); BLOGD(sc, DBG_LOAD, "PBF_REG_DISABLE_PF is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSI_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSI_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_FUNC_MASK); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_FUNC_MASK is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER is 0x%x\n", val); } static int bxe_pf_flr_clnup(struct bxe_softc *sc) { uint32_t poll_cnt = bxe_flr_clnup_poll_count(sc); BLOGD(sc, DBG_LOAD, "Cleanup after FLR PF[%d]\n", SC_ABS_FUNC(sc)); /* Re-enable PF target read access */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); /* Poll HW usage counters */ BLOGD(sc, DBG_LOAD, "Polling usage counters\n"); if (bxe_poll_hw_usage_counters(sc, poll_cnt)) { return (-1); } /* Zero the igu 'trailing edge' and 'leading edge' */ /* Send the FW cleanup command */ if (bxe_send_final_clnup(sc, (uint8_t)SC_FUNC(sc), poll_cnt)) { return (-1); } /* ATC cleanup */ /* Verify TX hw is flushed */ bxe_tx_hw_flushed(sc, poll_cnt); /* Wait 100ms (not adjusted according to platform) */ DELAY(100000); /* Verify no pending pci transactions */ if (bxe_is_pcie_pending(sc)) { BLOGE(sc, "PCIE Transactions still pending\n"); } /* Debug */ bxe_hw_enable_status(sc); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); return (0); } static int bxe_init_hw_func(struct bxe_softc *sc) { int port = SC_PORT(sc); int func = SC_FUNC(sc); int init_phase = PHASE_PF0 + func; struct ecore_ilt *ilt = sc->ilt; uint16_t cdu_ilt_start; uint32_t addr, val; uint32_t main_mem_base, main_mem_size, main_mem_prty_clr; int i, main_mem_width, rc; BLOGD(sc, DBG_LOAD, "starting func init for func %d\n", func); /* FLR cleanup */ if (!CHIP_IS_E1x(sc)) { rc = bxe_pf_flr_clnup(sc); if (rc) { BLOGE(sc, "FLR cleanup failed!\n"); // XXX bxe_fw_dump(sc); // XXX bxe_idle_chk(sc); return (rc); } } /* set MSI reconfigure capability */ if (sc->devinfo.int_block == INT_BLOCK_HC) { addr = (port ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0); val = REG_RD(sc, addr); val |= HC_CONFIG_0_REG_MSI_ATTN_EN_0; REG_WR(sc, addr, val); } ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); ilt = sc->ilt; cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start; for (i = 0; i < L2_ILT_LINES(sc); i++) { ilt->lines[cdu_ilt_start + i].page = sc->context[i].vcxt; ilt->lines[cdu_ilt_start + i].page_mapping = sc->context[i].vcxt_dma.paddr; ilt->lines[cdu_ilt_start + i].size = sc->context[i].size; } ecore_ilt_init_op(sc, INITOP_SET); /* Set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); BLOGD(sc, DBG_LOAD, "NIC MODE configured\n"); if (!CHIP_IS_E1x(sc)) { uint32_t pf_conf = IGU_PF_CONF_FUNC_EN; /* Turn on a single ISR mode in IGU if driver is going to use * INT#x or MSI */ if (sc->interrupt_mode != INTR_MODE_MSIX) { pf_conf |= IGU_PF_CONF_SINGLE_ISR_EN; } /* * Timers workaround bug: function init part. * Need to wait 20msec after initializing ILT, * needed to make sure there are no requests in * one of the PXP internal queues with "old" ILT addresses */ DELAY(20000); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function * init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); /* Enable the function in IGU */ REG_WR(sc, IGU_REG_PF_CONFIGURATION, pf_conf); } sc->dmae_ready = 1; ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, func); ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_NIG, init_phase); ecore_init_block(sc, BLOCK_SRC, init_phase); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, QM_REG_PF_EN, 1); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, TSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, USEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, CSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, XSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); } ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TM, init_phase); ecore_init_block(sc, BLOCK_DORQ, init_phase); bxe_iov_init_dq(sc); ecore_init_block(sc, BLOCK_BRB1, init_phase); ecore_init_block(sc, BLOCK_PRS, init_phase); ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PBF_REG_DISABLE_PF, 0); ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 1); if (IS_MF(sc)) { REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1); REG_WR(sc, NIG_REG_LLH0_FUNC_VLAN_ID + port*8, OVLAN(sc)); } ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* HC init per function */ if (sc->devinfo.int_block == INT_BLOCK_HC) { if (CHIP_IS_E1H(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); } else { int num_segs, sb_idx, prod_offset; REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } ecore_init_block(sc, BLOCK_IGU, init_phase); if (!CHIP_IS_E1x(sc)) { int dsb_idx = 0; /** * Producer memory: * E2 mode: address 0-135 match to the mapping memory; * 136 - PF0 default prod; 137 - PF1 default prod; * 138 - PF2 default prod; 139 - PF3 default prod; * 140 - PF0 attn prod; 141 - PF1 attn prod; * 142 - PF2 attn prod; 143 - PF3 attn prod; * 144-147 reserved. * * E1.5 mode - In backward compatible mode; * for non default SB; each even line in the memory * holds the U producer and each odd line hold * the C producer. The first 128 producers are for * NDSB (PF0 - 0-31; PF1 - 32-63 and so on). The last 20 * producers are for the DSB for each PF. * Each PF has five segments: (the order inside each * segment is PF0; PF1; PF2; PF3) - 128-131 U prods; * 132-135 C prods; 136-139 X prods; 140-143 T prods; * 144-147 attn prods; */ /* non-default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_NDSB_NUM_SEGS : IGU_NORM_NDSB_NUM_SEGS; for (sb_idx = 0; sb_idx < sc->igu_sb_cnt; sb_idx++) { prod_offset = (sc->igu_base_sb + sb_idx) * num_segs; for (i = 0; i < num_segs; i++) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i) * 4; REG_WR(sc, addr, 0); } /* send consumer update with value 0 */ bxe_ack_sb(sc, sc->igu_base_sb + sb_idx, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_igu_clear_sb(sc, sc->igu_base_sb + sb_idx); } /* default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_DSB_NUM_SEGS : IGU_NORM_DSB_NUM_SEGS; if (CHIP_IS_MODE_4_PORT(sc)) dsb_idx = SC_FUNC(sc); else dsb_idx = SC_VN(sc); prod_offset = (CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_BASE_DSB_PROD + dsb_idx : IGU_NORM_BASE_DSB_PROD + dsb_idx); /* * igu prods come in chunks of E1HVN_MAX (4) - * does not matters what is the current chip mode */ for (i = 0; i < (num_segs * E1HVN_MAX); i += E1HVN_MAX) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i)*4; REG_WR(sc, addr, 0); } /* send consumer update with 0 */ if (CHIP_INT_MODE_IS_BC(sc)) { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, CSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, XSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, TSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } else { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } bxe_igu_clear_sb(sc, sc->igu_dsb_id); /* !!! these should become driver const once rf-tool supports split-68 const */ REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_MSB, 0); REG_WR(sc, IGU_REG_SB_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_MASK_MSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_LSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_MSB, 0); } } /* Reset PCIE errors for debug */ REG_WR(sc, 0x2114, 0xffffffff); REG_WR(sc, 0x2120, 0xffffffff); if (CHIP_IS_E1x(sc)) { main_mem_size = HC_REG_MAIN_MEMORY_SIZE / 2; /*dwords*/ main_mem_base = HC_REG_MAIN_MEMORY + SC_PORT(sc) * (main_mem_size * 4); main_mem_prty_clr = HC_REG_HC_PRTY_STS_CLR; main_mem_width = 8; val = REG_RD(sc, main_mem_prty_clr); if (val) { BLOGD(sc, DBG_LOAD, "Parity errors in HC block during function init (0x%x)!\n", val); } /* Clear "false" parity errors in MSI-X table */ for (i = main_mem_base; i < main_mem_base + main_mem_size * 4; i += main_mem_width) { bxe_read_dmae(sc, i, main_mem_width / 4); bxe_write_dmae(sc, BXE_SP_MAPPING(sc, wb_data), i, main_mem_width / 4); } /* Clear HC parity attention */ REG_RD(sc, main_mem_prty_clr); } #if 1 /* Enable STORMs SP logging */ REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); #endif elink_phy_probe(&sc->link_params); return (0); } static void bxe_link_reset(struct bxe_softc *sc) { if (!BXE_NOMCP(sc)) { bxe_acquire_phy_lock(sc); elink_lfa_reset(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); } else { if (!CHIP_REV_IS_SLOW(sc)) { BLOGW(sc, "Bootcode is missing - cannot reset link\n"); } } } static void bxe_reset_port(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; ELINK_DEBUG_P0(sc, "bxe_reset_port called\n"); /* reset physical Link */ bxe_link_reset(sc); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); /* Do not rcv packets to BRB */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK + port*4, 0x0); /* Do not direct rcv packets that are not for MCP to the BRB */ REG_WR(sc, (port ? NIG_REG_LLH1_BRB1_NOT_MCP : NIG_REG_LLH0_BRB1_NOT_MCP), 0x0); /* Configure AEU */ REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, 0); DELAY(100000); /* Check for BRB port occupancy */ val = REG_RD(sc, BRB1_REG_PORT_NUM_OCC_BLOCKS_0 + port*4); if (val) { BLOGD(sc, DBG_LOAD, "BRB1 is not empty, %d blocks are occupied\n", val); } /* TODO: Close Doorbell port? */ } static void bxe_ilt_wr(struct bxe_softc *sc, uint32_t index, bus_addr_t addr) { int reg; uint32_t wb_write[2]; if (CHIP_IS_E1(sc)) { reg = PXP2_REG_RQ_ONCHIP_AT + index*8; } else { reg = PXP2_REG_RQ_ONCHIP_AT_B0 + index*8; } wb_write[0] = ONCHIP_ADDR1(addr); wb_write[1] = ONCHIP_ADDR2(addr); REG_WR_DMAE(sc, reg, wb_write, 2); } static void bxe_clear_func_ilt(struct bxe_softc *sc, uint32_t func) { uint32_t i, base = FUNC_ILT_BASE(func); for (i = base; i < base + ILT_PER_FUNC; i++) { bxe_ilt_wr(sc, i, 0); } } static void bxe_reset_func(struct bxe_softc *sc) { struct bxe_fastpath *fp; int port = SC_PORT(sc); int func = SC_FUNC(sc); int i; /* Disable the function in the FW */ REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(func), 0); /* FP SBs */ FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET(fp->fw_sb_id), SB_DISABLED); } /* SP SB */ REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_STATE_OFFSET(func), SB_DISABLED); for (i = 0; i < XSTORM_SPQ_DATA_SIZE / 4; i++) { REG_WR(sc, BAR_XSTRORM_INTMEM + XSTORM_SPQ_DATA_OFFSET(func), 0); } /* Configure IGU */ if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } else { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } if (CNIC_LOADED(sc)) { /* Disable Timer scan */ REG_WR(sc, TM_REG_EN_LINEAR0_TIMER + port*4, 0); /* * Wait for at least 10ms and up to 2 second for the timers * scan to complete */ for (i = 0; i < 200; i++) { DELAY(10000); if (!REG_RD(sc, TM_REG_LIN0_SCAN_ON + port*4)) break; } } /* Clear ILT */ bxe_clear_func_ilt(sc, func); /* * Timers workaround bug for E2: if this is vnic-3, * we need to set the entire ilt range for this timers. */ if (!CHIP_IS_E1x(sc) && SC_VN(sc) == 3) { struct ilt_client_info ilt_cli; /* use dummy TM client */ memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; ecore_ilt_boundry_init_op(sc, &ilt_cli, 0, INITOP_CLEAR); } /* this assumes that reset_port() called before reset_func()*/ if (!CHIP_IS_E1x(sc)) { bxe_pf_disable(sc); } sc->dmae_ready = 0; } static int bxe_gunzip_init(struct bxe_softc *sc) { return (0); } static void bxe_gunzip_end(struct bxe_softc *sc) { return; } static int bxe_init_firmware(struct bxe_softc *sc) { if (CHIP_IS_E1(sc)) { ecore_init_e1_firmware(sc); sc->iro_array = e1_iro_arr; } else if (CHIP_IS_E1H(sc)) { ecore_init_e1h_firmware(sc); sc->iro_array = e1h_iro_arr; } else if (!CHIP_IS_E1x(sc)) { ecore_init_e2_firmware(sc); sc->iro_array = e2_iro_arr; } else { BLOGE(sc, "Unsupported chip revision\n"); return (-1); } return (0); } static void bxe_release_firmware(struct bxe_softc *sc) { /* Do nothing */ return; } static int ecore_gunzip(struct bxe_softc *sc, const uint8_t *zbuf, int len) { /* XXX : Implement... */ BLOGD(sc, DBG_LOAD, "ECORE_GUNZIP NOT IMPLEMENTED\n"); return (FALSE); } static void ecore_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { bxe_reg_wr_ind(sc, addr, val); } static void ecore_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { bxe_write_dmae_phys_len(sc, phys_addr, addr, len); } void ecore_storm_memset_struct(struct bxe_softc *sc, uint32_t addr, size_t size, uint32_t *data) { uint8_t i; for (i = 0; i < size/4; i++) { REG_WR(sc, addr + (i * 4), data[i]); } } /* * character device - ioctl interface definitions */ #include "bxe_dump.h" #include "bxe_ioctl.h" #include static int bxe_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td); static struct cdevsw bxe_cdevsw = { .d_version = D_VERSION, .d_ioctl = bxe_eioctl, .d_name = "bxecnic", }; #define BXE_PATH(sc) (CHIP_IS_E1x(sc) ? 0 : (sc->pcie_func & 1)) #define DUMP_ALL_PRESETS 0x1FFF #define DUMP_MAX_PRESETS 13 #define IS_E1_REG(chips) ((chips & DUMP_CHIP_E1) == DUMP_CHIP_E1) #define IS_E1H_REG(chips) ((chips & DUMP_CHIP_E1H) == DUMP_CHIP_E1H) #define IS_E2_REG(chips) ((chips & DUMP_CHIP_E2) == DUMP_CHIP_E2) #define IS_E3A0_REG(chips) ((chips & DUMP_CHIP_E3A0) == DUMP_CHIP_E3A0) #define IS_E3B0_REG(chips) ((chips & DUMP_CHIP_E3B0) == DUMP_CHIP_E3B0) #define IS_REG_IN_PRESET(presets, idx) \ ((presets & (1 << (idx-1))) == (1 << (idx-1))) static int bxe_get_preset_regs_len(struct bxe_softc *sc, uint32_t preset) { if (CHIP_IS_E1(sc)) return dump_num_registers[0][preset-1]; else if (CHIP_IS_E1H(sc)) return dump_num_registers[1][preset-1]; else if (CHIP_IS_E2(sc)) return dump_num_registers[2][preset-1]; else if (CHIP_IS_E3A0(sc)) return dump_num_registers[3][preset-1]; else if (CHIP_IS_E3B0(sc)) return dump_num_registers[4][preset-1]; else return 0; } static int bxe_get_total_regs_len32(struct bxe_softc *sc) { uint32_t preset_idx; int regdump_len32 = 0; /* Calculate the total preset regs length */ for (preset_idx = 1; preset_idx <= DUMP_MAX_PRESETS; preset_idx++) { regdump_len32 += bxe_get_preset_regs_len(sc, preset_idx); } return regdump_len32; } static const uint32_t * __bxe_get_page_addr_ar(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return page_vals_e2; else if (CHIP_IS_E3(sc)) return page_vals_e3; else return NULL; } static uint32_t __bxe_get_page_reg_num(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return PAGE_MODE_VALUES_E2; else if (CHIP_IS_E3(sc)) return PAGE_MODE_VALUES_E3; else return 0; } static const uint32_t * __bxe_get_page_write_ar(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return page_write_regs_e2; else if (CHIP_IS_E3(sc)) return page_write_regs_e3; else return NULL; } static uint32_t __bxe_get_page_write_num(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return PAGE_WRITE_REGS_E2; else if (CHIP_IS_E3(sc)) return PAGE_WRITE_REGS_E3; else return 0; } static const struct reg_addr * __bxe_get_page_read_ar(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return page_read_regs_e2; else if (CHIP_IS_E3(sc)) return page_read_regs_e3; else return NULL; } static uint32_t __bxe_get_page_read_num(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return PAGE_READ_REGS_E2; else if (CHIP_IS_E3(sc)) return PAGE_READ_REGS_E3; else return 0; } static bool bxe_is_reg_in_chip(struct bxe_softc *sc, const struct reg_addr *reg_info) { if (CHIP_IS_E1(sc)) return IS_E1_REG(reg_info->chips); else if (CHIP_IS_E1H(sc)) return IS_E1H_REG(reg_info->chips); else if (CHIP_IS_E2(sc)) return IS_E2_REG(reg_info->chips); else if (CHIP_IS_E3A0(sc)) return IS_E3A0_REG(reg_info->chips); else if (CHIP_IS_E3B0(sc)) return IS_E3B0_REG(reg_info->chips); else return 0; } static bool bxe_is_wreg_in_chip(struct bxe_softc *sc, const struct wreg_addr *wreg_info) { if (CHIP_IS_E1(sc)) return IS_E1_REG(wreg_info->chips); else if (CHIP_IS_E1H(sc)) return IS_E1H_REG(wreg_info->chips); else if (CHIP_IS_E2(sc)) return IS_E2_REG(wreg_info->chips); else if (CHIP_IS_E3A0(sc)) return IS_E3A0_REG(wreg_info->chips); else if (CHIP_IS_E3B0(sc)) return IS_E3B0_REG(wreg_info->chips); else return 0; } /** * bxe_read_pages_regs - read "paged" registers * * @bp device handle * @p output buffer * * Reads "paged" memories: memories that may only be read by first writing to a * specific address ("write address") and then reading from a specific address * ("read address"). There may be more than one write address per "page" and * more than one read address per write address. */ static void bxe_read_pages_regs(struct bxe_softc *sc, uint32_t *p, uint32_t preset) { uint32_t i, j, k, n; /* addresses of the paged registers */ const uint32_t *page_addr = __bxe_get_page_addr_ar(sc); /* number of paged registers */ int num_pages = __bxe_get_page_reg_num(sc); /* write addresses */ const uint32_t *write_addr = __bxe_get_page_write_ar(sc); /* number of write addresses */ int write_num = __bxe_get_page_write_num(sc); /* read addresses info */ const struct reg_addr *read_addr = __bxe_get_page_read_ar(sc); /* number of read addresses */ int read_num = __bxe_get_page_read_num(sc); uint32_t addr, size; for (i = 0; i < num_pages; i++) { for (j = 0; j < write_num; j++) { REG_WR(sc, write_addr[j], page_addr[i]); for (k = 0; k < read_num; k++) { if (IS_REG_IN_PRESET(read_addr[k].presets, preset)) { size = read_addr[k].size; for (n = 0; n < size; n++) { addr = read_addr[k].addr + n*4; *p++ = REG_RD(sc, addr); } } } } } return; } static int bxe_get_preset_regs(struct bxe_softc *sc, uint32_t *p, uint32_t preset) { uint32_t i, j, addr; const struct wreg_addr *wreg_addr_p = NULL; if (CHIP_IS_E1(sc)) wreg_addr_p = &wreg_addr_e1; else if (CHIP_IS_E1H(sc)) wreg_addr_p = &wreg_addr_e1h; else if (CHIP_IS_E2(sc)) wreg_addr_p = &wreg_addr_e2; else if (CHIP_IS_E3A0(sc)) wreg_addr_p = &wreg_addr_e3; else if (CHIP_IS_E3B0(sc)) wreg_addr_p = &wreg_addr_e3b0; else return (-1); /* Read the idle_chk registers */ for (i = 0; i < IDLE_REGS_COUNT; i++) { if (bxe_is_reg_in_chip(sc, &idle_reg_addrs[i]) && IS_REG_IN_PRESET(idle_reg_addrs[i].presets, preset)) { for (j = 0; j < idle_reg_addrs[i].size; j++) *p++ = REG_RD(sc, idle_reg_addrs[i].addr + j*4); } } /* Read the regular registers */ for (i = 0; i < REGS_COUNT; i++) { if (bxe_is_reg_in_chip(sc, ®_addrs[i]) && IS_REG_IN_PRESET(reg_addrs[i].presets, preset)) { for (j = 0; j < reg_addrs[i].size; j++) *p++ = REG_RD(sc, reg_addrs[i].addr + j*4); } } /* Read the CAM registers */ if (bxe_is_wreg_in_chip(sc, wreg_addr_p) && IS_REG_IN_PRESET(wreg_addr_p->presets, preset)) { for (i = 0; i < wreg_addr_p->size; i++) { *p++ = REG_RD(sc, wreg_addr_p->addr + i*4); /* In case of wreg_addr register, read additional registers from read_regs array */ for (j = 0; j < wreg_addr_p->read_regs_count; j++) { addr = *(wreg_addr_p->read_regs); *p++ = REG_RD(sc, addr + j*4); } } } /* Paged registers are supported in E2 & E3 only */ if (CHIP_IS_E2(sc) || CHIP_IS_E3(sc)) { /* Read "paged" registers */ bxe_read_pages_regs(sc, p, preset); } return 0; } int bxe_grc_dump(struct bxe_softc *sc) { int rval = 0; uint32_t preset_idx; uint8_t *buf; uint32_t size; struct dump_header *d_hdr; uint32_t i; uint32_t reg_val; uint32_t reg_addr; uint32_t cmd_offset; struct ecore_ilt *ilt = SC_ILT(sc); struct bxe_fastpath *fp; struct ilt_client_info *ilt_cli; int grc_dump_size; if (sc->grcdump_done || sc->grcdump_started) return (rval); sc->grcdump_started = 1; BLOGI(sc, "Started collecting grcdump\n"); grc_dump_size = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); sc->grc_dump = malloc(grc_dump_size, M_DEVBUF, M_NOWAIT); if (sc->grc_dump == NULL) { BLOGW(sc, "Unable to allocate memory for grcdump collection\n"); return(ENOMEM); } /* Disable parity attentions as long as following dump may * cause false alarms by reading never written registers. We * will re-enable parity attentions right after the dump. */ /* Disable parity on path 0 */ bxe_pretend_func(sc, 0); ecore_disable_blocks_parity(sc); /* Disable parity on path 1 */ bxe_pretend_func(sc, 1); ecore_disable_blocks_parity(sc); /* Return to current function */ bxe_pretend_func(sc, SC_ABS_FUNC(sc)); buf = sc->grc_dump; d_hdr = sc->grc_dump; d_hdr->header_size = (sizeof(struct dump_header) >> 2) - 1; d_hdr->version = BNX2X_DUMP_VERSION; d_hdr->preset = DUMP_ALL_PRESETS; if (CHIP_IS_E1(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E1; } else if (CHIP_IS_E1H(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E1H; } else if (CHIP_IS_E2(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E2 | (BXE_PATH(sc) ? DUMP_PATH_1 : DUMP_PATH_0); } else if (CHIP_IS_E3A0(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E3A0 | (BXE_PATH(sc) ? DUMP_PATH_1 : DUMP_PATH_0); } else if (CHIP_IS_E3B0(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E3B0 | (BXE_PATH(sc) ? DUMP_PATH_1 : DUMP_PATH_0); } buf += sizeof(struct dump_header); for (preset_idx = 1; preset_idx <= DUMP_MAX_PRESETS; preset_idx++) { /* Skip presets with IOR */ if ((preset_idx == 2) || (preset_idx == 5) || (preset_idx == 8) || (preset_idx == 11)) continue; rval = bxe_get_preset_regs(sc, (uint32_t *)buf, preset_idx); if (rval) break; size = bxe_get_preset_regs_len(sc, preset_idx) * (sizeof (uint32_t)); buf += size; } bxe_pretend_func(sc, 0); ecore_clear_blocks_parity(sc); ecore_enable_blocks_parity(sc); bxe_pretend_func(sc, 1); ecore_clear_blocks_parity(sc); ecore_enable_blocks_parity(sc); /* Return to current function */ bxe_pretend_func(sc, SC_ABS_FUNC(sc)); if(sc->state == BXE_STATE_OPEN) { if(sc->fw_stats_req != NULL) { BLOGI(sc, "fw stats start_paddr %#jx end_paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->fw_stats_req_mapping, (uintmax_t)sc->fw_stats_data_mapping, sc->fw_stats_req, (sc->fw_stats_req_size + sc->fw_stats_data_size)); } if(sc->def_sb != NULL) { BLOGI(sc, "def_status_block paddr %p vaddr %p size 0x%zx\n", (void *)sc->def_sb_dma.paddr, sc->def_sb, sizeof(struct host_sp_status_block)); } if(sc->eq_dma.vaddr != NULL) { BLOGI(sc, "event_queue paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->eq_dma.paddr, sc->eq_dma.vaddr, BCM_PAGE_SIZE); } if(sc->sp_dma.vaddr != NULL) { BLOGI(sc, "slow path paddr %#jx vaddr %p size 0x%zx\n", (uintmax_t)sc->sp_dma.paddr, sc->sp_dma.vaddr, sizeof(struct bxe_slowpath)); } if(sc->spq_dma.vaddr != NULL) { BLOGI(sc, "slow path queue paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->spq_dma.paddr, sc->spq_dma.vaddr, BCM_PAGE_SIZE); } if(sc->gz_buf_dma.vaddr != NULL) { BLOGI(sc, "fw_buf paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->gz_buf_dma.paddr, sc->gz_buf_dma.vaddr, FW_BUF_SIZE); } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if(fp->sb_dma.vaddr != NULL && fp->tx_dma.vaddr != NULL && fp->rx_dma.vaddr != NULL && fp->rcq_dma.vaddr != NULL && fp->rx_sge_dma.vaddr != NULL) { BLOGI(sc, "FP status block fp %d paddr %#jx vaddr %p size 0x%zx\n", i, (uintmax_t)fp->sb_dma.paddr, fp->sb_dma.vaddr, sizeof(union bxe_host_hc_status_block)); BLOGI(sc, "TX BD CHAIN fp %d paddr %#jx vaddr %p size 0x%x\n", i, (uintmax_t)fp->tx_dma.paddr, fp->tx_dma.vaddr, (BCM_PAGE_SIZE * TX_BD_NUM_PAGES)); BLOGI(sc, "RX BD CHAIN fp %d paddr %#jx vaddr %p size 0x%x\n", i, (uintmax_t)fp->rx_dma.paddr, fp->rx_dma.vaddr, (BCM_PAGE_SIZE * RX_BD_NUM_PAGES)); BLOGI(sc, "RX RCQ CHAIN fp %d paddr %#jx vaddr %p size 0x%zx\n", i, (uintmax_t)fp->rcq_dma.paddr, fp->rcq_dma.vaddr, (BCM_PAGE_SIZE * RCQ_NUM_PAGES)); BLOGI(sc, "RX SGE CHAIN fp %d paddr %#jx vaddr %p size 0x%x\n", i, (uintmax_t)fp->rx_sge_dma.paddr, fp->rx_sge_dma.vaddr, (BCM_PAGE_SIZE * RX_SGE_NUM_PAGES)); } } if(ilt != NULL ) { ilt_cli = &ilt->clients[1]; if(ilt->lines != NULL) { for (i = ilt_cli->start; i <= ilt_cli->end; i++) { BLOGI(sc, "ECORE_ILT paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)(((struct bxe_dma *)((&ilt->lines[i])->page))->paddr), ((struct bxe_dma *)((&ilt->lines[i])->page))->vaddr, BCM_PAGE_SIZE); } } } cmd_offset = DMAE_REG_CMD_MEM; for (i = 0; i < 224; i++) { reg_addr = (cmd_offset +(i * 4)); reg_val = REG_RD(sc, reg_addr); BLOGI(sc, "DMAE_REG_CMD_MEM i=%d reg_addr 0x%x reg_val 0x%08x\n",i, reg_addr, reg_val); } } BLOGI(sc, "Collection of grcdump done\n"); sc->grcdump_done = 1; return(rval); } static int bxe_add_cdev(struct bxe_softc *sc) { sc->eeprom = malloc(BXE_EEPROM_MAX_DATA_LEN, M_DEVBUF, M_NOWAIT); if (sc->eeprom == NULL) { BLOGW(sc, "Unable to alloc for eeprom size buffer\n"); return (-1); } sc->ioctl_dev = make_dev(&bxe_cdevsw, sc->ifp->if_dunit, UID_ROOT, GID_WHEEL, 0600, "%s", if_name(sc->ifp)); if (sc->ioctl_dev == NULL) { free(sc->eeprom, M_DEVBUF); sc->eeprom = NULL; return (-1); } sc->ioctl_dev->si_drv1 = sc; return (0); } static void bxe_del_cdev(struct bxe_softc *sc) { if (sc->ioctl_dev != NULL) destroy_dev(sc->ioctl_dev); if (sc->eeprom != NULL) { free(sc->eeprom, M_DEVBUF); sc->eeprom = NULL; } sc->ioctl_dev = NULL; return; } static bool bxe_is_nvram_accessible(struct bxe_softc *sc) { if ((if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) == 0) return FALSE; return TRUE; } static int bxe_wr_eeprom(struct bxe_softc *sc, void *data, uint32_t offset, uint32_t len) { int rval = 0; if(!bxe_is_nvram_accessible(sc)) { BLOGW(sc, "Cannot access eeprom when interface is down\n"); return (-EAGAIN); } rval = bxe_nvram_write(sc, offset, (uint8_t *)data, len); return (rval); } static int bxe_rd_eeprom(struct bxe_softc *sc, void *data, uint32_t offset, uint32_t len) { int rval = 0; if(!bxe_is_nvram_accessible(sc)) { BLOGW(sc, "Cannot access eeprom when interface is down\n"); return (-EAGAIN); } rval = bxe_nvram_read(sc, offset, (uint8_t *)data, len); return (rval); } static int bxe_eeprom_rd_wr(struct bxe_softc *sc, bxe_eeprom_t *eeprom) { int rval = 0; switch (eeprom->eeprom_cmd) { case BXE_EEPROM_CMD_SET_EEPROM: rval = copyin(eeprom->eeprom_data, sc->eeprom, eeprom->eeprom_data_len); if (rval) break; rval = bxe_wr_eeprom(sc, sc->eeprom, eeprom->eeprom_offset, eeprom->eeprom_data_len); break; case BXE_EEPROM_CMD_GET_EEPROM: rval = bxe_rd_eeprom(sc, sc->eeprom, eeprom->eeprom_offset, eeprom->eeprom_data_len); if (rval) { break; } rval = copyout(sc->eeprom, eeprom->eeprom_data, eeprom->eeprom_data_len); break; default: rval = EINVAL; break; } if (rval) { BLOGW(sc, "ioctl cmd %d failed rval %d\n", eeprom->eeprom_cmd, rval); } return (rval); } static int bxe_get_settings(struct bxe_softc *sc, bxe_dev_setting_t *dev_p) { uint32_t ext_phy_config; int port = SC_PORT(sc); int cfg_idx = bxe_get_link_cfg_idx(sc); dev_p->supported = sc->port.supported[cfg_idx] | (sc->port.supported[cfg_idx ^ 1] & (ELINK_SUPPORTED_TP | ELINK_SUPPORTED_FIBRE)); dev_p->advertising = sc->port.advertising[cfg_idx]; if(sc->link_params.phy[bxe_get_cur_phy_idx(sc)].media_type == ELINK_ETH_PHY_SFP_1G_FIBER) { dev_p->supported = ~(ELINK_SUPPORTED_10000baseT_Full); dev_p->advertising &= ~(ADVERTISED_10000baseT_Full); } if ((sc->state == BXE_STATE_OPEN) && sc->link_vars.link_up && !(sc->flags & BXE_MF_FUNC_DIS)) { dev_p->duplex = sc->link_vars.duplex; if (IS_MF(sc) && !BXE_NOMCP(sc)) dev_p->speed = bxe_get_mf_speed(sc); else dev_p->speed = sc->link_vars.line_speed; } else { dev_p->duplex = DUPLEX_UNKNOWN; dev_p->speed = SPEED_UNKNOWN; } dev_p->port = bxe_media_detect(sc); ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); if((ext_phy_config & PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK) == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) dev_p->phy_address = sc->port.phy_addr; else if(((ext_phy_config & PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK) != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE) && ((ext_phy_config & PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK) != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN)) dev_p->phy_address = ELINK_XGXS_EXT_PHY_ADDR(ext_phy_config); else dev_p->phy_address = 0; if(sc->link_params.req_line_speed[cfg_idx] == ELINK_SPEED_AUTO_NEG) dev_p->autoneg = AUTONEG_ENABLE; else dev_p->autoneg = AUTONEG_DISABLE; return 0; } static int bxe_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct bxe_softc *sc; int rval = 0; device_t pci_dev; bxe_grcdump_t *dump = NULL; int grc_dump_size; bxe_drvinfo_t *drv_infop = NULL; bxe_dev_setting_t *dev_p; bxe_dev_setting_t dev_set; bxe_get_regs_t *reg_p; bxe_reg_rdw_t *reg_rdw_p; bxe_pcicfg_rdw_t *cfg_rdw_p; bxe_perm_mac_addr_t *mac_addr_p; if ((sc = (struct bxe_softc *)dev->si_drv1) == NULL) return ENXIO; pci_dev= sc->dev; dump = (bxe_grcdump_t *)data; switch(cmd) { case BXE_GRC_DUMP_SIZE: dump->pci_func = sc->pcie_func; dump->grcdump_size = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); break; case BXE_GRC_DUMP: grc_dump_size = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); if ((!sc->trigger_grcdump) || (dump->grcdump == NULL) || (dump->grcdump_size < grc_dump_size)) { rval = EINVAL; break; } if((sc->trigger_grcdump) && (!sc->grcdump_done) && (!sc->grcdump_started)) { rval = bxe_grc_dump(sc); } if((!rval) && (sc->grcdump_done) && (sc->grcdump_started) && (sc->grc_dump != NULL)) { dump->grcdump_dwords = grc_dump_size >> 2; rval = copyout(sc->grc_dump, dump->grcdump, grc_dump_size); free(sc->grc_dump, M_DEVBUF); sc->grc_dump = NULL; sc->grcdump_started = 0; sc->grcdump_done = 0; } break; case BXE_DRV_INFO: drv_infop = (bxe_drvinfo_t *)data; snprintf(drv_infop->drv_name, BXE_DRV_NAME_LENGTH, "%s", "bxe"); snprintf(drv_infop->drv_version, BXE_DRV_VERSION_LENGTH, "v:%s", BXE_DRIVER_VERSION); snprintf(drv_infop->mfw_version, BXE_MFW_VERSION_LENGTH, "%s", sc->devinfo.bc_ver_str); snprintf(drv_infop->stormfw_version, BXE_STORMFW_VERSION_LENGTH, "%s", sc->fw_ver_str); drv_infop->eeprom_dump_len = sc->devinfo.flash_size; drv_infop->reg_dump_len = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); snprintf(drv_infop->bus_info, BXE_BUS_INFO_LENGTH, "%d:%d:%d", sc->pcie_bus, sc->pcie_device, sc->pcie_func); break; case BXE_DEV_SETTING: dev_p = (bxe_dev_setting_t *)data; bxe_get_settings(sc, &dev_set); dev_p->supported = dev_set.supported; dev_p->advertising = dev_set.advertising; dev_p->speed = dev_set.speed; dev_p->duplex = dev_set.duplex; dev_p->port = dev_set.port; dev_p->phy_address = dev_set.phy_address; dev_p->autoneg = dev_set.autoneg; break; case BXE_GET_REGS: reg_p = (bxe_get_regs_t *)data; grc_dump_size = reg_p->reg_buf_len; if((!sc->grcdump_done) && (!sc->grcdump_started)) { bxe_grc_dump(sc); } if((sc->grcdump_done) && (sc->grcdump_started) && (sc->grc_dump != NULL)) { rval = copyout(sc->grc_dump, reg_p->reg_buf, grc_dump_size); free(sc->grc_dump, M_DEVBUF); sc->grc_dump = NULL; sc->grcdump_started = 0; sc->grcdump_done = 0; } break; case BXE_RDW_REG: reg_rdw_p = (bxe_reg_rdw_t *)data; if((reg_rdw_p->reg_cmd == BXE_READ_REG_CMD) && (reg_rdw_p->reg_access_type == BXE_REG_ACCESS_DIRECT)) reg_rdw_p->reg_val = REG_RD(sc, reg_rdw_p->reg_id); if((reg_rdw_p->reg_cmd == BXE_WRITE_REG_CMD) && (reg_rdw_p->reg_access_type == BXE_REG_ACCESS_DIRECT)) REG_WR(sc, reg_rdw_p->reg_id, reg_rdw_p->reg_val); break; case BXE_RDW_PCICFG: cfg_rdw_p = (bxe_pcicfg_rdw_t *)data; if(cfg_rdw_p->cfg_cmd == BXE_READ_PCICFG) { cfg_rdw_p->cfg_val = pci_read_config(sc->dev, cfg_rdw_p->cfg_id, cfg_rdw_p->cfg_width); } else if(cfg_rdw_p->cfg_cmd == BXE_WRITE_PCICFG) { pci_write_config(sc->dev, cfg_rdw_p->cfg_id, cfg_rdw_p->cfg_val, cfg_rdw_p->cfg_width); } else { BLOGW(sc, "BXE_RDW_PCICFG ioctl wrong cmd passed\n"); } break; case BXE_MAC_ADDR: mac_addr_p = (bxe_perm_mac_addr_t *)data; snprintf(mac_addr_p->mac_addr_str, sizeof(sc->mac_addr_str), "%s", sc->mac_addr_str); break; case BXE_EEPROM: rval = bxe_eeprom_rd_wr(sc, (bxe_eeprom_t *)data); break; default: break; } return (rval); } #ifdef NETDUMP static void -bxe_netdump_init(struct ifnet *ifp, int *nrxr, int *clsize) +bxe_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct bxe_softc *sc; sc = if_getsoftc(ifp); BXE_CORE_LOCK(sc); *nrxr = sc->num_queues; + *ncl = NETDUMP_MAX_IN_FLIGHT; *clsize = sc->fp[0].mbuf_alloc_size; BXE_CORE_UNLOCK(sc); } static void bxe_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) { } static int bxe_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct bxe_softc *sc; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || !sc->link_vars.link_up) return (ENOENT); error = bxe_tx_encap(&sc->fp[0], &m); if (error != 0 && m != NULL) m_freem(m); return (error); } static int bxe_netdump_poll(struct ifnet *ifp, int count) { struct bxe_softc *sc; int i; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || !sc->link_vars.link_up) return (ENOENT); for (i = 0; i < sc->num_queues; i++) (void)bxe_rxeof(sc, &sc->fp[0]); (void)bxe_txeof(sc, &sc->fp[0]); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/cxgb/cxgb_main.c =================================================================== --- user/markj/netdump/sys/dev/cxgb/cxgb_main.c (revision 332406) +++ user/markj/netdump/sys/dev/cxgb/cxgb_main.c (revision 332407) @@ -1,3654 +1,3655 @@ /************************************************************************** SPDX-License-Identifier: BSD-2-Clause-FreeBSD Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PRIV_SUPPORTED #include #endif static int cxgb_setup_interrupts(adapter_t *); static void cxgb_teardown_interrupts(adapter_t *); static void cxgb_init(void *); static int cxgb_init_locked(struct port_info *); static int cxgb_uninit_locked(struct port_info *); static int cxgb_uninit_synchronized(struct port_info *); static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgb_media_change(struct ifnet *); static int cxgb_ifm_type(int); static void cxgb_build_medialist(struct port_info *); static void cxgb_media_status(struct ifnet *, struct ifmediareq *); static uint64_t cxgb_get_counter(struct ifnet *, ift_counter); static int setup_sge_qsets(adapter_t *); static void cxgb_async_intr(void *); static void cxgb_tick_handler(void *, int); static void cxgb_tick(void *); static void link_check_callout(void *); static void check_link_status(void *, int); static void setup_rss(adapter_t *sc); static int alloc_filters(struct adapter *); static int setup_hw_filters(struct adapter *); static int set_filter(struct adapter *, int, const struct filter_info *); static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int, unsigned int, u64, u64); static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int, unsigned int, u64, u64); #ifdef TCP_OFFLOAD static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *); #endif /* Attachment glue for the PCI controller end of the device. Each port of * the device is attached separately, as defined later. */ static int cxgb_controller_probe(device_t); static int cxgb_controller_attach(device_t); static int cxgb_controller_detach(device_t); static void cxgb_free(struct adapter *); static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end); static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf); static int cxgb_get_regs_len(void); static void touch_bars(device_t dev); static void cxgb_update_mac_settings(struct port_info *p); #ifdef TCP_OFFLOAD static int toe_capability(struct port_info *, int); #endif static device_method_t cxgb_controller_methods[] = { DEVMETHOD(device_probe, cxgb_controller_probe), DEVMETHOD(device_attach, cxgb_controller_attach), DEVMETHOD(device_detach, cxgb_controller_detach), DEVMETHOD_END }; static driver_t cxgb_controller_driver = { "cxgbc", cxgb_controller_methods, sizeof(struct adapter) }; static int cxgbc_mod_event(module_t, int, void *); static devclass_t cxgb_controller_devclass; DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, cxgbc_mod_event, 0); MODULE_VERSION(cxgbc, 1); MODULE_DEPEND(cxgbc, firmware, 1, 1, 1); /* * Attachment glue for the ports. Attachment is done directly to the * controller device. */ static int cxgb_port_probe(device_t); static int cxgb_port_attach(device_t); static int cxgb_port_detach(device_t); static device_method_t cxgb_port_methods[] = { DEVMETHOD(device_probe, cxgb_port_probe), DEVMETHOD(device_attach, cxgb_port_attach), DEVMETHOD(device_detach, cxgb_port_detach), { 0, 0 } }; static driver_t cxgb_port_driver = { "cxgb", cxgb_port_methods, 0 }; static d_ioctl_t cxgb_extension_ioctl; static d_open_t cxgb_extension_open; static d_close_t cxgb_extension_close; static struct cdevsw cxgb_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = cxgb_extension_open, .d_close = cxgb_extension_close, .d_ioctl = cxgb_extension_ioctl, .d_name = "cxgb", }; static devclass_t cxgb_port_devclass; DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0); MODULE_VERSION(cxgb, 1); NETDUMP_DEFINE(cxgb); static struct mtx t3_list_lock; static SLIST_HEAD(, adapter) t3_list; #ifdef TCP_OFFLOAD static struct mtx t3_uld_list_lock; static SLIST_HEAD(, uld_info) t3_uld_list; #endif /* * The driver uses the best interrupt scheme available on a platform in the * order MSI-X, MSI, legacy pin interrupts. This parameter determines which * of these schemes the driver may consider as follows: * * msi = 2: choose from among all three options * msi = 1 : only consider MSI and pin interrupts * msi = 0: force pin interrupts */ static int msi_allowed = 2; SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters"); SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0, "MSI-X, MSI, INTx selector"); /* * The driver uses an auto-queue algorithm by default. * To disable it and force a single queue-set per port, use multiq = 0 */ static int multiq = 1; SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0, "use min(ncpus/ports, 8) queue-sets per port"); /* * By default the driver will not update the firmware unless * it was compiled against a newer version * */ static int force_fw_update = 0; SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0, "update firmware even if up to date"); int cxgb_use_16k_clusters = -1; SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN, &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue "); static int nfilters = -1; SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN, &nfilters, 0, "max number of entries in the filter table"); enum { MAX_TXQ_ENTRIES = 16384, MAX_CTRL_TXQ_ENTRIES = 1024, MAX_RSPQ_ENTRIES = 16384, MAX_RX_BUFFERS = 16384, MAX_RX_JUMBO_BUFFERS = 16384, MIN_TXQ_ENTRIES = 4, MIN_CTRL_TXQ_ENTRIES = 4, MIN_RSPQ_ENTRIES = 32, MIN_FL_ENTRIES = 32, MIN_FL_JUMBO_ENTRIES = 32 }; struct filter_info { u32 sip; u32 sip_mask; u32 dip; u16 sport; u16 dport; u32 vlan:12; u32 vlan_prio:3; u32 mac_hit:1; u32 mac_idx:4; u32 mac_vld:1; u32 pkt_type:2; u32 report_filter_id:1; u32 pass:1; u32 rss:1; u32 qset:3; u32 locked:1; u32 valid:1; }; enum { FILTER_NO_VLAN_PRI = 7 }; #define EEPROM_MAGIC 0x38E2F10C #define PORT_MASK ((1 << MAX_NPORTS) - 1) /* Table for probing the cards. The desc field isn't actually used */ struct cxgb_ident { uint16_t vendor; uint16_t device; int index; char *desc; } cxgb_identifiers[] = { {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"}, {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"}, {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"}, {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"}, {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"}, {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"}, {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"}, {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"}, {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"}, {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"}, {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"}, {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"}, {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"}, {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"}, {0, 0, 0, NULL} }; static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset); static __inline char t3rev2char(struct adapter *adapter) { char rev = 'z'; switch(adapter->params.rev) { case T3_REV_A: rev = 'a'; break; case T3_REV_B: case T3_REV_B2: rev = 'b'; break; case T3_REV_C: rev = 'c'; break; } return rev; } static struct cxgb_ident * cxgb_get_ident(device_t dev) { struct cxgb_ident *id; for (id = cxgb_identifiers; id->desc != NULL; id++) { if ((id->vendor == pci_get_vendor(dev)) && (id->device == pci_get_device(dev))) { return (id); } } return (NULL); } static const struct adapter_info * cxgb_get_adapter_info(device_t dev) { struct cxgb_ident *id; const struct adapter_info *ai; id = cxgb_get_ident(dev); if (id == NULL) return (NULL); ai = t3_get_adapter_info(id->index); return (ai); } static int cxgb_controller_probe(device_t dev) { const struct adapter_info *ai; char *ports, buf[80]; int nports; ai = cxgb_get_adapter_info(dev); if (ai == NULL) return (ENXIO); nports = ai->nports0 + ai->nports1; if (nports == 1) ports = "port"; else ports = "ports"; snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define FW_FNAME "cxgb_t3fw" #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom" #define TPSRAM_NAME "cxgb_t3%c_protocol_sram" static int upgrade_fw(adapter_t *sc) { const struct firmware *fw; int status; u32 vers; if ((fw = firmware_get(FW_FNAME)) == NULL) { device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME); return (ENOENT); } else device_printf(sc->dev, "installing firmware on card\n"); status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize); if (status != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", status); } else { t3_get_fw_version(sc, &vers); snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); } firmware_put(fw, FIRMWARE_UNLOAD); return (status); } /* * The cxgb_controller_attach function is responsible for the initial * bringup of the device. Its responsibilities include: * * 1. Determine if the device supports MSI or MSI-X. * 2. Allocate bus resources so that we can access the Base Address Register * 3. Create and initialize mutexes for the controller and its control * logic such as SGE and MDIO. * 4. Call hardware specific setup routine for the adapter as a whole. * 5. Allocate the BAR for doing MSI-X. * 6. Setup the line interrupt iff MSI-X is not supported. * 7. Create the driver's taskq. * 8. Start one task queue service thread. * 9. Check if the firmware and SRAM are up-to-date. They will be * auto-updated later (before FULL_INIT_DONE), if required. * 10. Create a child device for each MAC (port) * 11. Initialize T3 private state. * 12. Trigger the LED * 13. Setup offload iff supported. * 14. Reset/restart the tick callout. * 15. Attach sysctls * * NOTE: Any modification or deviation from this list MUST be reflected in * the above comment. Failure to do so will result in problems on various * error conditions including link flapping. */ static int cxgb_controller_attach(device_t dev) { device_t child; const struct adapter_info *ai; struct adapter *sc; int i, error = 0; uint32_t vers; int port_qsets = 1; int msi_needed, reg; char buf[80]; sc = device_get_softc(dev); sc->dev = dev; sc->msi_count = 0; ai = cxgb_get_adapter_info(dev); snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d", device_get_unit(dev)); ADAPTER_LOCK_INIT(sc, sc->lockbuf); snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d", device_get_unit(dev)); snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d", device_get_unit(dev)); snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d", device_get_unit(dev)); MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN); MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF); MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF); mtx_lock(&t3_list_lock); SLIST_INSERT_HEAD(&t3_list, sc, link); mtx_unlock(&t3_list_lock); /* find the PCIe link width and set max read request to 4KB*/ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { uint16_t lnk; lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2); sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4; if (sc->link_width < 8 && (ai->caps & SUPPORTED_10000baseT_Full)) { device_printf(sc->dev, "PCIe x%d Link, expect reduced performance\n", sc->link_width); } pci_set_max_read_req(dev, 4096); } touch_bars(dev); pci_enable_busmaster(dev); /* * Allocate the registers and make them available to the driver. * The registers that we care about for NIC mode are in BAR 0 */ sc->regs_rid = PCIR_BAR(0); if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE)) == NULL) { device_printf(dev, "Cannot allocate BAR region 0\n"); error = ENXIO; goto out; } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); for (i = 0; i < MAX_NPORTS; i++) sc->port[i].adapter = sc; if (t3_prep_adapter(sc, ai, 1) < 0) { printf("prep adapter failed\n"); error = ENODEV; goto out; } sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = NULL; if (is_offload(sc) && ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE)) == NULL)) { device_printf(dev, "Cannot allocate BAR region 1\n"); error = ENXIO; goto out; } /* Allocate the BAR for doing MSI-X. If it succeeds, try to allocate * enough messages for the queue sets. If that fails, try falling * back to MSI. If that fails, then try falling back to the legacy * interrupt pin model. */ sc->msix_regs_rid = 0x20; if ((msi_allowed >= 2) && (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->msix_regs_rid, RF_ACTIVE)) != NULL) { if (multiq) port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus); msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1; if (pci_msix_count(dev) == 0 || (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 || sc->msi_count != msi_needed) { device_printf(dev, "alloc msix failed - " "msi_count=%d, msi_needed=%d, err=%d; " "will try MSI\n", sc->msi_count, msi_needed, error); sc->msi_count = 0; port_qsets = 1; pci_release_msi(dev); bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); sc->msix_regs_res = NULL; } else { sc->flags |= USING_MSIX; sc->cxgb_intr = cxgb_async_intr; device_printf(dev, "using MSI-X interrupts (%u vectors)\n", sc->msi_count); } } if ((msi_allowed >= 1) && (sc->msi_count == 0)) { sc->msi_count = 1; if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) { device_printf(dev, "alloc msi failed - " "err=%d; will try INTx\n", error); sc->msi_count = 0; port_qsets = 1; pci_release_msi(dev); } else { sc->flags |= USING_MSI; sc->cxgb_intr = t3_intr_msi; device_printf(dev, "using MSI interrupts\n"); } } if (sc->msi_count == 0) { device_printf(dev, "using line interrupts\n"); sc->cxgb_intr = t3b_intr; } /* Create a private taskqueue thread for handling driver events */ sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq); if (sc->tq == NULL) { device_printf(dev, "failed to allocate controller task queue\n"); goto out; } taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", device_get_nameunit(dev)); TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc); /* Create a periodic callout for checking adapter status */ callout_init(&sc->cxgb_tick_ch, 1); if (t3_check_fw_version(sc) < 0 || force_fw_update) { /* * Warn user that a firmware update will be attempted in init. */ device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n", FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO); sc->flags &= ~FW_UPTODATE; } else { sc->flags |= FW_UPTODATE; } if (t3_check_tpsram_version(sc) < 0) { /* * Warn user that a firmware update will be attempted in init. */ device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n", t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); sc->flags &= ~TPS_UPTODATE; } else { sc->flags |= TPS_UPTODATE; } /* * Create a child device for each MAC. The ethernet attachment * will be done in these children. */ for (i = 0; i < (sc)->params.nports; i++) { struct port_info *pi; if ((child = device_add_child(dev, "cxgb", -1)) == NULL) { device_printf(dev, "failed to add child port\n"); error = EINVAL; goto out; } pi = &sc->port[i]; pi->adapter = sc; pi->nqsets = port_qsets; pi->first_qset = i*port_qsets; pi->port_id = i; pi->tx_chan = i >= ai->nports0; pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i; sc->rxpkt_map[pi->txpkt_intf] = i; sc->port[i].tx_chan = i >= ai->nports0; sc->portdev[i] = child; device_set_softc(child, pi); } if ((error = bus_generic_attach(dev)) != 0) goto out; /* initialize sge private state */ t3_sge_init_adapter(sc); t3_led_ready(sc); error = t3_get_fw_version(sc, &vers); if (error) goto out; snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s", ai->desc, is_offload(sc) ? "R" : "", sc->params.vpd.ec, sc->params.vpd.sn); device_set_desc_copy(dev, buf); snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x", sc->params.vpd.port_type[0], sc->params.vpd.port_type[1], sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]); device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]); callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); t3_add_attach_sysctls(sc); #ifdef TCP_OFFLOAD for (i = 0; i < NUM_CPL_HANDLERS; i++) sc->cpl_handler[i] = cpl_not_handled; #endif t3_intr_clear(sc); error = cxgb_setup_interrupts(sc); out: if (error) cxgb_free(sc); return (error); } /* * The cxgb_controller_detach routine is called with the device is * unloaded from the system. */ static int cxgb_controller_detach(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); cxgb_free(sc); return (0); } /* * The cxgb_free() is called by the cxgb_controller_detach() routine * to tear down the structures that were built up in * cxgb_controller_attach(), and should be the final piece of work * done when fully unloading the driver. * * * 1. Shutting down the threads started by the cxgb_controller_attach() * routine. * 2. Stopping the lower level device and all callouts (cxgb_down_locked()). * 3. Detaching all of the port devices created during the * cxgb_controller_attach() routine. * 4. Removing the device children created via cxgb_controller_attach(). * 5. Releasing PCI resources associated with the device. * 6. Turning off the offload support, iff it was turned on. * 7. Destroying the mutexes created in cxgb_controller_attach(). * */ static void cxgb_free(struct adapter *sc) { int i, nqsets = 0; ADAPTER_LOCK(sc); sc->flags |= CXGB_SHUTDOWN; ADAPTER_UNLOCK(sc); /* * Make sure all child devices are gone. */ bus_generic_detach(sc->dev); for (i = 0; i < (sc)->params.nports; i++) { if (sc->portdev[i] && device_delete_child(sc->dev, sc->portdev[i]) != 0) device_printf(sc->dev, "failed to delete child port\n"); nqsets += sc->port[i].nqsets; } /* * At this point, it is as if cxgb_port_detach has run on all ports, and * cxgb_down has run on the adapter. All interrupts have been silenced, * all open devices have been closed. */ KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)", __func__, sc->open_device_map)); for (i = 0; i < sc->params.nports; i++) { KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!", __func__, i)); } /* * Finish off the adapter's callouts. */ callout_drain(&sc->cxgb_tick_ch); callout_drain(&sc->sge_timer_ch); /* * Release resources grabbed under FULL_INIT_DONE by cxgb_up. The * sysctls are cleaned up by the kernel linker. */ if (sc->flags & FULL_INIT_DONE) { t3_free_sge_resources(sc, nqsets); sc->flags &= ~FULL_INIT_DONE; } /* * Release all interrupt resources. */ cxgb_teardown_interrupts(sc); if (sc->flags & (USING_MSI | USING_MSIX)) { device_printf(sc->dev, "releasing msi message(s)\n"); pci_release_msi(sc->dev); } else { device_printf(sc->dev, "no msi message to release\n"); } if (sc->msix_regs_res != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); } /* * Free the adapter's taskqueue. */ if (sc->tq != NULL) { taskqueue_free(sc->tq); sc->tq = NULL; } free(sc->filters, M_DEVBUF); t3_sge_free(sc); if (sc->udbs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->regs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); MTX_DESTROY(&sc->mdio_lock); MTX_DESTROY(&sc->sge.reg_lock); MTX_DESTROY(&sc->elmer_lock); mtx_lock(&t3_list_lock); SLIST_REMOVE(&t3_list, sc, adapter, link); mtx_unlock(&t3_list_lock); ADAPTER_LOCK_DEINIT(sc); } /** * setup_sge_qsets - configure SGE Tx/Rx/response queues * @sc: the controller softc * * Determines how many sets of SGE queues to use and initializes them. * We support multiple queue sets per port if we have MSI-X, otherwise * just one queue set per port. */ static int setup_sge_qsets(adapter_t *sc) { int i, j, err, irq_idx = 0, qset_idx = 0; u_int ntxq = SGE_TXQ_PER_SET; if ((err = t3_sge_alloc(sc)) != 0) { device_printf(sc->dev, "t3_sge_alloc returned %d\n", err); return (err); } if (sc->params.rev > 0 && !(sc->flags & USING_MSI)) irq_idx = -1; for (i = 0; i < (sc)->params.nports; i++) { struct port_info *pi = &sc->port[i]; for (j = 0; j < pi->nqsets; j++, qset_idx++) { err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports, (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx, &sc->params.sge.qset[qset_idx], ntxq, pi); if (err) { t3_free_sge_resources(sc, qset_idx); device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err); return (err); } } } return (0); } static void cxgb_teardown_interrupts(adapter_t *sc) { int i; for (i = 0; i < SGE_QSETS; i++) { if (sc->msix_intr_tag[i] == NULL) { /* Should have been setup fully or not at all */ KASSERT(sc->msix_irq_res[i] == NULL && sc->msix_irq_rid[i] == 0, ("%s: half-done interrupt (%d).", __func__, i)); continue; } bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_intr_tag[i]); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i], sc->msix_irq_res[i]); sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL; sc->msix_irq_rid[i] = 0; } if (sc->intr_tag) { KASSERT(sc->irq_res != NULL, ("%s: half-done interrupt.", __func__)); bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = sc->intr_tag = NULL; sc->irq_rid = 0; } } static int cxgb_setup_interrupts(adapter_t *sc) { struct resource *res; void *tag; int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX); sc->irq_rid = intr_flag ? 1 : 0; sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n", intr_flag, sc->irq_rid); err = EINVAL; sc->irq_rid = 0; } else { err = bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, sc->cxgb_intr, sc, &sc->intr_tag); if (err) { device_printf(sc->dev, "Cannot set up interrupt (%x, %u, %d)\n", intr_flag, sc->irq_rid, err); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = sc->intr_tag = NULL; sc->irq_rid = 0; } } /* That's all for INTx or MSI */ if (!(intr_flag & USING_MSIX) || err) return (err); bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err"); for (i = 0; i < sc->msi_count - 1; i++) { rid = i + 2; res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (res == NULL) { device_printf(sc->dev, "Cannot allocate interrupt " "for message %d\n", rid); err = EINVAL; break; } err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET, NULL, t3_intr_msix, &sc->sge.qs[i], &tag); if (err) { device_printf(sc->dev, "Cannot set up interrupt " "for message %d (%d)\n", rid, err); bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res); break; } sc->msix_irq_rid[i] = rid; sc->msix_irq_res[i] = res; sc->msix_intr_tag[i] = tag; bus_describe_intr(sc->dev, res, tag, "qs%d", i); } if (err) cxgb_teardown_interrupts(sc); return (err); } static int cxgb_port_probe(device_t dev) { struct port_info *p; char buf[80]; const char *desc; p = device_get_softc(dev); desc = p->phy.desc; snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc); device_set_desc_copy(dev, buf); return (0); } static int cxgb_makedev(struct port_info *pi) { pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit, UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp)); if (pi->port_cdev == NULL) return (ENOMEM); pi->port_cdev->si_drv1 = (void *)pi; return (0); } #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6) #define CXGB_CAP_ENABLE CXGB_CAP static int cxgb_port_attach(device_t dev) { struct port_info *p; struct ifnet *ifp; int err; struct adapter *sc; p = device_get_softc(dev); sc = p->adapter; snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d", device_get_unit(device_get_parent(dev)), p->port_id); PORT_LOCK_INIT(p, p->lockbuf); callout_init(&p->link_check_ch, 1); TASK_INIT(&p->link_check_task, 0, check_link_status, p); /* Allocate an ifnet object and set it up */ ifp = p->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = cxgb_init; ifp->if_softc = p; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = cxgb_ioctl; ifp->if_transmit = cxgb_transmit; ifp->if_qflush = cxgb_qflush; ifp->if_get_counter = cxgb_get_counter; ifp->if_capabilities = CXGB_CAP; #ifdef TCP_OFFLOAD if (is_offload(sc)) ifp->if_capabilities |= IFCAP_TOE4; #endif ifp->if_capenable = CXGB_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; /* * Disable TSO on 4-port - it isn't supported by the firmware. */ if (sc->params.nports > 2) { ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO); ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO); ifp->if_hwassist &= ~CSUM_TSO; } ether_ifattach(ifp, p->hw_addr); /* Attach driver netdump methods. */ NETDUMP_SET(ifp, cxgb); #ifdef DEFAULT_JUMBO if (sc->params.nports <= 2) ifp->if_mtu = ETHERMTU_JUMBO; #endif if ((err = cxgb_makedev(p)) != 0) { printf("makedev failed %d\n", err); return (err); } /* Create a list of media supported by this port */ ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change, cxgb_media_status); cxgb_build_medialist(p); t3_sge_init_port(p); return (err); } /* * cxgb_port_detach() is called via the device_detach methods when * cxgb_free() calls the bus_generic_detach. It is responsible for * removing the device from the view of the kernel, i.e. from all * interfaces lists etc. This routine is only called when the driver is * being unloaded, not when the link goes down. */ static int cxgb_port_detach(device_t dev) { struct port_info *p; struct adapter *sc; int i; p = device_get_softc(dev); sc = p->adapter; /* Tell cxgb_ioctl and if_init that the port is going away */ ADAPTER_LOCK(sc); SET_DOOMED(p); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0); SET_BUSY(sc); ADAPTER_UNLOCK(sc); if (p->port_cdev != NULL) destroy_dev(p->port_cdev); cxgb_uninit_synchronized(p); ether_ifdetach(p->ifp); for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { struct sge_qset *qs = &sc->sge.qs[i]; struct sge_txq *txq = &qs->txq[TXQ_ETH]; callout_drain(&txq->txq_watchdog); callout_drain(&txq->txq_timer); } PORT_LOCK_DEINIT(p); if_free(p->ifp); p->ifp = NULL; ADAPTER_LOCK(sc); CLR_BUSY(sc); wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (0); } void t3_fatal_err(struct adapter *sc) { u_int fw_status[4]; if (sc->flags & FULL_INIT_DONE) { t3_sge_stop(sc); t3_write_reg(sc, A_XGM_TX_CTRL, 0); t3_write_reg(sc, A_XGM_RX_CTRL, 0); t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0); t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0); t3_intr_disable(sc); } device_printf(sc->dev,"encountered fatal error, operation suspended\n"); if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status)) device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n", fw_status[0], fw_status[1], fw_status[2], fw_status[3]); } int t3_os_find_pci_capability(adapter_t *sc, int cap) { device_t dev; struct pci_devinfo *dinfo; pcicfgregs *cfg; uint32_t status; uint8_t ptr; dev = sc->dev; dinfo = device_get_ivars(dev); cfg = &dinfo->cfg; status = pci_read_config(dev, PCIR_STATUS, 2); if (!(status & PCIM_STATUS_CAPPRESENT)) return (0); switch (cfg->hdrtype & PCIM_HDRTYPE) { case 0: case 1: ptr = PCIR_CAP_PTR; break; case 2: ptr = PCIR_CAP_PTR_2; break; default: return (0); break; } ptr = pci_read_config(dev, ptr, 1); while (ptr != 0) { if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap) return (ptr); ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1); } return (0); } int t3_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t3_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } /** * t3_os_link_changed - handle link status changes * @sc: the adapter associated with the link change * @port_id: the port index whose link status has changed * @link_status: the new status of the link * @speed: the new speed setting * @duplex: the new duplex setting * @fc: the new flow-control setting * * This is the OS-dependent handler for link status changes. The OS * neutral handler takes care of most of the processing for these events, * then calls this handler for any OS-specific processing. */ void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc, int mac_was_reset) { struct port_info *pi = &adapter->port[port_id]; struct ifnet *ifp = pi->ifp; /* no race with detach, so ifp should always be good */ KASSERT(ifp, ("%s: if detached.", __func__)); /* Reapply mac settings if they were lost due to a reset */ if (mac_was_reset) { PORT_LOCK(pi); cxgb_update_mac_settings(pi); PORT_UNLOCK(pi); } if (link_status) { ifp->if_baudrate = IF_Mbps(speed); if_link_state_change(ifp, LINK_STATE_UP); } else if_link_state_change(ifp, LINK_STATE_DOWN); } /** * t3_os_phymod_changed - handle PHY module changes * @phy: the PHY reporting the module change * @mod_type: new module type * * This is the OS-dependent handler for PHY module changes. It is * invoked when a PHY module is removed or inserted for any OS-specific * processing. */ void t3_os_phymod_changed(struct adapter *adap, int port_id) { static const char *mod_str[] = { NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown" }; struct port_info *pi = &adap->port[port_id]; int mod = pi->phy.modtype; if (mod != pi->media.ifm_cur->ifm_data) cxgb_build_medialist(pi); if (mod == phy_modtype_none) if_printf(pi->ifp, "PHY module unplugged\n"); else { KASSERT(mod < ARRAY_SIZE(mod_str), ("invalid PHY module type %d", mod)); if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]); } } void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]) { /* * The ifnet might not be allocated before this gets called, * as this is called early on in attach by t3_prep_adapter * save the address off in the port structure */ if (cxgb_debug) printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":"); bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN); } /* * Programs the XGMAC based on the settings in the ifnet. These settings * include MTU, MAC address, mcast addresses, etc. */ static void cxgb_update_mac_settings(struct port_info *p) { struct ifnet *ifp = p->ifp; struct t3_rx_mode rm; struct cmac *mac = &p->mac; int mtu, hwtagging; PORT_LOCK_ASSERT_OWNED(p); bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN); mtu = ifp->if_mtu; if (ifp->if_capenable & IFCAP_VLAN_MTU) mtu += ETHER_VLAN_ENCAP_LEN; hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0; t3_mac_set_mtu(mac, mtu); t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging); t3_mac_set_address(mac, 0, p->hw_addr); t3_init_rx_mode(&rm, p); t3_mac_set_rx_mode(mac, &rm); } static int await_mgmt_replies(struct adapter *adap, unsigned long init_cnt, unsigned long n) { int attempts = 5; while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) { if (!--attempts) return (ETIMEDOUT); t3_os_sleep(10); } return 0; } static int init_tp_parity(struct adapter *adap) { int i; struct mbuf *m; struct cpl_set_tcb_field *greq; unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts; t3_tp_set_offload_mode(adap, 1); for (i = 0; i < 16; i++) { struct cpl_smt_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_smt_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i)); req->iff = i; t3_mgmt_tx(adap, m); } for (i = 0; i < 2048; i++) { struct cpl_l2t_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_l2t_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i)); req->params = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); } for (i = 0; i < 2048; i++) { struct cpl_rte_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_rte_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i)); req->l2t_idx = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); } m = m_gethdr(M_WAITOK, MT_DATA); greq = mtod(m, struct cpl_set_tcb_field *); m->m_len = m->m_pkthdr.len = sizeof(*greq); memset(greq, 0, sizeof(*greq)); greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0)); greq->mask = htobe64(1); t3_mgmt_tx(adap, m); i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1); t3_tp_set_offload_mode(adap, 0); return (i); } /** * setup_rss - configure Receive Side Steering (per-queue connection demux) * @adap: the adapter * * Sets up RSS to distribute packets to multiple receive queues. We * configure the RSS CPU lookup table to distribute to the number of HW * receive queues, and the response queue lookup table to narrow that * down to the response queues actually configured for each port. * We always configure the RSS mapping for two ports since the mapping * table has plenty of entries. */ static void setup_rss(adapter_t *adap) { int i; u_int nq[2]; uint8_t cpus[SGE_QSETS + 1]; uint16_t rspq_map[RSS_TABLE_SIZE]; for (i = 0; i < SGE_QSETS; ++i) cpus[i] = i; cpus[SGE_QSETS] = 0xff; nq[0] = nq[1] = 0; for_each_port(adap, i) { const struct port_info *pi = adap2pinfo(adap, i); nq[pi->tx_chan] += pi->nqsets; } for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) { rspq_map[i] = nq[0] ? i % nq[0] : 0; rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0; } /* Calculate the reverse RSS map table */ for (i = 0; i < SGE_QSETS; ++i) adap->rrss_map[i] = 0xff; for (i = 0; i < RSS_TABLE_SIZE; ++i) if (adap->rrss_map[rspq_map[i]] == 0xff) adap->rrss_map[rspq_map[i]] = i; t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN | F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN | F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map); } static void send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo, int hi, int port) { struct mbuf *m; struct mngt_pktsched_wr *req; m = m_gethdr(M_NOWAIT, MT_DATA); if (m) { req = mtod(m, struct mngt_pktsched_wr *); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT)); req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET; req->sched = sched; req->idx = qidx; req->min = lo; req->max = hi; req->binding = port; m->m_len = m->m_pkthdr.len = sizeof(*req); t3_mgmt_tx(adap, m); } } static void bind_qsets(adapter_t *sc) { int i, j; for (i = 0; i < (sc)->params.nports; ++i) { const struct port_info *pi = adap2pinfo(sc, i); for (j = 0; j < pi->nqsets; ++j) { send_pktsched_cmd(sc, 1, pi->first_qset + j, -1, -1, pi->tx_chan); } } } static void update_tpeeprom(struct adapter *adap) { const struct firmware *tpeeprom; uint32_t version; unsigned int major, minor; int ret, len; char rev, name[32]; t3_seeprom_read(adap, TP_SRAM_OFFSET, &version); major = G_TP_VERSION_MAJOR(version); minor = G_TP_VERSION_MINOR(version); if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR) return; rev = t3rev2char(adap); snprintf(name, sizeof(name), TPEEPROM_NAME, rev); tpeeprom = firmware_get(name); if (tpeeprom == NULL) { device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n", name); return; } len = tpeeprom->datasize - 4; ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize); if (ret) goto release_tpeeprom; if (len != TP_SRAM_LEN) { device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", name, len, TP_SRAM_LEN); return; } ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize, TP_SRAM_OFFSET); if (!ret) { device_printf(adap->dev, "Protocol SRAM image updated in EEPROM to %d.%d.%d\n", TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); } else device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n"); release_tpeeprom: firmware_put(tpeeprom, FIRMWARE_UNLOAD); return; } static int update_tpsram(struct adapter *adap) { const struct firmware *tpsram; int ret; char rev, name[32]; rev = t3rev2char(adap); snprintf(name, sizeof(name), TPSRAM_NAME, rev); update_tpeeprom(adap); tpsram = firmware_get(name); if (tpsram == NULL){ device_printf(adap->dev, "could not load TP SRAM\n"); return (EINVAL); } else device_printf(adap->dev, "updating TP SRAM\n"); ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize); if (ret) goto release_tpsram; ret = t3_set_proto_sram(adap, tpsram->data); if (ret) device_printf(adap->dev, "loading protocol SRAM failed\n"); release_tpsram: firmware_put(tpsram, FIRMWARE_UNLOAD); return ret; } /** * cxgb_up - enable the adapter * @adap: adapter being enabled * * Called when the first port is enabled, this function performs the * actions necessary to make an adapter operational, such as completing * the initialization of HW modules, and enabling interrupts. */ static int cxgb_up(struct adapter *sc) { int err = 0; unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS; KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)", __func__, sc->open_device_map)); if ((sc->flags & FULL_INIT_DONE) == 0) { ADAPTER_LOCK_ASSERT_NOTOWNED(sc); if ((sc->flags & FW_UPTODATE) == 0) if ((err = upgrade_fw(sc))) goto out; if ((sc->flags & TPS_UPTODATE) == 0) if ((err = update_tpsram(sc))) goto out; if (is_offload(sc) && nfilters != 0) { sc->params.mc5.nservers = 0; if (nfilters < 0) sc->params.mc5.nfilters = mxf; else sc->params.mc5.nfilters = min(nfilters, mxf); } err = t3_init_hw(sc, 0); if (err) goto out; t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT); t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); err = setup_sge_qsets(sc); if (err) goto out; alloc_filters(sc); setup_rss(sc); t3_add_configured_sysctls(sc); sc->flags |= FULL_INIT_DONE; } t3_intr_clear(sc); t3_sge_start(sc); t3_intr_enable(sc); if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) && is_offload(sc) && init_tp_parity(sc) == 0) sc->flags |= TP_PARITY_INIT; if (sc->flags & TP_PARITY_INIT) { t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR); t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff); } if (!(sc->flags & QUEUES_BOUND)) { bind_qsets(sc); setup_hw_filters(sc); sc->flags |= QUEUES_BOUND; } t3_sge_reset_adapter(sc); out: return (err); } /* * Called when the last open device is closed. Does NOT undo all of cxgb_up's * work. Specifically, the resources grabbed under FULL_INIT_DONE are released * during controller_detach, not here. */ static void cxgb_down(struct adapter *sc) { t3_sge_stop(sc); t3_intr_disable(sc); } /* * if_init for cxgb ports. */ static void cxgb_init(void *arg) { struct port_info *p = arg; struct adapter *sc = p->adapter; ADAPTER_LOCK(sc); cxgb_init_locked(p); /* releases adapter lock */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); } static int cxgb_init_locked(struct port_info *p) { struct adapter *sc = p->adapter; struct ifnet *ifp = p->ifp; struct cmac *mac = &p->mac; int i, rc = 0, may_sleep = 0, gave_up_lock = 0; ADAPTER_LOCK_ASSERT_OWNED(sc); while (!IS_DOOMED(p) && IS_BUSY(sc)) { gave_up_lock = 1; if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) { rc = EINTR; goto done; } } if (IS_DOOMED(p)) { rc = ENXIO; goto done; } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); /* * The code that runs during one-time adapter initialization can sleep * so it's important not to hold any locks across it. */ may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1; if (may_sleep) { SET_BUSY(sc); gave_up_lock = 1; ADAPTER_UNLOCK(sc); } if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0)) goto done; PORT_LOCK(p); if (isset(&sc->open_device_map, p->port_id) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(p); goto done; } t3_port_intr_enable(sc, p->port_id); if (!mac->multiport) t3_mac_init(mac); cxgb_update_mac_settings(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; PORT_UNLOCK(p); for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { struct sge_qset *qs = &sc->sge.qs[i]; struct sge_txq *txq = &qs->txq[TXQ_ETH]; callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs, txq->txq_watchdog.c_cpu); } /* all ok */ setbit(&sc->open_device_map, p->port_id); callout_reset(&p->link_check_ch, p->phy.caps & SUPPORTED_LINK_IRQ ? hz * 3 : hz / 4, link_check_callout, p); done: if (may_sleep) { ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); } if (gave_up_lock) wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (rc); } static int cxgb_uninit_locked(struct port_info *p) { struct adapter *sc = p->adapter; int rc; ADAPTER_LOCK_ASSERT_OWNED(sc); while (!IS_DOOMED(p) && IS_BUSY(sc)) { if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) { rc = EINTR; goto done; } } if (IS_DOOMED(p)) { rc = ENXIO; goto done; } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); ADAPTER_UNLOCK(sc); rc = cxgb_uninit_synchronized(p); ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup_one(&sc->flags); done: ADAPTER_UNLOCK(sc); return (rc); } /* * Called on "ifconfig down", and from port_detach */ static int cxgb_uninit_synchronized(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; /* * taskqueue_drain may cause a deadlock if the adapter lock is held. */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* * Clear this port's bit from the open device map, and then drain all * the tasks that can access/manipulate this port's port_info or ifp. * We disable this port's interrupts here and so the slow/ext * interrupt tasks won't be enqueued. The tick task will continue to * be enqueued every second but the runs after this drain will not see * this port in the open device map. * * A well behaved task must take open_device_map into account and ignore * ports that are not open. */ clrbit(&sc->open_device_map, pi->port_id); t3_port_intr_disable(sc, pi->port_id); taskqueue_drain(sc->tq, &sc->slow_intr_task); taskqueue_drain(sc->tq, &sc->tick_task); callout_drain(&pi->link_check_ch); taskqueue_drain(sc->tq, &pi->link_check_task); PORT_LOCK(pi); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* disable pause frames */ t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0); /* Reset RX FIFO HWM */ t3_set_reg_field(sc, A_XGM_RXFIFO_CFG + pi->mac.offset, V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0); DELAY(100 * 1000); /* Wait for TXFIFO empty */ t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset, F_TXFIFO_EMPTY, 1, 20, 5); DELAY(100 * 1000); t3_mac_disable(&pi->mac, MAC_DIRECTION_RX); pi->phy.ops->power_down(&pi->phy, 1); PORT_UNLOCK(pi); pi->link_config.link_ok = 0; t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0); if (sc->open_device_map == 0) cxgb_down(pi->adapter); return (0); } /* * Mark lro enabled or disabled in all qsets for this port */ static int cxgb_set_lro(struct port_info *p, int enabled) { int i; struct adapter *adp = p->adapter; struct sge_qset *q; for (i = 0; i < p->nqsets; i++) { q = &adp->sge.qs[p->first_qset + i]; q->lro.enabled = (enabled != 0); } return (0); } static int cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) { struct port_info *p = ifp->if_softc; struct adapter *sc = p->adapter; struct ifreq *ifr = (struct ifreq *)data; int flags, error = 0, mtu; uint32_t mask; switch (command) { case SIOCSIFMTU: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) { fail: ADAPTER_UNLOCK(sc); return (error); } mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) { error = EINVAL; } else { ifp->if_mtu = mtu; PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); break; case SIOCSIFFLAGS: ADAPTER_LOCK(sc); if (IS_DOOMED(p)) { error = ENXIO; goto fail; } if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = p->if_flags; if (((ifp->if_flags ^ flags) & IFF_PROMISC) || ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) { if (IS_BUSY(sc)) { error = EBUSY; goto fail; } PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); } else error = cxgb_init_locked(p); p->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) error = cxgb_uninit_locked(p); else ADAPTER_UNLOCK(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) goto fail; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); break; case SIOCSIFCAP: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) goto fail; mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); error = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); error = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; /* Safe to do this even if cxgb_up not called yet */ cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO); } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE4) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4; error = toe_capability(p, enable); if (error == 0) ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif ADAPTER_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &p->media, command); break; default: error = ether_ioctl(ifp, command, data); } return (error); } static int cxgb_media_change(struct ifnet *ifp) { return (EOPNOTSUPP); } /* * Translates phy->modtype to the correct Ethernet media subtype. */ static int cxgb_ifm_type(int mod) { switch (mod) { case phy_modtype_sr: return (IFM_10G_SR); case phy_modtype_lr: return (IFM_10G_LR); case phy_modtype_lrm: return (IFM_10G_LRM); case phy_modtype_twinax: return (IFM_10G_TWINAX); case phy_modtype_twinax_long: return (IFM_10G_TWINAX_LONG); case phy_modtype_none: return (IFM_NONE); case phy_modtype_unknown: return (IFM_UNKNOWN); } KASSERT(0, ("%s: modtype %d unknown", __func__, mod)); return (IFM_UNKNOWN); } /* * Rebuilds the ifmedia list for this port, and sets the current media. */ static void cxgb_build_medialist(struct port_info *p) { struct cphy *phy = &p->phy; struct ifmedia *media = &p->media; int mod = phy->modtype; int m = IFM_ETHER | IFM_FDX; PORT_LOCK(p); ifmedia_removeall(media); if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) { /* Copper (RJ45) */ if (phy->caps & SUPPORTED_10000baseT_Full) ifmedia_add(media, m | IFM_10G_T, mod, NULL); if (phy->caps & SUPPORTED_1000baseT_Full) ifmedia_add(media, m | IFM_1000_T, mod, NULL); if (phy->caps & SUPPORTED_100baseT_Full) ifmedia_add(media, m | IFM_100_TX, mod, NULL); if (phy->caps & SUPPORTED_10baseT_Full) ifmedia_add(media, m | IFM_10_T, mod, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); } else if (phy->caps & SUPPORTED_TP) { /* Copper (CX4) */ KASSERT(phy->caps & SUPPORTED_10000baseT_Full, ("%s: unexpected cap 0x%x", __func__, phy->caps)); ifmedia_add(media, m | IFM_10G_CX4, mod, NULL); ifmedia_set(media, m | IFM_10G_CX4); } else if (phy->caps & SUPPORTED_FIBRE && phy->caps & SUPPORTED_10000baseT_Full) { /* 10G optical (but includes SFP+ twinax) */ m |= cxgb_ifm_type(mod); if (IFM_SUBTYPE(m) == IFM_NONE) m &= ~IFM_FDX; ifmedia_add(media, m, mod, NULL); ifmedia_set(media, m); } else if (phy->caps & SUPPORTED_FIBRE && phy->caps & SUPPORTED_1000baseT_Full) { /* 1G optical */ /* XXX: Lie and claim to be SX, could actually be any 1G-X */ ifmedia_add(media, m | IFM_1000_SX, mod, NULL); ifmedia_set(media, m | IFM_1000_SX); } else { KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__, phy->caps)); } PORT_UNLOCK(p); } static void cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct port_info *p = ifp->if_softc; struct ifmedia_entry *cur = p->media.ifm_cur; int speed = p->link_config.speed; if (cur->ifm_data != p->phy.modtype) { cxgb_build_medialist(p); cur = p->media.ifm_cur; } ifmr->ifm_status = IFM_AVALID; if (!p->link_config.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* * active and current will differ iff current media is autoselect. That * can happen only for copper RJ45. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg, ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps)); ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == SPEED_10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == SPEED_1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == SPEED_100) ifmr->ifm_active |= IFM_100_TX; else if (speed == SPEED_10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } static uint64_t cxgb_get_counter(struct ifnet *ifp, ift_counter c) { struct port_info *pi = ifp->if_softc; struct adapter *sc = pi->adapter; struct cmac *mac = &pi->mac; struct mac_stats *mstats = &mac->stats; cxgb_refresh_stats(pi); switch (c) { case IFCOUNTER_IPACKETS: return (mstats->rx_frames); case IFCOUNTER_IERRORS: return (mstats->rx_jabber + mstats->rx_data_errs + mstats->rx_sequence_errs + mstats->rx_runt + mstats->rx_too_long + mstats->rx_mac_internal_errs + mstats->rx_short + mstats->rx_fcs_errs); case IFCOUNTER_OPACKETS: return (mstats->tx_frames); case IFCOUNTER_OERRORS: return (mstats->tx_excess_collisions + mstats->tx_underrun + mstats->tx_len_errs + mstats->tx_mac_internal_errs + mstats->tx_excess_deferral + mstats->tx_fcs_errs); case IFCOUNTER_COLLISIONS: return (mstats->tx_total_collisions); case IFCOUNTER_IBYTES: return (mstats->rx_octets); case IFCOUNTER_OBYTES: return (mstats->tx_octets); case IFCOUNTER_IMCASTS: return (mstats->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (mstats->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (mstats->rx_cong_drops); case IFCOUNTER_OQDROPS: { int i; uint64_t drops; drops = 0; if (sc->flags & FULL_INIT_DONE) { for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++) drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops; } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static void cxgb_async_intr(void *data) { adapter_t *sc = data; t3_write_reg(sc, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(sc, A_PL_INT_ENABLE0); taskqueue_enqueue(sc->tq, &sc->slow_intr_task); } static void link_check_callout(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; if (!isset(&sc->open_device_map, pi->port_id)) return; taskqueue_enqueue(sc->tq, &pi->link_check_task); } static void check_link_status(void *arg, int pending) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; if (!isset(&sc->open_device_map, pi->port_id)) return; t3_link_changed(sc, pi->port_id); if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) || pi->link_config.link_ok == 0) callout_reset(&pi->link_check_ch, hz, link_check_callout, pi); } void t3_os_link_intr(struct port_info *pi) { /* * Schedule a link check in the near future. If the link is flapping * rapidly we'll keep resetting the callout and delaying the check until * things stabilize a bit. */ callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi); } static void check_t3b2_mac(struct adapter *sc) { int i; if (sc->flags & CXGB_SHUTDOWN) return; for_each_port(sc, i) { struct port_info *p = &sc->port[i]; int status; #ifdef INVARIANTS struct ifnet *ifp = p->ifp; #endif if (!isset(&sc->open_device_map, p->port_id) || p->link_fault || !p->link_config.link_ok) continue; KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, ("%s: state mismatch (drv_flags %x, device_map %x)", __func__, ifp->if_drv_flags, sc->open_device_map)); PORT_LOCK(p); status = t3b2_mac_watchdog_task(&p->mac); if (status == 1) p->mac.stats.num_toggled++; else if (status == 2) { struct cmac *mac = &p->mac; cxgb_update_mac_settings(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); t3_port_intr_enable(sc, p->port_id); p->mac.stats.num_resets++; } PORT_UNLOCK(p); } } static void cxgb_tick(void *arg) { adapter_t *sc = (adapter_t *)arg; if (sc->flags & CXGB_SHUTDOWN) return; taskqueue_enqueue(sc->tq, &sc->tick_task); callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); } void cxgb_refresh_stats(struct port_info *pi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; PORT_LOCK(pi); t3_mac_update_stats(&pi->mac); PORT_UNLOCK(pi); getmicrotime(&pi->last_refreshed); } static void cxgb_tick_handler(void *arg, int count) { adapter_t *sc = (adapter_t *)arg; const struct adapter_params *p = &sc->params; int i; uint32_t cause, reset; if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE)) return; if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) check_t3b2_mac(sc); cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY); if (cause) { struct sge_qset *qs = &sc->sge.qs[0]; uint32_t mask, v; v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00; mask = 1; for (i = 0; i < SGE_QSETS; i++) { if (v & mask) qs[i].rspq.starved++; mask <<= 1; } mask <<= SGE_QSETS; /* skip RSPQXDISABLED */ for (i = 0; i < SGE_QSETS * 2; i++) { if (v & mask) { qs[i / 2].fl[i % 2].empty++; } mask <<= 1; } /* clear */ t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v); t3_write_reg(sc, A_SG_INT_CAUSE, cause); } for (i = 0; i < sc->params.nports; i++) { struct port_info *pi = &sc->port[i]; struct cmac *mac = &pi->mac; if (!isset(&sc->open_device_map, pi->port_id)) continue; cxgb_refresh_stats(pi); if (mac->multiport) continue; /* Count rx fifo overflows, once per second */ cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset); reset = 0; if (cause & F_RXFIFO_OVERFLOW) { mac->stats.rx_fifo_ovfl++; reset |= F_RXFIFO_OVERFLOW; } t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset); } } static void touch_bars(device_t dev) { /* * Don't enable yet */ #if !defined(__LP64__) && 0 u32 v; pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v); pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v); pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v); #endif } static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset) { uint8_t *buf; int err = 0; u32 aligned_offset, aligned_len, *p; struct adapter *adapter = pi->adapter; aligned_offset = offset & ~3; aligned_len = (len + (offset & 3) + 3) & ~3; if (aligned_offset != offset || aligned_len != len) { buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO); if (!buf) return (ENOMEM); err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf); if (!err && aligned_len > 4) err = t3_seeprom_read(adapter, aligned_offset + aligned_len - 4, (u32 *)&buf[aligned_len - 4]); if (err) goto out; memcpy(buf + (offset & 3), data, len); } else buf = (uint8_t *)(uintptr_t)data; err = t3_seeprom_wp(adapter, 0); if (err) goto out; for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) { err = t3_seeprom_write(adapter, aligned_offset, *p); aligned_offset += 4; } if (!err) err = t3_seeprom_wp(adapter, 1); out: if (buf != data) free(buf, M_DEVBUF); return err; } static int in_range(int val, int lo, int hi) { return val < 0 || (val <= hi && val >= lo); } static int cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td) { return (0); } static int cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td) { return (0); } static int cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int mmd, error = 0; struct port_info *pi = dev->si_drv1; adapter_t *sc = pi->adapter; #ifdef PRIV_SUPPORTED if (priv_check(td, PRIV_DRIVER)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #else if (suser(td)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #endif switch (cmd) { case CHELSIO_GET_MIIREG: { uint32_t val; struct cphy *phy = &pi->phy; struct ch_mii_data *mid = (struct ch_mii_data *)data; if (!phy->mdio_read) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_VEND2) return (EINVAL); error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, &val); } else error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, &val); if (error == 0) mid->val_out = val; break; } case CHELSIO_SET_MIIREG: { struct cphy *phy = &pi->phy; struct ch_mii_data *mid = (struct ch_mii_data *)data; if (!phy->mdio_write) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_VEND2) return (EINVAL); error = phy->mdio_write(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, mid->val_in); } else error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, mid->val_in); break; } case CHELSIO_SETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); t3_write_reg(sc, edata->addr, edata->val); break; } case CHELSIO_GETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); edata->val = t3_read_reg(sc, edata->addr); break; } case CHELSIO_GET_SGE_CONTEXT: { struct ch_cntxt *ecntxt = (struct ch_cntxt *)data; mtx_lock_spin(&sc->sge.reg_lock); switch (ecntxt->cntxt_type) { case CNTXT_TYPE_EGRESS: error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_FL: error = -t3_sge_read_fl(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_RSP: error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_CQ: error = -t3_sge_read_cq(sc, ecntxt->cntxt_id, ecntxt->data); break; default: error = EINVAL; break; } mtx_unlock_spin(&sc->sge.reg_lock); break; } case CHELSIO_GET_SGE_DESC: { struct ch_desc *edesc = (struct ch_desc *)data; int ret; if (edesc->queue_num >= SGE_QSETS * 6) return (EINVAL); ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6], edesc->queue_num % 6, edesc->idx, edesc->data); if (ret < 0) return (EINVAL); edesc->size = ret; break; } case CHELSIO_GET_QSET_PARAMS: { struct qset_params *q; struct ch_qset_params *t = (struct ch_qset_params *)data; int q1 = pi->first_qset; int nqsets = pi->nqsets; int i; if (t->qset_idx >= nqsets) return EINVAL; i = q1 + t->qset_idx; q = &sc->params.sge.qset[i]; t->rspq_size = q->rspq_size; t->txq_size[0] = q->txq_size[0]; t->txq_size[1] = q->txq_size[1]; t->txq_size[2] = q->txq_size[2]; t->fl_size[0] = q->fl_size; t->fl_size[1] = q->jumbo_size; t->polling = q->polling; t->lro = q->lro; t->intr_lat = q->coalesce_usecs; t->cong_thres = q->cong_thres; t->qnum = i; if ((sc->flags & FULL_INIT_DONE) == 0) t->vector = 0; else if (sc->flags & USING_MSIX) t->vector = rman_get_start(sc->msix_irq_res[i]); else t->vector = rman_get_start(sc->irq_res); break; } case CHELSIO_GET_QSET_NUM: { struct ch_reg *edata = (struct ch_reg *)data; edata->val = pi->nqsets; break; } case CHELSIO_LOAD_FW: { uint8_t *fw_data; uint32_t vers; struct ch_mem_range *t = (struct ch_mem_range *)data; /* * You're allowed to load a firmware only before FULL_INIT_DONE * * FW_UPTODATE is also set so the rest of the initialization * will not overwrite what was loaded here. This gives you the * flexibility to load any firmware (and maybe shoot yourself in * the foot). */ ADAPTER_LOCK(sc); if (sc->open_device_map || sc->flags & FULL_INIT_DONE) { ADAPTER_UNLOCK(sc); return (EBUSY); } fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT); if (!fw_data) error = ENOMEM; else error = copyin(t->buf, fw_data, t->len); if (!error) error = -t3_load_fw(sc, fw_data, t->len); if (t3_get_fw_version(sc, &vers) == 0) { snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); } if (!error) sc->flags |= FW_UPTODATE; free(fw_data, M_DEVBUF); ADAPTER_UNLOCK(sc); break; } case CHELSIO_LOAD_BOOT: { uint8_t *boot_data; struct ch_mem_range *t = (struct ch_mem_range *)data; boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT); if (!boot_data) return ENOMEM; error = copyin(t->buf, boot_data, t->len); if (!error) error = -t3_load_boot(sc, boot_data, t->len); free(boot_data, M_DEVBUF); break; } case CHELSIO_GET_PM: { struct ch_pm *m = (struct ch_pm *)data; struct tp_params *p = &sc->params.tp; if (!is_offload(sc)) return (EOPNOTSUPP); m->tx_pg_sz = p->tx_pg_size; m->tx_num_pg = p->tx_num_pgs; m->rx_pg_sz = p->rx_pg_size; m->rx_num_pg = p->rx_num_pgs; m->pm_total = p->pmtx_size + p->chan_rx_size * p->nchan; break; } case CHELSIO_SET_PM: { struct ch_pm *m = (struct ch_pm *)data; struct tp_params *p = &sc->params.tp; if (!is_offload(sc)) return (EOPNOTSUPP); if (sc->flags & FULL_INIT_DONE) return (EBUSY); if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) || !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1))) return (EINVAL); /* not power of 2 */ if (!(m->rx_pg_sz & 0x14000)) return (EINVAL); /* not 16KB or 64KB */ if (!(m->tx_pg_sz & 0x1554000)) return (EINVAL); if (m->tx_num_pg == -1) m->tx_num_pg = p->tx_num_pgs; if (m->rx_num_pg == -1) m->rx_num_pg = p->rx_num_pgs; if (m->tx_num_pg % 24 || m->rx_num_pg % 24) return (EINVAL); if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size || m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size) return (EINVAL); p->rx_pg_size = m->rx_pg_sz; p->tx_pg_size = m->tx_pg_sz; p->rx_num_pgs = m->rx_num_pg; p->tx_num_pgs = m->tx_num_pg; break; } case CHELSIO_SETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; int i; if (!is_offload(sc)) return (EOPNOTSUPP); if (offload_running(sc)) return (EBUSY); if (m->nmtus != NMTUS) return (EINVAL); if (m->mtus[0] < 81) /* accommodate SACK */ return (EINVAL); /* * MTUs must be in ascending order */ for (i = 1; i < NMTUS; ++i) if (m->mtus[i] < m->mtus[i - 1]) return (EINVAL); memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus)); break; } case CHELSIO_GETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; if (!is_offload(sc)) return (EOPNOTSUPP); memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus)); m->nmtus = NMTUS; break; } case CHELSIO_GET_MEM: { struct ch_mem_range *t = (struct ch_mem_range *)data; struct mc7 *mem; uint8_t *useraddr; u64 buf[32]; /* * Use these to avoid modifying len/addr in the return * struct */ uint32_t len = t->len, addr = t->addr; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EIO); /* need the memory controllers */ if ((addr & 0x7) || (len & 0x7)) return (EINVAL); if (t->mem_id == MEM_CM) mem = &sc->cm; else if (t->mem_id == MEM_PMRX) mem = &sc->pmrx; else if (t->mem_id == MEM_PMTX) mem = &sc->pmtx; else return (EINVAL); /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision */ t->version = 3 | (sc->params.rev << 10); /* * Read 256 bytes at a time as len can be large and we don't * want to use huge intermediate buffers. */ useraddr = (uint8_t *)t->buf; while (len) { unsigned int chunk = min(len, sizeof(buf)); error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf); if (error) return (-error); if (copyout(buf, useraddr, chunk)) return (EFAULT); useraddr += chunk; addr += chunk; len -= chunk; } break; } case CHELSIO_READ_TCAM_WORD: { struct ch_tcam_word *t = (struct ch_tcam_word *)data; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EIO); /* need MC5 */ return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf); break; } case CHELSIO_SET_TRACE_FILTER: { struct ch_trace *t = (struct ch_trace *)data; const struct trace_params *tp; tp = (const struct trace_params *)&t->sip; if (t->config_tx) t3_config_trace_filter(sc, tp, 0, t->invert_match, t->trace_tx); if (t->config_rx) t3_config_trace_filter(sc, tp, 1, t->invert_match, t->trace_rx); break; } case CHELSIO_SET_PKTSCHED: { struct ch_pktsched_params *p = (struct ch_pktsched_params *)data; if (sc->open_device_map == 0) return (EAGAIN); send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max, p->binding); break; } case CHELSIO_IFCONF_GETREGS: { struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data; int reglen = cxgb_get_regs_len(); uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } if (regs->len > reglen) regs->len = reglen; else if (regs->len < reglen) error = ENOBUFS; if (!error) { cxgb_get_regs(sc, regs, buf); error = copyout(buf, regs->data, reglen); } free(buf, M_DEVBUF); break; } case CHELSIO_SET_HW_SCHED: { struct ch_hw_sched *t = (struct ch_hw_sched *)data; unsigned int ticks_per_usec = core_ticks_per_usec(sc); if ((sc->flags & FULL_INIT_DONE) == 0) return (EAGAIN); /* need TP to be initialized */ if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) || !in_range(t->channel, 0, 1) || !in_range(t->kbps, 0, 10000000) || !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) || !in_range(t->flow_ipg, 0, dack_ticks_to_usec(sc, 0x7ff))) return (EINVAL); if (t->kbps >= 0) { error = t3_config_sched(sc, t->kbps, t->sched); if (error < 0) return (-error); } if (t->class_ipg >= 0) t3_set_sched_ipg(sc, t->sched, t->class_ipg); if (t->flow_ipg >= 0) { t->flow_ipg *= 1000; /* us -> ns */ t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1); } if (t->mode >= 0) { int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched); t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, bit, t->mode ? bit : 0); } if (t->channel >= 0) t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, 1 << t->sched, t->channel << t->sched); break; } case CHELSIO_GET_EEPROM: { int i; struct ch_eeprom *e = (struct ch_eeprom *)data; uint8_t *buf; if (e->offset & 3 || e->offset >= EEPROMSIZE || e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) { return (EINVAL); } buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } e->magic = EEPROM_MAGIC; for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4) error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]); if (!error) error = copyout(buf + e->offset, e->data, e->len); free(buf, M_DEVBUF); break; } case CHELSIO_CLEAR_STATS: { if (!(sc->flags & FULL_INIT_DONE)) return EAGAIN; PORT_LOCK(pi); t3_mac_update_stats(&pi->mac); memset(&pi->mac.stats, 0, sizeof(pi->mac.stats)); PORT_UNLOCK(pi); break; } case CHELSIO_GET_UP_LA: { struct ch_up_la *la = (struct ch_up_la *)data; uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } if (la->bufsize < LA_BUFSIZE) error = ENOBUFS; if (!error) error = -t3_get_up_la(sc, &la->stopped, &la->idx, &la->bufsize, buf); if (!error) error = copyout(buf, la->data, la->bufsize); free(buf, M_DEVBUF); break; } case CHELSIO_GET_UP_IOQS: { struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data; uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT); uint32_t *v; if (buf == NULL) { return (ENOMEM); } if (ioqs->bufsize < IOQS_BUFSIZE) error = ENOBUFS; if (!error) error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf); if (!error) { v = (uint32_t *)buf; ioqs->ioq_rx_enable = *v++; ioqs->ioq_tx_enable = *v++; ioqs->ioq_rx_status = *v++; ioqs->ioq_tx_status = *v++; error = copyout(v, ioqs->data, ioqs->bufsize); } free(buf, M_DEVBUF); break; } case CHELSIO_SET_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); /* No TCAM */ if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); /* mc5 not setup yet */ if (nfilters == 0) return (EBUSY); /* TOE will use TCAM */ /* sanity checks */ if (f->filter_id >= nfilters || (f->val.dip && f->mask.dip != 0xffffffff) || (f->val.sport && f->mask.sport != 0xffff) || (f->val.dport && f->mask.dport != 0xffff) || (f->val.vlan && f->mask.vlan != 0xfff) || (f->val.vlan_prio && f->mask.vlan_prio != FILTER_NO_VLAN_PRI) || (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) || f->qset >= SGE_QSETS || sc->rrss_map[f->qset] >= RSS_TABLE_SIZE) return (EINVAL); /* Was allocated with M_WAITOK */ KASSERT(sc->filters, ("filter table NULL\n")); p = &sc->filters[f->filter_id]; if (p->locked) return (EPERM); bzero(p, sizeof(*p)); p->sip = f->val.sip; p->sip_mask = f->mask.sip; p->dip = f->val.dip; p->sport = f->val.sport; p->dport = f->val.dport; p->vlan = f->mask.vlan ? f->val.vlan : 0xfff; p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) : FILTER_NO_VLAN_PRI; p->mac_hit = f->mac_hit; p->mac_vld = f->mac_addr_idx != 0xffff; p->mac_idx = f->mac_addr_idx; p->pkt_type = f->proto; p->report_filter_id = f->want_filter_id; p->pass = f->pass; p->rss = f->rss; p->qset = f->qset; error = set_filter(sc, f->filter_id, p); if (error == 0) p->valid = 1; break; } case CHELSIO_DEL_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); if (nfilters == 0 || sc->filters == NULL) return (EINVAL); if (f->filter_id >= nfilters) return (EINVAL); p = &sc->filters[f->filter_id]; if (p->locked) return (EPERM); if (!p->valid) return (EFAULT); /* Read "Bad address" as "Bad index" */ bzero(p, sizeof(*p)); p->sip = p->sip_mask = 0xffffffff; p->vlan = 0xfff; p->vlan_prio = FILTER_NO_VLAN_PRI; p->pkt_type = 1; error = set_filter(sc, f->filter_id, p); break; } case CHELSIO_GET_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int i, nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); if (nfilters == 0 || sc->filters == NULL) return (EINVAL); i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1; for (; i < nfilters; i++) { p = &sc->filters[i]; if (!p->valid) continue; bzero(f, sizeof(*f)); f->filter_id = i; f->val.sip = p->sip; f->mask.sip = p->sip_mask; f->val.dip = p->dip; f->mask.dip = p->dip ? 0xffffffff : 0; f->val.sport = p->sport; f->mask.sport = p->sport ? 0xffff : 0; f->val.dport = p->dport; f->mask.dport = p->dport ? 0xffff : 0; f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan; f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff; f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? 0 : p->vlan_prio; f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? 0 : FILTER_NO_VLAN_PRI; f->mac_hit = p->mac_hit; f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff; f->proto = p->pkt_type; f->want_filter_id = p->report_filter_id; f->pass = p->pass; f->rss = p->rss; f->qset = p->qset; break; } if (i == nfilters) f->filter_id = 0xffffffff; break; } default: return (EOPNOTSUPP); break; } return (error); } static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end) { uint32_t *p = (uint32_t *)(buf + start); for ( ; start <= end; start += sizeof(uint32_t)) *p++ = t3_read_reg(ap, start); } #define T3_REGMAP_SIZE (3 * 1024) static int cxgb_get_regs_len(void) { return T3_REGMAP_SIZE; } static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf) { /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision * bit 31: set for PCIe cards */ regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31); /* * We skip the MAC statistics registers because they are clear-on-read. * Also reading multi-register stats would need to synchronize with the * periodic mac stats accumulation. Hard to justify the complexity. */ memset(buf, 0, cxgb_get_regs_len()); reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN); reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT); reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE); reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA); reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3); reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0, XGM_REG(A_XGM_SERDES_STAT3, 1)); reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1), XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1)); } static int alloc_filters(struct adapter *sc) { struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (nfilters == 0) return (0); p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO); sc->filters = p; p = &sc->filters[nfilters - 1]; p->vlan = 0xfff; p->vlan_prio = FILTER_NO_VLAN_PRI; p->pass = p->rss = p->valid = p->locked = 1; return (0); } static int setup_hw_filters(struct adapter *sc) { int i, rc; unsigned int nfilters = sc->params.mc5.nfilters; if (!sc->filters) return (0); t3_enable_filters(sc); for (i = rc = 0; i < nfilters && !rc; i++) { if (sc->filters[i].locked) rc = set_filter(sc, i, &sc->filters[i]); } return (rc); } static int set_filter(struct adapter *sc, int id, const struct filter_info *f) { int len; struct mbuf *m; struct ulp_txpkt *txpkt; struct work_request_hdr *wr; struct cpl_pass_open_req *oreq; struct cpl_set_tcb_field *sreq; len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq); KASSERT(len <= MHLEN, ("filter request too big for an mbuf")); id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes - sc->params.mc5.nfilters; m = m_gethdr(M_WAITOK, MT_DATA); m->m_len = m->m_pkthdr.len = len; bzero(mtod(m, char *), len); wr = mtod(m, struct work_request_hdr *); wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); oreq = (struct cpl_pass_open_req *)(wr + 1); txpkt = (struct ulp_txpkt *)oreq; txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8)); OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id)); oreq->local_port = htons(f->dport); oreq->peer_port = htons(f->sport); oreq->local_ip = htonl(f->dip); oreq->peer_ip = htonl(f->sip); oreq->peer_netmask = htonl(f->sip_mask); oreq->opt0h = 0; oreq->opt0l = htonl(F_NO_OFFLOAD); oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) | V_CONN_POLICY(CPL_CONN_POLICY_FILTER) | V_VLAN_PRI(f->vlan_prio >> 1) | V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) | V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) | V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4))); sreq = (struct cpl_set_tcb_field *)(oreq + 1); set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL, (f->report_filter_id << 15) | (1 << 23) | ((u64)f->pass << 35) | ((u64)!f->rss << 36)); set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1); t3_mgmt_tx(sc, m); if (f->pass && !f->rss) { len = sizeof(*sreq); m = m_gethdr(M_WAITOK, MT_DATA); m->m_len = m->m_pkthdr.len = len; bzero(mtod(m, char *), len); sreq = mtod(m, struct cpl_set_tcb_field *); sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); mk_set_tcb_field(sreq, id, 25, 0x3f80000, (u64)sc->rrss_map[f->qset] << 19); t3_mgmt_tx(sc, m); } return 0; } static inline void mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid, unsigned int word, u64 mask, u64 val) { OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); req->reply = V_NO_REPLY(1); req->cpu_idx = 0; req->word = htons(word); req->mask = htobe64(mask); req->val = htobe64(val); } static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, unsigned int word, u64 mask, u64 val) { struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); mk_set_tcb_field(req, tid, word, mask, val); } void t3_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; mtx_lock(&t3_list_lock); SLIST_FOREACH(sc, &t3_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. */ func(sc, arg); } mtx_unlock(&t3_list_lock); } #ifdef TCP_OFFLOAD static int toe_capability(struct port_info *pi, int enable) { int rc; struct adapter *sc = pi->adapter; ADAPTER_LOCK_ASSERT_OWNED(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if (!(sc->flags & FULL_INIT_DONE)) { log(LOG_WARNING, "You must enable a cxgb interface first\n"); return (EAGAIN); } if (isset(&sc->offload_map, pi->port_id)) return (0); if (!(sc->flags & TOM_INIT_DONE)) { rc = t3_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t3_tom.ko before trying " "to enable TOE on a cxgb interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(sc->flags & TOM_INIT_DONE, ("%s: TOM activated but flag not set", __func__)); } setbit(&sc->offload_map, pi->port_id); /* * XXX: Temporary code to allow iWARP to be enabled when TOE is * enabled on any port. Need to figure out how to enable, * disable, load, and unload iWARP cleanly. */ if (!isset(&sc->offload_map, MAX_NPORTS) && t3_activate_uld(sc, ULD_IWARP) == 0) setbit(&sc->offload_map, MAX_NPORTS); } else { if (!isset(&sc->offload_map, pi->port_id)) return (0); KASSERT(sc->flags & TOM_INIT_DONE, ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. */ int t3_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(u, &t3_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t3_uld_list, ui, link); ui->refcount = 0; done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(u, &t3_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t3_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_activate_uld(struct adapter *sc, int id) { int rc = EAGAIN; struct uld_info *ui; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(ui, &t3_uld_list, link) { if (ui->uld_id == id) { rc = ui->activate(sc); if (rc == 0) ui->refcount++; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_deactivate_uld(struct adapter *sc, int id) { int rc = EINVAL; struct uld_info *ui; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(ui, &t3_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) ui->refcount--; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } static int cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused, struct mbuf *m) { m_freem(m); return (EDOOFUS); } int t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h) { uintptr_t *loc, new; if (opcode >= NUM_CPL_HANDLERS) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; loc = (uintptr_t *) &sc->cpl_handler[opcode]; atomic_store_rel_ptr(loc, new); return (0); } #endif static int cxgbc_mod_event(module_t mod, int cmd, void *arg) { int rc = 0; switch (cmd) { case MOD_LOAD: mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF); SLIST_INIT(&t3_list); #ifdef TCP_OFFLOAD mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF); SLIST_INIT(&t3_uld_list); #endif break; case MOD_UNLOAD: #ifdef TCP_OFFLOAD mtx_lock(&t3_uld_list_lock); if (!SLIST_EMPTY(&t3_uld_list)) { rc = EBUSY; mtx_unlock(&t3_uld_list_lock); break; } mtx_unlock(&t3_uld_list_lock); mtx_destroy(&t3_uld_list_lock); #endif mtx_lock(&t3_list_lock); if (!SLIST_EMPTY(&t3_list)) { rc = EBUSY; mtx_unlock(&t3_list_lock); break; } mtx_unlock(&t3_list_lock); mtx_destroy(&t3_list_lock); break; } return (rc); } #ifdef NETDUMP static void -cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *clsize) +cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct port_info *pi; adapter_t *adap; pi = if_getsoftc(ifp); adap = pi->adapter; ADAPTER_LOCK(adap); *nrxr = SGE_QSETS; + *ncl = adap->sge.qs[0].fl[1].size; *clsize = adap->sge.qs[0].fl[1].buf_size; ADAPTER_UNLOCK(adap); } static void cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event) { struct port_info *pi; struct sge_qset *qs; int i; pi = if_getsoftc(ifp); if (event == NETDUMP_START) for (i = 0; i < SGE_QSETS; i++) { qs = &pi->adapter->sge.qs[i]; /* Need to reinit after netdump_mbuf_dump(). */ qs->fl[0].zone = zone_pack; qs->fl[1].zone = zone_clust; qs->lro.enabled = 0; } } static int cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct port_info *pi; struct sge_qset *qs; pi = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (ENOENT); qs = &pi->adapter->sge.qs[pi->first_qset]; return (cxgb_netdump_encap(qs, &m)); } static int cxgb_netdump_poll(struct ifnet *ifp, int count) { struct port_info *pi; adapter_t *adap; int i; pi = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) return (ENOENT); adap = pi->adapter; for (i = 0; i < SGE_QSETS; i++) (void)cxgb_netdump_poll_rx(adap, &adap->sge.qs[i]); (void)cxgb_netdump_poll_tx(&adap->sge.qs[pi->first_qset]); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c =================================================================== --- user/markj/netdump/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c (revision 332406) +++ user/markj/netdump/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c (revision 332407) @@ -1,2937 +1,2938 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include #include #include #include #include #include #include #include #include "en.h" #include "en_port.h" NETDUMP_DEFINE(mlx4_en); static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); #ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int mlx4_en_low_latency_recv(struct napi_struct *napi) { struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; int done; if (!priv->port_up) return LL_FLUSH_FAILED; if (!mlx4_en_cq_lock_poll(cq)) return LL_FLUSH_BUSY; done = mlx4_en_process_rx_cq(dev, cq, 4); #ifdef LL_EXTENDED_STATS if (likely(done)) rx_ring->cleaned += done; else rx_ring->misses++; #endif mlx4_en_cq_unlock_poll(cq); return done; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { struct list_head next; struct work_struct work; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int rxq_index; struct mlx4_en_priv *priv; u32 flow_id; /* RFS infrastructure id */ int id; /* mlx4_en driver id */ u64 reg_id; /* Flow steering API id */ u8 activated; /* Used to prevent expiry before filter * is attached */ struct hlist_node filter_chain; }; static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) { switch (ip_proto) { case IPPROTO_UDP: return MLX4_NET_TRANS_RULE_ID_UDP; case IPPROTO_TCP: return MLX4_NET_TRANS_RULE_ID_TCP; default: return MLX4_NET_TRANS_RULE_NUM; } }; static void mlx4_en_filter_work(struct work_struct *work) { struct mlx4_en_filter *filter = container_of(work, struct mlx4_en_filter, work); struct mlx4_en_priv *priv = filter->priv; struct mlx4_spec_list spec_tcp_udp = { .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto), { .tcp_udp = { .dst_port = filter->dst_port, .dst_port_msk = (__force __be16)-1, .src_port = filter->src_port, .src_port_msk = (__force __be16)-1, }, }, }; struct mlx4_spec_list spec_ip = { .id = MLX4_NET_TRANS_RULE_ID_IPV4, { .ipv4 = { .dst_ip = filter->dst_ip, .dst_ip_msk = (__force __be32)-1, .src_ip = filter->src_ip, .src_ip_msk = (__force __be32)-1, }, }, }; struct mlx4_spec_list spec_eth = { .id = MLX4_NET_TRANS_RULE_ID_ETH, }; struct mlx4_net_trans_rule rule = { .list = LIST_HEAD_INIT(rule.list), .queue_mode = MLX4_NET_TRANS_Q_LIFO, .exclusive = 1, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .port = priv->port, .priority = MLX4_DOMAIN_RFS, }; int rc; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); if (spec_tcp_udp.id >= MLX4_NET_TRANS_RULE_NUM) { en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n", filter->ip_proto); goto ignore; } list_add_tail(&spec_eth.list, &rule.list); list_add_tail(&spec_ip.list, &rule.list); list_add_tail(&spec_tcp_udp.list, &rule.list); rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); filter->activated = 0; if (filter->reg_id) { rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. rc = %d\n", rc); } rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); if (rc) en_err(priv, "Error attaching flow. err = %d\n", rc); ignore: mlx4_en_filter_rfs_expire(priv); filter->activated = 1; } static inline struct hlist_head * filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { unsigned long l; int bucket_idx; l = (__force unsigned long)src_port | ((__force unsigned long)dst_port << 2); l ^= (__force unsigned long)(src_ip ^ dst_ip); bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); return &priv->filter_hash[bucket_idx]; } static struct mlx4_en_filter * mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port, u32 flow_id) { struct mlx4_en_filter *filter = NULL; filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); if (!filter) return NULL; filter->priv = priv; filter->rxq_index = rxq_index; INIT_WORK(&filter->work, mlx4_en_filter_work); filter->src_ip = src_ip; filter->dst_ip = dst_ip; filter->ip_proto = ip_proto; filter->src_port = src_port; filter->dst_port = dst_port; filter->flow_id = flow_id; filter->id = priv->last_filter_id++ % RPS_NO_FILTER; list_add_tail(&filter->next, &priv->filters); hlist_add_head(&filter->filter_chain, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port)); return filter; } static void mlx4_en_filter_free(struct mlx4_en_filter *filter) { struct mlx4_en_priv *priv = filter->priv; int rc; list_del(&filter->next); rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. rc = %d\n", rc); kfree(filter); } static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port) { struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; hlist_for_each_entry(filter, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { if (filter->src_ip == src_ip && filter->dst_ip == dst_ip && filter->ip_proto == ip_proto && filter->src_port == src_port && filter->dst_port == dst_port) { ret = filter; break; } } return ret; } static int mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct mlx4_en_priv *priv = netdev_priv(net_dev); struct mlx4_en_filter *filter; const struct iphdr *ip; const __be16 *ports; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int nhoff = skb_network_offset(skb); int ret = 0; if (skb->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; ip = (const struct iphdr *)(skb->data + nhoff); if (ip_is_fragment(ip)) return -EPROTONOSUPPORT; if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP)) return -EPROTONOSUPPORT; ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); ip_proto = ip->protocol; src_ip = ip->saddr; dst_ip = ip->daddr; src_port = ports[0]; dst_port = ports[1]; spin_lock_bh(&priv->filters_lock); filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto, src_port, dst_port); if (filter) { if (filter->rxq_index == rxq_index) goto out; filter->rxq_index = rxq_index; } else { filter = mlx4_en_filter_alloc(priv, rxq_index, src_ip, dst_ip, ip_proto, src_port, dst_port, flow_id); if (!filter) { ret = -ENOMEM; goto err; } } queue_work(priv->mdev->workqueue, &filter->work); out: ret = filter->id; err: spin_unlock_bh(&priv->filters_lock); return ret; } void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter, *tmp; LIST_HEAD(del_list); spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) { cancel_work_sync(&filter->work); mlx4_en_filter_free(filter); } } static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; LIST_HEAD(del_list); int i = 0; spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) break; if (filter->activated && !work_pending(&filter->work) && rps_may_expire_flow(priv->dev, filter->rxq_index, filter->flow_id, filter->id)) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } else last_filter = filter; i++; } if (last_filter && (&last_filter->next != priv->filters.next)) list_move(&priv->filters, &last_filter->next); spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) mlx4_en_filter_free(filter); } #endif static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; int idx; if (arg != priv) return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); set_bit(vid, priv->active_vlans); /* Add VID to port VLAN filter */ mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) en_dbg(HW, priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; if (arg != priv) return; en_dbg(HW, priv, "Killing VID:%d\n", vid); clear_bit(vid, priv->active_vlans); /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } mutex_unlock(&mdev->state_lock); } static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *addr, int qpn, u64 *reg_id) { int err; if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN || priv->mdev->dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) return 0; /* do nothing */ err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn, MLX4_DOMAIN_NIC, reg_id); if (err) { en_err(priv, "failed to add vxlan steering rule, err %d\n", err); return err; } en_dbg(DRV, priv, "added vxlan steering rule, mac %pM reg_id %llx\n", addr, (long long)*reg_id); return 0; } static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, unsigned char *mac, int *qpn, u64 *reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int err; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = *qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec_eth = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.port = priv->port; rule.qpn = *qpn; INIT_LIST_HEAD(&rule.list); spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); list_add_tail(&spec_eth.list, &rule.list); err = mlx4_flow_attach(dev, &rule, reg_id); break; } default: return -EINVAL; } if (err) en_warn(priv, "Failed Attaching Unicast\n"); return err; } static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, unsigned char *mac, int qpn, u64 reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { mlx4_flow_detach(dev, reg_id); break; } default: en_err(priv, "Invalid steering mode.\n"); } } static int mlx4_en_get_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int index = 0; int err = 0; int *qpn = &priv->base_qpn; u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", IF_LLADDR(priv->dev)); index = mlx4_register_mac(dev, priv->port, mac); if (index < 0) { err = index; en_err(priv, "Failed adding MAC: %pM\n", IF_LLADDR(priv->dev)); return err; } if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { int base_qpn = mlx4_get_base_qpn(dev, priv->port); *qpn = base_qpn + index; return 0; } err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); mlx4_unregister_mac(dev, priv->port, mac); return err; } return 0; } static void mlx4_en_put_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int qpn = priv->base_qpn; if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", IF_LLADDR(priv->dev)); mlx4_unregister_mac(dev, priv->port, mac); } else { en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", priv->port, qpn); mlx4_qp_release_range(dev, qpn, 1); priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; } } static void mlx4_en_clear_uclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp, *uc_to_del; list_for_each_entry_safe(uc_to_del, tmp, &priv->uc_list, list) { list_del(&uc_to_del->list); kfree(uc_to_del); } } static void mlx4_en_cache_uclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp; struct ifaddr *ifa; mlx4_en_clear_uclist(dev); if_addr_rlock(dev); TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifa->ifa_addr)->sdl_alen != ETHER_ADDR_LEN) continue; tmp = kzalloc(sizeof(struct mlx4_en_addr_list), GFP_ATOMIC); if (tmp == NULL) { en_err(priv, "Failed to allocate address list\n"); break; } memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifa->ifa_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->uc_list); } if_addr_runlock(dev); } static void mlx4_en_clear_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp, *mc_to_del; list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { list_del(&mc_to_del->list); kfree(mc_to_del); } } static void mlx4_en_cache_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp; struct ifmultiaddr *ifma; mlx4_en_clear_mclist(dev); if_maddr_rlock(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; tmp = kzalloc(sizeof(struct mlx4_en_addr_list), GFP_ATOMIC); if (tmp == NULL) { en_err(priv, "Failed to allocate address list\n"); break; } memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->mc_list); } if_maddr_runlock(dev); } static void update_addr_list_flags(struct mlx4_en_priv *priv, struct list_head *dst, struct list_head *src) { struct mlx4_en_addr_list *dst_tmp, *src_tmp, *new_mc; bool found; /* Find all the entries that should be removed from dst, * These are the entries that are not found in src */ list_for_each_entry(dst_tmp, dst, list) { found = false; list_for_each_entry(src_tmp, src, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { found = true; break; } } if (!found) dst_tmp->action = MLX4_ADDR_LIST_REM; } /* Add entries that exist in src but not in dst * mark them as need to add */ list_for_each_entry(src_tmp, src, list) { found = false; list_for_each_entry(dst_tmp, dst, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { dst_tmp->action = MLX4_ADDR_LIST_NONE; found = true; break; } } if (!found) { new_mc = kmalloc(sizeof(struct mlx4_en_addr_list), GFP_KERNEL); if (!new_mc) { en_err(priv, "Failed to allocate current multicast list\n"); return; } memcpy(new_mc, src_tmp, sizeof(struct mlx4_en_addr_list)); new_mc->action = MLX4_ADDR_LIST_ADD; list_add_tail(&new_mc->list, dst); } } } static void mlx4_en_set_rx_mode(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); if (!priv->port_up) return; queue_work(priv->mdev->workqueue, &priv->rx_mode_task); } static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling unicast promiscuous mode\n"); /* Add the default qp number as multicast * promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling multicast promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 1); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); break; } /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); } } static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed disabling unicast promiscuous mode\n"); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); break; } } static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, struct net_device *dev, struct mlx4_en_dev *mdev) { struct mlx4_en_addr_list *addr_list, *tmp; u8 mc_list[16] = {0}; int err = 0; u64 mcast_addr = 0; /* Enable/disable the multicast filter according to IFF_ALLMULTI */ if (dev->if_flags & IFF_ALLMULTI) { err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Add the default qp number as multicast promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed entering multicast promisc mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } } else { /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } /* Update unicast list */ mlx4_en_cache_uclist(dev); update_addr_list_flags(priv, &priv->curr_uc_list, &priv->uc_list); list_for_each_entry_safe(addr_list, tmp, &priv->curr_uc_list, list) { if (addr_list->action == MLX4_ADDR_LIST_REM) { mlx4_en_uc_steer_release(priv, addr_list->addr, priv->rss_map.indir_qp.qpn, addr_list->reg_id); /* remove from list */ list_del(&addr_list->list); kfree(addr_list); } else if (addr_list->action == MLX4_ADDR_LIST_ADD) { err = mlx4_en_uc_steer_add(priv, addr_list->addr, &priv->rss_map.indir_qp.qpn, &addr_list->reg_id); if (err) en_err(priv, "Fail to add unicast address\n"); } } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ mlx4_en_cache_mclist(dev); list_for_each_entry(addr_list, &priv->mc_list, list) { mcast_addr = mlx4_mac_to_u64(addr_list->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcast_addr, 0, MLX4_MCAST_CONFIG); } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); update_addr_list_flags(priv, &priv->curr_mc_list, &priv->mc_list); list_for_each_entry_safe(addr_list, tmp, &priv->curr_mc_list, list) { if (addr_list->action == MLX4_ADDR_LIST_REM) { /* detach this address and delete from list */ memcpy(&mc_list[10], addr_list->addr, ETH_ALEN); mc_list[5] = priv->port; err = mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, addr_list->reg_id); if (err) en_err(priv, "Fail to detach multicast address\n"); if (addr_list->tunnel_reg_id) { err = mlx4_flow_detach(priv->mdev->dev, addr_list->tunnel_reg_id); if (err) en_err(priv, "Failed to detach multicast address\n"); } /* remove from list */ list_del(&addr_list->list); kfree(addr_list); } else if (addr_list->action == MLX4_ADDR_LIST_ADD) { /* attach the address */ memcpy(&mc_list[10], addr_list->addr, ETH_ALEN); /* needed for B0 steering support */ mc_list[5] = priv->port; err = mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &addr_list->reg_id); if (err) en_err(priv, "Fail to attach multicast address\n"); err = mlx4_en_tunnel_steer_add(priv, &mc_list[10], priv->base_qpn, &addr_list->tunnel_reg_id); if (err) en_err(priv, "Failed to attach multicast address\n"); } } } } static void mlx4_en_do_set_rx_mode(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, rx_mode_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n"); goto out; } if (!priv->port_up) { en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n"); goto out; } if (!mlx4_en_QUERY_PORT(mdev, priv->port)) { if (priv->port_state.link_state) { priv->last_link_state = MLX4_DEV_EVENT_PORT_UP; /* update netif baudrate */ priv->dev->if_baudrate = IF_Mbps(priv->port_state.link_speed); /* Important note: the following call for if_link_state_change * is needed for interface up scenario (start port, link state * change) */ if_link_state_change(priv->dev, LINK_STATE_UP); en_dbg(HW, priv, "Link Up\n"); } } /* Promsicuous mode: disable all filters */ if ((dev->if_flags & IFF_PROMISC) || (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) { mlx4_en_set_promisc_mode(priv, mdev); goto out; } /* Not in promiscuous mode */ if (priv->flags & MLX4_EN_FLAG_PROMISC) mlx4_en_clear_promisc_mode(priv, mdev); mlx4_en_do_multicast(priv, dev, mdev); out: mutex_unlock(&mdev->state_lock); } static void mlx4_en_watchdog_timeout(void *arg) { struct mlx4_en_priv *priv = arg; struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Scheduling watchdog\n"); queue_work(mdev->workqueue, &priv->watchdog_task); if (priv->port_up) callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; int i; /* If we haven't received a specific coalescing setting * (module param), we set the moderation parameters as follows: * - moder_cnt is set to the number of mtu sized packets to * satisfy our coalescing target. * - moder_time is set to a fixed value. */ priv->rx_frames = MLX4_EN_RX_COAL_TARGET; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; priv->tx_frames = MLX4_EN_TX_COAL_PKTS; priv->tx_usecs = MLX4_EN_TX_COAL_TIME; en_dbg(INTR, priv, "Default coalesing params for mtu: %u - " "rx_frames:%d rx_usecs:%d\n", (unsigned)priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; cq->moder_cnt = priv->rx_frames; cq->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; priv->last_moder_packets[i] = 0; priv->last_moder_bytes[i] = 0; } for (i = 0; i < priv->tx_ring_num; i++) { cq = priv->tx_cq[i]; cq->moder_cnt = priv->tx_frames; cq->moder_time = priv->tx_usecs; } /* Reset auto-moderation params */ priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; priv->adaptive_rx_coal = 1; priv->last_moder_jiffies = 0; priv->last_moder_tx_packets = 0; } static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; unsigned long avg_pkt_size; unsigned long rx_packets; unsigned long rx_bytes; unsigned long rx_pkt_diff; int moder_time; int ring, err; if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { spin_lock(&priv->stats_lock); rx_packets = priv->rx_ring[ring]->packets; rx_bytes = priv->rx_ring[ring]->bytes; spin_unlock(&priv->stats_lock); rx_pkt_diff = ((unsigned long) (rx_packets - priv->last_moder_packets[ring])); packets = rx_pkt_diff; rate = packets * HZ / period; avg_pkt_size = packets ? ((unsigned long) (rx_bytes - priv->last_moder_bytes[ring])) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { if (rate < priv->pkt_rate_low) moder_time = priv->rx_usecs_low; else if (rate > priv->pkt_rate_high) moder_time = priv->rx_usecs_high; else moder_time = (rate - priv->pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / (priv->pkt_rate_high - priv->pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } if (moder_time != priv->last_moder_time[ring]) { priv->last_moder_time[ring] = moder_time; cq = priv->rx_cq[ring]; cq->moder_time = moder_time; cq->moder_cnt = priv->rx_frames; err = mlx4_en_set_cq_moder(priv, cq); if (err) en_err(priv, "Failed modifying moderation for cq:%d\n", ring); } priv->last_moder_packets[ring] = rx_packets; priv->last_moder_bytes[ring] = rx_bytes; } priv->last_moder_jiffies = jiffies; } static void mlx4_en_do_get_stats(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, stats_task); struct mlx4_en_dev *mdev = priv->mdev; int err; mutex_lock(&mdev->state_lock); if (mdev->device_up) { if (priv->port_up) { if (mlx4_is_slave(mdev->dev)) err = mlx4_en_get_vport_stats(mdev, priv->port); else err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); if (err) en_dbg(HW, priv, "Could not update stats\n"); mlx4_en_auto_moderation(priv); } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } mutex_unlock(&mdev->state_lock); } /* mlx4_en_service_task - Run service task for tasks that needed to be done * periodically */ static void mlx4_en_service_task(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, service_task); struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); if (mdev->device_up) { queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_linkstate(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, linkstate_task); struct mlx4_en_dev *mdev = priv->mdev; int linkstate = priv->link_state; mutex_lock(&mdev->state_lock); /* If observable port state changed set carrier state and * report to system log */ if (priv->last_link_state != linkstate) { if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { en_info(priv, "Link Down\n"); if_link_state_change(priv->dev, LINK_STATE_DOWN); /* update netif baudrate */ priv->dev->if_baudrate = 0; /* make sure the port is up before notifying the OS. * This is tricky since we get here on INIT_PORT and * in such case we can't tell the OS the port is up. * To solve this there is a call to if_link_state_change * in set_rx_mode. * */ } else if (priv->port_up && (linkstate == MLX4_DEV_EVENT_PORT_UP)){ if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) en_info(priv, "Query port failed\n"); priv->dev->if_baudrate = IF_Mbps(priv->port_state.link_speed); en_info(priv, "Link Up\n"); if_link_state_change(priv->dev, LINK_STATE_UP); } } priv->last_link_state = linkstate; mutex_unlock(&mdev->state_lock); } int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; int rx_index = 0; int tx_index = 0; int err = 0; int i; int j; u8 mc_list[16] = {0}; if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); return 0; } INIT_LIST_HEAD(&priv->mc_list); INIT_LIST_HEAD(&priv->uc_list); INIT_LIST_HEAD(&priv->curr_mc_list); INIT_LIST_HEAD(&priv->curr_uc_list); INIT_LIST_HEAD(&priv->ethtool_list); /* Calculate Rx buf size */ dev->if_mtu = min(dev->if_mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ err = mlx4_en_activate_rx_rings(priv); if (err) { en_err(priv, "Failed to activate RX rings\n"); return err; } for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; mlx4_en_cq_init_lock(cq); err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto cq_err; } mlx4_en_arm_cq(priv, cq); priv->rx_ring[i]->cqn = cq->mcq.cqn; ++rx_index; } /* Set qp number */ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); err = mlx4_en_get_qp(priv); if (err) { en_err(priv, "Failed getting eth qp\n"); goto cq_err; } mdev->mac_removed[priv->port] = 0; priv->counter_index = mlx4_get_default_counter_index(mdev->dev, priv->port); err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); goto mac_err; } err = mlx4_en_create_drop_qp(priv); if (err) goto rss_err; /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ cq = priv->tx_cq[i]; err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Tx CQ\n"); goto tx_err; } err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure ring */ tx_ring = priv->tx_ring[i]; err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, i / priv->num_tx_rings_p_up); if (err) { en_err(priv, "Failed activating Tx ring %d\n", i); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = INIT_OWNER_BIT; ++tx_index; } /* Configure port */ err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations for port %d, with error %d\n", priv->port, err); goto tx_err; } /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) { en_err(priv, "Failed setting default qp numbers\n"); goto tx_err; } /* Init port */ en_dbg(HW, priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto tx_err; } /* Attach rx QP to bradcast address */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); /* Must redo promiscuous mode setup. */ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); priv->port_up = true; /* Enable the queues. */ dev->if_drv_flags &= ~IFF_DRV_OACTIVE; dev->if_drv_flags |= IFF_DRV_RUNNING; #ifdef CONFIG_DEBUG_FS mlx4_en_create_debug_files(priv); #endif callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); return 0; tx_err: while (tx_index--) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]); } mlx4_en_destroy_drop_qp(priv); rss_err: mlx4_en_release_rss_steer(priv); mac_err: mlx4_en_put_qp(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); return err; /* need to close devices */ } void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_addr_list *addr_list, *tmp; int i; u8 mc_list[16] = {0}; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } #ifdef CONFIG_DEBUG_FS mlx4_en_delete_debug_files(priv); #endif /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); /* Set port as not active */ priv->port_up = false; priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } } /* Detach All unicasts */ list_for_each_entry(addr_list, &priv->curr_uc_list, list) { mlx4_en_uc_steer_release(priv, addr_list->addr, priv->rss_map.indir_qp.qpn, addr_list->reg_id); } mlx4_en_clear_uclist(dev); list_for_each_entry_safe(addr_list, tmp, &priv->curr_uc_list, list) { list_del(&addr_list->list); kfree(addr_list); } /* Detach All multicasts */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, priv->broadcast_id); list_for_each_entry(addr_list, &priv->curr_mc_list, list) { memcpy(&mc_list[10], addr_list->addr, ETH_ALEN); mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, addr_list->reg_id); } mlx4_en_clear_mclist(dev); list_for_each_entry_safe(addr_list, tmp, &priv->curr_mc_list, list) { list_del(&addr_list->list); kfree(addr_list); } /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]); mlx4_en_deactivate_cq(priv, priv->tx_cq[i]); } msleep(10); for (i = 0; i < priv->tx_ring_num; i++) mlx4_en_free_tx_buf(dev, priv->tx_ring[i]); /* Free RSS qps */ mlx4_en_release_rss_steer(priv); /* Unregister Mac address for the port */ mlx4_en_put_qp(priv); mdev->mac_removed[priv->port] = 1; /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { struct mlx4_en_cq *cq = priv->rx_cq[i]; mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); } callout_stop(&priv->watchdog_timer); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, watchdog_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; struct mlx4_en_tx_ring *ring; int i; if (priv->blocked == 0 || priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; if (ring->blocked && ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; reset: priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev); //for (i = 0; i < priv->tx_ring_num; i++) // netdev_tx_reset_queue(priv->tx_ring[i]->tx_queue); if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_clear_stats(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i; if (!mlx4_is_slave(mdev->dev)) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); memset(&priv->vport_stats, 0, sizeof(priv->vport_stats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->bytes = 0; priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; priv->tx_ring[i]->oversized_packets = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; priv->rx_ring[i]->packets = 0; priv->rx_ring[i]->csum_ok = 0; priv->rx_ring[i]->csum_none = 0; } } static void mlx4_en_open(void* arg) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct net_device *dev; int err = 0; priv = arg; mdev = priv->mdev; dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); goto out; } /* Reset HW statistics and SW counters */ mlx4_en_clear_stats(dev); err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port:%d\n", priv->port); out: mutex_unlock(&mdev->state_lock); return; } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; #ifdef CONFIG_RFS_ACCEL if (priv->dev->rx_cpu_rmap) { free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; } #endif for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring && priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq && priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], priv->prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } if (priv->stat_sysctl != NULL) sysctl_ctx_free(&priv->stat_ctx); } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; int node = 0; /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, node)) goto err; } /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->tx_cq[i], prof->tx_ring_size, i, TX, node)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], prof->tx_ring_size, TXBB_SIZE, node, i)) goto err; } #ifdef CONFIG_RFS_ACCEL priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); if (!priv->dev->rx_cpu_rmap) goto err; #endif /* Re-create stat sysctls in case the number of rings changed. */ mlx4_en_sysctl_stat(priv); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } priv->port_up = false; return -ENOMEM; } struct en_port_attribute { struct attribute attr; ssize_t (*show)(struct en_port *, struct en_port_attribute *, char *buf); ssize_t (*store)(struct en_port *, struct en_port_attribute *, char *buf, size_t count); }; #define PORT_ATTR_RO(_name) \ struct en_port_attribute en_port_attr_##_name = __ATTR_RO(_name) #define EN_PORT_ATTR(_name, _mode, _show, _store) \ struct en_port_attribute en_port_attr_##_name = __ATTR(_name, _mode, _show, _store) void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); /* don't allow more IOCTLs */ priv->gone = 1; /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) { mutex_lock(&mdev->state_lock); ether_ifdetach(dev); mutex_unlock(&mdev->state_lock); } mutex_lock(&mdev->state_lock); mlx4_en_stop_port(dev); mutex_unlock(&mdev->state_lock); if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); cancel_delayed_work(&priv->stats_task); cancel_delayed_work(&priv->service_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); callout_drain(&priv->watchdog_timer); /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); /* freeing the sysctl conf cannot be called from within mlx4_en_free_resources */ if (priv->conf_sysctl != NULL) sysctl_ctx_free(&priv->conf_ctx); kfree(priv->tx_ring); kfree(priv->tx_cq); kfree(priv); if_free(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err = 0; en_dbg(DRV, priv, "Change MTU called - current:%u new:%u\n", (unsigned)dev->if_mtu, (unsigned)new_mtu); if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { en_err(priv, "Bad MTU size:%d, max %u.\n", new_mtu, priv->max_mtu); return -EPERM; } mutex_lock(&mdev->state_lock); dev->if_mtu = new_mtu; if (dev->if_drv_flags & IFF_DRV_RUNNING) { if (!mdev->device_up) { /* NIC is probably restarting - let watchdog task reset * * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { mlx4_en_stop_port(dev); err = mlx4_en_start_port(dev); if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); queue_work(mdev->workqueue, &priv->watchdog_task); } } } mutex_unlock(&mdev->state_lock); return 0; } static int mlx4_en_calc_media(struct mlx4_en_priv *priv) { int trans_type; int active; active = IFM_ETHER; if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN) return (active); active |= IFM_FDX; trans_type = priv->port_state.transceiver; /* XXX I don't know all of the transceiver values. */ switch (priv->port_state.link_speed) { case 100: active |= IFM_100_T; break; case 1000: active |= IFM_1000_T; break; case 10000: if (trans_type > 0 && trans_type <= 0xC) active |= IFM_10G_SR; else if (trans_type == 0x80 || trans_type == 0) active |= IFM_10G_CX4; break; case 40000: active |= IFM_40G_CR4; break; } if (priv->prof->tx_pause) active |= IFM_ETH_TXPAUSE; if (priv->prof->rx_pause) active |= IFM_ETH_RXPAUSE; return (active); } static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx4_en_priv *priv; priv = dev->if_softc; ifmr->ifm_status = IFM_AVALID; if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = mlx4_en_calc_media(priv); return; } static int mlx4_en_media_change(struct ifnet *dev) { struct mlx4_en_priv *priv; struct ifmedia *ifm; int rxpause; int txpause; int error; priv = dev->if_softc; ifm = &priv->media; rxpause = txpause = 0; error = 0; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_SR: case IFM_10G_CX4: case IFM_1000_T: case IFM_40G_CR4: if ((IFM_SUBTYPE(ifm->ifm_media) == IFM_SUBTYPE(mlx4_en_calc_media(priv))) && (ifm->ifm_media & IFM_FDX)) break; /* Fallthrough */ default: printf("%s: Only auto media type\n", if_name(dev)); return (EINVAL); } /* Allow user to set/clear pause */ if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE) rxpause = 1; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE) txpause = 1; if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) { priv->prof->tx_pause = txpause; priv->prof->rx_pause = rxpause; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); } return (error); } static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct ifreq *ifr; int error; int mask; struct ifrsskey *ifrk; const u32 *key; struct ifrsshash *ifrh; u8 rss_mask; error = 0; mask = 0; priv = dev->if_softc; /* check if detaching */ if (priv == NULL || priv->gone != 0) return (ENXIO); mdev = priv->mdev; ifr = (struct ifreq *) data; switch (command) { case SIOCSIFMTU: error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu); break; case SIOCSIFFLAGS: if (dev->if_flags & IFF_UP) { if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) { mutex_lock(&mdev->state_lock); mlx4_en_start_port(dev); mutex_unlock(&mdev->state_lock); } else { mlx4_en_set_rx_mode(dev); } } else { mutex_lock(&mdev->state_lock); if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_stop_port(dev); if_link_state_change(dev, LINK_STATE_DOWN); } mutex_unlock(&mdev->state_lock); } break; case SIOCADDMULTI: case SIOCDELMULTI: mlx4_en_set_rx_mode(dev); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(dev, ifr, &priv->media, command); break; case SIOCSIFCAP: mutex_lock(&mdev->state_lock); mask = ifr->ifr_reqcap ^ dev->if_capenable; if (mask & IFCAP_TXCSUM) { dev->if_capenable ^= IFCAP_TXCSUM; dev->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & dev->if_capenable && !(IFCAP_TXCSUM & dev->if_capenable)) { dev->if_capenable &= ~IFCAP_TSO4; dev->if_hwassist &= ~CSUM_IP_TSO; if_printf(dev, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { dev->if_capenable ^= IFCAP_TXCSUM_IPV6; dev->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & dev->if_capenable && !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) { dev->if_capenable &= ~IFCAP_TSO6; dev->if_hwassist &= ~CSUM_IP6_TSO; if_printf(dev, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) dev->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) dev->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & dev->if_capenable) && !(IFCAP_TXCSUM & dev->if_capenable)) { if_printf(dev, "enable txcsum first.\n"); error = EAGAIN; goto out; } dev->if_capenable ^= IFCAP_TSO4; dev->if_hwassist ^= CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & dev->if_capenable) && !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) { if_printf(dev, "enable txcsum6 first.\n"); error = EAGAIN; goto out; } dev->if_capenable ^= IFCAP_TSO6; dev->if_hwassist ^= CSUM_IP6_TSO; } if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_start_port(dev); out: mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; #if __FreeBSD_version >= 1100036 case SIOCGI2C: { struct ifi2creq i2c; error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } /* * Note that we ignore i2c.addr here. The driver hardcodes * the address to 0x50, while standard expects it to be 0xA0. */ error = mlx4_get_module_info(mdev->dev, priv->port, i2c.offset, i2c.len, i2c.data); if (error < 0) { error = -error; break; } error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } #endif case SIOCGIFRSSKEY: ifrk = (struct ifrsskey *)data; ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; mutex_lock(&mdev->state_lock); key = mlx4_en_get_rss_key(priv, &ifrk->ifrk_keylen); if (ifrk->ifrk_keylen > RSS_KEYLEN) error = EINVAL; else memcpy(ifrk->ifrk_key, key, ifrk->ifrk_keylen); mutex_unlock(&mdev->state_lock); break; case SIOCGIFRSSHASH: mutex_lock(&mdev->state_lock); rss_mask = mlx4_en_get_rss_mask(priv); mutex_unlock(&mdev->state_lock); ifrh = (struct ifrsshash *)data; ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; ifrh->ifrh_types = 0; if (rss_mask & MLX4_RSS_IPV4) ifrh->ifrh_types |= RSS_TYPE_IPV4; if (rss_mask & MLX4_RSS_TCP_IPV4) ifrh->ifrh_types |= RSS_TYPE_TCP_IPV4; if (rss_mask & MLX4_RSS_IPV6) ifrh->ifrh_types |= RSS_TYPE_IPV6; if (rss_mask & MLX4_RSS_TCP_IPV6) ifrh->ifrh_types |= RSS_TYPE_TCP_IPV6; if (rss_mask & MLX4_RSS_UDP_IPV4) ifrh->ifrh_types |= RSS_TYPE_UDP_IPV4; if (rss_mask & MLX4_RSS_UDP_IPV6) ifrh->ifrh_types |= RSS_TYPE_UDP_IPV6; break; default: error = ether_ioctl(dev, command, data); break; } return (error); } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { struct net_device *dev; struct mlx4_en_priv *priv; uint8_t dev_addr[ETHER_ADDR_LEN]; int err; int i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev = priv->dev = if_alloc(IFT_ETHER); if (dev == NULL) { en_err(priv, "Net device allocation failed\n"); kfree(priv); return -ENOMEM; } dev->if_softc = priv; if_initname(dev, "mlxen", (device_get_unit( mdev->pdev->dev.bsddev) * MLX4_MAX_PORTS) + port - 1); dev->if_mtu = ETHERMTU; dev->if_init = mlx4_en_open; dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; dev->if_ioctl = mlx4_en_ioctl; dev->if_transmit = mlx4_en_transmit; dev->if_qflush = mlx4_en_qflush; dev->if_snd.ifq_maxlen = prof->tx_ring_size; /* * Initialize driver private data */ priv->counter_index = 0xff; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); callout_init(&priv->watchdog_timer, 1); #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); #endif priv->msg_enable = MLX4_EN_MSG_LEVEL; priv->dev = dev; priv->mdev = mdev; priv->ddev = &mdev->pdev->dev; priv->prof = prof; priv->port = port; priv->port_up = false; priv->flags = prof->flags; priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; priv->tx_ring_num = prof->tx_ring_num; priv->tx_ring = kcalloc(MAX_TX_RINGS, sizeof(struct mlx4_en_tx_ring *), GFP_KERNEL); if (!priv->tx_ring) { err = -ENOMEM; goto out; } priv->tx_cq = kcalloc(sizeof(struct mlx4_en_cq *), MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_cq) { err = -ENOMEM; goto out; } priv->rx_ring_num = prof->rx_ring_num; priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0; priv->mac_index = -1; priv->last_ifq_jiffies = 0; priv->if_counters_rx_errors = 0; priv->if_counters_rx_no_buffer = 0; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { priv->dcbx_cap = DCB_CAP_DCBX_HOST; priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { en_info(priv, "QoS disabled - no HW support\n"); dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; } } #endif /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; priv->mac = mdev->dev->caps.def_mac[priv->port]; if (ILLEGAL_MAC(priv->mac)) { #if BITS_PER_LONG == 64 en_err(priv, "Port: %d, invalid mac burned: 0x%lx, quiting\n", priv->port, priv->mac); #elif BITS_PER_LONG == 32 en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", priv->port, priv->mac); #endif err = -EINVAL; goto out; } priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE); mlx4_en_sysctl_conf(priv); err = mlx4_en_alloc_resources(priv); if (err) goto out; /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } priv->allocated = 1; /* * Set driver features */ dev->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; dev->if_capabilities |= IFCAP_LRO; dev->if_capabilities |= IFCAP_HWSTATS; if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTSO; #if __FreeBSD_version >= 1100000 /* set TSO limits so that we don't have to drop TX packets */ dev->if_hw_tsomax = MLX4_EN_TX_MAX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* hdr */; dev->if_hw_tsomaxsegcount = MLX4_EN_TX_MAX_MBUF_FRAGS - 1 /* hdr */; dev->if_hw_tsomaxsegsize = MLX4_EN_TX_MAX_MBUF_SIZE; #endif dev->if_capenable = dev->if_capabilities; dev->if_hwassist = 0; if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) dev->if_hwassist |= CSUM_TSO; if (dev->if_capenable & IFCAP_TXCSUM) dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (dev->if_capenable & IFCAP_TXCSUM_IPV6) dev->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); mdev->pndev[priv->port] = dev; priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; mlx4_en_set_default_moderation(priv); /* Set default MAC */ for (i = 0; i < ETHER_ADDR_LEN; i++) dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); ether_ifattach(dev, dev_addr); if_link_state_change(dev, LINK_STATE_DOWN); ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx4_en_media_change, mlx4_en_media_status); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_40G_CR4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); NETDUMP_SET(dev, mlx4_en); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->registered = 1; en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->rx_mb_size = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, prof->tx_pause, prof->tx_ppp, prof->rx_pause, prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations " "for port %d, with error %d\n", priv->port, err); goto out; } /* Init port */ en_warn(priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto out; } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); return 0; out: mlx4_en_destroy_netdev(dev); return err; } static int mlx4_en_set_ring_size(struct net_device *dev, int rx_size, int tx_size) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; rx_size = roundup_pow_of_two(rx_size); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(tx_size); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && tx_size == priv->tx_ring[0]->size) return 0; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } if (port_up) { err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->rx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, size, priv->prof->tx_ring_size); return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->tx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, size); return (error); } static int mlx4_en_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int ret; u8 data[4]; /* Read first 2 bytes to get Module & REV ID */ ret = mlx4_get_module_info(mdev->dev, priv->port, 0/*offset*/, 2/*size*/, data); if (ret < 2) { en_err(priv, "Failed to read eeprom module first two bytes, error: 0x%x\n", -ret); return -EIO; } switch (data[0] /* identifier */) { case MLX4_MODULE_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; case MLX4_MODULE_ID_QSFP_PLUS: if (data[1] >= 0x3) { /* revision id */ modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; } else { modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; } break; case MLX4_MODULE_ID_QSFP28: modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: en_err(priv, "mlx4_en_get_module_info : Not recognized cable type\n"); return -EINVAL; } return 0; } static int mlx4_en_get_module_eeprom(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int offset = ee->offset; int i = 0, ret; if (ee->len == 0) return -EINVAL; memset(data, 0, ee->len); while (i < ee->len) { en_dbg(DRV, priv, "mlx4_get_module_info i(%d) offset(%d) len(%d)\n", i, offset, ee->len - i); ret = mlx4_get_module_info(mdev->dev, priv->port, offset, ee->len - i, data + i); if (!ret) /* Done reading */ return 0; if (ret < 0) { en_err(priv, "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n", i, offset, ee->len - i, ret); return -1; } i += ret; offset += ret; } return 0; } static void mlx4_en_print_eeprom(u8 *data, __u32 len) { int i; int j = 0; int row = 0; const int NUM_OF_BYTES = 16; printf("\nOffset\t\tValues\n"); printf("------\t\t------\n"); while(row < len){ printf("0x%04x\t\t",row); for(i=0; i < NUM_OF_BYTES; i++){ printf("%02x ", data[j]); row++; j++; } printf("\n"); } } /* Read cable EEPROM module information by first inspecting the first * two bytes to get the length and then read the rest of the information. * The information is printed to dmesg. */ static int mlx4_en_read_eeprom(SYSCTL_HANDLER_ARGS) { u8* data; int error; int result = 0; struct mlx4_en_priv *priv; struct net_device *dev; struct ethtool_modinfo modinfo; struct ethtool_eeprom ee; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { priv = arg1; dev = priv->dev; data = kmalloc(PAGE_SIZE, GFP_KERNEL); error = mlx4_en_get_module_info(dev, &modinfo); if (error) { en_err(priv, "mlx4_en_get_module_info returned with error - FAILED (0x%x)\n", -error); goto out; } ee.len = modinfo.eeprom_len; ee.offset = 0; error = mlx4_en_get_module_eeprom(dev, &ee, data); if (error) { en_err(priv, "mlx4_en_get_module_eeprom returned with error - FAILED (0x%x)\n", -error); /* Continue printing partial information in case of an error */ } /* EEPROM information will be printed in dmesg */ mlx4_en_print_eeprom(data, ee.len); out: kfree(data); } /* Return zero to prevent sysctl failure. */ return (0); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int ppp; int error; priv = arg1; ppp = priv->prof->tx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); priv->prof->tx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; int ppp; int error; int port_up; port_up = 0; priv = arg1; mdev = priv->mdev; ppp = priv->prof->rx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); /* See if we have to change the number of tx queues. */ if (!ppp != !priv->prof->rx_ppp) { mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(priv->dev); } mlx4_en_free_resources(priv); priv->prof->rx_ppp = ppp; error = -mlx4_en_alloc_resources(priv); if (error) en_err(priv, "Failed reallocating port resources\n"); if (error == 0 && port_up) { error = -mlx4_en_start_port(priv->dev); if (error) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return (error); } priv->prof->rx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *coal; struct sysctl_oid_list *coal_list; const char *pnameunit; dev = priv->dev; ctx = &priv->conf_ctx; pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev); sysctl_ctx_init(ctx); priv->conf_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->conf_sysctl), OID_AUTO, "conf", CTLFLAG_RD, NULL, "Configuration"); node_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", CTLFLAG_RW, &priv->msg_enable, 0, "Driver message enable bitfield"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", CTLFLAG_RD, &priv->rx_ring_num, 0, "Number of receive rings"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", CTLFLAG_RD, &priv->tx_ring_num, 0, "Number of transmit rings"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ring_size, "I", "Receive ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "port_num", CTLFLAG_RD, &priv->port, 0, "Port Number"); SYSCTL_ADD_STRING(ctx, node_list, OID_AUTO, "device_name", CTLFLAG_RD, __DECONST(void *, pnameunit), 0, "PCI device name"); /* Add coalescer configuration. */ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); coal_list = SYSCTL_CHILDREN(coal); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", CTLFLAG_RW, &priv->pkt_rate_low, 0, "Packets per-second for minimum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", CTLFLAG_RW, &priv->rx_usecs_low, 0, "Minimum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", CTLFLAG_RW, &priv->pkt_rate_high, 0, "Packets per-second for maximum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", CTLFLAG_RW, &priv->rx_usecs_high, 0, "Maximum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", CTLFLAG_RW, &priv->sample_interval, 0, "adaptive frequency in units of HZ ticks"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", CTLFLAG_RW, &priv->adaptive_rx_coal, 0, "Enable adaptive rx coalescing"); /* EEPROM support */ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "eeprom_info", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_read_eeprom, "I", "EEPROM information"); } static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *node_list; struct sysctl_oid *ring_node; struct sysctl_oid_list *ring_list; struct mlx4_en_tx_ring *tx_ring; struct mlx4_en_rx_ring *rx_ring; char namebuf[128]; int i; ctx = &priv->stat_ctx; sysctl_ctx_init(ctx); priv->stat_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->conf_sysctl), OID_AUTO, "stat", CTLFLAG_RD, NULL, "Statistics"); node_list = SYSCTL_CHILDREN(priv->stat_sysctl); #ifdef MLX4_EN_PERF_STAT SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &priv->pstats.tx_poll, "TX Poll calls"); SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD, &priv->pstats.tx_pktsz_avg, "TX average packet size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD, &priv->pstats.inflight_avg, "TX average packets in-flight"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD, &priv->pstats.tx_coal_avg, "TX average coalesced completions"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD, &priv->pstats.rx_coal_avg, "RX average coalesced completions"); #endif SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &priv->port_stats.tso_packets, 0, "TSO packets sent"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD, &priv->port_stats.queue_stopped, 0, "Queue full"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD, &priv->port_stats.wake_queue, 0, "Queue resumed after full"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD, &priv->port_stats.tx_timeout, 0, "Transmit timeouts"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_oversized_packets", CTLFLAG_RD, &priv->port_stats.oversized_packets, 0, "TX oversized packets, m_defrag failed"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD, &priv->port_stats.rx_alloc_failed, 0, "RX failed to allocate mbuf"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD, &priv->port_stats.rx_chksum_good, 0, "RX checksum offload success"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD, &priv->port_stats.rx_chksum_none, 0, "RX without checksum offload"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_chksum_offload", CTLFLAG_RD, &priv->port_stats.tx_chksum_offload, 0, "TX checksum offloads"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "defrag_attempts", CTLFLAG_RD, &priv->port_stats.defrag_attempts, 0, "Oversized chains defragged"); /* Could strdup the names and add in a loop. This is simpler. */ SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &priv->pkstats.rx_bytes, 0, "RX Bytes"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &priv->pkstats.rx_packets, 0, "RX packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_multicast_packets", CTLFLAG_RD, &priv->pkstats.rx_multicast_packets, 0, "RX Multicast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.rx_broadcast_packets, 0, "RX Broadcast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_errors", CTLFLAG_RD, &priv->pkstats.rx_errors, 0, "RX Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_dropped", CTLFLAG_RD, &priv->pkstats.rx_dropped, 0, "RX Dropped"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD, &priv->pkstats.rx_length_errors, 0, "RX Length Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_over_errors", CTLFLAG_RD, &priv->pkstats.rx_over_errors, 0, "RX Over Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &priv->pkstats.rx_crc_errors, 0, "RX CRC Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_jabbers", CTLFLAG_RD, &priv->pkstats.rx_jabbers, 0, "RX Jabbers"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_in_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_in_range_length_error, 0, "RX IN_Range Length Error"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_out_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_out_range_length_error, 0, "RX Out Range Length Error"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_lt_64_bytes_packets, 0, "RX Lt 64 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_127_bytes_packets, 0, "RX 127 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_255_bytes_packets, 0, "RX 255 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_511_bytes_packets, 0, "RX 511 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1023_bytes_packets, 0, "RX 1023 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1518_bytes_packets, 0, "RX 1518 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1522_bytes_packets, 0, "RX 1522 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1548_bytes_packets, 0, "RX 1548 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_gt_1548_bytes_packets, 0, "RX Greater Then 1548 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &priv->pkstats.tx_packets, 0, "TX packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &priv->pkstats.tx_bytes, 0, "TX Bytes"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_multicast_packets", CTLFLAG_RD, &priv->pkstats.tx_multicast_packets, 0, "TX Multicast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.tx_broadcast_packets, 0, "TX Broadcast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_errors", CTLFLAG_RD, &priv->pkstats.tx_errors, 0, "TX Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_dropped", CTLFLAG_RD, &priv->pkstats.tx_dropped, 0, "TX Dropped"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_lt_64_bytes_packets, 0, "TX Less Then 64 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_127_bytes_packets, 0, "TX 127 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_255_bytes_packets, 0, "TX 255 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_511_bytes_packets, 0, "TX 511 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1023_bytes_packets, 0, "TX 1023 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1518_bytes_packets, 0, "TX 1518 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1522_bytes_packets, 0, "TX 1522 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1548_bytes_packets, 0, "TX 1548 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_gt_1548_bytes_packets, 0, "TX Greater Then 1548 Bytes Packets"); for (i = 0; i < priv->tx_ring_num; i++) { tx_ring = priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &tx_ring->packets, 0, "TX packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, 0, "TX bytes"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &tx_ring->tso_packets, 0, "TSO packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "defrag_attempts", CTLFLAG_RD, &tx_ring->defrag_attempts, 0, "Oversized chains defragged"); } for (i = 0; i < priv->rx_ring_num; i++) { rx_ring = priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &rx_ring->packets, 0, "RX packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &rx_ring->bytes, 0, "RX bytes"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, 0, "RX soft errors"); } } #ifdef NETDUMP static void -mlx4_en_netdump_init(struct ifnet *dev, int *nrxr, int *clsize) +mlx4_en_netdump_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize) { struct mlx4_en_priv *priv; priv = if_getsoftc(dev); mutex_lock(&priv->mdev->state_lock); *nrxr = priv->rx_ring_num; + *ncl = NETDUMP_MAX_IN_FLIGHT; *clsize = priv->rx_mb_size; mutex_unlock(&priv->mdev->state_lock); } static void mlx4_en_netdump_event(struct ifnet *dev, enum netdump_ev event) { } static int mlx4_en_netdump_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx4_en_priv *priv; int err; priv = if_getsoftc(dev); if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || !priv->link_state) return (ENOENT); err = mlx4_en_xmit(priv, 0, &m); if (err != 0 && m != NULL) m_freem(m); return (err); } static int mlx4_en_netdump_poll(struct ifnet *dev, int count) { struct mlx4_en_priv *priv; int i; priv = if_getsoftc(dev); if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0 || !priv->link_state) return (ENOENT); (void)mlx4_en_process_tx_cq(dev, priv->tx_cq[0]); for (i = 0; i < priv->rx_ring_num; i++) (void)mlx4_en_process_rx_cq(dev, priv->rx_cq[i], 0); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/re/if_re.c =================================================================== --- user/markj/netdump/sys/dev/re/if_re.c (revision 332406) +++ user/markj/netdump/sys/dev/re/if_re.c (revision 332407) @@ -1,4149 +1,4150 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1997, 1998-2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver * * Written by Bill Paul * Senior Networking Software Engineer * Wind River Systems */ /* * This driver is designed to support RealTek's next generation of * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S, * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E. * * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible * with the older 8139 family, however it also supports a special * C+ mode of operation that provides several new performance enhancing * features. These include: * * o Descriptor based DMA mechanism. Each descriptor represents * a single packet fragment. Data buffers may be aligned on * any byte boundary. * * o 64-bit DMA * * o TCP/IP checksum offload for both RX and TX * * o High and normal priority transmit DMA rings * * o VLAN tag insertion and extraction * * o TCP large send (segmentation offload) * * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+ * programming API is fairly straightforward. The RX filtering, EEPROM * access and PHY access is the same as it is on the older 8139 series * chips. * * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the * same programming API and feature set as the 8139C+ with the following * differences and additions: * * o 1000Mbps mode * * o Jumbo frames * * o GMII and TBI ports/registers for interfacing with copper * or fiber PHYs * * o RX and TX DMA rings can have up to 1024 descriptors * (the 8139C+ allows a maximum of 64) * * o Slight differences in register layout from the 8139C+ * * The TX start and timer interrupt registers are at different locations * on the 8169 than they are on the 8139C+. Also, the status word in the * RX descriptor has a slightly different bit layout. The 8169 does not * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska' * copper gigE PHY. * * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs * (the 'S' stands for 'single-chip'). These devices have the same * programming API as the older 8169, but also have some vendor-specific * registers for the on-board PHY. The 8110S is a LAN-on-motherboard * part designed to be pin-compatible with the RealTek 8100 10/100 chip. * * This driver takes advantage of the RX and TX checksum offload and * VLAN tag insertion/extraction features. It also implements TX * interrupt moderation using the timer interrupt registers, which * significantly reduces TX interrupt load. There is also support * for jumbo frames, however the 8169/8169S/8110S can not transmit * jumbo frames larger than 7440, so the max MTU possible with this * driver is 7422 bytes. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(re, pci, 1, 1, 1); MODULE_DEPEND(re, ether, 1, 1, 1); MODULE_DEPEND(re, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* Tunables. */ static int intr_filter = 0; TUNABLE_INT("hw.re.intr_filter", &intr_filter); static int msi_disable = 0; TUNABLE_INT("hw.re.msi_disable", &msi_disable); static int msix_disable = 0; TUNABLE_INT("hw.re.msix_disable", &msix_disable); static int prefer_iomap = 0; TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap); #define RE_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) /* * Various supported device vendors/types and their names. */ static const struct rl_type re_devs[] = { { DLINK_VENDORID, DLINK_DEVICEID_528T, 0, "D-Link DGE-528(T) Gigabit Ethernet Adapter" }, { DLINK_VENDORID, DLINK_DEVICEID_530T_REVC, 0, "D-Link DGE-530(T) Gigabit Ethernet Adapter" }, { RT_VENDORID, RT_DEVICEID_8139, 0, "RealTek 8139C+ 10/100BaseTX" }, { RT_VENDORID, RT_DEVICEID_8101E, 0, "RealTek 810xE PCIe 10/100baseTX" }, { RT_VENDORID, RT_DEVICEID_8168, 0, "RealTek 8168/8111 B/C/CP/D/DP/E/F/G PCIe Gigabit Ethernet" }, { NCUBE_VENDORID, RT_DEVICEID_8168, 0, "TP-Link TG-3468 v2 (RTL8168) Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169, 0, "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169SC, 0, "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" }, { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0, "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" }, { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0, "Linksys EG1032 (RTL8169S) Gigabit Ethernet" }, { USR_VENDORID, USR_DEVICEID_997902, 0, "US Robotics 997902 (RTL8169S) Gigabit Ethernet" } }; static const struct rl_hwrev re_hwrevs[] = { { RL_HWREV_8139, RL_8139, "", RL_MTU }, { RL_HWREV_8139A, RL_8139, "A", RL_MTU }, { RL_HWREV_8139AG, RL_8139, "A-G", RL_MTU }, { RL_HWREV_8139B, RL_8139, "B", RL_MTU }, { RL_HWREV_8130, RL_8139, "8130", RL_MTU }, { RL_HWREV_8139C, RL_8139, "C", RL_MTU }, { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C", RL_MTU }, { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+", RL_MTU }, { RL_HWREV_8168B_SPIN1, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8169, RL_8169, "8169", RL_JUMBO_MTU }, { RL_HWREV_8169S, RL_8169, "8169S", RL_JUMBO_MTU }, { RL_HWREV_8110S, RL_8169, "8110S", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8100, RL_8139, "8100", RL_MTU }, { RL_HWREV_8101, RL_8139, "8101", RL_MTU }, { RL_HWREV_8100E, RL_8169, "8100E", RL_MTU }, { RL_HWREV_8101E, RL_8169, "8101E", RL_MTU }, { RL_HWREV_8102E, RL_8169, "8102E", RL_MTU }, { RL_HWREV_8102EL, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8103E, RL_8169, "8103E", RL_MTU }, { RL_HWREV_8401E, RL_8169, "8401E", RL_MTU }, { RL_HWREV_8402, RL_8169, "8402", RL_MTU }, { RL_HWREV_8105E, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8105E_SPIN1, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8106E, RL_8169, "8106E", RL_MTU }, { RL_HWREV_8168B_SPIN2, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168B_SPIN3, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168C, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP", RL_JUMBO_MTU_6K }, { RL_HWREV_8168D, RL_8169, "8168D/8111D", RL_JUMBO_MTU_9K }, { RL_HWREV_8168DP, RL_8169, "8168DP/8111DP", RL_JUMBO_MTU_9K }, { RL_HWREV_8168E, RL_8169, "8168E/8111E", RL_JUMBO_MTU_9K}, { RL_HWREV_8168E_VL, RL_8169, "8168E/8111E-VL", RL_JUMBO_MTU_6K}, { RL_HWREV_8168EP, RL_8169, "8168EP/8111EP", RL_JUMBO_MTU_9K}, { RL_HWREV_8168F, RL_8169, "8168F/8111F", RL_JUMBO_MTU_9K}, { RL_HWREV_8168G, RL_8169, "8168G/8111G", RL_JUMBO_MTU_9K}, { RL_HWREV_8168GU, RL_8169, "8168GU/8111GU", RL_JUMBO_MTU_9K}, { RL_HWREV_8168H, RL_8169, "8168H/8111H", RL_JUMBO_MTU_9K}, { RL_HWREV_8411, RL_8169, "8411", RL_JUMBO_MTU_9K}, { RL_HWREV_8411B, RL_8169, "8411B", RL_JUMBO_MTU_9K}, { 0, 0, NULL, 0 } }; static int re_probe (device_t); static int re_attach (device_t); static int re_detach (device_t); static int re_encap (struct rl_softc *, struct mbuf **); static void re_dma_map_addr (void *, bus_dma_segment_t *, int, int); static int re_allocmem (device_t, struct rl_softc *); static __inline void re_discard_rxbuf (struct rl_softc *, int); static int re_newbuf (struct rl_softc *, int); static int re_jumbo_newbuf (struct rl_softc *, int); static int re_rx_list_init (struct rl_softc *); static int re_jrx_list_init (struct rl_softc *); static int re_tx_list_init (struct rl_softc *); #ifdef RE_FIXUP_RX static __inline void re_fixup_rx (struct mbuf *); #endif static int re_rxeof (struct rl_softc *, int *); static void re_txeof (struct rl_softc *); #ifdef DEVICE_POLLING static int re_poll (struct ifnet *, enum poll_cmd, int); static int re_poll_locked (struct ifnet *, enum poll_cmd, int); #endif static int re_intr (void *); static void re_intr_msi (void *); static void re_tick (void *); static void re_int_task (void *, int); static void re_start (struct ifnet *); static void re_start_locked (struct ifnet *); static void re_start_tx (struct rl_softc *); static int re_ioctl (struct ifnet *, u_long, caddr_t); static void re_init (void *); static void re_init_locked (struct rl_softc *); static void re_stop (struct rl_softc *); static void re_watchdog (struct rl_softc *); static int re_suspend (device_t); static int re_resume (device_t); static int re_shutdown (device_t); static int re_ifmedia_upd (struct ifnet *); static void re_ifmedia_sts (struct ifnet *, struct ifmediareq *); static void re_eeprom_putbyte (struct rl_softc *, int); static void re_eeprom_getword (struct rl_softc *, int, u_int16_t *); static void re_read_eeprom (struct rl_softc *, caddr_t, int, int); static int re_gmii_readreg (device_t, int, int); static int re_gmii_writereg (device_t, int, int, int); static int re_miibus_readreg (device_t, int, int); static int re_miibus_writereg (device_t, int, int, int); static void re_miibus_statchg (device_t); static void re_set_jumbo (struct rl_softc *, int); static void re_set_rxmode (struct rl_softc *); static void re_reset (struct rl_softc *); static void re_setwol (struct rl_softc *); static void re_clrwol (struct rl_softc *); static void re_set_linkspeed (struct rl_softc *); NETDUMP_DEFINE(re); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include MODULE_DEPEND(re, netmap, 1, 1, 1); #endif /* !DEV_NETMAP */ #ifdef RE_DIAG static int re_diag (struct rl_softc *); #endif static void re_add_sysctls (struct rl_softc *); static int re_sysctl_stats (SYSCTL_HANDLER_ARGS); static int sysctl_int_range (SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_re_int_mod (SYSCTL_HANDLER_ARGS); static device_method_t re_methods[] = { /* Device interface */ DEVMETHOD(device_probe, re_probe), DEVMETHOD(device_attach, re_attach), DEVMETHOD(device_detach, re_detach), DEVMETHOD(device_suspend, re_suspend), DEVMETHOD(device_resume, re_resume), DEVMETHOD(device_shutdown, re_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, re_miibus_readreg), DEVMETHOD(miibus_writereg, re_miibus_writereg), DEVMETHOD(miibus_statchg, re_miibus_statchg), DEVMETHOD_END }; static driver_t re_driver = { "re", re_methods, sizeof(struct rl_softc) }; static devclass_t re_devclass; DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0); DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0); #define EE_SET(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) | x) #define EE_CLR(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) & ~x) /* * Send a read command and address to the EEPROM, check for ACK. */ static void re_eeprom_putbyte(struct rl_softc *sc, int addr) { int d, i; d = addr | (RL_9346_READ << sc->rl_eewidth); /* * Feed in each bit and strobe the clock. */ for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) { if (d & i) { EE_SET(RL_EE_DATAIN); } else { EE_CLR(RL_EE_DATAIN); } DELAY(100); EE_SET(RL_EE_CLK); DELAY(150); EE_CLR(RL_EE_CLK); DELAY(100); } } /* * Read a word of data stored in the EEPROM at address 'addr.' */ static void re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest) { int i; u_int16_t word = 0; /* * Send address of word we want to read. */ re_eeprom_putbyte(sc, addr); /* * Start reading bits from EEPROM. */ for (i = 0x8000; i; i >>= 1) { EE_SET(RL_EE_CLK); DELAY(100); if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT) word |= i; EE_CLR(RL_EE_CLK); DELAY(100); } *dest = word; } /* * Read a sequence of words from the EEPROM. */ static void re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt) { int i; u_int16_t word = 0, *ptr; CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); DELAY(100); for (i = 0; i < cnt; i++) { CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL); re_eeprom_getword(sc, off + i, &word); CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL); ptr = (u_int16_t *)(dest + (i * 2)); *ptr = word; } CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); } static int re_gmii_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); /* Let the rgephy driver read the GMEDIASTAT register */ if (reg == RL_GMEDIASTAT) { rval = CSR_READ_1(sc, RL_GMEDIASTAT); return (rval); } CSR_WRITE_4(sc, RL_PHYAR, reg << 16); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (rval & RL_PHYAR_BUSY) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY read failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (rval & RL_PHYAR_PHYDATA); } static int re_gmii_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) | (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (!(rval & RL_PHYAR_BUSY)) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY write failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (0); } static int re_miibus_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int16_t rval = 0; u_int16_t re8139_reg = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_readreg(dev, phy, reg); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); /* * Allow the rlphy driver to read the media status * register. If we have a link partner which does not * support NWAY, this is the register which will tell * us the results of parallel detection. */ case RL_MEDIASTAT: rval = CSR_READ_1(sc, RL_MEDIASTAT); return (rval); default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } rval = CSR_READ_2(sc, re8139_reg); if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) { /* 8139C+ has different bit layout. */ rval &= ~(BMCR_LOOP | BMCR_ISO); } return (rval); } static int re_miibus_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int16_t re8139_reg = 0; int rval = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_writereg(dev, phy, reg, data); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; if (sc->rl_type == RL_8139CPLUS) { /* 8139C+ has different bit layout. */ data &= ~(BMCR_LOOP | BMCR_ISO); } break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); break; default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } CSR_WRITE_2(sc, re8139_reg, data); return (0); } static void re_miibus_statchg(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->rl_miibus); ifp = sc->rl_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc->rl_flags &= ~RL_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->rl_flags |= RL_FLAG_LINK; break; case IFM_1000_T: if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) break; sc->rl_flags |= RL_FLAG_LINK; break; default: break; } } /* * RealTek controllers do not provide any interface to the RX/TX * MACs for resolved speed, duplex and flow-control parameters. */ } /* * Set the RX configuration and 64-bit multicast hash filter. */ static void re_set_rxmode(struct rl_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; uint32_t hashes[2] = { 0, 0 }; uint32_t h, rxfilt; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD; if ((sc->rl_flags & RL_FLAG_EARLYOFF) != 0) rxfilt |= RL_RXCFG_EARLYOFF; else if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) rxfilt |= RL_RXCFG_EARLYOFFV2; if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) { if (ifp->if_flags & IFF_PROMISC) rxfilt |= RL_RXCFG_RX_ALLPHYS; /* * Unlike other hardwares, we have to explicitly set * RL_RXCFG_RX_MULTI to receive multicast frames in * promiscuous mode. */ rxfilt |= RL_RXCFG_RX_MULTI; hashes[0] = hashes[1] = 0xffffffff; goto done; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); if (hashes[0] != 0 || hashes[1] != 0) { /* * For some unfathomable reason, RealTek decided to * reverse the order of the multicast hash registers * in the PCI Express parts. This means we have to * write the hash pattern in reverse order for those * devices. */ if ((sc->rl_flags & RL_FLAG_PCIE) != 0) { h = bswap32(hashes[0]); hashes[0] = bswap32(hashes[1]); hashes[1] = h; } rxfilt |= RL_RXCFG_RX_MULTI; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8168F) { /* Disable multicast filtering due to silicon bug. */ hashes[0] = 0xffffffff; hashes[1] = 0xffffffff; } done: CSR_WRITE_4(sc, RL_MAR0, hashes[0]); CSR_WRITE_4(sc, RL_MAR4, hashes[1]); CSR_WRITE_4(sc, RL_RXCFG, rxfilt); } static void re_reset(struct rl_softc *sc) { int i; RL_LOCK_ASSERT(sc); CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET); for (i = 0; i < RL_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET)) break; } if (i == RL_TIMEOUT) device_printf(sc->rl_dev, "reset never completed!\n"); if ((sc->rl_flags & RL_FLAG_MACRESET) != 0) CSR_WRITE_1(sc, 0x82, 1); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169S) re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0); } #ifdef RE_DIAG /* * The following routine is designed to test for a defect on some * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64# * lines connected to the bus, however for a 32-bit only card, they * should be pulled high. The result of this defect is that the * NIC will not work right if you plug it into a 64-bit slot: DMA * operations will be done with 64-bit transfers, which will fail * because the 64-bit data lines aren't connected. * * There's no way to work around this (short of talking a soldering * iron to the board), however we can detect it. The method we use * here is to put the NIC into digital loopback mode, set the receiver * to promiscuous mode, and then try to send a frame. We then compare * the frame data we sent to what was received. If the data matches, * then the NIC is working correctly, otherwise we know the user has * a defective NIC which has been mistakenly plugged into a 64-bit PCI * slot. In the latter case, there's no way the NIC can work correctly, * so we print out a message on the console and abort the device attach. */ static int re_diag(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mbuf *m0; struct ether_header *eh; struct rl_desc *cur_rx; u_int16_t status; u_int32_t rxstat; int total_len, i, error = 0, phyaddr; u_int8_t dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' }; u_int8_t src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' }; /* Allocate a single mbuf */ MGETHDR(m0, M_NOWAIT, MT_DATA); if (m0 == NULL) return (ENOBUFS); RL_LOCK(sc); /* * Initialize the NIC in test mode. This sets the chip up * so that it can send and receive frames, but performs the * following special functions: * - Puts receiver in promiscuous mode * - Enables digital loopback mode * - Leaves interrupts turned off */ ifp->if_flags |= IFF_PROMISC; sc->rl_testmode = 1; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); sc->rl_flags |= RL_FLAG_LINK; if (sc->rl_type == RL_8169) phyaddr = 1; else phyaddr = 0; re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET); for (i = 0; i < RL_TIMEOUT; i++) { status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR); if (!(status & BMCR_RESET)) break; } re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP); CSR_WRITE_2(sc, RL_ISR, RL_INTRS); DELAY(100000); /* Put some data in the mbuf */ eh = mtod(m0, struct ether_header *); bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN); bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN); eh->ether_type = htons(ETHERTYPE_IP); m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN; /* * Queue the packet, start transmission. * Note: IF_HANDOFF() ultimately calls re_start() for us. */ CSR_WRITE_2(sc, RL_ISR, 0xFFFF); RL_UNLOCK(sc); /* XXX: re_diag must not be called when in ALTQ mode */ IF_HANDOFF(&ifp->if_snd, m0, ifp); RL_LOCK(sc); m0 = NULL; /* Wait for it to propagate through the chip */ DELAY(100000); for (i = 0; i < RL_TIMEOUT; i++) { status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) == (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) break; DELAY(10); } if (i == RL_TIMEOUT) { device_printf(sc->rl_dev, "diagnostic failed, failed to receive packet in" " loopback mode\n"); error = EIO; goto done; } /* * The packet should have been dumped into the first * entry in the RX DMA ring. Grab it from there. */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap); m0 = sc->rl_ldata.rl_rx_desc[0].rx_m; sc->rl_ldata.rl_rx_desc[0].rx_m = NULL; eh = mtod(m0, struct ether_header *); cur_rx = &sc->rl_ldata.rl_rx_list[0]; total_len = RL_RXBYTES(cur_rx); rxstat = le32toh(cur_rx->rl_cmdstat); if (total_len != ETHER_MIN_LEN) { device_printf(sc->rl_dev, "diagnostic failed, received short packet\n"); error = EIO; goto done; } /* Test that the received packet data matches what we sent. */ if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) || bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) || ntohs(eh->ether_type) != ETHERTYPE_IP) { device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n"); device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n", dst, ":", src, ":", ETHERTYPE_IP); device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n", eh->ether_dhost, ":", eh->ether_shost, ":", ntohs(eh->ether_type)); device_printf(sc->rl_dev, "You may have a defective 32-bit " "NIC plugged into a 64-bit PCI slot.\n"); device_printf(sc->rl_dev, "Please re-install the NIC in a " "32-bit slot for proper operation.\n"); device_printf(sc->rl_dev, "Read the re(4) man page for more " "details.\n"); error = EIO; } done: /* Turn interface off, release resources */ sc->rl_testmode = 0; sc->rl_flags &= ~RL_FLAG_LINK; ifp->if_flags &= ~IFF_PROMISC; re_stop(sc); if (m0 != NULL) m_freem(m0); RL_UNLOCK(sc); return (error); } #endif /* * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int re_probe(device_t dev) { const struct rl_type *t; uint16_t devid, vendor; uint16_t revid, sdevid; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); revid = pci_get_revid(dev); sdevid = pci_get_subdevice(dev); if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) { if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) { /* * Only attach to rev. 3 of the Linksys EG1032 adapter. * Rev. 2 is supported by sk(4). */ return (ENXIO); } } if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) { if (revid != 0x20) { /* 8139, let rl(4) take care of this device. */ return (ENXIO); } } t = re_devs; for (i = 0; i < nitems(re_devs); i++, t++) { if (vendor == t->rl_vid && devid == t->rl_did) { device_set_desc(dev, t->rl_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } /* * Map a single buffer address. */ static void re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *addr; if (error) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); addr = arg; *addr = segs->ds_addr; } static int re_allocmem(device_t dev, struct rl_softc *sc) { bus_addr_t lowaddr; bus_size_t rx_list_size, tx_list_size; int error; int i; rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc); tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc); /* * Allocate the parent bus DMA tag appropriate for PCI. * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD * register should be set. However some RealTek chips are known * to be buggy on DAC handling, therefore disable DAC by limiting * DMA address space to 32bit. PCIe variants of RealTek chips * may not have the limitation. */ lowaddr = BUS_SPACE_MAXADDR; if ((sc->rl_flags & RL_FLAG_PCIE) == 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rl_parent_tag); if (error) { device_printf(dev, "could not allocate parent DMA tag\n"); return (error); } /* * Allocate map for TX mbufs. */ error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0, NULL, NULL, &sc->rl_ldata.rl_tx_mtag); if (error) { device_printf(dev, "could not allocate TX DMA tag\n"); return (error); } /* * Allocate map for RX mbufs. */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL, &sc->rl_ldata.rl_jrx_mtag); if (error) { device_printf(dev, "could not allocate jumbo RX DMA tag\n"); return (error); } } error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag); if (error) { device_printf(dev, "could not allocate RX DMA tag\n"); return (error); } /* * Allocate map for TX descriptor list. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, tx_list_size, 1, tx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_tx_list_tag); if (error) { device_printf(dev, "could not allocate TX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the TX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag, (void **)&sc->rl_ldata.rl_tx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_tx_list_map); if (error) { device_printf(dev, "could not allocate TX DMA ring\n"); return (error); } /* Load the map for the TX ring. */ sc->rl_ldata.rl_tx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list, tx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) { device_printf(dev, "could not load TX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for TX buffers */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0, &sc->rl_ldata.rl_tx_desc[i].tx_dmamap); if (error) { device_printf(dev, "could not create DMA map for TX\n"); return (error); } } /* * Allocate map for RX descriptor list. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, rx_list_size, 1, rx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_rx_list_tag); if (error) { device_printf(dev, "could not create RX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the RX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag, (void **)&sc->rl_ldata.rl_rx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_rx_list_map); if (error) { device_printf(dev, "could not allocate RX DMA ring\n"); return (error); } /* Load the map for the RX ring. */ sc->rl_ldata.rl_rx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list, rx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) { device_printf(dev, "could not load RX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for RX buffers */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for jumbo RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for jumbo RX\n"); return (error); } } } error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for RX\n"); return (error); } } /* Create DMA map for statistics. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_DUMP_ALIGN, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct rl_stats), 1, sizeof(struct rl_stats), 0, NULL, NULL, &sc->rl_ldata.rl_stag); if (error) { device_printf(dev, "could not create statistics DMA tag\n"); return (error); } /* Allocate DMA'able memory for statistics. */ error = bus_dmamem_alloc(sc->rl_ldata.rl_stag, (void **)&sc->rl_ldata.rl_stats, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_smap); if (error) { device_printf(dev, "could not allocate statistics DMA memory\n"); return (error); } /* Load the map for statistics. */ sc->rl_ldata.rl_stats_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, sc->rl_ldata.rl_stats, sizeof(struct rl_stats), re_dma_map_addr, &sc->rl_ldata.rl_stats_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_stats_addr == 0) { device_printf(dev, "could not load statistics DMA memory\n"); return (ENOMEM); } return (0); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int re_attach(device_t dev) { u_char eaddr[ETHER_ADDR_LEN]; u_int16_t as[ETHER_ADDR_LEN / 2]; struct rl_softc *sc; struct ifnet *ifp; const struct rl_hwrev *hw_rev; int capmask, error = 0, hwrev, i, msic, msixc, phy, reg, rid; u_int32_t cap, ctl; u_int16_t devid, re_did = 0; uint8_t cfg; sc = device_get_softc(dev); sc->rl_dev = dev; mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0); /* * Map control/status registers. */ pci_enable_busmaster(dev); devid = pci_get_device(dev); /* * Prefer memory space register mapping over IO space. * Because RTL8169SC does not seem to work when memory mapping * is used always activate io mapping. */ if (devid == RT_DEVICEID_8169SC) prefer_iomap = 1; if (prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(1); sc->rl_res_type = SYS_RES_MEMORY; /* RTL8168/8101E seems to use different BARs. */ if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E) sc->rl_res_id = PCIR_BAR(2); } else { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; } sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); if (sc->rl_res == NULL && prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); } if (sc->rl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } sc->rl_btag = rman_get_bustag(sc->rl_res); sc->rl_bhandle = rman_get_bushandle(sc->rl_res); msic = pci_msi_count(dev); msixc = pci_msix_count(dev); if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { sc->rl_flags |= RL_FLAG_PCIE; sc->rl_expcap = reg; } if (bootverbose) { device_printf(dev, "MSI count : %d\n", msic); device_printf(dev, "MSI-X count : %d\n", msixc); } if (msix_disable > 0) msixc = 0; if (msi_disable > 0) msic = 0; /* Prefer MSI-X to MSI. */ if (msixc > 0) { msixc = RL_MSI_MESSAGES; rid = PCIR_BAR(4); sc->rl_res_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->rl_res_pba == NULL) { device_printf(sc->rl_dev, "could not allocate MSI-X PBA resource\n"); } if (sc->rl_res_pba != NULL && pci_alloc_msix(dev, &msixc) == 0) { if (msixc == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI-X message\n", msixc); sc->rl_flags |= RL_FLAG_MSIX; } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSIX) == 0) { if (sc->rl_res_pba != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); sc->rl_res_pba = NULL; msixc = 0; } } /* Prefer MSI to INTx. */ if (msixc == 0 && msic > 0) { msic = RL_MSI_MESSAGES; if (pci_alloc_msi(dev, &msic) == 0) { if (msic == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI message\n", msic); sc->rl_flags |= RL_FLAG_MSI; /* Explicitly set MSI enable bit. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); cfg |= RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSI) == 0) msic = 0; } /* Allocate interrupt */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) { rid = 0; sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->rl_irq[0] == NULL) { device_printf(dev, "couldn't allocate IRQ resources\n"); error = ENXIO; goto fail; } } else { for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) { sc->rl_irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->rl_irq[i] == NULL) { device_printf(dev, "couldn't allocate IRQ resources for " "message %d\n", rid); error = ENXIO; goto fail; } } } if ((sc->rl_flags & RL_FLAG_MSI) == 0) { CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); if ((cfg & RL_CFG2_MSI) != 0) { device_printf(dev, "turning off MSI enable bit.\n"); cfg &= ~RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } /* Disable ASPM L0S/L1 and CLKREQ. */ if (sc->rl_expcap != 0) { cap = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CAP, 2); if ((cap & PCIEM_LINK_CAP_ASPM) != 0) { ctl = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CTL, 2); if ((ctl & (PCIEM_LINK_CTL_ECPM | PCIEM_LINK_CTL_ASPMC))!= 0) { ctl &= ~(PCIEM_LINK_CTL_ECPM | PCIEM_LINK_CTL_ASPMC); pci_write_config(dev, sc->rl_expcap + PCIER_LINK_CTL, ctl, 2); device_printf(dev, "ASPM disabled\n"); } } else device_printf(dev, "no ASPM capability\n"); } hw_rev = re_hwrevs; hwrev = CSR_READ_4(sc, RL_TXCFG); switch (hwrev & 0x70000000) { case 0x00000000: case 0x10000000: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000); hwrev &= (RL_TXCFG_HWREV | 0x80000000); break; default: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000); sc->rl_macrev = hwrev & 0x00700000; hwrev &= RL_TXCFG_HWREV; break; } device_printf(dev, "MAC rev. 0x%08x\n", sc->rl_macrev); while (hw_rev->rl_desc != NULL) { if (hw_rev->rl_rev == hwrev) { sc->rl_type = hw_rev->rl_type; sc->rl_hwrev = hw_rev; break; } hw_rev++; } if (hw_rev->rl_desc == NULL) { device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev); error = ENXIO; goto fail; } switch (hw_rev->rl_rev) { case RL_HWREV_8139CPLUS: sc->rl_flags |= RL_FLAG_FASTETHER | RL_FLAG_AUTOPAD; break; case RL_HWREV_8100E: case RL_HWREV_8101E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER; break; case RL_HWREV_8102E: case RL_HWREV_8102EL: case RL_HWREV_8102EL_SPIN1: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8103E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_MACSLEEP; break; case RL_HWREV_8401E: case RL_HWREV_8105E: case RL_HWREV_8105E_SPIN1: case RL_HWREV_8106E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8402: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ; break; case RL_HWREV_8168B_SPIN1: case RL_HWREV_8168B_SPIN2: sc->rl_flags |= RL_FLAG_WOLRXENB; /* FALLTHROUGH */ case RL_HWREV_8168B_SPIN3: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT; break; case RL_HWREV_8168C_SPIN2: sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168C: if (sc->rl_macrev == 0x00200000) sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168CP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168D: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168DP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WAIT_TXPOLL | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E_VL: case RL_HWREV_8168F: sc->rl_flags |= RL_FLAG_EARLYOFF; /* FALLTHROUGH */ case RL_HWREV_8411: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168EP: case RL_HWREV_8168G: case RL_HWREV_8411B: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8168GU: case RL_HWREV_8168H: if (pci_get_device(dev) == RT_DEVICEID_8101E) { /* RTL8106E(US), RTL8107E */ sc->rl_flags |= RL_FLAG_FASTETHER; } else sc->rl_flags |= RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8169_8110SB: case RL_HWREV_8169_8110SBL: case RL_HWREV_8169_8110SC: case RL_HWREV_8169_8110SCE: sc->rl_flags |= RL_FLAG_PHYWAKE; /* FALLTHROUGH */ case RL_HWREV_8169: case RL_HWREV_8169S: case RL_HWREV_8110S: sc->rl_flags |= RL_FLAG_MACRESET; break; default: break; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8139CPLUS) { sc->rl_cfg0 = RL_8139_CFG0; sc->rl_cfg1 = RL_8139_CFG1; sc->rl_cfg2 = 0; sc->rl_cfg3 = RL_8139_CFG3; sc->rl_cfg4 = RL_8139_CFG4; sc->rl_cfg5 = RL_8139_CFG5; } else { sc->rl_cfg0 = RL_CFG0; sc->rl_cfg1 = RL_CFG1; sc->rl_cfg2 = RL_CFG2; sc->rl_cfg3 = RL_CFG3; sc->rl_cfg4 = RL_CFG4; sc->rl_cfg5 = RL_CFG5; } /* Reset the adapter. */ RL_LOCK(sc); re_reset(sc); RL_UNLOCK(sc); /* Enable PME. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, sc->rl_cfg1); cfg |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, cfg); cfg = CSR_READ_1(sc, sc->rl_cfg5); cfg &= RL_CFG5_PME_STS; CSR_WRITE_1(sc, sc->rl_cfg5, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((sc->rl_flags & RL_FLAG_PAR) != 0) { /* * XXX Should have a better way to extract station * address from EEPROM. */ for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i); } else { sc->rl_eewidth = RL_9356_ADDR_LEN; re_read_eeprom(sc, (caddr_t)&re_did, 0, 1); if (re_did != 0x8129) sc->rl_eewidth = RL_9346_ADDR_LEN; /* * Get station address from the EEPROM. */ re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3); for (i = 0; i < ETHER_ADDR_LEN / 2; i++) as[i] = le16toh(as[i]); bcopy(as, eaddr, ETHER_ADDR_LEN); } if (sc->rl_type == RL_8169) { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN; sc->rl_txstart = RL_GTXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT; } else { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN; sc->rl_txstart = RL_TXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT; } error = re_allocmem(dev, sc); if (error) goto fail; re_add_sysctls(sc); ifp = sc->rl_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } /* Take controller out of deep sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); else CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } /* Take PHY out of power down mode. */ if ((sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) { CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) | 0x80); if (hw_rev->rl_rev == RL_HWREV_8401E) CSR_WRITE_1(sc, 0xD1, CSR_READ_1(sc, 0xD1) & ~0x08); } if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) { re_gmii_writereg(dev, 1, 0x1f, 0); re_gmii_writereg(dev, 1, 0x0e, 0); } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = re_ioctl; ifp->if_start = re_start; /* * RTL8168/8111C generates wrong IP checksummed frame if the * packet has IP options so disable TX checksum offloading. */ if (sc->rl_hwrev->rl_rev == RL_HWREV_8168C || sc->rl_hwrev->rl_rev == RL_HWREV_8168C_SPIN2 || sc->rl_hwrev->rl_rev == RL_HWREV_8168CP) { ifp->if_hwassist = 0; ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TSO4; } else { ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4; } ifp->if_hwassist |= CSUM_TSO; ifp->if_capenable = ifp->if_capabilities; ifp->if_init = re_init; IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN); ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN; IFQ_SET_READY(&ifp->if_snd); TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc); #define RE_PHYAD_INTERNAL 0 /* Do MII setup. */ phy = RE_PHYAD_INTERNAL; if (sc->rl_type == RL_8169) phy = 1; capmask = BMSR_DEFCAPMASK; if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) capmask &= ~BMSR_EXTSTAT; error = mii_attach(dev, &sc->rl_miibus, ifp, re_ifmedia_upd, re_ifmedia_sts, capmask, phy, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* VLAN capability setup */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; if (ifp->if_capabilities & IFCAP_HWCSUM) ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; /* Enable WOL if PM is supported. */ if (pci_find_cap(sc->rl_dev, PCIY_PMG, ®) == 0) ifp->if_capabilities |= IFCAP_WOL; ifp->if_capenable = ifp->if_capabilities; ifp->if_capenable &= ~(IFCAP_WOL_UCAST | IFCAP_WOL_MCAST); /* * Don't enable TSO by default. It is known to generate * corrupted TCP segments(bad TCP options) under certain * circumstances. */ ifp->if_hwassist &= ~CSUM_TSO; ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); #ifdef DEV_NETMAP re_netmap_attach(sc); #endif /* DEV_NETMAP */ #ifdef RE_DIAG /* * Perform hardware diagnostic on the original RTL8169. * Some 32-bit cards were incorrectly wired and would * malfunction if plugged into a 64-bit slot. */ if (hwrev == RL_HWREV_8169) { error = re_diag(sc); if (error) { device_printf(dev, "attach aborted due to hardware diag failure\n"); ether_ifdetach(ifp); goto fail; } } #endif #ifdef RE_TX_MODERATION intr_filter = 1; #endif /* Hook interrupt last to avoid having to lock softc */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, re_intr_msi, sc, &sc->rl_intrhand[0]); } else { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc, &sc->rl_intrhand[0]); } if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } NETDUMP_SET(ifp, re); fail: if (error) re_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int re_detach(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; int i, rid; sc = device_get_softc(dev); ifp = sc->rl_ifp; KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized")); /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif RL_LOCK(sc); #if 0 sc->suspended = 1; #endif re_stop(sc); RL_UNLOCK(sc); callout_drain(&sc->rl_stat_callout); taskqueue_drain(taskqueue_fast, &sc->rl_inttask); /* * Force off the IFF_UP flag here, in case someone * still had a BPF descriptor attached to this * interface. If they do, ether_ifdetach() will cause * the BPF code to try and clear the promisc mode * flag, which will bubble down to re_ioctl(), * which will try to call re_init() again. This will * turn the NIC back on and restart the MII ticker, * which will panic the system when the kernel tries * to invoke the re_tick() function that isn't there * anymore. */ ifp->if_flags &= ~IFF_UP; ether_ifdetach(ifp); } if (sc->rl_miibus) device_delete_child(dev, sc->rl_miibus); bus_generic_detach(dev); /* * The rest is resource deallocation, so we should already be * stopped here. */ if (sc->rl_intrhand[0] != NULL) { bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]); sc->rl_intrhand[0] = NULL; } if (ifp != NULL) { #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ if_free(ifp); } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) rid = 0; else rid = 1; if (sc->rl_irq[0] != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->rl_irq[0]); sc->rl_irq[0] = NULL; } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0) pci_release_msi(dev); if (sc->rl_res_pba) { rid = PCIR_BAR(4); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); } if (sc->rl_res) bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id, sc->rl_res); /* Unload and free the RX DMA ring memory and map */ if (sc->rl_ldata.rl_rx_list_tag) { if (sc->rl_ldata.rl_rx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map); if (sc->rl_ldata.rl_rx_list) bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag); } /* Unload and free the TX DMA ring memory and map */ if (sc->rl_ldata.rl_tx_list_tag) { if (sc->rl_ldata.rl_tx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map); if (sc->rl_ldata.rl_tx_list) bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag); } /* Destroy all the RX and TX buffer maps */ if (sc->rl_ldata.rl_tx_mtag) { for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { if (sc->rl_ldata.rl_tx_desc[i].tx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag, sc->rl_ldata.rl_tx_desc[i].tx_dmamap); } bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag); } if (sc->rl_ldata.rl_rx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_rx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_rx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag); } if (sc->rl_ldata.rl_jrx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_jrx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_jrx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_jrx_mtag); } /* Unload and free the stats buffer and map */ if (sc->rl_ldata.rl_stag) { if (sc->rl_ldata.rl_stats_addr) bus_dmamap_unload(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap); if (sc->rl_ldata.rl_stats) bus_dmamem_free(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_stats, sc->rl_ldata.rl_smap); bus_dma_tag_destroy(sc->rl_ldata.rl_stag); } if (sc->rl_parent_tag) bus_dma_tag_destroy(sc->rl_parent_tag); mtx_destroy(&sc->rl_mtx); return (0); } static __inline void re_discard_rxbuf(struct rl_softc *sc, int idx) { struct rl_desc *desc; struct rl_rxdesc *rxd; uint32_t cmdstat; if (sc->rl_ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) rxd = &sc->rl_ldata.rl_jrx_desc[idx]; else rxd = &sc->rl_ldata.rl_rx_desc[idx]; desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; cmdstat = rxd->rx_size; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); } static int re_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; #ifdef RE_FIXUP_RX /* * This is part of an evil trick to deal with non-x86 platforms. * The RealTek chip requires RX buffers to be aligned on 64-bit * boundaries, but that will hose non-x86 machines. To get around * this, we leave some empty space at the start of each buffer * and for non-x86 hosts, we copy the buffer back six bytes * to achieve word alignment. This is slightly more efficient * than allocating a new buffer, copying the contents, and * discarding the old buffer. */ m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_rx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_rx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } static int re_jumbo_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; #ifdef RE_FIXUP_RX m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_jrx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_jrx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_jrx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } #ifdef RE_FIXUP_RX static __inline void re_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src; for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN; } #endif static int re_tx_list_init(struct rl_softc *sc) { struct rl_desc *desc; int i; RL_LOCK_ASSERT(sc); bzero(sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) sc->rl_ldata.rl_tx_desc[i].tx_m = NULL; #ifdef DEV_NETMAP re_netmap_tx_init(sc); #endif /* DEV_NETMAP */ /* Set EOR. */ desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR); bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->rl_ldata.rl_tx_prodidx = 0; sc->rl_ldata.rl_tx_considx = 0; sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt; return (0); } static int re_rx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_rx_desc[i].rx_m = NULL; if ((error = re_newbuf(sc, i)) != 0) return (error); } #ifdef DEV_NETMAP re_netmap_rx_init(sc); #endif /* DEV_NETMAP */ /* Flush the RX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } static int re_jrx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_jrx_desc[i].rx_m = NULL; if ((error = re_jumbo_newbuf(sc, i)) != 0) return (error); } bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } /* * RX handler for C+ and 8169. For the gigE chips, we support * the reception of jumbo frames that have been fragmented * across multiple 2K mbuf cluster buffers. */ static int re_rxeof(struct rl_softc *sc, int *rx_npktsp) { struct mbuf *m; struct ifnet *ifp; int i, rxerr, total_len; struct rl_desc *cur_rx; u_int32_t rxstat, rxvlan; int jumbo, maxpkt = 16, rx_npkts = 0; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &rx_npkts)) return 0; #endif /* DEV_NETMAP */ if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) jumbo = 1; else jumbo = 0; /* Invalidate the descriptor memory */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0; i = RL_RX_DESC_NXT(sc, i)) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; cur_rx = &sc->rl_ldata.rl_rx_list[i]; rxstat = le32toh(cur_rx->rl_cmdstat); if ((rxstat & RL_RDESC_STAT_OWN) != 0) break; total_len = rxstat & sc->rl_rxlenmask; rxvlan = le32toh(cur_rx->rl_vlanctl); if (jumbo != 0) m = sc->rl_ldata.rl_jrx_desc[i].rx_m; else m = sc->rl_ldata.rl_rx_desc[i].rx_m; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (rxstat & (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) != (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) { /* * RTL8168C or later controllers do not * support multi-fragment packet. */ re_discard_rxbuf(sc, i); continue; } else if ((rxstat & RL_RDESC_STAT_EOF) == 0) { if (re_newbuf(sc, i) != 0) { /* * If this is part of a multi-fragment packet, * discard all the pieces. */ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } m->m_len = RE_RX_DESC_BUFLEN; if (sc->rl_head == NULL) sc->rl_head = sc->rl_tail = m; else { m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; sc->rl_tail = m; } continue; } /* * NOTE: for the 8139C+, the frame length field * is always 12 bits in size, but for the gigE chips, * it is 13 bits (since the max RX frame length is 16K). * Unfortunately, all 32 bits in the status word * were already used, so to make room for the extra * length bit, RealTek took out the 'frame alignment * error' bit and shifted the other status bits * over one slot. The OWN, EOR, FS and LS bits are * still in the same places. We have already extracted * the frame length and checked the OWN bit, so rather * than using an alternate bit mapping, we shift the * status bits one space to the right so we can evaluate * them using the 8169 status as though it was in the * same format as that of the 8139C+. */ if (sc->rl_type == RL_8169) rxstat >>= 1; /* * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be * set, but if CRC is clear, it will still be a valid frame. */ if ((rxstat & RL_RDESC_STAT_RXERRSUM) != 0) { rxerr = 1; if ((sc->rl_flags & RL_FLAG_JUMBOV2) == 0 && total_len > 8191 && (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT) rxerr = 0; if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); /* * If this is part of a multi-fragment packet, * discard all the pieces. */ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } } /* * If allocating a replacement mbuf fails, * reload the current one. */ if (jumbo != 0) rxerr = re_jumbo_newbuf(sc, i); else rxerr = re_newbuf(sc, i); if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } if (sc->rl_head != NULL) { if (jumbo != 0) m->m_len = total_len; else { m->m_len = total_len % RE_RX_DESC_BUFLEN; if (m->m_len == 0) m->m_len = RE_RX_DESC_BUFLEN; } /* * Special case: if there's 4 bytes or less * in this buffer, the mbuf can be discarded: * the last 4 bytes is the CRC, which we don't * care about anyway. */ if (m->m_len <= ETHER_CRC_LEN) { sc->rl_tail->m_len -= (ETHER_CRC_LEN - m->m_len); m_freem(m); } else { m->m_len -= ETHER_CRC_LEN; m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; } m = sc->rl_head; sc->rl_head = sc->rl_tail = NULL; m->m_pkthdr.len = total_len - ETHER_CRC_LEN; } else m->m_pkthdr.len = m->m_len = (total_len - ETHER_CRC_LEN); #ifdef RE_FIXUP_RX re_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; /* Do RX checksumming if enabled */ if (ifp->if_capenable & IFCAP_RXCSUM) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { /* Check IP header checksum */ if (rxstat & RL_RDESC_STAT_PROTOID) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; /* Check TCP/UDP checksum */ if ((RL_TCPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || (RL_UDPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } else { /* * RTL8168C/RTL816CP/RTL8111C/RTL8111CP */ if ((rxstat & RL_RDESC_STAT_PROTOID) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if (((rxstat & RL_RDESC_STAT_TCP) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || ((rxstat & RL_RDESC_STAT_UDP) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } } maxpkt--; if (rxvlan & RL_RDESC_VLANCTL_TAG) { m->m_pkthdr.ether_vtag = bswap16((rxvlan & RL_RDESC_VLANCTL_DATA)); m->m_flags |= M_VLANTAG; } RL_UNLOCK(sc); (*ifp->if_input)(ifp, m); RL_LOCK(sc); rx_npkts++; } /* Flush the RX DMA ring */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = i; if (rx_npktsp != NULL) *rx_npktsp = rx_npkts; if (maxpkt) return (EAGAIN); return (0); } static void re_txeof(struct rl_softc *sc) { struct ifnet *ifp; struct rl_txdesc *txd; u_int32_t txstat; int cons; cons = sc->rl_ldata.rl_tx_considx; if (cons == sc->rl_ldata.rl_tx_prodidx) return; ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, 0)) return; #endif /* DEV_NETMAP */ /* Invalidate the TX descriptor list */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (; cons != sc->rl_ldata.rl_tx_prodidx; cons = RL_TX_DESC_NXT(sc, cons)) { txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat); if (txstat & RL_TDESC_STAT_OWN) break; /* * We only stash mbufs in the last descriptor * in a fragment chain, which also happens to * be the only place where the TX status bits * are valid. */ if (txstat & RL_TDESC_CMD_EOF) { txd = &sc->rl_ldata.rl_tx_desc[cons]; bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); KASSERT(txd->tx_m != NULL, ("%s: freeing NULL mbufs!", __func__)); m_freem(txd->tx_m); txd->tx_m = NULL; if (txstat & (RL_TDESC_STAT_EXCESSCOL| RL_TDESC_STAT_COLCNT)) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (txstat & RL_TDESC_STAT_TXERRSUM) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } sc->rl_ldata.rl_tx_free++; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->rl_ldata.rl_tx_considx = cons; /* No changes made to the TX ring, so no flush needed */ if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) { #ifdef RE_TX_MODERATION /* * If not all descriptors have been reaped yet, reload * the timer so that we will eventually get another * interrupt that will cause us to re-enter this routine. * This is done in case the transmitter has gone idle. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif } else sc->rl_watchdog_timer = 0; } static void re_tick(void *xsc) { struct rl_softc *sc; struct mii_data *mii; sc = xsc; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_tick(mii); if ((sc->rl_flags & RL_FLAG_LINK) == 0) re_miibus_statchg(sc->rl_dev); /* * Reclaim transmitted frames here. Technically it is not * necessary to do here but it ensures periodic reclamation * regardless of Tx completion interrupt which seems to be * lost on PCIe based controllers under certain situations. */ re_txeof(sc); re_watchdog(sc); callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } #ifdef DEVICE_POLLING static int re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts = 0; RL_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rx_npkts = re_poll_locked(ifp, cmd, count); RL_UNLOCK(sc); return (rx_npkts); } static int re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts; RL_LOCK_ASSERT(sc); sc->rxcycles = count; re_rxeof(sc, &rx_npkts); re_txeof(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */ u_int16_t status; status = CSR_READ_2(sc, RL_ISR); if (status == 0xffff) return (rx_npkts); if (status) CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); /* * XXX check behaviour on receiver stalls. */ if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } } return (rx_npkts); } #endif /* DEVICE_POLLING */ static int re_intr(void *arg) { struct rl_softc *sc; uint16_t status; sc = arg; status = CSR_READ_2(sc, RL_ISR); if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0) return (FILTER_STRAY); CSR_WRITE_2(sc, RL_IMR, 0); taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask); return (FILTER_HANDLED); } static void re_int_task(void *arg, int npending) { struct rl_softc *sc; struct ifnet *ifp; u_int16_t status; int rval = 0; sc = arg; ifp = sc->rl_ifp; RL_LOCK(sc); status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->suspended || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW)) rval = re_rxeof(sc, NULL); /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. */ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & ( #ifdef RE_TX_MODERATION RL_ISR_TIMEOUT_EXPIRED| #else RL_ISR_TX_OK| #endif RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); RL_UNLOCK(sc); if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) { taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask); return; } CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); } static void re_intr_msi(void *xsc) { struct rl_softc *sc; struct ifnet *ifp; uint16_t intrs, status; sc = xsc; RL_LOCK(sc); ifp = sc->rl_ifp; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif /* Disable interrupts. */ CSR_WRITE_2(sc, RL_IMR, 0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } intrs = RL_INTRS_CPLUS; status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->rl_int_rx_act > 0) { intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); status &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); } if (status & (RL_ISR_TIMEOUT_EXPIRED | RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) { re_rxeof(sc, NULL); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (sc->rl_int_rx_mod != 0 && (status & (RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) != 0) { /* Rearm one-shot timer. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); sc->rl_int_rx_act = 1; } else { intrs |= RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN; sc->rl_int_rx_act = 0; } } } /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. */ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & (RL_ISR_TX_OK | RL_ISR_TX_ERR | RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); CSR_WRITE_2(sc, RL_IMR, intrs); } RL_UNLOCK(sc); } static int re_encap(struct rl_softc *sc, struct mbuf **m_head) { struct rl_txdesc *txd, *txd_last; bus_dma_segment_t segs[RL_NTXSEGS]; bus_dmamap_t map; struct mbuf *m_new; struct rl_desc *desc; int nsegs, prod; int i, error, ei, si; int padlen; uint32_t cmdstat, csum_flags, vlanctl; RL_LOCK_ASSERT(sc); M_ASSERTPKTHDR((*m_head)); /* * With some of the RealTek chips, using the checksum offload * support in conjunction with the autopadding feature results * in the transmission of corrupt frames. For example, if we * need to send a really small IP fragment that's less than 60 * bytes in size, and IP header checksumming is enabled, the * resulting ethernet frame that appears on the wire will * have garbled payload. To work around this, if TX IP checksum * offload is enabled, we always manually pad short frames out * to the minimum ethernet frame size. */ if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 && (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN && ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) { padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len; if (M_WRITABLE(*m_head) == 0) { /* Get a writable copy. */ m_new = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); if (m_new == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m_new; } if ((*m_head)->m_next != NULL || M_TRAILINGSPACE(*m_head) < padlen) { m_new = m_defrag(*m_head, M_NOWAIT); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } } else m_new = *m_head; /* * Manually pad short frames, and zero the pad space * to avoid leaking data. */ bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen); m_new->m_pkthdr.len += padlen; m_new->m_len = m_new->m_pkthdr.len; *m_head = m_new; } prod = sc->rl_ldata.rl_tx_prodidx; txd = &sc->rl_ldata.rl_tx_desc[prod]; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m_new = m_collapse(*m_head, M_NOWAIT, RL_NTXSEGS); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m_new; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check for number of available descriptors. */ if (sc->rl_ldata.rl_tx_free - nsegs <= 1) { bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); return (ENOBUFS); } bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Set up checksum offload. Note: checksum offload bits must * appear in all descriptors of a multi-descriptor transmit * attempt. This is according to testing done with an 8169 * chip. This is a requirement. */ vlanctl = 0; csum_flags = 0; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) != 0) { csum_flags |= RL_TDESC_CMD_LGSEND; vlanctl |= ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVALV2_SHIFT); } else { csum_flags |= RL_TDESC_CMD_LGSEND | ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVAL_SHIFT); } } else { /* * Unconditionally enable IP checksum if TCP or UDP * checksum is required. Otherwise, TCP/UDP checksum * doesn't make effects. */ if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { csum_flags |= RL_TDESC_CMD_IPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) csum_flags |= RL_TDESC_CMD_TCPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) csum_flags |= RL_TDESC_CMD_UDPCSUM; } else { vlanctl |= RL_TDESC_CMD_IPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) vlanctl |= RL_TDESC_CMD_TCPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) vlanctl |= RL_TDESC_CMD_UDPCSUMV2; } } } /* * Set up hardware VLAN tagging. Note: vlan tag info must * appear in all descriptors of a multi-descriptor * transmission attempt. */ if ((*m_head)->m_flags & M_VLANTAG) vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) | RL_TDESC_VLANCTL_TAG; si = prod; for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) { desc = &sc->rl_ldata.rl_tx_list[prod]; desc->rl_vlanctl = htole32(vlanctl); desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr)); cmdstat = segs[i].ds_len; if (i != 0) cmdstat |= RL_TDESC_CMD_OWN; if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1) cmdstat |= RL_TDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | csum_flags); sc->rl_ldata.rl_tx_free--; } /* Update producer index. */ sc->rl_ldata.rl_tx_prodidx = prod; /* Set EOF on the last descriptor. */ ei = RL_TX_DESC_PRV(sc, prod); desc = &sc->rl_ldata.rl_tx_list[ei]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF); desc = &sc->rl_ldata.rl_tx_list[si]; /* Set SOF and transfer ownership of packet to the chip. */ desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF); /* * Insure that the map for this transmission * is placed at the array index of the last descriptor * in this chain. (Swap last and first dmamaps.) */ txd_last = &sc->rl_ldata.rl_tx_desc[ei]; map = txd->tx_dmamap; txd->tx_dmamap = txd_last->tx_dmamap; txd_last->tx_dmamap = map; txd_last->tx_m = *m_head; return (0); } static void re_start(struct ifnet *ifp) { struct rl_softc *sc; sc = ifp->if_softc; RL_LOCK(sc); re_start_locked(ifp); RL_UNLOCK(sc); } /* * Main transmit routine for C+ and gigE NICs. */ static void re_start_locked(struct ifnet *ifp) { struct rl_softc *sc; struct mbuf *m_head; int queued; sc = ifp->if_softc; #ifdef DEV_NETMAP /* XXX is this necessary ? */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_kring *kring = &NA(ifp)->tx_rings[0]; if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) { /* kick the tx unit */ CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif sc->rl_watchdog_timer = 5; } return; } #endif /* DEV_NETMAP */ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) return; for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->rl_ldata.rl_tx_free > 1;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (re_encap(sc, &m_head) != 0) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); queued++; } if (queued == 0) { #ifdef RE_TX_MODERATION if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif return; } re_start_tx(sc); } static void re_start_tx(struct rl_softc *sc) { /* Flush the TX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION /* * Use the countdown timer for interrupt moderation. * 'TX done' interrupts are disabled. Instead, we reset the * countdown timer, which will begin counting until it hits * the value in the TIMERINT register, and then trigger an * interrupt. Each time we write to the TIMERCNT register, * the timer count is reset to 0. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif /* * Set a timeout in case the chip goes out to lunch. */ sc->rl_watchdog_timer = 5; } static void re_set_jumbo(struct rl_softc *sc, int jumbo) { if (sc->rl_hwrev->rl_rev == RL_HWREV_8168E_VL) { pci_set_max_read_req(sc->rl_dev, 4096); return; } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); if (jumbo != 0) { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) | RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | 0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | RL_CFG4_JUMBO_EN1); } } else { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) & ~RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~RL_CFG4_JUMBO_EN1); } } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: pci_set_max_read_req(sc->rl_dev, 4096); break; default: if (jumbo != 0) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } static void re_init(void *xsc) { struct rl_softc *sc = xsc; RL_LOCK(sc); re_init_locked(sc); RL_UNLOCK(sc); } static void re_init_locked(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mii_data *mii; uint32_t reg; uint16_t cfg; union { uint32_t align_dummy; u_char eaddr[ETHER_ADDR_LEN]; } eaddr; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel pending I/O and free all RX/TX buffers. */ re_stop(sc); /* Put controller into known state. */ re_reset(sc); /* * For C+ mode, initialize the RX descriptors and mbufs. */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { if (ifp->if_mtu > RL_MTU) { if (re_jrx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for jumbo RX buffers\n"); re_stop(sc); return; } /* Disable checksum offloading for jumbo frames. */ ifp->if_capenable &= ~(IFCAP_HWCSUM | IFCAP_TSO4); ifp->if_hwassist &= ~(RE_CSUM_FEATURES | CSUM_TSO); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } } re_set_jumbo(sc, ifp->if_mtu > RL_MTU); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && pci_get_device(sc->rl_dev) != RT_DEVICEID_8101E) { if (ifp->if_mtu > RL_MTU) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } re_tx_list_init(sc); /* * Enable C+ RX and TX mode, as well as VLAN stripping and * RX checksum offload. We must configure the C+ register * before all others. */ cfg = RL_CPLUSCMD_PCI_MRW; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cfg |= RL_CPLUSCMD_RXCSUM_ENB; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) cfg |= RL_CPLUSCMD_VLANSTRIP; if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) { cfg |= RL_CPLUSCMD_MACSTAT_DIS; /* XXX magic. */ cfg |= 0x0001; } else cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB; CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SC || sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) { reg = 0x000fff00; if ((CSR_READ_1(sc, sc->rl_cfg2) & RL_CFG2_PCI66MHZ) != 0) reg |= 0x000000ff; if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) reg |= 0x00f00000; CSR_WRITE_4(sc, 0x7c, reg); /* Disable interrupt mitigation. */ CSR_WRITE_2(sc, 0xe2, 0); } /* * Disable TSO if interface MTU size is greater than MSS * allowed in controller. */ if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } /* * Init our MAC address. Even though the chipset * documentation doesn't mention it, we need to enter "Config * register write enable" mode to modify the ID registers. */ /* Copy MAC address on stack to align. */ bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); CSR_WRITE_4(sc, RL_IDR0, htole32(*(u_int32_t *)(&eaddr.eaddr[0]))); CSR_WRITE_4(sc, RL_IDR4, htole32(*(u_int32_t *)(&eaddr.eaddr[4]))); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); /* * Load the addresses of the RX and TX lists into the chip. */ CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Disable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & ~0x00080000); } /* * Enable transmit and receive for pre-RTL8168G controllers. * RX/TX MACs should be enabled before RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) == 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); /* * Set the initial TX configuration. */ if (sc->rl_testmode) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON); else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS); } else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG); CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16); /* * Set the initial RX configuration. */ re_set_rxmode(sc); /* Configure interrupt moderation. */ if (sc->rl_type == RL_8169) { /* Magic from vendor. */ CSR_WRITE_2(sc, RL_INTRMOD, 0x5100); } /* * Enable transmit and receive for RTL8168G and later controllers. * RX/TX MACs should be enabled after RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); #ifdef DEVICE_POLLING /* * Disable interrupts if we are polling. */ if (ifp->if_capenable & IFCAP_POLLING) CSR_WRITE_2(sc, RL_IMR, 0); else /* otherwise ... */ #endif /* * Enable interrupts. */ if (sc->rl_testmode) CSR_WRITE_2(sc, RL_IMR, 0); else CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS); /* Set initial TX threshold */ sc->rl_txthresh = RL_TX_THRESH_INIT; /* Start RX/TX process. */ CSR_WRITE_4(sc, RL_MISSEDPKT, 0); /* * Initialize the timer interrupt register so that * a timer interrupt will be generated once the timer * reaches a certain number of ticks. The timer is * reloaded on each transmit. */ #ifdef RE_TX_MODERATION /* * Use timer interrupt register to moderate TX interrupt * moderation, which dramatically improves TX frame rate. */ if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800); else CSR_WRITE_4(sc, RL_TIMERINT, 0x400); #else /* * Use timer interrupt register to moderate RX interrupt * moderation. */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(sc->rl_int_rx_mod)); } else { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(0)); } #endif /* * For 8169 gigE NICs, set the max allowed RX packet * size so we can receive jumbo frames. */ if (sc->rl_type == RL_8169) { if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { /* * For controllers that use new jumbo frame scheme, * set maximum size of jumbo frame depending on * controller revisions. */ if (ifp->if_mtu > RL_MTU) CSR_WRITE_2(sc, RL_MAXRXPKTLEN, sc->rl_hwrev->rl_max_mtu + ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN + ETHER_CRC_LEN); else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && sc->rl_hwrev->rl_max_mtu == RL_MTU) { /* RTL810x has no jumbo frame support. */ CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383); } if (sc->rl_testmode) return; CSR_WRITE_1(sc, sc->rl_cfg1, CSR_READ_1(sc, sc->rl_cfg1) | RL_CFG1_DRVLOAD); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->rl_flags &= ~RL_FLAG_LINK; mii_mediachg(mii); sc->rl_watchdog_timer = 0; callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } /* * Set media options. */ static int re_ifmedia_upd(struct ifnet *ifp) { struct rl_softc *sc; struct mii_data *mii; int error; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); error = mii_mediachg(mii); RL_UNLOCK(sc); return (error); } /* * Report current media status. */ static void re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct rl_softc *sc; struct mii_data *mii; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; RL_UNLOCK(sc); } static int re_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct rl_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error = 0; switch (command) { case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > sc->rl_hwrev->rl_max_mtu || ((sc->rl_flags & RL_FLAG_FASTETHER) != 0 && ifr->ifr_mtu > RL_MTU)) { error = EINVAL; break; } RL_LOCK(sc); if (ifp->if_mtu != ifr->ifr_mtu) { ifp->if_mtu = ifr->ifr_mtu; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); ifp->if_hwassist &= ~CSUM_TSO; } VLAN_CAPABILITIES(ifp); } RL_UNLOCK(sc); break; case SIOCSIFFLAGS: RL_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (((ifp->if_flags ^ sc->rl_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) re_set_rxmode(sc); } else re_init_locked(sc); } else { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_stop(sc); } sc->rl_if_flags = ifp->if_flags; RL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: RL_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_set_rxmode(sc); RL_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->rl_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: { int mask, reinit; mask = ifr->ifr_reqcap ^ ifp->if_capenable; reinit = 0; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(re_poll, ifp); if (error) return (error); RL_LOCK(sc); /* Disable interrupts */ CSR_WRITE_2(sc, RL_IMR, 0x0000); ifp->if_capenable |= IFCAP_POLLING; RL_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupts. */ RL_LOCK(sc); CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); ifp->if_capenable &= ~IFCAP_POLLING; RL_UNLOCK(sc); } } #endif /* DEVICE_POLLING */ RL_LOCK(sc); if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= RE_CSUM_FEATURES; else ifp->if_hwassist &= ~RE_CSUM_FEATURES; reinit = 1; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; reinit = 1; } if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((IFCAP_TSO4 & ifp->if_capenable) != 0) ifp->if_hwassist |= CSUM_TSO; else ifp->if_hwassist &= ~CSUM_TSO; if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; /* TSO over VLAN requires VLAN hardware tagging. */ if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; reinit = 1; } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (mask & (IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWTSO)) != 0) reinit = 1; if ((mask & IFCAP_WOL) != 0 && (ifp->if_capabilities & IFCAP_WOL) != 0) { if ((mask & IFCAP_WOL_UCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_UCAST; if ((mask & IFCAP_WOL_MCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_MCAST; if ((mask & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } RL_UNLOCK(sc); VLAN_CAPABILITIES(ifp); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void re_watchdog(struct rl_softc *sc) { struct ifnet *ifp; RL_LOCK_ASSERT(sc); if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0) return; ifp = sc->rl_ifp; re_txeof(sc); if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) { if_printf(ifp, "watchdog timeout (missed Tx interrupts) " "-- recovering\n"); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); return; } if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); re_rxeof(sc, NULL); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void re_stop(struct rl_softc *sc) { int i; struct ifnet *ifp; struct rl_txdesc *txd; struct rl_rxdesc *rxd; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; sc->rl_watchdog_timer = 0; callout_stop(&sc->rl_stat_callout); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* * Disable accepting frames to put RX MAC into idle state. * Otherwise it's possible to get frames while stop command * execution is in progress and controller can DMA the frame * to already freed RX buffer during that period. */ CSR_WRITE_4(sc, RL_RXCFG, CSR_READ_4(sc, RL_RXCFG) & ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_MULTI | RL_RXCFG_RX_BROAD)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Enable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) | 0x00080000); } if ((sc->rl_flags & RL_FLAG_WAIT_TXPOLL) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_1(sc, sc->rl_txstart) & RL_TXSTART_START) == 0) break; DELAY(20); } if (i == 0) device_printf(sc->rl_dev, "stopping TX poll timed out!\n"); CSR_WRITE_1(sc, RL_COMMAND, 0x00); } else if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0) { CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB | RL_CMD_RX_ENB); if ((sc->rl_flags & RL_FLAG_CMDSTOP_WAIT_TXQ) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_QUEUE_EMPTY) != 0) break; DELAY(100); } if (i == 0) device_printf(sc->rl_dev, "stopping TXQ timed out!\n"); } } else CSR_WRITE_1(sc, RL_COMMAND, 0x00); DELAY(1000); CSR_WRITE_2(sc, RL_IMR, 0x0000); CSR_WRITE_2(sc, RL_ISR, 0xFFFF); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } /* Free the TX list buffers. */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { txd = &sc->rl_ldata.rl_tx_desc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } /* Free the RX list buffers. */ for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_rx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_jrx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } } } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. */ static int re_suspend(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); re_setwol(sc); sc->suspended = 1; RL_UNLOCK(sc); return (0); } /* * Device resume routine. Restore some PCI settings in case the BIOS * doesn't, re-enable busmastering, and restart the interface if * appropriate. */ static int re_resume(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); RL_LOCK(sc); ifp = sc->rl_ifp; /* Take controller out of sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); } /* * Clear WOL matching such that normal Rx filtering * wouldn't interfere with WOL patterns. */ re_clrwol(sc); /* reinitialize interface if necessary */ if (ifp->if_flags & IFF_UP) re_init_locked(sc); sc->suspended = 0; RL_UNLOCK(sc); return (0); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int re_shutdown(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); /* * Mark interface as down since otherwise we will panic if * interrupt comes in later on, which can happen in some * cases. */ sc->rl_ifp->if_flags &= ~IFF_UP; re_setwol(sc); RL_UNLOCK(sc); return (0); } static void re_set_linkspeed(struct rl_softc *sc) { struct mii_softc *miisc; struct mii_data *mii; int aneg, i, phyno; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_pollstat(mii); aneg = 0; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch IFM_SUBTYPE(mii->mii_media_active) { case IFM_10_T: case IFM_100_TX: return; case IFM_1000_T: aneg++; break; default: break; } } miisc = LIST_FIRST(&mii->mii_phys); phyno = miisc->mii_phy; LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); re_miibus_writereg(sc->rl_dev, phyno, MII_100T2CR, 0); re_miibus_writereg(sc->rl_dev, phyno, MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA); re_miibus_writereg(sc->rl_dev, phyno, MII_BMCR, BMCR_AUTOEN | BMCR_STARTNEG); DELAY(1000); if (aneg != 0) { /* * Poll link state until re(4) get a 10/100Mbps link. */ for (i = 0; i < MII_ANEGTICKS_GIGE; i++) { mii_pollstat(mii); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: return; default: break; } } RL_UNLOCK(sc); pause("relnk", hz); RL_LOCK(sc); } if (i == MII_ANEGTICKS_GIGE) device_printf(sc->rl_dev, "establishing a link failed, WOL may not work!"); } /* * No link, force MAC to have 100Mbps, full-duplex link. * MAC does not require reprogramming on resolved speed/duplex, * so this is just for completeness. */ mii->mii_media_status = IFM_AVALID | IFM_ACTIVE; mii->mii_media_active = IFM_ETHER | IFM_100_TX | IFM_FDX; } static void re_setwol(struct rl_softc *sc) { struct ifnet *ifp; int pmc; uint16_t pmstat; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; ifp = sc->rl_ifp; /* Put controller into sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } if ((ifp->if_capenable & IFCAP_WOL) != 0) { if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Disable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & ~0x00080000); } re_set_rxmode(sc); if ((sc->rl_flags & RL_FLAG_WOL_MANLINK) != 0) re_set_linkspeed(sc); if ((sc->rl_flags & RL_FLAG_WOLRXENB) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB); } /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); /* Enable PME. */ v = CSR_READ_1(sc, sc->rl_cfg1); v &= ~RL_CFG1_PME; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, v); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) v |= RL_CFG3_WOL_MAGIC; CSR_WRITE_1(sc, sc->rl_cfg3, v); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST | RL_CFG5_WOL_LANWAKE); if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0) v |= RL_CFG5_WOL_UCAST; if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((ifp->if_capenable & IFCAP_WOL) == 0 && (sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) & ~0x80); /* * It seems that hardware resets its link speed to 100Mbps in * power down mode so switching to 100Mbps in driver is not * needed. */ /* Request PME if WOL is requested. */ pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2); } static void re_clrwol(struct rl_softc *sc) { int pmc; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); CSR_WRITE_1(sc, sc->rl_cfg3, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST); v &= ~RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); } static void re_add_sysctls(struct rl_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int error; ctx = device_get_sysctl_ctx(sc->rl_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->rl_dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0, re_sysctl_stats, "I", "Statistics Information"); if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) return; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "int_rx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->rl_int_rx_mod, 0, sysctl_hw_re_int_mod, "I", "re RX interrupt moderation"); /* Pull in device tunables. */ sc->rl_int_rx_mod = RL_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->rl_dev), device_get_unit(sc->rl_dev), "int_rx_mod", &sc->rl_int_rx_mod); if (error == 0) { if (sc->rl_int_rx_mod < RL_TIMER_MIN || sc->rl_int_rx_mod > RL_TIMER_MAX) { device_printf(sc->rl_dev, "int_rx_mod value out of " "range; using default: %d\n", RL_TIMER_DEFAULT); sc->rl_int_rx_mod = RL_TIMER_DEFAULT; } } } static int re_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct rl_softc *sc; struct rl_stats *stats; int error, i, result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || req->newptr == NULL) return (error); if (result == 1) { sc = (struct rl_softc *)arg1; RL_LOCK(sc); if ((sc->rl_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); goto done; } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_PREREAD); CSR_WRITE_4(sc, RL_DUMPSTATS_HI, RL_ADDR_HI(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr | RL_DUMPSTATS_START)); for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_DUMPSTATS_LO) & RL_DUMPSTATS_START) == 0) break; DELAY(1000); } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_POSTREAD); RL_UNLOCK(sc); if (i == 0) { device_printf(sc->rl_dev, "DUMP statistics request timed out\n"); return (ETIMEDOUT); } done: stats = sc->rl_ldata.rl_stats; printf("%s statistics:\n", device_get_nameunit(sc->rl_dev)); printf("Tx frames : %ju\n", (uintmax_t)le64toh(stats->rl_tx_pkts)); printf("Rx frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_pkts)); printf("Tx errors : %ju\n", (uintmax_t)le64toh(stats->rl_tx_errs)); printf("Rx errors : %u\n", le32toh(stats->rl_rx_errs)); printf("Rx missed frames : %u\n", (uint32_t)le16toh(stats->rl_missed_pkts)); printf("Rx frame alignment errs : %u\n", (uint32_t)le16toh(stats->rl_rx_framealign_errs)); printf("Tx single collisions : %u\n", le32toh(stats->rl_tx_onecoll)); printf("Tx multiple collisions : %u\n", le32toh(stats->rl_tx_multicolls)); printf("Rx unicast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_ucasts)); printf("Rx broadcast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_bcasts)); printf("Rx multicast frames : %u\n", le32toh(stats->rl_rx_mcasts)); printf("Tx aborts : %u\n", (uint32_t)le16toh(stats->rl_tx_aborts)); printf("Tx underruns : %u\n", (uint32_t)le16toh(stats->rl_rx_underruns)); } return (error); } static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (arg1 == NULL) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_re_int_mod(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, RL_TIMER_MIN, RL_TIMER_MAX)); } #ifdef NETDUMP static void -re_netdump_init(struct ifnet *ifp, int *nrxr, int *clsize) +re_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct rl_softc *sc; sc = if_getsoftc(ifp); RL_LOCK(sc); *nrxr = sc->rl_ldata.rl_rx_desc_cnt; + *ncl = NETDUMP_MAX_IN_FLIGHT; *clsize = (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) ? MJUM9BYTES : MCLBYTES; RL_UNLOCK(sc); } static void re_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) { } static int re_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct rl_softc *sc; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) return (EBUSY); error = re_encap(sc, &m); if (error == 0) re_start_tx(sc); return (error); } static int re_netdump_poll(struct ifnet *ifp, int count) { struct rl_softc *sc; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || (sc->rl_flags & RL_FLAG_LINK) == 0) return (EBUSY); re_txeof(sc); error = re_rxeof(sc, NULL); if (error != 0 && error != EAGAIN) return (error); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/virtio/network/if_vtnet.c =================================================================== --- user/markj/netdump/sys/dev/virtio/network/if_vtnet.c (revision 332406) +++ user/markj/netdump/sys/dev/virtio/network/if_vtnet.c (revision 332407) @@ -1,4048 +1,4049 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Driver for VirtIO network devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "virtio_if.h" #include "opt_inet.h" #include "opt_inet6.h" static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); static int vtnet_attach(device_t); static int vtnet_detach(device_t); static int vtnet_suspend(device_t); static int vtnet_resume(device_t); static int vtnet_shutdown(device_t); static int vtnet_attach_completed(device_t); static int vtnet_config_change(device_t); static void vtnet_negotiate_features(struct vtnet_softc *); static void vtnet_setup_features(struct vtnet_softc *); static int vtnet_init_rxq(struct vtnet_softc *, int); static int vtnet_init_txq(struct vtnet_softc *, int); static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); static void vtnet_free_rxtx_queues(struct vtnet_softc *); static int vtnet_alloc_rx_filters(struct vtnet_softc *); static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); static int vtnet_setup_interface(struct vtnet_softc *); static int vtnet_change_mtu(struct vtnet_softc *, int); static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t vtnet_get_counter(struct ifnet *, ift_counter); static int vtnet_rxq_populate(struct vtnet_rxq *); static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_new_buf(struct vtnet_rxq *); static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_rxq_eof(struct vtnet_rxq *); static void vtnet_rx_vq_intr(void *); static void vtnet_rxq_tq_intr(void *, int); static int vtnet_txq_below_threshold(struct vtnet_txq *); static int vtnet_txq_notify(struct vtnet_txq *); static void vtnet_txq_free_mbufs(struct vtnet_txq *); static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, int *, int *, int *); static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, int, struct virtio_net_hdr *); static struct mbuf * vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int); #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); #else static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); static void vtnet_txq_tq_deferred(void *, int); #endif static void vtnet_txq_start(struct vtnet_txq *); static void vtnet_txq_tq_intr(void *, int); static int vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); static void vtnet_tx_start_all(struct vtnet_softc *); #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *); #endif static int vtnet_watchdog(struct vtnet_txq *); static void vtnet_accum_stats(struct vtnet_softc *, struct vtnet_rxq_stats *, struct vtnet_txq_stats *); static void vtnet_tick(void *); static void vtnet_start_taskqueues(struct vtnet_softc *); static void vtnet_free_taskqueues(struct vtnet_softc *); static void vtnet_drain_taskqueues(struct vtnet_softc *); static void vtnet_drain_rxtx_queues(struct vtnet_softc *); static void vtnet_stop_rendezvous(struct vtnet_softc *); static void vtnet_stop(struct vtnet_softc *); static int vtnet_virtio_reinit(struct vtnet_softc *); static void vtnet_init_rx_filters(struct vtnet_softc *); static int vtnet_init_rx_queues(struct vtnet_softc *); static int vtnet_init_tx_queues(struct vtnet_softc *); static int vtnet_init_rxtx_queues(struct vtnet_softc *); static void vtnet_set_active_vq_pairs(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init_locked(struct vtnet_softc *); static void vtnet_init(void *); static void vtnet_free_ctrl_vq(struct vtnet_softc *); static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); static void vtnet_attach_disable_promisc(struct vtnet_softc *); static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_rx_filter_vlan(struct vtnet_softc *); static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); static int vtnet_is_link_up(struct vtnet_softc *); static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(struct ifnet *); static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void vtnet_get_hwaddr(struct vtnet_softc *); static void vtnet_set_hwaddr(struct vtnet_softc *); static void vtnet_vlan_tag_remove(struct mbuf *); static void vtnet_set_rx_process_limit(struct vtnet_softc *); static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_rxq *); static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_txq *); static void vtnet_setup_queue_sysctl(struct vtnet_softc *); static void vtnet_setup_sysctl(struct vtnet_softc *); static int vtnet_rxq_enable_intr(struct vtnet_rxq *); static void vtnet_rxq_disable_intr(struct vtnet_rxq *); static int vtnet_txq_enable_intr(struct vtnet_txq *); static void vtnet_txq_disable_intr(struct vtnet_txq *); static void vtnet_enable_rx_interrupts(struct vtnet_softc *); static void vtnet_enable_tx_interrupts(struct vtnet_softc *); static void vtnet_enable_interrupts(struct vtnet_softc *); static void vtnet_disable_rx_interrupts(struct vtnet_softc *); static void vtnet_disable_tx_interrupts(struct vtnet_softc *); static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); NETDUMP_DEFINE(vtnet); /* Tunables. */ static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); static int vtnet_csum_disable = 0; TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable, 0, "Disables TCP Segmentation Offload"); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable, 0, "Disables TCP Large Receive Offload"); static int vtnet_mq_disable = 0; TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable, 0, "Disables Multi Queue support"); static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS; TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs"); static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &vtnet_rx_process_limit, 0, "Limits the number RX segments processed in a single pass"); static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { { VIRTIO_NET_F_CSUM, "TxChecksum" }, { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, { VIRTIO_NET_F_MAC, "MacAddress" }, { VIRTIO_NET_F_GSO, "TxAllGSO" }, { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, { VIRTIO_NET_F_STATUS, "Status" }, { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, { VIRTIO_NET_F_CTRL_RX, "RxMode" }, { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, { VIRTIO_NET_F_MQ, "Multiqueue" }, { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, { 0, NULL } }; static device_method_t vtnet_methods[] = { /* Device methods. */ DEVMETHOD(device_probe, vtnet_probe), DEVMETHOD(device_attach, vtnet_attach), DEVMETHOD(device_detach, vtnet_detach), DEVMETHOD(device_suspend, vtnet_suspend), DEVMETHOD(device_resume, vtnet_resume), DEVMETHOD(device_shutdown, vtnet_shutdown), /* VirtIO methods. */ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; #ifdef DEV_NETMAP #include #endif /* DEV_NETMAP */ static driver_t vtnet_driver = { "vtnet", vtnet_methods, sizeof(struct vtnet_softc) }; static devclass_t vtnet_devclass; DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); MODULE_VERSION(vtnet, 1); MODULE_DEPEND(vtnet, virtio, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(vtnet, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ static int vtnet_modevent(module_t mod, int type, void *unused) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded++ == 0) vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", sizeof(struct vtnet_tx_header), NULL, NULL, NULL, NULL, 0, 0); break; case MOD_QUIESCE: if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) error = EBUSY; break; case MOD_UNLOAD: if (--loaded == 0) { uma_zdestroy(vtnet_tx_header_zone); vtnet_tx_header_zone = NULL; } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static int vtnet_probe(device_t dev) { if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK) return (ENXIO); device_set_desc(dev, "VirtIO Networking Adapter"); return (BUS_PROBE_DEFAULT); } static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; /* Register our feature descriptions. */ virtio_set_feature_desc(dev, vtnet_feature_desc); VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); vtnet_setup_sysctl(sc); vtnet_setup_features(sc); error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx filters\n"); goto fail; } error = vtnet_alloc_rxtx_queues(sc); if (error) { device_printf(dev, "cannot allocate queues\n"); goto fail; } error = vtnet_alloc_virtqueues(sc); if (error) { device_printf(dev, "cannot allocate virtqueues\n"); goto fail; } error = vtnet_setup_interface(sc); if (error) { device_printf(dev, "cannot setup interface\n"); goto fail; } error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup virtqueue interrupts\n"); /* BMV: This will crash if during boot! */ ether_ifdetach(sc->vtnet_ifp); goto fail; } #ifdef DEV_NETMAP vtnet_netmap_attach(sc); #endif /* DEV_NETMAP */ vtnet_start_taskqueues(sc); fail: if (error) vtnet_detach(dev); return (error); } static int vtnet_detach(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; if (device_is_attached(dev)) { VTNET_CORE_LOCK(sc); vtnet_stop(sc); VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); vtnet_drain_taskqueues(sc); ether_ifdetach(ifp); } #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ vtnet_free_taskqueues(sc); if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; } if (sc->vtnet_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach); sc->vtnet_vlan_detach = NULL; } ifmedia_removeall(&sc->vtnet_media); if (ifp != NULL) { if_free(ifp); sc->vtnet_ifp = NULL; } vtnet_free_rxtx_queues(sc); vtnet_free_rx_filters(sc); if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); VTNET_CORE_LOCK_DESTROY(sc); return (0); } static int vtnet_suspend(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_resume(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) vtnet_init_locked(sc); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_shutdown(device_t dev) { /* * Suspend already does all of what we need to * do here; we just never expect to be resumed. */ return (vtnet_suspend(dev)); } static int vtnet_attach_completed(device_t dev) { vtnet_attach_disable_promisc(device_get_softc(dev)); return (0); } static int vtnet_config_change(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_update_link_status(sc); if (sc->vtnet_link_active != 0) vtnet_tx_start_all(sc); VTNET_CORE_UNLOCK(sc); return (0); } static void vtnet_negotiate_features(struct vtnet_softc *sc) { device_t dev; uint64_t mask, features; dev = sc->vtnet_dev; mask = 0; /* * TSO and LRO are only available when their corresponding checksum * offload feature is also negotiated. */ if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; } if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) mask |= VTNET_TSO_FEATURES; if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) mask |= VTNET_LRO_FEATURES; #ifndef VTNET_LEGACY_TX if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) mask |= VIRTIO_NET_F_MQ; #else mask |= VIRTIO_NET_F_MQ; #endif features = VTNET_FEATURES & ~mask; sc->vtnet_features = virtio_negotiate_features(dev, features); if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { /* * LRO without mergeable buffers requires special care. This * is not ideal because every receive buffer must be large * enough to hold the maximum TCP packet, the Ethernet header, * and the header. This requires up to 34 descriptors with * MCLBYTES clusters. If we do not have indirect descriptors, * LRO is disabled since the virtqueue will not contain very * many receive buffers. */ if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { device_printf(dev, "LRO disabled due to both mergeable buffers and " "indirect descriptors not negotiated\n"); features &= ~VTNET_LRO_FEATURES; sc->vtnet_features = virtio_negotiate_features(dev, features); } else sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } } static void vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; dev = sc->vtnet_dev; vtnet_negotiate_features(sc); if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) sc->vtnet_flags |= VTNET_FLAG_INDIRECT; if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { /* This feature should always be negotiated. */ sc->vtnet_flags |= VTNET_FLAG_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; else sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; else sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); } else sc->vtnet_max_vq_pairs = 1; if (sc->vtnet_max_vq_pairs > 1) { /* * Limit the maximum number of queue pairs to the lower of * the number of CPUs and the configured maximum. * The actual number of queues that get used may be less. */ int max; max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) { if (max > mp_ncpus) max = mp_ncpus; if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; if (max > 1) { sc->vtnet_requested_vq_pairs = max; sc->vtnet_flags |= VTNET_FLAG_MULTIQ; } } } } static int vtnet_init_rxq(struct vtnet_softc *sc, int id) { struct vtnet_rxq *rxq; rxq = &sc->vtnet_rxqs[id]; snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); rxq->vtnrx_sc = sc; rxq->vtnrx_id = id; rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); if (rxq->vtnrx_sg == NULL) return (ENOMEM); TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); return (rxq->vtnrx_tq == NULL ? ENOMEM : 0); } static int vtnet_init_txq(struct vtnet_softc *sc, int id) { struct vtnet_txq *txq; txq = &sc->vtnet_txqs[id]; snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); txq->vtntx_sc = sc; txq->vtntx_id = id; txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); if (txq->vtntx_sg == NULL) return (ENOMEM); #ifndef VTNET_LEGACY_TX txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vtntx_mtx); if (txq->vtntx_br == NULL) return (ENOMEM); TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); #endif TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, taskqueue_thread_enqueue, &txq->vtntx_tq); if (txq->vtntx_tq == NULL) return (ENOMEM); return (0); } static int vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) { int i, npairs, error; npairs = sc->vtnet_max_vq_pairs; sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) return (ENOMEM); for (i = 0; i < npairs; i++) { error = vtnet_init_rxq(sc, i); if (error) return (error); error = vtnet_init_txq(sc, i); if (error) return (error); } vtnet_setup_queue_sysctl(sc); return (0); } static void vtnet_destroy_rxq(struct vtnet_rxq *rxq) { rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; } if (mtx_initialized(&rxq->vtnrx_mtx) != 0) mtx_destroy(&rxq->vtnrx_mtx); } static void vtnet_destroy_txq(struct vtnet_txq *txq) { txq->vtntx_sc = NULL; txq->vtntx_id = -1; if (txq->vtntx_sg != NULL) { sglist_free(txq->vtntx_sg); txq->vtntx_sg = NULL; } #ifndef VTNET_LEGACY_TX if (txq->vtntx_br != NULL) { buf_ring_free(txq->vtntx_br, M_DEVBUF); txq->vtntx_br = NULL; } #endif if (mtx_initialized(&txq->vtntx_mtx) != 0) mtx_destroy(&txq->vtntx_mtx); } static void vtnet_free_rxtx_queues(struct vtnet_softc *sc) { int i; if (sc->vtnet_rxqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); free(sc->vtnet_rxqs, M_DEVBUF); sc->vtnet_rxqs = NULL; } if (sc->vtnet_txqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_txq(&sc->vtnet_txqs[i]); free(sc->vtnet_txqs, M_DEVBUF); sc->vtnet_txqs = NULL; } } static int vtnet_alloc_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_mac_filter == NULL) return (ENOMEM); } if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_vlan_filter == NULL) return (ENOMEM); } return (0); } static void vtnet_free_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_mac_filter != NULL) { free(sc->vtnet_mac_filter, M_DEVBUF); sc->vtnet_mac_filter = NULL; } if (sc->vtnet_vlan_filter != NULL) { free(sc->vtnet_vlan_filter, M_DEVBUF); sc->vtnet_vlan_filter = NULL; } } static int vtnet_alloc_virtqueues(struct vtnet_softc *sc) { device_t dev; struct vq_alloc_info *info; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, idx, flags, nvqs, error; dev = sc->vtnet_dev; flags = 0; nvqs = sc->vtnet_max_vq_pairs * 2; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) nvqs++; info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); if (info == NULL) return (ENOMEM); for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, vtnet_tx_vq_intr, txq, &txq->vtntx_vq, "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); } if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); } /* * Enable interrupt binding if this is multiqueue. This only matters * when per-vq MSIX is available. */ if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) flags |= 0; error = virtio_alloc_virtqueues(dev, flags, nvqs, info); free(info, M_TEMP); return (error); } static int vtnet_setup_interface(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (ENOSPC); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = IF_Gbps(10); /* Approx. */ ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vtnet_init; ifp->if_ioctl = vtnet_ioctl; ifp->if_get_counter = vtnet_get_counter; #ifndef VTNET_LEGACY_TX ifp->if_transmit = vtnet_txq_mq_start; ifp->if_qflush = vtnet_qflush; #else struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; ifp->if_start = vtnet_start; IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; IFQ_SET_READY(&ifp->if_snd); #endif ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, vtnet_ifmedia_sts); ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); /* Read (or generate) the MAC address for the adapter. */ vtnet_get_hwaddr(sc); ether_ifattach(ifp, sc->vtnet_hwaddr); if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) ifp->if_capabilities |= IFCAP_LINKSTATE; /* Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } else { if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) ifp->if_capabilities |= IFCAP_TSO4; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) ifp->if_capabilities |= IFCAP_TSO6; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } if (ifp->if_capabilities & IFCAP_TSO) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; } if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) ifp->if_capabilities |= IFCAP_LRO; } if (ifp->if_capabilities & IFCAP_HWCSUM) { /* * VirtIO does not support VLAN tagging, but we can fake * it by inserting and removing the 802.1Q header during * transmit and receive. We are then able to do checksum * offloading of VLAN frames. */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; } ifp->if_capenable = ifp->if_capabilities; /* * Capabilities after here are not enabled by default. */ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } vtnet_set_rx_process_limit(sc); vtnet_set_tx_intr_threshold(sc); NETDUMP_SET(ifp, vtnet); return (0); } static int vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) { struct ifnet *ifp; int frame_size, clsize; ifp = sc->vtnet_ifp; if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) return (EINVAL); frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + new_mtu; /* * Based on the new MTU (and hence frame size) determine which * cluster size is most appropriate for the receive queues. */ if (frame_size <= MCLBYTES) { clsize = MCLBYTES; } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { /* Avoid going past 9K jumbos. */ if (frame_size > MJUM9BYTES) return (EINVAL); clsize = MJUM9BYTES; } else clsize = MJUMPAGESIZE; ifp->if_mtu = new_mtu; sc->vtnet_rx_new_clsize = clsize; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } return (0); } static int vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vtnet_softc *sc; struct ifreq *ifr; int reinit, mask, error; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifp->if_mtu != ifr->ifr_mtu) { VTNET_CORE_LOCK(sc); error = vtnet_change_mtu(sc, ifr->ifr_mtu); VTNET_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: VTNET_CORE_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_stop(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc->vtnet_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) vtnet_rx_filter(sc); else { ifp->if_flags |= IFF_PROMISC; if ((ifp->if_flags ^ sc->vtnet_if_flags) & IFF_ALLMULTI) error = ENOTSUP; } } } else vtnet_init_locked(sc); if (error == 0) sc->vtnet_if_flags = ifp->if_flags; VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) break; VTNET_CORE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_rx_filter_mac(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); break; case SIOCSIFCAP: VTNET_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWFILTER)) { /* These Rx features require us to renegotiate. */ reinit = 1; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } else reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; vq = rxq->vtnrx_vq; error = ENOSPC; for (nbufs = 0; !virtqueue_full(vq); nbufs++) { error = vtnet_rxq_new_buf(rxq); if (error) break; } if (nbufs > 0) { virtqueue_notify(vq); /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. This is not * an error. */ if (error == EMSGSIZE) error = 0; } return (error); } static void vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) { struct virtqueue *vq; struct mbuf *m; int last; vq = rxq->vtnrx_vq; last = 0; while ((m = virtqueue_drain(vq, &last)) != NULL) m_freem(m); KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in rx queue %p", __func__, rxq)); } static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; int i, clsize; clsize = sc->vtnet_rx_clsize; KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); if (m_head == NULL) goto fail; m_head->m_len = clsize; m_tail = m_head; /* Allocate the rest of the chain. */ for (i = 1; i < nbufs; i++) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); if (m == NULL) goto fail; m->m_len = clsize; m_tail->m_next = m; m_tail = m; } if (m_tailp != NULL) *m_tailp = m_tail; return (m_head); fail: sc->vtnet_stats.mbuf_alloc_failed++; m_freem(m_head); return (NULL); } /* * Slow path for when LRO without mergeable buffers is negotiated. */ static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, int len0) { struct vtnet_softc *sc; struct mbuf *m, *m_prev; struct mbuf *m_new, *m_tail; int len, clsize, nreplace, error; sc = rxq->vtnrx_sc; clsize = sc->vtnet_rx_clsize; m_prev = NULL; m_tail = NULL; nreplace = 0; m = m0; len = len0; /* * Since these mbuf chains are so large, we avoid allocating an * entire replacement chain if possible. When the received frame * did not consume the entire chain, the unused mbufs are moved * to the replacement chain. */ while (len > 0) { /* * Something is seriously wrong if we received a frame * larger than the chain. Drop it. */ if (m == NULL) { sc->vtnet_stats.rx_frame_too_large++; return (EMSGSIZE); } /* We always allocate the same cluster size. */ KASSERT(m->m_len == clsize, ("%s: mbuf size %d is not the cluster size %d", __func__, m->m_len, clsize)); m->m_len = MIN(m->m_len, len); len -= m->m_len; m_prev = m; m = m->m_next; nreplace++; } KASSERT(nreplace <= sc->vtnet_rx_nmbufs, ("%s: too many replacement mbufs %d max %d", __func__, nreplace, sc->vtnet_rx_nmbufs)); m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); if (m_new == NULL) { m_prev->m_len = clsize; return (ENOBUFS); } /* * Move any unused mbufs from the received chain onto the end * of the new chain. */ if (m_prev->m_next != NULL) { m_tail->m_next = m_prev->m_next; m_prev->m_next = NULL; } error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * BAD! We could not enqueue the replacement mbuf chain. We * must restore the m0 chain to the original state if it was * modified so we can subsequently discard it. * * NOTE: The replacement is suppose to be an identical copy * to the one just dequeued so this is an unexpected error. */ sc->vtnet_stats.rx_enq_replacement_failed++; if (m_tail->m_next != NULL) { m_prev->m_next = m_tail->m_next; m_tail->m_next = NULL; } m_prev->m_len = clsize; m_freem(m_new); } return (error); } static int vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { struct vtnet_softc *sc; struct mbuf *m_new; int error; sc = rxq->vtnrx_sc; KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); if (m->m_next == NULL) { /* Fast-path for the common case of just one mbuf. */ if (m->m_len < len) return (EINVAL); m_new = vtnet_rx_alloc_buf(sc, 1, NULL); if (m_new == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * The new mbuf is suppose to be an identical * copy of the one just dequeued so this is an * unexpected error. */ m_freem(m_new); sc->vtnet_stats.rx_enq_replacement_failed++; } else m->m_len = len; } else error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); return (error); } static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { struct vtnet_softc *sc; struct sglist *sg; struct vtnet_rx_header *rxhdr; uint8_t *mdata; int offset, error; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; mdata = mtod(m, uint8_t *); VTNET_RXQ_LOCK_ASSERT(rxq); KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); KASSERT(m->m_len == sc->vtnet_rx_clsize, ("%s: unexpected cluster size %d/%d", __func__, m->m_len, sc->vtnet_rx_clsize)); sglist_reset(sg); if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); rxhdr = (struct vtnet_rx_header *) mdata; sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); offset = sizeof(struct vtnet_rx_header); } else offset = 0; sglist_append(sg, mdata + offset, m->m_len - offset); if (m->m_next != NULL) { error = sglist_append_mbuf(sg, m->m_next); MPASS(error == 0); } error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg); return (error); } static int vtnet_rxq_new_buf(struct vtnet_rxq *rxq) { struct vtnet_softc *sc; struct mbuf *m; int error; sc = rxq->vtnrx_sc; m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); if (m == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m); if (error) m_freem(m); return (error); } /* * Use the checksum offset in the VirtIO header to set the * correct CSUM_* flags. */ static int vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; #if defined(INET) || defined(INET6) int offset = hdr->csum_start + hdr->csum_offset; #endif sc = rxq->vtnrx_sc; /* Only do a basic sanity check on the offset. */ switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(offset < ip_start + sizeof(struct ip))) return (1); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } /* * Use the offset to determine the appropriate CSUM_* flags. This is * a bit dirty, but we can get by with it since the checksum offsets * happen to be different. We assume the host host does not do IPv4 * header checksum offloading. */ switch (hdr->csum_offset) { case offsetof(struct udphdr, uh_sum): case offsetof(struct tcphdr, th_sum): m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case offsetof(struct sctphdr, checksum): m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: sc->vtnet_stats.rx_csum_bad_offset++; return (1); } return (0); } static int vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int offset, proto; sc = rxq->vtnrx_sc; switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip; if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) return (1); ip = (struct ip *)(m->m_data + ip_start); proto = ip->ip_p; offset = ip_start + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < ip_start + sizeof(struct ip6_hdr))) return (1); offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); if (__predict_false(offset < 0)) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } switch (proto) { case IPPROTO_TCP: if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_UDP: if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_SCTP: if (__predict_false(m->m_len < offset + sizeof(struct sctphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: /* * For the remaining protocols, FreeBSD does not support * checksum offloading, so the checksum will be recomputed. */ #if 0 if_printf(sc->vtnet_ifp, "cksum offload of unsupported " "protocol eth_type=%#x proto=%d csum_start=%d " "csum_offset=%d\n", __func__, eth_type, proto, hdr->csum_start, hdr->csum_offset); #endif break; } return (0); } /* * Set the appropriate CSUM_* flags. Unfortunately, the information * provided is not directly useful to us. The VirtIO header gives the * offset of the checksum, which is all Linux needs, but this is not * how FreeBSD does things. We are forced to peek inside the packet * a bit. * * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD * could accept the offsets and let the stack figure it out. */ static int vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct ether_header *eh; struct ether_vlan_header *evh; uint16_t eth_type; int offset, error; eh = mtod(m, struct ether_header *); eth_type = ntohs(eh->ether_type); if (eth_type == ETHERTYPE_VLAN) { /* BMV: We should handle nested VLAN tags too. */ evh = mtod(m, struct ether_vlan_header *); eth_type = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else offset = sizeof(struct ether_header); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); else error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); return (error); } static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) { struct mbuf *m; while (--nbufs > 0) { m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); if (m == NULL) break; vtnet_rxq_discard_buf(rxq, m); } } static void vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) { int error; /* * Requeue the discarded mbuf. This should always be successful * since it was just dequeued. */ error = vtnet_rxq_enqueue_buf(rxq, m); KASSERT(error == 0, ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m, *m_tail; int len; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; m_tail = m_head; while (--nbufs > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) { rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } if (vtnet_rxq_new_buf(rxq) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } if (m->m_len < len) len = m->m_len; m->m_len = len; m->m_flags &= ~M_PKTHDR; m_head->m_pkthdr.len += len; m_tail->m_next = m; m_tail = m; } return (0); fail: sc->vtnet_stats.rx_mergeable_failed++; m_freem(m_head); return (1); } static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; struct ifnet *ifp; struct ether_header *eh; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { vtnet_vlan_tag_remove(m); /* * With the 802.1Q header removed, update the * checksum starting location accordingly. */ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; } } m->m_pkthdr.flowid = rxq->vtnrx_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); /* * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum * distinction that Linux does. Need to reevaluate if performing * offloading for the NEEDS_CSUM case is really appropriate. */ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; } rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; VTNET_RXQ_UNLOCK(rxq); (*ifp->if_input)(ifp, m); VTNET_RXQ_LOCK(rxq); } static int vtnet_rxq_eof(struct vtnet_rxq *rxq) { struct virtio_net_hdr lhdr, *hdr; struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m; struct virtio_net_hdr_mrg_rxbuf *mhdr; int len, deq, nbufs, adjsz, count; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; hdr = &lhdr; deq = 0; count = sc->vtnet_rx_process_limit; VTNET_RXQ_LOCK_ASSERT(rxq); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &deq)) { return (FALSE); } #endif /* DEV_NETMAP */ while (count-- > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { rxq->vtnrx_stats.vrxs_ierrors++; vtnet_rxq_discard_buf(rxq, m); continue; } if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* * Account for our pad inserted between the header * and the actual start of the frame. */ len += VTNET_RX_HEADER_PAD; } else { mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); nbufs = mhdr->num_buffers; adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; } m->m_pkthdr.len = len; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags = 0; if (nbufs > 1) { /* Dequeue the rest of chain. */ if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) continue; } /* * Save copy of header before we strip it. For both mergeable * and non-mergeable, the header is at the beginning of the * mbuf data. We no longer need num_buffers, so always use a * regular header. * * BMV: Is this memcpy() expensive? We know the mbuf data is * still valid even after the m_adj(). */ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); vtnet_rxq_input(rxq, m, hdr); /* Must recheck after dropping the Rx lock. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (deq > 0) virtqueue_notify(vq); return (count > 0 ? 0 : EAGAIN); } static void vtnet_rx_vq_intr(void *xrxq) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int tries, more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; tries = 0; if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_rxq_disable_intr(rxq); return; } VTNET_RXQ_LOCK(rxq); again: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); /* * This is an occasional condition or race (when !more), * so retry a few times before scheduling the taskqueue. */ if (tries++ < VTNET_INTR_DISABLE_RETRIES) goto again; VTNET_RXQ_UNLOCK(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } else VTNET_RXQ_UNLOCK(rxq); } static void vtnet_rxq_tq_intr(void *xrxq, int pending) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; VTNET_RXQ_LOCK(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } VTNET_RXQ_UNLOCK(rxq); } static int vtnet_txq_below_threshold(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct virtqueue *vq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); } static int vtnet_txq_notify(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; txq->vtntx_watchdog = VTNET_TX_TIMEOUT; virtqueue_notify(vq); if (vtnet_txq_enable_intr(txq) == 0) return (0); /* * Drain frames that were completed since last checked. If this * causes the queue to go above the threshold, the caller should * continue transmitting. */ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { virtqueue_disable_intr(vq); return (1); } return (0); } static void vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; int last; vq = txq->vtntx_vq; last = 0; while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in tx queue %p", __func__, txq)); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct vtnet_softc *sc; struct ether_vlan_header *evh; int offset; sc = txq->vtntx_sc; evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. */ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip, iphdr; if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = (struct ip *)(m->m_data + offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: sc->vtnet_stats.tx_csum_bad_ethtype++; return (EINVAL); } return (0); } static int vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, int offset, struct virtio_net_hdr *hdr) { static struct timeval lastecn; static int curecn; struct vtnet_softc *sc; struct tcphdr *tcp, tcphdr; sc = txq->vtntx_sc; if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else tcp = (struct tcphdr *)(m->m_data + offset); hdr->hdr_len = offset + (tcp->th_off << 2); hdr->gso_size = m->m_pkthdr.tso_segsz; hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6; if (tcp->th_flags & TH_CWR) { /* * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, * ECN support is not on a per-interface basis, but globally via * the net.inet.tcp.ecn.enable sysctl knob. The default is off. */ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { if (ppsratecheck(&lastecn, &curecn, 1)) if_printf(sc->vtnet_ifp, "TSO with ECN not negotiated with host\n"); return (ENOTSUP); } hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } txq->vtntx_stats.vtxs_tso++; return (0); } static struct mbuf * vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int flags, etype, csum_start, proto, error; sc = txq->vtntx_sc; flags = m->m_pkthdr.csum_flags; error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); if (error) goto drop; if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { /* * We could compare the IP protocol vs the CSUM_ flag too, * but that really should not be necessary. */ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = csum_start; hdr->csum_offset = m->m_pkthdr.csum_data; txq->vtntx_stats.vtxs_csum++; } if (flags & CSUM_TSO) { if (__predict_false(proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. */ sc->vtnet_stats.tx_tso_not_tcp++; goto drop; } KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, ("%s: mbuf %p TSO without checksum offload %#x", __func__, m, flags)); error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); if (error) goto drop; } return (m); drop: m_freem(m); return (NULL); } static int vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { struct vtnet_softc *sc; struct virtqueue *vq; struct sglist *sg; struct mbuf *m; int error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; sg = txq->vtntx_sg; m = *m_head; sglist_reset(sg); error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); KASSERT(error == 0 && sg->sg_nseg == 1, ("%s: error %d adding header to sglist", __func__, error)); error = sglist_append_mbuf(sg, m); if (error) { m = m_defrag(m, M_NOWAIT); if (m == NULL) goto fail; *m_head = m; sc->vtnet_stats.tx_defragged++; error = sglist_append_mbuf(sg, m); if (error) goto fail; } txhdr->vth_mbuf = m; error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); return (error); fail: sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } static int vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) { struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO); if (txhdr == NULL) { m_freem(m); *m_head = NULL; return (ENOMEM); } /* * Always use the non-mergeable header, regardless if the feature * was negotiated. For transmit, num_buffers is always zero. The * vtnet_hdr_size is used to enqueue the correct header size. */ hdr = &txhdr->vth_uhdr.hdr; if (m->m_flags & M_VLANTAG) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } m->m_flags &= ~M_VLANTAG; } if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); if (error == 0) return (0); fail: uma_zfree(vtnet_tx_header_zone, txhdr); return (error); } #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m0; int tries, enq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; tries = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) return; vtnet_txq_eof(txq); again: enq = 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (virtqueue_full(vq)) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { if (m0 != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m0); break; } enq++; ETHER_BPF_MTAP(ifp, m0); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } } static void vtnet_start(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; sc = ifp->if_softc; txq = &sc->vtnet_txqs[0]; VTNET_TXQ_LOCK(txq); vtnet_start_locked(txq, ifp); VTNET_TXQ_UNLOCK(txq); } #else /* !VTNET_LEGACY_TX */ static int vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) { struct vtnet_softc *sc; struct virtqueue *vq; struct buf_ring *br; struct ifnet *ifp; int enq, tries, error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; br = txq->vtntx_br; ifp = sc->vtnet_ifp; tries = 0; error = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL) { error = drbr_enqueue(ifp, br, m); if (error) return (error); } vtnet_txq_eof(txq); again: enq = 0; while ((m = drbr_peek(ifp, br)) != NULL) { if (virtqueue_full(vq)) { drbr_putback(ifp, br, m); break; } if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else drbr_advance(ifp, br); break; } drbr_advance(ifp, br); enq++; ETHER_BPF_MTAP(ifp, m); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } return (0); } static int vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int i, npairs, error; sc = ifp->if_softc; npairs = sc->vtnet_act_vq_pairs; /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % npairs; else i = curcpu % npairs; txq = &sc->vtnet_txqs[i]; if (VTNET_TXQ_TRYLOCK(txq) != 0) { error = vtnet_txq_mq_start_locked(txq, m); VTNET_TXQ_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->vtntx_br, m); taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); } return (error); } static void vtnet_txq_tq_deferred(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; txq = xtxq; sc = txq->vtntx_sc; VTNET_TXQ_LOCK(txq); if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); VTNET_TXQ_UNLOCK(txq); } #endif /* VTNET_LEGACY_TX */ static void vtnet_txq_start(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct ifnet *ifp; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; #ifdef VTNET_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vtnet_start_locked(txq, ifp); #else if (!drbr_empty(ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); #endif } static void vtnet_txq_tq_intr(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static int vtnet_txq_eof(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; struct mbuf *m; int deq; vq = txq->vtntx_vq; deq = 0; VTNET_TXQ_LOCK_ASSERT(txq); #ifdef DEV_NETMAP if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) { virtqueue_disable_intr(vq); // XXX luigi return 0; // XXX or 1 ? } #endif /* DEV_NETMAP */ while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { m = txhdr->vth_mbuf; deq++; txq->vtntx_stats.vtxs_opackets++; txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vtntx_stats.vtxs_omcasts++; m_freem(m); uma_zfree(vtnet_tx_header_zone, txhdr); } if (virtqueue_empty(vq)) txq->vtntx_watchdog = 0; return (deq); } static void vtnet_tx_vq_intr(void *xtxq) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_txq_disable_intr(txq); return; } VTNET_TXQ_LOCK(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static void vtnet_tx_start_all(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } } #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct mbuf *m; int i; sc = ifp->if_softc; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) m_freem(m); VTNET_TXQ_UNLOCK(txq); } if_qflush(ifp); } #endif static int vtnet_watchdog(struct vtnet_txq *txq) { struct ifnet *ifp; ifp = txq->vtntx_sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if (txq->vtntx_watchdog == 1) { /* * Only drain completed frames if the watchdog is about to * expire. If any frames were drained, there may be enough * free descriptors now available to transmit queued frames. * In that case, the timer will immediately be decremented * below, but the timeout is generous enough that should not * be a problem. */ if (vtnet_txq_eof(txq) != 0) vtnet_txq_start(txq); } if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { VTNET_TXQ_UNLOCK(txq); return (0); } VTNET_TXQ_UNLOCK(txq); if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); return (1); } static void vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, struct vtnet_txq_stats *txacc) { bzero(rxacc, sizeof(struct vtnet_rxq_stats)); bzero(txacc, sizeof(struct vtnet_txq_stats)); for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { struct vtnet_rxq_stats *rxst; struct vtnet_txq_stats *txst; rxst = &sc->vtnet_rxqs[i].vtnrx_stats; rxacc->vrxs_ipackets += rxst->vrxs_ipackets; rxacc->vrxs_ibytes += rxst->vrxs_ibytes; rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; rxacc->vrxs_csum += rxst->vrxs_csum; rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; txst = &sc->vtnet_txqs[i].vtntx_stats; txacc->vtxs_opackets += txst->vtxs_opackets; txacc->vtxs_obytes += txst->vtxs_obytes; txacc->vtxs_csum += txst->vtxs_csum; txacc->vtxs_tso += txst->vtxs_tso; txacc->vtxs_rescheduled += txst->vtxs_rescheduled; } } static uint64_t vtnet_get_counter(if_t ifp, ift_counter cnt) { struct vtnet_softc *sc; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; sc = if_getsoftc(ifp); vtnet_accum_stats(sc, &rxaccum, &txaccum); switch (cnt) { case IFCOUNTER_IPACKETS: return (rxaccum.vrxs_ipackets); case IFCOUNTER_IQDROPS: return (rxaccum.vrxs_iqdrops); case IFCOUNTER_IERRORS: return (rxaccum.vrxs_ierrors); case IFCOUNTER_OPACKETS: return (txaccum.vtxs_opackets); #ifndef VTNET_LEGACY_TX case IFCOUNTER_OBYTES: return (txaccum.vtxs_obytes); case IFCOUNTER_OMCASTS: return (txaccum.vtxs_omcasts); #endif default: return (if_get_counter_default(ifp, cnt)); } } static void vtnet_tick(void *xsc) { struct vtnet_softc *sc; struct ifnet *ifp; int i, timedout; sc = xsc; ifp = sc->vtnet_ifp; timedout = 0; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); if (timedout != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } else callout_schedule(&sc->vtnet_tick_ch, hz); } static void vtnet_start_taskqueues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, error; dev = sc->vtnet_dev; /* * Errors here are very difficult to recover from - we cannot * easily fail because, if this is during boot, we will hang * when freeing any successfully started taskqueues because * the scheduler isn't up yet. * * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); if (error) { device_printf(dev, "failed to start rx taskq %d\n", rxq->vtnrx_id); } txq = &sc->vtnet_txqs[i]; error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); if (error) { device_printf(dev, "failed to start tx taskq %d\n", txq->vtntx_id); } } } static void vtnet_free_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) { taskqueue_free(rxq->vtnrx_tq); rxq->vtnrx_vq = NULL; } txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_free(txq->vtntx_tq); txq->vtntx_tq = NULL; } } } static void vtnet_drain_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); #ifndef VTNET_LEGACY_TX taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); #endif } } } static void vtnet_drain_rxtx_queues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; #ifdef DEV_NETMAP if (nm_native_on(NA(sc->vtnet_ifp))) return; #endif /* DEV_NETMAP */ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; vtnet_rxq_free_mbufs(rxq); txq = &sc->vtnet_txqs[i]; vtnet_txq_free_mbufs(txq); } } static void vtnet_stop_rendezvous(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; /* * Lock and unlock the per-queue mutex so we known the stop * state is visible. Doing only the active queues should be * sufficient, but it does not cost much extra to do all the * queues. Note we hold the core mutex here too. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; VTNET_RXQ_LOCK(rxq); VTNET_RXQ_UNLOCK(rxq); txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); VTNET_TXQ_UNLOCK(txq); } } static void vtnet_stop(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); /* Only advisory. */ vtnet_disable_interrupts(sc); /* * Stop the host adapter. This resets it to the pre-initialized * state. It will not generate any interrupts until after it is * reinitialized. */ virtio_stop(dev); vtnet_stop_rendezvous(sc); /* Free any mbufs left in the virtqueues. */ vtnet_drain_rxtx_queues(sc); } static int vtnet_virtio_reinit(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint64_t features; int mask, error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_features; mask = 0; #if defined(INET) mask |= IFCAP_RXCSUM; #endif #if defined (INET6) mask |= IFCAP_RXCSUM_IPV6; #endif /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist. */ if (ifp->if_capabilities & mask) { /* * We require both IPv4 and IPv6 offloading to be enabled * in order to negotiated it: VirtIO does not distinguish * between the two. */ if ((ifp->if_capenable & mask) != mask) features &= ~VIRTIO_NET_F_GUEST_CSUM; } if (ifp->if_capabilities & IFCAP_LRO) { if ((ifp->if_capenable & IFCAP_LRO) == 0) features &= ~VTNET_LRO_FEATURES; } if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) { if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) features &= ~VIRTIO_NET_F_CTRL_VLAN; } error = virtio_reinit(dev, features); if (error) device_printf(dev, "virtio reinit error %d\n", error); return (error); } static void vtnet_init_rx_filters(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { /* Restore promiscuous and all-multicast modes. */ vtnet_rx_filter(sc); /* Restore filtered MAC addresses. */ vtnet_rx_filter_mac(sc); } if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) vtnet_rx_filter_vlan(sc); } static int vtnet_init_rx_queues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; int i, clsize, error; dev = sc->vtnet_dev; /* * Use the new cluster size if one has been set (via a MTU * change). Otherwise, use the standard 2K clusters. * * BMV: It might make sense to use page sized clusters as * the default (depending on the features negotiated). */ if (sc->vtnet_rx_new_clsize != 0) { clsize = sc->vtnet_rx_new_clsize; sc->vtnet_rx_new_clsize = 0; } else clsize = MCLBYTES; sc->vtnet_rx_clsize = clsize; sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, ("%s: too many rx mbufs %d for %d segments", __func__, sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); #ifdef DEV_NETMAP if (vtnet_netmap_init_rx_buffers(sc)) return 0; #endif /* DEV_NETMAP */ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; /* Hold the lock to satisfy asserts. */ VTNET_RXQ_LOCK(rxq); error = vtnet_rxq_populate(rxq); VTNET_RXQ_UNLOCK(rxq); if (error) { device_printf(dev, "cannot allocate mbufs for Rx queue %d\n", i); return (error); } } return (0); } static int vtnet_init_tx_queues(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; txq->vtntx_watchdog = 0; } return (0); } static int vtnet_init_rxtx_queues(struct vtnet_softc *sc) { int error; error = vtnet_init_rx_queues(sc); if (error) return (error); error = vtnet_init_tx_queues(sc); if (error) return (error); return (0); } static void vtnet_set_active_vq_pairs(struct vtnet_softc *sc) { device_t dev; int npairs; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { sc->vtnet_act_vq_pairs = 1; return; } npairs = sc->vtnet_requested_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, "cannot set active queue pairs to %d\n", npairs); npairs = 1; } sc->vtnet_act_vq_pairs = npairs; } static int vtnet_reinit(struct vtnet_softc *sc) { struct ifnet *ifp; int error; ifp = sc->vtnet_ifp; /* Use the current MAC address. */ bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); vtnet_set_hwaddr(sc); vtnet_set_active_vq_pairs(sc); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) vtnet_init_rx_filters(sc); error = vtnet_init_rxtx_queues(sc); if (error) return (error); vtnet_enable_interrupts(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; return (0); } static void vtnet_init_locked(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; vtnet_stop(sc); /* Reinitialize with the host. */ if (vtnet_virtio_reinit(sc) != 0) goto fail; if (vtnet_reinit(sc) != 0) goto fail; virtio_reinit_complete(dev); vtnet_update_link_status(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); return; fail: vtnet_stop(sc); } static void vtnet_init(void *xsc) { struct vtnet_softc *sc; sc = xsc; #ifdef DEV_NETMAP if (!NA(sc->vtnet_ifp)) { D("try to attach again"); vtnet_netmap_attach(sc); } #endif /* DEV_NETMAP */ VTNET_CORE_LOCK(sc); vtnet_init_locked(sc); VTNET_CORE_UNLOCK(sc); } static void vtnet_free_ctrl_vq(struct vtnet_softc *sc) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; /* * The control virtqueue is only polled and therefore it should * already be empty. */ KASSERT(virtqueue_empty(vq), ("%s: ctrl vq %p not empty", __func__, vq)); } static void vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, ("%s: CTRL_VQ feature not negotiated", __func__)); if (!virtqueue_empty(vq)) return; if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) return; /* * Poll for the response, but the command is likely already * done when we return from the notify. */ virtqueue_notify(vq); virtqueue_poll(vq, NULL); } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct sglist_seg segs[3]; struct sglist sg; uint8_t ack; int error; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding set MAC msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); return (ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; struct virtio_net_ctrl_mq mq; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; s.mq.virtqueue_pairs = npairs; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding MQ message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint8_t onoff; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = !!on; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding Rx message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_set_promisc(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); } static int vtnet_set_allmulti(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } /* * The device defaults to promiscuous mode for backwards compatibility. * Turn it off at attach time if possible. */ static void vtnet_attach_disable_promisc(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { ifp->if_flags |= IFF_PROMISC; } else if (vtnet_set_promisc(sc, 0) != 0) { ifp->if_flags |= IFF_PROMISC; device_printf(sc->vtnet_dev, "cannot disable default promiscuous mode\n"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_rx_filter(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) device_printf(dev, "cannot %s promiscuous mode\n", ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) device_printf(dev, "cannot %s all-multicast mode\n", ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); } static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct vtnet_mac_filter *filter; struct sglist_seg segs[4]; struct sglist sg; struct ifnet *ifp; struct ifaddr *ifa; struct ifmultiaddr *ifma; int ucnt, mcnt, promisc, allmulti, error; uint8_t ack; ifp = sc->vtnet_ifp; filter = sc->vtnet_mac_filter; ucnt = 0; mcnt = 0; promisc = 0; allmulti = 0; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) continue; else if (ucnt == VTNET_MAX_MAC_ENTRIES) { promisc = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); ucnt++; } if_addr_runlock(ifp); if (promisc != 0) { filter->vmf_unicast.nentries = 0; if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_unicast.nentries = ucnt; /* Multicast MAC addresses: */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; else if (mcnt == VTNET_MAX_MAC_ENTRIES) { allmulti = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (allmulti != 0) { filter->vmf_multicast.nentries = 0; if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_multicast.nentries = mcnt; if (promisc != 0 && allmulti != 0) goto out; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 4, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &filter->vmf_multicast, sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 4, ("%s: error %d adding MAC filter msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); out: if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint16_t tag; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; s.tag = tag; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding VLAN message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { uint32_t w; uint16_t tag; int i, bit; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, ("%s: VLAN_FILTER feature not negotiated", __func__)); /* Enable the filter for each configured VLAN. */ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { w = sc->vtnet_vlan_filter[i]; while ((bit = ffs(w) - 1) != -1) { w &= ~(1 << bit); tag = sizeof(w) * CHAR_BIT * i + bit; if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { device_printf(sc->vtnet_dev, "cannot enable VLAN %d filter\n", tag); } } } } static void vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VTNET_CORE_LOCK(sc); if (add) sc->vtnet_vlan_filter[idx] |= (1 << bit); else sc->vtnet_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && ifp->if_drv_flags & IFF_DRV_RUNNING && vtnet_exec_vlan_filter(sc, add, tag) != 0) { device_printf(sc->vtnet_dev, "cannot %s VLAN %d %s the host filter table\n", add ? "add" : "remove", tag, add ? "to" : "from"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 1, tag); } static void vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 0, tag); } static int vtnet_is_link_up(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint16_t status; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) status = VIRTIO_NET_S_LINK_UP; else status = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, status)); return ((status & VIRTIO_NET_S_LINK_UP) != 0); } static void vtnet_update_link_status(struct vtnet_softc *sc) { struct ifnet *ifp; int link; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); link = vtnet_is_link_up(sc); /* Notify if the link status has changed. */ if (link != 0 && sc->vtnet_link_active == 0) { sc->vtnet_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vtnet_link_active != 0) { sc->vtnet_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static int vtnet_ifmedia_upd(struct ifnet *ifp) { struct vtnet_softc *sc; struct ifmedia *ifm; sc = ifp->if_softc; ifm = &sc->vtnet_media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vtnet_softc *sc; sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= VTNET_MEDIATYPE; } else ifmr->ifm_active |= IFM_NONE; VTNET_CORE_UNLOCK(sc); } static void vtnet_set_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) device_printf(dev, "unable to set MAC address\n"); } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { for (i = 0; i < ETHER_ADDR_LEN; i++) { virtio_write_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i, sc->vtnet_hwaddr[i]); } } } static void vtnet_get_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { /* * Generate a random locally administered unicast address. * * It would be nice to generate the same MAC address across * reboots, but it seems all the hosts currently available * support the MAC feature, so this isn't too important. */ sc->vtnet_hwaddr[0] = 0xB2; arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); vtnet_set_hwaddr(sc); return; } for (i = 0; i < ETHER_ADDR_LEN; i++) { sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i); } } static void vtnet_vlan_tag_remove(struct mbuf *m) { struct ether_vlan_header *evh; evh = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); m->m_flags |= M_VLANTAG; /* Strip the 802.1Q header. */ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); } static void vtnet_set_rx_process_limit(struct vtnet_softc *sc) { int limit; limit = vtnet_tunable_int(sc, "rx_process_limit", vtnet_rx_process_limit); if (limit < 0) limit = INT_MAX; sc->vtnet_rx_process_limit = limit; } static void vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) { int size, thresh; size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); /* * The Tx interrupt is disabled until the queue free count falls * below our threshold. Completed frames are drained from the Tx * virtqueue before transmitting new frames and in the watchdog * callout, so the frequency of Tx interrupts is greatly reduced, * at the cost of not freeing mbufs as quickly as they otherwise * would be. * * N.B. We assume all the Tx queues are the same size. */ thresh = size / 4; /* * Without indirect descriptors, leave enough room for the most * segments we handle. */ if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && thresh < sc->vtnet_tx_nsegs) thresh = sc->vtnet_tx_nsegs; sc->vtnet_tx_intr_thresh = thresh; } static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_rxq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); list = SYSCTL_CHILDREN(node); stats = &rxq->vtnrx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vrxs_ierrors, "Receive errors"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vrxs_csum, "Receive checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, &stats->vrxs_csum_failed, "Receive checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); } static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_txq *txq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_txq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); list = SYSCTL_CHILDREN(node); stats = &txq->vtntx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vtxs_tso, "Transmit segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); } static void vtnet_setup_queue_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; int i; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); } } static void vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_softc *sc) { struct vtnet_statistics *stats; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; vtnet_accum_stats(sc, &rxaccum, &txaccum); stats = &sc->vtnet_stats; stats->rx_csum_offloaded = rxaccum.vrxs_csum; stats->rx_csum_failed = rxaccum.vrxs_csum_failed; stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; stats->tx_csum_offloaded = txaccum.vtxs_csum; stats->tx_tso_offloaded = txaccum.vtxs_tso; stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", CTLFLAG_RD, &stats->rx_csum_bad_proto, "Received checksum offloaded buffer with incorrect protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", CTLFLAG_RD, &stats->tx_csum_bad_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", CTLFLAG_RD, &stats->tx_tso_bad_ethtype, "Aborted transmit of TSO buffer with unknown Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", CTLFLAG_RD, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with non TCP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", CTLFLAG_RD, &stats->tx_defragged, "Transmit mbufs defragged"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", CTLFLAG_RD, &stats->tx_defrag_failed, "Aborted transmit of buffer because defrag failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", CTLFLAG_RD, &stats->tx_csum_offloaded, "Offloaded checksum of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", CTLFLAG_RD, &stats->tx_tso_offloaded, "Segmentation offload of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } static void vtnet_setup_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Maximum number of supported virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs", CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0, "Requested number of virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); vtnet_setup_stat_sysctl(ctx, child, sc); } static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { virtqueue_disable_intr(rxq->vtnrx_vq); } static int vtnet_txq_enable_intr(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; if (vtnet_txq_below_threshold(txq) != 0) return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); /* * The free count is above our threshold. Keep the Tx interrupt * disabled until the queue is fuller. */ return (0); } static void vtnet_txq_disable_intr(struct vtnet_txq *txq) { virtqueue_disable_intr(txq->vtntx_vq); } static void vtnet_enable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_enable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); } static void vtnet_enable_interrupts(struct vtnet_softc *sc) { vtnet_enable_rx_interrupts(sc); vtnet_enable_tx_interrupts(sc); } static void vtnet_disable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_disable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); } static void vtnet_disable_interrupts(struct vtnet_softc *sc) { vtnet_disable_rx_interrupts(sc); vtnet_disable_tx_interrupts(sc); } static int vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } #ifdef NETDUMP static void -vtnet_netdump_init(struct ifnet *ifp, int *nrxr, int *clsize) +vtnet_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct vtnet_softc *sc; sc = if_getsoftc(ifp); VTNET_CORE_LOCK(sc); *nrxr = sc->vtnet_max_vq_pairs; + *ncl = NETDUMP_MAX_IN_FLIGHT; *clsize = sc->vtnet_rx_clsize; VTNET_CORE_UNLOCK(sc); /* * We need to allocate from this zone in the transmit path, so ensure * that we have at least one item per header available. * XXX add a separate zone like we do for mbufs? otherwise we may alloc * buckets */ uma_zone_reserve(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); uma_prealloc(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); } static void vtnet_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) { } static int vtnet_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &sc->vtnet_txqs[0]; error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); if (error == 0) error = vtnet_txq_notify(txq); return (error); } static int vtnet_netdump_poll(struct ifnet *ifp, int count) { struct vtnet_softc *sc; int i; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/net/iflib.c =================================================================== --- user/markj/netdump/sys/net/iflib.c (revision 332406) +++ user/markj/netdump/sys/net/iflib.c (revision 332407) @@ -1,6058 +1,6059 @@ /*- * Copyright (c) 2014-2017, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #include #include #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. */ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; int ifc_link_irq; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (&ctx->ifc_media); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define M_TOOBIG M_PROTO1 typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ uint8_t *ifsd_flags; } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ uint8_t *ifsd_flags; } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 /* bnxt supports 64 with hardware LRO enabled */ #define IFLIB_MAX_RX_SEGS 64 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define IFC_LEGACY 0x001 #define IFC_QFLUSH 0x002 #define IFC_MULTISEG 0x004 #define IFC_DMAR 0x008 #define IFC_SC_ALLOCATED 0x010 #define IFC_INIT_DONE 0x020 #define IFC_PREFETCH 0x040 #define IFC_DO_RESET 0x080 #define IFC_CHECK_HUNG 0x100 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_desc_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { /* If there is a separate completion queue - * these are the cq cidx and pidx. Otherwise * these are unused. */ qidx_t ifr_size; qidx_t ifr_cq_cidx; qidx_t ifr_cq_pidx; uint8_t ifr_cq_gen; uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; struct mbuf **ifsd_m; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ #define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else #define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; #endif } /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) #define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) #define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) #define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# tx mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# tx mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# tx mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# tx frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# rx allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; static int iflib_txq_drain_encapfail; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD, &iflib_txq_drain_encapfail, 0, "# drain encap fails"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_pad_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_intr_link; static int iflib_intr_msix; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_zero_len; static int iflib_rx_if_input; static int iflib_rx_mbuf_null; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD, &iflib_intr_link, 0, "# intr link calls"); SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD, &iflib_intr_msix, 0, "# intr msix calls"); SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# rx intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD, &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_txq_drain_encapfail = iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_intr_link = iflib_intr_msix = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input = iflib_rx_mbuf_null = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static int iflib_register(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif NETDUMP_DEFINE(iflib); #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_stop(ctx); iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) { struct netmap_adapter *na = kring->na; u_int const lim = kring->nkr_num_slots - 1; u_int head = kring->rhead; struct netmap_ring *ring = kring->ring; bus_dmamap_t *map; struct if_rxd_update iru; if_ctx_t ctx = rxq->ifr_ctx; iflib_fl_t fl = &rxq->ifr_fl[0]; uint32_t refill_pidx, nic_i; if (nm_i == head && __predict_true(!init)) return 0; iru_init(&iru, rxq, 0 /* flid */); map = fl->ifl_sds.ifsd_map; refill_pidx = netmap_idx_k2n(kring, nm_i); /* * IMPORTANT: we must leave one free slot in the ring, * so move head back by one unit */ head = nm_prev(head, lim); while (nm_i != head) { for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) { struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); uint32_t nic_i_dma = refill_pidx; nic_i = netmap_idx_k2n(kring, nm_i); MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ return netmap_ring_reinit(kring); fl->ifl_vm_addrs[tmp_pidx] = addr; if (__predict_false(init) && map) { netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); } else if (map && (slot->flags & NS_BUF_CHANGED)) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); } slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) continue; iru.iru_pidx = refill_pidx; iru.iru_count = tmp_pidx+1; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); refill_pidx = nic_i; if (map == NULL) continue; for (int n = 0; n < iru.iru_count; n++) { bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], BUS_DMASYNC_PREREAD); /* XXX - change this to not use the netmap func*/ nic_i_dma = nm_next(nic_i_dma, lim); } } } kring->nr_hwcur = head; if (map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); return (0); } /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap ring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur); pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); if (txq->ift_sds.ifsd_map) __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_len = len; pi.ipi_segs[0].ds_addr = paddr; pi.ipi_segs[0].ds_len = len; pi.ipi_nsegs = 1; pi.ipi_ndescs = 0; pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); if (txq->ift_sds.ifsd_map) { __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; /* synchronize the NIC ring */ if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. */ if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = netmap_idx_n2k(kring, kring->rhead); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { if (fl->ifl_sds.ifsd_map == NULL) continue; bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = nic_i; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = 0; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = netmap_idx_k2n(kring, nm_i); } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur); return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; if (txq->ift_sds.ifsd_map == NULL) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; uint32_t nm_i; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; nm_i = netmap_idx_n2k(kring, 0); netmap_fl_refill(rxq, kring, nm_i, true); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } static __inline void prefetch2cachelines(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); #if (CACHE_LINE_SIZE < 128) __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); #endif } #else #define prefetch(x) #define prefetch2cachelines(x) #endif static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) { iflib_fl_t fl; fl = &rxq->ifr_fl[flid]; iru->iru_paddrs = fl->ifl_bus_addrs; iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; iru->iru_idxs = fl->ifl_rxd_idxs; iru->iru_qsidx = rxq->ifr_id; iru->iru_buf_size = fl->ifl_buf_size; iru->iru_flidx = fl->ifl_id; } static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { int err; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ sctx->isc_q_align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. */ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; if_ctx_t ctx; int i, cidx; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; ctx = rxq->ifr_ctx; if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, char *name) { int rc, flags; struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int err, nsegments, ntsosegments; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* * Setup DMA descriptor areas. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_desc_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ scctx->isc_tx_tso_size_max, /* maxsize */ ntsosegments, /* nsegments */ scctx->isc_tx_tso_segsize_max, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_desc_tag))) { device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err); goto fail; } if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) return (0); if (!(txq->ift_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } #endif return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; map = NULL; if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[i]; if (map != NULL) { bus_dmamap_unload(txq->ift_desc_tag, map); bus_dmamap_destroy(txq->ift_desc_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_sds.ifsd_flags != NULL) { free(txq->ift_sds.ifsd_flags, M_IFLIB); txq->ift_sds.ifsd_flags = NULL; } if (txq->ift_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_desc_tag); txq->ift_desc_tag = NULL; } if (txq->ift_tso_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_desc_tag); txq->ift_tso_desc_tag = NULL; } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_desc_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, err); goto fail; } if (!(fl->ifl_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) continue; if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } #endif } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #ifdef ACPI_DMAR #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) #else #define IS_DMAR(ctx) (0) #endif /** * rxq_refill - refill an rxq free-buffer list * @ctx: the iflib context * @rxq: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; int idx, frag_idx = fl->ifl_fragidx; int pidx = fl->ifl_pidx; caddr_t cl, *sd_cl; struct mbuf **sd_m; uint8_t *sd_flags; struct if_rxd_update iru; bus_dmamap_t *sd_map; int n, i = 0; uint64_t bus_addr; int err; qidx_t credits; sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_flags = fl->ifl_sds.ifsd_flags; idx = pidx; credits = fl->ifl_credits; n = count; MPASS(n > 0); MPASS(credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru_init(&iru, fl->ifl_rxq, fl->ifl_id); while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if ((frag_idx < 0) || (frag_idx >= fl->ifl_size)) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); if ((cl = sd_cl[frag_idx]) == NULL) { if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); #if defined(__i386__) || defined(__amd64__) if (!IS_DMAR(ctx)) { bus_addr = pmap_kextract((vm_offset_t)cl); } else #endif { struct rxq_refill_cb_arg cb_arg; iflib_rxq_t q; cb_arg.error = 0; q = fl->ifl_rxq; MPASS(sd_map != NULL); MPASS(sd_map[frag_idx] != NULL); err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (err != 0 || cb_arg.error) { /* * !zone_pack ? */ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } bus_addr = cb_arg.seg.ds_addr; } bit_set(fl->ifl_rx_bitmap, frag_idx); sd_flags[frag_idx] |= RX_SW_DESC_INUSE; MPASS(sd_m[frag_idx] == NULL); sd_cl[frag_idx] = cl; sd_m[frag_idx] = m; fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; credits++; i++; MPASS(credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; fl->ifl_pidx = idx; fl->ifl_credits = credits; } } done: if (i) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; } DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; if (sd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); fl->ifl_fragidx = frag_idx; } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_flags & RX_SW_DESC_INUSE) { if (fl->ifl_sds.ifsd_map != NULL) { bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_unload(fl->ifl_desc_tag, sd_map); if (fl->ifl_rxq->ifr_ctx->ifc_in_detach) bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); } if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_flags[i] == 0); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= 2048) fl->ifl_buf_size = MCLBYTES; #ifndef CONTIGMALLOC_WORKS else fl->ifl_buf_size = MJUMPAGESIZE; #else else if (sctx->isc_max_frame_size <= 4096) fl->ifl_buf_size = MJUMPAGESIZE; else if (sctx->isc_max_frame_size <= 9216) fl->ifl_buf_size = MJUM9BYTES; else fl->ifl_buf_size = MJUM16BYTES; #endif if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach */ _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); if (min(128, fl->ifl_size) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_desc_tag != NULL) { bus_dma_tag_destroy(fl->ifl_desc_tag); fl->ifl_desc_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); /* XXX destroy maps first */ free(fl->ifl_sds.ifsd_map, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_map = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } } /* * MI independent logic * */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (ifmp_ring_is_stalled(txq->ift_br)) txq->ift_qstatus = IFLIB_QUEUE_HUNG; txq->ift_cleaned_prev = txq->ift_cleaned; /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: CTX_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); IFDI_WATCHDOG_RESET(ctx); ctx->ifc_watchdog_events++; ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { MPASS(rxq->ifr_id == i); iflib_netmap_rxq_init(ctx, rxq); continue; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } static void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(1000); IFDI_STOP(ctx); DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static void rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) { int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; iflib_dma_info_t di; int next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); if (fl->ifl_sds.ifsd_map != NULL) { next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; di = fl->ifl_ifdi; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_flags[next]); bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); if (unload) bus_dmamap_unload(fl->ifl_desc_tag, map); } fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; if (map != NULL) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bit_clear(fl->ifl_rx_bitmap, cidx); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) { int i, padlen , flags; struct mbuf *m, *mh, *mt; caddr_t cl; i = 0; mh = NULL; do { rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); MPASS(*sd->ifsd_cl != NULL); MPASS(*sd->ifsd_m != NULL); /* Don't include zero-length frags */ if (ri->iri_frags[i].irf_len == 0) { /* XXX we can save the cluster here, but not the mbuf */ m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); m_free(*sd->ifsd_m); *sd->ifsd_m = NULL; continue; } m = *sd->ifsd_m; *sd->ifsd_m = NULL; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); m = *sd.ifsd_m; *sd.ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri, &sd); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } #if defined(INET6) || defined(INET) static void iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) { CURVNET_SET(lc->ifp->if_vnet); #if defined(INET6) *v6 = VNET(ip6_forwarding); #endif #if defined(INET) *v4 = VNET(ipforwarding); #endif CURVNET_RESTORE(); } /* * Returns true if it's possible this packet could be LROed. * if it returns false, it is guaranteed that tcp_lro_rx() * would not return zero. */ static bool iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { struct ether_header *eh; uint16_t eh_type; eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); switch (eh_type) { #if defined(INET6) case ETHERTYPE_IPV6: return !v6_forwarding; #endif #if defined (INET) case ETHERTYPE_IP: return !v4_forwarding; #endif } return false; } #else static void iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) { } #endif static bool iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; bool lro_possible = false; bool v4_forwarding, v6_forwarding; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt, *mf; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? */ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); continue; } /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); if (lro_enabled) iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); mt = mf = NULL; while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled) { if (!lro_possible) { lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); if (lro_possible && mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); mt = mf = NULL; } } if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == (CSUM_L4_CALC|CSUM_L4_VALID)) { if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; } } #endif if (lro_possible) { ifp->if_input(ifp, m); DBG_COUNTER_INC(rx_if_input); continue; } if (mf == NULL) mf = m; if (mt != NULL) mt->m_nextpkt = m; mt = m; } if (mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail) return true; return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: CTX_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); return (false); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) static inline bool iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) { qidx_t dbval, max; bool rang; rang = false; max = TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; } return (rang); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m, *n; n = m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); n = *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; if (IS_TSO4(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; if (IS_TSO6(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; } if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). * So, set it here because the rest of the flow requires it. */ pi->ipi_csum_flags |= CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static __noinline struct mbuf * collapse_pkthdr(struct mbuf *m0) { struct mbuf *m, *m_next, *tmp; m = m0; m_next = m->m_next; while (m_next != NULL && m_next->m_len == 0) { m = m_next; m->m_next = NULL; m_free(m); m_next = m_next->m_next; } m = m0; m->m_next = m_next; if ((m_next->m_flags & M_EXT) == 0) { m = m_defrag(m, M_NOWAIT); } else { tmp = m_next->m_next; memcpy(m_next, m, MPKTHSIZE); m = m_next; m->m_next = tmp; } return (m); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif i = 1; while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; i++; } return (mh); } static int iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, int max_segs, int flags) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; int i, next, pidx, err, ntxd, count; struct mbuf *m, *tmp, **ifsd_m; m = *m0; /* * Please don't ever do this */ if (__predict_false(m->m_len == 0)) *m0 = m = collapse_pkthdr(m); ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) return (err); ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; count = 0; m = *m0; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } m = m->m_next; count++; } while (m != NULL); if (count > *nsegs) { ifsd_m[pidx] = *m0; ifsd_m[pidx]->m_flags |= M_TOOBIG; return (0); } m = *m0; count = 0; do { next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); ifsd_m[next] = m; count++; tmp = m; m = m->m_next; } while (m != NULL); } else { int buflen, sgsize, maxsegsz, max_sgsize; vm_offset_t vaddr; vm_paddr_t curaddr; count = i = 0; m = *m0; if (m->m_pkthdr.csum_flags & CSUM_TSO) maxsegsz = scctx->isc_tx_tso_segsize_max; else maxsegsz = sctx->isc_tx_maxsegsize; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } buflen = m->m_len; vaddr = (vm_offset_t)m->m_data; /* * see if we can't be smarter about physically * contiguous mappings */ next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); #if MEMORY_LOGGING txq->ift_enqueued++; #endif ifsd_m[next] = m; while (buflen > 0) { if (i >= max_segs) goto err; max_sgsize = MIN(buflen, maxsegsz); curaddr = pmap_kextract(vaddr); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); sgsize = MIN(sgsize, max_sgsize); segs[i].ds_addr = curaddr; segs[i].ds_len = sgsize; vaddr += sgsize; buflen -= sgsize; i++; } count++; tmp = m; m = m->m_next; } while (m != NULL); *nsegs = i; } return (0); err: *m0 = iflib_remove_mbuf(txq); return (EFBIG); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } /* * Pad an mbuf to ensure a minimum ethernet frame size. * min_frame_size is the frame size (less CRC) to pad the mbuf to */ static __noinline int iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) { /* * 18 is enough bytes to pad an ARP packet to 46 bytes, and * and ARP message is the smallest common payload I can think of */ static char pad[18]; /* just zeros */ int n; struct mbuf *new_head; if (!M_WRITABLE(*m_head)) { new_head = m_dup(*m_head, M_NOWAIT); if (new_head == NULL) { m_freem(*m_head); device_printf(dev, "cannot pad short frame, m_dup() failed"); DBG_COUNTER_INC(encap_pad_mbuf_fail); return ENOMEM; } m_freem(*m_head); *m_head = new_head; } for (n = min_frame_size - (*m_head)->m_pkthdr.len; n > 0; n -= sizeof(pad)) if (!m_append(*m_head, min(n, sizeof(pad)), pad)) break; if (n > 0) { m_freem(*m_head); device_printf(dev, "cannot pad short frame\n"); DBG_COUNTER_INC(encap_pad_mbuf_fail); return (ENOBUFS); } return 0; } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_segment_t *segs; struct mbuf *m_head; void *next_txd; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; bus_dma_tag_t desc_tag; segs = txq->ift_segs; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; if (ctx->ifc_flags & IFC_PREFETCH) { next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { next_txd = calc_next_txd(txq, cidx, 0); prefetch(next_txd); } /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); if (txq->ift_sds.ifsd_map != NULL) { prefetch(&txq->ift_sds.ifsd_map[next]); next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } } else if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { desc_tag = txq->ift_tso_desc_tag; max_segs = scctx->isc_tx_tso_segments_max; } else { desc_tag = txq->ift_desc_tag; max_segs = scctx->isc_tx_nsegments; } if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); if (err) return err; } m_head = *m_headp; pkt_info_zero(&pi); pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; } retry: err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); if (remap == 1) m_head = m_defrag(*m_headp, M_NOWAIT); remap++; if (__predict_false(m_head == NULL)) goto defrag_failed; txq->ift_mbuf_defrag++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); return (err); } /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } /* * On Intel cards we can greatly reduce the number of TX interrupts * we see by only setting report status on every Nth descriptor. * However, this also means that the driver will need to keep track * of the descriptors that RS was set on to check them for the DD bit. */ txq->ift_rs_pending += nsegs + 1; if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) { pi.ipi_flags |= IPI_TX_INTR; txq->ift_rs_pending = 0; } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if (map != NULL) bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE); if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { if (map != NULL) bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; } else DBG_COUNTER_INC(encap_txd_encap_fail); return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { int hasmap; uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; uint8_t *ifsd_flags; bus_dmamap_t *ifsd_map; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; ifsd_m = txq->ift_sds.ifsd_m; ifsd_map = txq->ift_sds.ifsd_map; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n-- > 0) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if (ifsd_m[cidx] != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { /* * does it matter if it's not the TSO tag? If so we'll * have to add the type to flags */ bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; } if ((m = ifsd_m[cidx]) != NULL) { /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); /* if the number of clusters exceeds the number of segments * there won't be space on the ring to save a pointer to each * cluster so we simply free the list here */ if (m->m_flags & M_TOOBIG) { m_freem(m); } else { m_free(m); } ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch2cachelines(&items[next]); prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; int reclaimed, err, in_use_prev, desc_used; bool do_prefetch, ring, rang; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); avail = TXQ_AVAIL(txq); for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { int pidx_prev, rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; reclaimed++; continue; } in_use_prev = txq->ift_in_use; pidx_prev = txq->ift_pidx; err = iflib_encap(txq, mp); if (__predict_false(err)) { DBG_COUNTER_INC(txq_drain_encapfail); /* no room - bail out */ if (err == ENOBUFS) break; consumed++; DBG_COUNTER_INC(txq_drain_encapfail); /* we can't send this packet - skip it */ continue; } consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; int rc; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; if (if_getcapenable(ifp) & IFCAP_NETMAP) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; more = true; #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { u_int work = 0; if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { more = false; } } #endif budget = ctx->ifc_sysctl_rx_budget; if (budget == 0) budget = 16; /* XXX */ if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { return; } } CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); IFDI_LINK_INTR_ENABLE(ctx); if (ctx->ifc_flags & IFC_DO_RESET) { ctx->ifc_flags &= ~IFC_DO_RESET; iflib_if_init_locked(ctx); } CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENOBUFS); } MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) */ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); } return (err); } static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; CTX_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; CTX_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); CTX_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; CTX_UNLOCK(ctx); if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); if_vlancap(ifp); /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING) iflib_stop(ctx); if_togglecapenable(ifp, setmask); if (bits & IFF_DRV_RUNNING) iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); } break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix, msix_bar; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); return (err); } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; #ifdef INVARIANTS MPASS(scctx->isc_capenable); if (scctx->isc_capenable & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #elif !(defined(__i386__) || defined(__amd64__)) /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware */ if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. */ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { device_printf(dev, "Unable to fetch CPU list\n"); CPU_COPY(&all_cpus, &ctx->ifc_cpus); } MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_intr_free; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; NETDUMP_SET(ctx->ifc_ifp, iflib); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) pci_release_msi(ctx->ifc_dev); fail_queues: /* XXX free queues */ fail: IFDI_DETACH(ctx); return (err); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } CTX_LOCK(ctx); ctx->ifc_in_detach = 1; iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } bus_generic_detach(dev); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxd_min[0]); MPASS(sctx->isc_nrxd_max[0]); MPASS(sctx->isc_nrxd_default[0]); MPASS(sctx->isc_ntxd_min[0]); MPASS(sctx->isc_ntxd_max[0]); MPASS(sctx->isc_ntxd_default[0]); } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); driver->refs++; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); ifmedia_init(&ctx->ifc_media, IFM_IMASK, iflib_media_change, iflib_media_status); return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; struct ifmp_ring **brscp; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(rxq = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; rx_fail: if (brscp != NULL) free(brscp, M_IFLIB); if (rxq != NULL) free(rxq, M_IFLIB); if (txq != NULL) free(txq, M_IFLIB); fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < ctx->ifc_nhwtxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { iflib_rx_sds_free(rxq); rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } return (err); #endif } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; if ((err = iflib_tx_structures_setup(ctx)) != 0) return (err); if ((err = iflib_rx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); } return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } #ifdef SMP static int find_nth(if_ctx_t ctx, int qid) { cpuset_t cpus; int i, cpuid, eqid, count; CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); CPU_CLR(cpuid-1, &cpus); } cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } #ifdef SCHED_ULE extern struct cpu_group *cpu_top; /* CPU topology */ static int find_child_with_core(int cpu, struct cpu_group *grp) { int i; if (grp->cg_children == 0) return -1; MPASS(grp->cg_child); for (i = 0; i < grp->cg_children; i++) { if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) return i; } return -1; } /* * Find the nth thread on the specified core */ static int find_thread(int cpu, int thread_num) { struct cpu_group *grp; int i; cpuset_t cs; grp = cpu_top; if (grp == NULL) return cpu; i = 0; while ((i = find_child_with_core(cpu, grp)) != -1) { /* If the child only has one cpu, don't descend */ if (grp->cg_child[i].cg_count <= 1) break; grp = &grp->cg_child[i]; } /* If they don't share at least an L2 cache, use the same CPU */ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) return cpu; /* Now pick one */ CPU_COPY(&grp->cg_mask, &cs); for (i = thread_num % grp->cg_count; i > 0; i--) { MPASS(CPU_FFS(&cs)); CPU_CLR(CPU_FFS(&cs) - 1, &cs); } MPASS(CPU_FFS(&cs)); return CPU_FFS(&cs) - 1; } #else static int find_thread(int cpu, int thread_num __unused) { return cpu; } #endif static int get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) { switch (type) { case IFLIB_INTR_TX: /* TX queues get threads on the same core as the corresponding RX queue */ /* XXX handle multiple RX threads per core and more than two threads per core */ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; case IFLIB_INTR_RX: case IFLIB_INTR_RXTX: /* RX queues get the first thread on their core */ return qid / CPU_COUNT(&ctx->ifc_cpus); default: return -1; } } #else #define get_thread_num(ctx, type, qid) CPU_FIRST() #define find_thread(cpuid, tid) CPU_FIRST() #define find_nth(ctx, gid) CPU_FIRST() #endif /* Just to avoid copy/paste */ static inline int iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) { int cpuid; int err, tid; cpuid = find_nth(ctx, qid); tid = get_thread_num(ctx, type, qid); MPASS(tid >= 0); cpuid = find_thread(cpuid, tid); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); if (err) { device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); return (err); } #ifdef notyet if (cpuid > ctx->ifc_cpuid_highest) ctx->ifc_cpuid_highest = cpuid; #endif return 0; } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; gtask_fn_t *fn; int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? */ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: panic("unknown net intr type"); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); if (err) return (err); } else { taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; int irq_num = -1; int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; if (irq != NULL) irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; if (irq != NULL) irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); if (irq_num != -1) { err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name); if (err) taskqgroup_attach(tqg, gtask, q, irq_num, name); } else { taskqgroup_attach(tqg, gtask, q, irq_num, name); } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, rman_get_start(irq->ii_res), "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); } void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) ctx->ifc_flags |= IFC_PREFETCH; /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif if (ctx->isc_txd_credits_update == NULL) return (0); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct mtx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_mtx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by global tuneable */ { int i; size_t len = sizeof(i); err = kernel_sysctlbyname(curthread, "hw.pci.enable_msix", &i, &len, NULL, 0, NULL, 0); if (err == 0) { if (i == 0) goto msi; } else { device_printf(dev, "unable to read hw.pci.enable_msix."); } } /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicity set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us. */ { int msix_ctrl, rid; pci_enable_busmaster(dev); rid = 0; if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) { rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } else { device_printf(dev, "PCIY_MSIX capability not found; " "or rid %d == 0.\n", rid); goto msi; } } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use msix in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { /* May not be enabled */ device_printf(dev, "Unable to map MSIX table \n"); goto msi; } } /* First try MSI/X */ if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ device_printf(dev, "System has MSIX disabled \n"); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; goto msi; } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; if (rx_queues > scctx->isc_nrxqsets) rx_queues = scctx->isc_nrxqsets; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (tx_queues > scctx->isc_ntxqsets) tx_queues = scctx->isc_ntxqsets; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the rx budget"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif #ifdef NETDUMP static void -iflib_netdump_init(struct ifnet *ifp, int *nrxr, int *clsize) +iflib_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { if_ctx_t ctx; ctx = if_getsoftc(ifp); CTX_LOCK(ctx); *nrxr = NRXQSETS(ctx); + *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; CTX_UNLOCK(ctx); } static void iflib_netdump_event(struct ifnet *ifp, enum netdump_ev event) { if_ctx_t ctx; if_softc_ctx_t scctx; iflib_fl_t fl; iflib_rxq_t rxq; int i, j; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; switch (event) { case NETDUMP_START: for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &ctx->ifc_rxqs[i]; for (j = 0; j < rxq->ifr_nfl; j++) { fl = rxq->ifr_fl; fl->ifl_zone = m_getzone(fl->ifl_buf_size); } } iflib_no_tx_batch = 1; break; default: break; } } static int iflib_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { if_ctx_t ctx; iflib_txq_t txq; int error; ctx = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; error = iflib_encap(txq, &m); if (error == 0) (void)iflib_txd_db_check(ctx, txq, true, txq->ift_in_use); return (error); } static int iflib_netdump_poll(struct ifnet *ifp, int count) { if_ctx_t ctx; if_softc_ctx_t scctx; iflib_txq_t txq; int i; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; (void)iflib_tx_credits_update(ctx, txq); (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); for (i = 0; i < scctx->isc_nrxqsets; i++) (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/netinet/netdump/netdump.h =================================================================== --- user/markj/netdump/sys/netinet/netdump/netdump.h (revision 332406) +++ user/markj/netdump/sys/netinet/netdump/netdump.h (revision 332407) @@ -1,130 +1,130 @@ /*- * Copyright (c) 2005-2014 Sandvine Incorporated * Copyright (c) 2000 Darrell Anderson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NETINET_NETDUMP_H_ #define _NETINET_NETDUMP_H_ #include #include #include #include #include #define NETDUMP_PORT 20023 /* Server udp port number for data. */ #define NETDUMP_ACKPORT 20024 /* Client udp port number for acks. */ #define NETDUMP_HERALD 1 /* Broadcast before starting a dump. */ #define NETDUMP_FINISHED 2 /* Send after finishing a dump. */ #define NETDUMP_VMCORE 3 /* Contains dump data. */ #define NETDUMP_KDH 4 /* Contains kernel dump header. */ #define NETDUMP_EKCD_KEY 5 /* Contains kernel dump key. */ #define NETDUMP_DATASIZE 4096 /* Arbitrary packet size limit. */ struct netdump_msg_hdr { uint32_t mh_type; /* Netdump message type. */ uint32_t mh_seqno; /* Match acks with msgs. */ uint64_t mh_offset; /* vmcore offset (bytes). */ uint32_t mh_len; /* Attached data (bytes). */ uint32_t mh__pad; } __packed; struct netdump_ack { uint32_t na_seqno; /* Match acks with msgs. */ } __packed; struct netdump_conf { struct diocskerneldump_arg ndc_kda; char ndc_iface[IFNAMSIZ]; struct in_addr ndc_server; struct in_addr ndc_client; struct in_addr ndc_gateway; }; #define _PATH_NETDUMP "/dev/netdump" #define NETDUMPGCONF _IOR('n', 1, struct netdump_conf) #define NETDUMPSCONF _IOW('n', 2, struct netdump_conf) #ifdef _KERNEL #ifdef NETDUMP #define NETDUMP_MAX_IN_FLIGHT 64 enum netdump_ev { NETDUMP_START, NETDUMP_END, }; struct ifnet; struct mbuf; void netdump_reinit(struct ifnet *); -typedef void netdump_init_t(struct ifnet *, int *nrxr, int *clsize); +typedef void netdump_init_t(struct ifnet *, int *nrxr, int *ncl, int *clsize); typedef void netdump_event_t(struct ifnet *, enum netdump_ev); typedef int netdump_transmit_t(struct ifnet *, struct mbuf *); typedef int netdump_poll_t(struct ifnet *, int); struct netdump_methods { netdump_init_t *nd_init; netdump_event_t *nd_event; netdump_transmit_t *nd_transmit; netdump_poll_t *nd_poll; }; #define NETDUMP_DEFINE(driver) \ static netdump_init_t driver##_netdump_init; \ static netdump_event_t driver##_netdump_event; \ static netdump_transmit_t driver##_netdump_transmit; \ static netdump_poll_t driver##_netdump_poll; \ \ static struct netdump_methods driver##_netdump_methods = { \ .nd_init = driver##_netdump_init, \ .nd_event = driver##_netdump_event, \ .nd_transmit = driver##_netdump_transmit, \ .nd_poll = driver##_netdump_poll, \ } #define NETDUMP_REINIT(ifp) netdump_reinit(ifp) #define NETDUMP_SET(ifp, driver) \ (ifp)->if_netdump_methods = &driver##_netdump_methods #else /* !NETDUMP */ #define NETDUMP_DEFINE(driver) #define NETDUMP_REINIT(ifp) #define NETDUMP_SET(ifp, driver) #endif /* NETDUMP */ #endif /* _KERNEL */ #endif /* _NETINET_NETDUMP_H_ */ Index: user/markj/netdump/sys/netinet/netdump/netdump_client.c =================================================================== --- user/markj/netdump/sys/netinet/netdump/netdump_client.c (revision 332406) +++ user/markj/netdump/sys/netinet/netdump/netdump_client.c (revision 332407) @@ -1,1260 +1,1260 @@ /*- * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. * Copyright (c) 2000 Darrell Anderson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * netdump_client.c * FreeBSD subsystem supporting netdump network dumps. * A dedicated server must be running to accept client dumps. */ #include __FBSDID("$FreeBSD$"); #include "opt_netdump.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NETDUMP_DEBUG #define NETDDEBUG(f, ...) \ printf(("%s: " f), __func__, ## __VA_ARGS__) #define NETDDEBUG_IF(i, f, ...) \ if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__) #if NETDUMP_DEBUG > 1 #define NETDDEBUGV(f, ...) \ printf(("%s: " f), __func__, ## __VA_ARGS__) #define NETDDEBUGV_IF(i, f, ...) \ if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__) #else #define NETDDEBUGV(f, ...) #define NETDDEBUGV_IF(i, f, ...) #endif #else #define NETDDEBUG(f, ...) #define NETDDEBUG_IF(i, f, ...) #define NETDDEBUGV(f, ...) #define NETDDEBUGV_IF(i, f, ...) #endif static int netdump_arp_gw(void); static void netdump_cleanup(void); static int netdump_configure(struct netdump_conf *); static int netdump_dumper(void *priv __unused, void *virtual, vm_offset_t physical __unused, off_t offset, size_t length); static int netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, u_short etype); static void netdump_handle_arp(struct mbuf **mb); static void netdump_handle_ip(struct mbuf **mb); static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td); static int netdump_modevent(module_t mod, int type, void *priv); static void netdump_network_poll(void); static void netdump_pkt_in(struct ifnet *ifp, struct mbuf *m); static int netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen); static int netdump_send_arp(in_addr_t dst); static int netdump_start(struct dumperinfo *di); static int netdump_udp_output(struct mbuf *m); /* Must be at least as big as the chunks dumpsys() gives us. */ static unsigned char nd_buf[MAXDUMPPGS * PAGE_SIZE]; static uint32_t nd_seqno; static int dump_failed, have_gw_mac; static void (*drv_if_input)(struct ifnet *, struct mbuf *); static int restore_gw_addr; static uint64_t rcvd_acks; CTASSERT(sizeof(rcvd_acks) * NBBY == NETDUMP_MAX_IN_FLIGHT); /* * Times to poll the NIC (0.5ms each poll) before assuming packetloss * occurred (default to 1s). */ static int nd_polls = 2000; /* Times to retransmit lost packets. */ static int nd_retries = 10; /* Number of ARP retries. */ static int nd_arp_retries = 3; /* Configuration parameters. */ static struct netdump_conf nd_conf; #define nd_server nd_conf.ndc_server #define nd_client nd_conf.ndc_client #define nd_gateway nd_conf.ndc_gateway /* General dynamic settings. */ static struct ether_addr nd_gw_mac; static struct ifnet *nd_ifp; static uint16_t nd_server_port = NETDUMP_PORT; FEATURE(netdump, "Netdump client support"); static SYSCTL_NODE(_net, OID_AUTO, netdump, CTLFLAG_RD, NULL, "netdump parameters"); static int nd_enabled; SYSCTL_INT(_net_netdump, OID_AUTO, enabled, CTLFLAG_RD, &nd_enabled, 0, "netdump configuration status"); static char nd_path[MAXPATHLEN]; SYSCTL_STRING(_net_netdump, OID_AUTO, path, CTLFLAG_RW, nd_path, sizeof(nd_path), "Server path for output files"); /* * Checks for netdump support on a network interface * * Parameters: * ifp The network interface that is being tested for support * * Returns: * int 1 if the interface is supported, 0 if not */ static bool netdump_supported_nic(struct ifnet *ifp) { return (ifp->if_netdump_methods != NULL); } /*- * Network specific primitives. * Following down the code they are divided ordered as: * - Packet buffer primitives * - Output primitives * - Input primitives * - Polling primitives */ /* * Handles creation of the ethernet header, then places outgoing packets into * the tx buffer for the NIC * * Parameters: * m The mbuf containing the packet to be sent (will be freed by * this function or the NIC driver) * ifp The interface to send on * dst The destination ethernet address (source address will be looked * up using ifp) * etype The ETHERTYPE_* value for the protocol that is being sent * * Returns: * int see errno.h, 0 for success */ static int netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, u_short etype) { struct ether_header *eh; if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { if_printf(ifp, "netdump_ether_output: interface isn't up\n"); m_freem(m); return (ENETDOWN); } /* Fill in the ethernet header. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } eh = mtod(m, struct ether_header *); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); eh->ether_type = htons(etype); return ((ifp->if_netdump_methods->nd_transmit)(ifp, m)); } /* * Unreliable transmission of an mbuf chain to the netdump server * Note: can't handle fragmentation; fails if the packet is larger than * nd_ifp->if_mtu after adding the UDP/IP headers * * Parameters: * m mbuf chain * * Returns: * int see errno.h, 0 for success */ static int netdump_udp_output(struct mbuf *m) { struct udpiphdr *ui; struct ip *ip; MPASS(nd_ifp != NULL); M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } if (m->m_pkthdr.len > nd_ifp->if_mtu) { printf("netdump_udp_output: Packet is too big: %d > MTU %u\n", m->m_pkthdr.len, nd_ifp->if_mtu); m_freem(m); return (ENOBUFS); } ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); ui->ui_pr = IPPROTO_UDP; ui->ui_len = htons(m->m_pkthdr.len - sizeof(struct ip)); ui->ui_ulen = ui->ui_len; ui->ui_src = nd_client; ui->ui_dst = nd_server; /* Use this src port so that the server can connect() the socket */ ui->ui_sport = htons(NETDUMP_ACKPORT); ui->ui_dport = htons(nd_server_port); ui->ui_sum = 0; if ((ui->ui_sum = in_cksum(m, m->m_pkthdr.len)) == 0) ui->ui_sum = 0xffff; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_tos = 0; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_id = 0; ip->ip_off = htons(IP_DF); ip->ip_ttl = 255; ip->ip_sum = 0; ip->ip_sum = in_cksum(m, sizeof(struct ip)); return (netdump_ether_output(m, nd_ifp, nd_gw_mac, ETHERTYPE_IP)); } /* * Builds and sends a single ARP request to locate the server * * Return value: * 0 on success * errno on error */ static int netdump_send_arp(in_addr_t dst) { struct ether_addr bcast; struct mbuf *m; struct arphdr *ah; int pktlen; MPASS(nd_ifp != NULL); /* Fill-up a broadcast address. */ memset(&bcast, 0xFF, ETHER_ADDR_LEN); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("netdump_send_arp: Out of mbufs\n"); return (ENOBUFS); } pktlen = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr)); m->m_len = pktlen; m->m_pkthdr.len = pktlen; MH_ALIGN(m, pktlen); ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); ah->ar_pro = htons(ETHERTYPE_IP); ah->ar_hln = ETHER_ADDR_LEN; ah->ar_pln = sizeof(struct in_addr); ah->ar_op = htons(ARPOP_REQUEST); memcpy(ar_sha(ah), IF_LLADDR(nd_ifp), ETHER_ADDR_LEN); ((struct in_addr *)ar_spa(ah))->s_addr = nd_client.s_addr; bzero(ar_tha(ah), ETHER_ADDR_LEN); ((struct in_addr *)ar_tpa(ah))->s_addr = dst; return (netdump_ether_output(m, nd_ifp, bcast, ETHERTYPE_ARP)); } /* * Sends ARP requests to locate the server and waits for a response. * We first try to ARP the server itself, and fall back to the provided * gateway if the server appears to be off-link. * * Return value: * 0 on success * errno on error */ static int netdump_arp_gw(void) { in_addr_t dst; int error, polls, retries; dst = nd_server.s_addr; restart: for (retries = 0; retries < nd_arp_retries && have_gw_mac == 0; retries++) { error = netdump_send_arp(dst); if (error != 0) return (error); for (polls = 0; polls < nd_polls && have_gw_mac == 0; polls++) { netdump_network_poll(); DELAY(500); } if (have_gw_mac == 0) printf("(ARP retry)"); } if (have_gw_mac != 0) return (0); if (dst == nd_server.s_addr && nd_server.s_addr != nd_gateway.s_addr) { printf("Failed to ARP server, trying to reach gateway...\n"); dst = nd_gateway.s_addr; goto restart; } printf("\nARP timed out.\n"); return (ETIMEDOUT); } /* * Dummy free function for EXT_NETDUMP clusters. */ static void netdump_mbuf_free(struct mbuf *m __unused) { } /* * Construct and reliably send a netdump packet. May fail from a resource * shortage or extreme number of unacknowledged retransmissions. Wait for * an acknowledgement before returning. Splits packets into chunks small * enough to be sent without fragmentation (looks up the interface MTU) * * Parameters: * type netdump packet type (HERALD, FINISHED, or VMCORE) * offset vmcore data offset (bytes) * data vmcore data * datalen vmcore data size (bytes) * * Returns: * int see errno.h, 0 for success */ static int netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen) { struct netdump_msg_hdr *nd_msg_hdr; struct mbuf *m, *m2; uint64_t want_acks; uint32_t i, pktlen, sent_so_far; int retries, polls, error; want_acks = 0; rcvd_acks = 0; retries = 0; MPASS(nd_ifp != NULL); retransmit: /* Chunks can be too big to fit in packets. */ for (i = sent_so_far = 0; sent_so_far < datalen || (i == 0 && datalen == 0); i++) { pktlen = datalen - sent_so_far; /* First bound: the packet structure. */ pktlen = min(pktlen, NETDUMP_DATASIZE); /* Second bound: the interface MTU (assume no IP options). */ pktlen = min(pktlen, nd_ifp->if_mtu - sizeof(struct udpiphdr) - sizeof(struct netdump_msg_hdr)); /* * Check if it is retransmitting and this has been ACKed * already. */ if ((rcvd_acks & (1 << i)) != 0) { sent_so_far += pktlen; continue; } /* * Get and fill a header mbuf, then chain data as an extended * mbuf. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("netdump_send: Out of mbufs\n"); return (ENOBUFS); } m->m_len = sizeof(struct netdump_msg_hdr); m->m_pkthdr.len = sizeof(struct netdump_msg_hdr); MH_ALIGN(m, sizeof(struct netdump_msg_hdr)); nd_msg_hdr = mtod(m, struct netdump_msg_hdr *); nd_msg_hdr->mh_seqno = htonl(nd_seqno + i); nd_msg_hdr->mh_type = htonl(type); nd_msg_hdr->mh_offset = htobe64(offset + sent_so_far); nd_msg_hdr->mh_len = htonl(pktlen); nd_msg_hdr->mh__pad = 0; if (pktlen != 0) { m2 = m_get(M_NOWAIT, MT_DATA); if (m2 == NULL) { m_freem(m); printf("netdump_send: Out of mbufs\n"); return (ENOBUFS); } MEXTADD(m2, data + sent_so_far, pktlen, netdump_mbuf_free, NULL, NULL, 0, EXT_NETDUMP); m2->m_len = pktlen; m_cat(m, m2); m->m_pkthdr.len += pktlen; } error = netdump_udp_output(m); if (error != 0) return (error); /* Note that we're waiting for this packet in the bitfield. */ want_acks |= (1 << i); sent_so_far += pktlen; } if (i >= NETDUMP_MAX_IN_FLIGHT) printf("Warning: Sent more than %d packets (%d). " "Acknowledgements will fail unless the size of " "rcvd_acks/want_acks is increased.\n", NETDUMP_MAX_IN_FLIGHT, i); /* * Wait for acks. A *real* window would speed things up considerably. */ polls = 0; while (rcvd_acks != want_acks) { if (polls++ > nd_polls) { if (retries++ > nd_retries) return (ETIMEDOUT); printf(". "); goto retransmit; } netdump_network_poll(); DELAY(500); } nd_seqno += i; return (0); } /* * Handler for IP packets: checks their sanity and then processes any netdump * ACK packets it finds. * * It needs to replicate partially the behaviour of ip_input() and * udp_input(). * * Parameters: * mb a pointer to an mbuf * containing the packet received * Updates *mb if m_pullup et al change the pointer * Assumes the calling function will take care of freeing the mbuf */ static void netdump_handle_ip(struct mbuf **mb) { struct ip *ip; struct udpiphdr *udp; struct netdump_ack *nd_ack; struct mbuf *m; int rcv_ackno; unsigned short hlen; /* IP processing. */ m = *mb; if (m->m_pkthdr.len < sizeof(struct ip)) { NETDDEBUG("dropping packet too small for IP header\n"); return; } if (m->m_len < sizeof(struct ip)) { m = m_pullup(m, sizeof(struct ip)); *mb = m; if (m == NULL) { NETDDEBUG("m_pullup failed\n"); return; } } ip = mtod(m, struct ip *); /* IP version. */ if (ip->ip_v != IPVERSION) { NETDDEBUG("bad IP version %d\n", ip->ip_v); return; } /* Header length. */ hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { NETDDEBUG("bad IP header length (%hu)\n", hlen); return; } if (hlen > m->m_len) { m = m_pullup(m, hlen); *mb = m; if (m == NULL) { NETDDEBUG("m_pullup failed\n"); return; } ip = mtod(m, struct ip *); } /* Ignore packets with IP options. */ if (hlen > sizeof(struct ip)) { NETDDEBUG("drop packet with IP options\n"); return; } #ifdef INVARIANTS if (((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) && (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { NETDDEBUG("Bad IP header (RFC1122)\n"); return; } #endif /* Checksum. */ if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) { if ((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0) { NETDDEBUG("bad IP checksum\n"); return; } } else { /* XXX */ ; } /* Convert fields to host byte order. */ ip->ip_len = ntohs(ip->ip_len); if (ip->ip_len < hlen) { NETDDEBUG("IP packet smaller (%hu) than header (%hu)\n", ip->ip_len, hlen); return; } if (m->m_pkthdr.len < ip->ip_len) { NETDDEBUG("IP packet bigger (%hu) than ethernet packet (%d)\n", ip->ip_len, m->m_pkthdr.len); return; } if (m->m_pkthdr.len > ip->ip_len) { /* Truncate the packet to the IP length. */ if (m->m_len == m->m_pkthdr.len) { m->m_len = ip->ip_len; m->m_pkthdr.len = ip->ip_len; } else m_adj(m, ip->ip_len - m->m_pkthdr.len); } ip->ip_off = ntohs(ip->ip_off); /* Check that the source is the server's IP. */ if (ip->ip_src.s_addr != nd_server.s_addr) { NETDDEBUG("drop packet not from server (from 0x%x)\n", ip->ip_src.s_addr); return; } /* Check if the destination IP is ours. */ if (ip->ip_dst.s_addr != nd_client.s_addr) { NETDDEBUGV("drop packet not to our IP\n"); return; } if (ip->ip_p != IPPROTO_UDP) { NETDDEBUG("drop non-UDP packet\n"); return; } /* Do not deal with fragments. */ if ((ip->ip_off & (IP_MF | IP_OFFMASK)) != 0) { NETDDEBUG("drop fragmented packet\n"); return; } /* UDP custom is to have packet length not include IP header. */ ip->ip_len -= hlen; /* UDP processing. */ /* Get IP and UDP headers together, along with the netdump packet. */ if (m->m_pkthdr.len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { NETDDEBUG("ignoring small packet\n"); return; } if (m->m_len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { m = m_pullup(m, sizeof(struct udpiphdr) + sizeof(struct netdump_ack)); *mb = m; if (m == NULL) { NETDDEBUG("m_pullup failed\n"); return; } } udp = mtod(m, struct udpiphdr *); if (ntohs(udp->ui_u.uh_dport) != NETDUMP_ACKPORT) { NETDDEBUG("not on the netdump port.\n"); return; } /* Netdump processing. */ /* * Packet is meant for us. Extract the ack sequence number and the * port number if necessary. */ nd_ack = (struct netdump_ack *)(mtod(m, caddr_t) + sizeof(struct udpiphdr)); rcv_ackno = ntohl(nd_ack->na_seqno); if (nd_server_port == NETDUMP_PORT) nd_server_port = ntohs(udp->ui_u.uh_sport); if (rcv_ackno >= nd_seqno + NETDUMP_MAX_IN_FLIGHT) printf("%s: ACK %d too far in future!\n", __func__, rcv_ackno); else if (rcv_ackno >= nd_seqno) { /* We're interested in this ack. Record it. */ rcvd_acks |= 1 << (rcv_ackno - nd_seqno); } } /* * Handler for ARP packets: checks their sanity and then * 1. If the ARP is a request for our IP, respond with our MAC address * 2. If the ARP is a response from our server, record its MAC address * * It needs to replicate partially the behaviour of arpintr() and * in_arpinput(). * * Parameters: * mb a pointer to an mbuf * containing the packet received * Updates *mb if m_pullup et al change the pointer * Assumes the calling function will take care of freeing the mbuf */ static void netdump_handle_arp(struct mbuf **mb) { char buf[INET_ADDRSTRLEN]; struct in_addr isaddr, itaddr, myaddr; struct ether_addr dst; struct mbuf *m; struct arphdr *ah; struct ifnet *ifp; uint8_t *enaddr; int req_len, op; m = *mb; ifp = m->m_pkthdr.rcvif; if (m->m_len < sizeof(struct arphdr)) { m = m_pullup(m, sizeof(struct arphdr)); *mb = m; if (m == NULL) { NETDDEBUG("runt packet: m_pullup failed\n"); return; } } ah = mtod(m, struct arphdr *); if (ntohs(ah->ar_hrd) != ARPHRD_ETHER) { NETDDEBUG("unknown hardware address 0x%2D)\n", (unsigned char *)&ah->ar_hrd, ""); return; } if (ntohs(ah->ar_pro) != ETHERTYPE_IP) { NETDDEBUG("drop ARP for unknown protocol %d\n", ntohs(ah->ar_pro)); return; } req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); if (m->m_len < req_len) { m = m_pullup(m, req_len); *mb = m; if (m == NULL) { NETDDEBUG("runt packet: m_pullup failed\n"); return; } } ah = mtod(m, struct arphdr *); op = ntohs(ah->ar_op); memcpy(&isaddr, ar_spa(ah), sizeof(isaddr)); memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr)); enaddr = (uint8_t *)IF_LLADDR(ifp); myaddr = nd_client; if (memcmp(ar_sha(ah), enaddr, ifp->if_addrlen) == 0) { NETDDEBUG("ignoring ARP from myself\n"); return; } if (isaddr.s_addr == nd_client.s_addr) { printf("%s: %*D is using my IP address %s!\n", __func__, ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa_r(isaddr, buf)); return; } if (memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen) == 0) { NETDDEBUG("ignoring ARP from broadcast address\n"); return; } if (op == ARPOP_REPLY) { if (isaddr.s_addr != nd_gateway.s_addr && isaddr.s_addr != nd_server.s_addr) { inet_ntoa_r(isaddr, buf); NETDDEBUG( "ignoring ARP reply from %s (not netdump server)\n", buf); return; } memcpy(nd_gw_mac.octet, ar_sha(ah), min(ah->ar_hln, ETHER_ADDR_LEN)); have_gw_mac = 1; NETDDEBUG("got server MAC address %6D\n", nd_gw_mac.octet, ":"); return; } if (op != ARPOP_REQUEST) { NETDDEBUG("ignoring ARP non-request/reply\n"); return; } if (itaddr.s_addr != nd_client.s_addr) { NETDDEBUG("ignoring ARP not to our IP\n"); return; } memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); memcpy(ar_sha(ah), enaddr, ah->ar_hln); memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); memcpy(ar_spa(ah), &itaddr, ah->ar_pln); ah->ar_op = htons(ARPOP_REPLY); ah->ar_pro = htons(ETHERTYPE_IP); m->m_flags &= ~(M_BCAST|M_MCAST); m->m_len = arphdr_len(ah); m->m_pkthdr.len = m->m_len; memcpy(dst.octet, ar_tha(ah), ETHER_ADDR_LEN); netdump_ether_output(m, ifp, dst, ETHERTYPE_ARP); *mb = NULL; } /* * Handler for incoming packets directly from the network adapter * Identifies the packet type (IP or ARP) and passes it along to one of the * helper functions netdump_handle_ip or netdump_handle_arp. * * It needs to replicate partially the behaviour of ether_input() and * ether_demux(). * * Parameters: * ifp the interface the packet came from (should be nd_ifp) * m an mbuf containing the packet received */ static void netdump_pkt_in(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; /* Ethernet processing. */ if ((m->m_flags & M_PKTHDR) == 0) { NETDDEBUG_IF(ifp, "discard frame without packet header\n"); goto done; } if (m->m_len < ETHER_HDR_LEN) { NETDDEBUG_IF(ifp, "discard frame without leading eth header (len %u pktlen %u)\n", m->m_len, m->m_pkthdr.len); goto done; } if ((m->m_flags & M_HASFCS) != 0) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { NETDDEBUG_IF(ifp, "ignoring vlan packets\n"); goto done; } /* XXX: Probably must also check if we're the recipient MAC address. */ /* Done ethernet processing. Strip off the ethernet header. */ m_adj(m, ETHER_HDR_LEN); switch (etype) { case ETHERTYPE_ARP: netdump_handle_arp(&m); break; case ETHERTYPE_IP: netdump_handle_ip(&m); break; default: NETDDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); break; } done: if (m != NULL) m_freem(m); } /* * After trapping, instead of assuming that most of the network stack is sane, * we just poll the driver directly for packets. */ static void netdump_network_poll(void) { MPASS(nd_ifp != NULL); nd_ifp->if_netdump_methods->nd_poll(nd_ifp, 1000); } /*- * Dumping specific primitives. */ /* * Callback from dumpsys() to dump a chunk of memory. * Copies it out to our static buffer then sends it across the network. * Detects the initial KDH and makes sure it is given a special packet type. * * Parameters: * priv Unused. Optional private pointer. * virtual Virtual address (where to read the data from) * physical Unused. Physical memory address. * offset Offset from start of core file * length Data length * * Return value: * 0 on success * errno on error */ static int netdump_dumper(void *priv __unused, void *virtual, vm_offset_t physical __unused, off_t offset, size_t length) { int error; NETDDEBUGV("netdump_dumper(NULL, %p, NULL, %ju, %zu)\n", virtual, (uintmax_t)offset, length); if (virtual == NULL) { if (dump_failed != 0) printf("failed to dump the kernel core\n"); else if (netdump_send(NETDUMP_FINISHED, 0, NULL, 0) != 0) printf("failed to close the transaction\n"); else printf("\nnetdump finished.\n"); netdump_cleanup(); return (0); } if (length > sizeof(nd_buf)) return (ENOSPC); memmove(nd_buf, virtual, length); error = netdump_send(NETDUMP_VMCORE, offset, nd_buf, length); if (error != 0) { dump_failed = 1; return (error); } return (0); } /* * Perform any initalization needed prior to transmitting the kernel core. */ static int netdump_start(struct dumperinfo *di) { char *path; char buf[INET_ADDRSTRLEN]; uint32_t len; int error; error = 0; /* Check if the dumping is allowed to continue. */ if (nd_enabled == 0) return (EINVAL); MPASS(nd_ifp != NULL); if (nd_server.s_addr == INADDR_ANY) { printf("netdump_start: can't netdump; no server IP given\n"); return (EINVAL); } if (nd_client.s_addr == INADDR_ANY) { printf("netdump_start: can't netdump; no client IP given\n"); return (EINVAL); } /* We start dumping at offset 0. */ di->dumpoff = 0; nd_seqno = 1; /* * nd_server_port could have switched after the first ack the * first time it gets called. Adjust it accordingly. */ nd_server_port = NETDUMP_PORT; /* Switch to the netdump mbuf zones. */ netdump_mbuf_dump(); nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_START); /* Make the card use *our* receive callback. */ drv_if_input = nd_ifp->if_input; nd_ifp->if_input = netdump_pkt_in; if (nd_gateway.s_addr == INADDR_ANY) { restore_gw_addr = 1; nd_gateway.s_addr = nd_server.s_addr; } printf("netdump in progress. searching for server...\n"); if (netdump_arp_gw()) { printf("failed to locate server MAC address\n"); error = EINVAL; goto trig_abort; } if (nd_path[0] != '\0') { path = nd_path; len = strlen(path) + 1; } else { path = NULL; len = 0; } if (netdump_send(NETDUMP_HERALD, 0, path, len) != 0) { printf("failed to contact netdump server\n"); error = EINVAL; goto trig_abort; } printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf), nd_gw_mac.octet, ":"); return (0); trig_abort: netdump_cleanup(); return (error); } static int netdump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh, void *key, uint32_t keysize) { int error; memcpy(nd_buf, kdh, sizeof(*kdh)); error = netdump_send(NETDUMP_KDH, 0, nd_buf, sizeof(*kdh)); if (error == 0 && keysize > 0) { if (keysize > sizeof(nd_buf)) return (EINVAL); memcpy(nd_buf, key, keysize); error = netdump_send(NETDUMP_EKCD_KEY, 0, nd_buf, keysize); } return (error); } /* * Cleanup routine for a possibly failed netdump. */ static void netdump_cleanup(void) { if (restore_gw_addr != 0) { nd_gateway.s_addr = INADDR_ANY; restore_gw_addr = 0; } if (drv_if_input != NULL) { nd_ifp->if_input = drv_if_input; drv_if_input = NULL; } nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_END); } /*- * KLD specific code. */ static struct cdevsw netdump_cdevsw = { .d_version = D_VERSION, .d_ioctl = netdump_ioctl, .d_name = "netdump", }; static struct cdev *netdump_cdev; static int netdump_configure(struct netdump_conf *conf) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strcmp(ifp->if_xname, conf->ndc_iface) == 0) break; } /* XXX ref */ IFNET_RUNLOCK_NOSLEEP(); if (ifp == NULL) { printf("netdump: unknown interface '%s'\n", conf->ndc_iface); return (1); } else if (!netdump_supported_nic(ifp) || ifp->if_type != IFT_ETHER) { printf("netdump: unsupported interface '%s'\n", conf->ndc_iface); return (1); } nd_ifp = ifp; netdump_reinit(ifp); memcpy(&nd_conf, conf, sizeof(nd_conf)); nd_enabled = 1; return (0); } /* * Reinitialize the mbuf pool used by drivers while dumping. This is called * from the generic ioctl handler for SIOCSIFMTU after the driver has * reconfigured itself. */ void netdump_reinit(struct ifnet *ifp) { - int clsize, nmbuf, nclust, nrxr; + int clsize, nmbuf, ncl, nrxr; if (ifp != nd_ifp) return; - ifp->if_netdump_methods->nd_init(ifp, &nrxr, &clsize); + ifp->if_netdump_methods->nd_init(ifp, &nrxr, &ncl, &clsize); KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); /* * We need two headers per message on the transmit side. Multiply by * four to give us some breathing room. */ - nmbuf = NETDUMP_MAX_IN_FLIGHT * (4 + nrxr); - nclust = NETDUMP_MAX_IN_FLIGHT * nrxr; - netdump_mbuf_reinit(nmbuf, nclust, clsize); + nmbuf = ncl * (4 + nrxr); + ncl *= nrxr; + netdump_mbuf_reinit(nmbuf, ncl, clsize); } /* * ioctl(2) handler for the netdump device. This is currently only used to * register netdump as a dump device. * * Parameters: * dev, Unused. * cmd, The ioctl to be handled. * addr, The parameter for the ioctl. * flags, Unused. * td, The thread invoking this ioctl. * * Returns: * 0 on success, and an errno value on failure. */ static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td) { struct dumperinfo dumper; struct netdump_conf *conf; int error; u_int u; error = 0; switch (cmd) { case DIOCSKERNELDUMP: u = *(u_int *)addr; if (u != 0) { error = ENXIO; break; } if (nd_enabled) { nd_enabled = 0; netdump_mbuf_drain(); } break; case NETDUMPGCONF: conf = (struct netdump_conf *)addr; if (!nd_enabled) { error = ENXIO; break; } strlcpy(conf->ndc_iface, nd_ifp->if_xname, sizeof(conf->ndc_iface)); memcpy(&conf->ndc_server, &nd_server, sizeof(nd_server)); memcpy(&conf->ndc_client, &nd_client, sizeof(nd_client)); memcpy(&conf->ndc_gateway, &nd_gateway, sizeof(nd_gateway)); break; case NETDUMPSCONF: conf = (struct netdump_conf *)addr; if (conf->ndc_kda.kda_enable == 0) { if (nd_enabled) { error = clear_dumper(td); if (error == 0) nd_enabled = 0; } break; } if (netdump_configure(conf) != 0) { error = EINVAL; break; } dumper.dumper_start = netdump_start; dumper.dumper_hdr = netdump_write_headers; dumper.dumper = netdump_dumper; dumper.priv = NULL; dumper.blocksize = NETDUMP_DATASIZE; dumper.maxiosize = MAXDUMPPGS * PAGE_SIZE; dumper.mediaoffset = 0; dumper.mediasize = 0; error = set_dumper(&dumper, conf->ndc_iface, td, conf->ndc_kda.kda_compression, conf->ndc_kda.kda_encryption, conf->ndc_kda.kda_key, conf->ndc_kda.kda_encryptedkeysize, conf->ndc_kda.kda_encryptedkey); if (error != 0) nd_enabled = 0; break; default: error = EINVAL; break; } return (error); } /* * Called upon system init or kld load. Initializes the netdump parameters to * sane defaults (locates the first available NIC and uses the first IPv4 IP on * that card as the client IP). Leaves the server IP unconfigured. * * Parameters: * mod, Unused. * what, The module event type. * priv, Unused. * * Returns: * int, An errno value if an error occured, 0 otherwise. */ static int netdump_modevent(module_t mod __unused, int what, void *priv __unused) { struct netdump_conf conf; char *arg; int error; error = 0; switch (what) { case MOD_LOAD: error = make_dev_p(MAKEDEV_WAITOK, &netdump_cdev, &netdump_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "netdump"); if (error != 0) return (error); if ((arg = kern_getenv("net.dump.iface")) != NULL) { strlcpy(conf.ndc_iface, arg, sizeof(conf.ndc_iface)); freeenv(arg); if ((arg = kern_getenv("net.dump.server")) != NULL) { inet_aton(arg, &conf.ndc_server); freeenv(arg); } if ((arg = kern_getenv("net.dump.client")) != NULL) { inet_aton(arg, &conf.ndc_server); freeenv(arg); } if ((arg = kern_getenv("net.dump.gateway")) != NULL) { inet_aton(arg, &conf.ndc_server); freeenv(arg); } /* Ignore errors; we print a message to the console. */ (void)netdump_configure(&conf); } break; case MOD_UNLOAD: destroy_dev(netdump_cdev); if (nd_enabled) { printf("netdump: disabling dump device for unload\n"); (void)clear_dumper(curthread); nd_enabled = 0; } break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t netdump_mod = { "netdump", netdump_modevent, NULL, }; MODULE_VERSION(netdump, 1); DECLARE_MODULE(netdump, netdump_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);