Index: user/markj/netdump/sys/dev/alc/if_alc.c =================================================================== --- user/markj/netdump/sys/dev/alc/if_alc.c (revision 331674) +++ user/markj/netdump/sys/dev/alc/if_alc.c (revision 331675) @@ -1,4715 +1,4712 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009, Pyun YongHyeon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Driver for Atheros AR813x/AR815x PCIe Ethernet. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #undef ALC_USE_CUSTOM_CSUM #ifdef ALC_USE_CUSTOM_CSUM #define ALC_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #else #define ALC_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) #endif MODULE_DEPEND(alc, pci, 1, 1, 1); MODULE_DEPEND(alc, ether, 1, 1, 1); MODULE_DEPEND(alc, miibus, 1, 1, 1); /* Tunables. */ static int msi_disable = 0; static int msix_disable = 0; TUNABLE_INT("hw.alc.msi_disable", &msi_disable); TUNABLE_INT("hw.alc.msix_disable", &msix_disable); /* * Devices supported by this driver. */ static struct alc_ident alc_ident_table[] = { { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8131, 9 * 1024, "Atheros AR8131 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8132, 9 * 1024, "Atheros AR8132 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8151, 6 * 1024, "Atheros AR8151 v1.0 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8151_V2, 6 * 1024, "Atheros AR8151 v2.0 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8152_B, 6 * 1024, "Atheros AR8152 v1.1 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8152_B2, 6 * 1024, "Atheros AR8152 v2.0 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8161, 9 * 1024, "Atheros AR8161 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8162, 9 * 1024, "Atheros AR8162 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8171, 9 * 1024, "Atheros AR8171 PCIe Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_AR8172, 9 * 1024, "Atheros AR8172 PCIe Fast Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_E2200, 9 * 1024, "Killer E2200 Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_E2400, 9 * 1024, "Killer E2400 Gigabit Ethernet" }, { VENDORID_ATHEROS, DEVICEID_ATHEROS_E2500, 9 * 1024, "Killer E2500 Gigabit Ethernet" }, { 0, 0, 0, NULL} }; static void alc_aspm(struct alc_softc *, int, int); static void alc_aspm_813x(struct alc_softc *, int); static void alc_aspm_816x(struct alc_softc *, int); static int alc_attach(device_t); static int alc_check_boundary(struct alc_softc *); static void alc_config_msi(struct alc_softc *); static int alc_detach(device_t); static void alc_disable_l0s_l1(struct alc_softc *); static int alc_dma_alloc(struct alc_softc *); static void alc_dma_free(struct alc_softc *); static void alc_dmamap_cb(void *, bus_dma_segment_t *, int, int); static void alc_dsp_fixup(struct alc_softc *, int); static int alc_encap(struct alc_softc *, struct mbuf **); static struct alc_ident * alc_find_ident(device_t); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * alc_fixup_rx(struct ifnet *, struct mbuf *); #endif static void alc_get_macaddr(struct alc_softc *); static void alc_get_macaddr_813x(struct alc_softc *); static void alc_get_macaddr_816x(struct alc_softc *); static void alc_get_macaddr_par(struct alc_softc *); static void alc_init(void *); static void alc_init_cmb(struct alc_softc *); static void alc_init_locked(struct alc_softc *); static void alc_init_rr_ring(struct alc_softc *); static int alc_init_rx_ring(struct alc_softc *); static void alc_init_smb(struct alc_softc *); static void alc_init_tx_ring(struct alc_softc *); static void alc_int_task(void *, int); static int alc_intr(void *); static int alc_ioctl(struct ifnet *, u_long, caddr_t); static void alc_mac_config(struct alc_softc *); static uint32_t alc_mii_readreg_813x(struct alc_softc *, int, int); static uint32_t alc_mii_readreg_816x(struct alc_softc *, int, int); static uint32_t alc_mii_writereg_813x(struct alc_softc *, int, int, int); static uint32_t alc_mii_writereg_816x(struct alc_softc *, int, int, int); static int alc_miibus_readreg(device_t, int, int); static void alc_miibus_statchg(device_t); static int alc_miibus_writereg(device_t, int, int, int); static uint32_t alc_miidbg_readreg(struct alc_softc *, int); static uint32_t alc_miidbg_writereg(struct alc_softc *, int, int); static uint32_t alc_miiext_readreg(struct alc_softc *, int, int); static uint32_t alc_miiext_writereg(struct alc_softc *, int, int, int); static int alc_mediachange(struct ifnet *); static int alc_mediachange_locked(struct alc_softc *); static void alc_mediastatus(struct ifnet *, struct ifmediareq *); static int alc_newbuf(struct alc_softc *, struct alc_rxdesc *); static void alc_osc_reset(struct alc_softc *); static void alc_phy_down(struct alc_softc *); static void alc_phy_reset(struct alc_softc *); static void alc_phy_reset_813x(struct alc_softc *); static void alc_phy_reset_816x(struct alc_softc *); static int alc_probe(device_t); static void alc_reset(struct alc_softc *); static int alc_resume(device_t); static void alc_rxeof(struct alc_softc *, struct rx_rdesc *); static int alc_rxintr(struct alc_softc *, int); static void alc_rxfilter(struct alc_softc *); static void alc_rxvlan(struct alc_softc *); static void alc_setlinkspeed(struct alc_softc *); static void alc_setwol(struct alc_softc *); static void alc_setwol_813x(struct alc_softc *); static void alc_setwol_816x(struct alc_softc *); static int alc_shutdown(device_t); static void alc_start(struct ifnet *); static void alc_start_locked(struct ifnet *); static void alc_start_queue(struct alc_softc *); static void alc_start_tx(struct alc_softc *); static void alc_stats_clear(struct alc_softc *); static void alc_stats_update(struct alc_softc *); static void alc_stop(struct alc_softc *); static void alc_stop_mac(struct alc_softc *); static void alc_stop_queue(struct alc_softc *); static int alc_suspend(device_t); static void alc_sysctl_node(struct alc_softc *); static void alc_tick(void *); static void alc_txeof(struct alc_softc *); static void alc_watchdog(struct alc_softc *); static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_alc_proc_limit(SYSCTL_HANDLER_ARGS); static int sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS); NETDUMP_DEFINE(alc); static device_method_t alc_methods[] = { /* Device interface. */ DEVMETHOD(device_probe, alc_probe), DEVMETHOD(device_attach, alc_attach), DEVMETHOD(device_detach, alc_detach), DEVMETHOD(device_shutdown, alc_shutdown), DEVMETHOD(device_suspend, alc_suspend), DEVMETHOD(device_resume, alc_resume), /* MII interface. */ DEVMETHOD(miibus_readreg, alc_miibus_readreg), DEVMETHOD(miibus_writereg, alc_miibus_writereg), DEVMETHOD(miibus_statchg, alc_miibus_statchg), DEVMETHOD_END }; static driver_t alc_driver = { "alc", alc_methods, sizeof(struct alc_softc) }; static devclass_t alc_devclass; DRIVER_MODULE(alc, pci, alc_driver, alc_devclass, 0, 0); DRIVER_MODULE(miibus, alc, miibus_driver, miibus_devclass, 0, 0); static struct resource_spec alc_res_spec_mem[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec alc_irq_spec_legacy[] = { { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0, 0 } }; static struct resource_spec alc_irq_spec_msi[] = { { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec alc_irq_spec_msix[] = { { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0, 0 } }; static uint32_t alc_dma_burst[] = { 128, 256, 512, 1024, 2048, 4096, 0, 0 }; static int alc_miibus_readreg(device_t dev, int phy, int reg) { struct alc_softc *sc; int v; sc = device_get_softc(dev); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) v = alc_mii_readreg_816x(sc, phy, reg); else v = alc_mii_readreg_813x(sc, phy, reg); return (v); } static uint32_t alc_mii_readreg_813x(struct alc_softc *sc, int phy, int reg) { uint32_t v; int i; /* * For AR8132 fast ethernet controller, do not report 1000baseT * capability to mii(4). Even though AR8132 uses the same * model/revision number of F1 gigabit PHY, the PHY has no * ability to establish 1000baseT link. */ if ((sc->alc_flags & ALC_FLAG_FASTETHER) != 0 && reg == MII_EXTSR) return (0); CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_READ | MDIO_SUP_PREAMBLE | MDIO_CLK_25_4 | MDIO_REG_ADDR(reg)); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & (MDIO_OP_EXECUTE | MDIO_OP_BUSY)) == 0) break; } if (i == 0) { device_printf(sc->alc_dev, "phy read timeout : %d\n", reg); return (0); } return ((v & MDIO_DATA_MASK) >> MDIO_DATA_SHIFT); } static uint32_t alc_mii_readreg_816x(struct alc_softc *sc, int phy, int reg) { uint32_t clk, v; int i; if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_READ | MDIO_SUP_PREAMBLE | clk | MDIO_REG_ADDR(reg)); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) { device_printf(sc->alc_dev, "phy read timeout : %d\n", reg); return (0); } return ((v & MDIO_DATA_MASK) >> MDIO_DATA_SHIFT); } static int alc_miibus_writereg(device_t dev, int phy, int reg, int val) { struct alc_softc *sc; int v; sc = device_get_softc(dev); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) v = alc_mii_writereg_816x(sc, phy, reg, val); else v = alc_mii_writereg_813x(sc, phy, reg, val); return (v); } static uint32_t alc_mii_writereg_813x(struct alc_softc *sc, int phy, int reg, int val) { uint32_t v; int i; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_WRITE | (val & MDIO_DATA_MASK) << MDIO_DATA_SHIFT | MDIO_SUP_PREAMBLE | MDIO_CLK_25_4 | MDIO_REG_ADDR(reg)); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & (MDIO_OP_EXECUTE | MDIO_OP_BUSY)) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "phy write timeout : %d\n", reg); return (0); } static uint32_t alc_mii_writereg_816x(struct alc_softc *sc, int phy, int reg, int val) { uint32_t clk, v; int i; if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_WRITE | ((val & MDIO_DATA_MASK) << MDIO_DATA_SHIFT) | MDIO_REG_ADDR(reg) | MDIO_SUP_PREAMBLE | clk); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "phy write timeout : %d\n", reg); return (0); } static void alc_miibus_statchg(device_t dev) { struct alc_softc *sc; struct mii_data *mii; struct ifnet *ifp; uint32_t reg; sc = device_get_softc(dev); mii = device_get_softc(sc->alc_miibus); ifp = sc->alc_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc->alc_flags &= ~ALC_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->alc_flags |= ALC_FLAG_LINK; break; case IFM_1000_T: if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) sc->alc_flags |= ALC_FLAG_LINK; break; default: break; } } /* Stop Rx/Tx MACs. */ alc_stop_mac(sc); /* Program MACs with resolved speed/duplex/flow-control. */ if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { alc_start_queue(sc); alc_mac_config(sc); /* Re-enable Tx/Rx MACs. */ reg = CSR_READ_4(sc, ALC_MAC_CFG); reg |= MAC_CFG_TX_ENB | MAC_CFG_RX_ENB; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } alc_aspm(sc, 0, IFM_SUBTYPE(mii->mii_media_active)); alc_dsp_fixup(sc, IFM_SUBTYPE(mii->mii_media_active)); } static uint32_t alc_miidbg_readreg(struct alc_softc *sc, int reg) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, reg); return (alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA)); } static uint32_t alc_miidbg_writereg(struct alc_softc *sc, int reg, int val) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, reg); return (alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val)); } static uint32_t alc_miiext_readreg(struct alc_softc *sc, int devaddr, int reg) { uint32_t clk, v; int i; CSR_WRITE_4(sc, ALC_EXT_MDIO, EXT_MDIO_REG(reg) | EXT_MDIO_DEVADDR(devaddr)); if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_READ | MDIO_SUP_PREAMBLE | clk | MDIO_MODE_EXT); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) { device_printf(sc->alc_dev, "phy ext read timeout : %d, %d\n", devaddr, reg); return (0); } return ((v & MDIO_DATA_MASK) >> MDIO_DATA_SHIFT); } static uint32_t alc_miiext_writereg(struct alc_softc *sc, int devaddr, int reg, int val) { uint32_t clk, v; int i; CSR_WRITE_4(sc, ALC_EXT_MDIO, EXT_MDIO_REG(reg) | EXT_MDIO_DEVADDR(devaddr)); if ((sc->alc_flags & ALC_FLAG_LINK) != 0) clk = MDIO_CLK_25_128; else clk = MDIO_CLK_25_4; CSR_WRITE_4(sc, ALC_MDIO, MDIO_OP_EXECUTE | MDIO_OP_WRITE | ((val & MDIO_DATA_MASK) << MDIO_DATA_SHIFT) | MDIO_SUP_PREAMBLE | clk | MDIO_MODE_EXT); for (i = ALC_PHY_TIMEOUT; i > 0; i--) { DELAY(5); v = CSR_READ_4(sc, ALC_MDIO); if ((v & MDIO_OP_BUSY) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "phy ext write timeout : %d, %d\n", devaddr, reg); return (0); } static void alc_dsp_fixup(struct alc_softc *sc, int media) { uint16_t agc, len, val; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) return; if (AR816X_REV(sc->alc_rev) >= AR816X_REV_C0) return; /* * Vendor PHY magic. * 1000BT/AZ, wrong cable length */ if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { len = alc_miiext_readreg(sc, MII_EXT_PCS, MII_EXT_CLDCTL6); len = (len >> EXT_CLDCTL6_CAB_LEN_SHIFT) & EXT_CLDCTL6_CAB_LEN_MASK; agc = alc_miidbg_readreg(sc, MII_DBG_AGC); agc = (agc >> DBG_AGC_2_VGA_SHIFT) & DBG_AGC_2_VGA_MASK; if ((media == IFM_1000_T && len > EXT_CLDCTL6_CAB_LEN_SHORT1G && agc > DBG_AGC_LONG1G_LIMT) || (media == IFM_100_TX && len > DBG_AGC_LONG100M_LIMT && agc > DBG_AGC_LONG1G_LIMT)) { alc_miidbg_writereg(sc, MII_DBG_AZ_ANADECT, DBG_AZ_ANADECT_LONG); val = alc_miiext_readreg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE); val |= ANEG_AFEE_10BT_100M_TH; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE, val); } else { alc_miidbg_writereg(sc, MII_DBG_AZ_ANADECT, DBG_AZ_ANADECT_DEFAULT); val = alc_miiext_readreg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE); val &= ~ANEG_AFEE_10BT_100M_TH; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE, val); } if ((sc->alc_flags & ALC_FLAG_LINK_WAR) != 0 && AR816X_REV(sc->alc_rev) == AR816X_REV_B0) { if (media == IFM_1000_T) { /* * Giga link threshold, raise the tolerance of * noise 50%. */ val = alc_miidbg_readreg(sc, MII_DBG_MSE20DB); val &= ~DBG_MSE20DB_TH_MASK; val |= (DBG_MSE20DB_TH_HI << DBG_MSE20DB_TH_SHIFT); alc_miidbg_writereg(sc, MII_DBG_MSE20DB, val); } else if (media == IFM_100_TX) alc_miidbg_writereg(sc, MII_DBG_MSE16DB, DBG_MSE16DB_UP); } } else { val = alc_miiext_readreg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE); val &= ~ANEG_AFEE_10BT_100M_TH; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_AFE, val); if ((sc->alc_flags & ALC_FLAG_LINK_WAR) != 0 && AR816X_REV(sc->alc_rev) == AR816X_REV_B0) { alc_miidbg_writereg(sc, MII_DBG_MSE16DB, DBG_MSE16DB_DOWN); val = alc_miidbg_readreg(sc, MII_DBG_MSE20DB); val &= ~DBG_MSE20DB_TH_MASK; val |= (DBG_MSE20DB_TH_DEFAULT << DBG_MSE20DB_TH_SHIFT); alc_miidbg_writereg(sc, MII_DBG_MSE20DB, val); } } } static void alc_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct alc_softc *sc; struct mii_data *mii; sc = ifp->if_softc; ALC_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { ALC_UNLOCK(sc); return; } mii = device_get_softc(sc->alc_miibus); mii_pollstat(mii); ifmr->ifm_status = mii->mii_media_status; ifmr->ifm_active = mii->mii_media_active; ALC_UNLOCK(sc); } static int alc_mediachange(struct ifnet *ifp) { struct alc_softc *sc; int error; sc = ifp->if_softc; ALC_LOCK(sc); error = alc_mediachange_locked(sc); ALC_UNLOCK(sc); return (error); } static int alc_mediachange_locked(struct alc_softc *sc) { struct mii_data *mii; struct mii_softc *miisc; int error; ALC_LOCK_ASSERT(sc); mii = device_get_softc(sc->alc_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); error = mii_mediachg(mii); return (error); } static struct alc_ident * alc_find_ident(device_t dev) { struct alc_ident *ident; uint16_t vendor, devid; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); for (ident = alc_ident_table; ident->name != NULL; ident++) { if (vendor == ident->vendorid && devid == ident->deviceid) return (ident); } return (NULL); } static int alc_probe(device_t dev) { struct alc_ident *ident; ident = alc_find_ident(dev); if (ident != NULL) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static void alc_get_macaddr(struct alc_softc *sc) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_get_macaddr_816x(sc); else alc_get_macaddr_813x(sc); } static void alc_get_macaddr_813x(struct alc_softc *sc) { uint32_t opt; uint16_t val; int eeprom, i; eeprom = 0; opt = CSR_READ_4(sc, ALC_OPT_CFG); if ((CSR_READ_4(sc, ALC_MASTER_CFG) & MASTER_OTP_SEL) != 0 && (CSR_READ_4(sc, ALC_TWSI_DEBUG) & TWSI_DEBUG_DEV_EXIST) != 0) { /* * EEPROM found, let TWSI reload EEPROM configuration. * This will set ethernet address of controller. */ eeprom++; switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8131: case DEVICEID_ATHEROS_AR8132: if ((opt & OPT_CFG_CLK_ENB) == 0) { opt |= OPT_CFG_CLK_ENB; CSR_WRITE_4(sc, ALC_OPT_CFG, opt); CSR_READ_4(sc, ALC_OPT_CFG); DELAY(1000); } break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x00); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val & 0xFF7F); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x3B); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val | 0x0008); DELAY(20); break; } CSR_WRITE_4(sc, ALC_LTSSM_ID_CFG, CSR_READ_4(sc, ALC_LTSSM_ID_CFG) & ~LTSSM_ID_WRO_ENB); CSR_WRITE_4(sc, ALC_WOL_CFG, 0); CSR_READ_4(sc, ALC_WOL_CFG); CSR_WRITE_4(sc, ALC_TWSI_CFG, CSR_READ_4(sc, ALC_TWSI_CFG) | TWSI_CFG_SW_LD_START); for (i = 100; i > 0; i--) { DELAY(1000); if ((CSR_READ_4(sc, ALC_TWSI_CFG) & TWSI_CFG_SW_LD_START) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "reloading EEPROM timeout!\n"); } else { if (bootverbose) device_printf(sc->alc_dev, "EEPROM not found!\n"); } if (eeprom != 0) { switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8131: case DEVICEID_ATHEROS_AR8132: if ((opt & OPT_CFG_CLK_ENB) != 0) { opt &= ~OPT_CFG_CLK_ENB; CSR_WRITE_4(sc, ALC_OPT_CFG, opt); CSR_READ_4(sc, ALC_OPT_CFG); DELAY(1000); } break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x00); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val | 0x0080); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x3B); val = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, val & 0xFFF7); DELAY(20); break; } } alc_get_macaddr_par(sc); } static void alc_get_macaddr_816x(struct alc_softc *sc) { uint32_t reg; int i, reloaded; reloaded = 0; /* Try to reload station address via TWSI. */ for (i = 100; i > 0; i--) { reg = CSR_READ_4(sc, ALC_SLD); if ((reg & (SLD_PROGRESS | SLD_START)) == 0) break; DELAY(1000); } if (i != 0) { CSR_WRITE_4(sc, ALC_SLD, reg | SLD_START); for (i = 100; i > 0; i--) { DELAY(1000); reg = CSR_READ_4(sc, ALC_SLD); if ((reg & SLD_START) == 0) break; } if (i != 0) reloaded++; else if (bootverbose) device_printf(sc->alc_dev, "reloading station address via TWSI timed out!\n"); } /* Try to reload station address from EEPROM or FLASH. */ if (reloaded == 0) { reg = CSR_READ_4(sc, ALC_EEPROM_LD); if ((reg & (EEPROM_LD_EEPROM_EXIST | EEPROM_LD_FLASH_EXIST)) != 0) { for (i = 100; i > 0; i--) { reg = CSR_READ_4(sc, ALC_EEPROM_LD); if ((reg & (EEPROM_LD_PROGRESS | EEPROM_LD_START)) == 0) break; DELAY(1000); } if (i != 0) { CSR_WRITE_4(sc, ALC_EEPROM_LD, reg | EEPROM_LD_START); for (i = 100; i > 0; i--) { DELAY(1000); reg = CSR_READ_4(sc, ALC_EEPROM_LD); if ((reg & EEPROM_LD_START) == 0) break; } } else if (bootverbose) device_printf(sc->alc_dev, "reloading EEPROM/FLASH timed out!\n"); } } alc_get_macaddr_par(sc); } static void alc_get_macaddr_par(struct alc_softc *sc) { uint32_t ea[2]; ea[0] = CSR_READ_4(sc, ALC_PAR0); ea[1] = CSR_READ_4(sc, ALC_PAR1); sc->alc_eaddr[0] = (ea[1] >> 8) & 0xFF; sc->alc_eaddr[1] = (ea[1] >> 0) & 0xFF; sc->alc_eaddr[2] = (ea[0] >> 24) & 0xFF; sc->alc_eaddr[3] = (ea[0] >> 16) & 0xFF; sc->alc_eaddr[4] = (ea[0] >> 8) & 0xFF; sc->alc_eaddr[5] = (ea[0] >> 0) & 0xFF; } static void alc_disable_l0s_l1(struct alc_softc *sc) { uint32_t pmcfg; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* Another magic from vendor. */ pmcfg = CSR_READ_4(sc, ALC_PM_CFG); pmcfg &= ~(PM_CFG_L1_ENTRY_TIMER_MASK | PM_CFG_CLK_SWH_L1 | PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK | PM_CFG_SERDES_PD_EX_L1); pmcfg |= PM_CFG_SERDES_BUDS_RX_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_L1_ENB; CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } } static void alc_phy_reset(struct alc_softc *sc) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_phy_reset_816x(sc); else alc_phy_reset_813x(sc); } static void alc_phy_reset_813x(struct alc_softc *sc) { uint16_t data; /* Reset magic from Linux. */ CSR_WRITE_2(sc, ALC_GPHY_CFG, GPHY_CFG_SEL_ANA_RESET); CSR_READ_2(sc, ALC_GPHY_CFG); DELAY(10 * 1000); CSR_WRITE_2(sc, ALC_GPHY_CFG, GPHY_CFG_EXT_RESET | GPHY_CFG_SEL_ANA_RESET); CSR_READ_2(sc, ALC_GPHY_CFG); DELAY(10 * 1000); /* DSP fixup, Vendor magic. */ if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x000A); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data & 0xDFFF); } if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x003B); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data & 0xFFF7); DELAY(20 * 1000); } if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x0029); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, 0x929D); } if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8131 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8132 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) { alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x0029); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, 0xB6DD); } /* Load DSP codes, vendor magic. */ data = ANA_LOOP_SEL_10BT | ANA_EN_MASK_TB | ANA_EN_10BT_IDLE | ((1 << ANA_INTERVAL_SEL_TIMER_SHIFT) & ANA_INTERVAL_SEL_TIMER_MASK); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG18); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((2 << ANA_SERDES_CDR_BW_SHIFT) & ANA_SERDES_CDR_BW_MASK) | ANA_SERDES_EN_DEEM | ANA_SERDES_SEL_HSP | ANA_SERDES_EN_PLL | ANA_SERDES_EN_LCKDT; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG5); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((44 << ANA_LONG_CABLE_TH_100_SHIFT) & ANA_LONG_CABLE_TH_100_MASK) | ((33 << ANA_SHORT_CABLE_TH_100_SHIFT) & ANA_SHORT_CABLE_TH_100_SHIFT) | ANA_BP_BAD_LINK_ACCUM | ANA_BP_SMALL_BW; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG54); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((11 << ANA_IECHO_ADJ_3_SHIFT) & ANA_IECHO_ADJ_3_MASK) | ((11 << ANA_IECHO_ADJ_2_SHIFT) & ANA_IECHO_ADJ_2_MASK) | ((8 << ANA_IECHO_ADJ_1_SHIFT) & ANA_IECHO_ADJ_1_MASK) | ((8 << ANA_IECHO_ADJ_0_SHIFT) & ANA_IECHO_ADJ_0_MASK); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG4); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); data = ((7 & ANA_MANUL_SWICH_ON_SHIFT) & ANA_MANUL_SWICH_ON_MASK) | ANA_RESTART_CAL | ANA_MAN_ENABLE | ANA_SEL_HSP | ANA_EN_HB | ANA_OEN_125M; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, MII_ANA_CFG0); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); DELAY(1000); /* Disable hibernation. */ alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x0029); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); data &= ~0x8000; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_ADDR, 0x000B); data = alc_miibus_readreg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA); data &= ~0x8000; alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, ALC_MII_DBG_DATA, data); } static void alc_phy_reset_816x(struct alc_softc *sc) { uint32_t val; val = CSR_READ_4(sc, ALC_GPHY_CFG); val &= ~(GPHY_CFG_EXT_RESET | GPHY_CFG_LED_MODE | GPHY_CFG_GATE_25M_ENB | GPHY_CFG_PHY_IDDQ | GPHY_CFG_PHY_PLL_ON | GPHY_CFG_PWDOWN_HW | GPHY_CFG_100AB_ENB); val |= GPHY_CFG_SEL_ANA_RESET; #ifdef notyet val |= GPHY_CFG_HIB_PULSE | GPHY_CFG_HIB_EN | GPHY_CFG_SEL_ANA_RESET; #else /* Disable PHY hibernation. */ val &= ~(GPHY_CFG_HIB_PULSE | GPHY_CFG_HIB_EN); #endif CSR_WRITE_4(sc, ALC_GPHY_CFG, val); DELAY(10); CSR_WRITE_4(sc, ALC_GPHY_CFG, val | GPHY_CFG_EXT_RESET); DELAY(800); /* Vendor PHY magic. */ #ifdef notyet alc_miidbg_writereg(sc, MII_DBG_LEGCYPS, DBG_LEGCYPS_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_SYSMODCTL, DBG_SYSMODCTL_DEFAULT); alc_miiext_writereg(sc, MII_EXT_PCS, MII_EXT_VDRVBIAS, EXT_VDRVBIAS_DEFAULT); #else /* Disable PHY hibernation. */ alc_miidbg_writereg(sc, MII_DBG_LEGCYPS, DBG_LEGCYPS_DEFAULT & ~DBG_LEGCYPS_ENB); alc_miidbg_writereg(sc, MII_DBG_HIBNEG, DBG_HIBNEG_DEFAULT & ~(DBG_HIBNEG_PSHIB_EN | DBG_HIBNEG_HIB_PULSE)); alc_miidbg_writereg(sc, MII_DBG_GREENCFG, DBG_GREENCFG_DEFAULT); #endif /* XXX Disable EEE. */ val = CSR_READ_4(sc, ALC_LPI_CTL); val &= ~LPI_CTL_ENB; CSR_WRITE_4(sc, ALC_LPI_CTL, val); alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_LOCAL_EEEADV, 0); /* PHY power saving. */ alc_miidbg_writereg(sc, MII_DBG_TST10BTCFG, DBG_TST10BTCFG_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_SRDSYSMOD, DBG_SRDSYSMOD_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_TST100BTCFG, DBG_TST100BTCFG_DEFAULT); alc_miidbg_writereg(sc, MII_DBG_ANACTL, DBG_ANACTL_DEFAULT); val = alc_miidbg_readreg(sc, MII_DBG_GREENCFG2); val &= ~DBG_GREENCFG2_GATE_DFSE_EN; alc_miidbg_writereg(sc, MII_DBG_GREENCFG2, val); /* RTL8139C, 120m issue. */ alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_NLP78, ANEG_NLP78_120M_DEFAULT); alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_S3DIG10, ANEG_S3DIG10_DEFAULT); if ((sc->alc_flags & ALC_FLAG_LINK_WAR) != 0) { /* Turn off half amplitude. */ val = alc_miiext_readreg(sc, MII_EXT_PCS, MII_EXT_CLDCTL3); val |= EXT_CLDCTL3_BP_CABLE1TH_DET_GT; alc_miiext_writereg(sc, MII_EXT_PCS, MII_EXT_CLDCTL3, val); /* Turn off Green feature. */ val = alc_miidbg_readreg(sc, MII_DBG_GREENCFG2); val |= DBG_GREENCFG2_BP_GREEN; alc_miidbg_writereg(sc, MII_DBG_GREENCFG2, val); /* Turn off half bias. */ val = alc_miiext_readreg(sc, MII_EXT_PCS, MII_EXT_CLDCTL5); val |= EXT_CLDCTL5_BP_VD_HLFBIAS; alc_miiext_writereg(sc, MII_EXT_PCS, MII_EXT_CLDCTL5, val); } } static void alc_phy_down(struct alc_softc *sc) { uint32_t gphy; switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8161: case DEVICEID_ATHEROS_E2200: case DEVICEID_ATHEROS_E2400: case DEVICEID_ATHEROS_E2500: case DEVICEID_ATHEROS_AR8162: case DEVICEID_ATHEROS_AR8171: case DEVICEID_ATHEROS_AR8172: gphy = CSR_READ_4(sc, ALC_GPHY_CFG); gphy &= ~(GPHY_CFG_EXT_RESET | GPHY_CFG_LED_MODE | GPHY_CFG_100AB_ENB | GPHY_CFG_PHY_PLL_ON); gphy |= GPHY_CFG_HIB_EN | GPHY_CFG_HIB_PULSE | GPHY_CFG_SEL_ANA_RESET; gphy |= GPHY_CFG_PHY_IDDQ | GPHY_CFG_PWDOWN_HW; CSR_WRITE_4(sc, ALC_GPHY_CFG, gphy); break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: /* * GPHY power down caused more problems on AR8151 v2.0. * When driver is reloaded after GPHY power down, * accesses to PHY/MAC registers hung the system. Only * cold boot recovered from it. I'm not sure whether * AR8151 v1.0 also requires this one though. I don't * have AR8151 v1.0 controller in hand. * The only option left is to isolate the PHY and * initiates power down the PHY which in turn saves * more power when driver is unloaded. */ alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_BMCR, BMCR_ISO | BMCR_PDOWN); break; default: /* Force PHY down. */ CSR_WRITE_2(sc, ALC_GPHY_CFG, GPHY_CFG_EXT_RESET | GPHY_CFG_SEL_ANA_RESET | GPHY_CFG_PHY_IDDQ | GPHY_CFG_PWDOWN_HW); DELAY(1000); break; } } static void alc_aspm(struct alc_softc *sc, int init, int media) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_aspm_816x(sc, init); else alc_aspm_813x(sc, media); } static void alc_aspm_813x(struct alc_softc *sc, int media) { uint32_t pmcfg; uint16_t linkcfg; if ((sc->alc_flags & ALC_FLAG_LINK) == 0) return; pmcfg = CSR_READ_4(sc, ALC_PM_CFG); if ((sc->alc_flags & (ALC_FLAG_APS | ALC_FLAG_PCIE)) == (ALC_FLAG_APS | ALC_FLAG_PCIE)) linkcfg = CSR_READ_2(sc, sc->alc_expcap + PCIER_LINK_CTL); else linkcfg = 0; pmcfg &= ~PM_CFG_SERDES_PD_EX_L1; pmcfg &= ~(PM_CFG_L1_ENTRY_TIMER_MASK | PM_CFG_LCKDET_TIMER_MASK); pmcfg |= PM_CFG_MAC_ASPM_CHK; pmcfg |= (PM_CFG_LCKDET_TIMER_DEFAULT << PM_CFG_LCKDET_TIMER_SHIFT); pmcfg &= ~(PM_CFG_ASPM_L1_ENB | PM_CFG_ASPM_L0S_ENB); if ((sc->alc_flags & ALC_FLAG_APS) != 0) { /* Disable extended sync except AR8152 B v1.0 */ linkcfg &= ~PCIEM_LINK_CTL_EXTENDED_SYNC; if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B && sc->alc_rev == ATHEROS_AR8152_B_V10) linkcfg |= PCIEM_LINK_CTL_EXTENDED_SYNC; CSR_WRITE_2(sc, sc->alc_expcap + PCIER_LINK_CTL, linkcfg); pmcfg &= ~(PM_CFG_EN_BUFS_RX_L0S | PM_CFG_SA_DLY_ENB | PM_CFG_HOTRST); pmcfg |= (PM_CFG_L1_ENTRY_TIMER_DEFAULT << PM_CFG_L1_ENTRY_TIMER_SHIFT); pmcfg &= ~PM_CFG_PM_REQ_TIMER_MASK; pmcfg |= (PM_CFG_PM_REQ_TIMER_DEFAULT << PM_CFG_PM_REQ_TIMER_SHIFT); pmcfg |= PM_CFG_SERDES_PD_EX_L1 | PM_CFG_PCIE_RECV; } if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { if ((sc->alc_flags & ALC_FLAG_L0S) != 0) pmcfg |= PM_CFG_ASPM_L0S_ENB; if ((sc->alc_flags & ALC_FLAG_L1S) != 0) pmcfg |= PM_CFG_ASPM_L1_ENB; if ((sc->alc_flags & ALC_FLAG_APS) != 0) { if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B) pmcfg &= ~PM_CFG_ASPM_L0S_ENB; pmcfg &= ~(PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_BUDS_RX_L1_ENB); pmcfg |= PM_CFG_CLK_SWH_L1; if (media == IFM_100_TX || media == IFM_1000_T) { pmcfg &= ~PM_CFG_L1_ENTRY_TIMER_MASK; switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_AR8152_B: pmcfg |= (7 << PM_CFG_L1_ENTRY_TIMER_SHIFT); break; case DEVICEID_ATHEROS_AR8152_B2: case DEVICEID_ATHEROS_AR8151_V2: pmcfg |= (4 << PM_CFG_L1_ENTRY_TIMER_SHIFT); break; default: pmcfg |= (15 << PM_CFG_L1_ENTRY_TIMER_SHIFT); break; } } } else { pmcfg |= PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_BUDS_RX_L1_ENB; pmcfg &= ~(PM_CFG_CLK_SWH_L1 | PM_CFG_ASPM_L1_ENB | PM_CFG_ASPM_L0S_ENB); } } else { pmcfg &= ~(PM_CFG_SERDES_BUDS_RX_L1_ENB | PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB); pmcfg |= PM_CFG_CLK_SWH_L1; if ((sc->alc_flags & ALC_FLAG_L1S) != 0) pmcfg |= PM_CFG_ASPM_L1_ENB; } CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } static void alc_aspm_816x(struct alc_softc *sc, int init) { uint32_t pmcfg; pmcfg = CSR_READ_4(sc, ALC_PM_CFG); pmcfg &= ~PM_CFG_L1_ENTRY_TIMER_816X_MASK; pmcfg |= PM_CFG_L1_ENTRY_TIMER_816X_DEFAULT; pmcfg &= ~PM_CFG_PM_REQ_TIMER_MASK; pmcfg |= PM_CFG_PM_REQ_TIMER_816X_DEFAULT; pmcfg &= ~PM_CFG_LCKDET_TIMER_MASK; pmcfg |= PM_CFG_LCKDET_TIMER_DEFAULT; pmcfg |= PM_CFG_SERDES_PD_EX_L1 | PM_CFG_CLK_SWH_L1 | PM_CFG_PCIE_RECV; pmcfg &= ~(PM_CFG_RX_L1_AFTER_L0S | PM_CFG_TX_L1_AFTER_L0S | PM_CFG_ASPM_L1_ENB | PM_CFG_ASPM_L0S_ENB | PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB | PM_CFG_SERDES_BUDS_RX_L1_ENB | PM_CFG_SA_DLY_ENB | PM_CFG_MAC_ASPM_CHK | PM_CFG_HOTRST); if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) pmcfg |= PM_CFG_SERDES_L1_ENB | PM_CFG_SERDES_PLL_L1_ENB; if ((sc->alc_flags & ALC_FLAG_LINK) != 0) { /* Link up, enable both L0s, L1s. */ pmcfg |= PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK; } else { if (init != 0) pmcfg |= PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK; else if ((sc->alc_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) pmcfg |= PM_CFG_ASPM_L1_ENB | PM_CFG_MAC_ASPM_CHK; } CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } static void alc_init_pcie(struct alc_softc *sc) { const char *aspm_state[] = { "L0s/L1", "L0s", "L1", "L0s/L1" }; uint32_t cap, ctl, val; int state; /* Clear data link and flow-control protocol error. */ val = CSR_READ_4(sc, ALC_PEX_UNC_ERR_SEV); val &= ~(PEX_UNC_ERR_SEV_DLP | PEX_UNC_ERR_SEV_FCP); CSR_WRITE_4(sc, ALC_PEX_UNC_ERR_SEV, val); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { CSR_WRITE_4(sc, ALC_LTSSM_ID_CFG, CSR_READ_4(sc, ALC_LTSSM_ID_CFG) & ~LTSSM_ID_WRO_ENB); CSR_WRITE_4(sc, ALC_PCIE_PHYMISC, CSR_READ_4(sc, ALC_PCIE_PHYMISC) | PCIE_PHYMISC_FORCE_RCV_DET); if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B && sc->alc_rev == ATHEROS_AR8152_B_V10) { val = CSR_READ_4(sc, ALC_PCIE_PHYMISC2); val &= ~(PCIE_PHYMISC2_SERDES_CDR_MASK | PCIE_PHYMISC2_SERDES_TH_MASK); val |= 3 << PCIE_PHYMISC2_SERDES_CDR_SHIFT; val |= 3 << PCIE_PHYMISC2_SERDES_TH_SHIFT; CSR_WRITE_4(sc, ALC_PCIE_PHYMISC2, val); } /* Disable ASPM L0S and L1. */ cap = CSR_READ_2(sc, sc->alc_expcap + PCIER_LINK_CAP); if ((cap & PCIEM_LINK_CAP_ASPM) != 0) { ctl = CSR_READ_2(sc, sc->alc_expcap + PCIER_LINK_CTL); if ((ctl & PCIEM_LINK_CTL_RCB) != 0) sc->alc_rcb = DMA_CFG_RCB_128; if (bootverbose) device_printf(sc->alc_dev, "RCB %u bytes\n", sc->alc_rcb == DMA_CFG_RCB_64 ? 64 : 128); state = ctl & PCIEM_LINK_CTL_ASPMC; if (state & PCIEM_LINK_CTL_ASPMC_L0S) sc->alc_flags |= ALC_FLAG_L0S; if (state & PCIEM_LINK_CTL_ASPMC_L1) sc->alc_flags |= ALC_FLAG_L1S; if (bootverbose) device_printf(sc->alc_dev, "ASPM %s %s\n", aspm_state[state], state == 0 ? "disabled" : "enabled"); alc_disable_l0s_l1(sc); } else { if (bootverbose) device_printf(sc->alc_dev, "no ASPM support\n"); } } else { val = CSR_READ_4(sc, ALC_PDLL_TRNS1); val &= ~PDLL_TRNS1_D3PLLOFF_ENB; CSR_WRITE_4(sc, ALC_PDLL_TRNS1, val); val = CSR_READ_4(sc, ALC_MASTER_CFG); if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) { if ((val & MASTER_WAKEN_25M) == 0 || (val & MASTER_CLK_SEL_DIS) == 0) { val |= MASTER_WAKEN_25M | MASTER_CLK_SEL_DIS; CSR_WRITE_4(sc, ALC_MASTER_CFG, val); } } else { if ((val & MASTER_WAKEN_25M) == 0 || (val & MASTER_CLK_SEL_DIS) != 0) { val |= MASTER_WAKEN_25M; val &= ~MASTER_CLK_SEL_DIS; CSR_WRITE_4(sc, ALC_MASTER_CFG, val); } } } alc_aspm(sc, 1, IFM_UNKNOWN); } static void alc_config_msi(struct alc_softc *sc) { uint32_t ctl, mod; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { /* * It seems interrupt moderation is controlled by * ALC_MSI_RETRANS_TIMER register if MSI/MSIX is active. * Driver uses RX interrupt moderation parameter to * program ALC_MSI_RETRANS_TIMER register. */ ctl = CSR_READ_4(sc, ALC_MSI_RETRANS_TIMER); ctl &= ~MSI_RETRANS_TIMER_MASK; ctl &= ~MSI_RETRANS_MASK_SEL_LINE; mod = ALC_USECS(sc->alc_int_rx_mod); if (mod == 0) mod = 1; ctl |= mod; if ((sc->alc_flags & ALC_FLAG_MSIX) != 0) CSR_WRITE_4(sc, ALC_MSI_RETRANS_TIMER, ctl | MSI_RETRANS_MASK_SEL_STD); else if ((sc->alc_flags & ALC_FLAG_MSI) != 0) CSR_WRITE_4(sc, ALC_MSI_RETRANS_TIMER, ctl | MSI_RETRANS_MASK_SEL_LINE); else CSR_WRITE_4(sc, ALC_MSI_RETRANS_TIMER, 0); } } static int alc_attach(device_t dev) { struct alc_softc *sc; struct ifnet *ifp; int base, error, i, msic, msixc; uint16_t burst; error = 0; sc = device_get_softc(dev); sc->alc_dev = dev; sc->alc_rev = pci_get_revid(dev); mtx_init(&sc->alc_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->alc_tick_ch, &sc->alc_mtx, 0); TASK_INIT(&sc->alc_int_task, 0, alc_int_task, sc); sc->alc_ident = alc_find_ident(dev); /* Map the device. */ pci_enable_busmaster(dev); sc->alc_res_spec = alc_res_spec_mem; sc->alc_irq_spec = alc_irq_spec_legacy; error = bus_alloc_resources(dev, sc->alc_res_spec, sc->alc_res); if (error != 0) { device_printf(dev, "cannot allocate memory resources.\n"); goto fail; } /* Set PHY address. */ sc->alc_phyaddr = ALC_PHY_ADDR; /* * One odd thing is AR8132 uses the same PHY hardware(F1 * gigabit PHY) of AR8131. So atphy(4) of AR8132 reports * the PHY supports 1000Mbps but that's not true. The PHY * used in AR8132 can't establish gigabit link even if it * shows the same PHY model/revision number of AR8131. */ switch (sc->alc_ident->deviceid) { case DEVICEID_ATHEROS_E2200: case DEVICEID_ATHEROS_E2400: case DEVICEID_ATHEROS_E2500: sc->alc_flags |= ALC_FLAG_E2X00; /* FALLTHROUGH */ case DEVICEID_ATHEROS_AR8161: if (pci_get_subvendor(dev) == VENDORID_ATHEROS && pci_get_subdevice(dev) == 0x0091 && sc->alc_rev == 0) sc->alc_flags |= ALC_FLAG_LINK_WAR; /* FALLTHROUGH */ case DEVICEID_ATHEROS_AR8171: sc->alc_flags |= ALC_FLAG_AR816X_FAMILY; break; case DEVICEID_ATHEROS_AR8162: case DEVICEID_ATHEROS_AR8172: sc->alc_flags |= ALC_FLAG_FASTETHER | ALC_FLAG_AR816X_FAMILY; break; case DEVICEID_ATHEROS_AR8152_B: case DEVICEID_ATHEROS_AR8152_B2: sc->alc_flags |= ALC_FLAG_APS; /* FALLTHROUGH */ case DEVICEID_ATHEROS_AR8132: sc->alc_flags |= ALC_FLAG_FASTETHER; break; case DEVICEID_ATHEROS_AR8151: case DEVICEID_ATHEROS_AR8151_V2: sc->alc_flags |= ALC_FLAG_APS; /* FALLTHROUGH */ default: break; } sc->alc_flags |= ALC_FLAG_JUMBO; /* * It seems that AR813x/AR815x has silicon bug for SMB. In * addition, Atheros said that enabling SMB wouldn't improve * performance. However I think it's bad to access lots of * registers to extract MAC statistics. */ sc->alc_flags |= ALC_FLAG_SMB_BUG; /* * Don't use Tx CMB. It is known to have silicon bug. */ sc->alc_flags |= ALC_FLAG_CMB_BUG; sc->alc_chip_rev = CSR_READ_4(sc, ALC_MASTER_CFG) >> MASTER_CHIP_REV_SHIFT; if (bootverbose) { device_printf(dev, "PCI device revision : 0x%04x\n", sc->alc_rev); device_printf(dev, "Chip id/revision : 0x%04x\n", sc->alc_chip_rev); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) device_printf(dev, "AR816x revision : 0x%x\n", AR816X_REV(sc->alc_rev)); } device_printf(dev, "%u Tx FIFO, %u Rx FIFO\n", CSR_READ_4(sc, ALC_SRAM_TX_FIFO_LEN) * 8, CSR_READ_4(sc, ALC_SRAM_RX_FIFO_LEN) * 8); /* Initialize DMA parameters. */ sc->alc_dma_rd_burst = 0; sc->alc_dma_wr_burst = 0; sc->alc_rcb = DMA_CFG_RCB_64; if (pci_find_cap(dev, PCIY_EXPRESS, &base) == 0) { sc->alc_flags |= ALC_FLAG_PCIE; sc->alc_expcap = base; burst = CSR_READ_2(sc, base + PCIER_DEVICE_CTL); sc->alc_dma_rd_burst = (burst & PCIEM_CTL_MAX_READ_REQUEST) >> 12; sc->alc_dma_wr_burst = (burst & PCIEM_CTL_MAX_PAYLOAD) >> 5; if (bootverbose) { device_printf(dev, "Read request size : %u bytes.\n", alc_dma_burst[sc->alc_dma_rd_burst]); device_printf(dev, "TLP payload size : %u bytes.\n", alc_dma_burst[sc->alc_dma_wr_burst]); } if (alc_dma_burst[sc->alc_dma_rd_burst] > 1024) sc->alc_dma_rd_burst = 3; if (alc_dma_burst[sc->alc_dma_wr_burst] > 1024) sc->alc_dma_wr_burst = 3; /* * Force maximum payload size to 128 bytes for * E2200/E2400/E2500. * Otherwise it triggers DMA write error. */ if ((sc->alc_flags & ALC_FLAG_E2X00) != 0) sc->alc_dma_wr_burst = 0; alc_init_pcie(sc); } /* Reset PHY. */ alc_phy_reset(sc); /* Reset the ethernet controller. */ alc_stop_mac(sc); alc_reset(sc); /* Allocate IRQ resources. */ msixc = pci_msix_count(dev); msic = pci_msi_count(dev); if (bootverbose) { device_printf(dev, "MSIX count : %d\n", msixc); device_printf(dev, "MSI count : %d\n", msic); } if (msixc > 1) msixc = 1; if (msic > 1) msic = 1; /* * Prefer MSIX over MSI. * AR816x controller has a silicon bug that MSI interrupt * does not assert if PCIM_CMD_INTxDIS bit of command * register is set. pci(4) was taught to handle that case. */ if (msix_disable == 0 || msi_disable == 0) { if (msix_disable == 0 && msixc > 0 && pci_alloc_msix(dev, &msixc) == 0) { if (msic == 1) { device_printf(dev, "Using %d MSIX message(s).\n", msixc); sc->alc_flags |= ALC_FLAG_MSIX; sc->alc_irq_spec = alc_irq_spec_msix; } else pci_release_msi(dev); } if (msi_disable == 0 && (sc->alc_flags & ALC_FLAG_MSIX) == 0 && msic > 0 && pci_alloc_msi(dev, &msic) == 0) { if (msic == 1) { device_printf(dev, "Using %d MSI message(s).\n", msic); sc->alc_flags |= ALC_FLAG_MSI; sc->alc_irq_spec = alc_irq_spec_msi; } else pci_release_msi(dev); } } error = bus_alloc_resources(dev, sc->alc_irq_spec, sc->alc_irq); if (error != 0) { device_printf(dev, "cannot allocate IRQ resources.\n"); goto fail; } /* Create device sysctl node. */ alc_sysctl_node(sc); if ((error = alc_dma_alloc(sc)) != 0) goto fail; /* Load station address. */ alc_get_macaddr(sc); ifp = sc->alc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure.\n"); error = ENXIO; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = alc_ioctl; ifp->if_start = alc_start; ifp->if_init = alc_init; ifp->if_snd.ifq_drv_maxlen = ALC_TX_RING_CNT - 1; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO4; ifp->if_hwassist = ALC_CSUM_FEATURES | CSUM_TSO; if (pci_find_cap(dev, PCIY_PMG, &base) == 0) { ifp->if_capabilities |= IFCAP_WOL_MAGIC | IFCAP_WOL_MCAST; sc->alc_flags |= ALC_FLAG_PM; sc->alc_pmcap = base; } ifp->if_capenable = ifp->if_capabilities; /* Set up MII bus. */ error = mii_attach(dev, &sc->alc_miibus, ifp, alc_mediachange, alc_mediastatus, BMSR_DEFCAPMASK, sc->alc_phyaddr, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } ether_ifattach(ifp, sc->alc_eaddr); /* VLAN capability setup. */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; ifp->if_capenable = ifp->if_capabilities; /* * XXX * It seems enabling Tx checksum offloading makes more trouble. * Sometimes the controller does not receive any frames when * Tx checksum offloading is enabled. I'm not sure whether this * is a bug in Tx checksum offloading logic or I got broken * sample boards. To safety, don't enable Tx checksum offloading * by default but give chance to users to toggle it if they know * their controllers work without problems. * Fortunately, Tx checksum offloading for AR816x family * seems to work. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { ifp->if_capenable &= ~IFCAP_TXCSUM; ifp->if_hwassist &= ~ALC_CSUM_FEATURES; } /* Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* Create local taskq. */ sc->alc_tq = taskqueue_create_fast("alc_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->alc_tq); if (sc->alc_tq == NULL) { device_printf(dev, "could not create taskqueue.\n"); ether_ifdetach(ifp); error = ENXIO; goto fail; } taskqueue_start_threads(&sc->alc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->alc_dev)); alc_config_msi(sc); if ((sc->alc_flags & ALC_FLAG_MSIX) != 0) msic = ALC_MSIX_MESSAGES; else if ((sc->alc_flags & ALC_FLAG_MSI) != 0) msic = ALC_MSI_MESSAGES; else msic = 1; for (i = 0; i < msic; i++) { error = bus_setup_intr(dev, sc->alc_irq[i], INTR_TYPE_NET | INTR_MPSAFE, alc_intr, NULL, sc, &sc->alc_intrhand[i]); if (error != 0) break; } if (error != 0) { device_printf(dev, "could not set up interrupt handler.\n"); taskqueue_free(sc->alc_tq); sc->alc_tq = NULL; ether_ifdetach(ifp); goto fail; } /* Attach driver netdump methods. */ NETDUMP_SET(ifp, alc); fail: if (error != 0) alc_detach(dev); return (error); } static int alc_detach(device_t dev) { struct alc_softc *sc; struct ifnet *ifp; int i, msic; sc = device_get_softc(dev); ifp = sc->alc_ifp; if (device_is_attached(dev)) { ether_ifdetach(ifp); ALC_LOCK(sc); alc_stop(sc); ALC_UNLOCK(sc); callout_drain(&sc->alc_tick_ch); taskqueue_drain(sc->alc_tq, &sc->alc_int_task); } if (sc->alc_tq != NULL) { taskqueue_drain(sc->alc_tq, &sc->alc_int_task); taskqueue_free(sc->alc_tq); sc->alc_tq = NULL; } if (sc->alc_miibus != NULL) { device_delete_child(dev, sc->alc_miibus); sc->alc_miibus = NULL; } bus_generic_detach(dev); alc_dma_free(sc); if (ifp != NULL) { if_free(ifp); sc->alc_ifp = NULL; } if ((sc->alc_flags & ALC_FLAG_MSIX) != 0) msic = ALC_MSIX_MESSAGES; else if ((sc->alc_flags & ALC_FLAG_MSI) != 0) msic = ALC_MSI_MESSAGES; else msic = 1; for (i = 0; i < msic; i++) { if (sc->alc_intrhand[i] != NULL) { bus_teardown_intr(dev, sc->alc_irq[i], sc->alc_intrhand[i]); sc->alc_intrhand[i] = NULL; } } if (sc->alc_res[0] != NULL) alc_phy_down(sc); bus_release_resources(dev, sc->alc_irq_spec, sc->alc_irq); if ((sc->alc_flags & (ALC_FLAG_MSI | ALC_FLAG_MSIX)) != 0) pci_release_msi(dev); bus_release_resources(dev, sc->alc_res_spec, sc->alc_res); mtx_destroy(&sc->alc_mtx); return (0); } #define ALC_SYSCTL_STAT_ADD32(c, h, n, p, d) \ SYSCTL_ADD_UINT(c, h, OID_AUTO, n, CTLFLAG_RD, p, 0, d) #define ALC_SYSCTL_STAT_ADD64(c, h, n, p, d) \ SYSCTL_ADD_UQUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d) static void alc_sysctl_node(struct alc_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child, *parent; struct sysctl_oid *tree; struct alc_hw_stats *stats; int error; stats = &sc->alc_stats; ctx = device_get_sysctl_ctx(sc->alc_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->alc_dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "int_rx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->alc_int_rx_mod, 0, sysctl_hw_alc_int_mod, "I", "alc Rx interrupt moderation"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "int_tx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->alc_int_tx_mod, 0, sysctl_hw_alc_int_mod, "I", "alc Tx interrupt moderation"); /* Pull in device tunables. */ sc->alc_int_rx_mod = ALC_IM_RX_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->alc_dev), device_get_unit(sc->alc_dev), "int_rx_mod", &sc->alc_int_rx_mod); if (error == 0) { if (sc->alc_int_rx_mod < ALC_IM_TIMER_MIN || sc->alc_int_rx_mod > ALC_IM_TIMER_MAX) { device_printf(sc->alc_dev, "int_rx_mod value out of " "range; using default: %d\n", ALC_IM_RX_TIMER_DEFAULT); sc->alc_int_rx_mod = ALC_IM_RX_TIMER_DEFAULT; } } sc->alc_int_tx_mod = ALC_IM_TX_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->alc_dev), device_get_unit(sc->alc_dev), "int_tx_mod", &sc->alc_int_tx_mod); if (error == 0) { if (sc->alc_int_tx_mod < ALC_IM_TIMER_MIN || sc->alc_int_tx_mod > ALC_IM_TIMER_MAX) { device_printf(sc->alc_dev, "int_tx_mod value out of " "range; using default: %d\n", ALC_IM_TX_TIMER_DEFAULT); sc->alc_int_tx_mod = ALC_IM_TX_TIMER_DEFAULT; } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "process_limit", CTLTYPE_INT | CTLFLAG_RW, &sc->alc_process_limit, 0, sysctl_hw_alc_proc_limit, "I", "max number of Rx events to process"); /* Pull in device tunables. */ sc->alc_process_limit = ALC_PROC_DEFAULT; error = resource_int_value(device_get_name(sc->alc_dev), device_get_unit(sc->alc_dev), "process_limit", &sc->alc_process_limit); if (error == 0) { if (sc->alc_process_limit < ALC_PROC_MIN || sc->alc_process_limit > ALC_PROC_MAX) { device_printf(sc->alc_dev, "process_limit value out of range; " "using default: %d\n", ALC_PROC_DEFAULT); sc->alc_process_limit = ALC_PROC_DEFAULT; } } tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "ALC statistics"); parent = SYSCTL_CHILDREN(tree); /* Rx statistics. */ tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx", CTLFLAG_RD, NULL, "Rx MAC statistics"); child = SYSCTL_CHILDREN(tree); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_frames", &stats->rx_frames, "Good frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_bcast_frames", &stats->rx_bcast_frames, "Good broadcast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_mcast_frames", &stats->rx_mcast_frames, "Good multicast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "pause_frames", &stats->rx_pause_frames, "Pause control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "control_frames", &stats->rx_control_frames, "Control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "crc_errs", &stats->rx_crcerrs, "CRC errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "len_errs", &stats->rx_lenerrs, "Frames with length mismatched"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_octets", &stats->rx_bytes, "Good octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_bcast_octets", &stats->rx_bcast_bytes, "Good broadcast octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_mcast_octets", &stats->rx_mcast_bytes, "Good multicast octets"); ALC_SYSCTL_STAT_ADD32(ctx, child, "runts", &stats->rx_runts, "Too short frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "fragments", &stats->rx_fragments, "Fragmented frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_64", &stats->rx_pkts_64, "64 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_65_127", &stats->rx_pkts_65_127, "65 to 127 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_128_255", &stats->rx_pkts_128_255, "128 to 255 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_256_511", &stats->rx_pkts_256_511, "256 to 511 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_512_1023", &stats->rx_pkts_512_1023, "512 to 1023 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1024_1518", &stats->rx_pkts_1024_1518, "1024 to 1518 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1519_max", &stats->rx_pkts_1519_max, "1519 to max frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "trunc_errs", &stats->rx_pkts_truncated, "Truncated frames due to MTU size"); ALC_SYSCTL_STAT_ADD32(ctx, child, "fifo_oflows", &stats->rx_fifo_oflows, "FIFO overflows"); ALC_SYSCTL_STAT_ADD32(ctx, child, "rrs_errs", &stats->rx_rrs_errs, "Return status write-back errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "align_errs", &stats->rx_alignerrs, "Alignment errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "filtered", &stats->rx_pkts_filtered, "Frames dropped due to address filtering"); /* Tx statistics. */ tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "tx", CTLFLAG_RD, NULL, "Tx MAC statistics"); child = SYSCTL_CHILDREN(tree); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_frames", &stats->tx_frames, "Good frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_bcast_frames", &stats->tx_bcast_frames, "Good broadcast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "good_mcast_frames", &stats->tx_mcast_frames, "Good multicast frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "pause_frames", &stats->tx_pause_frames, "Pause control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "control_frames", &stats->tx_control_frames, "Control frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "excess_defers", &stats->tx_excess_defer, "Frames with excessive derferrals"); ALC_SYSCTL_STAT_ADD32(ctx, child, "defers", &stats->tx_excess_defer, "Frames with derferrals"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_octets", &stats->tx_bytes, "Good octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_bcast_octets", &stats->tx_bcast_bytes, "Good broadcast octets"); ALC_SYSCTL_STAT_ADD64(ctx, child, "good_mcast_octets", &stats->tx_mcast_bytes, "Good multicast octets"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_64", &stats->tx_pkts_64, "64 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_65_127", &stats->tx_pkts_65_127, "65 to 127 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_128_255", &stats->tx_pkts_128_255, "128 to 255 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_256_511", &stats->tx_pkts_256_511, "256 to 511 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_512_1023", &stats->tx_pkts_512_1023, "512 to 1023 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1024_1518", &stats->tx_pkts_1024_1518, "1024 to 1518 bytes frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "frames_1519_max", &stats->tx_pkts_1519_max, "1519 to max frames"); ALC_SYSCTL_STAT_ADD32(ctx, child, "single_colls", &stats->tx_single_colls, "Single collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "multi_colls", &stats->tx_multi_colls, "Multiple collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "late_colls", &stats->tx_late_colls, "Late collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "excess_colls", &stats->tx_excess_colls, "Excessive collisions"); ALC_SYSCTL_STAT_ADD32(ctx, child, "underruns", &stats->tx_underrun, "FIFO underruns"); ALC_SYSCTL_STAT_ADD32(ctx, child, "desc_underruns", &stats->tx_desc_underrun, "Descriptor write-back errors"); ALC_SYSCTL_STAT_ADD32(ctx, child, "len_errs", &stats->tx_lenerrs, "Frames with length mismatched"); ALC_SYSCTL_STAT_ADD32(ctx, child, "trunc_errs", &stats->tx_pkts_truncated, "Truncated frames due to MTU size"); } #undef ALC_SYSCTL_STAT_ADD32 #undef ALC_SYSCTL_STAT_ADD64 struct alc_dmamap_arg { bus_addr_t alc_busaddr; }; static void alc_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct alc_dmamap_arg *ctx; if (error != 0) return; KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); ctx = (struct alc_dmamap_arg *)arg; ctx->alc_busaddr = segs[0].ds_addr; } /* * Normal and high Tx descriptors shares single Tx high address. * Four Rx descriptor/return rings and CMB shares the same Rx * high address. */ static int alc_check_boundary(struct alc_softc *sc) { bus_addr_t cmb_end, rx_ring_end, rr_ring_end, tx_ring_end; rx_ring_end = sc->alc_rdata.alc_rx_ring_paddr + ALC_RX_RING_SZ; rr_ring_end = sc->alc_rdata.alc_rr_ring_paddr + ALC_RR_RING_SZ; cmb_end = sc->alc_rdata.alc_cmb_paddr + ALC_CMB_SZ; tx_ring_end = sc->alc_rdata.alc_tx_ring_paddr + ALC_TX_RING_SZ; /* 4GB boundary crossing is not allowed. */ if ((ALC_ADDR_HI(rx_ring_end) != ALC_ADDR_HI(sc->alc_rdata.alc_rx_ring_paddr)) || (ALC_ADDR_HI(rr_ring_end) != ALC_ADDR_HI(sc->alc_rdata.alc_rr_ring_paddr)) || (ALC_ADDR_HI(cmb_end) != ALC_ADDR_HI(sc->alc_rdata.alc_cmb_paddr)) || (ALC_ADDR_HI(tx_ring_end) != ALC_ADDR_HI(sc->alc_rdata.alc_tx_ring_paddr))) return (EFBIG); /* * Make sure Rx return descriptor/Rx descriptor/CMB use * the same high address. */ if ((ALC_ADDR_HI(rx_ring_end) != ALC_ADDR_HI(rr_ring_end)) || (ALC_ADDR_HI(rx_ring_end) != ALC_ADDR_HI(cmb_end))) return (EFBIG); return (0); } static int alc_dma_alloc(struct alc_softc *sc) { struct alc_txdesc *txd; struct alc_rxdesc *rxd; bus_addr_t lowaddr; struct alc_dmamap_arg ctx; int error, i; lowaddr = BUS_SPACE_MAXADDR; again: /* Create parent DMA tag. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->alc_dev), /* parent */ 1, 0, /* alignment, boundary */ lowaddr, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_parent_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create parent DMA tag.\n"); goto fail; } /* Create DMA tag for Tx descriptor ring. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_TX_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_TX_RING_SZ, /* maxsize */ 1, /* nsegments */ ALC_TX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_tx_ring_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Tx ring DMA tag.\n"); goto fail; } /* Create DMA tag for Rx free descriptor ring. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_RX_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ ALC_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_rx_ring_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx ring DMA tag.\n"); goto fail; } /* Create DMA tag for Rx return descriptor ring. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_RR_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_RR_RING_SZ, /* maxsize */ 1, /* nsegments */ ALC_RR_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_rr_ring_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx return ring DMA tag.\n"); goto fail; } /* Create DMA tag for coalescing message block. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_CMB_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_CMB_SZ, /* maxsize */ 1, /* nsegments */ ALC_CMB_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_cmb_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create CMB DMA tag.\n"); goto fail; } /* Create DMA tag for status message block. */ error = bus_dma_tag_create( sc->alc_cdata.alc_parent_tag, /* parent */ ALC_SMB_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_SMB_SZ, /* maxsize */ 1, /* nsegments */ ALC_SMB_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_smb_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create SMB DMA tag.\n"); goto fail; } /* Allocate DMA'able memory and load the DMA map for Tx ring. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_tx_ring_tag, (void **)&sc->alc_rdata.alc_tx_ring, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_tx_ring_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for Tx ring.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, sc->alc_rdata.alc_tx_ring, ALC_TX_RING_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for Tx ring.\n"); goto fail; } sc->alc_rdata.alc_tx_ring_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx ring. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_rx_ring_tag, (void **)&sc->alc_rdata.alc_rx_ring, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_rx_ring_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for Rx ring.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, sc->alc_rdata.alc_rx_ring, ALC_RX_RING_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for Rx ring.\n"); goto fail; } sc->alc_rdata.alc_rx_ring_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx return ring. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_rr_ring_tag, (void **)&sc->alc_rdata.alc_rr_ring, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_rr_ring_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for Rx return ring.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, sc->alc_rdata.alc_rr_ring, ALC_RR_RING_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for Tx ring.\n"); goto fail; } sc->alc_rdata.alc_rr_ring_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for CMB. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_cmb_tag, (void **)&sc->alc_rdata.alc_cmb, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_cmb_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for CMB.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, sc->alc_rdata.alc_cmb, ALC_CMB_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for CMB.\n"); goto fail; } sc->alc_rdata.alc_cmb_paddr = ctx.alc_busaddr; /* Allocate DMA'able memory and load the DMA map for SMB. */ error = bus_dmamem_alloc(sc->alc_cdata.alc_smb_tag, (void **)&sc->alc_rdata.alc_smb, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->alc_cdata.alc_smb_map); if (error != 0) { device_printf(sc->alc_dev, "could not allocate DMA'able memory for SMB.\n"); goto fail; } ctx.alc_busaddr = 0; error = bus_dmamap_load(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, sc->alc_rdata.alc_smb, ALC_SMB_SZ, alc_dmamap_cb, &ctx, 0); if (error != 0 || ctx.alc_busaddr == 0) { device_printf(sc->alc_dev, "could not load DMA'able memory for CMB.\n"); goto fail; } sc->alc_rdata.alc_smb_paddr = ctx.alc_busaddr; /* Make sure we've not crossed 4GB boundary. */ if (lowaddr != BUS_SPACE_MAXADDR_32BIT && (error = alc_check_boundary(sc)) != 0) { device_printf(sc->alc_dev, "4GB boundary crossed, " "switching to 32bit DMA addressing mode.\n"); alc_dma_free(sc); /* * Limit max allowable DMA address space to 32bit * and try again. */ lowaddr = BUS_SPACE_MAXADDR_32BIT; goto again; } /* * Create Tx buffer parent tag. * AR81[3567]x allows 64bit DMA addressing of Tx/Rx buffers * so it needs separate parent DMA tag as parent DMA address * space could be restricted to be within 32bit address space * by 4GB boundary crossing. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->alc_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_buffer_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create parent buffer DMA tag.\n"); goto fail; } /* Create DMA tag for Tx buffers. */ error = bus_dma_tag_create( sc->alc_cdata.alc_buffer_tag, /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ALC_TSO_MAXSIZE, /* maxsize */ ALC_MAXTXSEGS, /* nsegments */ ALC_TSO_MAXSEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_tx_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Tx DMA tag.\n"); goto fail; } /* Create DMA tag for Rx buffers. */ error = bus_dma_tag_create( sc->alc_cdata.alc_buffer_tag, /* parent */ ALC_RX_BUF_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->alc_cdata.alc_rx_tag); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx DMA tag.\n"); goto fail; } /* Create DMA maps for Tx buffers. */ for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = NULL; error = bus_dmamap_create(sc->alc_cdata.alc_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc->alc_dev, "could not create Tx dmamap.\n"); goto fail; } } /* Create DMA maps for Rx buffers. */ if ((error = bus_dmamap_create(sc->alc_cdata.alc_rx_tag, 0, &sc->alc_cdata.alc_rx_sparemap)) != 0) { device_printf(sc->alc_dev, "could not create spare Rx dmamap.\n"); goto fail; } for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = NULL; error = bus_dmamap_create(sc->alc_cdata.alc_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc->alc_dev, "could not create Rx dmamap.\n"); goto fail; } } fail: return (error); } static void alc_dma_free(struct alc_softc *sc) { struct alc_txdesc *txd; struct alc_rxdesc *rxd; int i; /* Tx buffers. */ if (sc->alc_cdata.alc_tx_tag != NULL) { for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; if (txd->tx_dmamap != NULL) { bus_dmamap_destroy(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap); txd->tx_dmamap = NULL; } } bus_dma_tag_destroy(sc->alc_cdata.alc_tx_tag); sc->alc_cdata.alc_tx_tag = NULL; } /* Rx buffers */ if (sc->alc_cdata.alc_rx_tag != NULL) { for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; if (rxd->rx_dmamap != NULL) { bus_dmamap_destroy(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = NULL; } } if (sc->alc_cdata.alc_rx_sparemap != NULL) { bus_dmamap_destroy(sc->alc_cdata.alc_rx_tag, sc->alc_cdata.alc_rx_sparemap); sc->alc_cdata.alc_rx_sparemap = NULL; } bus_dma_tag_destroy(sc->alc_cdata.alc_rx_tag); sc->alc_cdata.alc_rx_tag = NULL; } /* Tx descriptor ring. */ if (sc->alc_cdata.alc_tx_ring_tag != NULL) { if (sc->alc_rdata.alc_tx_ring_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map); if (sc->alc_rdata.alc_tx_ring != NULL) bus_dmamem_free(sc->alc_cdata.alc_tx_ring_tag, sc->alc_rdata.alc_tx_ring, sc->alc_cdata.alc_tx_ring_map); sc->alc_rdata.alc_tx_ring_paddr = 0; sc->alc_rdata.alc_tx_ring = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_tx_ring_tag); sc->alc_cdata.alc_tx_ring_tag = NULL; } /* Rx ring. */ if (sc->alc_cdata.alc_rx_ring_tag != NULL) { if (sc->alc_rdata.alc_rx_ring_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map); if (sc->alc_rdata.alc_rx_ring != NULL) bus_dmamem_free(sc->alc_cdata.alc_rx_ring_tag, sc->alc_rdata.alc_rx_ring, sc->alc_cdata.alc_rx_ring_map); sc->alc_rdata.alc_rx_ring_paddr = 0; sc->alc_rdata.alc_rx_ring = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_rx_ring_tag); sc->alc_cdata.alc_rx_ring_tag = NULL; } /* Rx return ring. */ if (sc->alc_cdata.alc_rr_ring_tag != NULL) { if (sc->alc_rdata.alc_rr_ring_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map); if (sc->alc_rdata.alc_rr_ring != NULL) bus_dmamem_free(sc->alc_cdata.alc_rr_ring_tag, sc->alc_rdata.alc_rr_ring, sc->alc_cdata.alc_rr_ring_map); sc->alc_rdata.alc_rr_ring_paddr = 0; sc->alc_rdata.alc_rr_ring = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_rr_ring_tag); sc->alc_cdata.alc_rr_ring_tag = NULL; } /* CMB block */ if (sc->alc_cdata.alc_cmb_tag != NULL) { if (sc->alc_rdata.alc_cmb_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map); if (sc->alc_rdata.alc_cmb != NULL) bus_dmamem_free(sc->alc_cdata.alc_cmb_tag, sc->alc_rdata.alc_cmb, sc->alc_cdata.alc_cmb_map); sc->alc_rdata.alc_cmb_paddr = 0; sc->alc_rdata.alc_cmb = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_cmb_tag); sc->alc_cdata.alc_cmb_tag = NULL; } /* SMB block */ if (sc->alc_cdata.alc_smb_tag != NULL) { if (sc->alc_rdata.alc_smb_paddr != 0) bus_dmamap_unload(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map); if (sc->alc_rdata.alc_smb != NULL) bus_dmamem_free(sc->alc_cdata.alc_smb_tag, sc->alc_rdata.alc_smb, sc->alc_cdata.alc_smb_map); sc->alc_rdata.alc_smb_paddr = 0; sc->alc_rdata.alc_smb = NULL; bus_dma_tag_destroy(sc->alc_cdata.alc_smb_tag); sc->alc_cdata.alc_smb_tag = NULL; } if (sc->alc_cdata.alc_buffer_tag != NULL) { bus_dma_tag_destroy(sc->alc_cdata.alc_buffer_tag); sc->alc_cdata.alc_buffer_tag = NULL; } if (sc->alc_cdata.alc_parent_tag != NULL) { bus_dma_tag_destroy(sc->alc_cdata.alc_parent_tag); sc->alc_cdata.alc_parent_tag = NULL; } } static int alc_shutdown(device_t dev) { return (alc_suspend(dev)); } /* * Note, this driver resets the link speed to 10/100Mbps by * restarting auto-negotiation in suspend/shutdown phase but we * don't know whether that auto-negotiation would succeed or not * as driver has no control after powering off/suspend operation. * If the renegotiation fail WOL may not work. Running at 1Gbps * will draw more power than 375mA at 3.3V which is specified in * PCI specification and that would result in complete * shutdowning power to ethernet controller. * * TODO * Save current negotiated media speed/duplex/flow-control to * softc and restore the same link again after resuming. PHY * handling such as power down/resetting to 100Mbps may be better * handled in suspend method in phy driver. */ static void alc_setlinkspeed(struct alc_softc *sc) { struct mii_data *mii; int aneg, i; mii = device_get_softc(sc->alc_miibus); mii_pollstat(mii); aneg = 0; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch IFM_SUBTYPE(mii->mii_media_active) { case IFM_10_T: case IFM_100_TX: return; case IFM_1000_T: aneg++; break; default: break; } } alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_100T2CR, 0); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA); alc_miibus_writereg(sc->alc_dev, sc->alc_phyaddr, MII_BMCR, BMCR_RESET | BMCR_AUTOEN | BMCR_STARTNEG); DELAY(1000); if (aneg != 0) { /* * Poll link state until alc(4) get a 10/100Mbps link. */ for (i = 0; i < MII_ANEGTICKS_GIGE; i++) { mii_pollstat(mii); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE( mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: alc_mac_config(sc); return; default: break; } } ALC_UNLOCK(sc); pause("alclnk", hz); ALC_LOCK(sc); } if (i == MII_ANEGTICKS_GIGE) device_printf(sc->alc_dev, "establishing a link failed, WOL may not work!"); } /* * No link, force MAC to have 100Mbps, full-duplex link. * This is the last resort and may/may not work. */ mii->mii_media_status = IFM_AVALID | IFM_ACTIVE; mii->mii_media_active = IFM_ETHER | IFM_100_TX | IFM_FDX; alc_mac_config(sc); } static void alc_setwol(struct alc_softc *sc) { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) alc_setwol_816x(sc); else alc_setwol_813x(sc); } static void alc_setwol_813x(struct alc_softc *sc) { struct ifnet *ifp; uint32_t reg, pmcs; uint16_t pmstat; ALC_LOCK_ASSERT(sc); alc_disable_l0s_l1(sc); ifp = sc->alc_ifp; if ((sc->alc_flags & ALC_FLAG_PM) == 0) { /* Disable WOL. */ CSR_WRITE_4(sc, ALC_WOL_CFG, 0); reg = CSR_READ_4(sc, ALC_PCIE_PHYMISC); reg |= PCIE_PHYMISC_FORCE_RCV_DET; CSR_WRITE_4(sc, ALC_PCIE_PHYMISC, reg); /* Force PHY power down. */ alc_phy_down(sc); CSR_WRITE_4(sc, ALC_MASTER_CFG, CSR_READ_4(sc, ALC_MASTER_CFG) | MASTER_CLK_SEL_DIS); return; } if ((ifp->if_capenable & IFCAP_WOL) != 0) { if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) alc_setlinkspeed(sc); CSR_WRITE_4(sc, ALC_MASTER_CFG, CSR_READ_4(sc, ALC_MASTER_CFG) & ~MASTER_CLK_SEL_DIS); } pmcs = 0; if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) pmcs |= WOL_CFG_MAGIC | WOL_CFG_MAGIC_ENB; CSR_WRITE_4(sc, ALC_WOL_CFG, pmcs); reg = CSR_READ_4(sc, ALC_MAC_CFG); reg &= ~(MAC_CFG_DBG | MAC_CFG_PROMISC | MAC_CFG_ALLMULTI | MAC_CFG_BCAST); if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) reg |= MAC_CFG_ALLMULTI | MAC_CFG_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) reg |= MAC_CFG_RX_ENB; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); reg = CSR_READ_4(sc, ALC_PCIE_PHYMISC); reg |= PCIE_PHYMISC_FORCE_RCV_DET; CSR_WRITE_4(sc, ALC_PCIE_PHYMISC, reg); if ((ifp->if_capenable & IFCAP_WOL) == 0) { /* WOL disabled, PHY power down. */ alc_phy_down(sc); CSR_WRITE_4(sc, ALC_MASTER_CFG, CSR_READ_4(sc, ALC_MASTER_CFG) | MASTER_CLK_SEL_DIS); } /* Request PME. */ pmstat = pci_read_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, pmstat, 2); } static void alc_setwol_816x(struct alc_softc *sc) { struct ifnet *ifp; uint32_t gphy, mac, master, pmcs, reg; uint16_t pmstat; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; master = CSR_READ_4(sc, ALC_MASTER_CFG); master &= ~MASTER_CLK_SEL_DIS; gphy = CSR_READ_4(sc, ALC_GPHY_CFG); gphy &= ~(GPHY_CFG_EXT_RESET | GPHY_CFG_LED_MODE | GPHY_CFG_100AB_ENB | GPHY_CFG_PHY_PLL_ON); gphy |= GPHY_CFG_HIB_EN | GPHY_CFG_HIB_PULSE | GPHY_CFG_SEL_ANA_RESET; if ((sc->alc_flags & ALC_FLAG_PM) == 0) { CSR_WRITE_4(sc, ALC_WOL_CFG, 0); gphy |= GPHY_CFG_PHY_IDDQ | GPHY_CFG_PWDOWN_HW; mac = CSR_READ_4(sc, ALC_MAC_CFG); } else { if ((ifp->if_capenable & IFCAP_WOL) != 0) { gphy |= GPHY_CFG_EXT_RESET; if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) alc_setlinkspeed(sc); } pmcs = 0; if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) pmcs |= WOL_CFG_MAGIC | WOL_CFG_MAGIC_ENB; CSR_WRITE_4(sc, ALC_WOL_CFG, pmcs); mac = CSR_READ_4(sc, ALC_MAC_CFG); mac &= ~(MAC_CFG_DBG | MAC_CFG_PROMISC | MAC_CFG_ALLMULTI | MAC_CFG_BCAST); if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) mac |= MAC_CFG_ALLMULTI | MAC_CFG_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) mac |= MAC_CFG_RX_ENB; alc_miiext_writereg(sc, MII_EXT_ANEG, MII_EXT_ANEG_S3DIG10, ANEG_S3DIG10_SL); } /* Enable OSC. */ reg = CSR_READ_4(sc, ALC_MISC); reg &= ~MISC_INTNLOSC_OPEN; CSR_WRITE_4(sc, ALC_MISC, reg); reg |= MISC_INTNLOSC_OPEN; CSR_WRITE_4(sc, ALC_MISC, reg); CSR_WRITE_4(sc, ALC_MASTER_CFG, master); CSR_WRITE_4(sc, ALC_MAC_CFG, mac); CSR_WRITE_4(sc, ALC_GPHY_CFG, gphy); reg = CSR_READ_4(sc, ALC_PDLL_TRNS1); reg |= PDLL_TRNS1_D3PLLOFF_ENB; CSR_WRITE_4(sc, ALC_PDLL_TRNS1, reg); if ((sc->alc_flags & ALC_FLAG_PM) != 0) { /* Request PME. */ pmstat = pci_read_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, pmstat, 2); } } static int alc_suspend(device_t dev) { struct alc_softc *sc; sc = device_get_softc(dev); ALC_LOCK(sc); alc_stop(sc); alc_setwol(sc); ALC_UNLOCK(sc); return (0); } static int alc_resume(device_t dev) { struct alc_softc *sc; struct ifnet *ifp; uint16_t pmstat; sc = device_get_softc(dev); ALC_LOCK(sc); if ((sc->alc_flags & ALC_FLAG_PM) != 0) { /* Disable PME and clear PME status. */ pmstat = pci_read_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, 2); if ((pmstat & PCIM_PSTAT_PMEENABLE) != 0) { pmstat &= ~PCIM_PSTAT_PMEENABLE; pci_write_config(sc->alc_dev, sc->alc_pmcap + PCIR_POWER_STATUS, pmstat, 2); } } /* Reset PHY. */ alc_phy_reset(sc); ifp = sc->alc_ifp; if ((ifp->if_flags & IFF_UP) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); } ALC_UNLOCK(sc); return (0); } static int alc_encap(struct alc_softc *sc, struct mbuf **m_head) { struct alc_txdesc *txd, *txd_last; struct tx_desc *desc; struct mbuf *m; struct ip *ip; struct tcphdr *tcp; bus_dma_segment_t txsegs[ALC_MAXTXSEGS]; bus_dmamap_t map; uint32_t cflags, hdrlen, ip_off, poff, vtag; int error, idx, nsegs, prod; ALC_LOCK_ASSERT(sc); M_ASSERTPKTHDR((*m_head)); m = *m_head; ip = NULL; tcp = NULL; ip_off = poff = 0; if ((m->m_pkthdr.csum_flags & (ALC_CSUM_FEATURES | CSUM_TSO)) != 0) { /* * AR81[3567]x requires offset of TCP/UDP header in its * Tx descriptor to perform Tx checksum offloading. TSO * also requires TCP header offset and modification of * IP/TCP header. This kind of operation takes many CPU * cycles on FreeBSD so fast host CPU is required to get * smooth TSO performance. */ struct ether_header *eh; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ m = m_dup(*m_head, M_NOWAIT); /* Release original mbufs. */ m_freem(*m_head); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } ip_off = sizeof(struct ether_header); m = m_pullup(m, ip_off); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } eh = mtod(m, struct ether_header *); /* * Check if hardware VLAN insertion is off. * Additional check for LLC/SNAP frame? */ if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); m = m_pullup(m, ip_off); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } } m = m_pullup(m, ip_off + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { m = m_pullup(m, poff + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } tcp = (struct tcphdr *)(mtod(m, char *) + poff); m = m_pullup(m, poff + (tcp->th_off << 2)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } /* * Due to strict adherence of Microsoft NDIS * Large Send specification, hardware expects * a pseudo TCP checksum inserted by upper * stack. Unfortunately the pseudo TCP * checksum that NDIS refers to does not include * TCP payload length so driver should recompute * the pseudo checksum here. Hopefully this * wouldn't be much burden on modern CPUs. * * Reset IP checksum and recompute TCP pseudo * checksum as NDIS specification said. */ ip = (struct ip *)(mtod(m, char *) + ip_off); tcp = (struct tcphdr *)(mtod(m, char *) + poff); ip->ip_sum = 0; tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } *m_head = m; } prod = sc->alc_cdata.alc_tx_prod; txd = &sc->alc_cdata.alc_txdesc[prod]; txd_last = txd; map = txd->tx_dmamap; error = bus_dmamap_load_mbuf_sg(sc->alc_cdata.alc_tx_tag, map, *m_head, txsegs, &nsegs, 0); if (error == EFBIG) { m = m_collapse(*m_head, M_NOWAIT, ALC_MAXTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->alc_cdata.alc_tx_tag, map, *m_head, txsegs, &nsegs, 0); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check descriptor overrun. */ if (sc->alc_cdata.alc_tx_cnt + nsegs >= ALC_TX_RING_CNT - 3) { bus_dmamap_unload(sc->alc_cdata.alc_tx_tag, map); return (ENOBUFS); } bus_dmamap_sync(sc->alc_cdata.alc_tx_tag, map, BUS_DMASYNC_PREWRITE); m = *m_head; cflags = TD_ETHERNET; vtag = 0; desc = NULL; idx = 0; /* Configure VLAN hardware tag insertion. */ if ((m->m_flags & M_VLANTAG) != 0) { vtag = htons(m->m_pkthdr.ether_vtag); vtag = (vtag << TD_VLAN_SHIFT) & TD_VLAN_MASK; cflags |= TD_INS_VLAN_TAG; } if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { /* Request TSO and set MSS. */ cflags |= TD_TSO | TD_TSO_DESCV1; cflags |= ((uint32_t)m->m_pkthdr.tso_segsz << TD_MSS_SHIFT) & TD_MSS_MASK; /* Set TCP header offset. */ cflags |= (poff << TD_TCPHDR_OFFSET_SHIFT) & TD_TCPHDR_OFFSET_MASK; /* * AR81[3567]x requires the first buffer should * only hold IP/TCP header data. Payload should * be handled in other descriptors. */ hdrlen = poff + (tcp->th_off << 2); desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->len = htole32(TX_BYTES(hdrlen | vtag)); desc->flags = htole32(cflags); desc->addr = htole64(txsegs[0].ds_addr); sc->alc_cdata.alc_tx_cnt++; ALC_DESC_INC(prod, ALC_TX_RING_CNT); if (m->m_len - hdrlen > 0) { /* Handle remaining payload of the first fragment. */ desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->len = htole32(TX_BYTES((m->m_len - hdrlen) | vtag)); desc->flags = htole32(cflags); desc->addr = htole64(txsegs[0].ds_addr + hdrlen); sc->alc_cdata.alc_tx_cnt++; ALC_DESC_INC(prod, ALC_TX_RING_CNT); } /* Handle remaining fragments. */ idx = 1; } else if ((m->m_pkthdr.csum_flags & ALC_CSUM_FEATURES) != 0) { /* Configure Tx checksum offload. */ #ifdef ALC_USE_CUSTOM_CSUM cflags |= TD_CUSTOM_CSUM; /* Set checksum start offset. */ cflags |= ((poff >> 1) << TD_PLOAD_OFFSET_SHIFT) & TD_PLOAD_OFFSET_MASK; /* Set checksum insertion position of TCP/UDP. */ cflags |= (((poff + m->m_pkthdr.csum_data) >> 1) << TD_CUSTOM_CSUM_OFFSET_SHIFT) & TD_CUSTOM_CSUM_OFFSET_MASK; #else if ((m->m_pkthdr.csum_flags & CSUM_IP) != 0) cflags |= TD_IPCSUM; if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0) cflags |= TD_TCPCSUM; if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) cflags |= TD_UDPCSUM; /* Set TCP/UDP header offset. */ cflags |= (poff << TD_L4HDR_OFFSET_SHIFT) & TD_L4HDR_OFFSET_MASK; #endif } for (; idx < nsegs; idx++) { desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->len = htole32(TX_BYTES(txsegs[idx].ds_len) | vtag); desc->flags = htole32(cflags); desc->addr = htole64(txsegs[idx].ds_addr); sc->alc_cdata.alc_tx_cnt++; ALC_DESC_INC(prod, ALC_TX_RING_CNT); } /* Update producer index. */ sc->alc_cdata.alc_tx_prod = prod; /* Finally set EOP on the last descriptor. */ prod = (prod + ALC_TX_RING_CNT - 1) % ALC_TX_RING_CNT; desc = &sc->alc_rdata.alc_tx_ring[prod]; desc->flags |= htole32(TD_EOP); /* Swap dmamap of the first and the last. */ txd = &sc->alc_cdata.alc_txdesc[prod]; map = txd_last->tx_dmamap; txd_last->tx_dmamap = txd->tx_dmamap; txd->tx_dmamap = map; txd->tx_m = m; return (0); } static void alc_start(struct ifnet *ifp) { struct alc_softc *sc; sc = ifp->if_softc; ALC_LOCK(sc); alc_start_locked(ifp); ALC_UNLOCK(sc); } static void alc_start_locked(struct ifnet *ifp) { struct alc_softc *sc; struct mbuf *m_head; int enq; sc = ifp->if_softc; ALC_LOCK_ASSERT(sc); /* Reclaim transmitted frames. */ if (sc->alc_cdata.alc_tx_cnt >= ALC_TX_DESC_HIWAT) alc_txeof(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->alc_flags & ALC_FLAG_LINK) == 0) return; for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd); ) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (alc_encap(sc, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } if (enq > 0) alc_start_tx(sc); } static void alc_start_tx(struct alc_softc *sc) { /* Sync descriptors. */ bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Kick. Assume we're using normal Tx priority queue. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) CSR_WRITE_2(sc, ALC_MBOX_TD_PRI0_PROD_IDX, (uint16_t)sc->alc_cdata.alc_tx_prod); else CSR_WRITE_4(sc, ALC_MBOX_TD_PROD_IDX, (sc->alc_cdata.alc_tx_prod << MBOX_TD_PROD_LO_IDX_SHIFT) & MBOX_TD_PROD_LO_IDX_MASK); /* Set a timeout in case the chip goes out to lunch. */ sc->alc_watchdog_timer = ALC_TX_TIMEOUT; } static void alc_watchdog(struct alc_softc *sc) { struct ifnet *ifp; ALC_LOCK_ASSERT(sc); if (sc->alc_watchdog_timer == 0 || --sc->alc_watchdog_timer) return; ifp = sc->alc_ifp; if ((sc->alc_flags & ALC_FLAG_LINK) == 0) { if_printf(sc->alc_ifp, "watchdog timeout (lost link)\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); return; } if_printf(sc->alc_ifp, "watchdog timeout -- resetting\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) alc_start_locked(ifp); } static int alc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct alc_softc *sc; struct ifreq *ifr; struct mii_data *mii; int error, mask; sc = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > (sc->alc_ident->max_framelen - sizeof(struct ether_vlan_header) - ETHER_CRC_LEN) || ((sc->alc_flags & ALC_FLAG_JUMBO) == 0 && ifr->ifr_mtu > ETHERMTU)) error = EINVAL; else if (ifp->if_mtu != ifr->ifr_mtu) { ALC_LOCK(sc); ifp->if_mtu = ifr->ifr_mtu; /* AR81[3567]x has 13 bits MSS field. */ if (ifp->if_mtu > ALC_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; VLAN_CAPABILITIES(ifp); } ALC_UNLOCK(sc); } break; case SIOCSIFFLAGS: ALC_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc->alc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) alc_rxfilter(sc); else alc_init_locked(sc); } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) alc_stop(sc); sc->alc_if_flags = ifp->if_flags; ALC_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ALC_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) alc_rxfilter(sc); ALC_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: mii = device_get_softc(sc->alc_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd); break; case SIOCSIFCAP: ALC_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= ALC_CSUM_FEATURES; else ifp->if_hwassist &= ~ALC_CSUM_FEATURES; } if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((ifp->if_capenable & IFCAP_TSO4) != 0) { /* AR81[3567]x has 13 bits MSS field. */ if (ifp->if_mtu > ALC_TSO_MTU) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else ifp->if_hwassist |= CSUM_TSO; } else ifp->if_hwassist &= ~CSUM_TSO; } if ((mask & IFCAP_WOL_MCAST) != 0 && (ifp->if_capabilities & IFCAP_WOL_MCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_MCAST; if ((mask & IFCAP_WOL_MAGIC) != 0 && (ifp->if_capabilities & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; alc_rxvlan(sc); } if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; if ((mask & IFCAP_VLAN_HWTSO) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) ifp->if_capenable &= ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); ALC_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void alc_mac_config(struct alc_softc *sc) { struct mii_data *mii; uint32_t reg; ALC_LOCK_ASSERT(sc); mii = device_get_softc(sc->alc_miibus); reg = CSR_READ_4(sc, ALC_MAC_CFG); reg &= ~(MAC_CFG_FULL_DUPLEX | MAC_CFG_TX_FC | MAC_CFG_RX_FC | MAC_CFG_SPEED_MASK); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) reg |= MAC_CFG_HASH_ALG_CRC32 | MAC_CFG_SPEED_MODE_SW; /* Reprogram MAC with resolved speed/duplex. */ switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: reg |= MAC_CFG_SPEED_10_100; break; case IFM_1000_T: reg |= MAC_CFG_SPEED_1000; break; } if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { reg |= MAC_CFG_FULL_DUPLEX; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) reg |= MAC_CFG_TX_FC; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) reg |= MAC_CFG_RX_FC; } CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } static void alc_stats_clear(struct alc_softc *sc) { struct smb sb, *smb; uint32_t *reg; int i; if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) { bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); smb = sc->alc_rdata.alc_smb; /* Update done, clear. */ smb->updated = 0; bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } else { for (reg = &sb.rx_frames, i = 0; reg <= &sb.rx_pkts_filtered; reg++) { CSR_READ_4(sc, ALC_RX_MIB_BASE + i); i += sizeof(uint32_t); } /* Read Tx statistics. */ for (reg = &sb.tx_frames, i = 0; reg <= &sb.tx_mcast_bytes; reg++) { CSR_READ_4(sc, ALC_TX_MIB_BASE + i); i += sizeof(uint32_t); } } } static void alc_stats_update(struct alc_softc *sc) { struct alc_hw_stats *stat; struct smb sb, *smb; struct ifnet *ifp; uint32_t *reg; int i; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; stat = &sc->alc_stats; if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) { bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); smb = sc->alc_rdata.alc_smb; if (smb->updated == 0) return; } else { smb = &sb; /* Read Rx statistics. */ for (reg = &sb.rx_frames, i = 0; reg <= &sb.rx_pkts_filtered; reg++) { *reg = CSR_READ_4(sc, ALC_RX_MIB_BASE + i); i += sizeof(uint32_t); } /* Read Tx statistics. */ for (reg = &sb.tx_frames, i = 0; reg <= &sb.tx_mcast_bytes; reg++) { *reg = CSR_READ_4(sc, ALC_TX_MIB_BASE + i); i += sizeof(uint32_t); } } /* Rx stats. */ stat->rx_frames += smb->rx_frames; stat->rx_bcast_frames += smb->rx_bcast_frames; stat->rx_mcast_frames += smb->rx_mcast_frames; stat->rx_pause_frames += smb->rx_pause_frames; stat->rx_control_frames += smb->rx_control_frames; stat->rx_crcerrs += smb->rx_crcerrs; stat->rx_lenerrs += smb->rx_lenerrs; stat->rx_bytes += smb->rx_bytes; stat->rx_runts += smb->rx_runts; stat->rx_fragments += smb->rx_fragments; stat->rx_pkts_64 += smb->rx_pkts_64; stat->rx_pkts_65_127 += smb->rx_pkts_65_127; stat->rx_pkts_128_255 += smb->rx_pkts_128_255; stat->rx_pkts_256_511 += smb->rx_pkts_256_511; stat->rx_pkts_512_1023 += smb->rx_pkts_512_1023; stat->rx_pkts_1024_1518 += smb->rx_pkts_1024_1518; stat->rx_pkts_1519_max += smb->rx_pkts_1519_max; stat->rx_pkts_truncated += smb->rx_pkts_truncated; stat->rx_fifo_oflows += smb->rx_fifo_oflows; stat->rx_rrs_errs += smb->rx_rrs_errs; stat->rx_alignerrs += smb->rx_alignerrs; stat->rx_bcast_bytes += smb->rx_bcast_bytes; stat->rx_mcast_bytes += smb->rx_mcast_bytes; stat->rx_pkts_filtered += smb->rx_pkts_filtered; /* Tx stats. */ stat->tx_frames += smb->tx_frames; stat->tx_bcast_frames += smb->tx_bcast_frames; stat->tx_mcast_frames += smb->tx_mcast_frames; stat->tx_pause_frames += smb->tx_pause_frames; stat->tx_excess_defer += smb->tx_excess_defer; stat->tx_control_frames += smb->tx_control_frames; stat->tx_deferred += smb->tx_deferred; stat->tx_bytes += smb->tx_bytes; stat->tx_pkts_64 += smb->tx_pkts_64; stat->tx_pkts_65_127 += smb->tx_pkts_65_127; stat->tx_pkts_128_255 += smb->tx_pkts_128_255; stat->tx_pkts_256_511 += smb->tx_pkts_256_511; stat->tx_pkts_512_1023 += smb->tx_pkts_512_1023; stat->tx_pkts_1024_1518 += smb->tx_pkts_1024_1518; stat->tx_pkts_1519_max += smb->tx_pkts_1519_max; stat->tx_single_colls += smb->tx_single_colls; stat->tx_multi_colls += smb->tx_multi_colls; stat->tx_late_colls += smb->tx_late_colls; stat->tx_excess_colls += smb->tx_excess_colls; stat->tx_underrun += smb->tx_underrun; stat->tx_desc_underrun += smb->tx_desc_underrun; stat->tx_lenerrs += smb->tx_lenerrs; stat->tx_pkts_truncated += smb->tx_pkts_truncated; stat->tx_bcast_bytes += smb->tx_bcast_bytes; stat->tx_mcast_bytes += smb->tx_mcast_bytes; /* Update counters in ifnet. */ if_inc_counter(ifp, IFCOUNTER_OPACKETS, smb->tx_frames); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, smb->tx_single_colls + smb->tx_multi_colls * 2 + smb->tx_late_colls + smb->tx_excess_colls * HDPX_CFG_RETRY_DEFAULT); if_inc_counter(ifp, IFCOUNTER_OERRORS, smb->tx_late_colls + smb->tx_excess_colls + smb->tx_underrun + smb->tx_pkts_truncated); if_inc_counter(ifp, IFCOUNTER_IPACKETS, smb->rx_frames); if_inc_counter(ifp, IFCOUNTER_IERRORS, smb->rx_crcerrs + smb->rx_lenerrs + smb->rx_runts + smb->rx_pkts_truncated + smb->rx_fifo_oflows + smb->rx_rrs_errs + smb->rx_alignerrs); if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) { /* Update done, clear. */ smb->updated = 0; bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } } static int alc_intr(void *arg) { struct alc_softc *sc; uint32_t status; sc = (struct alc_softc *)arg; status = CSR_READ_4(sc, ALC_INTR_STATUS); if ((status & ALC_INTRS) == 0) return (FILTER_STRAY); /* Disable interrupts. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, INTR_DIS_INT); taskqueue_enqueue(sc->alc_tq, &sc->alc_int_task); return (FILTER_HANDLED); } static void alc_int_task(void *arg, int pending) { struct alc_softc *sc; struct ifnet *ifp; uint32_t status; int more; sc = (struct alc_softc *)arg; ifp = sc->alc_ifp; status = CSR_READ_4(sc, ALC_INTR_STATUS); ALC_LOCK(sc); if (sc->alc_morework != 0) { sc->alc_morework = 0; status |= INTR_RX_PKT; } if ((status & ALC_INTRS) == 0) goto done; /* Acknowledge interrupts but still disable interrupts. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, status | INTR_DIS_INT); more = 0; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if ((status & INTR_RX_PKT) != 0) { more = alc_rxintr(sc, sc->alc_process_limit); if (more == EAGAIN) sc->alc_morework = 1; else if (more == EIO) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); ALC_UNLOCK(sc); return; } } if ((status & (INTR_DMA_RD_TO_RST | INTR_DMA_WR_TO_RST | INTR_TXQ_TO_RST)) != 0) { if ((status & INTR_DMA_RD_TO_RST) != 0) device_printf(sc->alc_dev, "DMA read error! -- resetting\n"); if ((status & INTR_DMA_WR_TO_RST) != 0) device_printf(sc->alc_dev, "DMA write error! -- resetting\n"); if ((status & INTR_TXQ_TO_RST) != 0) device_printf(sc->alc_dev, "TxQ reset! -- resetting\n"); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; alc_init_locked(sc); ALC_UNLOCK(sc); return; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) alc_start_locked(ifp); } if (more == EAGAIN || (CSR_READ_4(sc, ALC_INTR_STATUS) & ALC_INTRS) != 0) { ALC_UNLOCK(sc); taskqueue_enqueue(sc->alc_tq, &sc->alc_int_task); return; } done: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { /* Re-enable interrupts if we're running. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, 0x7FFFFFFF); } ALC_UNLOCK(sc); } static void alc_txeof(struct alc_softc *sc) { struct ifnet *ifp; struct alc_txdesc *txd; uint32_t cons, prod; int prog; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; if (sc->alc_cdata.alc_tx_cnt == 0) return; bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_POSTWRITE); if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) { bus_dmamap_sync(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, BUS_DMASYNC_POSTREAD); prod = sc->alc_rdata.alc_cmb->cons; } else { if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) prod = CSR_READ_2(sc, ALC_MBOX_TD_PRI0_CONS_IDX); else { prod = CSR_READ_4(sc, ALC_MBOX_TD_CONS_IDX); /* Assume we're using normal Tx priority queue. */ prod = (prod & MBOX_TD_CONS_LO_IDX_MASK) >> MBOX_TD_CONS_LO_IDX_SHIFT; } } cons = sc->alc_cdata.alc_tx_cons; /* * Go through our Tx list and free mbufs for those * frames which have been transmitted. */ for (prog = 0; cons != prod; prog++, ALC_DESC_INC(cons, ALC_TX_RING_CNT)) { if (sc->alc_cdata.alc_tx_cnt <= 0) break; prog++; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->alc_cdata.alc_tx_cnt--; txd = &sc->alc_cdata.alc_txdesc[cons]; if (txd->tx_m != NULL) { /* Reclaim transmitted mbufs. */ bus_dmamap_sync(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) bus_dmamap_sync(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, BUS_DMASYNC_PREREAD); sc->alc_cdata.alc_tx_cons = cons; /* * Unarm watchdog timer only when there is no pending * frames in Tx queue. */ if (sc->alc_cdata.alc_tx_cnt == 0) sc->alc_watchdog_timer = 0; } static int alc_newbuf(struct alc_softc *sc, struct alc_rxdesc *rxd) { struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = RX_BUF_SIZE_MAX; #ifndef __NO_STRICT_ALIGNMENT m_adj(m, sizeof(uint64_t)); #endif if (bus_dmamap_load_mbuf_sg(sc->alc_cdata.alc_rx_tag, sc->alc_cdata.alc_rx_sparemap, m, segs, &nsegs, 0) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap); } map = rxd->rx_dmamap; rxd->rx_dmamap = sc->alc_cdata.alc_rx_sparemap; sc->alc_cdata.alc_rx_sparemap = map; bus_dmamap_sync(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rxd->rx_desc->addr = htole64(segs[0].ds_addr); return (0); } static int alc_rxintr(struct alc_softc *sc, int count) { struct ifnet *ifp; struct rx_rdesc *rrd; uint32_t nsegs, status; int rr_cons, prog; bus_dmamap_sync(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, BUS_DMASYNC_POSTWRITE); rr_cons = sc->alc_cdata.alc_rr_cons; ifp = sc->alc_ifp; for (prog = 0; (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;) { if (count-- <= 0) break; rrd = &sc->alc_rdata.alc_rr_ring[rr_cons]; status = le32toh(rrd->status); if ((status & RRD_VALID) == 0) break; nsegs = RRD_RD_CNT(le32toh(rrd->rdinfo)); if (nsegs == 0) { /* This should not happen! */ device_printf(sc->alc_dev, "unexpected segment count -- resetting\n"); return (EIO); } alc_rxeof(sc, rrd); /* Clear Rx return status. */ rrd->status = 0; ALC_DESC_INC(rr_cons, ALC_RR_RING_CNT); sc->alc_cdata.alc_rx_cons += nsegs; sc->alc_cdata.alc_rx_cons %= ALC_RR_RING_CNT; prog += nsegs; } if (prog > 0) { /* Update the consumer index. */ sc->alc_cdata.alc_rr_cons = rr_cons; /* Sync Rx return descriptors. */ bus_dmamap_sync(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Sync updated Rx descriptors such that controller see * modified buffer addresses. */ bus_dmamap_sync(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, BUS_DMASYNC_PREWRITE); /* * Let controller know availability of new Rx buffers. * Since alc(4) use RXQ_CFG_RD_BURST_DEFAULT descriptors * it may be possible to update ALC_MBOX_RD0_PROD_IDX * only when Rx buffer pre-fetching is required. In * addition we already set ALC_RX_RD_FREE_THRESH to * RX_RD_FREE_THRESH_LO_DEFAULT descriptors. However * it still seems that pre-fetching needs more * experimentation. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) CSR_WRITE_2(sc, ALC_MBOX_RD0_PROD_IDX, (uint16_t)sc->alc_cdata.alc_rx_cons); else CSR_WRITE_4(sc, ALC_MBOX_RD0_PROD_IDX, sc->alc_cdata.alc_rx_cons); } return (count > 0 ? 0 : EAGAIN); } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * alc_fixup_rx(struct ifnet *ifp, struct mbuf *m) { struct mbuf *n; int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - 3; if (m->m_next == NULL) { for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= 6; return (m); } /* * Append a new mbuf to received mbuf chain and copy ethernet * header from the mbuf chain. This can save lots of CPU * cycles for jumbo frame. */ MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; return (n); } #endif /* Receive a frame. */ static void alc_rxeof(struct alc_softc *sc, struct rx_rdesc *rrd) { struct alc_rxdesc *rxd; struct ifnet *ifp; struct mbuf *mp, *m; uint32_t rdinfo, status, vtag; int count, nsegs, rx_cons; ifp = sc->alc_ifp; status = le32toh(rrd->status); rdinfo = le32toh(rrd->rdinfo); rx_cons = RRD_RD_IDX(rdinfo); nsegs = RRD_RD_CNT(rdinfo); sc->alc_cdata.alc_rxlen = RRD_BYTES(status); if ((status & (RRD_ERR_SUM | RRD_ERR_LENGTH)) != 0) { /* * We want to pass the following frames to upper * layer regardless of error status of Rx return * ring. * * o IP/TCP/UDP checksum is bad. * o frame length and protocol specific length * does not match. * * Force network stack compute checksum for * errored frames. */ status |= RRD_TCP_UDPCSUM_NOK | RRD_IPCSUM_NOK; if ((status & (RRD_ERR_CRC | RRD_ERR_ALIGN | RRD_ERR_TRUNC | RRD_ERR_RUNT)) != 0) return; } for (count = 0; count < nsegs; count++, ALC_DESC_INC(rx_cons, ALC_RX_RING_CNT)) { rxd = &sc->alc_cdata.alc_rxdesc[rx_cons]; mp = rxd->rx_m; /* Add a new receive buffer to the ring. */ if (alc_newbuf(sc, rxd) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse Rx buffers. */ if (sc->alc_cdata.alc_rxhead != NULL) m_freem(sc->alc_cdata.alc_rxhead); break; } /* * Assume we've received a full sized frame. * Actual size is fixed when we encounter the end of * multi-segmented frame. */ mp->m_len = sc->alc_buf_size; /* Chain received mbufs. */ if (sc->alc_cdata.alc_rxhead == NULL) { sc->alc_cdata.alc_rxhead = mp; sc->alc_cdata.alc_rxtail = mp; } else { mp->m_flags &= ~M_PKTHDR; sc->alc_cdata.alc_rxprev_tail = sc->alc_cdata.alc_rxtail; sc->alc_cdata.alc_rxtail->m_next = mp; sc->alc_cdata.alc_rxtail = mp; } if (count == nsegs - 1) { /* Last desc. for this frame. */ m = sc->alc_cdata.alc_rxhead; m->m_flags |= M_PKTHDR; /* * It seems that L1C/L2C controller has no way * to tell hardware to strip CRC bytes. */ m->m_pkthdr.len = sc->alc_cdata.alc_rxlen - ETHER_CRC_LEN; if (nsegs > 1) { /* Set last mbuf size. */ mp->m_len = sc->alc_cdata.alc_rxlen - (nsegs - 1) * sc->alc_buf_size; /* Remove the CRC bytes in chained mbufs. */ if (mp->m_len <= ETHER_CRC_LEN) { sc->alc_cdata.alc_rxtail = sc->alc_cdata.alc_rxprev_tail; sc->alc_cdata.alc_rxtail->m_len -= (ETHER_CRC_LEN - mp->m_len); sc->alc_cdata.alc_rxtail->m_next = NULL; m_freem(mp); } else { mp->m_len -= ETHER_CRC_LEN; } } else m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; /* * Due to hardware bugs, Rx checksum offloading * was intentionally disabled. */ if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (status & RRD_VLAN_TAG) != 0) { vtag = RRD_VLAN(le32toh(rrd->vtag)); m->m_pkthdr.ether_vtag = ntohs(vtag); m->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT m = alc_fixup_rx(ifp, m); if (m != NULL) #endif { /* Pass it on. */ ALC_UNLOCK(sc); (*ifp->if_input)(ifp, m); ALC_LOCK(sc); } } } /* Reset mbuf chains. */ ALC_RXCHAIN_RESET(sc); } static void alc_tick(void *arg) { struct alc_softc *sc; struct mii_data *mii; sc = (struct alc_softc *)arg; ALC_LOCK_ASSERT(sc); mii = device_get_softc(sc->alc_miibus); mii_tick(mii); alc_stats_update(sc); /* * alc(4) does not rely on Tx completion interrupts to reclaim * transferred buffers. Instead Tx completion interrupts are * used to hint for scheduling Tx task. So it's necessary to * release transmitted buffers by kicking Tx completion * handler. This limits the maximum reclamation delay to a hz. */ alc_txeof(sc); alc_watchdog(sc); callout_reset(&sc->alc_tick_ch, hz, alc_tick, sc); } static void alc_osc_reset(struct alc_softc *sc) { uint32_t reg; reg = CSR_READ_4(sc, ALC_MISC3); reg &= ~MISC3_25M_BY_SW; reg |= MISC3_25M_NOTO_INTNL; CSR_WRITE_4(sc, ALC_MISC3, reg); reg = CSR_READ_4(sc, ALC_MISC); if (AR816X_REV(sc->alc_rev) >= AR816X_REV_B0) { /* * Restore over-current protection default value. * This value could be reset by MAC reset. */ reg &= ~MISC_PSW_OCP_MASK; reg |= (MISC_PSW_OCP_DEFAULT << MISC_PSW_OCP_SHIFT); reg &= ~MISC_INTNLOSC_OPEN; CSR_WRITE_4(sc, ALC_MISC, reg); CSR_WRITE_4(sc, ALC_MISC, reg | MISC_INTNLOSC_OPEN); reg = CSR_READ_4(sc, ALC_MISC2); reg &= ~MISC2_CALB_START; CSR_WRITE_4(sc, ALC_MISC2, reg); CSR_WRITE_4(sc, ALC_MISC2, reg | MISC2_CALB_START); } else { reg &= ~MISC_INTNLOSC_OPEN; /* Disable isolate for revision A devices. */ if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1) reg &= ~MISC_ISO_ENB; CSR_WRITE_4(sc, ALC_MISC, reg | MISC_INTNLOSC_OPEN); CSR_WRITE_4(sc, ALC_MISC, reg); } DELAY(20); } static void alc_reset(struct alc_softc *sc) { uint32_t pmcfg, reg; int i; pmcfg = 0; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { /* Reset workaround. */ CSR_WRITE_4(sc, ALC_MBOX_RD0_PROD_IDX, 1); if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) { /* Disable L0s/L1s before reset. */ pmcfg = CSR_READ_4(sc, ALC_PM_CFG); if ((pmcfg & (PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB)) != 0) { pmcfg &= ~(PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB); CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } } } reg = CSR_READ_4(sc, ALC_MASTER_CFG); reg |= MASTER_OOB_DIS_OFF | MASTER_RESET; CSR_WRITE_4(sc, ALC_MASTER_CFG, reg); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { for (i = ALC_RESET_TIMEOUT; i > 0; i--) { DELAY(10); if (CSR_READ_4(sc, ALC_MBOX_RD0_PROD_IDX) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "MAC reset timeout!\n"); } for (i = ALC_RESET_TIMEOUT; i > 0; i--) { DELAY(10); if ((CSR_READ_4(sc, ALC_MASTER_CFG) & MASTER_RESET) == 0) break; } if (i == 0) device_printf(sc->alc_dev, "master reset timeout!\n"); for (i = ALC_RESET_TIMEOUT; i > 0; i--) { reg = CSR_READ_4(sc, ALC_IDLE_STATUS); if ((reg & (IDLE_STATUS_RXMAC | IDLE_STATUS_TXMAC | IDLE_STATUS_RXQ | IDLE_STATUS_TXQ)) == 0) break; DELAY(10); } if (i == 0) device_printf(sc->alc_dev, "reset timeout(0x%08x)!\n", reg); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1 && (sc->alc_rev & 0x01) != 0) { reg = CSR_READ_4(sc, ALC_MASTER_CFG); reg |= MASTER_CLK_SEL_DIS; CSR_WRITE_4(sc, ALC_MASTER_CFG, reg); /* Restore L0s/L1s config. */ if ((pmcfg & (PM_CFG_ASPM_L0S_ENB | PM_CFG_ASPM_L1_ENB)) != 0) CSR_WRITE_4(sc, ALC_PM_CFG, pmcfg); } alc_osc_reset(sc); reg = CSR_READ_4(sc, ALC_MISC3); reg &= ~MISC3_25M_BY_SW; reg |= MISC3_25M_NOTO_INTNL; CSR_WRITE_4(sc, ALC_MISC3, reg); reg = CSR_READ_4(sc, ALC_MISC); reg &= ~MISC_INTNLOSC_OPEN; if (AR816X_REV(sc->alc_rev) <= AR816X_REV_A1) reg &= ~MISC_ISO_ENB; CSR_WRITE_4(sc, ALC_MISC, reg); DELAY(20); } if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2) CSR_WRITE_4(sc, ALC_SERDES_LOCK, CSR_READ_4(sc, ALC_SERDES_LOCK) | SERDES_MAC_CLK_SLOWDOWN | SERDES_PHY_CLK_SLOWDOWN); } static void alc_init(void *xsc) { struct alc_softc *sc; sc = (struct alc_softc *)xsc; ALC_LOCK(sc); alc_init_locked(sc); ALC_UNLOCK(sc); } static void alc_init_locked(struct alc_softc *sc) { struct ifnet *ifp; struct mii_data *mii; uint8_t eaddr[ETHER_ADDR_LEN]; bus_addr_t paddr; uint32_t reg, rxf_hi, rxf_lo; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; mii = device_get_softc(sc->alc_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel any pending I/O. */ alc_stop(sc); /* * Reset the chip to a known state. */ alc_reset(sc); /* Initialize Rx descriptors. */ if (alc_init_rx_ring(sc) != 0) { device_printf(sc->alc_dev, "no memory for Rx buffers.\n"); alc_stop(sc); return; } alc_init_rr_ring(sc); alc_init_tx_ring(sc); alc_init_cmb(sc); alc_init_smb(sc); /* Enable all clocks. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { CSR_WRITE_4(sc, ALC_CLK_GATING_CFG, CLK_GATING_DMAW_ENB | CLK_GATING_DMAR_ENB | CLK_GATING_TXQ_ENB | CLK_GATING_RXQ_ENB | CLK_GATING_TXMAC_ENB | CLK_GATING_RXMAC_ENB); if (AR816X_REV(sc->alc_rev) >= AR816X_REV_B0) CSR_WRITE_4(sc, ALC_IDLE_DECISN_TIMER, IDLE_DECISN_TIMER_DEFAULT_1MS); } else CSR_WRITE_4(sc, ALC_CLK_GATING_CFG, 0); /* Reprogram the station address. */ bcopy(IF_LLADDR(ifp), eaddr, ETHER_ADDR_LEN); CSR_WRITE_4(sc, ALC_PAR0, eaddr[2] << 24 | eaddr[3] << 16 | eaddr[4] << 8 | eaddr[5]); CSR_WRITE_4(sc, ALC_PAR1, eaddr[0] << 8 | eaddr[1]); /* * Clear WOL status and disable all WOL feature as WOL * would interfere Rx operation under normal environments. */ CSR_READ_4(sc, ALC_WOL_CFG); CSR_WRITE_4(sc, ALC_WOL_CFG, 0); /* Set Tx descriptor base addresses. */ paddr = sc->alc_rdata.alc_tx_ring_paddr; CSR_WRITE_4(sc, ALC_TX_BASE_ADDR_HI, ALC_ADDR_HI(paddr)); CSR_WRITE_4(sc, ALC_TDL_HEAD_ADDR_LO, ALC_ADDR_LO(paddr)); /* We don't use high priority ring. */ CSR_WRITE_4(sc, ALC_TDH_HEAD_ADDR_LO, 0); /* Set Tx descriptor counter. */ CSR_WRITE_4(sc, ALC_TD_RING_CNT, (ALC_TX_RING_CNT << TD_RING_CNT_SHIFT) & TD_RING_CNT_MASK); /* Set Rx descriptor base addresses. */ paddr = sc->alc_rdata.alc_rx_ring_paddr; CSR_WRITE_4(sc, ALC_RX_BASE_ADDR_HI, ALC_ADDR_HI(paddr)); CSR_WRITE_4(sc, ALC_RD0_HEAD_ADDR_LO, ALC_ADDR_LO(paddr)); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* We use one Rx ring. */ CSR_WRITE_4(sc, ALC_RD1_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RD2_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RD3_HEAD_ADDR_LO, 0); } /* Set Rx descriptor counter. */ CSR_WRITE_4(sc, ALC_RD_RING_CNT, (ALC_RX_RING_CNT << RD_RING_CNT_SHIFT) & RD_RING_CNT_MASK); /* * Let hardware split jumbo frames into alc_max_buf_sized chunks. * if it do not fit the buffer size. Rx return descriptor holds * a counter that indicates how many fragments were made by the * hardware. The buffer size should be multiple of 8 bytes. * Since hardware has limit on the size of buffer size, always * use the maximum value. * For strict-alignment architectures make sure to reduce buffer * size by 8 bytes to make room for alignment fixup. */ #ifndef __NO_STRICT_ALIGNMENT sc->alc_buf_size = RX_BUF_SIZE_MAX - sizeof(uint64_t); #else sc->alc_buf_size = RX_BUF_SIZE_MAX; #endif CSR_WRITE_4(sc, ALC_RX_BUF_SIZE, sc->alc_buf_size); paddr = sc->alc_rdata.alc_rr_ring_paddr; /* Set Rx return descriptor base addresses. */ CSR_WRITE_4(sc, ALC_RRD0_HEAD_ADDR_LO, ALC_ADDR_LO(paddr)); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* We use one Rx return ring. */ CSR_WRITE_4(sc, ALC_RRD1_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RRD2_HEAD_ADDR_LO, 0); CSR_WRITE_4(sc, ALC_RRD3_HEAD_ADDR_LO, 0); } /* Set Rx return descriptor counter. */ CSR_WRITE_4(sc, ALC_RRD_RING_CNT, (ALC_RR_RING_CNT << RRD_RING_CNT_SHIFT) & RRD_RING_CNT_MASK); paddr = sc->alc_rdata.alc_cmb_paddr; CSR_WRITE_4(sc, ALC_CMB_BASE_ADDR_LO, ALC_ADDR_LO(paddr)); paddr = sc->alc_rdata.alc_smb_paddr; CSR_WRITE_4(sc, ALC_SMB_BASE_ADDR_HI, ALC_ADDR_HI(paddr)); CSR_WRITE_4(sc, ALC_SMB_BASE_ADDR_LO, ALC_ADDR_LO(paddr)); if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B) { /* Reconfigure SRAM - Vendor magic. */ CSR_WRITE_4(sc, ALC_SRAM_RX_FIFO_LEN, 0x000002A0); CSR_WRITE_4(sc, ALC_SRAM_TX_FIFO_LEN, 0x00000100); CSR_WRITE_4(sc, ALC_SRAM_RX_FIFO_ADDR, 0x029F0000); CSR_WRITE_4(sc, ALC_SRAM_RD0_ADDR, 0x02BF02A0); CSR_WRITE_4(sc, ALC_SRAM_TX_FIFO_ADDR, 0x03BF02C0); CSR_WRITE_4(sc, ALC_SRAM_TD_ADDR, 0x03DF03C0); CSR_WRITE_4(sc, ALC_TXF_WATER_MARK, 0x00000000); CSR_WRITE_4(sc, ALC_RD_DMA_CFG, 0x00000000); } /* Tell hardware that we're ready to load DMA blocks. */ CSR_WRITE_4(sc, ALC_DMA_BLOCK, DMA_BLOCK_LOAD); /* Configure interrupt moderation timer. */ reg = ALC_USECS(sc->alc_int_rx_mod) << IM_TIMER_RX_SHIFT; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) reg |= ALC_USECS(sc->alc_int_tx_mod) << IM_TIMER_TX_SHIFT; CSR_WRITE_4(sc, ALC_IM_TIMER, reg); /* * We don't want to automatic interrupt clear as task queue * for the interrupt should know interrupt status. */ reg = CSR_READ_4(sc, ALC_MASTER_CFG); reg &= ~(MASTER_IM_RX_TIMER_ENB | MASTER_IM_TX_TIMER_ENB); reg |= MASTER_SA_TIMER_ENB; if (ALC_USECS(sc->alc_int_rx_mod) != 0) reg |= MASTER_IM_RX_TIMER_ENB; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0 && ALC_USECS(sc->alc_int_tx_mod) != 0) reg |= MASTER_IM_TX_TIMER_ENB; CSR_WRITE_4(sc, ALC_MASTER_CFG, reg); /* * Disable interrupt re-trigger timer. We don't want automatic * re-triggering of un-ACKed interrupts. */ CSR_WRITE_4(sc, ALC_INTR_RETRIG_TIMER, ALC_USECS(0)); /* Configure CMB. */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { CSR_WRITE_4(sc, ALC_CMB_TD_THRESH, ALC_TX_RING_CNT / 3); CSR_WRITE_4(sc, ALC_CMB_TX_TIMER, ALC_USECS(sc->alc_int_tx_mod)); } else { if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) { CSR_WRITE_4(sc, ALC_CMB_TD_THRESH, 4); CSR_WRITE_4(sc, ALC_CMB_TX_TIMER, ALC_USECS(5000)); } else CSR_WRITE_4(sc, ALC_CMB_TX_TIMER, ALC_USECS(0)); } /* * Hardware can be configured to issue SMB interrupt based * on programmed interval. Since there is a callout that is * invoked for every hz in driver we use that instead of * relying on periodic SMB interrupt. */ CSR_WRITE_4(sc, ALC_SMB_STAT_TIMER, ALC_USECS(0)); /* Clear MAC statistics. */ alc_stats_clear(sc); /* * Always use maximum frame size that controller can support. * Otherwise received frames that has larger frame length * than alc(4) MTU would be silently dropped in hardware. This * would make path-MTU discovery hard as sender wouldn't get * any responses from receiver. alc(4) supports * multi-fragmented frames on Rx path so it has no issue on * assembling fragmented frames. Using maximum frame size also * removes the need to reinitialize hardware when interface * MTU configuration was changed. * * Be conservative in what you do, be liberal in what you * accept from others - RFC 793. */ CSR_WRITE_4(sc, ALC_FRAME_SIZE, sc->alc_ident->max_framelen); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* Disable header split(?) */ CSR_WRITE_4(sc, ALC_HDS_CFG, 0); /* Configure IPG/IFG parameters. */ CSR_WRITE_4(sc, ALC_IPG_IFG_CFG, ((IPG_IFG_IPGT_DEFAULT << IPG_IFG_IPGT_SHIFT) & IPG_IFG_IPGT_MASK) | ((IPG_IFG_MIFG_DEFAULT << IPG_IFG_MIFG_SHIFT) & IPG_IFG_MIFG_MASK) | ((IPG_IFG_IPG1_DEFAULT << IPG_IFG_IPG1_SHIFT) & IPG_IFG_IPG1_MASK) | ((IPG_IFG_IPG2_DEFAULT << IPG_IFG_IPG2_SHIFT) & IPG_IFG_IPG2_MASK)); /* Set parameters for half-duplex media. */ CSR_WRITE_4(sc, ALC_HDPX_CFG, ((HDPX_CFG_LCOL_DEFAULT << HDPX_CFG_LCOL_SHIFT) & HDPX_CFG_LCOL_MASK) | ((HDPX_CFG_RETRY_DEFAULT << HDPX_CFG_RETRY_SHIFT) & HDPX_CFG_RETRY_MASK) | HDPX_CFG_EXC_DEF_EN | ((HDPX_CFG_ABEBT_DEFAULT << HDPX_CFG_ABEBT_SHIFT) & HDPX_CFG_ABEBT_MASK) | ((HDPX_CFG_JAMIPG_DEFAULT << HDPX_CFG_JAMIPG_SHIFT) & HDPX_CFG_JAMIPG_MASK)); } /* * Set TSO/checksum offload threshold. For frames that is * larger than this threshold, hardware wouldn't do * TSO/checksum offloading. */ reg = (sc->alc_ident->max_framelen >> TSO_OFFLOAD_THRESH_UNIT_SHIFT) & TSO_OFFLOAD_THRESH_MASK; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) reg |= TSO_OFFLOAD_ERRLGPKT_DROP_ENB; CSR_WRITE_4(sc, ALC_TSO_OFFLOAD_THRESH, reg); /* Configure TxQ. */ reg = (alc_dma_burst[sc->alc_dma_rd_burst] << TXQ_CFG_TX_FIFO_BURST_SHIFT) & TXQ_CFG_TX_FIFO_BURST_MASK; if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) reg >>= 1; reg |= (TXQ_CFG_TD_BURST_DEFAULT << TXQ_CFG_TD_BURST_SHIFT) & TXQ_CFG_TD_BURST_MASK; reg |= TXQ_CFG_IP_OPTION_ENB | TXQ_CFG_8023_ENB; CSR_WRITE_4(sc, ALC_TXQ_CFG, reg | TXQ_CFG_ENHANCED_MODE); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { reg = (TXQ_CFG_TD_BURST_DEFAULT << HQTD_CFG_Q1_BURST_SHIFT | TXQ_CFG_TD_BURST_DEFAULT << HQTD_CFG_Q2_BURST_SHIFT | TXQ_CFG_TD_BURST_DEFAULT << HQTD_CFG_Q3_BURST_SHIFT | HQTD_CFG_BURST_ENB); CSR_WRITE_4(sc, ALC_HQTD_CFG, reg); reg = WRR_PRI_RESTRICT_NONE; reg |= (WRR_PRI_DEFAULT << WRR_PRI0_SHIFT | WRR_PRI_DEFAULT << WRR_PRI1_SHIFT | WRR_PRI_DEFAULT << WRR_PRI2_SHIFT | WRR_PRI_DEFAULT << WRR_PRI3_SHIFT); CSR_WRITE_4(sc, ALC_WRR, reg); } else { /* Configure Rx free descriptor pre-fetching. */ CSR_WRITE_4(sc, ALC_RX_RD_FREE_THRESH, ((RX_RD_FREE_THRESH_HI_DEFAULT << RX_RD_FREE_THRESH_HI_SHIFT) & RX_RD_FREE_THRESH_HI_MASK) | ((RX_RD_FREE_THRESH_LO_DEFAULT << RX_RD_FREE_THRESH_LO_SHIFT) & RX_RD_FREE_THRESH_LO_MASK)); } /* * Configure flow control parameters. * XON : 80% of Rx FIFO * XOFF : 30% of Rx FIFO */ if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { reg = CSR_READ_4(sc, ALC_SRAM_RX_FIFO_LEN); reg &= SRAM_RX_FIFO_LEN_MASK; reg *= 8; if (reg > 8 * 1024) reg -= RX_FIFO_PAUSE_816X_RSVD; else reg -= RX_BUF_SIZE_MAX; reg /= 8; CSR_WRITE_4(sc, ALC_RX_FIFO_PAUSE_THRESH, ((reg << RX_FIFO_PAUSE_THRESH_LO_SHIFT) & RX_FIFO_PAUSE_THRESH_LO_MASK) | (((RX_FIFO_PAUSE_816X_RSVD / 8) << RX_FIFO_PAUSE_THRESH_HI_SHIFT) & RX_FIFO_PAUSE_THRESH_HI_MASK)); } else if (sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8131 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8132) { reg = CSR_READ_4(sc, ALC_SRAM_RX_FIFO_LEN); rxf_hi = (reg * 8) / 10; rxf_lo = (reg * 3) / 10; CSR_WRITE_4(sc, ALC_RX_FIFO_PAUSE_THRESH, ((rxf_lo << RX_FIFO_PAUSE_THRESH_LO_SHIFT) & RX_FIFO_PAUSE_THRESH_LO_MASK) | ((rxf_hi << RX_FIFO_PAUSE_THRESH_HI_SHIFT) & RX_FIFO_PAUSE_THRESH_HI_MASK)); } if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { /* Disable RSS until I understand L1C/L2C's RSS logic. */ CSR_WRITE_4(sc, ALC_RSS_IDT_TABLE0, 0); CSR_WRITE_4(sc, ALC_RSS_CPU, 0); } /* Configure RxQ. */ reg = (RXQ_CFG_RD_BURST_DEFAULT << RXQ_CFG_RD_BURST_SHIFT) & RXQ_CFG_RD_BURST_MASK; reg |= RXQ_CFG_RSS_MODE_DIS; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { reg |= (RXQ_CFG_816X_IDT_TBL_SIZE_DEFAULT << RXQ_CFG_816X_IDT_TBL_SIZE_SHIFT) & RXQ_CFG_816X_IDT_TBL_SIZE_MASK; if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0) reg |= RXQ_CFG_ASPM_THROUGHPUT_LIMIT_100M; } else { if ((sc->alc_flags & ALC_FLAG_FASTETHER) == 0 && sc->alc_ident->deviceid != DEVICEID_ATHEROS_AR8151_V2) reg |= RXQ_CFG_ASPM_THROUGHPUT_LIMIT_100M; } CSR_WRITE_4(sc, ALC_RXQ_CFG, reg); /* Configure DMA parameters. */ reg = DMA_CFG_OUT_ORDER | DMA_CFG_RD_REQ_PRI; reg |= sc->alc_rcb; if ((sc->alc_flags & ALC_FLAG_CMB_BUG) == 0) reg |= DMA_CFG_CMB_ENB; if ((sc->alc_flags & ALC_FLAG_SMB_BUG) == 0) reg |= DMA_CFG_SMB_ENB; else reg |= DMA_CFG_SMB_DIS; reg |= (sc->alc_dma_rd_burst & DMA_CFG_RD_BURST_MASK) << DMA_CFG_RD_BURST_SHIFT; reg |= (sc->alc_dma_wr_burst & DMA_CFG_WR_BURST_MASK) << DMA_CFG_WR_BURST_SHIFT; reg |= (DMA_CFG_RD_DELAY_CNT_DEFAULT << DMA_CFG_RD_DELAY_CNT_SHIFT) & DMA_CFG_RD_DELAY_CNT_MASK; reg |= (DMA_CFG_WR_DELAY_CNT_DEFAULT << DMA_CFG_WR_DELAY_CNT_SHIFT) & DMA_CFG_WR_DELAY_CNT_MASK; if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) { switch (AR816X_REV(sc->alc_rev)) { case AR816X_REV_A0: case AR816X_REV_A1: reg |= DMA_CFG_RD_CHNL_SEL_2; break; case AR816X_REV_B0: /* FALLTHROUGH */ default: reg |= DMA_CFG_RD_CHNL_SEL_4; break; } } CSR_WRITE_4(sc, ALC_DMA_CFG, reg); /* * Configure Tx/Rx MACs. * - Auto-padding for short frames. * - Enable CRC generation. * Actual reconfiguration of MAC for resolved speed/duplex * is followed after detection of link establishment. * AR813x/AR815x always does checksum computation regardless * of MAC_CFG_RXCSUM_ENB bit. Also the controller is known to * have bug in protocol field in Rx return structure so * these controllers can't handle fragmented frames. Disable * Rx checksum offloading until there is a newer controller * that has sane implementation. */ reg = MAC_CFG_TX_CRC_ENB | MAC_CFG_TX_AUTO_PAD | MAC_CFG_FULL_DUPLEX | ((MAC_CFG_PREAMBLE_DEFAULT << MAC_CFG_PREAMBLE_SHIFT) & MAC_CFG_PREAMBLE_MASK); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8151_V2 || sc->alc_ident->deviceid == DEVICEID_ATHEROS_AR8152_B2) reg |= MAC_CFG_HASH_ALG_CRC32 | MAC_CFG_SPEED_MODE_SW; if ((sc->alc_flags & ALC_FLAG_FASTETHER) != 0) reg |= MAC_CFG_SPEED_10_100; else reg |= MAC_CFG_SPEED_1000; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); /* Set up the receive filter. */ alc_rxfilter(sc); alc_rxvlan(sc); /* Acknowledge all pending interrupts and clear it. */ CSR_WRITE_4(sc, ALC_INTR_MASK, ALC_INTRS); CSR_WRITE_4(sc, ALC_INTR_STATUS, 0xFFFFFFFF); CSR_WRITE_4(sc, ALC_INTR_STATUS, 0); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->alc_flags &= ~ALC_FLAG_LINK; /* Switch to the current media. */ alc_mediachange_locked(sc); callout_reset(&sc->alc_tick_ch, hz, alc_tick, sc); } static void alc_stop(struct alc_softc *sc) { struct ifnet *ifp; struct alc_txdesc *txd; struct alc_rxdesc *rxd; uint32_t reg; int i; ALC_LOCK_ASSERT(sc); /* * Mark the interface down and cancel the watchdog timer. */ ifp = sc->alc_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->alc_flags &= ~ALC_FLAG_LINK; callout_stop(&sc->alc_tick_ch); sc->alc_watchdog_timer = 0; alc_stats_update(sc); /* Disable interrupts. */ CSR_WRITE_4(sc, ALC_INTR_MASK, 0); CSR_WRITE_4(sc, ALC_INTR_STATUS, 0xFFFFFFFF); /* Disable DMA. */ reg = CSR_READ_4(sc, ALC_DMA_CFG); reg &= ~(DMA_CFG_CMB_ENB | DMA_CFG_SMB_ENB); reg |= DMA_CFG_SMB_DIS; CSR_WRITE_4(sc, ALC_DMA_CFG, reg); DELAY(1000); /* Stop Rx/Tx MACs. */ alc_stop_mac(sc); /* Disable interrupts which might be touched in taskq handler. */ CSR_WRITE_4(sc, ALC_INTR_STATUS, 0xFFFFFFFF); /* Disable L0s/L1s */ alc_aspm(sc, 0, IFM_UNKNOWN); /* Reclaim Rx buffers that have been processed. */ if (sc->alc_cdata.alc_rxhead != NULL) m_freem(sc->alc_cdata.alc_rxhead); ALC_RXCHAIN_RESET(sc); /* * Free Tx/Rx mbufs still in the queues. */ for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->alc_cdata.alc_rx_tag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->alc_cdata.alc_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } } static void alc_stop_mac(struct alc_softc *sc) { uint32_t reg; int i; alc_stop_queue(sc); /* Disable Rx/Tx MAC. */ reg = CSR_READ_4(sc, ALC_MAC_CFG); if ((reg & (MAC_CFG_TX_ENB | MAC_CFG_RX_ENB)) != 0) { reg &= ~(MAC_CFG_TX_ENB | MAC_CFG_RX_ENB); CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } for (i = ALC_TIMEOUT; i > 0; i--) { reg = CSR_READ_4(sc, ALC_IDLE_STATUS); if ((reg & (IDLE_STATUS_RXMAC | IDLE_STATUS_TXMAC)) == 0) break; DELAY(10); } if (i == 0) device_printf(sc->alc_dev, "could not disable Rx/Tx MAC(0x%08x)!\n", reg); } static void alc_start_queue(struct alc_softc *sc) { uint32_t qcfg[] = { 0, RXQ_CFG_QUEUE0_ENB, RXQ_CFG_QUEUE0_ENB | RXQ_CFG_QUEUE1_ENB, RXQ_CFG_QUEUE0_ENB | RXQ_CFG_QUEUE1_ENB | RXQ_CFG_QUEUE2_ENB, RXQ_CFG_ENB }; uint32_t cfg; ALC_LOCK_ASSERT(sc); /* Enable RxQ. */ cfg = CSR_READ_4(sc, ALC_RXQ_CFG); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { cfg &= ~RXQ_CFG_ENB; cfg |= qcfg[1]; } else cfg |= RXQ_CFG_QUEUE0_ENB; CSR_WRITE_4(sc, ALC_RXQ_CFG, cfg); /* Enable TxQ. */ cfg = CSR_READ_4(sc, ALC_TXQ_CFG); cfg |= TXQ_CFG_ENB; CSR_WRITE_4(sc, ALC_TXQ_CFG, cfg); } static void alc_stop_queue(struct alc_softc *sc) { uint32_t reg; int i; /* Disable RxQ. */ reg = CSR_READ_4(sc, ALC_RXQ_CFG); if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) == 0) { if ((reg & RXQ_CFG_ENB) != 0) { reg &= ~RXQ_CFG_ENB; CSR_WRITE_4(sc, ALC_RXQ_CFG, reg); } } else { if ((reg & RXQ_CFG_QUEUE0_ENB) != 0) { reg &= ~RXQ_CFG_QUEUE0_ENB; CSR_WRITE_4(sc, ALC_RXQ_CFG, reg); } } /* Disable TxQ. */ reg = CSR_READ_4(sc, ALC_TXQ_CFG); if ((reg & TXQ_CFG_ENB) != 0) { reg &= ~TXQ_CFG_ENB; CSR_WRITE_4(sc, ALC_TXQ_CFG, reg); } DELAY(40); for (i = ALC_TIMEOUT; i > 0; i--) { reg = CSR_READ_4(sc, ALC_IDLE_STATUS); if ((reg & (IDLE_STATUS_RXQ | IDLE_STATUS_TXQ)) == 0) break; DELAY(10); } if (i == 0) device_printf(sc->alc_dev, "could not disable RxQ/TxQ (0x%08x)!\n", reg); } static void alc_init_tx_ring(struct alc_softc *sc) { struct alc_ring_data *rd; struct alc_txdesc *txd; int i; ALC_LOCK_ASSERT(sc); sc->alc_cdata.alc_tx_prod = 0; sc->alc_cdata.alc_tx_cons = 0; sc->alc_cdata.alc_tx_cnt = 0; rd = &sc->alc_rdata; bzero(rd->alc_tx_ring, ALC_TX_RING_SZ); for (i = 0; i < ALC_TX_RING_CNT; i++) { txd = &sc->alc_cdata.alc_txdesc[i]; txd->tx_m = NULL; } bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_PREWRITE); } static int alc_init_rx_ring(struct alc_softc *sc) { struct alc_ring_data *rd; struct alc_rxdesc *rxd; int i; ALC_LOCK_ASSERT(sc); sc->alc_cdata.alc_rx_cons = ALC_RX_RING_CNT - 1; sc->alc_morework = 0; rd = &sc->alc_rdata; bzero(rd->alc_rx_ring, ALC_RX_RING_SZ); for (i = 0; i < ALC_RX_RING_CNT; i++) { rxd = &sc->alc_cdata.alc_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_desc = &rd->alc_rx_ring[i]; if (alc_newbuf(sc, rxd) != 0) return (ENOBUFS); } /* * Since controller does not update Rx descriptors, driver * does have to read Rx descriptors back so BUS_DMASYNC_PREWRITE * is enough to ensure coherence. */ bus_dmamap_sync(sc->alc_cdata.alc_rx_ring_tag, sc->alc_cdata.alc_rx_ring_map, BUS_DMASYNC_PREWRITE); /* Let controller know availability of new Rx buffers. */ CSR_WRITE_4(sc, ALC_MBOX_RD0_PROD_IDX, sc->alc_cdata.alc_rx_cons); return (0); } static void alc_init_rr_ring(struct alc_softc *sc) { struct alc_ring_data *rd; ALC_LOCK_ASSERT(sc); sc->alc_cdata.alc_rr_cons = 0; ALC_RXCHAIN_RESET(sc); rd = &sc->alc_rdata; bzero(rd->alc_rr_ring, ALC_RR_RING_SZ); bus_dmamap_sync(sc->alc_cdata.alc_rr_ring_tag, sc->alc_cdata.alc_rr_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void alc_init_cmb(struct alc_softc *sc) { struct alc_ring_data *rd; ALC_LOCK_ASSERT(sc); rd = &sc->alc_rdata; bzero(rd->alc_cmb, ALC_CMB_SZ); bus_dmamap_sync(sc->alc_cdata.alc_cmb_tag, sc->alc_cdata.alc_cmb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void alc_init_smb(struct alc_softc *sc) { struct alc_ring_data *rd; ALC_LOCK_ASSERT(sc); rd = &sc->alc_rdata; bzero(rd->alc_smb, ALC_SMB_SZ); bus_dmamap_sync(sc->alc_cdata.alc_smb_tag, sc->alc_cdata.alc_smb_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void alc_rxvlan(struct alc_softc *sc) { struct ifnet *ifp; uint32_t reg; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; reg = CSR_READ_4(sc, ALC_MAC_CFG); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) reg |= MAC_CFG_VLAN_TAG_STRIP; else reg &= ~MAC_CFG_VLAN_TAG_STRIP; CSR_WRITE_4(sc, ALC_MAC_CFG, reg); } static void alc_rxfilter(struct alc_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; uint32_t crc; uint32_t mchash[2]; uint32_t rxcfg; ALC_LOCK_ASSERT(sc); ifp = sc->alc_ifp; bzero(mchash, sizeof(mchash)); rxcfg = CSR_READ_4(sc, ALC_MAC_CFG); rxcfg &= ~(MAC_CFG_ALLMULTI | MAC_CFG_BCAST | MAC_CFG_PROMISC); if ((ifp->if_flags & IFF_BROADCAST) != 0) rxcfg |= MAC_CFG_BCAST; if ((ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI)) != 0) { if ((ifp->if_flags & IFF_PROMISC) != 0) rxcfg |= MAC_CFG_PROMISC; if ((ifp->if_flags & IFF_ALLMULTI) != 0) rxcfg |= MAC_CFG_ALLMULTI; mchash[0] = 0xFFFFFFFF; mchash[1] = 0xFFFFFFFF; goto chipit; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &sc->alc_ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; crc = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN); mchash[crc >> 31] |= 1 << ((crc >> 26) & 0x1f); } if_maddr_runlock(ifp); chipit: CSR_WRITE_4(sc, ALC_MAR0, mchash[0]); CSR_WRITE_4(sc, ALC_MAR1, mchash[1]); CSR_WRITE_4(sc, ALC_MAC_CFG, rxcfg); } static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (arg1 == NULL) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_alc_proc_limit(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, ALC_PROC_MIN, ALC_PROC_MAX)); } static int sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, ALC_IM_TIMER_MIN, ALC_IM_TIMER_MAX)); } #ifdef NETDUMP static void -alc_netdump_init(struct ifnet *ifp, int *nmbufp, int *nclustp) +alc_netdump_init(struct ifnet *ifp __unused, int *nrxr) { - *nmbufp += ALC_RX_RING_CNT; - *nclustp += ALC_RX_RING_CNT; + *nrxr = ALC_RX_RING_CNT; } static void alc_netdump_event(struct ifnet *ifp, enum netdump_ev event) { struct alc_softc *sc; - sc = ifp->if_softc; + sc = if_getsoftc(ifp); switch (event) { case NETDUMP_START: sc->alc_buf_size = imin(sc->alc_buf_size, MCLBYTES); break; default: break; } } static int alc_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct alc_softc *sc; int error; - sc = ifp->if_softc; - + sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); error = alc_encap(sc, &m); if (error == 0) alc_start_tx(sc); return (error); } static int alc_netdump_poll(struct ifnet *ifp, int count) { struct alc_softc *sc; - sc = ifp->if_softc; - + sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); alc_txeof(sc); return (alc_rxintr(sc, count)); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/bge/if_bge.c =================================================================== --- user/markj/netdump/sys/dev/bge/if_bge.c (revision 331674) +++ user/markj/netdump/sys/dev/bge/if_bge.c (revision 331675) @@ -1,6884 +1,6882 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2001 Wind River Systems * Copyright (c) 1997, 1998, 1999, 2001 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Broadcom BCM57xx(x)/BCM590x NetXtreme and NetLink family Ethernet driver * * The Broadcom BCM5700 is based on technology originally developed by * Alteon Networks as part of the Tigon I and Tigon II Gigabit Ethernet * MAC chips. The BCM5700, sometimes referred to as the Tigon III, has * two on-board MIPS R4000 CPUs and can have as much as 16MB of external * SSRAM. The BCM5700 supports TCP, UDP and IP checksum offload, jumbo * frames, highly configurable RX filtering, and 16 RX and TX queues * (which, along with RX filter rules, can be used for QOS applications). * Other features, such as TCP segmentation, may be available as part * of value-added firmware updates. Unlike the Tigon I and Tigon II, * firmware images can be stored in hardware and need not be compiled * into the driver. * * The BCM5700 supports the PCI v2.2 and PCI-X v1.0 standards, and will * function in a 32-bit/64-bit 33/66Mhz bus, or a 64-bit/133Mhz bus. * * The BCM5701 is a single-chip solution incorporating both the BCM5700 * MAC and a BCM5401 10/100/1000 PHY. Unlike the BCM5700, the BCM5701 * does not support external SSRAM. * * Broadcom also produces a variation of the BCM5700 under the "Altima" * brand name, which is functionally similar but lacks PCI-X support. * * Without external SSRAM, you can only have at most 4 TX rings, * and the use of the mini RX ring is disabled. This seems to imply * that these features are simply not available on the BCM5701. As a * result, this driver does not implement any support for the mini RX * ring. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #ifdef __sparc64__ #include #include #include #include #endif #include #include #include #define BGE_CSUM_FEATURES (CSUM_IP | CSUM_TCP) #define ETHER_MIN_NOPAD (ETHER_MIN_LEN - ETHER_CRC_LEN) /* i.e., 60 */ MODULE_DEPEND(bge, pci, 1, 1, 1); MODULE_DEPEND(bge, ether, 1, 1, 1); MODULE_DEPEND(bge, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* * Various supported device vendors/types and their names. Note: the * spec seems to indicate that the hardware still has Alteon's vendor * ID burned into it, though it will always be overriden by the vendor * ID in the EEPROM. Just to be safe, we cover all possibilities. */ static const struct bge_type { uint16_t bge_vid; uint16_t bge_did; } bge_devs[] = { { ALTEON_VENDORID, ALTEON_DEVICEID_BCM5700 }, { ALTEON_VENDORID, ALTEON_DEVICEID_BCM5701 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC1000 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC1002 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC9100 }, { APPLE_VENDORID, APPLE_DEVICE_BCM5701 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5700 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5701 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702X }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703X }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704S_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705K }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705M_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5714C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5714S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5715 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5715S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5717 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5717C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5718 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5719 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5720 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5721 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5722 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5723 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5725 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5727 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5750 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5750M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5752 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5752M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5754 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5754M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5755 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5755M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5756 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761E }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761SE }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5762 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5764 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5780 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5780S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5781 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5782 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5784 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5785F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5785G }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5786 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5788 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5789 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5901 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5901A2 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5903M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5906 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5906M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57760 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57761 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57762 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57764 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57765 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57766 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57767 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57780 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57781 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57782 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57785 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57786 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57787 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57788 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57790 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57791 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57795 }, { SK_VENDORID, SK_DEVICEID_ALTIMA }, { TC_VENDORID, TC_DEVICEID_3C996 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PW008GE4 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PW008GE5 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PP250450 }, { 0, 0 } }; static const struct bge_vendor { uint16_t v_id; const char *v_name; } bge_vendors[] = { { ALTEON_VENDORID, "Alteon" }, { ALTIMA_VENDORID, "Altima" }, { APPLE_VENDORID, "Apple" }, { BCOM_VENDORID, "Broadcom" }, { SK_VENDORID, "SysKonnect" }, { TC_VENDORID, "3Com" }, { FJTSU_VENDORID, "Fujitsu" }, { 0, NULL } }; static const struct bge_revision { uint32_t br_chipid; const char *br_name; } bge_revisions[] = { { BGE_CHIPID_BCM5700_A0, "BCM5700 A0" }, { BGE_CHIPID_BCM5700_A1, "BCM5700 A1" }, { BGE_CHIPID_BCM5700_B0, "BCM5700 B0" }, { BGE_CHIPID_BCM5700_B1, "BCM5700 B1" }, { BGE_CHIPID_BCM5700_B2, "BCM5700 B2" }, { BGE_CHIPID_BCM5700_B3, "BCM5700 B3" }, { BGE_CHIPID_BCM5700_ALTIMA, "BCM5700 Altima" }, { BGE_CHIPID_BCM5700_C0, "BCM5700 C0" }, { BGE_CHIPID_BCM5701_A0, "BCM5701 A0" }, { BGE_CHIPID_BCM5701_B0, "BCM5701 B0" }, { BGE_CHIPID_BCM5701_B2, "BCM5701 B2" }, { BGE_CHIPID_BCM5701_B5, "BCM5701 B5" }, { BGE_CHIPID_BCM5703_A0, "BCM5703 A0" }, { BGE_CHIPID_BCM5703_A1, "BCM5703 A1" }, { BGE_CHIPID_BCM5703_A2, "BCM5703 A2" }, { BGE_CHIPID_BCM5703_A3, "BCM5703 A3" }, { BGE_CHIPID_BCM5703_B0, "BCM5703 B0" }, { BGE_CHIPID_BCM5704_A0, "BCM5704 A0" }, { BGE_CHIPID_BCM5704_A1, "BCM5704 A1" }, { BGE_CHIPID_BCM5704_A2, "BCM5704 A2" }, { BGE_CHIPID_BCM5704_A3, "BCM5704 A3" }, { BGE_CHIPID_BCM5704_B0, "BCM5704 B0" }, { BGE_CHIPID_BCM5705_A0, "BCM5705 A0" }, { BGE_CHIPID_BCM5705_A1, "BCM5705 A1" }, { BGE_CHIPID_BCM5705_A2, "BCM5705 A2" }, { BGE_CHIPID_BCM5705_A3, "BCM5705 A3" }, { BGE_CHIPID_BCM5750_A0, "BCM5750 A0" }, { BGE_CHIPID_BCM5750_A1, "BCM5750 A1" }, { BGE_CHIPID_BCM5750_A3, "BCM5750 A3" }, { BGE_CHIPID_BCM5750_B0, "BCM5750 B0" }, { BGE_CHIPID_BCM5750_B1, "BCM5750 B1" }, { BGE_CHIPID_BCM5750_C0, "BCM5750 C0" }, { BGE_CHIPID_BCM5750_C1, "BCM5750 C1" }, { BGE_CHIPID_BCM5750_C2, "BCM5750 C2" }, { BGE_CHIPID_BCM5714_A0, "BCM5714 A0" }, { BGE_CHIPID_BCM5752_A0, "BCM5752 A0" }, { BGE_CHIPID_BCM5752_A1, "BCM5752 A1" }, { BGE_CHIPID_BCM5752_A2, "BCM5752 A2" }, { BGE_CHIPID_BCM5714_B0, "BCM5714 B0" }, { BGE_CHIPID_BCM5714_B3, "BCM5714 B3" }, { BGE_CHIPID_BCM5715_A0, "BCM5715 A0" }, { BGE_CHIPID_BCM5715_A1, "BCM5715 A1" }, { BGE_CHIPID_BCM5715_A3, "BCM5715 A3" }, { BGE_CHIPID_BCM5717_A0, "BCM5717 A0" }, { BGE_CHIPID_BCM5717_B0, "BCM5717 B0" }, { BGE_CHIPID_BCM5717_C0, "BCM5717 C0" }, { BGE_CHIPID_BCM5719_A0, "BCM5719 A0" }, { BGE_CHIPID_BCM5720_A0, "BCM5720 A0" }, { BGE_CHIPID_BCM5755_A0, "BCM5755 A0" }, { BGE_CHIPID_BCM5755_A1, "BCM5755 A1" }, { BGE_CHIPID_BCM5755_A2, "BCM5755 A2" }, { BGE_CHIPID_BCM5722_A0, "BCM5722 A0" }, { BGE_CHIPID_BCM5761_A0, "BCM5761 A0" }, { BGE_CHIPID_BCM5761_A1, "BCM5761 A1" }, { BGE_CHIPID_BCM5762_A0, "BCM5762 A0" }, { BGE_CHIPID_BCM5784_A0, "BCM5784 A0" }, { BGE_CHIPID_BCM5784_A1, "BCM5784 A1" }, /* 5754 and 5787 share the same ASIC ID */ { BGE_CHIPID_BCM5787_A0, "BCM5754/5787 A0" }, { BGE_CHIPID_BCM5787_A1, "BCM5754/5787 A1" }, { BGE_CHIPID_BCM5787_A2, "BCM5754/5787 A2" }, { BGE_CHIPID_BCM5906_A1, "BCM5906 A1" }, { BGE_CHIPID_BCM5906_A2, "BCM5906 A2" }, { BGE_CHIPID_BCM57765_A0, "BCM57765 A0" }, { BGE_CHIPID_BCM57765_B0, "BCM57765 B0" }, { BGE_CHIPID_BCM57780_A0, "BCM57780 A0" }, { BGE_CHIPID_BCM57780_A1, "BCM57780 A1" }, { 0, NULL } }; /* * Some defaults for major revisions, so that newer steppings * that we don't know about have a shot at working. */ static const struct bge_revision bge_majorrevs[] = { { BGE_ASICREV_BCM5700, "unknown BCM5700" }, { BGE_ASICREV_BCM5701, "unknown BCM5701" }, { BGE_ASICREV_BCM5703, "unknown BCM5703" }, { BGE_ASICREV_BCM5704, "unknown BCM5704" }, { BGE_ASICREV_BCM5705, "unknown BCM5705" }, { BGE_ASICREV_BCM5750, "unknown BCM5750" }, { BGE_ASICREV_BCM5714_A0, "unknown BCM5714" }, { BGE_ASICREV_BCM5752, "unknown BCM5752" }, { BGE_ASICREV_BCM5780, "unknown BCM5780" }, { BGE_ASICREV_BCM5714, "unknown BCM5714" }, { BGE_ASICREV_BCM5755, "unknown BCM5755" }, { BGE_ASICREV_BCM5761, "unknown BCM5761" }, { BGE_ASICREV_BCM5784, "unknown BCM5784" }, { BGE_ASICREV_BCM5785, "unknown BCM5785" }, /* 5754 and 5787 share the same ASIC ID */ { BGE_ASICREV_BCM5787, "unknown BCM5754/5787" }, { BGE_ASICREV_BCM5906, "unknown BCM5906" }, { BGE_ASICREV_BCM57765, "unknown BCM57765" }, { BGE_ASICREV_BCM57766, "unknown BCM57766" }, { BGE_ASICREV_BCM57780, "unknown BCM57780" }, { BGE_ASICREV_BCM5717, "unknown BCM5717" }, { BGE_ASICREV_BCM5719, "unknown BCM5719" }, { BGE_ASICREV_BCM5720, "unknown BCM5720" }, { BGE_ASICREV_BCM5762, "unknown BCM5762" }, { 0, NULL } }; #define BGE_IS_JUMBO_CAPABLE(sc) ((sc)->bge_flags & BGE_FLAG_JUMBO) #define BGE_IS_5700_FAMILY(sc) ((sc)->bge_flags & BGE_FLAG_5700_FAMILY) #define BGE_IS_5705_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5705_PLUS) #define BGE_IS_5714_FAMILY(sc) ((sc)->bge_flags & BGE_FLAG_5714_FAMILY) #define BGE_IS_575X_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_575X_PLUS) #define BGE_IS_5755_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5755_PLUS) #define BGE_IS_5717_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5717_PLUS) #define BGE_IS_57765_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_57765_PLUS) static uint32_t bge_chipid(device_t); static const struct bge_vendor * bge_lookup_vendor(uint16_t); static const struct bge_revision * bge_lookup_rev(uint32_t); typedef int (*bge_eaddr_fcn_t)(struct bge_softc *, uint8_t[]); static int bge_probe(device_t); static int bge_attach(device_t); static int bge_detach(device_t); static int bge_suspend(device_t); static int bge_resume(device_t); static void bge_release_resources(struct bge_softc *); static void bge_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int bge_dma_alloc(struct bge_softc *); static void bge_dma_free(struct bge_softc *); static int bge_dma_ring_alloc(struct bge_softc *, bus_size_t, bus_size_t, bus_dma_tag_t *, uint8_t **, bus_dmamap_t *, bus_addr_t *, const char *); static void bge_devinfo(struct bge_softc *); static int bge_mbox_reorder(struct bge_softc *); static int bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[]); static int bge_get_eaddr_mem(struct bge_softc *, uint8_t[]); static int bge_get_eaddr_nvram(struct bge_softc *, uint8_t[]); static int bge_get_eaddr_eeprom(struct bge_softc *, uint8_t[]); static int bge_get_eaddr(struct bge_softc *, uint8_t[]); static void bge_txeof(struct bge_softc *, uint16_t); static void bge_rxcsum(struct bge_softc *, struct bge_rx_bd *, struct mbuf *); static int bge_rxeof(struct bge_softc *, uint16_t, int); static void bge_asf_driver_up (struct bge_softc *); static void bge_tick(void *); static void bge_stats_clear_regs(struct bge_softc *); static void bge_stats_update(struct bge_softc *); static void bge_stats_update_regs(struct bge_softc *); static struct mbuf *bge_check_short_dma(struct mbuf *); static struct mbuf *bge_setup_tso(struct bge_softc *, struct mbuf *, uint16_t *, uint16_t *); static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *); static void bge_intr(void *); static int bge_msi_intr(void *); static void bge_intr_task(void *, int); static void bge_start(if_t); static void bge_start_locked(if_t); static void bge_start_tx(struct bge_softc *, uint32_t); static int bge_ioctl(if_t, u_long, caddr_t); static void bge_init_locked(struct bge_softc *); static void bge_init(void *); static void bge_stop_block(struct bge_softc *, bus_size_t, uint32_t); static void bge_stop(struct bge_softc *); static void bge_watchdog(struct bge_softc *); static int bge_shutdown(device_t); static int bge_ifmedia_upd_locked(if_t); static int bge_ifmedia_upd(if_t); static void bge_ifmedia_sts(if_t, struct ifmediareq *); static uint64_t bge_get_counter(if_t, ift_counter); static uint8_t bge_nvram_getbyte(struct bge_softc *, int, uint8_t *); static int bge_read_nvram(struct bge_softc *, caddr_t, int, int); static uint8_t bge_eeprom_getbyte(struct bge_softc *, int, uint8_t *); static int bge_read_eeprom(struct bge_softc *, caddr_t, int, int); static void bge_setpromisc(struct bge_softc *); static void bge_setmulti(struct bge_softc *); static void bge_setvlan(struct bge_softc *); static __inline void bge_rxreuse_std(struct bge_softc *, int); static __inline void bge_rxreuse_jumbo(struct bge_softc *, int); static int bge_newbuf_std(struct bge_softc *, int); static int bge_newbuf_jumbo(struct bge_softc *, int); static int bge_init_rx_ring_std(struct bge_softc *); static void bge_free_rx_ring_std(struct bge_softc *); static int bge_init_rx_ring_jumbo(struct bge_softc *); static void bge_free_rx_ring_jumbo(struct bge_softc *); static void bge_free_tx_ring(struct bge_softc *); static int bge_init_tx_ring(struct bge_softc *); static int bge_chipinit(struct bge_softc *); static int bge_blockinit(struct bge_softc *); static uint32_t bge_dma_swap_options(struct bge_softc *); static int bge_has_eaddr(struct bge_softc *); static uint32_t bge_readmem_ind(struct bge_softc *, int); static void bge_writemem_ind(struct bge_softc *, int, int); static void bge_writembx(struct bge_softc *, int, int); #ifdef notdef static uint32_t bge_readreg_ind(struct bge_softc *, int); #endif static void bge_writemem_direct(struct bge_softc *, int, int); static void bge_writereg_ind(struct bge_softc *, int, int); static int bge_miibus_readreg(device_t, int, int); static int bge_miibus_writereg(device_t, int, int, int); static void bge_miibus_statchg(device_t); #ifdef DEVICE_POLLING static int bge_poll(if_t ifp, enum poll_cmd cmd, int count); #endif #define BGE_RESET_SHUTDOWN 0 #define BGE_RESET_START 1 #define BGE_RESET_SUSPEND 2 static void bge_sig_post_reset(struct bge_softc *, int); static void bge_sig_legacy(struct bge_softc *, int); static void bge_sig_pre_reset(struct bge_softc *, int); static void bge_stop_fw(struct bge_softc *); static int bge_reset(struct bge_softc *); static void bge_link_upd(struct bge_softc *); static void bge_ape_lock_init(struct bge_softc *); static void bge_ape_read_fw_ver(struct bge_softc *); static int bge_ape_lock(struct bge_softc *, int); static void bge_ape_unlock(struct bge_softc *, int); static void bge_ape_send_event(struct bge_softc *, uint32_t); static void bge_ape_driver_state_change(struct bge_softc *, int); /* * The BGE_REGISTER_DEBUG option is only for low-level debugging. It may * leak information to untrusted users. It is also known to cause alignment * traps on certain architectures. */ #ifdef BGE_REGISTER_DEBUG static int bge_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int bge_sysctl_reg_read(SYSCTL_HANDLER_ARGS); static int bge_sysctl_ape_read(SYSCTL_HANDLER_ARGS); static int bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS); #endif static void bge_add_sysctls(struct bge_softc *); static void bge_add_sysctl_stats_regs(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void bge_add_sysctl_stats(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS); NETDUMP_DEFINE(bge); static device_method_t bge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, bge_probe), DEVMETHOD(device_attach, bge_attach), DEVMETHOD(device_detach, bge_detach), DEVMETHOD(device_shutdown, bge_shutdown), DEVMETHOD(device_suspend, bge_suspend), DEVMETHOD(device_resume, bge_resume), /* MII interface */ DEVMETHOD(miibus_readreg, bge_miibus_readreg), DEVMETHOD(miibus_writereg, bge_miibus_writereg), DEVMETHOD(miibus_statchg, bge_miibus_statchg), DEVMETHOD_END }; static driver_t bge_driver = { "bge", bge_methods, sizeof(struct bge_softc) }; static devclass_t bge_devclass; DRIVER_MODULE(bge, pci, bge_driver, bge_devclass, 0, 0); DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0); static int bge_allow_asf = 1; static SYSCTL_NODE(_hw, OID_AUTO, bge, CTLFLAG_RD, 0, "BGE driver parameters"); SYSCTL_INT(_hw_bge, OID_AUTO, allow_asf, CTLFLAG_RDTUN, &bge_allow_asf, 0, "Allow ASF mode if available"); #define SPARC64_BLADE_1500_MODEL "SUNW,Sun-Blade-1500" #define SPARC64_BLADE_1500_PATH_BGE "/pci@1f,700000/network@2" #define SPARC64_BLADE_2500_MODEL "SUNW,Sun-Blade-2500" #define SPARC64_BLADE_2500_PATH_BGE "/pci@1c,600000/network@3" #define SPARC64_OFW_SUBVENDOR "subsystem-vendor-id" static int bge_has_eaddr(struct bge_softc *sc) { #ifdef __sparc64__ char buf[sizeof(SPARC64_BLADE_1500_PATH_BGE)]; device_t dev; uint32_t subvendor; dev = sc->bge_dev; /* * The on-board BGEs found in sun4u machines aren't fitted with * an EEPROM which means that we have to obtain the MAC address * via OFW and that some tests will always fail. We distinguish * such BGEs by the subvendor ID, which also has to be obtained * from OFW instead of the PCI configuration space as the latter * indicates Broadcom as the subvendor of the netboot interface. * For early Blade 1500 and 2500 we even have to check the OFW * device path as the subvendor ID always defaults to Broadcom * there. */ if (OF_getprop(ofw_bus_get_node(dev), SPARC64_OFW_SUBVENDOR, &subvendor, sizeof(subvendor)) == sizeof(subvendor) && (subvendor == FJTSU_VENDORID || subvendor == SUN_VENDORID)) return (0); memset(buf, 0, sizeof(buf)); if (OF_package_to_path(ofw_bus_get_node(dev), buf, sizeof(buf)) > 0) { if (strcmp(sparc64_model, SPARC64_BLADE_1500_MODEL) == 0 && strcmp(buf, SPARC64_BLADE_1500_PATH_BGE) == 0) return (0); if (strcmp(sparc64_model, SPARC64_BLADE_2500_MODEL) == 0 && strcmp(buf, SPARC64_BLADE_2500_PATH_BGE) == 0) return (0); } #endif return (1); } static uint32_t bge_readmem_ind(struct bge_softc *sc, int off) { device_t dev; uint32_t val; if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) return (0); dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); val = pci_read_config(dev, BGE_PCI_MEMWIN_DATA, 4); pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4); return (val); } static void bge_writemem_ind(struct bge_softc *sc, int off, int val) { device_t dev; if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) return; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); pci_write_config(dev, BGE_PCI_MEMWIN_DATA, val, 4); pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4); } #ifdef notdef static uint32_t bge_readreg_ind(struct bge_softc *sc, int off) { device_t dev; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_REG_BASEADDR, off, 4); return (pci_read_config(dev, BGE_PCI_REG_DATA, 4)); } #endif static void bge_writereg_ind(struct bge_softc *sc, int off, int val) { device_t dev; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_REG_BASEADDR, off, 4); pci_write_config(dev, BGE_PCI_REG_DATA, val, 4); } static void bge_writemem_direct(struct bge_softc *sc, int off, int val) { CSR_WRITE_4(sc, off, val); } static void bge_writembx(struct bge_softc *sc, int off, int val) { if (sc->bge_asicrev == BGE_ASICREV_BCM5906) off += BGE_LPMBX_IRQ0_HI - BGE_MBX_IRQ0_HI; CSR_WRITE_4(sc, off, val); if ((sc->bge_flags & BGE_FLAG_MBOX_REORDER) != 0) CSR_READ_4(sc, off); } /* * Clear all stale locks and select the lock for this driver instance. */ static void bge_ape_lock_init(struct bge_softc *sc) { uint32_t bit, regbase; int i; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) regbase = BGE_APE_LOCK_GRANT; else regbase = BGE_APE_PER_LOCK_GRANT; /* Clear any stale locks. */ for (i = BGE_APE_LOCK_PHY0; i <= BGE_APE_LOCK_GPIO; i++) { switch (i) { case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: bit = BGE_APE_LOCK_GRANT_DRIVER0; break; default: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); } APE_WRITE_4(sc, regbase + 4 * i, bit); } /* Select the PHY lock based on the device's function number. */ switch (sc->bge_func_addr) { case 0: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY0; break; case 1: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY1; break; case 2: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY2; break; case 3: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY3; break; default: device_printf(sc->bge_dev, "PHY lock not supported on this function\n"); } } /* * Check for APE firmware, set flags, and print version info. */ static void bge_ape_read_fw_ver(struct bge_softc *sc) { const char *fwtype; uint32_t apedata, features; /* Check for a valid APE signature in shared memory. */ apedata = APE_READ_4(sc, BGE_APE_SEG_SIG); if (apedata != BGE_APE_SEG_SIG_MAGIC) { sc->bge_mfw_flags &= ~ BGE_MFW_ON_APE; return; } /* Check if APE firmware is running. */ apedata = APE_READ_4(sc, BGE_APE_FW_STATUS); if ((apedata & BGE_APE_FW_STATUS_READY) == 0) { device_printf(sc->bge_dev, "APE signature found " "but FW status not ready! 0x%08x\n", apedata); return; } sc->bge_mfw_flags |= BGE_MFW_ON_APE; /* Fetch the APE firwmare type and version. */ apedata = APE_READ_4(sc, BGE_APE_FW_VERSION); features = APE_READ_4(sc, BGE_APE_FW_FEATURES); if ((features & BGE_APE_FW_FEATURE_NCSI) != 0) { sc->bge_mfw_flags |= BGE_MFW_TYPE_NCSI; fwtype = "NCSI"; } else if ((features & BGE_APE_FW_FEATURE_DASH) != 0) { sc->bge_mfw_flags |= BGE_MFW_TYPE_DASH; fwtype = "DASH"; } else fwtype = "UNKN"; /* Print the APE firmware version. */ device_printf(sc->bge_dev, "APE FW version: %s v%d.%d.%d.%d\n", fwtype, (apedata & BGE_APE_FW_VERSION_MAJMSK) >> BGE_APE_FW_VERSION_MAJSFT, (apedata & BGE_APE_FW_VERSION_MINMSK) >> BGE_APE_FW_VERSION_MINSFT, (apedata & BGE_APE_FW_VERSION_REVMSK) >> BGE_APE_FW_VERSION_REVSFT, (apedata & BGE_APE_FW_VERSION_BLDMSK)); } static int bge_ape_lock(struct bge_softc *sc, int locknum) { uint32_t bit, gnt, req, status; int i, off; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return (0); /* Lock request/grant registers have different bases. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5761) { req = BGE_APE_LOCK_REQ; gnt = BGE_APE_LOCK_GRANT; } else { req = BGE_APE_PER_LOCK_REQ; gnt = BGE_APE_PER_LOCK_GRANT; } off = 4 * locknum; switch (locknum) { case BGE_APE_LOCK_GPIO: /* Lock required when using GPIO. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5761) return (0); if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_GRC: /* Lock required to reset the device. */ if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_MEM: /* Lock required when accessing certain APE memory. */ if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: /* Lock required when accessing PHYs. */ bit = BGE_APE_LOCK_REQ_DRIVER0; break; default: return (EINVAL); } /* Request a lock. */ APE_WRITE_4(sc, req + off, bit); /* Wait up to 1 second to acquire lock. */ for (i = 0; i < 20000; i++) { status = APE_READ_4(sc, gnt + off); if (status == bit) break; DELAY(50); } /* Handle any errors. */ if (status != bit) { device_printf(sc->bge_dev, "APE lock %d request failed! " "request = 0x%04x[0x%04x], status = 0x%04x[0x%04x]\n", locknum, req + off, bit & 0xFFFF, gnt + off, status & 0xFFFF); /* Revoke the lock request. */ APE_WRITE_4(sc, gnt + off, bit); return (EBUSY); } return (0); } static void bge_ape_unlock(struct bge_softc *sc, int locknum) { uint32_t bit, gnt; int off; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) gnt = BGE_APE_LOCK_GRANT; else gnt = BGE_APE_PER_LOCK_GRANT; off = 4 * locknum; switch (locknum) { case BGE_APE_LOCK_GPIO: if (sc->bge_asicrev == BGE_ASICREV_BCM5761) return; if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_GRC: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_MEM: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: bit = BGE_APE_LOCK_GRANT_DRIVER0; break; default: return; } APE_WRITE_4(sc, gnt + off, bit); } /* * Send an event to the APE firmware. */ static void bge_ape_send_event(struct bge_softc *sc, uint32_t event) { uint32_t apedata; int i; /* NCSI does not support APE events. */ if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; /* Wait up to 1ms for APE to service previous event. */ for (i = 10; i > 0; i--) { if (bge_ape_lock(sc, BGE_APE_LOCK_MEM) != 0) break; apedata = APE_READ_4(sc, BGE_APE_EVENT_STATUS); if ((apedata & BGE_APE_EVENT_STATUS_EVENT_PENDING) == 0) { APE_WRITE_4(sc, BGE_APE_EVENT_STATUS, event | BGE_APE_EVENT_STATUS_EVENT_PENDING); bge_ape_unlock(sc, BGE_APE_LOCK_MEM); APE_WRITE_4(sc, BGE_APE_EVENT, BGE_APE_EVENT_1); break; } bge_ape_unlock(sc, BGE_APE_LOCK_MEM); DELAY(100); } if (i == 0) device_printf(sc->bge_dev, "APE event 0x%08x send timed out\n", event); } static void bge_ape_driver_state_change(struct bge_softc *sc, int kind) { uint32_t apedata, event; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; switch (kind) { case BGE_RESET_START: /* If this is the first load, clear the load counter. */ apedata = APE_READ_4(sc, BGE_APE_HOST_SEG_SIG); if (apedata != BGE_APE_HOST_SEG_SIG_MAGIC) APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, 0); else { apedata = APE_READ_4(sc, BGE_APE_HOST_INIT_COUNT); APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, ++apedata); } APE_WRITE_4(sc, BGE_APE_HOST_SEG_SIG, BGE_APE_HOST_SEG_SIG_MAGIC); APE_WRITE_4(sc, BGE_APE_HOST_SEG_LEN, BGE_APE_HOST_SEG_LEN_MAGIC); /* Add some version info if bge(4) supports it. */ APE_WRITE_4(sc, BGE_APE_HOST_DRIVER_ID, BGE_APE_HOST_DRIVER_ID_MAGIC(1, 0)); APE_WRITE_4(sc, BGE_APE_HOST_BEHAVIOR, BGE_APE_HOST_BEHAV_NO_PHYLOCK); APE_WRITE_4(sc, BGE_APE_HOST_HEARTBEAT_INT_MS, BGE_APE_HOST_HEARTBEAT_INT_DISABLE); APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE, BGE_APE_HOST_DRVR_STATE_START); event = BGE_APE_EVENT_STATUS_STATE_START; break; case BGE_RESET_SHUTDOWN: APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE, BGE_APE_HOST_DRVR_STATE_UNLOAD); event = BGE_APE_EVENT_STATUS_STATE_UNLOAD; break; case BGE_RESET_SUSPEND: event = BGE_APE_EVENT_STATUS_STATE_SUSPEND; break; default: return; } bge_ape_send_event(sc, event | BGE_APE_EVENT_STATUS_DRIVER_EVNT | BGE_APE_EVENT_STATUS_STATE_CHNGE); } /* * Map a single buffer address. */ static void bge_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct bge_dmamap_arg *ctx; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); ctx = arg; ctx->bge_busaddr = segs->ds_addr; } static uint8_t bge_nvram_getbyte(struct bge_softc *sc, int addr, uint8_t *dest) { uint32_t access, byte = 0; int i; /* Lock. */ CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1); for (i = 0; i < 8000; i++) { if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1) break; DELAY(20); } if (i == 8000) return (1); /* Enable access. */ access = CSR_READ_4(sc, BGE_NVRAM_ACCESS); CSR_WRITE_4(sc, BGE_NVRAM_ACCESS, access | BGE_NVRAMACC_ENABLE); CSR_WRITE_4(sc, BGE_NVRAM_ADDR, addr & 0xfffffffc); CSR_WRITE_4(sc, BGE_NVRAM_CMD, BGE_NVRAM_READCMD); for (i = 0; i < BGE_TIMEOUT * 10; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_NVRAM_CMD) & BGE_NVRAMCMD_DONE) { DELAY(10); break; } } if (i == BGE_TIMEOUT * 10) { if_printf(sc->bge_ifp, "nvram read timed out\n"); return (1); } /* Get result. */ byte = CSR_READ_4(sc, BGE_NVRAM_RDDATA); *dest = (bswap32(byte) >> ((addr % 4) * 8)) & 0xFF; /* Disable access. */ CSR_WRITE_4(sc, BGE_NVRAM_ACCESS, access); /* Unlock. */ CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_CLR1); CSR_READ_4(sc, BGE_NVRAM_SWARB); return (0); } /* * Read a sequence of bytes from NVRAM. */ static int bge_read_nvram(struct bge_softc *sc, caddr_t dest, int off, int cnt) { int err = 0, i; uint8_t byte = 0; if (sc->bge_asicrev != BGE_ASICREV_BCM5906) return (1); for (i = 0; i < cnt; i++) { err = bge_nvram_getbyte(sc, off + i, &byte); if (err) break; *(dest + i) = byte; } return (err ? 1 : 0); } /* * Read a byte of data stored in the EEPROM at address 'addr.' The * BCM570x supports both the traditional bitbang interface and an * auto access interface for reading the EEPROM. We use the auto * access method. */ static uint8_t bge_eeprom_getbyte(struct bge_softc *sc, int addr, uint8_t *dest) { int i; uint32_t byte = 0; /* * Enable use of auto EEPROM access so we can avoid * having to use the bitbang method. */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_AUTO_EEPROM); /* Reset the EEPROM, load the clock period. */ CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EEADDR_RESET | BGE_EEHALFCLK(BGE_HALFCLK_384SCL)); DELAY(20); /* Issue the read EEPROM command. */ CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EE_READCMD | addr); /* Wait for completion */ for(i = 0; i < BGE_TIMEOUT * 10; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_EE_ADDR) & BGE_EEADDR_DONE) break; } if (i == BGE_TIMEOUT * 10) { device_printf(sc->bge_dev, "EEPROM read timed out\n"); return (1); } /* Get result. */ byte = CSR_READ_4(sc, BGE_EE_DATA); *dest = (byte >> ((addr % 4) * 8)) & 0xFF; return (0); } /* * Read a sequence of bytes from the EEPROM. */ static int bge_read_eeprom(struct bge_softc *sc, caddr_t dest, int off, int cnt) { int i, error = 0; uint8_t byte = 0; for (i = 0; i < cnt; i++) { error = bge_eeprom_getbyte(sc, off + i, &byte); if (error) break; *(dest + i) = byte; } return (error ? 1 : 0); } static int bge_miibus_readreg(device_t dev, int phy, int reg) { struct bge_softc *sc; uint32_t val; int i; sc = device_get_softc(dev); if (bge_ape_lock(sc, sc->bge_phy_ape_lock) != 0) return (0); /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY | BGE_MIPHY(phy) | BGE_MIREG(reg)); /* Poll for the PHY register access to complete. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); val = CSR_READ_4(sc, BGE_MI_COMM); if ((val & BGE_MICOMM_BUSY) == 0) { DELAY(5); val = CSR_READ_4(sc, BGE_MI_COMM); break; } } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "PHY read timed out (phy %d, reg %d, val 0x%08x)\n", phy, reg, val); val = 0; } /* Restore the autopoll bit if necessary. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } bge_ape_unlock(sc, sc->bge_phy_ape_lock); if (val & BGE_MICOMM_READFAIL) return (0); return (val & 0xFFFF); } static int bge_miibus_writereg(device_t dev, int phy, int reg, int val) { struct bge_softc *sc; int i; sc = device_get_softc(dev); if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && (reg == BRGPHY_MII_1000CTL || reg == BRGPHY_MII_AUXCTL)) return (0); if (bge_ape_lock(sc, sc->bge_phy_ape_lock) != 0) return (0); /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY | BGE_MIPHY(phy) | BGE_MIREG(reg) | val); for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, BGE_MI_COMM) & BGE_MICOMM_BUSY)) { DELAY(5); CSR_READ_4(sc, BGE_MI_COMM); /* dummy read */ break; } } /* Restore the autopoll bit if necessary. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } bge_ape_unlock(sc, sc->bge_phy_ape_lock); if (i == BGE_TIMEOUT) device_printf(sc->bge_dev, "PHY write timed out (phy %d, reg %d, val 0x%04x)\n", phy, reg, val); return (0); } static void bge_miibus_statchg(device_t dev) { struct bge_softc *sc; struct mii_data *mii; uint32_t mac_mode, rx_mode, tx_mode; sc = device_get_softc(dev); if ((if_getdrvflags(sc->bge_ifp) & IFF_DRV_RUNNING) == 0) return; mii = device_get_softc(sc->bge_miibus); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->bge_link = 1; break; case IFM_1000_T: case IFM_1000_SX: case IFM_2500_SX: if (sc->bge_asicrev != BGE_ASICREV_BCM5906) sc->bge_link = 1; else sc->bge_link = 0; break; default: sc->bge_link = 0; break; } } else sc->bge_link = 0; if (sc->bge_link == 0) return; /* * APE firmware touches these registers to keep the MAC * connected to the outside world. Try to keep the * accesses atomic. */ /* Set the port mode (MII/GMII) to match the link speed. */ mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & ~(BGE_MACMODE_PORTMODE | BGE_MACMODE_HALF_DUPLEX); tx_mode = CSR_READ_4(sc, BGE_TX_MODE); rx_mode = CSR_READ_4(sc, BGE_RX_MODE); if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T || IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX) mac_mode |= BGE_PORTMODE_GMII; else mac_mode |= BGE_PORTMODE_MII; /* Set MAC flow control behavior to match link flow control settings. */ tx_mode &= ~BGE_TXMODE_FLOWCTL_ENABLE; rx_mode &= ~BGE_RXMODE_FLOWCTL_ENABLE; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) tx_mode |= BGE_TXMODE_FLOWCTL_ENABLE; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) rx_mode |= BGE_RXMODE_FLOWCTL_ENABLE; } else mac_mode |= BGE_MACMODE_HALF_DUPLEX; CSR_WRITE_4(sc, BGE_MAC_MODE, mac_mode); DELAY(40); CSR_WRITE_4(sc, BGE_TX_MODE, tx_mode); CSR_WRITE_4(sc, BGE_RX_MODE, rx_mode); } /* * Intialize a standard receive ring descriptor. */ static int bge_newbuf_std(struct bge_softc *sc, int i) { struct mbuf *m; struct bge_rx_bd *r; bus_dma_segment_t segs[1]; bus_dmamap_t map; int error, nsegs; if (sc->bge_flags & BGE_FLAG_JUMBO_STD && (if_getmtu(sc->bge_ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) { m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; } else { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; } if ((sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) == 0) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } if (sc->bge_cdata.bge_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); } map = sc->bge_cdata.bge_rx_std_dmamap[i]; sc->bge_cdata.bge_rx_std_dmamap[i] = sc->bge_cdata.bge_rx_std_sparemap; sc->bge_cdata.bge_rx_std_sparemap = map; sc->bge_cdata.bge_rx_std_chain[i] = m; sc->bge_cdata.bge_rx_std_seglen[i] = segs[0].ds_len; r = &sc->bge_ldata.bge_rx_std_ring[sc->bge_std]; r->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[0].ds_addr); r->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[0].ds_addr); r->bge_flags = BGE_RXBDFLAG_END; r->bge_len = segs[0].ds_len; r->bge_idx = i; bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_PREREAD); return (0); } /* * Initialize a jumbo receive ring descriptor. This allocates * a jumbo buffer from the pool managed internally by the driver. */ static int bge_newbuf_jumbo(struct bge_softc *sc, int i) { bus_dma_segment_t segs[BGE_NSEG_JUMBO]; bus_dmamap_t map; struct bge_extrx_bd *r; struct mbuf *m; int error, nsegs; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); if (m_cljget(m, M_NOWAIT, MJUM9BYTES) == NULL) { m_freem(m); return (ENOBUFS); } m->m_len = m->m_pkthdr.len = MJUM9BYTES; if ((sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) == 0) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } if (sc->bge_cdata.bge_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); } map = sc->bge_cdata.bge_rx_jumbo_dmamap[i]; sc->bge_cdata.bge_rx_jumbo_dmamap[i] = sc->bge_cdata.bge_rx_jumbo_sparemap; sc->bge_cdata.bge_rx_jumbo_sparemap = map; sc->bge_cdata.bge_rx_jumbo_chain[i] = m; sc->bge_cdata.bge_rx_jumbo_seglen[i][0] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][1] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][2] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][3] = 0; /* * Fill in the extended RX buffer descriptor. */ r = &sc->bge_ldata.bge_rx_jumbo_ring[sc->bge_jumbo]; r->bge_flags = BGE_RXBDFLAG_JUMBO_RING | BGE_RXBDFLAG_END; r->bge_idx = i; r->bge_len3 = r->bge_len2 = r->bge_len1 = 0; switch (nsegs) { case 4: r->bge_addr3.bge_addr_lo = BGE_ADDR_LO(segs[3].ds_addr); r->bge_addr3.bge_addr_hi = BGE_ADDR_HI(segs[3].ds_addr); r->bge_len3 = segs[3].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][3] = segs[3].ds_len; case 3: r->bge_addr2.bge_addr_lo = BGE_ADDR_LO(segs[2].ds_addr); r->bge_addr2.bge_addr_hi = BGE_ADDR_HI(segs[2].ds_addr); r->bge_len2 = segs[2].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][2] = segs[2].ds_len; case 2: r->bge_addr1.bge_addr_lo = BGE_ADDR_LO(segs[1].ds_addr); r->bge_addr1.bge_addr_hi = BGE_ADDR_HI(segs[1].ds_addr); r->bge_len1 = segs[1].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][1] = segs[1].ds_len; case 1: r->bge_addr0.bge_addr_lo = BGE_ADDR_LO(segs[0].ds_addr); r->bge_addr0.bge_addr_hi = BGE_ADDR_HI(segs[0].ds_addr); r->bge_len0 = segs[0].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][0] = segs[0].ds_len; break; default: panic("%s: %d segments\n", __func__, nsegs); } bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_PREREAD); return (0); } static int bge_init_rx_ring_std(struct bge_softc *sc) { int error, i; bzero(sc->bge_ldata.bge_rx_std_ring, BGE_STD_RX_RING_SZ); sc->bge_std = 0; for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if ((error = bge_newbuf_std(sc, i)) != 0) return (error); BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_std = 0; bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, BGE_STD_RX_RING_CNT - 1); return (0); } static void bge_free_rx_ring_std(struct bge_softc *sc) { int i; for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); m_freem(sc->bge_cdata.bge_rx_std_chain[i]); sc->bge_cdata.bge_rx_std_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_rx_std_ring[i], sizeof(struct bge_rx_bd)); } } static int bge_init_rx_ring_jumbo(struct bge_softc *sc) { struct bge_rcb *rcb; int error, i; bzero(sc->bge_ldata.bge_rx_jumbo_ring, BGE_JUMBO_RX_RING_SZ); sc->bge_jumbo = 0; for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if ((error = bge_newbuf_jumbo(sc, i)) != 0) return (error); BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_jumbo = 0; /* Enable the jumbo receive producer ring. */ rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_USE_EXT_RX_BD); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, BGE_JUMBO_RX_RING_CNT - 1); return (0); } static void bge_free_rx_ring_jumbo(struct bge_softc *sc) { int i; for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); m_freem(sc->bge_cdata.bge_rx_jumbo_chain[i]); sc->bge_cdata.bge_rx_jumbo_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_rx_jumbo_ring[i], sizeof(struct bge_extrx_bd)); } } static void bge_free_tx_ring(struct bge_softc *sc) { int i; if (sc->bge_ldata.bge_tx_ring == NULL) return; for (i = 0; i < BGE_TX_RING_CNT; i++) { if (sc->bge_cdata.bge_tx_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i]); m_freem(sc->bge_cdata.bge_tx_chain[i]); sc->bge_cdata.bge_tx_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_tx_ring[i], sizeof(struct bge_tx_bd)); } } static int bge_init_tx_ring(struct bge_softc *sc) { sc->bge_txcnt = 0; sc->bge_tx_saved_considx = 0; bzero(sc->bge_ldata.bge_tx_ring, BGE_TX_RING_SZ); bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Initialize transmit producer index for host-memory send ring. */ sc->bge_tx_prodidx = 0; bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx); /* NIC-memory send ring not used; initialize to zero. */ bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0); return (0); } static void bge_setpromisc(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; /* Enable or disable promiscuous mode as needed. */ if (if_getflags(ifp) & IFF_PROMISC) BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC); else BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC); } static void bge_setmulti(struct bge_softc *sc) { if_t ifp; int mc_count = 0; uint32_t hashes[4] = { 0, 0, 0, 0 }; int h, i, mcnt; unsigned char *mta; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; mc_count = if_multiaddr_count(ifp, -1); mta = malloc(sizeof(unsigned char) * ETHER_ADDR_LEN * mc_count, M_DEVBUF, M_NOWAIT); if(mta == NULL) { device_printf(sc->bge_dev, "Failed to allocated temp mcast list\n"); return; } if (if_getflags(ifp) & IFF_ALLMULTI || if_getflags(ifp) & IFF_PROMISC) { for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF); free(mta, M_DEVBUF); return; } /* First, zot all the existing filters. */ for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0); if_multiaddr_array(ifp, mta, &mcnt, mc_count); for(i = 0; i < mcnt; i++) { h = ether_crc32_le(mta + (i * ETHER_ADDR_LEN), ETHER_ADDR_LEN) & 0x7F; hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F); } for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]); free(mta, M_DEVBUF); } static void bge_setvlan(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; /* Enable or disable VLAN tag stripping as needed. */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG); else BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG); } static void bge_sig_pre_reset(struct bge_softc *sc, int type) { /* * Some chips don't like this so only do this if ASF is enabled */ if (sc->bge_asf_mode) bge_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC); if (sc->bge_asf_mode & ASF_NEW_HANDSHAKE) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START); break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD); break; case BGE_RESET_SUSPEND: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_SUSPEND); break; } } if (type == BGE_RESET_START || type == BGE_RESET_SUSPEND) bge_ape_driver_state_change(sc, type); } static void bge_sig_post_reset(struct bge_softc *sc, int type) { if (sc->bge_asf_mode & ASF_NEW_HANDSHAKE) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START_DONE); /* START DONE */ break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD_DONE); break; } } if (type == BGE_RESET_SHUTDOWN) bge_ape_driver_state_change(sc, type); } static void bge_sig_legacy(struct bge_softc *sc, int type) { if (sc->bge_asf_mode) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START); break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD); break; } } } static void bge_stop_fw(struct bge_softc *sc) { int i; if (sc->bge_asf_mode) { bge_writemem_ind(sc, BGE_SRAM_FW_CMD_MB, BGE_FW_CMD_PAUSE); CSR_WRITE_4(sc, BGE_RX_CPU_EVENT, CSR_READ_4(sc, BGE_RX_CPU_EVENT) | BGE_RX_CPU_DRV_EVENT); for (i = 0; i < 100; i++ ) { if (!(CSR_READ_4(sc, BGE_RX_CPU_EVENT) & BGE_RX_CPU_DRV_EVENT)) break; DELAY(10); } } } static uint32_t bge_dma_swap_options(struct bge_softc *sc) { uint32_t dma_options; dma_options = BGE_MODECTL_WORDSWAP_NONFRAME | BGE_MODECTL_BYTESWAP_DATA | BGE_MODECTL_WORDSWAP_DATA; #if BYTE_ORDER == BIG_ENDIAN dma_options |= BGE_MODECTL_BYTESWAP_NONFRAME; #endif return (dma_options); } /* * Do endian, PCI and DMA initialization. */ static int bge_chipinit(struct bge_softc *sc) { uint32_t dma_rw_ctl, misc_ctl, mode_ctl; uint16_t val; int i; /* Set endianness before we access any non-PCI registers. */ misc_ctl = BGE_INIT; if (sc->bge_flags & BGE_FLAG_TAGGED_STATUS) misc_ctl |= BGE_PCIMISCCTL_TAGGED_STATUS; pci_write_config(sc->bge_dev, BGE_PCI_MISC_CTL, misc_ctl, 4); /* * Clear the MAC statistics block in the NIC's * internal memory. */ for (i = BGE_STATS_BLOCK; i < BGE_STATS_BLOCK_END + 1; i += sizeof(uint32_t)) BGE_MEMWIN_WRITE(sc, i, 0); for (i = BGE_STATUS_BLOCK; i < BGE_STATUS_BLOCK_END + 1; i += sizeof(uint32_t)) BGE_MEMWIN_WRITE(sc, i, 0); if (sc->bge_chiprev == BGE_CHIPREV_5704_BX) { /* * Fix data corruption caused by non-qword write with WB. * Fix master abort in PCI mode. * Fix PCI latency timer. */ val = pci_read_config(sc->bge_dev, BGE_PCI_MSI_DATA + 2, 2); val |= (1 << 10) | (1 << 12) | (1 << 13); pci_write_config(sc->bge_dev, BGE_PCI_MSI_DATA + 2, val, 2); } if (sc->bge_asicrev == BGE_ASICREV_BCM57765 || sc->bge_asicrev == BGE_ASICREV_BCM57766) { /* * For the 57766 and non Ax versions of 57765, bootcode * needs to setup the PCIE Fast Training Sequence (FTS) * value to prevent transmit hangs. */ if (sc->bge_chiprev != BGE_CHIPREV_57765_AX) { CSR_WRITE_4(sc, BGE_CPMU_PADRNG_CTL, CSR_READ_4(sc, BGE_CPMU_PADRNG_CTL) | BGE_CPMU_PADRNG_CTL_RDIV2); } } /* * Set up the PCI DMA control register. */ dma_rw_ctl = BGE_PCIDMARWCTL_RD_CMD_SHIFT(6) | BGE_PCIDMARWCTL_WR_CMD_SHIFT(7); if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_mps >= 256) dma_rw_ctl |= BGE_PCIDMARWCTL_WR_WAT_SHIFT(7); else dma_rw_ctl |= BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else if (sc->bge_flags & BGE_FLAG_PCIX) { if (BGE_IS_5714_FAMILY(sc)) { /* 256 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(2) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(2); dma_rw_ctl |= (sc->bge_asicrev == BGE_ASICREV_BCM5780) ? BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL : BGE_PCIDMARWCTL_ONEDMA_ATONCE_LOCAL; } else if (sc->bge_asicrev == BGE_ASICREV_BCM5703) { /* * In the BCM5703, the DMA read watermark should * be set to less than or equal to the maximum * memory read byte count of the PCI-X command * register. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(4) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { /* 1536 bytes for read, 384 bytes for write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(7) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else { /* 384 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(3) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3) | 0x0F; } if (sc->bge_asicrev == BGE_ASICREV_BCM5703 || sc->bge_asicrev == BGE_ASICREV_BCM5704) { uint32_t tmp; /* Set ONE_DMA_AT_ONCE for hardware workaround. */ tmp = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1F; if (tmp == 6 || tmp == 7) dma_rw_ctl |= BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL; /* Set PCI-X DMA write workaround. */ dma_rw_ctl |= BGE_PCIDMARWCTL_ASRT_ALL_BE; } } else { /* Conventional PCI bus: 256 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(7) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(7); if (sc->bge_asicrev != BGE_ASICREV_BCM5705 && sc->bge_asicrev != BGE_ASICREV_BCM5750) dma_rw_ctl |= 0x0F; } if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_asicrev == BGE_ASICREV_BCM5701) dma_rw_ctl |= BGE_PCIDMARWCTL_USE_MRM | BGE_PCIDMARWCTL_ASRT_ALL_BE; if (sc->bge_asicrev == BGE_ASICREV_BCM5703 || sc->bge_asicrev == BGE_ASICREV_BCM5704) dma_rw_ctl &= ~BGE_PCIDMARWCTL_MINDMA; if (BGE_IS_5717_PLUS(sc)) { dma_rw_ctl &= ~BGE_PCIDMARWCTL_DIS_CACHE_ALIGNMENT; if (sc->bge_chipid == BGE_CHIPID_BCM57765_A0) dma_rw_ctl &= ~BGE_PCIDMARWCTL_CRDRDR_RDMA_MRRS_MSK; /* * Enable HW workaround for controllers that misinterpret * a status tag update and leave interrupts permanently * disabled. */ if (!BGE_IS_57765_PLUS(sc) && sc->bge_asicrev != BGE_ASICREV_BCM5717 && sc->bge_asicrev != BGE_ASICREV_BCM5762) dma_rw_ctl |= BGE_PCIDMARWCTL_TAGGED_STATUS_WA; } pci_write_config(sc->bge_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4); /* * Set up general mode register. */ mode_ctl = bge_dma_swap_options(sc); if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { /* Retain Host-2-BMC settings written by APE firmware. */ mode_ctl |= CSR_READ_4(sc, BGE_MODE_CTL) & (BGE_MODECTL_BYTESWAP_B2HRX_DATA | BGE_MODECTL_WORDSWAP_B2HRX_DATA | BGE_MODECTL_B2HRX_ENABLE | BGE_MODECTL_HTX2B_ENABLE); } mode_ctl |= BGE_MODECTL_MAC_ATTN_INTR | BGE_MODECTL_HOST_SEND_BDS | BGE_MODECTL_TX_NO_PHDR_CSUM; /* * BCM5701 B5 have a bug causing data corruption when using * 64-bit DMA reads, which can be terminated early and then * completed later as 32-bit accesses, in combination with * certain bridges. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5701 && sc->bge_chipid == BGE_CHIPID_BCM5701_B5) mode_ctl |= BGE_MODECTL_FORCE_PCI32; /* * Tell the firmware the driver is running */ if (sc->bge_asf_mode & ASF_STACKUP) mode_ctl |= BGE_MODECTL_STACKUP; CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl); /* * Disable memory write invalidate. Apparently it is not supported * properly by these devices. */ PCI_CLRBIT(sc->bge_dev, BGE_PCI_CMD, PCIM_CMD_MWIEN, 4); /* Set the timer prescaler (always 66 MHz). */ CSR_WRITE_4(sc, BGE_MISC_CFG, BGE_32BITTIME_66MHZ); /* XXX: The Linux tg3 driver does this at the start of brgphy_reset. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { DELAY(40); /* XXX */ /* Put PHY into ready state */ BGE_CLRBIT(sc, BGE_MISC_CFG, BGE_MISCCFG_EPHY_IDDQ); CSR_READ_4(sc, BGE_MISC_CFG); /* Flush */ DELAY(40); } return (0); } static int bge_blockinit(struct bge_softc *sc) { struct bge_rcb *rcb; bus_size_t vrcb; bge_hostaddr taddr; uint32_t dmactl, rdmareg, val; int i, limit; /* * Initialize the memory window pointer register so that * we can access the first 32K of internal NIC RAM. This will * allow us to set up the TX send ring RCBs and the RX return * ring RCBs, plus other things which live in NIC memory. */ CSR_WRITE_4(sc, BGE_PCI_MEMWIN_BASEADDR, 0); /* Note: the BCM5704 has a smaller mbuf space than other chips. */ if (!(BGE_IS_5705_PLUS(sc))) { /* Configure mbuf memory pool */ CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_BASEADDR, BGE_BUFFPOOL_1); if (sc->bge_asicrev == BGE_ASICREV_BCM5704) CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_LEN, 0x10000); else CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_LEN, 0x18000); /* Configure DMA resource pool */ CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_BASEADDR, BGE_DMA_DESCRIPTORS); CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LEN, 0x2000); } /* Configure mbuf pool watermarks */ if (BGE_IS_5717_PLUS(sc)) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); if (if_getmtu(sc->bge_ifp) > ETHERMTU) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea); } else { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x2a); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xa0); } } else if (!BGE_IS_5705_PLUS(sc)) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x50); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x20); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x04); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x10); } else { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x10); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60); } /* Configure DMA resource watermarks */ CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LOWAT, 5); CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10); /* Enable buffer manager */ val = BGE_BMANMODE_ENABLE | BGE_BMANMODE_LOMBUF_ATTN; /* * Change the arbitration algorithm of TXMBUF read request to * round-robin instead of priority based for BCM5719. When * TXFIFO is almost empty, RDMA will hold its request until * TXFIFO is not almost empty. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val |= BGE_BMANMODE_NO_TX_UNDERRUN; CSR_WRITE_4(sc, BGE_BMAN_MODE, val); /* Poll for buffer manager start indication */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "buffer manager failed to start\n"); return (ENXIO); } /* Enable flow-through queues */ CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0); /* Wait until queue initialization is complete */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_FTQ_RESET) == 0) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "flow-through queue init failed\n"); return (ENXIO); } /* * Summary of rings supported by the controller: * * Standard Receive Producer Ring * - This ring is used to feed receive buffers for "standard" * sized frames (typically 1536 bytes) to the controller. * * Jumbo Receive Producer Ring * - This ring is used to feed receive buffers for jumbo sized * frames (i.e. anything bigger than the "standard" frames) * to the controller. * * Mini Receive Producer Ring * - This ring is used to feed receive buffers for "mini" * sized frames to the controller. * - This feature required external memory for the controller * but was never used in a production system. Should always * be disabled. * * Receive Return Ring * - After the controller has placed an incoming frame into a * receive buffer that buffer is moved into a receive return * ring. The driver is then responsible to passing the * buffer up to the stack. Many versions of the controller * support multiple RR rings. * * Send Ring * - This ring is used for outgoing frames. Many versions of * the controller support multiple send rings. */ /* Initialize the standard receive producer ring control block. */ rcb = &sc->bge_ldata.bge_info.bge_std_rx_rcb; rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_std_ring_paddr); rcb->bge_hostaddr.bge_addr_hi = BGE_ADDR_HI(sc->bge_ldata.bge_rx_std_ring_paddr); bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREREAD); if (BGE_IS_5717_PLUS(sc)) { /* * Bits 31-16: Programmable ring size (2048, 1024, 512, .., 32) * Bits 15-2 : Maximum RX frame size * Bit 1 : 1 = Ring Disabled, 0 = Ring ENabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, BGE_MAX_FRAMELEN << 2); } else if (BGE_IS_5705_PLUS(sc)) { /* * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32) * Bits 15-2 : Reserved (should be 0) * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0); } else { /* * Ring size is always XXX entries * Bits 31-16: Maximum RX frame size * Bits 15-2 : Reserved (should be 0) * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN, 0); } if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) rcb->bge_nicaddr = BGE_STD_RX_RINGS_5717; else rcb->bge_nicaddr = BGE_STD_RX_RINGS; /* Write the standard receive producer ring control block. */ CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr); /* Reset the standard receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0); /* * Initialize the jumbo RX producer ring control * block. We set the 'ring disabled' bit in the * flags field until we're actually ready to start * using this ring (i.e. once we set the MTU * high enough to require it). */ if (BGE_IS_JUMBO_CAPABLE(sc)) { rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; /* Get the jumbo receive producer ring RCB parameters. */ rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_jumbo_ring_paddr); rcb->bge_hostaddr.bge_addr_hi = BGE_ADDR_HI(sc->bge_ldata.bge_rx_jumbo_ring_paddr); bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREREAD); rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_USE_EXT_RX_BD | BGE_RCB_FLAG_RING_DISABLED); if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS_5717; else rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS; CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); /* Program the jumbo receive producer ring RCB parameters. */ CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr); /* Reset the jumbo receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0); } /* Disable the mini receive producer ring RCB. */ if (BGE_IS_5700_FAMILY(sc)) { rcb = &sc->bge_ldata.bge_info.bge_mini_rx_rcb; rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED); CSR_WRITE_4(sc, BGE_RX_MINI_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); /* Reset the mini receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0); } /* Choose de-pipeline mode for BCM5906 A0, A1 and A2. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { if (sc->bge_chipid == BGE_CHIPID_BCM5906_A0 || sc->bge_chipid == BGE_CHIPID_BCM5906_A1 || sc->bge_chipid == BGE_CHIPID_BCM5906_A2) CSR_WRITE_4(sc, BGE_ISO_PKT_TX, (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2); } /* * The BD ring replenish thresholds control how often the * hardware fetches new BD's from the producer rings in host * memory. Setting the value too low on a busy system can * starve the hardware and recue the throughpout. * * Set the BD ring replentish thresholds. The recommended * values are 1/8th the number of descriptors allocated to * each ring. * XXX The 5754 requires a lower threshold, so it might be a * requirement of all 575x family chips. The Linux driver sets * the lower threshold for all 5705 family chips as well, but there * are reports that it might not need to be so strict. * * XXX Linux does some extra fiddling here for the 5906 parts as * well. */ if (BGE_IS_5705_PLUS(sc)) val = 8; else val = BGE_STD_RX_RING_CNT / 8; CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val); if (BGE_IS_JUMBO_CAPABLE(sc)) CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH, BGE_JUMBO_RX_RING_CNT/8); if (BGE_IS_5717_PLUS(sc)) { CSR_WRITE_4(sc, BGE_STD_REPLENISH_LWM, 32); CSR_WRITE_4(sc, BGE_JMB_REPLENISH_LWM, 16); } /* * Disable all send rings by setting the 'ring disabled' bit * in the flags field of all the TX send ring control blocks, * located in NIC memory. */ if (!BGE_IS_5705_PLUS(sc)) /* 5700 to 5704 had 16 send rings. */ limit = BGE_TX_RINGS_EXTSSRAM_MAX; else if (BGE_IS_57765_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5762) limit = 2; else if (BGE_IS_5717_PLUS(sc)) limit = 4; else limit = 1; vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED)); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); vrcb += sizeof(struct bge_rcb); } /* Configure send ring RCB 0 (we use only the first ring) */ vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_tx_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_SEND_RING_5717); else RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_NIC_TXRING_ADDR(0, BGE_TX_RING_CNT)); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0)); /* * Disable all receive return rings by setting the * 'ring diabled' bit in the flags field of all the receive * return ring control blocks, located in NIC memory. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* Should be 17, use 16 until we get an SRAM map. */ limit = 16; } else if (!BGE_IS_5705_PLUS(sc)) limit = BGE_RX_RINGS_MAX; else if (sc->bge_asicrev == BGE_ASICREV_BCM5755 || sc->bge_asicrev == BGE_ASICREV_BCM5762 || BGE_IS_57765_PLUS(sc)) limit = 4; else limit = 1; /* Disable all receive return rings. */ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_FLAG_RING_DISABLED); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); bge_writembx(sc, BGE_MBX_RX_CONS0_LO + (i * (sizeof(uint64_t))), 0); vrcb += sizeof(struct bge_rcb); } /* * Set up receive return ring 0. Note that the NIC address * for RX return rings is 0x0. The return rings live entirely * within the host, so the nicaddr field in the RCB isn't used. */ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_rx_return_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(sc->bge_return_ring_cnt, 0)); /* Set random backoff seed for TX */ CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF, (IF_LLADDR(sc->bge_ifp)[0] + IF_LLADDR(sc->bge_ifp)[1] + IF_LLADDR(sc->bge_ifp)[2] + IF_LLADDR(sc->bge_ifp)[3] + IF_LLADDR(sc->bge_ifp)[4] + IF_LLADDR(sc->bge_ifp)[5]) & BGE_TX_BACKOFF_SEED_MASK); /* Set inter-packet gap */ val = 0x2620; if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) val |= CSR_READ_4(sc, BGE_TX_LENGTHS) & (BGE_TXLEN_JMB_FRM_LEN_MSK | BGE_TXLEN_CNT_DN_VAL_MSK); CSR_WRITE_4(sc, BGE_TX_LENGTHS, val); /* * Specify which ring to use for packets that don't match * any RX rules. */ CSR_WRITE_4(sc, BGE_RX_RULES_CFG, 0x08); /* * Configure number of RX lists. One interrupt distribution * list, sixteen active lists, one bad frames class. */ CSR_WRITE_4(sc, BGE_RXLP_CFG, 0x181); /* Inialize RX list placement stats mask. */ CSR_WRITE_4(sc, BGE_RXLP_STATS_ENABLE_MASK, 0x007FFFFF); CSR_WRITE_4(sc, BGE_RXLP_STATS_CTL, 0x1); /* Disable host coalescing until we get it set up */ CSR_WRITE_4(sc, BGE_HCC_MODE, 0x00000000); /* Poll to make sure it's shut down. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, BGE_HCC_MODE) & BGE_HCCMODE_ENABLE)) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "host coalescing engine failed to idle\n"); return (ENXIO); } /* Set up host coalescing defaults */ CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, sc->bge_rx_coal_ticks); CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, sc->bge_tx_coal_ticks); CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, sc->bge_rx_max_coal_bds); CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, sc->bge_tx_max_coal_bds); if (!(BGE_IS_5705_PLUS(sc))) { CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS_INT, 0); CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS_INT, 0); } CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, 1); CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, 1); /* Set up address of statistics block */ if (!(BGE_IS_5705_PLUS(sc))) { CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_HI, BGE_ADDR_HI(sc->bge_ldata.bge_stats_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_LO, BGE_ADDR_LO(sc->bge_ldata.bge_stats_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATS_BASEADDR, BGE_STATS_BLOCK); CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_BASEADDR, BGE_STATUS_BLOCK); CSR_WRITE_4(sc, BGE_HCC_STATS_TICKS, sc->bge_stat_ticks); } /* Set up address of status block */ CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI, BGE_ADDR_HI(sc->bge_ldata.bge_status_block_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO, BGE_ADDR_LO(sc->bge_ldata.bge_status_block_paddr)); /* Set up status block size. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) { val = BGE_STATBLKSZ_FULL; bzero(sc->bge_ldata.bge_status_block, BGE_STATUS_BLK_SZ); } else { val = BGE_STATBLKSZ_32BYTE; bzero(sc->bge_ldata.bge_status_block, 32); } bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Turn on host coalescing state machine */ CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE); /* Turn on RX BD completion state machine and enable attentions */ CSR_WRITE_4(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE | BGE_RBDCMODE_ATTN); /* Turn on RX list placement state machine */ CSR_WRITE_4(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE); /* Turn on RX list selector state machine. */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE); /* Turn on DMA, clear stats. */ val = BGE_MACMODE_TXDMA_ENB | BGE_MACMODE_RXDMA_ENB | BGE_MACMODE_RX_STATS_CLEAR | BGE_MACMODE_TX_STATS_CLEAR | BGE_MACMODE_RX_STATS_ENB | BGE_MACMODE_TX_STATS_ENB | BGE_MACMODE_FRMHDR_DMA_ENB; if (sc->bge_flags & BGE_FLAG_TBI) val |= BGE_PORTMODE_TBI; else if (sc->bge_flags & BGE_FLAG_MII_SERDES) val |= BGE_PORTMODE_GMII; else val |= BGE_PORTMODE_MII; /* Allow APE to send/receive frames. */ if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) val |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN; CSR_WRITE_4(sc, BGE_MAC_MODE, val); DELAY(40); /* Set misc. local control, enable interrupts on attentions */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN); #ifdef notdef /* Assert GPIO pins for PHY reset */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUT0 | BGE_MLC_MISCIO_OUT1 | BGE_MLC_MISCIO_OUT2); BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUTEN0 | BGE_MLC_MISCIO_OUTEN1 | BGE_MLC_MISCIO_OUTEN2); #endif /* Turn on DMA completion state machine */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE); val = BGE_WDMAMODE_ENABLE | BGE_WDMAMODE_ALL_ATTNS; /* Enable host coalescing bug fix. */ if (BGE_IS_5755_PLUS(sc)) val |= BGE_WDMAMODE_STATUS_TAG_FIX; /* Request larger DMA burst size to get better performance. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5785) val |= BGE_WDMAMODE_BURST_ALL_DATA; /* Turn on write DMA state machine */ CSR_WRITE_4(sc, BGE_WDMA_MODE, val); DELAY(40); /* Turn on read DMA state machine */ val = BGE_RDMAMODE_ENABLE | BGE_RDMAMODE_ALL_ATTNS; if (sc->bge_asicrev == BGE_ASICREV_BCM5717) val |= BGE_RDMAMODE_MULT_DMA_RD_DIS; if (sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) val |= BGE_RDMAMODE_BD_SBD_CRPT_ATTN | BGE_RDMAMODE_MBUF_RBD_CRPT_ATTN | BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN; if (sc->bge_flags & BGE_FLAG_PCIE) val |= BGE_RDMAMODE_FIFO_LONG_BURST; if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) { val |= BGE_RDMAMODE_TSO4_ENABLE; if (sc->bge_flags & BGE_FLAG_TSO3 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) val |= BGE_RDMAMODE_TSO6_ENABLE; } if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { val |= CSR_READ_4(sc, BGE_RDMA_MODE) & BGE_RDMAMODE_H2BNC_VLAN_DET; /* * Allow multiple outstanding read requests from * non-LSO read DMA engine. */ val &= ~BGE_RDMAMODE_MULT_DMA_RD_DIS; } if (sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780 || BGE_IS_5717_PLUS(sc) || BGE_IS_57765_PLUS(sc)) { if (sc->bge_asicrev == BGE_ASICREV_BCM5762) rdmareg = BGE_RDMA_RSRVCTRL_REG2; else rdmareg = BGE_RDMA_RSRVCTRL; dmactl = CSR_READ_4(sc, rdmareg); /* * Adjust tx margin to prevent TX data corruption and * fix internal FIFO overflow. */ if (sc->bge_chipid == BGE_CHIPID_BCM5719_A0 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { dmactl &= ~(BGE_RDMA_RSRVCTRL_FIFO_LWM_MASK | BGE_RDMA_RSRVCTRL_FIFO_HWM_MASK | BGE_RDMA_RSRVCTRL_TXMRGN_MASK); dmactl |= BGE_RDMA_RSRVCTRL_FIFO_LWM_1_5K | BGE_RDMA_RSRVCTRL_FIFO_HWM_1_5K | BGE_RDMA_RSRVCTRL_TXMRGN_320B; } /* * Enable fix for read DMA FIFO overruns. * The fix is to limit the number of RX BDs * the hardware would fetch at a fime. */ CSR_WRITE_4(sc, rdmareg, dmactl | BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX); } if (sc->bge_asicrev == BGE_ASICREV_BCM5719) { CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* * Allow 4KB burst length reads for non-LSO frames. * Enable 512B burst length reads for buffer descriptors. */ CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_512 | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5762) { CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL_REG2, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL_REG2) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } CSR_WRITE_4(sc, BGE_RDMA_MODE, val); DELAY(40); if (sc->bge_flags & BGE_FLAG_RDMA_BUG) { for (i = 0; i < BGE_NUM_RDMA_CHANNELS / 2; i++) { val = CSR_READ_4(sc, BGE_RDMA_LENGTH + i * 4); if ((val & 0xFFFF) > BGE_FRAMELEN) break; if (((val >> 16) & 0xFFFF) > BGE_FRAMELEN) break; } if (i != BGE_NUM_RDMA_CHANNELS / 2) { val = CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL); if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val |= BGE_RDMA_TX_LENGTH_WA_5719; else val |= BGE_RDMA_TX_LENGTH_WA_5720; CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, val); } } /* Turn on RX data completion state machine */ CSR_WRITE_4(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE); /* Turn on RX BD initiator state machine */ CSR_WRITE_4(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE); /* Turn on RX data and RX BD initiator state machine */ CSR_WRITE_4(sc, BGE_RDBDI_MODE, BGE_RDBDIMODE_ENABLE); /* Turn on Mbuf cluster free state machine */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE); /* Turn on send BD completion state machine */ CSR_WRITE_4(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE); /* Turn on send data completion state machine */ val = BGE_SDCMODE_ENABLE; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) val |= BGE_SDCMODE_CDELAY; CSR_WRITE_4(sc, BGE_SDC_MODE, val); /* Turn on send data initiator state machine */ if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE | BGE_SDIMODE_HW_LSO_PRE_DMA); else CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE); /* Turn on send BD initiator state machine */ CSR_WRITE_4(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE); /* Turn on send BD selector state machine */ CSR_WRITE_4(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE); CSR_WRITE_4(sc, BGE_SDI_STATS_ENABLE_MASK, 0x007FFFFF); CSR_WRITE_4(sc, BGE_SDI_STATS_CTL, BGE_SDISTATSCTL_ENABLE | BGE_SDISTATSCTL_FASTER); /* ack/clear link change events */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); CSR_WRITE_4(sc, BGE_MI_STS, 0); /* * Enable attention when the link has changed state for * devices that use auto polling. */ if (sc->bge_flags & BGE_FLAG_TBI) { CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK); } else { if (sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) CSR_WRITE_4(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_MI_INTERRUPT); } /* * Clear any pending link state attention. * Otherwise some link state change events may be lost until attention * is cleared by bge_intr() -> bge_link_upd() sequence. * It's not necessary on newer BCM chips - perhaps enabling link * state change attentions implies clearing pending attention. */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); /* Enable link state change attentions. */ BGE_SETBIT(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_LINK_CHANGED); return (0); } static const struct bge_revision * bge_lookup_rev(uint32_t chipid) { const struct bge_revision *br; for (br = bge_revisions; br->br_name != NULL; br++) { if (br->br_chipid == chipid) return (br); } for (br = bge_majorrevs; br->br_name != NULL; br++) { if (br->br_chipid == BGE_ASICREV(chipid)) return (br); } return (NULL); } static const struct bge_vendor * bge_lookup_vendor(uint16_t vid) { const struct bge_vendor *v; for (v = bge_vendors; v->v_name != NULL; v++) if (v->v_id == vid) return (v); return (NULL); } static uint32_t bge_chipid(device_t dev) { uint32_t id; id = pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >> BGE_PCIMISCCTL_ASICREV_SHIFT; if (BGE_ASICREV(id) == BGE_ASICREV_USE_PRODID_REG) { /* * Find the ASCI revision. Different chips use different * registers. */ switch (pci_get_device(dev)) { case BCOM_DEVICEID_BCM5717C: /* 5717 C0 seems to belong to 5720 line. */ id = BGE_CHIPID_BCM5720_A0; break; case BCOM_DEVICEID_BCM5717: case BCOM_DEVICEID_BCM5718: case BCOM_DEVICEID_BCM5719: case BCOM_DEVICEID_BCM5720: case BCOM_DEVICEID_BCM5725: case BCOM_DEVICEID_BCM5727: case BCOM_DEVICEID_BCM5762: case BCOM_DEVICEID_BCM57764: case BCOM_DEVICEID_BCM57767: case BCOM_DEVICEID_BCM57787: id = pci_read_config(dev, BGE_PCI_GEN2_PRODID_ASICREV, 4); break; case BCOM_DEVICEID_BCM57761: case BCOM_DEVICEID_BCM57762: case BCOM_DEVICEID_BCM57765: case BCOM_DEVICEID_BCM57766: case BCOM_DEVICEID_BCM57781: case BCOM_DEVICEID_BCM57782: case BCOM_DEVICEID_BCM57785: case BCOM_DEVICEID_BCM57786: case BCOM_DEVICEID_BCM57791: case BCOM_DEVICEID_BCM57795: id = pci_read_config(dev, BGE_PCI_GEN15_PRODID_ASICREV, 4); break; default: id = pci_read_config(dev, BGE_PCI_PRODID_ASICREV, 4); } } return (id); } /* * Probe for a Broadcom chip. Check the PCI vendor and device IDs * against our list and return its name if we find a match. * * Note that since the Broadcom controller contains VPD support, we * try to get the device name string from the controller itself instead * of the compiled-in string. It guarantees we'll always announce the * right product name. We fall back to the compiled-in string when * VPD is unavailable or corrupt. */ static int bge_probe(device_t dev) { char buf[96]; char model[64]; const struct bge_revision *br; const char *pname; struct bge_softc *sc; const struct bge_type *t = bge_devs; const struct bge_vendor *v; uint32_t id; uint16_t did, vid; sc = device_get_softc(dev); sc->bge_dev = dev; vid = pci_get_vendor(dev); did = pci_get_device(dev); while(t->bge_vid != 0) { if ((vid == t->bge_vid) && (did == t->bge_did)) { id = bge_chipid(dev); br = bge_lookup_rev(id); if (bge_has_eaddr(sc) && pci_get_vpd_ident(dev, &pname) == 0) snprintf(model, sizeof(model), "%s", pname); else { v = bge_lookup_vendor(vid); snprintf(model, sizeof(model), "%s %s", v != NULL ? v->v_name : "Unknown", br != NULL ? br->br_name : "NetXtreme/NetLink Ethernet Controller"); } snprintf(buf, sizeof(buf), "%s, %sASIC rev. %#08x", model, br != NULL ? "" : "unknown ", id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static void bge_dma_free(struct bge_softc *sc) { int i; /* Destroy DMA maps for RX buffers. */ for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_std_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); } if (sc->bge_cdata.bge_rx_std_sparemap) bus_dmamap_destroy(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_sparemap); /* Destroy DMA maps for jumbo RX buffers. */ for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_jumbo_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); } if (sc->bge_cdata.bge_rx_jumbo_sparemap) bus_dmamap_destroy(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_sparemap); /* Destroy DMA maps for TX buffers. */ for (i = 0; i < BGE_TX_RING_CNT; i++) { if (sc->bge_cdata.bge_tx_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i]); } if (sc->bge_cdata.bge_rx_mtag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_mtag); if (sc->bge_cdata.bge_mtag_jumbo) bus_dma_tag_destroy(sc->bge_cdata.bge_mtag_jumbo); if (sc->bge_cdata.bge_tx_mtag) bus_dma_tag_destroy(sc->bge_cdata.bge_tx_mtag); /* Destroy standard RX ring. */ if (sc->bge_ldata.bge_rx_std_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map); if (sc->bge_ldata.bge_rx_std_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_ldata.bge_rx_std_ring, sc->bge_cdata.bge_rx_std_ring_map); if (sc->bge_cdata.bge_rx_std_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_std_ring_tag); /* Destroy jumbo RX ring. */ if (sc->bge_ldata.bge_rx_jumbo_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map); if (sc->bge_ldata.bge_rx_jumbo_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_ldata.bge_rx_jumbo_ring, sc->bge_cdata.bge_rx_jumbo_ring_map); if (sc->bge_cdata.bge_rx_jumbo_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_jumbo_ring_tag); /* Destroy RX return ring. */ if (sc->bge_ldata.bge_rx_return_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map); if (sc->bge_ldata.bge_rx_return_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_ldata.bge_rx_return_ring, sc->bge_cdata.bge_rx_return_ring_map); if (sc->bge_cdata.bge_rx_return_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_return_ring_tag); /* Destroy TX ring. */ if (sc->bge_ldata.bge_tx_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map); if (sc->bge_ldata.bge_tx_ring) bus_dmamem_free(sc->bge_cdata.bge_tx_ring_tag, sc->bge_ldata.bge_tx_ring, sc->bge_cdata.bge_tx_ring_map); if (sc->bge_cdata.bge_tx_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_tx_ring_tag); /* Destroy status block. */ if (sc->bge_ldata.bge_status_block_paddr) bus_dmamap_unload(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map); if (sc->bge_ldata.bge_status_block) bus_dmamem_free(sc->bge_cdata.bge_status_tag, sc->bge_ldata.bge_status_block, sc->bge_cdata.bge_status_map); if (sc->bge_cdata.bge_status_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_status_tag); /* Destroy statistics block. */ if (sc->bge_ldata.bge_stats_paddr) bus_dmamap_unload(sc->bge_cdata.bge_stats_tag, sc->bge_cdata.bge_stats_map); if (sc->bge_ldata.bge_stats) bus_dmamem_free(sc->bge_cdata.bge_stats_tag, sc->bge_ldata.bge_stats, sc->bge_cdata.bge_stats_map); if (sc->bge_cdata.bge_stats_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_stats_tag); if (sc->bge_cdata.bge_buffer_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_buffer_tag); /* Destroy the parent tag. */ if (sc->bge_cdata.bge_parent_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_parent_tag); } static int bge_dma_ring_alloc(struct bge_softc *sc, bus_size_t alignment, bus_size_t maxsize, bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t *map, bus_addr_t *paddr, const char *msg) { struct bge_dmamap_arg ctx; int error; error = bus_dma_tag_create(sc->bge_cdata.bge_parent_tag, alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag); if (error != 0) { device_printf(sc->bge_dev, "could not create %s dma tag\n", msg); return (ENOMEM); } /* Allocate DMA'able memory for ring. */ error = bus_dmamem_alloc(*tag, (void **)ring, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, map); if (error != 0) { device_printf(sc->bge_dev, "could not allocate DMA'able memory for %s\n", msg); return (ENOMEM); } /* Load the address of the ring. */ ctx.bge_busaddr = 0; error = bus_dmamap_load(*tag, *map, *ring, maxsize, bge_dma_map_addr, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->bge_dev, "could not load DMA'able memory for %s\n", msg); return (ENOMEM); } *paddr = ctx.bge_busaddr; return (0); } static int bge_dma_alloc(struct bge_softc *sc) { bus_addr_t lowaddr; bus_size_t rxmaxsegsz, sbsz, txsegsz, txmaxsegsz; int i, error; lowaddr = BUS_SPACE_MAXADDR; if ((sc->bge_flags & BGE_FLAG_40BIT_BUG) != 0) lowaddr = BGE_DMA_MAXADDR; /* * Allocate the parent bus DMA tag appropriate for PCI. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->bge_cdata.bge_parent_tag); if (error != 0) { device_printf(sc->bge_dev, "could not allocate parent dma tag\n"); return (ENOMEM); } /* Create tag for standard RX ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_STD_RX_RING_SZ, &sc->bge_cdata.bge_rx_std_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_std_ring, &sc->bge_cdata.bge_rx_std_ring_map, &sc->bge_ldata.bge_rx_std_ring_paddr, "RX ring"); if (error) return (error); /* Create tag for RX return ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_RX_RTN_RING_SZ(sc), &sc->bge_cdata.bge_rx_return_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_return_ring, &sc->bge_cdata.bge_rx_return_ring_map, &sc->bge_ldata.bge_rx_return_ring_paddr, "RX return ring"); if (error) return (error); /* Create tag for TX ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_TX_RING_SZ, &sc->bge_cdata.bge_tx_ring_tag, (uint8_t **)&sc->bge_ldata.bge_tx_ring, &sc->bge_cdata.bge_tx_ring_map, &sc->bge_ldata.bge_tx_ring_paddr, "TX ring"); if (error) return (error); /* * Create tag for status block. * Because we only use single Tx/Rx/Rx return ring, use * minimum status block size except BCM5700 AX/BX which * seems to want to see full status block size regardless * of configured number of ring. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) sbsz = BGE_STATUS_BLK_SZ; else sbsz = 32; error = bge_dma_ring_alloc(sc, PAGE_SIZE, sbsz, &sc->bge_cdata.bge_status_tag, (uint8_t **)&sc->bge_ldata.bge_status_block, &sc->bge_cdata.bge_status_map, &sc->bge_ldata.bge_status_block_paddr, "status block"); if (error) return (error); /* Create tag for statistics block. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_STATS_SZ, &sc->bge_cdata.bge_stats_tag, (uint8_t **)&sc->bge_ldata.bge_stats, &sc->bge_cdata.bge_stats_map, &sc->bge_ldata.bge_stats_paddr, "statistics block"); if (error) return (error); /* Create tag for jumbo RX ring. */ if (BGE_IS_JUMBO_CAPABLE(sc)) { error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_JUMBO_RX_RING_SZ, &sc->bge_cdata.bge_rx_jumbo_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_jumbo_ring, &sc->bge_cdata.bge_rx_jumbo_ring_map, &sc->bge_ldata.bge_rx_jumbo_ring_paddr, "jumbo RX ring"); if (error) return (error); } /* Create parent tag for buffers. */ if ((sc->bge_flags & BGE_FLAG_4G_BNDRY_BUG) != 0) { /* * XXX * watchdog timeout issue was observed on BCM5704 which * lives behind PCI-X bridge(e.g AMD 8131 PCI-X bridge). * Both limiting DMA address space to 32bits and flushing * mailbox write seem to address the issue. */ if (sc->bge_pcixcap != 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; } error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->bge_cdata.bge_buffer_tag); if (error != 0) { device_printf(sc->bge_dev, "could not allocate buffer dma tag\n"); return (ENOMEM); } /* Create tag for Tx mbufs. */ if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) { txsegsz = BGE_TSOSEG_SZ; txmaxsegsz = 65535 + sizeof(struct ether_vlan_header); } else { txsegsz = MCLBYTES; txmaxsegsz = MCLBYTES * BGE_NSEG_NEW; } error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, txmaxsegsz, BGE_NSEG_NEW, txsegsz, 0, NULL, NULL, &sc->bge_cdata.bge_tx_mtag); if (error) { device_printf(sc->bge_dev, "could not allocate TX dma tag\n"); return (ENOMEM); } /* Create tag for Rx mbufs. */ if (sc->bge_flags & BGE_FLAG_JUMBO_STD) rxmaxsegsz = MJUM9BYTES; else rxmaxsegsz = MCLBYTES; error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, rxmaxsegsz, 1, rxmaxsegsz, 0, NULL, NULL, &sc->bge_cdata.bge_rx_mtag); if (error) { device_printf(sc->bge_dev, "could not allocate RX dma tag\n"); return (ENOMEM); } /* Create DMA maps for RX buffers. */ error = bus_dmamap_create(sc->bge_cdata.bge_rx_mtag, 0, &sc->bge_cdata.bge_rx_std_sparemap); if (error) { device_printf(sc->bge_dev, "can't create spare DMA map for RX\n"); return (ENOMEM); } for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_rx_mtag, 0, &sc->bge_cdata.bge_rx_std_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for RX\n"); return (ENOMEM); } } /* Create DMA maps for TX buffers. */ for (i = 0; i < BGE_TX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_tx_mtag, 0, &sc->bge_cdata.bge_tx_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for TX\n"); return (ENOMEM); } } /* Create tags for jumbo RX buffers. */ if (BGE_IS_JUMBO_CAPABLE(sc)) { error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, BGE_NSEG_JUMBO, PAGE_SIZE, 0, NULL, NULL, &sc->bge_cdata.bge_mtag_jumbo); if (error) { device_printf(sc->bge_dev, "could not allocate jumbo dma tag\n"); return (ENOMEM); } /* Create DMA maps for jumbo RX buffers. */ error = bus_dmamap_create(sc->bge_cdata.bge_mtag_jumbo, 0, &sc->bge_cdata.bge_rx_jumbo_sparemap); if (error) { device_printf(sc->bge_dev, "can't create spare DMA map for jumbo RX\n"); return (ENOMEM); } for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_mtag_jumbo, 0, &sc->bge_cdata.bge_rx_jumbo_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for jumbo RX\n"); return (ENOMEM); } } } return (0); } /* * Return true if this device has more than one port. */ static int bge_has_multiple_ports(struct bge_softc *sc) { device_t dev = sc->bge_dev; u_int b, d, f, fscan, s; d = pci_get_domain(dev); b = pci_get_bus(dev); s = pci_get_slot(dev); f = pci_get_function(dev); for (fscan = 0; fscan <= PCI_FUNCMAX; fscan++) if (fscan != f && pci_find_dbsf(d, b, s, fscan) != NULL) return (1); return (0); } /* * Return true if MSI can be used with this device. */ static int bge_can_use_msi(struct bge_softc *sc) { int can_use_msi = 0; if (sc->bge_msi == 0) return (0); /* Disable MSI for polling(4). */ #ifdef DEVICE_POLLING return (0); #endif switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5714_A0: case BGE_ASICREV_BCM5714: /* * Apparently, MSI doesn't work when these chips are * configured in single-port mode. */ if (bge_has_multiple_ports(sc)) can_use_msi = 1; break; case BGE_ASICREV_BCM5750: if (sc->bge_chiprev != BGE_CHIPREV_5750_AX && sc->bge_chiprev != BGE_CHIPREV_5750_BX) can_use_msi = 1; break; default: if (BGE_IS_575X_PLUS(sc)) can_use_msi = 1; } return (can_use_msi); } static int bge_mbox_reorder(struct bge_softc *sc) { /* Lists of PCI bridges that are known to reorder mailbox writes. */ static const struct mbox_reorder { const uint16_t vendor; const uint16_t device; const char *desc; } mbox_reorder_lists[] = { { 0x1022, 0x7450, "AMD-8131 PCI-X Bridge" }, }; devclass_t pci, pcib; device_t bus, dev; int i; pci = devclass_find("pci"); pcib = devclass_find("pcib"); dev = sc->bge_dev; bus = device_get_parent(dev); for (;;) { dev = device_get_parent(bus); bus = device_get_parent(dev); if (device_get_devclass(dev) != pcib) break; for (i = 0; i < nitems(mbox_reorder_lists); i++) { if (pci_get_vendor(dev) == mbox_reorder_lists[i].vendor && pci_get_device(dev) == mbox_reorder_lists[i].device) { device_printf(sc->bge_dev, "enabling MBOX workaround for %s\n", mbox_reorder_lists[i].desc); return (1); } } if (device_get_devclass(bus) != pci) break; } return (0); } static void bge_devinfo(struct bge_softc *sc) { uint32_t cfg, clk; device_printf(sc->bge_dev, "CHIP ID 0x%08x; ASIC REV 0x%02x; CHIP REV 0x%02x; ", sc->bge_chipid, sc->bge_asicrev, sc->bge_chiprev); if (sc->bge_flags & BGE_FLAG_PCIE) printf("PCI-E\n"); else if (sc->bge_flags & BGE_FLAG_PCIX) { printf("PCI-X "); cfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID_MASK; if (cfg == BGE_MISCCFG_BOARD_ID_5704CIOBE) clk = 133; else { clk = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1F; switch (clk) { case 0: clk = 33; break; case 2: clk = 50; break; case 4: clk = 66; break; case 6: clk = 100; break; case 7: clk = 133; break; } } printf("%u MHz\n", clk); } else { if (sc->bge_pcixcap != 0) printf("PCI on PCI-X "); else printf("PCI "); cfg = pci_read_config(sc->bge_dev, BGE_PCI_PCISTATE, 4); if (cfg & BGE_PCISTATE_PCI_BUSSPEED) clk = 66; else clk = 33; if (cfg & BGE_PCISTATE_32BIT_BUS) printf("%u MHz; 32bit\n", clk); else printf("%u MHz; 64bit\n", clk); } } static int bge_attach(device_t dev) { if_t ifp; struct bge_softc *sc; uint32_t hwcfg = 0, misccfg, pcistate; u_char eaddr[ETHER_ADDR_LEN]; int capmask, error, reg, rid, trys; sc = device_get_softc(dev); sc->bge_dev = dev; BGE_LOCK_INIT(sc, device_get_nameunit(dev)); TASK_INIT(&sc->bge_intr_task, 0, bge_intr_task, sc); callout_init_mtx(&sc->bge_stat_ch, &sc->bge_mtx, 0); pci_enable_busmaster(dev); /* * Allocate control/status registers. */ rid = PCIR_BAR(0); sc->bge_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bge_res == NULL) { device_printf (sc->bge_dev, "couldn't map BAR0 memory\n"); error = ENXIO; goto fail; } /* Save various chip information. */ sc->bge_func_addr = pci_get_function(dev); sc->bge_chipid = bge_chipid(dev); sc->bge_asicrev = BGE_ASICREV(sc->bge_chipid); sc->bge_chiprev = BGE_CHIPREV(sc->bge_chipid); /* Set default PHY address. */ sc->bge_phy_addr = 1; /* * PHY address mapping for various devices. * * | F0 Cu | F0 Sr | F1 Cu | F1 Sr | * ---------+-------+-------+-------+-------+ * BCM57XX | 1 | X | X | X | * BCM5704 | 1 | X | 1 | X | * BCM5717 | 1 | 8 | 2 | 9 | * BCM5719 | 1 | 8 | 2 | 9 | * BCM5720 | 1 | 8 | 2 | 9 | * * | F2 Cu | F2 Sr | F3 Cu | F3 Sr | * ---------+-------+-------+-------+-------+ * BCM57XX | X | X | X | X | * BCM5704 | X | X | X | X | * BCM5717 | X | X | X | X | * BCM5719 | 3 | 10 | 4 | 11 | * BCM5720 | X | X | X | X | * * Other addresses may respond but they are not * IEEE compliant PHYs and should be ignored. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { if (sc->bge_chipid != BGE_CHIPID_BCM5717_A0) { if (CSR_READ_4(sc, BGE_SGDIG_STS) & BGE_SGDIGSTS_IS_SERDES) sc->bge_phy_addr = sc->bge_func_addr + 8; else sc->bge_phy_addr = sc->bge_func_addr + 1; } else { if (CSR_READ_4(sc, BGE_CPMU_PHY_STRAP) & BGE_CPMU_PHY_STRAP_IS_SERDES) sc->bge_phy_addr = sc->bge_func_addr + 8; else sc->bge_phy_addr = sc->bge_func_addr + 1; } } if (bge_has_eaddr(sc)) sc->bge_flags |= BGE_FLAG_EADDR; /* Save chipset family. */ switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5762: case BGE_ASICREV_BCM57765: case BGE_ASICREV_BCM57766: sc->bge_flags |= BGE_FLAG_57765_PLUS; /* FALLTHROUGH */ case BGE_ASICREV_BCM5717: case BGE_ASICREV_BCM5719: case BGE_ASICREV_BCM5720: sc->bge_flags |= BGE_FLAG_5717_PLUS | BGE_FLAG_5755_PLUS | BGE_FLAG_575X_PLUS | BGE_FLAG_5705_PLUS | BGE_FLAG_JUMBO | BGE_FLAG_JUMBO_FRAME; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* * Enable work around for DMA engine miscalculation * of TXMBUF available space. */ sc->bge_flags |= BGE_FLAG_RDMA_BUG; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 && sc->bge_chipid == BGE_CHIPID_BCM5719_A0) { /* Jumbo frame on BCM5719 A0 does not work. */ sc->bge_flags &= ~BGE_FLAG_JUMBO; } } break; case BGE_ASICREV_BCM5755: case BGE_ASICREV_BCM5761: case BGE_ASICREV_BCM5784: case BGE_ASICREV_BCM5785: case BGE_ASICREV_BCM5787: case BGE_ASICREV_BCM57780: sc->bge_flags |= BGE_FLAG_5755_PLUS | BGE_FLAG_575X_PLUS | BGE_FLAG_5705_PLUS; break; case BGE_ASICREV_BCM5700: case BGE_ASICREV_BCM5701: case BGE_ASICREV_BCM5703: case BGE_ASICREV_BCM5704: sc->bge_flags |= BGE_FLAG_5700_FAMILY | BGE_FLAG_JUMBO; break; case BGE_ASICREV_BCM5714_A0: case BGE_ASICREV_BCM5780: case BGE_ASICREV_BCM5714: sc->bge_flags |= BGE_FLAG_5714_FAMILY | BGE_FLAG_JUMBO_STD; /* FALLTHROUGH */ case BGE_ASICREV_BCM5750: case BGE_ASICREV_BCM5752: case BGE_ASICREV_BCM5906: sc->bge_flags |= BGE_FLAG_575X_PLUS; /* FALLTHROUGH */ case BGE_ASICREV_BCM5705: sc->bge_flags |= BGE_FLAG_5705_PLUS; break; } /* Identify chips with APE processor. */ switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5717: case BGE_ASICREV_BCM5719: case BGE_ASICREV_BCM5720: case BGE_ASICREV_BCM5761: case BGE_ASICREV_BCM5762: sc->bge_flags |= BGE_FLAG_APE; break; } /* Chips with APE need BAR2 access for APE registers/memory. */ if ((sc->bge_flags & BGE_FLAG_APE) != 0) { rid = PCIR_BAR(2); sc->bge_res2 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bge_res2 == NULL) { device_printf (sc->bge_dev, "couldn't map BAR2 memory\n"); error = ENXIO; goto fail; } /* Enable APE register/memory access by host driver. */ pcistate = pci_read_config(dev, BGE_PCI_PCISTATE, 4); pcistate |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR | BGE_PCISTATE_ALLOW_APE_SHMEM_WR | BGE_PCISTATE_ALLOW_APE_PSPACE_WR; pci_write_config(dev, BGE_PCI_PCISTATE, pcistate, 4); bge_ape_lock_init(sc); bge_ape_read_fw_ver(sc); } /* Add SYSCTLs, requires the chipset family to be set. */ bge_add_sysctls(sc); /* Identify the chips that use an CPMU. */ if (BGE_IS_5717_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) sc->bge_flags |= BGE_FLAG_CPMU_PRESENT; if ((sc->bge_flags & BGE_FLAG_CPMU_PRESENT) != 0) sc->bge_mi_mode = BGE_MIMODE_500KHZ_CONST; else sc->bge_mi_mode = BGE_MIMODE_BASE; /* Enable auto polling for BCM570[0-5]. */ if (BGE_IS_5700_FAMILY(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5705) sc->bge_mi_mode |= BGE_MIMODE_AUTOPOLL; /* * All Broadcom controllers have 4GB boundary DMA bug. * Whenever an address crosses a multiple of the 4GB boundary * (including 4GB, 8Gb, 12Gb, etc.) and makes the transition * from 0xX_FFFF_FFFF to 0x(X+1)_0000_0000 an internal DMA * state machine will lockup and cause the device to hang. */ sc->bge_flags |= BGE_FLAG_4G_BNDRY_BUG; /* BCM5755 or higher and BCM5906 have short DMA bug. */ if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906) sc->bge_flags |= BGE_FLAG_SHORT_DMA_BUG; /* * BCM5719 cannot handle DMA requests for DMA segments that * have larger than 4KB in size. However the maximum DMA * segment size created in DMA tag is 4KB for TSO, so we * wouldn't encounter the issue here. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5719) sc->bge_flags |= BGE_FLAG_4K_RDMA_BUG; misccfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID_MASK; if (sc->bge_asicrev == BGE_ASICREV_BCM5705) { if (misccfg == BGE_MISCCFG_BOARD_ID_5788 || misccfg == BGE_MISCCFG_BOARD_ID_5788M) sc->bge_flags |= BGE_FLAG_5788; } capmask = BMSR_DEFCAPMASK; if ((sc->bge_asicrev == BGE_ASICREV_BCM5703 && (misccfg == 0x4000 || misccfg == 0x8000)) || (sc->bge_asicrev == BGE_ASICREV_BCM5705 && pci_get_vendor(dev) == BCOM_VENDORID && (pci_get_device(dev) == BCOM_DEVICEID_BCM5901 || pci_get_device(dev) == BCOM_DEVICEID_BCM5901A2 || pci_get_device(dev) == BCOM_DEVICEID_BCM5705F)) || (pci_get_vendor(dev) == BCOM_VENDORID && (pci_get_device(dev) == BCOM_DEVICEID_BCM5751F || pci_get_device(dev) == BCOM_DEVICEID_BCM5753F || pci_get_device(dev) == BCOM_DEVICEID_BCM5787F)) || pci_get_device(dev) == BCOM_DEVICEID_BCM57790 || pci_get_device(dev) == BCOM_DEVICEID_BCM57791 || pci_get_device(dev) == BCOM_DEVICEID_BCM57795 || sc->bge_asicrev == BGE_ASICREV_BCM5906) { /* These chips are 10/100 only. */ capmask &= ~BMSR_EXTSTAT; sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; } /* * Some controllers seem to require a special firmware to use * TSO. But the firmware is not available to FreeBSD and Linux * claims that the TSO performed by the firmware is slower than * hardware based TSO. Moreover the firmware based TSO has one * known bug which can't handle TSO if Ethernet header + IP/TCP * header is greater than 80 bytes. A workaround for the TSO * bug exist but it seems it's too expensive than not using * TSO at all. Some hardwares also have the TSO bug so limit * the TSO to the controllers that are not affected TSO issues * (e.g. 5755 or higher). */ if (BGE_IS_5717_PLUS(sc)) { /* BCM5717 requires different TSO configuration. */ sc->bge_flags |= BGE_FLAG_TSO3; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 && sc->bge_chipid == BGE_CHIPID_BCM5719_A0) { /* TSO on BCM5719 A0 does not work. */ sc->bge_flags &= ~BGE_FLAG_TSO3; } } else if (BGE_IS_5755_PLUS(sc)) { /* * BCM5754 and BCM5787 shares the same ASIC id so * explicit device id check is required. * Due to unknown reason TSO does not work on BCM5755M. */ if (pci_get_device(dev) != BCOM_DEVICEID_BCM5754 && pci_get_device(dev) != BCOM_DEVICEID_BCM5754M && pci_get_device(dev) != BCOM_DEVICEID_BCM5755M) sc->bge_flags |= BGE_FLAG_TSO; } /* * Check if this is a PCI-X or PCI Express device. */ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { /* * Found a PCI Express capabilities register, this * must be a PCI Express device. */ sc->bge_flags |= BGE_FLAG_PCIE; sc->bge_expcap = reg; /* Extract supported maximum payload size. */ sc->bge_mps = pci_read_config(dev, sc->bge_expcap + PCIER_DEVICE_CAP, 2); sc->bge_mps = 128 << (sc->bge_mps & PCIEM_CAP_MAX_PAYLOAD); if (sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) sc->bge_expmrq = 2048; else sc->bge_expmrq = 4096; pci_set_max_read_req(dev, sc->bge_expmrq); } else { /* * Check if the device is in PCI-X Mode. * (This bit is not valid on PCI Express controllers.) */ if (pci_find_cap(dev, PCIY_PCIX, ®) == 0) sc->bge_pcixcap = reg; if ((pci_read_config(dev, BGE_PCI_PCISTATE, 4) & BGE_PCISTATE_PCI_BUSMODE) == 0) sc->bge_flags |= BGE_FLAG_PCIX; } /* * The 40bit DMA bug applies to the 5714/5715 controllers and is * not actually a MAC controller bug but an issue with the embedded * PCIe to PCI-X bridge in the device. Use 40bit DMA workaround. */ if (BGE_IS_5714_FAMILY(sc) && (sc->bge_flags & BGE_FLAG_PCIX)) sc->bge_flags |= BGE_FLAG_40BIT_BUG; /* * Some PCI-X bridges are known to trigger write reordering to * the mailbox registers. Typical phenomena is watchdog timeouts * caused by out-of-order TX completions. Enable workaround for * PCI-X devices that live behind these bridges. * Note, PCI-X controllers can run in PCI mode so we can't use * BGE_FLAG_PCIX flag to detect PCI-X controllers. */ if (sc->bge_pcixcap != 0 && bge_mbox_reorder(sc) != 0) sc->bge_flags |= BGE_FLAG_MBOX_REORDER; /* * Allocate the interrupt, using MSI if possible. These devices * support 8 MSI messages, but only the first one is used in * normal operation. */ rid = 0; if (pci_find_cap(sc->bge_dev, PCIY_MSI, ®) == 0) { sc->bge_msicap = reg; reg = 1; if (bge_can_use_msi(sc) && pci_alloc_msi(dev, ®) == 0) { rid = 1; sc->bge_flags |= BGE_FLAG_MSI; } } /* * All controllers except BCM5700 supports tagged status but * we use tagged status only for MSI case on BCM5717. Otherwise * MSI on BCM5717 does not work. */ #ifndef DEVICE_POLLING if (sc->bge_flags & BGE_FLAG_MSI && BGE_IS_5717_PLUS(sc)) sc->bge_flags |= BGE_FLAG_TAGGED_STATUS; #endif sc->bge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->bge_irq == NULL) { device_printf(sc->bge_dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } bge_devinfo(sc); sc->bge_asf_mode = 0; /* No ASF if APE present. */ if ((sc->bge_flags & BGE_FLAG_APE) == 0) { if (bge_allow_asf && (bge_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC)) { if (bge_readmem_ind(sc, BGE_SRAM_DATA_CFG) & BGE_HWCFG_ASF) { sc->bge_asf_mode |= ASF_ENABLE; sc->bge_asf_mode |= ASF_STACKUP; if (BGE_IS_575X_PLUS(sc)) sc->bge_asf_mode |= ASF_NEW_HANDSHAKE; } } } bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_SHUTDOWN); if (bge_reset(sc)) { device_printf(sc->bge_dev, "chip reset failed\n"); error = ENXIO; goto fail; } bge_sig_legacy(sc, BGE_RESET_SHUTDOWN); bge_sig_post_reset(sc, BGE_RESET_SHUTDOWN); if (bge_chipinit(sc)) { device_printf(sc->bge_dev, "chip initialization failed\n"); error = ENXIO; goto fail; } error = bge_get_eaddr(sc, eaddr); if (error) { device_printf(sc->bge_dev, "failed to read station address\n"); error = ENXIO; goto fail; } /* 5705 limits RX return ring to 512 entries. */ if (BGE_IS_5717_PLUS(sc)) sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT; else if (BGE_IS_5705_PLUS(sc)) sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT_5705; else sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT; if (bge_dma_alloc(sc)) { device_printf(sc->bge_dev, "failed to allocate DMA resources\n"); error = ENXIO; goto fail; } /* Set default tuneable values. */ sc->bge_stat_ticks = BGE_TICKS_PER_SEC; sc->bge_rx_coal_ticks = 150; sc->bge_tx_coal_ticks = 150; sc->bge_rx_max_coal_bds = 10; sc->bge_tx_max_coal_bds = 10; /* Initialize checksum features to use. */ sc->bge_csum_features = BGE_CSUM_FEATURES; if (sc->bge_forced_udpcsum != 0) sc->bge_csum_features |= CSUM_UDP; /* Set up ifnet structure */ ifp = sc->bge_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->bge_dev, "failed to if_alloc()\n"); error = ENXIO; goto fail; } if_setsoftc(ifp, sc); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, bge_ioctl); if_setstartfn(ifp, bge_start); if_setinitfn(ifp, bge_init); if_setgetcounterfn(ifp, bge_get_counter); if_setsendqlen(ifp, BGE_TX_RING_CNT - 1); if_setsendqready(ifp); if_sethwassist(ifp, sc->bge_csum_features); if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU); if ((sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) != 0) { if_sethwassistbits(ifp, CSUM_TSO, 0); if_setcapabilitiesbit(ifp, IFCAP_TSO4 | IFCAP_VLAN_HWTSO, 0); } #ifdef IFCAP_VLAN_HWCSUM if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWCSUM, 0); #endif if_setcapenable(ifp, if_getcapabilities(ifp)); #ifdef DEVICE_POLLING if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0); #endif /* * 5700 B0 chips do not support checksumming correctly due * to hardware bugs. */ if (sc->bge_chipid == BGE_CHIPID_BCM5700_B0) { if_setcapabilitiesbit(ifp, 0, IFCAP_HWCSUM); if_setcapenablebit(ifp, 0, IFCAP_HWCSUM); if_sethwassist(ifp, 0); } /* * Figure out what sort of media we have by checking the * hardware config word in the first 32k of NIC internal memory, * or fall back to examining the EEPROM if necessary. * Note: on some BCM5700 cards, this value appears to be unset. * If that's the case, we have to rely on identifying the NIC * by its PCI subsystem ID, as we do below for the SysKonnect * SK-9D41. */ if (bge_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC) hwcfg = bge_readmem_ind(sc, BGE_SRAM_DATA_CFG); else if ((sc->bge_flags & BGE_FLAG_EADDR) && (sc->bge_asicrev != BGE_ASICREV_BCM5906)) { if (bge_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET, sizeof(hwcfg))) { device_printf(sc->bge_dev, "failed to read EEPROM\n"); error = ENXIO; goto fail; } hwcfg = ntohl(hwcfg); } /* The SysKonnect SK-9D41 is a 1000baseSX card. */ if ((pci_read_config(dev, BGE_PCI_SUBSYS, 4) >> 16) == SK_SUBSYSID_9D41 || (hwcfg & BGE_HWCFG_MEDIA) == BGE_MEDIA_FIBER) { if (BGE_IS_5705_PLUS(sc)) { sc->bge_flags |= BGE_FLAG_MII_SERDES; sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; } else sc->bge_flags |= BGE_FLAG_TBI; } /* Set various PHY bug flags. */ if (sc->bge_chipid == BGE_CHIPID_BCM5701_A0 || sc->bge_chipid == BGE_CHIPID_BCM5701_B0) sc->bge_phy_flags |= BGE_PHY_CRC_BUG; if (sc->bge_chiprev == BGE_CHIPREV_5703_AX || sc->bge_chiprev == BGE_CHIPREV_5704_AX) sc->bge_phy_flags |= BGE_PHY_ADC_BUG; if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0) sc->bge_phy_flags |= BGE_PHY_5704_A0_BUG; if (pci_get_subvendor(dev) == DELL_VENDORID) sc->bge_phy_flags |= BGE_PHY_NO_3LED; if ((BGE_IS_5705_PLUS(sc)) && sc->bge_asicrev != BGE_ASICREV_BCM5906 && sc->bge_asicrev != BGE_ASICREV_BCM5785 && sc->bge_asicrev != BGE_ASICREV_BCM57780 && !BGE_IS_5717_PLUS(sc)) { if (sc->bge_asicrev == BGE_ASICREV_BCM5755 || sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5787) { if (pci_get_device(dev) != BCOM_DEVICEID_BCM5722 && pci_get_device(dev) != BCOM_DEVICEID_BCM5756) sc->bge_phy_flags |= BGE_PHY_JITTER_BUG; if (pci_get_device(dev) == BCOM_DEVICEID_BCM5755M) sc->bge_phy_flags |= BGE_PHY_ADJUST_TRIM; } else sc->bge_phy_flags |= BGE_PHY_BER_BUG; } /* * Don't enable Ethernet@WireSpeed for the 5700 or the * 5705 A0 and A1 chips. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || (sc->bge_asicrev == BGE_ASICREV_BCM5705 && (sc->bge_chipid != BGE_CHIPID_BCM5705_A0 && sc->bge_chipid != BGE_CHIPID_BCM5705_A1))) sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; if (sc->bge_flags & BGE_FLAG_TBI) { ifmedia_init(&sc->bge_ifmedia, IFM_IMASK, bge_ifmedia_upd, bge_ifmedia_sts); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->bge_ifmedia, IFM_ETHER | IFM_AUTO); sc->bge_ifmedia.ifm_media = sc->bge_ifmedia.ifm_cur->ifm_media; } else { /* * Do transceiver setup and tell the firmware the * driver is down so we can try to get access the * probe if ASF is running. Retry a couple of times * if we get a conflict with the ASF firmware accessing * the PHY. */ trys = 0; BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); again: bge_asf_driver_up(sc); error = mii_attach(dev, &sc->bge_miibus, ifp, (ifm_change_cb_t)bge_ifmedia_upd, (ifm_stat_cb_t)bge_ifmedia_sts, capmask, sc->bge_phy_addr, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { if (trys++ < 4) { device_printf(sc->bge_dev, "Try again\n"); bge_miibus_writereg(sc->bge_dev, sc->bge_phy_addr, MII_BMCR, BMCR_RESET); goto again; } device_printf(sc->bge_dev, "attaching PHYs failed\n"); goto fail; } /* * Now tell the firmware we are going up after probing the PHY */ if (sc->bge_asf_mode & ASF_STACKUP) BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); } /* * When using the BCM5701 in PCI-X mode, data corruption has * been observed in the first few bytes of some received packets. * Aligning the packet buffer in memory eliminates the corruption. * Unfortunately, this misaligns the packet payloads. On platforms * which do not support unaligned accesses, we will realign the * payloads by copying the received packets. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5701 && sc->bge_flags & BGE_FLAG_PCIX) sc->bge_flags |= BGE_FLAG_RX_ALIGNBUG; /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* Tell upper layer we support long frames. */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); /* * Hookup IRQ last. */ if (BGE_IS_5755_PLUS(sc) && sc->bge_flags & BGE_FLAG_MSI) { /* Take advantage of single-shot MSI. */ CSR_WRITE_4(sc, BGE_MSI_MODE, CSR_READ_4(sc, BGE_MSI_MODE) & ~BGE_MSIMODE_ONE_SHOT_DISABLE); sc->bge_tq = taskqueue_create_fast("bge_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->bge_tq); if (sc->bge_tq == NULL) { device_printf(dev, "could not create taskqueue.\n"); ether_ifdetach(ifp); error = ENOMEM; goto fail; } error = taskqueue_start_threads(&sc->bge_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->bge_dev)); if (error != 0) { device_printf(dev, "could not start threads.\n"); ether_ifdetach(ifp); goto fail; } error = bus_setup_intr(dev, sc->bge_irq, INTR_TYPE_NET | INTR_MPSAFE, bge_msi_intr, NULL, sc, &sc->bge_intrhand); } else error = bus_setup_intr(dev, sc->bge_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, bge_intr, sc, &sc->bge_intrhand); if (error) { ether_ifdetach(ifp); device_printf(sc->bge_dev, "couldn't set up irq\n"); goto fail; } /* Attach driver netdump methods. */ NETDUMP_SET(ifp, bge); fail: if (error) bge_detach(dev); return (error); } static int bge_detach(device_t dev) { struct bge_softc *sc; if_t ifp; sc = device_get_softc(dev); ifp = sc->bge_ifp; #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (device_is_attached(dev)) { ether_ifdetach(ifp); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); callout_drain(&sc->bge_stat_ch); } if (sc->bge_tq) taskqueue_drain(sc->bge_tq, &sc->bge_intr_task); if (sc->bge_flags & BGE_FLAG_TBI) ifmedia_removeall(&sc->bge_ifmedia); else if (sc->bge_miibus != NULL) { bus_generic_detach(dev); device_delete_child(dev, sc->bge_miibus); } bge_release_resources(sc); return (0); } static void bge_release_resources(struct bge_softc *sc) { device_t dev; dev = sc->bge_dev; if (sc->bge_tq != NULL) taskqueue_free(sc->bge_tq); if (sc->bge_intrhand != NULL) bus_teardown_intr(dev, sc->bge_irq, sc->bge_intrhand); if (sc->bge_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->bge_irq), sc->bge_irq); pci_release_msi(dev); } if (sc->bge_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->bge_res), sc->bge_res); if (sc->bge_res2 != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->bge_res2), sc->bge_res2); if (sc->bge_ifp != NULL) if_free(sc->bge_ifp); bge_dma_free(sc); if (mtx_initialized(&sc->bge_mtx)) /* XXX */ BGE_LOCK_DESTROY(sc); } static int bge_reset(struct bge_softc *sc) { device_t dev; uint32_t cachesize, command, mac_mode, mac_mode_mask, reset, val; void (*write_op)(struct bge_softc *, int, int); uint16_t devctl; int i; dev = sc->bge_dev; mac_mode_mask = BGE_MACMODE_HALF_DUPLEX | BGE_MACMODE_PORTMODE; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) mac_mode_mask |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN; mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & mac_mode_mask; if (BGE_IS_575X_PLUS(sc) && !BGE_IS_5714_FAMILY(sc) && (sc->bge_asicrev != BGE_ASICREV_BCM5906)) { if (sc->bge_flags & BGE_FLAG_PCIE) write_op = bge_writemem_direct; else write_op = bge_writemem_ind; } else write_op = bge_writereg_ind; if (sc->bge_asicrev != BGE_ASICREV_BCM5700 && sc->bge_asicrev != BGE_ASICREV_BCM5701) { CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1); for (i = 0; i < 8000; i++) { if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1) break; DELAY(20); } if (i == 8000) { if (bootverbose) device_printf(dev, "NVRAM lock timedout!\n"); } } /* Take APE lock when performing reset. */ bge_ape_lock(sc, BGE_APE_LOCK_GRC); /* Save some important PCI state. */ cachesize = pci_read_config(dev, BGE_PCI_CACHESZ, 4); command = pci_read_config(dev, BGE_PCI_CMD, 4); pci_write_config(dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_INDIRECT_ACCESS | BGE_PCIMISCCTL_MASK_PCI_INTR | BGE_HIF_SWAP_OPTIONS | BGE_PCIMISCCTL_PCISTATE_RW, 4); /* Disable fastboot on controllers that support it. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5752 || BGE_IS_5755_PLUS(sc)) { if (bootverbose) device_printf(dev, "Disabling fastboot\n"); CSR_WRITE_4(sc, BGE_FASTBOOT_PC, 0x0); } /* * Write the magic number to SRAM at offset 0xB50. * When firmware finishes its initialization it will * write ~BGE_SRAM_FW_MB_MAGIC to the same location. */ bge_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC); reset = BGE_MISCCFG_RESET_CORE_CLOCKS | BGE_32BITTIME_66MHZ; /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_asicrev != BGE_ASICREV_BCM5785 && (sc->bge_flags & BGE_FLAG_5717_PLUS) == 0) { if (CSR_READ_4(sc, 0x7E2C) == 0x60) /* PCIE 1.0 */ CSR_WRITE_4(sc, 0x7E2C, 0x20); } if (sc->bge_chipid != BGE_CHIPID_BCM5750_A0) { /* Prevent PCIE link training during global reset */ CSR_WRITE_4(sc, BGE_MISC_CFG, 1 << 29); reset |= 1 << 29; } } if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { val = CSR_READ_4(sc, BGE_VCPU_STATUS); CSR_WRITE_4(sc, BGE_VCPU_STATUS, val | BGE_VCPU_STATUS_DRV_RESET); val = CSR_READ_4(sc, BGE_VCPU_EXT_CTRL); CSR_WRITE_4(sc, BGE_VCPU_EXT_CTRL, val & ~BGE_VCPU_EXT_CTRL_HALT_CPU); } /* * Set GPHY Power Down Override to leave GPHY * powered up in D0 uninitialized. */ if (BGE_IS_5705_PLUS(sc) && (sc->bge_flags & BGE_FLAG_CPMU_PRESENT) == 0) reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE; /* Issue global reset */ write_op(sc, BGE_MISC_CFG, reset); if (sc->bge_flags & BGE_FLAG_PCIE) DELAY(100 * 1000); else DELAY(1000); /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_chipid == BGE_CHIPID_BCM5750_A0) { DELAY(500000); /* wait for link training to complete */ val = pci_read_config(dev, 0xC4, 4); pci_write_config(dev, 0xC4, val | (1 << 15), 4); } devctl = pci_read_config(dev, sc->bge_expcap + PCIER_DEVICE_CTL, 2); /* Clear enable no snoop and disable relaxed ordering. */ devctl &= ~(PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE); pci_write_config(dev, sc->bge_expcap + PCIER_DEVICE_CTL, devctl, 2); pci_set_max_read_req(dev, sc->bge_expmrq); /* Clear error status. */ pci_write_config(dev, sc->bge_expcap + PCIER_DEVICE_STA, PCIEM_STA_CORRECTABLE_ERROR | PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR | PCIEM_STA_UNSUPPORTED_REQ, 2); } /* Reset some of the PCI state that got zapped by reset. */ pci_write_config(dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_INDIRECT_ACCESS | BGE_PCIMISCCTL_MASK_PCI_INTR | BGE_HIF_SWAP_OPTIONS | BGE_PCIMISCCTL_PCISTATE_RW, 4); val = BGE_PCISTATE_ROM_ENABLE | BGE_PCISTATE_ROM_RETRY_ENABLE; if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0 && (sc->bge_flags & BGE_FLAG_PCIX) != 0) val |= BGE_PCISTATE_RETRY_SAME_DMA; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) val |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR | BGE_PCISTATE_ALLOW_APE_SHMEM_WR | BGE_PCISTATE_ALLOW_APE_PSPACE_WR; pci_write_config(dev, BGE_PCI_PCISTATE, val, 4); pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4); pci_write_config(dev, BGE_PCI_CMD, command, 4); /* * Disable PCI-X relaxed ordering to ensure status block update * comes first then packet buffer DMA. Otherwise driver may * read stale status block. */ if (sc->bge_flags & BGE_FLAG_PCIX) { devctl = pci_read_config(dev, sc->bge_pcixcap + PCIXR_COMMAND, 2); devctl &= ~PCIXM_COMMAND_ERO; if (sc->bge_asicrev == BGE_ASICREV_BCM5703) { devctl &= ~PCIXM_COMMAND_MAX_READ; devctl |= PCIXM_COMMAND_MAX_READ_2048; } else if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { devctl &= ~(PCIXM_COMMAND_MAX_SPLITS | PCIXM_COMMAND_MAX_READ); devctl |= PCIXM_COMMAND_MAX_READ_2048; } pci_write_config(dev, sc->bge_pcixcap + PCIXR_COMMAND, devctl, 2); } /* Re-enable MSI, if necessary, and enable the memory arbiter. */ if (BGE_IS_5714_FAMILY(sc)) { /* This chip disables MSI on reset. */ if (sc->bge_flags & BGE_FLAG_MSI) { val = pci_read_config(dev, sc->bge_msicap + PCIR_MSI_CTRL, 2); pci_write_config(dev, sc->bge_msicap + PCIR_MSI_CTRL, val | PCIM_MSICTRL_MSI_ENABLE, 2); val = CSR_READ_4(sc, BGE_MSI_MODE); CSR_WRITE_4(sc, BGE_MSI_MODE, val | BGE_MSIMODE_ENABLE); } val = CSR_READ_4(sc, BGE_MARB_MODE); CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE | val); } else CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE); /* Fix up byte swapping. */ CSR_WRITE_4(sc, BGE_MODE_CTL, bge_dma_swap_options(sc)); val = CSR_READ_4(sc, BGE_MAC_MODE); val = (val & ~mac_mode_mask) | mac_mode; CSR_WRITE_4(sc, BGE_MAC_MODE, val); DELAY(40); bge_ape_unlock(sc, BGE_APE_LOCK_GRC); if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { for (i = 0; i < BGE_TIMEOUT; i++) { val = CSR_READ_4(sc, BGE_VCPU_STATUS); if (val & BGE_VCPU_STATUS_INIT_DONE) break; DELAY(100); } if (i == BGE_TIMEOUT) { device_printf(dev, "reset timed out\n"); return (1); } } else { /* * Poll until we see the 1's complement of the magic number. * This indicates that the firmware initialization is complete. * We expect this to fail if no chip containing the Ethernet * address is fitted though. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); val = bge_readmem_ind(sc, BGE_SRAM_FW_MB); if (val == ~BGE_SRAM_FW_MB_MAGIC) break; } if ((sc->bge_flags & BGE_FLAG_EADDR) && i == BGE_TIMEOUT) device_printf(dev, "firmware handshake timed out, found 0x%08x\n", val); /* BCM57765 A0 needs additional time before accessing. */ if (sc->bge_chipid == BGE_CHIPID_BCM57765_A0) DELAY(10 * 1000); /* XXX */ } /* * The 5704 in TBI mode apparently needs some special * adjustment to insure the SERDES drive level is set * to 1.2V. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5704 && sc->bge_flags & BGE_FLAG_TBI) { val = CSR_READ_4(sc, BGE_SERDES_CFG); val = (val & ~0xFFF) | 0x880; CSR_WRITE_4(sc, BGE_SERDES_CFG, val); } /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE && !BGE_IS_5717_PLUS(sc) && sc->bge_chipid != BGE_CHIPID_BCM5750_A0 && sc->bge_asicrev != BGE_ASICREV_BCM5785) { /* Enable Data FIFO protection. */ val = CSR_READ_4(sc, 0x7C00); CSR_WRITE_4(sc, 0x7C00, val | (1 << 25)); } if (sc->bge_asicrev == BGE_ASICREV_BCM5720) BGE_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE, CPMU_CLCK_ORIDE_MAC_ORIDE_EN); return (0); } static __inline void bge_rxreuse_std(struct bge_softc *sc, int i) { struct bge_rx_bd *r; r = &sc->bge_ldata.bge_rx_std_ring[sc->bge_std]; r->bge_flags = BGE_RXBDFLAG_END; r->bge_len = sc->bge_cdata.bge_rx_std_seglen[i]; r->bge_idx = i; BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } static __inline void bge_rxreuse_jumbo(struct bge_softc *sc, int i) { struct bge_extrx_bd *r; r = &sc->bge_ldata.bge_rx_jumbo_ring[sc->bge_jumbo]; r->bge_flags = BGE_RXBDFLAG_JUMBO_RING | BGE_RXBDFLAG_END; r->bge_len0 = sc->bge_cdata.bge_rx_jumbo_seglen[i][0]; r->bge_len1 = sc->bge_cdata.bge_rx_jumbo_seglen[i][1]; r->bge_len2 = sc->bge_cdata.bge_rx_jumbo_seglen[i][2]; r->bge_len3 = sc->bge_cdata.bge_rx_jumbo_seglen[i][3]; r->bge_idx = i; BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } /* * Frame reception handling. This is called if there's a frame * on the receive return list. * * Note: we have to be able to handle two possibilities here: * 1) the frame is from the jumbo receive ring * 2) the frame is from the standard receive ring */ static int bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck) { if_t ifp; int rx_npkts = 0, stdcnt = 0, jumbocnt = 0; uint16_t rx_cons; rx_cons = sc->bge_rx_saved_considx; /* Nothing to do. */ if (rx_cons == rx_prod) return (rx_npkts); ifp = sc->bge_ifp; bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_POSTWRITE); if (BGE_IS_JUMBO_CAPABLE(sc) && if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN)) bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_POSTWRITE); while (rx_cons != rx_prod) { struct bge_rx_bd *cur_rx; uint32_t rxidx; struct mbuf *m = NULL; uint16_t vlan_tag = 0; int have_tag = 0; #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) { if (sc->rxcycles <= 0) break; sc->rxcycles--; } #endif cur_rx = &sc->bge_ldata.bge_rx_return_ring[rx_cons]; rxidx = cur_rx->bge_idx; BGE_INC(rx_cons, sc->bge_return_ring_cnt); if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING && cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) { have_tag = 1; vlan_tag = cur_rx->bge_vlan_tag; } if (cur_rx->bge_flags & BGE_RXBDFLAG_JUMBO_RING) { jumbocnt++; m = sc->bge_cdata.bge_rx_jumbo_chain[rxidx]; if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { bge_rxreuse_jumbo(sc, rxidx); continue; } if (bge_newbuf_jumbo(sc, rxidx) != 0) { bge_rxreuse_jumbo(sc, rxidx); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } else { stdcnt++; m = sc->bge_cdata.bge_rx_std_chain[rxidx]; if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { bge_rxreuse_std(sc, rxidx); continue; } if (bge_newbuf_std(sc, rxidx) != 0) { bge_rxreuse_std(sc, rxidx); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); #ifndef __NO_STRICT_ALIGNMENT /* * For architectures with strict alignment we must make sure * the payload is aligned. */ if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) { bcopy(m->m_data, m->m_data + ETHER_ALIGN, cur_rx->bge_len); m->m_data += ETHER_ALIGN; } #endif m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN; m->m_pkthdr.rcvif = ifp; if (if_getcapenable(ifp) & IFCAP_RXCSUM) bge_rxcsum(sc, cur_rx, m); /* * If we received a packet with a vlan tag, * attach that information to the packet. */ if (have_tag) { m->m_pkthdr.ether_vtag = vlan_tag; m->m_flags |= M_VLANTAG; } if (holdlck != 0) { BGE_UNLOCK(sc); if_input(ifp, m); BGE_LOCK(sc); } else if_input(ifp, m); rx_npkts++; if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return (rx_npkts); } bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_PREREAD); if (stdcnt > 0) bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); if (jumbocnt > 0) bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_rx_saved_considx = rx_cons; bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); if (stdcnt) bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, (sc->bge_std + BGE_STD_RX_RING_CNT - 1) % BGE_STD_RX_RING_CNT); if (jumbocnt) bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, (sc->bge_jumbo + BGE_JUMBO_RX_RING_CNT - 1) % BGE_JUMBO_RX_RING_CNT); #ifdef notyet /* * This register wraps very quickly under heavy packet drops. * If you need correct statistics, you can enable this check. */ if (BGE_IS_5705_PLUS(sc)) if_incierrors(ifp, CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS)); #endif return (rx_npkts); } static void bge_rxcsum(struct bge_softc *sc, struct bge_rx_bd *cur_rx, struct mbuf *m) { if (BGE_IS_5717_PLUS(sc)) { if ((cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) { if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->bge_error_flag & BGE_RXERRFLAG_IP_CSUM_NOK) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) { m->m_pkthdr.csum_data = cur_rx->bge_tcp_udp_csum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; } } } else { if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->bge_ip_csum ^ 0xFFFF) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM && m->m_pkthdr.len >= ETHER_MIN_NOPAD) { m->m_pkthdr.csum_data = cur_rx->bge_tcp_udp_csum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; } } } static void bge_txeof(struct bge_softc *sc, uint16_t tx_cons) { struct bge_tx_bd *cur_tx; if_t ifp; BGE_LOCK_ASSERT(sc); /* Nothing to do. */ if (sc->bge_tx_saved_considx == tx_cons) return; ifp = sc->bge_ifp; bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ while (sc->bge_tx_saved_considx != tx_cons) { uint32_t idx; idx = sc->bge_tx_saved_considx; cur_tx = &sc->bge_ldata.bge_tx_ring[idx]; if (cur_tx->bge_flags & BGE_TXBDFLAG_END) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (sc->bge_cdata.bge_tx_chain[idx] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[idx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[idx]); m_freem(sc->bge_cdata.bge_tx_chain[idx]); sc->bge_cdata.bge_tx_chain[idx] = NULL; } sc->bge_txcnt--; BGE_INC(sc->bge_tx_saved_considx, BGE_TX_RING_CNT); } if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); if (sc->bge_txcnt == 0) sc->bge_timer = 0; } #ifdef DEVICE_POLLING static int bge_poll(if_t ifp, enum poll_cmd cmd, int count) { struct bge_softc *sc = if_getsoftc(ifp); uint16_t rx_prod, tx_cons; uint32_t statusword; int rx_npkts = 0; BGE_LOCK(sc); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { BGE_UNLOCK(sc); return (rx_npkts); } bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Fetch updates from the status block. */ rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; statusword = sc->bge_ldata.bge_status_block->bge_status; /* Clear the status so the next pass only sees the changes. */ sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Note link event. It will be processed by POLL_AND_CHECK_STATUS. */ if (statusword & BGE_STATFLAG_LINKSTATE_CHANGED) sc->bge_link_evt++; if (cmd == POLL_AND_CHECK_STATUS) if ((sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) || sc->bge_link_evt || (sc->bge_flags & BGE_FLAG_TBI)) bge_link_upd(sc); sc->rxcycles = count; rx_npkts = bge_rxeof(sc, rx_prod, 1); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { BGE_UNLOCK(sc); return (rx_npkts); } bge_txeof(sc, tx_cons); if (!if_sendq_empty(ifp)) bge_start_locked(ifp); BGE_UNLOCK(sc); return (rx_npkts); } #endif /* DEVICE_POLLING */ static int bge_msi_intr(void *arg) { struct bge_softc *sc; sc = (struct bge_softc *)arg; /* * This interrupt is not shared and controller already * disabled further interrupt. */ taskqueue_enqueue(sc->bge_tq, &sc->bge_intr_task); return (FILTER_HANDLED); } static void bge_intr_task(void *arg, int pending) { struct bge_softc *sc; if_t ifp; uint32_t status, status_tag; uint16_t rx_prod, tx_cons; sc = (struct bge_softc *)arg; ifp = sc->bge_ifp; BGE_LOCK(sc); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { BGE_UNLOCK(sc); return; } /* Get updated status block. */ bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Save producer/consumer indices. */ rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; status = sc->bge_ldata.bge_status_block->bge_status; status_tag = sc->bge_ldata.bge_status_block->bge_status_tag << 24; /* Dirty the status flag. */ sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if ((sc->bge_flags & BGE_FLAG_TAGGED_STATUS) == 0) status_tag = 0; if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) != 0) bge_link_upd(sc); /* Let controller work. */ bge_writembx(sc, BGE_MBX_IRQ0_LO, status_tag); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING && sc->bge_rx_saved_considx != rx_prod) { /* Check RX return ring producer/consumer. */ BGE_UNLOCK(sc); bge_rxeof(sc, rx_prod, 0); BGE_LOCK(sc); } if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* Check TX ring producer/consumer. */ bge_txeof(sc, tx_cons); if (!if_sendq_empty(ifp)) bge_start_locked(ifp); } BGE_UNLOCK(sc); } static void bge_intr(void *xsc) { struct bge_softc *sc; if_t ifp; uint32_t statusword; uint16_t rx_prod, tx_cons; sc = xsc; BGE_LOCK(sc); ifp = sc->bge_ifp; #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) { BGE_UNLOCK(sc); return; } #endif /* * Ack the interrupt by writing something to BGE_MBX_IRQ0_LO. Don't * disable interrupts by writing nonzero like we used to, since with * our current organization this just gives complications and * pessimizations for re-enabling interrupts. We used to have races * instead of the necessary complications. Disabling interrupts * would just reduce the chance of a status update while we are * running (by switching to the interrupt-mode coalescence * parameters), but this chance is already very low so it is more * efficient to get another interrupt than prevent it. * * We do the ack first to ensure another interrupt if there is a * status update after the ack. We don't check for the status * changing later because it is more efficient to get another * interrupt than prevent it, not quite as above (not checking is * a smaller optimization than not toggling the interrupt enable, * since checking doesn't involve PCI accesses and toggling require * the status check). So toggling would probably be a pessimization * even with MSI. It would only be needed for using a task queue. */ bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); /* * Do the mandatory PCI flush as well as get the link status. */ statusword = CSR_READ_4(sc, BGE_MAC_STS) & BGE_MACSTAT_LINK_CHANGED; /* Make sure the descriptor ring indexes are coherent. */ bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if ((sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) || statusword || sc->bge_link_evt) bge_link_upd(sc); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* Check RX return ring producer/consumer. */ bge_rxeof(sc, rx_prod, 1); } if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { /* Check TX ring producer/consumer. */ bge_txeof(sc, tx_cons); } if (if_getdrvflags(ifp) & IFF_DRV_RUNNING && !if_sendq_empty(ifp)) bge_start_locked(ifp); BGE_UNLOCK(sc); } static void bge_asf_driver_up(struct bge_softc *sc) { if (sc->bge_asf_mode & ASF_STACKUP) { /* Send ASF heartbeat aprox. every 2s */ if (sc->bge_asf_count) sc->bge_asf_count --; else { sc->bge_asf_count = 2; bge_writemem_ind(sc, BGE_SRAM_FW_CMD_MB, BGE_FW_CMD_DRV_ALIVE); bge_writemem_ind(sc, BGE_SRAM_FW_CMD_LEN_MB, 4); bge_writemem_ind(sc, BGE_SRAM_FW_CMD_DATA_MB, BGE_FW_HB_TIMEOUT_SEC); CSR_WRITE_4(sc, BGE_RX_CPU_EVENT, CSR_READ_4(sc, BGE_RX_CPU_EVENT) | BGE_RX_CPU_DRV_EVENT); } } } static void bge_tick(void *xsc) { struct bge_softc *sc = xsc; struct mii_data *mii = NULL; BGE_LOCK_ASSERT(sc); /* Synchronize with possible callout reset/stop. */ if (callout_pending(&sc->bge_stat_ch) || !callout_active(&sc->bge_stat_ch)) return; if (BGE_IS_5705_PLUS(sc)) bge_stats_update_regs(sc); else bge_stats_update(sc); /* XXX Add APE heartbeat check here? */ if ((sc->bge_flags & BGE_FLAG_TBI) == 0) { mii = device_get_softc(sc->bge_miibus); /* * Do not touch PHY if we have link up. This could break * IPMI/ASF mode or produce extra input errors * (extra errors was reported for bcm5701 & bcm5704). */ if (!sc->bge_link) mii_tick(mii); } else { /* * Since in TBI mode auto-polling can't be used we should poll * link status manually. Here we register pending link event * and trigger interrupt. */ #ifdef DEVICE_POLLING /* In polling mode we poll link state in bge_poll(). */ if (!(if_getcapenable(sc->bge_ifp) & IFCAP_POLLING)) #endif { sc->bge_link_evt++; if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_flags & BGE_FLAG_5788) BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET); else BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW); } } bge_asf_driver_up(sc); bge_watchdog(sc); callout_reset(&sc->bge_stat_ch, hz, bge_tick, sc); } static void bge_stats_update_regs(struct bge_softc *sc) { if_t ifp; struct bge_mac_stats *stats; uint32_t val; ifp = sc->bge_ifp; stats = &sc->bge_mac_stats; stats->ifHCOutOctets += CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); stats->etherStatsCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); stats->outXonSent += CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); stats->outXoffSent += CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); stats->dot3StatsInternalMacTransmitErrors += CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); stats->dot3StatsSingleCollisionFrames += CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); stats->dot3StatsMultipleCollisionFrames += CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); stats->dot3StatsDeferredTransmissions += CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); stats->dot3StatsExcessiveCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); stats->dot3StatsLateCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); stats->ifHCOutUcastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); stats->ifHCOutMulticastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); stats->ifHCOutBroadcastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); stats->ifHCInOctets += CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); stats->etherStatsFragments += CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); stats->ifHCInUcastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); stats->ifHCInMulticastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); stats->ifHCInBroadcastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); stats->dot3StatsFCSErrors += CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); stats->dot3StatsAlignmentErrors += CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); stats->xonPauseFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); stats->xoffPauseFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); stats->macControlFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); stats->xoffStateEntered += CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); stats->dot3StatsFramesTooLong += CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); stats->etherStatsJabbers += CSR_READ_4(sc, BGE_RX_MAC_STATS_JABBERS); stats->etherStatsUndersizePkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); stats->FramesDroppedDueToFilters += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); stats->DmaWriteQueueFull += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); stats->DmaWriteHighPriQueueFull += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); stats->NoMoreRxBDs += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); /* * XXX * Unlike other controllers, BGE_RXLP_LOCSTAT_IFIN_DROPS * counter of BCM5717, BCM5718, BCM5719 A0 and BCM5720 A0 * includes number of unwanted multicast frames. This comes * from silicon bug and known workaround to get rough(not * exact) counter is to enable interrupt on MBUF low water * attention. This can be accomplished by setting * BGE_HCCMODE_ATTN bit of BGE_HCC_MODE, * BGE_BMANMODE_LOMBUF_ATTN bit of BGE_BMAN_MODE and * BGE_MODECTL_FLOWCTL_ATTN_INTR bit of BGE_MODE_CTL. * However that change would generate more interrupts and * there are still possibilities of losing multiple frames * during BGE_MODECTL_FLOWCTL_ATTN_INTR interrupt handling. * Given that the workaround still would not get correct * counter I don't think it's worth to implement it. So * ignore reading the counter on controllers that have the * silicon bug. */ if (sc->bge_asicrev != BGE_ASICREV_BCM5717 && sc->bge_chipid != BGE_CHIPID_BCM5719_A0 && sc->bge_chipid != BGE_CHIPID_BCM5720_A0) stats->InputDiscards += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); stats->InputErrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); stats->RecvThresholdHit += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); if (sc->bge_flags & BGE_FLAG_RDMA_BUG) { /* * If controller transmitted more than BGE_NUM_RDMA_CHANNELS * frames, it's safe to disable workaround for DMA engine's * miscalculation of TXMBUF space. */ if (stats->ifHCOutUcastPkts + stats->ifHCOutMulticastPkts + stats->ifHCOutBroadcastPkts > BGE_NUM_RDMA_CHANNELS) { val = CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL); if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val &= ~BGE_RDMA_TX_LENGTH_WA_5719; else val &= ~BGE_RDMA_TX_LENGTH_WA_5720; CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, val); sc->bge_flags &= ~BGE_FLAG_RDMA_BUG; } } } static void bge_stats_clear_regs(struct bge_softc *sc) { CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); CSR_READ_4(sc, BGE_RX_MAC_STATS_JABBERS); CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); } static void bge_stats_update(struct bge_softc *sc) { if_t ifp; bus_size_t stats; uint32_t cnt; /* current register value */ ifp = sc->bge_ifp; stats = BGE_MEMWIN_START + BGE_STATS_BLOCK; #define READ_STAT(sc, stats, stat) \ CSR_READ_4(sc, stats + offsetof(struct bge_stats, stat)) cnt = READ_STAT(sc, stats, txstats.etherStatsCollisions.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, cnt - sc->bge_tx_collisions); sc->bge_tx_collisions = cnt; cnt = READ_STAT(sc, stats, nicNoMoreRxBDs.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_nobds); sc->bge_rx_nobds = cnt; cnt = READ_STAT(sc, stats, ifInErrors.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_inerrs); sc->bge_rx_inerrs = cnt; cnt = READ_STAT(sc, stats, ifInDiscards.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_discards); sc->bge_rx_discards = cnt; cnt = READ_STAT(sc, stats, txstats.ifOutDiscards.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_OERRORS, cnt - sc->bge_tx_discards); sc->bge_tx_discards = cnt; #undef READ_STAT } /* * Pad outbound frame to ETHER_MIN_NOPAD for an unusual reason. * The bge hardware will pad out Tx runts to ETHER_MIN_NOPAD, * but when such padded frames employ the bge IP/TCP checksum offload, * the hardware checksum assist gives incorrect results (possibly * from incorporating its own padding into the UDP/TCP checksum; who knows). * If we pad such runts with zeros, the onboard checksum comes out correct. */ static __inline int bge_cksum_pad(struct mbuf *m) { int padlen = ETHER_MIN_NOPAD - m->m_pkthdr.len; struct mbuf *last; /* If there's only the packet-header and we can pad there, use it. */ if (m->m_pkthdr.len == m->m_len && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= padlen) { last = m; } else { /* * Walk packet chain to find last mbuf. We will either * pad there, or append a new mbuf and pad it. */ for (last = m; last->m_next != NULL; last = last->m_next); if (!(M_WRITABLE(last) && M_TRAILINGSPACE(last) >= padlen)) { /* Allocate new empty mbuf, pad it. Compact later. */ struct mbuf *n; MGET(n, M_NOWAIT, MT_DATA); if (n == NULL) return (ENOBUFS); n->m_len = 0; last->m_next = n; last = n; } } /* Now zero the pad area, to avoid the bge cksum-assist bug. */ memset(mtod(last, caddr_t) + last->m_len, 0, padlen); last->m_len += padlen; m->m_pkthdr.len += padlen; return (0); } static struct mbuf * bge_check_short_dma(struct mbuf *m) { struct mbuf *n; int found; /* * If device receive two back-to-back send BDs with less than * or equal to 8 total bytes then the device may hang. The two * back-to-back send BDs must in the same frame for this failure * to occur. Scan mbuf chains and see whether two back-to-back * send BDs are there. If this is the case, allocate new mbuf * and copy the frame to workaround the silicon bug. */ for (n = m, found = 0; n != NULL; n = n->m_next) { if (n->m_len < 8) { found++; if (found > 1) break; continue; } found = 0; } if (found > 1) { n = m_defrag(m, M_NOWAIT); if (n == NULL) m_freem(m); } else n = m; return (n); } static struct mbuf * bge_setup_tso(struct bge_softc *sc, struct mbuf *m, uint16_t *mss, uint16_t *flags) { struct ip *ip; struct tcphdr *tcp; struct mbuf *n; uint16_t hlen; uint32_t poff; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ n = m_dup(m, M_NOWAIT); m_freem(m); if (n == NULL) return (NULL); m = n; } m = m_pullup(m, sizeof(struct ether_header) + sizeof(struct ip)); if (m == NULL) return (NULL); ip = (struct ip *)(mtod(m, char *) + sizeof(struct ether_header)); poff = sizeof(struct ether_header) + (ip->ip_hl << 2); m = m_pullup(m, poff + sizeof(struct tcphdr)); if (m == NULL) return (NULL); tcp = (struct tcphdr *)(mtod(m, char *) + poff); m = m_pullup(m, poff + (tcp->th_off << 2)); if (m == NULL) return (NULL); /* * It seems controller doesn't modify IP length and TCP pseudo * checksum. These checksum computed by upper stack should be 0. */ *mss = m->m_pkthdr.tso_segsz; ip = (struct ip *)(mtod(m, char *) + sizeof(struct ether_header)); ip->ip_sum = 0; ip->ip_len = htons(*mss + (ip->ip_hl << 2) + (tcp->th_off << 2)); /* Clear pseudo checksum computed by TCP stack. */ tcp = (struct tcphdr *)(mtod(m, char *) + poff); tcp->th_sum = 0; /* * Broadcom controllers uses different descriptor format for * TSO depending on ASIC revision. Due to TSO-capable firmware * license issue and lower performance of firmware based TSO * we only support hardware based TSO. */ /* Calculate header length, incl. TCP/IP options, in 32 bit units. */ hlen = ((ip->ip_hl << 2) + (tcp->th_off << 2)) >> 2; if (sc->bge_flags & BGE_FLAG_TSO3) { /* * For BCM5717 and newer controllers, hardware based TSO * uses the 14 lower bits of the bge_mss field to store the * MSS and the upper 2 bits to store the lowest 2 bits of * the IP/TCP header length. The upper 6 bits of the header * length are stored in the bge_flags[14:10,4] field. Jumbo * frames are supported. */ *mss |= ((hlen & 0x3) << 14); *flags |= ((hlen & 0xF8) << 7) | ((hlen & 0x4) << 2); } else { /* * For BCM5755 and newer controllers, hardware based TSO uses * the lower 11 bits to store the MSS and the upper 5 bits to * store the IP/TCP header length. Jumbo frames are not * supported. */ *mss |= (hlen << 11); } return (m); } /* * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data * pointers to descriptors. */ static int bge_encap(struct bge_softc *sc, struct mbuf **m_head, uint32_t *txidx) { bus_dma_segment_t segs[BGE_NSEG_NEW]; bus_dmamap_t map; struct bge_tx_bd *d; struct mbuf *m = *m_head; uint32_t idx = *txidx; uint16_t csum_flags, mss, vlan_tag; int nsegs, i, error; csum_flags = 0; mss = 0; vlan_tag = 0; if ((sc->bge_flags & BGE_FLAG_SHORT_DMA_BUG) != 0 && m->m_next != NULL) { *m_head = bge_check_short_dma(m); if (*m_head == NULL) return (ENOBUFS); m = *m_head; } if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { *m_head = m = bge_setup_tso(sc, m, &mss, &csum_flags); if (*m_head == NULL) return (ENOBUFS); csum_flags |= BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA; } else if ((m->m_pkthdr.csum_flags & sc->bge_csum_features) != 0) { if (m->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= BGE_TXBDFLAG_IP_CSUM; if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)) { csum_flags |= BGE_TXBDFLAG_TCP_UDP_CSUM; if (m->m_pkthdr.len < ETHER_MIN_NOPAD && (error = bge_cksum_pad(m)) != 0) { m_freem(m); *m_head = NULL; return (error); } } } if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { if (sc->bge_flags & BGE_FLAG_JUMBO_FRAME && m->m_pkthdr.len > ETHER_MAX_LEN) csum_flags |= BGE_TXBDFLAG_JUMBO_FRAME; if (sc->bge_forced_collapse > 0 && (sc->bge_flags & BGE_FLAG_PCIE) != 0 && m->m_next != NULL) { /* * Forcedly collapse mbuf chains to overcome hardware * limitation which only support a single outstanding * DMA read operation. */ if (sc->bge_forced_collapse == 1) m = m_defrag(m, M_NOWAIT); else m = m_collapse(m, M_NOWAIT, sc->bge_forced_collapse); if (m == NULL) m = *m_head; *m_head = m; } } map = sc->bge_cdata.bge_tx_dmamap[idx]; error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(m, M_NOWAIT, BGE_NSEG_NEW); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { m_freem(m); *m_head = NULL; return (error); } } else if (error != 0) return (error); /* Check if we have enough free send BDs. */ if (sc->bge_txcnt + nsegs >= BGE_TX_RING_CNT) { bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, map); return (ENOBUFS); } bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, map, BUS_DMASYNC_PREWRITE); if (m->m_flags & M_VLANTAG) { csum_flags |= BGE_TXBDFLAG_VLAN_TAG; vlan_tag = m->m_pkthdr.ether_vtag; } if (sc->bge_asicrev == BGE_ASICREV_BCM5762 && (m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { /* * 5725 family of devices corrupts TSO packets when TSO DMA * buffers cross into regions which are within MSS bytes of * a 4GB boundary. If we encounter the condition, drop the * packet. */ for (i = 0; ; i++) { d = &sc->bge_ldata.bge_tx_ring[idx]; d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr); d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr); d->bge_len = segs[i].ds_len; if (d->bge_addr.bge_addr_lo + segs[i].ds_len + mss < d->bge_addr.bge_addr_lo) break; d->bge_flags = csum_flags; d->bge_vlan_tag = vlan_tag; d->bge_mss = mss; if (i == nsegs - 1) break; BGE_INC(idx, BGE_TX_RING_CNT); } if (i != nsegs - 1) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, map); m_freem(*m_head); *m_head = NULL; return (EIO); } } else { for (i = 0; ; i++) { d = &sc->bge_ldata.bge_tx_ring[idx]; d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr); d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr); d->bge_len = segs[i].ds_len; d->bge_flags = csum_flags; d->bge_vlan_tag = vlan_tag; d->bge_mss = mss; if (i == nsegs - 1) break; BGE_INC(idx, BGE_TX_RING_CNT); } } /* Mark the last segment as end of packet... */ d->bge_flags |= BGE_TXBDFLAG_END; /* * Insure that the map for this transmission * is placed at the array index of the last descriptor * in this chain. */ sc->bge_cdata.bge_tx_dmamap[*txidx] = sc->bge_cdata.bge_tx_dmamap[idx]; sc->bge_cdata.bge_tx_dmamap[idx] = map; sc->bge_cdata.bge_tx_chain[idx] = m; sc->bge_txcnt += nsegs; BGE_INC(idx, BGE_TX_RING_CNT); *txidx = idx; return (0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. */ static void bge_start_locked(if_t ifp) { struct bge_softc *sc; struct mbuf *m_head; uint32_t prodidx; int count; sc = if_getsoftc(ifp); BGE_LOCK_ASSERT(sc); if (!sc->bge_link || (if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; prodidx = sc->bge_tx_prodidx; for (count = 0; !if_sendq_empty(ifp);) { if (sc->bge_txcnt > BGE_TX_RING_CNT - 16) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); break; } m_head = if_dequeue(ifp); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (bge_encap(sc, &m_head, &prodidx)) { if (m_head == NULL) break; if_sendq_prepend(ifp, m_head); if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); break; } ++count; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if_bpfmtap(ifp, m_head); } if (count > 0) bge_start_tx(sc, prodidx); } static void bge_start_tx(struct bge_softc *sc, uint32_t prodidx) { bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Transmit. */ bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); sc->bge_tx_prodidx = prodidx; /* Set a timeout in case the chip goes out to lunch. */ sc->bge_timer = BGE_TX_TIMEOUT; } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. */ static void bge_start(if_t ifp) { struct bge_softc *sc; sc = if_getsoftc(ifp); BGE_LOCK(sc); bge_start_locked(ifp); BGE_UNLOCK(sc); } static void bge_init_locked(struct bge_softc *sc) { if_t ifp; uint16_t *m; uint32_t mode; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) return; /* Cancel pending I/O and flush buffers. */ bge_stop(sc); bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_START); bge_reset(sc); bge_sig_legacy(sc, BGE_RESET_START); bge_sig_post_reset(sc, BGE_RESET_START); bge_chipinit(sc); /* * Init the various state machines, ring * control blocks and firmware. */ if (bge_blockinit(sc)) { device_printf(sc->bge_dev, "initialization failure\n"); return; } ifp = sc->bge_ifp; /* Specify MTU. */ CSR_WRITE_4(sc, BGE_RX_MTU, if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + (if_getcapenable(ifp) & IFCAP_VLAN_MTU ? ETHER_VLAN_ENCAP_LEN : 0)); /* Load our MAC address. */ m = (uint16_t *)IF_LLADDR(sc->bge_ifp); CSR_WRITE_4(sc, BGE_MAC_ADDR1_LO, htons(m[0])); CSR_WRITE_4(sc, BGE_MAC_ADDR1_HI, (htons(m[1]) << 16) | htons(m[2])); /* Program promiscuous mode. */ bge_setpromisc(sc); /* Program multicast filter. */ bge_setmulti(sc); /* Program VLAN tag stripping. */ bge_setvlan(sc); /* Override UDP checksum offloading. */ if (sc->bge_forced_udpcsum == 0) sc->bge_csum_features &= ~CSUM_UDP; else sc->bge_csum_features |= CSUM_UDP; if (if_getcapabilities(ifp) & IFCAP_TXCSUM && if_getcapenable(ifp) & IFCAP_TXCSUM) { if_sethwassistbits(ifp, 0, (BGE_CSUM_FEATURES | CSUM_UDP)); if_sethwassistbits(ifp, sc->bge_csum_features, 0); } /* Init RX ring. */ if (bge_init_rx_ring_std(sc) != 0) { device_printf(sc->bge_dev, "no memory for std Rx buffers.\n"); bge_stop(sc); return; } /* * Workaround for a bug in 5705 ASIC rev A0. Poll the NIC's * memory to insure that the chip has in fact read the first * entry of the ring. */ if (sc->bge_chipid == BGE_CHIPID_BCM5705_A0) { uint32_t v, i; for (i = 0; i < 10; i++) { DELAY(20); v = bge_readmem_ind(sc, BGE_STD_RX_RINGS + 8); if (v == (MCLBYTES - ETHER_ALIGN)) break; } if (i == 10) device_printf (sc->bge_dev, "5705 A0 chip failed to load RX ring\n"); } /* Init jumbo RX ring. */ if (BGE_IS_JUMBO_CAPABLE(sc) && if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN)) { if (bge_init_rx_ring_jumbo(sc) != 0) { device_printf(sc->bge_dev, "no memory for jumbo Rx buffers.\n"); bge_stop(sc); return; } } /* Init our RX return ring index. */ sc->bge_rx_saved_considx = 0; /* Init our RX/TX stat counters. */ sc->bge_rx_discards = sc->bge_tx_discards = sc->bge_tx_collisions = 0; /* Init TX ring. */ bge_init_tx_ring(sc); /* Enable TX MAC state machine lockup fix. */ mode = CSR_READ_4(sc, BGE_TX_MODE); if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906) mode |= BGE_TXMODE_MBUF_LOCKUP_FIX; if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { mode &= ~(BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE); mode |= CSR_READ_4(sc, BGE_TX_MODE) & (BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE); } /* Turn on transmitter. */ CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE); DELAY(100); /* Turn on receiver. */ mode = CSR_READ_4(sc, BGE_RX_MODE); if (BGE_IS_5755_PLUS(sc)) mode |= BGE_RXMODE_IPV6_ENABLE; if (sc->bge_asicrev == BGE_ASICREV_BCM5762) mode |= BGE_RXMODE_IPV4_FRAG_FIX; CSR_WRITE_4(sc,BGE_RX_MODE, mode | BGE_RXMODE_ENABLE); DELAY(10); /* * Set the number of good frames to receive after RX MBUF * Low Watermark has been reached. After the RX MAC receives * this number of frames, it will drop subsequent incoming * frames until the MBUF High Watermark is reached. */ if (BGE_IS_57765_PLUS(sc)) CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1); else CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2); /* Clear MAC statistics. */ if (BGE_IS_5705_PLUS(sc)) bge_stats_clear_regs(sc); /* Tell firmware we're alive. */ BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); #ifdef DEVICE_POLLING /* Disable interrupts if we are polling. */ if (if_getcapenable(ifp) & IFCAP_POLLING) { BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); } else #endif /* Enable host interrupts. */ { BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA); BGE_CLRBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); } if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); bge_ifmedia_upd_locked(ifp); callout_reset(&sc->bge_stat_ch, hz, bge_tick, sc); } static void bge_init(void *xsc) { struct bge_softc *sc = xsc; BGE_LOCK(sc); bge_init_locked(sc); BGE_UNLOCK(sc); } /* * Set media options. */ static int bge_ifmedia_upd(if_t ifp) { struct bge_softc *sc = if_getsoftc(ifp); int res; BGE_LOCK(sc); res = bge_ifmedia_upd_locked(ifp); BGE_UNLOCK(sc); return (res); } static int bge_ifmedia_upd_locked(if_t ifp) { struct bge_softc *sc = if_getsoftc(ifp); struct mii_data *mii; struct mii_softc *miisc; struct ifmedia *ifm; BGE_LOCK_ASSERT(sc); ifm = &sc->bge_ifmedia; /* If this is a 1000baseX NIC, enable the TBI port. */ if (sc->bge_flags & BGE_FLAG_TBI) { if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch(IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: /* * The BCM5704 ASIC appears to have a special * mechanism for programming the autoneg * advertisement registers in TBI mode. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { uint32_t sgdig; sgdig = CSR_READ_4(sc, BGE_SGDIG_STS); if (sgdig & BGE_SGDIGSTS_DONE) { CSR_WRITE_4(sc, BGE_TX_TBI_AUTONEG, 0); sgdig = CSR_READ_4(sc, BGE_SGDIG_CFG); sgdig |= BGE_SGDIGCFG_AUTO | BGE_SGDIGCFG_PAUSE_CAP | BGE_SGDIGCFG_ASYM_PAUSE; CSR_WRITE_4(sc, BGE_SGDIG_CFG, sgdig | BGE_SGDIGCFG_SEND); DELAY(5); CSR_WRITE_4(sc, BGE_SGDIG_CFG, sgdig); } } break; case IFM_1000_SX: if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); } else { BGE_SETBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); } DELAY(40); break; default: return (EINVAL); } return (0); } sc->bge_link_evt++; mii = device_get_softc(sc->bge_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); mii_mediachg(mii); /* * Force an interrupt so that we will call bge_link_upd * if needed and clear any pending link state attention. * Without this we are not getting any further interrupts * for link state changes and thus will not UP the link and * not be able to send in bge_start_locked. The only * way to get things working was to receive a packet and * get an RX intr. * bge_tick should help for fiber cards and we might not * need to do this here if BGE_FLAG_TBI is set but as * we poll for fiber anyway it should not harm. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_flags & BGE_FLAG_5788) BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET); else BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW); return (0); } /* * Report current media status. */ static void bge_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct bge_softc *sc = if_getsoftc(ifp); struct mii_data *mii; BGE_LOCK(sc); if ((if_getflags(ifp) & IFF_UP) == 0) { BGE_UNLOCK(sc); return; } if (sc->bge_flags & BGE_FLAG_TBI) { ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (CSR_READ_4(sc, BGE_MAC_STS) & BGE_MACSTAT_TBI_PCS_SYNCHED) ifmr->ifm_status |= IFM_ACTIVE; else { ifmr->ifm_active |= IFM_NONE; BGE_UNLOCK(sc); return; } ifmr->ifm_active |= IFM_1000_SX; if (CSR_READ_4(sc, BGE_MAC_MODE) & BGE_MACMODE_HALF_DUPLEX) ifmr->ifm_active |= IFM_HDX; else ifmr->ifm_active |= IFM_FDX; BGE_UNLOCK(sc); return; } mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; BGE_UNLOCK(sc); } static int bge_ioctl(if_t ifp, u_long command, caddr_t data) { struct bge_softc *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int flags, mask, error = 0; switch (command) { case SIOCSIFMTU: if (BGE_IS_JUMBO_CAPABLE(sc) || (sc->bge_flags & BGE_FLAG_JUMBO_STD)) { if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > BGE_JUMBO_MTU) { error = EINVAL; break; } } else if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > ETHERMTU) { error = EINVAL; break; } BGE_LOCK(sc); if (if_getmtu(ifp) != ifr->ifr_mtu) { if_setmtu(ifp, ifr->ifr_mtu); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); bge_init_locked(sc); } } BGE_UNLOCK(sc); break; case SIOCSIFFLAGS: BGE_LOCK(sc); if (if_getflags(ifp) & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. Similarly for ALLMULTI. */ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { flags = if_getflags(ifp) ^ sc->bge_if_flags; if (flags & IFF_PROMISC) bge_setpromisc(sc); if (flags & IFF_ALLMULTI) bge_setmulti(sc); } else bge_init_locked(sc); } else { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bge_stop(sc); } } sc->bge_if_flags = if_getflags(ifp); BGE_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { BGE_LOCK(sc); bge_setmulti(sc); BGE_UNLOCK(sc); error = 0; } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: if (sc->bge_flags & BGE_FLAG_TBI) { error = ifmedia_ioctl(ifp, ifr, &sc->bge_ifmedia, command); } else { mii = device_get_softc(sc->bge_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(bge_poll, ifp); if (error) return (error); BGE_LOCK(sc); BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); if_setcapenablebit(ifp, IFCAP_POLLING, 0); BGE_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ BGE_LOCK(sc); BGE_CLRBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); if_setcapenablebit(ifp, 0, IFCAP_POLLING); BGE_UNLOCK(sc); } } #endif if ((mask & IFCAP_TXCSUM) != 0 && (if_getcapabilities(ifp) & IFCAP_TXCSUM) != 0) { if_togglecapenable(ifp, IFCAP_TXCSUM); if ((if_getcapenable(ifp) & IFCAP_TXCSUM) != 0) if_sethwassistbits(ifp, sc->bge_csum_features, 0); else if_sethwassistbits(ifp, 0, sc->bge_csum_features); } if ((mask & IFCAP_RXCSUM) != 0 && (if_getcapabilities(ifp) & IFCAP_RXCSUM) != 0) if_togglecapenable(ifp, IFCAP_RXCSUM); if ((mask & IFCAP_TSO4) != 0 && (if_getcapabilities(ifp) & IFCAP_TSO4) != 0) { if_togglecapenable(ifp, IFCAP_TSO4); if ((if_getcapenable(ifp) & IFCAP_TSO4) != 0) if_sethwassistbits(ifp, CSUM_TSO, 0); else if_sethwassistbits(ifp, 0, CSUM_TSO); } if (mask & IFCAP_VLAN_MTU) { if_togglecapenable(ifp, IFCAP_VLAN_MTU); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); bge_init(sc); } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) != 0) if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (if_getcapabilities(ifp) & IFCAP_VLAN_HWTAGGING) != 0) { if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING); if ((if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) == 0) if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO); BGE_LOCK(sc); bge_setvlan(sc); BGE_UNLOCK(sc); } #ifdef VLAN_CAPABILITIES if_vlancap(ifp); #endif break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void bge_watchdog(struct bge_softc *sc) { if_t ifp; uint32_t status; BGE_LOCK_ASSERT(sc); if (sc->bge_timer == 0 || --sc->bge_timer) return; /* If pause frames are active then don't reset the hardware. */ if ((CSR_READ_4(sc, BGE_RX_MODE) & BGE_RXMODE_FLOWCTL_ENABLE) != 0) { status = CSR_READ_4(sc, BGE_RX_STS); if ((status & BGE_RXSTAT_REMOTE_XOFFED) != 0) { /* * If link partner has us in XOFF state then wait for * the condition to clear. */ CSR_WRITE_4(sc, BGE_RX_STS, status); sc->bge_timer = BGE_TX_TIMEOUT; return; } else if ((status & BGE_RXSTAT_RCVD_XOFF) != 0 && (status & BGE_RXSTAT_RCVD_XON) != 0) { /* * If link partner has us in XOFF state then wait for * the condition to clear. */ CSR_WRITE_4(sc, BGE_RX_STS, status); sc->bge_timer = BGE_TX_TIMEOUT; return; } /* * Any other condition is unexpected and the controller * should be reset. */ } ifp = sc->bge_ifp; if_printf(ifp, "watchdog timeout -- resetting\n"); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); bge_init_locked(sc); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } static void bge_stop_block(struct bge_softc *sc, bus_size_t reg, uint32_t bit) { int i; BGE_CLRBIT(sc, reg, bit); for (i = 0; i < BGE_TIMEOUT; i++) { if ((CSR_READ_4(sc, reg) & bit) == 0) return; DELAY(100); } } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void bge_stop(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; callout_stop(&sc->bge_stat_ch); /* Disable host interrupts. */ BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); /* * Tell firmware we're shutting down. */ bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_SHUTDOWN); /* * Disable all of the receiver blocks. */ bge_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE); bge_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE); bge_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE); bge_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE); bge_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE); bge_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE); /* * Disable all of the transmit blocks. */ bge_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE); bge_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE); bge_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE); bge_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE); bge_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE); bge_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE); /* * Shut down all of the memory managers and related * state machines. */ bge_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE); bge_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0); if (!(BGE_IS_5705_PLUS(sc))) { BGE_CLRBIT(sc, BGE_BMAN_MODE, BGE_BMANMODE_ENABLE); BGE_CLRBIT(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE); } /* Update MAC statistics. */ if (BGE_IS_5705_PLUS(sc)) bge_stats_update_regs(sc); bge_reset(sc); bge_sig_legacy(sc, BGE_RESET_SHUTDOWN); bge_sig_post_reset(sc, BGE_RESET_SHUTDOWN); /* * Keep the ASF firmware running if up. */ if (sc->bge_asf_mode & ASF_STACKUP) BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); else BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); /* Free the RX lists. */ bge_free_rx_ring_std(sc); /* Free jumbo RX list. */ if (BGE_IS_JUMBO_CAPABLE(sc)) bge_free_rx_ring_jumbo(sc); /* Free TX buffers. */ bge_free_tx_ring(sc); sc->bge_tx_saved_considx = BGE_TXCONS_UNSET; /* Clear MAC's link state (PHY may still have link UP). */ if (bootverbose && sc->bge_link) if_printf(sc->bge_ifp, "link DOWN\n"); sc->bge_link = 0; if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int bge_shutdown(device_t dev) { struct bge_softc *sc; sc = device_get_softc(dev); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); return (0); } static int bge_suspend(device_t dev) { struct bge_softc *sc; sc = device_get_softc(dev); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); return (0); } static int bge_resume(device_t dev) { struct bge_softc *sc; if_t ifp; sc = device_get_softc(dev); BGE_LOCK(sc); ifp = sc->bge_ifp; if (if_getflags(ifp) & IFF_UP) { bge_init_locked(sc); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) bge_start_locked(ifp); } BGE_UNLOCK(sc); return (0); } static void bge_link_upd(struct bge_softc *sc) { struct mii_data *mii; uint32_t link, status; BGE_LOCK_ASSERT(sc); /* Clear 'pending link event' flag. */ sc->bge_link_evt = 0; /* * Process link state changes. * Grrr. The link status word in the status block does * not work correctly on the BCM5700 rev AX and BX chips, * according to all available information. Hence, we have * to enable MII interrupts in order to properly obtain * async link changes. Unfortunately, this also means that * we have to read the MAC status register to detect link * changes, thereby adding an additional register access to * the interrupt handler. * * XXX: perhaps link state detection procedure used for * BGE_CHIPID_BCM5700_B2 can be used for others BCM5700 revisions. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) { status = CSR_READ_4(sc, BGE_MAC_STS); if (status & BGE_MACSTAT_MI_INTERRUPT) { mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->bge_link++; if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); } else if (sc->bge_link && (!(mii->mii_media_status & IFM_ACTIVE) || IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); } /* Clear the interrupt. */ CSR_WRITE_4(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_MI_INTERRUPT); bge_miibus_readreg(sc->bge_dev, sc->bge_phy_addr, BRGPHY_MII_ISR); bge_miibus_writereg(sc->bge_dev, sc->bge_phy_addr, BRGPHY_MII_IMR, BRGPHY_INTRS); } return; } if (sc->bge_flags & BGE_FLAG_TBI) { status = CSR_READ_4(sc, BGE_MAC_STS); if (status & BGE_MACSTAT_TBI_PCS_SYNCHED) { if (!sc->bge_link) { sc->bge_link++; if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_TBI_SEND_CFGS); DELAY(40); } CSR_WRITE_4(sc, BGE_MAC_STS, 0xFFFFFFFF); if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); if_link_state_change(sc->bge_ifp, LINK_STATE_UP); } } else if (sc->bge_link) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); if_link_state_change(sc->bge_ifp, LINK_STATE_DOWN); } } else if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { /* * Some broken BCM chips have BGE_STATFLAG_LINKSTATE_CHANGED bit * in status word always set. Workaround this bug by reading * PHY link status directly. */ link = (CSR_READ_4(sc, BGE_MI_STS) & BGE_MISTS_LINK) ? 1 : 0; if (link != sc->bge_link || sc->bge_asicrev == BGE_ASICREV_BCM5700) { mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->bge_link++; if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); } else if (sc->bge_link && (!(mii->mii_media_status & IFM_ACTIVE) || IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); } } } else { /* * For controllers that call mii_tick, we have to poll * link status. */ mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); bge_miibus_statchg(sc->bge_dev); } /* Disable MAC attention when link is up. */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); } static void bge_add_sysctls(struct bge_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int unit; ctx = device_get_sysctl_ctx(sc->bge_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->bge_dev)); #ifdef BGE_REGISTER_DEBUG SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "debug_info", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "reg_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_reg_read, "I", "MAC Register Read"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ape_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_ape_read, "I", "APE Register Read"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mem_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_mem_read, "I", "Memory Read"); #endif unit = device_get_unit(sc->bge_dev); /* * A common design characteristic for many Broadcom client controllers * is that they only support a single outstanding DMA read operation * on the PCIe bus. This means that it will take twice as long to fetch * a TX frame that is split into header and payload buffers as it does * to fetch a single, contiguous TX frame (2 reads vs. 1 read). For * these controllers, coalescing buffers to reduce the number of memory * reads is effective way to get maximum performance(about 940Mbps). * Without collapsing TX buffers the maximum TCP bulk transfer * performance is about 850Mbps. However forcing coalescing mbufs * consumes a lot of CPU cycles, so leave it off by default. */ sc->bge_forced_collapse = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_collapse", CTLFLAG_RWTUN, &sc->bge_forced_collapse, 0, "Number of fragmented TX buffers of a frame allowed before " "forced collapsing"); sc->bge_msi = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "msi", CTLFLAG_RDTUN, &sc->bge_msi, 0, "Enable MSI"); /* * It seems all Broadcom controllers have a bug that can generate UDP * datagrams with checksum value 0 when TX UDP checksum offloading is * enabled. Generating UDP checksum value 0 is RFC 768 violation. * Even though the probability of generating such UDP datagrams is * low, I don't want to see FreeBSD boxes to inject such datagrams * into network so disable UDP checksum offloading by default. Users * still override this behavior by setting a sysctl variable, * dev.bge.0.forced_udpcsum. */ sc->bge_forced_udpcsum = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_udpcsum", CTLFLAG_RWTUN, &sc->bge_forced_udpcsum, 0, "Enable UDP checksum offloading even if controller can " "generate UDP checksum value 0"); if (BGE_IS_5705_PLUS(sc)) bge_add_sysctl_stats_regs(sc, ctx, children); else bge_add_sysctl_stats(sc, ctx, children); } #define BGE_SYSCTL_STAT(sc, ctx, desc, parent, node, oid) \ SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, oid, CTLTYPE_UINT|CTLFLAG_RD, \ sc, offsetof(struct bge_stats, node), bge_sysctl_stats, "IU", \ desc) static void bge_add_sysctl_stats(struct bge_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent) { struct sysctl_oid *tree; struct sysctl_oid_list *children, *schildren; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, NULL, "BGE Statistics"); schildren = children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Frames Dropped Due To Filters", children, COSFramesDroppedDueToFilters, "FramesDroppedDueToFilters"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Write Queue Full", children, nicDmaWriteQueueFull, "DmaWriteQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Write High Priority Queue Full", children, nicDmaWriteHighPriQueueFull, "DmaWriteHighPriQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC No More RX Buffer Descriptors", children, nicNoMoreRxBDs, "NoMoreRxBDs"); BGE_SYSCTL_STAT(sc, ctx, "Discarded Input Frames", children, ifInDiscards, "InputDiscards"); BGE_SYSCTL_STAT(sc, ctx, "Input Errors", children, ifInErrors, "InputErrors"); BGE_SYSCTL_STAT(sc, ctx, "NIC Recv Threshold Hit", children, nicRecvThresholdHit, "RecvThresholdHit"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Read Queue Full", children, nicDmaReadQueueFull, "DmaReadQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Read High Priority Queue Full", children, nicDmaReadHighPriQueueFull, "DmaReadHighPriQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC Send Data Complete Queue Full", children, nicSendDataCompQueueFull, "SendDataCompQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC Ring Set Send Producer Index", children, nicRingSetSendProdIndex, "RingSetSendProdIndex"); BGE_SYSCTL_STAT(sc, ctx, "NIC Ring Status Update", children, nicRingStatusUpdate, "RingStatusUpdate"); BGE_SYSCTL_STAT(sc, ctx, "NIC Interrupts", children, nicInterrupts, "Interrupts"); BGE_SYSCTL_STAT(sc, ctx, "NIC Avoided Interrupts", children, nicAvoidedInterrupts, "AvoidedInterrupts"); BGE_SYSCTL_STAT(sc, ctx, "NIC Send Threshold Hit", children, nicSendThresholdHit, "SendThresholdHit"); tree = SYSCTL_ADD_NODE(ctx, schildren, OID_AUTO, "rx", CTLFLAG_RD, NULL, "BGE RX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Inbound Octets", children, rxstats.ifHCInOctets, "ifHCInOctets"); BGE_SYSCTL_STAT(sc, ctx, "Fragments", children, rxstats.etherStatsFragments, "Fragments"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Unicast Packets", children, rxstats.ifHCInUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Multicast Packets", children, rxstats.ifHCInMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "FCS Errors", children, rxstats.dot3StatsFCSErrors, "FCSErrors"); BGE_SYSCTL_STAT(sc, ctx, "Alignment Errors", children, rxstats.dot3StatsAlignmentErrors, "AlignmentErrors"); BGE_SYSCTL_STAT(sc, ctx, "XON Pause Frames Received", children, rxstats.xonPauseFramesReceived, "xonPauseFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "XOFF Pause Frames Received", children, rxstats.xoffPauseFramesReceived, "xoffPauseFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "MAC Control Frames Received", children, rxstats.macControlFramesReceived, "ControlFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "XOFF State Entered", children, rxstats.xoffStateEntered, "xoffStateEntered"); BGE_SYSCTL_STAT(sc, ctx, "Frames Too Long", children, rxstats.dot3StatsFramesTooLong, "FramesTooLong"); BGE_SYSCTL_STAT(sc, ctx, "Jabbers", children, rxstats.etherStatsJabbers, "Jabbers"); BGE_SYSCTL_STAT(sc, ctx, "Undersized Packets", children, rxstats.etherStatsUndersizePkts, "UndersizePkts"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Range Length Errors", children, rxstats.inRangeLengthError, "inRangeLengthError"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Range Length Errors", children, rxstats.outRangeLengthError, "outRangeLengthError"); tree = SYSCTL_ADD_NODE(ctx, schildren, OID_AUTO, "tx", CTLFLAG_RD, NULL, "BGE TX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Outbound Octets", children, txstats.ifHCOutOctets, "ifHCOutOctets"); BGE_SYSCTL_STAT(sc, ctx, "TX Collisions", children, txstats.etherStatsCollisions, "Collisions"); BGE_SYSCTL_STAT(sc, ctx, "XON Sent", children, txstats.outXonSent, "XonSent"); BGE_SYSCTL_STAT(sc, ctx, "XOFF Sent", children, txstats.outXoffSent, "XoffSent"); BGE_SYSCTL_STAT(sc, ctx, "Flow Control Done", children, txstats.flowControlDone, "flowControlDone"); BGE_SYSCTL_STAT(sc, ctx, "Internal MAC TX errors", children, txstats.dot3StatsInternalMacTransmitErrors, "InternalMacTransmitErrors"); BGE_SYSCTL_STAT(sc, ctx, "Single Collision Frames", children, txstats.dot3StatsSingleCollisionFrames, "SingleCollisionFrames"); BGE_SYSCTL_STAT(sc, ctx, "Multiple Collision Frames", children, txstats.dot3StatsMultipleCollisionFrames, "MultipleCollisionFrames"); BGE_SYSCTL_STAT(sc, ctx, "Deferred Transmissions", children, txstats.dot3StatsDeferredTransmissions, "DeferredTransmissions"); BGE_SYSCTL_STAT(sc, ctx, "Excessive Collisions", children, txstats.dot3StatsExcessiveCollisions, "ExcessiveCollisions"); BGE_SYSCTL_STAT(sc, ctx, "Late Collisions", children, txstats.dot3StatsLateCollisions, "LateCollisions"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Unicast Packets", children, txstats.ifHCOutUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Multicast Packets", children, txstats.ifHCOutMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Broadcast Packets", children, txstats.ifHCOutBroadcastPkts, "BroadcastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Carrier Sense Errors", children, txstats.dot3StatsCarrierSenseErrors, "CarrierSenseErrors"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Discards", children, txstats.ifOutDiscards, "Discards"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Errors", children, txstats.ifOutErrors, "Errors"); } #undef BGE_SYSCTL_STAT #define BGE_SYSCTL_STAT_ADD64(c, h, n, p, d) \ SYSCTL_ADD_UQUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d) static void bge_add_sysctl_stats_regs(struct bge_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent) { struct sysctl_oid *tree; struct sysctl_oid_list *child, *schild; struct bge_mac_stats *stats; stats = &sc->bge_mac_stats; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, NULL, "BGE Statistics"); schild = child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesDroppedDueToFilters", &stats->FramesDroppedDueToFilters, "Frames Dropped Due to Filters"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteQueueFull", &stats->DmaWriteQueueFull, "NIC DMA Write Queue Full"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteHighPriQueueFull", &stats->DmaWriteHighPriQueueFull, "NIC DMA Write High Priority Queue Full"); BGE_SYSCTL_STAT_ADD64(ctx, child, "NoMoreRxBDs", &stats->NoMoreRxBDs, "NIC No More RX Buffer Descriptors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InputDiscards", &stats->InputDiscards, "Discarded Input Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InputErrors", &stats->InputErrors, "Input Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "RecvThresholdHit", &stats->RecvThresholdHit, "NIC Recv Threshold Hit"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "rx", CTLFLAG_RD, NULL, "BGE RX Statistics"); child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCInOctets", &stats->ifHCInOctets, "Inbound Octets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Fragments", &stats->etherStatsFragments, "Fragments"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", &stats->ifHCInUcastPkts, "Inbound Unicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", &stats->ifHCInMulticastPkts, "Inbound Multicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", &stats->ifHCInBroadcastPkts, "Inbound Broadcast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "FCSErrors", &stats->dot3StatsFCSErrors, "FCS Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "AlignmentErrors", &stats->dot3StatsAlignmentErrors, "Alignment Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xonPauseFramesReceived", &stats->xonPauseFramesReceived, "XON Pause Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffPauseFramesReceived", &stats->xoffPauseFramesReceived, "XOFF Pause Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "ControlFramesReceived", &stats->macControlFramesReceived, "MAC Control Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffStateEntered", &stats->xoffStateEntered, "XOFF State Entered"); BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesTooLong", &stats->dot3StatsFramesTooLong, "Frames Too Long"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Jabbers", &stats->etherStatsJabbers, "Jabbers"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UndersizePkts", &stats->etherStatsUndersizePkts, "Undersized Packets"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "tx", CTLFLAG_RD, NULL, "BGE TX Statistics"); child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCOutOctets", &stats->ifHCOutOctets, "Outbound Octets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Collisions", &stats->etherStatsCollisions, "TX Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "XonSent", &stats->outXonSent, "XON Sent"); BGE_SYSCTL_STAT_ADD64(ctx, child, "XoffSent", &stats->outXoffSent, "XOFF Sent"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InternalMacTransmitErrors", &stats->dot3StatsInternalMacTransmitErrors, "Internal MAC TX Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "SingleCollisionFrames", &stats->dot3StatsSingleCollisionFrames, "Single Collision Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MultipleCollisionFrames", &stats->dot3StatsMultipleCollisionFrames, "Multiple Collision Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DeferredTransmissions", &stats->dot3StatsDeferredTransmissions, "Deferred Transmissions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "ExcessiveCollisions", &stats->dot3StatsExcessiveCollisions, "Excessive Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "LateCollisions", &stats->dot3StatsLateCollisions, "Late Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", &stats->ifHCOutUcastPkts, "Outbound Unicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", &stats->ifHCOutMulticastPkts, "Outbound Multicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", &stats->ifHCOutBroadcastPkts, "Outbound Broadcast Packets"); } #undef BGE_SYSCTL_STAT_ADD64 static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; uint32_t result; int offset; sc = (struct bge_softc *)arg1; offset = arg2; result = CSR_READ_4(sc, BGE_MEMWIN_START + BGE_STATS_BLOCK + offset + offsetof(bge_hostaddr, bge_addr_lo)); return (sysctl_handle_int(oidp, &result, 0, req)); } #ifdef BGE_REGISTER_DEBUG static int bge_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; uint16_t *sbdata; int error, result, sbsz; int i, j; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result == 1) { sc = (struct bge_softc *)arg1; if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) sbsz = BGE_STATUS_BLK_SZ; else sbsz = 32; sbdata = (uint16_t *)sc->bge_ldata.bge_status_block; printf("Status Block:\n"); BGE_LOCK(sc); bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = 0x0; i < sbsz / sizeof(uint16_t); ) { printf("%06x:", i); for (j = 0; j < 8; j++) printf(" %04x", sbdata[i++]); printf("\n"); } printf("Registers:\n"); for (i = 0x800; i < 0xA00; ) { printf("%06x:", i); for (j = 0; j < 8; j++) { printf(" %08x", CSR_READ_4(sc, i)); i += 4; } printf("\n"); } BGE_UNLOCK(sc); printf("Hardware Flags:\n"); if (BGE_IS_5717_PLUS(sc)) printf(" - 5717 Plus\n"); if (BGE_IS_5755_PLUS(sc)) printf(" - 5755 Plus\n"); if (BGE_IS_575X_PLUS(sc)) printf(" - 575X Plus\n"); if (BGE_IS_5705_PLUS(sc)) printf(" - 5705 Plus\n"); if (BGE_IS_5714_FAMILY(sc)) printf(" - 5714 Family\n"); if (BGE_IS_5700_FAMILY(sc)) printf(" - 5700 Family\n"); if (sc->bge_flags & BGE_FLAG_JUMBO) printf(" - Supports Jumbo Frames\n"); if (sc->bge_flags & BGE_FLAG_PCIX) printf(" - PCI-X Bus\n"); if (sc->bge_flags & BGE_FLAG_PCIE) printf(" - PCI Express Bus\n"); if (sc->bge_phy_flags & BGE_PHY_NO_3LED) printf(" - No 3 LEDs\n"); if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) printf(" - RX Alignment Bug\n"); } return (error); } static int bge_sysctl_reg_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = CSR_READ_4(sc, result); printf("reg 0x%06X = 0x%08X\n", result, val); } return (error); } static int bge_sysctl_ape_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = APE_READ_4(sc, result); printf("reg 0x%06X = 0x%08X\n", result, val); } return (error); } static int bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = bge_readmem_ind(sc, result); printf("mem 0x%06X = 0x%08X\n", result, val); } return (error); } #endif static int bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[]) { if (sc->bge_flags & BGE_FLAG_EADDR) return (1); #ifdef __sparc64__ OF_getetheraddr(sc->bge_dev, ether_addr); return (0); #endif return (1); } static int bge_get_eaddr_mem(struct bge_softc *sc, uint8_t ether_addr[]) { uint32_t mac_addr; mac_addr = bge_readmem_ind(sc, BGE_SRAM_MAC_ADDR_HIGH_MB); if ((mac_addr >> 16) == 0x484b) { ether_addr[0] = (uint8_t)(mac_addr >> 8); ether_addr[1] = (uint8_t)mac_addr; mac_addr = bge_readmem_ind(sc, BGE_SRAM_MAC_ADDR_LOW_MB); ether_addr[2] = (uint8_t)(mac_addr >> 24); ether_addr[3] = (uint8_t)(mac_addr >> 16); ether_addr[4] = (uint8_t)(mac_addr >> 8); ether_addr[5] = (uint8_t)mac_addr; return (0); } return (1); } static int bge_get_eaddr_nvram(struct bge_softc *sc, uint8_t ether_addr[]) { int mac_offset = BGE_EE_MAC_OFFSET; if (sc->bge_asicrev == BGE_ASICREV_BCM5906) mac_offset = BGE_EE_MAC_OFFSET_5906; return (bge_read_nvram(sc, ether_addr, mac_offset + 2, ETHER_ADDR_LEN)); } static int bge_get_eaddr_eeprom(struct bge_softc *sc, uint8_t ether_addr[]) { if (sc->bge_asicrev == BGE_ASICREV_BCM5906) return (1); return (bge_read_eeprom(sc, ether_addr, BGE_EE_MAC_OFFSET + 2, ETHER_ADDR_LEN)); } static int bge_get_eaddr(struct bge_softc *sc, uint8_t eaddr[]) { static const bge_eaddr_fcn_t bge_eaddr_funcs[] = { /* NOTE: Order is critical */ bge_get_eaddr_fw, bge_get_eaddr_mem, bge_get_eaddr_nvram, bge_get_eaddr_eeprom, NULL }; const bge_eaddr_fcn_t *func; for (func = bge_eaddr_funcs; *func != NULL; ++func) { if ((*func)(sc, eaddr) == 0) break; } return (*func == NULL ? ENXIO : 0); } static uint64_t bge_get_counter(if_t ifp, ift_counter cnt) { struct bge_softc *sc; struct bge_mac_stats *stats; sc = if_getsoftc(ifp); if (!BGE_IS_5705_PLUS(sc)) return (if_get_counter_default(ifp, cnt)); stats = &sc->bge_mac_stats; switch (cnt) { case IFCOUNTER_IERRORS: return (stats->NoMoreRxBDs + stats->InputDiscards + stats->InputErrors); case IFCOUNTER_COLLISIONS: return (stats->etherStatsCollisions); default: return (if_get_counter_default(ifp, cnt)); } } #ifdef NETDUMP static void -bge_netdump_init(if_t ifp, int *nmbufp, int *nclustp) +bge_netdump_init(if_t ifp, int *nrxr) { struct bge_softc *sc; sc = if_getsoftc(ifp); - *nmbufp += sc->bge_return_ring_cnt; - *nclustp += sc->bge_return_ring_cnt; + *nrxr = sc->bge_return_ring_cnt; } static void bge_netdump_event(if_t ifp, enum netdump_ev event) { struct bge_softc *sc; sc = if_getsoftc(ifp); switch (event) { case NETDUMP_START: sc->bge_flags &= ~BGE_FLAG_JUMBO_STD; break; default: break; } } static int bge_netdump_transmit(if_t ifp, struct mbuf *m) { struct bge_softc *sc; uint32_t prodidx; int error; + sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (1); - sc = if_getsoftc(ifp); prodidx = sc->bge_tx_prodidx; error = bge_encap(sc, &m, &prodidx); if (error == 0) bge_start_tx(sc, prodidx); return (error); } static int bge_netdump_poll(if_t ifp, int count) { struct bge_softc *sc; uint32_t rx_prod, tx_cons; sc = if_getsoftc(ifp); - if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (1); bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); (void)bge_rxeof(sc, rx_prod, 0); bge_txeof(sc, tx_cons); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/re/if_re.c =================================================================== --- user/markj/netdump/sys/dev/re/if_re.c (revision 331674) +++ user/markj/netdump/sys/dev/re/if_re.c (revision 331675) @@ -1,4160 +1,4155 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1997, 1998-2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver * * Written by Bill Paul * Senior Networking Software Engineer * Wind River Systems */ /* * This driver is designed to support RealTek's next generation of * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S, * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E. * * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible * with the older 8139 family, however it also supports a special * C+ mode of operation that provides several new performance enhancing * features. These include: * * o Descriptor based DMA mechanism. Each descriptor represents * a single packet fragment. Data buffers may be aligned on * any byte boundary. * * o 64-bit DMA * * o TCP/IP checksum offload for both RX and TX * * o High and normal priority transmit DMA rings * * o VLAN tag insertion and extraction * * o TCP large send (segmentation offload) * * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+ * programming API is fairly straightforward. The RX filtering, EEPROM * access and PHY access is the same as it is on the older 8139 series * chips. * * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the * same programming API and feature set as the 8139C+ with the following * differences and additions: * * o 1000Mbps mode * * o Jumbo frames * * o GMII and TBI ports/registers for interfacing with copper * or fiber PHYs * * o RX and TX DMA rings can have up to 1024 descriptors * (the 8139C+ allows a maximum of 64) * * o Slight differences in register layout from the 8139C+ * * The TX start and timer interrupt registers are at different locations * on the 8169 than they are on the 8139C+. Also, the status word in the * RX descriptor has a slightly different bit layout. The 8169 does not * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska' * copper gigE PHY. * * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs * (the 'S' stands for 'single-chip'). These devices have the same * programming API as the older 8169, but also have some vendor-specific * registers for the on-board PHY. The 8110S is a LAN-on-motherboard * part designed to be pin-compatible with the RealTek 8100 10/100 chip. * * This driver takes advantage of the RX and TX checksum offload and * VLAN tag insertion/extraction features. It also implements TX * interrupt moderation using the timer interrupt registers, which * significantly reduces TX interrupt load. There is also support * for jumbo frames, however the 8169/8169S/8110S can not transmit * jumbo frames larger than 7440, so the max MTU possible with this * driver is 7422 bytes. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(re, pci, 1, 1, 1); MODULE_DEPEND(re, ether, 1, 1, 1); MODULE_DEPEND(re, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* Tunables. */ static int intr_filter = 0; TUNABLE_INT("hw.re.intr_filter", &intr_filter); static int msi_disable = 0; TUNABLE_INT("hw.re.msi_disable", &msi_disable); static int msix_disable = 0; TUNABLE_INT("hw.re.msix_disable", &msix_disable); static int prefer_iomap = 0; TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap); #define RE_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) /* * Various supported device vendors/types and their names. */ static const struct rl_type re_devs[] = { { DLINK_VENDORID, DLINK_DEVICEID_528T, 0, "D-Link DGE-528(T) Gigabit Ethernet Adapter" }, { DLINK_VENDORID, DLINK_DEVICEID_530T_REVC, 0, "D-Link DGE-530(T) Gigabit Ethernet Adapter" }, { RT_VENDORID, RT_DEVICEID_8139, 0, "RealTek 8139C+ 10/100BaseTX" }, { RT_VENDORID, RT_DEVICEID_8101E, 0, "RealTek 810xE PCIe 10/100baseTX" }, { RT_VENDORID, RT_DEVICEID_8168, 0, "RealTek 8168/8111 B/C/CP/D/DP/E/F/G PCIe Gigabit Ethernet" }, { NCUBE_VENDORID, RT_DEVICEID_8168, 0, "TP-Link TG-3468 v2 (RTL8168) Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169, 0, "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169SC, 0, "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" }, { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0, "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" }, { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0, "Linksys EG1032 (RTL8169S) Gigabit Ethernet" }, { USR_VENDORID, USR_DEVICEID_997902, 0, "US Robotics 997902 (RTL8169S) Gigabit Ethernet" } }; static const struct rl_hwrev re_hwrevs[] = { { RL_HWREV_8139, RL_8139, "", RL_MTU }, { RL_HWREV_8139A, RL_8139, "A", RL_MTU }, { RL_HWREV_8139AG, RL_8139, "A-G", RL_MTU }, { RL_HWREV_8139B, RL_8139, "B", RL_MTU }, { RL_HWREV_8130, RL_8139, "8130", RL_MTU }, { RL_HWREV_8139C, RL_8139, "C", RL_MTU }, { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C", RL_MTU }, { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+", RL_MTU }, { RL_HWREV_8168B_SPIN1, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8169, RL_8169, "8169", RL_JUMBO_MTU }, { RL_HWREV_8169S, RL_8169, "8169S", RL_JUMBO_MTU }, { RL_HWREV_8110S, RL_8169, "8110S", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8100, RL_8139, "8100", RL_MTU }, { RL_HWREV_8101, RL_8139, "8101", RL_MTU }, { RL_HWREV_8100E, RL_8169, "8100E", RL_MTU }, { RL_HWREV_8101E, RL_8169, "8101E", RL_MTU }, { RL_HWREV_8102E, RL_8169, "8102E", RL_MTU }, { RL_HWREV_8102EL, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8103E, RL_8169, "8103E", RL_MTU }, { RL_HWREV_8401E, RL_8169, "8401E", RL_MTU }, { RL_HWREV_8402, RL_8169, "8402", RL_MTU }, { RL_HWREV_8105E, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8105E_SPIN1, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8106E, RL_8169, "8106E", RL_MTU }, { RL_HWREV_8168B_SPIN2, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168B_SPIN3, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168C, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP", RL_JUMBO_MTU_6K }, { RL_HWREV_8168D, RL_8169, "8168D/8111D", RL_JUMBO_MTU_9K }, { RL_HWREV_8168DP, RL_8169, "8168DP/8111DP", RL_JUMBO_MTU_9K }, { RL_HWREV_8168E, RL_8169, "8168E/8111E", RL_JUMBO_MTU_9K}, { RL_HWREV_8168E_VL, RL_8169, "8168E/8111E-VL", RL_JUMBO_MTU_6K}, { RL_HWREV_8168EP, RL_8169, "8168EP/8111EP", RL_JUMBO_MTU_9K}, { RL_HWREV_8168F, RL_8169, "8168F/8111F", RL_JUMBO_MTU_9K}, { RL_HWREV_8168G, RL_8169, "8168G/8111G", RL_JUMBO_MTU_9K}, { RL_HWREV_8168GU, RL_8169, "8168GU/8111GU", RL_JUMBO_MTU_9K}, { RL_HWREV_8168H, RL_8169, "8168H/8111H", RL_JUMBO_MTU_9K}, { RL_HWREV_8411, RL_8169, "8411", RL_JUMBO_MTU_9K}, { RL_HWREV_8411B, RL_8169, "8411B", RL_JUMBO_MTU_9K}, { 0, 0, NULL, 0 } }; static int re_probe (device_t); static int re_attach (device_t); static int re_detach (device_t); static int re_encap (struct rl_softc *, struct mbuf **); static void re_dma_map_addr (void *, bus_dma_segment_t *, int, int); static int re_allocmem (device_t, struct rl_softc *); static __inline void re_discard_rxbuf (struct rl_softc *, int); static int re_newbuf (struct rl_softc *, int); static int re_jumbo_newbuf (struct rl_softc *, int); static int re_rx_list_init (struct rl_softc *); static int re_jrx_list_init (struct rl_softc *); static int re_tx_list_init (struct rl_softc *); #ifdef RE_FIXUP_RX static __inline void re_fixup_rx (struct mbuf *); #endif static int re_rxeof (struct rl_softc *, int *); static void re_txeof (struct rl_softc *); #ifdef DEVICE_POLLING static int re_poll (struct ifnet *, enum poll_cmd, int); static int re_poll_locked (struct ifnet *, enum poll_cmd, int); #endif static int re_intr (void *); static void re_intr_msi (void *); static void re_tick (void *); static void re_int_task (void *, int); static void re_start (struct ifnet *); static void re_start_locked (struct ifnet *); static void re_start_tx (struct rl_softc *); static int re_ioctl (struct ifnet *, u_long, caddr_t); static void re_init (void *); static void re_init_locked (struct rl_softc *); static void re_stop (struct rl_softc *); static void re_watchdog (struct rl_softc *); static int re_suspend (device_t); static int re_resume (device_t); static int re_shutdown (device_t); static int re_ifmedia_upd (struct ifnet *); static void re_ifmedia_sts (struct ifnet *, struct ifmediareq *); static void re_eeprom_putbyte (struct rl_softc *, int); static void re_eeprom_getword (struct rl_softc *, int, u_int16_t *); static void re_read_eeprom (struct rl_softc *, caddr_t, int, int); static int re_gmii_readreg (device_t, int, int); static int re_gmii_writereg (device_t, int, int, int); static int re_miibus_readreg (device_t, int, int); static int re_miibus_writereg (device_t, int, int, int); static void re_miibus_statchg (device_t); static void re_set_jumbo (struct rl_softc *, int); static void re_set_rxmode (struct rl_softc *); static void re_reset (struct rl_softc *); static void re_setwol (struct rl_softc *); static void re_clrwol (struct rl_softc *); static void re_set_linkspeed (struct rl_softc *); NETDUMP_DEFINE(re); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include MODULE_DEPEND(re, netmap, 1, 1, 1); #endif /* !DEV_NETMAP */ #ifdef RE_DIAG static int re_diag (struct rl_softc *); #endif static void re_add_sysctls (struct rl_softc *); static int re_sysctl_stats (SYSCTL_HANDLER_ARGS); static int sysctl_int_range (SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_re_int_mod (SYSCTL_HANDLER_ARGS); static device_method_t re_methods[] = { /* Device interface */ DEVMETHOD(device_probe, re_probe), DEVMETHOD(device_attach, re_attach), DEVMETHOD(device_detach, re_detach), DEVMETHOD(device_suspend, re_suspend), DEVMETHOD(device_resume, re_resume), DEVMETHOD(device_shutdown, re_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, re_miibus_readreg), DEVMETHOD(miibus_writereg, re_miibus_writereg), DEVMETHOD(miibus_statchg, re_miibus_statchg), DEVMETHOD_END }; static driver_t re_driver = { "re", re_methods, sizeof(struct rl_softc) }; static devclass_t re_devclass; DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0); DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0); #define EE_SET(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) | x) #define EE_CLR(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) & ~x) /* * Send a read command and address to the EEPROM, check for ACK. */ static void re_eeprom_putbyte(struct rl_softc *sc, int addr) { int d, i; d = addr | (RL_9346_READ << sc->rl_eewidth); /* * Feed in each bit and strobe the clock. */ for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) { if (d & i) { EE_SET(RL_EE_DATAIN); } else { EE_CLR(RL_EE_DATAIN); } DELAY(100); EE_SET(RL_EE_CLK); DELAY(150); EE_CLR(RL_EE_CLK); DELAY(100); } } /* * Read a word of data stored in the EEPROM at address 'addr.' */ static void re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest) { int i; u_int16_t word = 0; /* * Send address of word we want to read. */ re_eeprom_putbyte(sc, addr); /* * Start reading bits from EEPROM. */ for (i = 0x8000; i; i >>= 1) { EE_SET(RL_EE_CLK); DELAY(100); if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT) word |= i; EE_CLR(RL_EE_CLK); DELAY(100); } *dest = word; } /* * Read a sequence of words from the EEPROM. */ static void re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt) { int i; u_int16_t word = 0, *ptr; CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); DELAY(100); for (i = 0; i < cnt; i++) { CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL); re_eeprom_getword(sc, off + i, &word); CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL); ptr = (u_int16_t *)(dest + (i * 2)); *ptr = word; } CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); } static int re_gmii_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); /* Let the rgephy driver read the GMEDIASTAT register */ if (reg == RL_GMEDIASTAT) { rval = CSR_READ_1(sc, RL_GMEDIASTAT); return (rval); } CSR_WRITE_4(sc, RL_PHYAR, reg << 16); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (rval & RL_PHYAR_BUSY) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY read failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (rval & RL_PHYAR_PHYDATA); } static int re_gmii_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) | (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (!(rval & RL_PHYAR_BUSY)) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY write failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (0); } static int re_miibus_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int16_t rval = 0; u_int16_t re8139_reg = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_readreg(dev, phy, reg); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); /* * Allow the rlphy driver to read the media status * register. If we have a link partner which does not * support NWAY, this is the register which will tell * us the results of parallel detection. */ case RL_MEDIASTAT: rval = CSR_READ_1(sc, RL_MEDIASTAT); return (rval); default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } rval = CSR_READ_2(sc, re8139_reg); if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) { /* 8139C+ has different bit layout. */ rval &= ~(BMCR_LOOP | BMCR_ISO); } return (rval); } static int re_miibus_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int16_t re8139_reg = 0; int rval = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_writereg(dev, phy, reg, data); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; if (sc->rl_type == RL_8139CPLUS) { /* 8139C+ has different bit layout. */ data &= ~(BMCR_LOOP | BMCR_ISO); } break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); break; default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } CSR_WRITE_2(sc, re8139_reg, data); return (0); } static void re_miibus_statchg(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->rl_miibus); ifp = sc->rl_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc->rl_flags &= ~RL_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->rl_flags |= RL_FLAG_LINK; break; case IFM_1000_T: if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) break; sc->rl_flags |= RL_FLAG_LINK; break; default: break; } } /* * RealTek controllers do not provide any interface to the RX/TX * MACs for resolved speed, duplex and flow-control parameters. */ } /* * Set the RX configuration and 64-bit multicast hash filter. */ static void re_set_rxmode(struct rl_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; uint32_t hashes[2] = { 0, 0 }; uint32_t h, rxfilt; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD; if ((sc->rl_flags & RL_FLAG_EARLYOFF) != 0) rxfilt |= RL_RXCFG_EARLYOFF; else if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) rxfilt |= RL_RXCFG_EARLYOFFV2; if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) { if (ifp->if_flags & IFF_PROMISC) rxfilt |= RL_RXCFG_RX_ALLPHYS; /* * Unlike other hardwares, we have to explicitly set * RL_RXCFG_RX_MULTI to receive multicast frames in * promiscuous mode. */ rxfilt |= RL_RXCFG_RX_MULTI; hashes[0] = hashes[1] = 0xffffffff; goto done; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); if (hashes[0] != 0 || hashes[1] != 0) { /* * For some unfathomable reason, RealTek decided to * reverse the order of the multicast hash registers * in the PCI Express parts. This means we have to * write the hash pattern in reverse order for those * devices. */ if ((sc->rl_flags & RL_FLAG_PCIE) != 0) { h = bswap32(hashes[0]); hashes[0] = bswap32(hashes[1]); hashes[1] = h; } rxfilt |= RL_RXCFG_RX_MULTI; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8168F) { /* Disable multicast filtering due to silicon bug. */ hashes[0] = 0xffffffff; hashes[1] = 0xffffffff; } done: CSR_WRITE_4(sc, RL_MAR0, hashes[0]); CSR_WRITE_4(sc, RL_MAR4, hashes[1]); CSR_WRITE_4(sc, RL_RXCFG, rxfilt); } static void re_reset(struct rl_softc *sc) { int i; RL_LOCK_ASSERT(sc); CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET); for (i = 0; i < RL_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET)) break; } if (i == RL_TIMEOUT) device_printf(sc->rl_dev, "reset never completed!\n"); if ((sc->rl_flags & RL_FLAG_MACRESET) != 0) CSR_WRITE_1(sc, 0x82, 1); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169S) re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0); } #ifdef RE_DIAG /* * The following routine is designed to test for a defect on some * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64# * lines connected to the bus, however for a 32-bit only card, they * should be pulled high. The result of this defect is that the * NIC will not work right if you plug it into a 64-bit slot: DMA * operations will be done with 64-bit transfers, which will fail * because the 64-bit data lines aren't connected. * * There's no way to work around this (short of talking a soldering * iron to the board), however we can detect it. The method we use * here is to put the NIC into digital loopback mode, set the receiver * to promiscuous mode, and then try to send a frame. We then compare * the frame data we sent to what was received. If the data matches, * then the NIC is working correctly, otherwise we know the user has * a defective NIC which has been mistakenly plugged into a 64-bit PCI * slot. In the latter case, there's no way the NIC can work correctly, * so we print out a message on the console and abort the device attach. */ static int re_diag(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mbuf *m0; struct ether_header *eh; struct rl_desc *cur_rx; u_int16_t status; u_int32_t rxstat; int total_len, i, error = 0, phyaddr; u_int8_t dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' }; u_int8_t src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' }; /* Allocate a single mbuf */ MGETHDR(m0, M_NOWAIT, MT_DATA); if (m0 == NULL) return (ENOBUFS); RL_LOCK(sc); /* * Initialize the NIC in test mode. This sets the chip up * so that it can send and receive frames, but performs the * following special functions: * - Puts receiver in promiscuous mode * - Enables digital loopback mode * - Leaves interrupts turned off */ ifp->if_flags |= IFF_PROMISC; sc->rl_testmode = 1; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); sc->rl_flags |= RL_FLAG_LINK; if (sc->rl_type == RL_8169) phyaddr = 1; else phyaddr = 0; re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET); for (i = 0; i < RL_TIMEOUT; i++) { status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR); if (!(status & BMCR_RESET)) break; } re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP); CSR_WRITE_2(sc, RL_ISR, RL_INTRS); DELAY(100000); /* Put some data in the mbuf */ eh = mtod(m0, struct ether_header *); bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN); bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN); eh->ether_type = htons(ETHERTYPE_IP); m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN; /* * Queue the packet, start transmission. * Note: IF_HANDOFF() ultimately calls re_start() for us. */ CSR_WRITE_2(sc, RL_ISR, 0xFFFF); RL_UNLOCK(sc); /* XXX: re_diag must not be called when in ALTQ mode */ IF_HANDOFF(&ifp->if_snd, m0, ifp); RL_LOCK(sc); m0 = NULL; /* Wait for it to propagate through the chip */ DELAY(100000); for (i = 0; i < RL_TIMEOUT; i++) { status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) == (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) break; DELAY(10); } if (i == RL_TIMEOUT) { device_printf(sc->rl_dev, "diagnostic failed, failed to receive packet in" " loopback mode\n"); error = EIO; goto done; } /* * The packet should have been dumped into the first * entry in the RX DMA ring. Grab it from there. */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap); m0 = sc->rl_ldata.rl_rx_desc[0].rx_m; sc->rl_ldata.rl_rx_desc[0].rx_m = NULL; eh = mtod(m0, struct ether_header *); cur_rx = &sc->rl_ldata.rl_rx_list[0]; total_len = RL_RXBYTES(cur_rx); rxstat = le32toh(cur_rx->rl_cmdstat); if (total_len != ETHER_MIN_LEN) { device_printf(sc->rl_dev, "diagnostic failed, received short packet\n"); error = EIO; goto done; } /* Test that the received packet data matches what we sent. */ if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) || bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) || ntohs(eh->ether_type) != ETHERTYPE_IP) { device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n"); device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n", dst, ":", src, ":", ETHERTYPE_IP); device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n", eh->ether_dhost, ":", eh->ether_shost, ":", ntohs(eh->ether_type)); device_printf(sc->rl_dev, "You may have a defective 32-bit " "NIC plugged into a 64-bit PCI slot.\n"); device_printf(sc->rl_dev, "Please re-install the NIC in a " "32-bit slot for proper operation.\n"); device_printf(sc->rl_dev, "Read the re(4) man page for more " "details.\n"); error = EIO; } done: /* Turn interface off, release resources */ sc->rl_testmode = 0; sc->rl_flags &= ~RL_FLAG_LINK; ifp->if_flags &= ~IFF_PROMISC; re_stop(sc); if (m0 != NULL) m_freem(m0); RL_UNLOCK(sc); return (error); } #endif /* * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int re_probe(device_t dev) { const struct rl_type *t; uint16_t devid, vendor; uint16_t revid, sdevid; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); revid = pci_get_revid(dev); sdevid = pci_get_subdevice(dev); if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) { if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) { /* * Only attach to rev. 3 of the Linksys EG1032 adapter. * Rev. 2 is supported by sk(4). */ return (ENXIO); } } if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) { if (revid != 0x20) { /* 8139, let rl(4) take care of this device. */ return (ENXIO); } } t = re_devs; for (i = 0; i < nitems(re_devs); i++, t++) { if (vendor == t->rl_vid && devid == t->rl_did) { device_set_desc(dev, t->rl_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } /* * Map a single buffer address. */ static void re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *addr; if (error) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); addr = arg; *addr = segs->ds_addr; } static int re_allocmem(device_t dev, struct rl_softc *sc) { bus_addr_t lowaddr; bus_size_t rx_list_size, tx_list_size; int error; int i; rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc); tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc); /* * Allocate the parent bus DMA tag appropriate for PCI. * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD * register should be set. However some RealTek chips are known * to be buggy on DAC handling, therefore disable DAC by limiting * DMA address space to 32bit. PCIe variants of RealTek chips * may not have the limitation. */ lowaddr = BUS_SPACE_MAXADDR; if ((sc->rl_flags & RL_FLAG_PCIE) == 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rl_parent_tag); if (error) { device_printf(dev, "could not allocate parent DMA tag\n"); return (error); } /* * Allocate map for TX mbufs. */ error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0, NULL, NULL, &sc->rl_ldata.rl_tx_mtag); if (error) { device_printf(dev, "could not allocate TX DMA tag\n"); return (error); } /* * Allocate map for RX mbufs. */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL, &sc->rl_ldata.rl_jrx_mtag); if (error) { device_printf(dev, "could not allocate jumbo RX DMA tag\n"); return (error); } } error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag); if (error) { device_printf(dev, "could not allocate RX DMA tag\n"); return (error); } /* * Allocate map for TX descriptor list. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, tx_list_size, 1, tx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_tx_list_tag); if (error) { device_printf(dev, "could not allocate TX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the TX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag, (void **)&sc->rl_ldata.rl_tx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_tx_list_map); if (error) { device_printf(dev, "could not allocate TX DMA ring\n"); return (error); } /* Load the map for the TX ring. */ sc->rl_ldata.rl_tx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list, tx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) { device_printf(dev, "could not load TX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for TX buffers */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0, &sc->rl_ldata.rl_tx_desc[i].tx_dmamap); if (error) { device_printf(dev, "could not create DMA map for TX\n"); return (error); } } /* * Allocate map for RX descriptor list. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, rx_list_size, 1, rx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_rx_list_tag); if (error) { device_printf(dev, "could not create RX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the RX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag, (void **)&sc->rl_ldata.rl_rx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_rx_list_map); if (error) { device_printf(dev, "could not allocate RX DMA ring\n"); return (error); } /* Load the map for the RX ring. */ sc->rl_ldata.rl_rx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list, rx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) { device_printf(dev, "could not load RX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for RX buffers */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for jumbo RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for jumbo RX\n"); return (error); } } } error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for RX\n"); return (error); } } /* Create DMA map for statistics. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_DUMP_ALIGN, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct rl_stats), 1, sizeof(struct rl_stats), 0, NULL, NULL, &sc->rl_ldata.rl_stag); if (error) { device_printf(dev, "could not create statistics DMA tag\n"); return (error); } /* Allocate DMA'able memory for statistics. */ error = bus_dmamem_alloc(sc->rl_ldata.rl_stag, (void **)&sc->rl_ldata.rl_stats, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_smap); if (error) { device_printf(dev, "could not allocate statistics DMA memory\n"); return (error); } /* Load the map for statistics. */ sc->rl_ldata.rl_stats_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, sc->rl_ldata.rl_stats, sizeof(struct rl_stats), re_dma_map_addr, &sc->rl_ldata.rl_stats_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_stats_addr == 0) { device_printf(dev, "could not load statistics DMA memory\n"); return (ENOMEM); } return (0); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int re_attach(device_t dev) { u_char eaddr[ETHER_ADDR_LEN]; u_int16_t as[ETHER_ADDR_LEN / 2]; struct rl_softc *sc; struct ifnet *ifp; const struct rl_hwrev *hw_rev; int capmask, error = 0, hwrev, i, msic, msixc, phy, reg, rid; u_int32_t cap, ctl; u_int16_t devid, re_did = 0; uint8_t cfg; sc = device_get_softc(dev); sc->rl_dev = dev; mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0); /* * Map control/status registers. */ pci_enable_busmaster(dev); devid = pci_get_device(dev); /* * Prefer memory space register mapping over IO space. * Because RTL8169SC does not seem to work when memory mapping * is used always activate io mapping. */ if (devid == RT_DEVICEID_8169SC) prefer_iomap = 1; if (prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(1); sc->rl_res_type = SYS_RES_MEMORY; /* RTL8168/8101E seems to use different BARs. */ if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E) sc->rl_res_id = PCIR_BAR(2); } else { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; } sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); if (sc->rl_res == NULL && prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); } if (sc->rl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } sc->rl_btag = rman_get_bustag(sc->rl_res); sc->rl_bhandle = rman_get_bushandle(sc->rl_res); msic = pci_msi_count(dev); msixc = pci_msix_count(dev); if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { sc->rl_flags |= RL_FLAG_PCIE; sc->rl_expcap = reg; } if (bootverbose) { device_printf(dev, "MSI count : %d\n", msic); device_printf(dev, "MSI-X count : %d\n", msixc); } if (msix_disable > 0) msixc = 0; if (msi_disable > 0) msic = 0; /* Prefer MSI-X to MSI. */ if (msixc > 0) { msixc = RL_MSI_MESSAGES; rid = PCIR_BAR(4); sc->rl_res_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->rl_res_pba == NULL) { device_printf(sc->rl_dev, "could not allocate MSI-X PBA resource\n"); } if (sc->rl_res_pba != NULL && pci_alloc_msix(dev, &msixc) == 0) { if (msixc == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI-X message\n", msixc); sc->rl_flags |= RL_FLAG_MSIX; } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSIX) == 0) { if (sc->rl_res_pba != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); sc->rl_res_pba = NULL; msixc = 0; } } /* Prefer MSI to INTx. */ if (msixc == 0 && msic > 0) { msic = RL_MSI_MESSAGES; if (pci_alloc_msi(dev, &msic) == 0) { if (msic == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI message\n", msic); sc->rl_flags |= RL_FLAG_MSI; /* Explicitly set MSI enable bit. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); cfg |= RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSI) == 0) msic = 0; } /* Allocate interrupt */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) { rid = 0; sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->rl_irq[0] == NULL) { device_printf(dev, "couldn't allocate IRQ resources\n"); error = ENXIO; goto fail; } } else { for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) { sc->rl_irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->rl_irq[i] == NULL) { device_printf(dev, "couldn't allocate IRQ resources for " "message %d\n", rid); error = ENXIO; goto fail; } } } if ((sc->rl_flags & RL_FLAG_MSI) == 0) { CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); if ((cfg & RL_CFG2_MSI) != 0) { device_printf(dev, "turning off MSI enable bit.\n"); cfg &= ~RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } /* Disable ASPM L0S/L1 and CLKREQ. */ if (sc->rl_expcap != 0) { cap = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CAP, 2); if ((cap & PCIEM_LINK_CAP_ASPM) != 0) { ctl = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CTL, 2); if ((ctl & (PCIEM_LINK_CTL_ECPM | PCIEM_LINK_CTL_ASPMC))!= 0) { ctl &= ~(PCIEM_LINK_CTL_ECPM | PCIEM_LINK_CTL_ASPMC); pci_write_config(dev, sc->rl_expcap + PCIER_LINK_CTL, ctl, 2); device_printf(dev, "ASPM disabled\n"); } } else device_printf(dev, "no ASPM capability\n"); } hw_rev = re_hwrevs; hwrev = CSR_READ_4(sc, RL_TXCFG); switch (hwrev & 0x70000000) { case 0x00000000: case 0x10000000: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000); hwrev &= (RL_TXCFG_HWREV | 0x80000000); break; default: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000); sc->rl_macrev = hwrev & 0x00700000; hwrev &= RL_TXCFG_HWREV; break; } device_printf(dev, "MAC rev. 0x%08x\n", sc->rl_macrev); while (hw_rev->rl_desc != NULL) { if (hw_rev->rl_rev == hwrev) { sc->rl_type = hw_rev->rl_type; sc->rl_hwrev = hw_rev; break; } hw_rev++; } if (hw_rev->rl_desc == NULL) { device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev); error = ENXIO; goto fail; } switch (hw_rev->rl_rev) { case RL_HWREV_8139CPLUS: sc->rl_flags |= RL_FLAG_FASTETHER | RL_FLAG_AUTOPAD; break; case RL_HWREV_8100E: case RL_HWREV_8101E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER; break; case RL_HWREV_8102E: case RL_HWREV_8102EL: case RL_HWREV_8102EL_SPIN1: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8103E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_MACSLEEP; break; case RL_HWREV_8401E: case RL_HWREV_8105E: case RL_HWREV_8105E_SPIN1: case RL_HWREV_8106E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8402: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ; break; case RL_HWREV_8168B_SPIN1: case RL_HWREV_8168B_SPIN2: sc->rl_flags |= RL_FLAG_WOLRXENB; /* FALLTHROUGH */ case RL_HWREV_8168B_SPIN3: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT; break; case RL_HWREV_8168C_SPIN2: sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168C: if (sc->rl_macrev == 0x00200000) sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168CP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168D: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168DP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WAIT_TXPOLL | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E_VL: case RL_HWREV_8168F: sc->rl_flags |= RL_FLAG_EARLYOFF; /* FALLTHROUGH */ case RL_HWREV_8411: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168EP: case RL_HWREV_8168G: case RL_HWREV_8411B: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8168GU: case RL_HWREV_8168H: if (pci_get_device(dev) == RT_DEVICEID_8101E) { /* RTL8106E(US), RTL8107E */ sc->rl_flags |= RL_FLAG_FASTETHER; } else sc->rl_flags |= RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8169_8110SB: case RL_HWREV_8169_8110SBL: case RL_HWREV_8169_8110SC: case RL_HWREV_8169_8110SCE: sc->rl_flags |= RL_FLAG_PHYWAKE; /* FALLTHROUGH */ case RL_HWREV_8169: case RL_HWREV_8169S: case RL_HWREV_8110S: sc->rl_flags |= RL_FLAG_MACRESET; break; default: break; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8139CPLUS) { sc->rl_cfg0 = RL_8139_CFG0; sc->rl_cfg1 = RL_8139_CFG1; sc->rl_cfg2 = 0; sc->rl_cfg3 = RL_8139_CFG3; sc->rl_cfg4 = RL_8139_CFG4; sc->rl_cfg5 = RL_8139_CFG5; } else { sc->rl_cfg0 = RL_CFG0; sc->rl_cfg1 = RL_CFG1; sc->rl_cfg2 = RL_CFG2; sc->rl_cfg3 = RL_CFG3; sc->rl_cfg4 = RL_CFG4; sc->rl_cfg5 = RL_CFG5; } /* Reset the adapter. */ RL_LOCK(sc); re_reset(sc); RL_UNLOCK(sc); /* Enable PME. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, sc->rl_cfg1); cfg |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, cfg); cfg = CSR_READ_1(sc, sc->rl_cfg5); cfg &= RL_CFG5_PME_STS; CSR_WRITE_1(sc, sc->rl_cfg5, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((sc->rl_flags & RL_FLAG_PAR) != 0) { /* * XXX Should have a better way to extract station * address from EEPROM. */ for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i); } else { sc->rl_eewidth = RL_9356_ADDR_LEN; re_read_eeprom(sc, (caddr_t)&re_did, 0, 1); if (re_did != 0x8129) sc->rl_eewidth = RL_9346_ADDR_LEN; /* * Get station address from the EEPROM. */ re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3); for (i = 0; i < ETHER_ADDR_LEN / 2; i++) as[i] = le16toh(as[i]); bcopy(as, eaddr, ETHER_ADDR_LEN); } if (sc->rl_type == RL_8169) { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN; sc->rl_txstart = RL_GTXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT; } else { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN; sc->rl_txstart = RL_TXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT; } error = re_allocmem(dev, sc); if (error) goto fail; re_add_sysctls(sc); ifp = sc->rl_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } /* Take controller out of deep sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); else CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } /* Take PHY out of power down mode. */ if ((sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) { CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) | 0x80); if (hw_rev->rl_rev == RL_HWREV_8401E) CSR_WRITE_1(sc, 0xD1, CSR_READ_1(sc, 0xD1) & ~0x08); } if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) { re_gmii_writereg(dev, 1, 0x1f, 0); re_gmii_writereg(dev, 1, 0x0e, 0); } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = re_ioctl; ifp->if_start = re_start; /* * RTL8168/8111C generates wrong IP checksummed frame if the * packet has IP options so disable TX checksum offloading. */ if (sc->rl_hwrev->rl_rev == RL_HWREV_8168C || sc->rl_hwrev->rl_rev == RL_HWREV_8168C_SPIN2 || sc->rl_hwrev->rl_rev == RL_HWREV_8168CP) { ifp->if_hwassist = 0; ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TSO4; } else { ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4; } ifp->if_hwassist |= CSUM_TSO; ifp->if_capenable = ifp->if_capabilities; ifp->if_init = re_init; IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN); ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN; IFQ_SET_READY(&ifp->if_snd); TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc); #define RE_PHYAD_INTERNAL 0 /* Do MII setup. */ phy = RE_PHYAD_INTERNAL; if (sc->rl_type == RL_8169) phy = 1; capmask = BMSR_DEFCAPMASK; if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) capmask &= ~BMSR_EXTSTAT; error = mii_attach(dev, &sc->rl_miibus, ifp, re_ifmedia_upd, re_ifmedia_sts, capmask, phy, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* VLAN capability setup */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; if (ifp->if_capabilities & IFCAP_HWCSUM) ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; /* Enable WOL if PM is supported. */ if (pci_find_cap(sc->rl_dev, PCIY_PMG, ®) == 0) ifp->if_capabilities |= IFCAP_WOL; ifp->if_capenable = ifp->if_capabilities; ifp->if_capenable &= ~(IFCAP_WOL_UCAST | IFCAP_WOL_MCAST); /* * Don't enable TSO by default. It is known to generate * corrupted TCP segments(bad TCP options) under certain * circumstances. */ ifp->if_hwassist &= ~CSUM_TSO; ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); #ifdef DEV_NETMAP re_netmap_attach(sc); #endif /* DEV_NETMAP */ #ifdef RE_DIAG /* * Perform hardware diagnostic on the original RTL8169. * Some 32-bit cards were incorrectly wired and would * malfunction if plugged into a 64-bit slot. */ if (hwrev == RL_HWREV_8169) { error = re_diag(sc); if (error) { device_printf(dev, "attach aborted due to hardware diag failure\n"); ether_ifdetach(ifp); goto fail; } } #endif #ifdef RE_TX_MODERATION intr_filter = 1; #endif /* Hook interrupt last to avoid having to lock softc */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, re_intr_msi, sc, &sc->rl_intrhand[0]); } else { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc, &sc->rl_intrhand[0]); } if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } NETDUMP_SET(ifp, re); fail: if (error) re_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int re_detach(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; int i, rid; sc = device_get_softc(dev); ifp = sc->rl_ifp; KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized")); /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif RL_LOCK(sc); #if 0 sc->suspended = 1; #endif re_stop(sc); RL_UNLOCK(sc); callout_drain(&sc->rl_stat_callout); taskqueue_drain(taskqueue_fast, &sc->rl_inttask); /* * Force off the IFF_UP flag here, in case someone * still had a BPF descriptor attached to this * interface. If they do, ether_ifdetach() will cause * the BPF code to try and clear the promisc mode * flag, which will bubble down to re_ioctl(), * which will try to call re_init() again. This will * turn the NIC back on and restart the MII ticker, * which will panic the system when the kernel tries * to invoke the re_tick() function that isn't there * anymore. */ ifp->if_flags &= ~IFF_UP; ether_ifdetach(ifp); } if (sc->rl_miibus) device_delete_child(dev, sc->rl_miibus); bus_generic_detach(dev); /* * The rest is resource deallocation, so we should already be * stopped here. */ if (sc->rl_intrhand[0] != NULL) { bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]); sc->rl_intrhand[0] = NULL; } if (ifp != NULL) { #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ if_free(ifp); } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) rid = 0; else rid = 1; if (sc->rl_irq[0] != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->rl_irq[0]); sc->rl_irq[0] = NULL; } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0) pci_release_msi(dev); if (sc->rl_res_pba) { rid = PCIR_BAR(4); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); } if (sc->rl_res) bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id, sc->rl_res); /* Unload and free the RX DMA ring memory and map */ if (sc->rl_ldata.rl_rx_list_tag) { if (sc->rl_ldata.rl_rx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map); if (sc->rl_ldata.rl_rx_list) bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag); } /* Unload and free the TX DMA ring memory and map */ if (sc->rl_ldata.rl_tx_list_tag) { if (sc->rl_ldata.rl_tx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map); if (sc->rl_ldata.rl_tx_list) bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag); } /* Destroy all the RX and TX buffer maps */ if (sc->rl_ldata.rl_tx_mtag) { for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { if (sc->rl_ldata.rl_tx_desc[i].tx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag, sc->rl_ldata.rl_tx_desc[i].tx_dmamap); } bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag); } if (sc->rl_ldata.rl_rx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_rx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_rx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag); } if (sc->rl_ldata.rl_jrx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_jrx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_jrx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_jrx_mtag); } /* Unload and free the stats buffer and map */ if (sc->rl_ldata.rl_stag) { if (sc->rl_ldata.rl_stats_addr) bus_dmamap_unload(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap); if (sc->rl_ldata.rl_stats) bus_dmamem_free(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_stats, sc->rl_ldata.rl_smap); bus_dma_tag_destroy(sc->rl_ldata.rl_stag); } if (sc->rl_parent_tag) bus_dma_tag_destroy(sc->rl_parent_tag); mtx_destroy(&sc->rl_mtx); return (0); } static __inline void re_discard_rxbuf(struct rl_softc *sc, int idx) { struct rl_desc *desc; struct rl_rxdesc *rxd; uint32_t cmdstat; if (sc->rl_ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) rxd = &sc->rl_ldata.rl_jrx_desc[idx]; else rxd = &sc->rl_ldata.rl_rx_desc[idx]; desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; cmdstat = rxd->rx_size; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); } static int re_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; #ifdef RE_FIXUP_RX /* * This is part of an evil trick to deal with non-x86 platforms. * The RealTek chip requires RX buffers to be aligned on 64-bit * boundaries, but that will hose non-x86 machines. To get around * this, we leave some empty space at the start of each buffer * and for non-x86 hosts, we copy the buffer back six bytes * to achieve word alignment. This is slightly more efficient * than allocating a new buffer, copying the contents, and * discarding the old buffer. */ m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_rx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_rx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } static int re_jumbo_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; #ifdef RE_FIXUP_RX m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_jrx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_jrx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_jrx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } #ifdef RE_FIXUP_RX static __inline void re_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src; for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN; } #endif static int re_tx_list_init(struct rl_softc *sc) { struct rl_desc *desc; int i; RL_LOCK_ASSERT(sc); bzero(sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) sc->rl_ldata.rl_tx_desc[i].tx_m = NULL; #ifdef DEV_NETMAP re_netmap_tx_init(sc); #endif /* DEV_NETMAP */ /* Set EOR. */ desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR); bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->rl_ldata.rl_tx_prodidx = 0; sc->rl_ldata.rl_tx_considx = 0; sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt; return (0); } static int re_rx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_rx_desc[i].rx_m = NULL; if ((error = re_newbuf(sc, i)) != 0) return (error); } #ifdef DEV_NETMAP re_netmap_rx_init(sc); #endif /* DEV_NETMAP */ /* Flush the RX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } static int re_jrx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_jrx_desc[i].rx_m = NULL; if ((error = re_jumbo_newbuf(sc, i)) != 0) return (error); } bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } /* * RX handler for C+ and 8169. For the gigE chips, we support * the reception of jumbo frames that have been fragmented * across multiple 2K mbuf cluster buffers. */ static int re_rxeof(struct rl_softc *sc, int *rx_npktsp) { struct mbuf *m; struct ifnet *ifp; int i, rxerr, total_len; struct rl_desc *cur_rx; u_int32_t rxstat, rxvlan; int jumbo, maxpkt = 16, rx_npkts = 0; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &rx_npkts)) return 0; #endif /* DEV_NETMAP */ if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) jumbo = 1; else jumbo = 0; /* Invalidate the descriptor memory */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0; i = RL_RX_DESC_NXT(sc, i)) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; cur_rx = &sc->rl_ldata.rl_rx_list[i]; rxstat = le32toh(cur_rx->rl_cmdstat); if ((rxstat & RL_RDESC_STAT_OWN) != 0) break; total_len = rxstat & sc->rl_rxlenmask; rxvlan = le32toh(cur_rx->rl_vlanctl); if (jumbo != 0) m = sc->rl_ldata.rl_jrx_desc[i].rx_m; else m = sc->rl_ldata.rl_rx_desc[i].rx_m; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (rxstat & (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) != (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) { /* * RTL8168C or later controllers do not * support multi-fragment packet. */ re_discard_rxbuf(sc, i); continue; } else if ((rxstat & RL_RDESC_STAT_EOF) == 0) { if (re_newbuf(sc, i) != 0) { /* * If this is part of a multi-fragment packet, * discard all the pieces. */ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } m->m_len = RE_RX_DESC_BUFLEN; if (sc->rl_head == NULL) sc->rl_head = sc->rl_tail = m; else { m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; sc->rl_tail = m; } continue; } /* * NOTE: for the 8139C+, the frame length field * is always 12 bits in size, but for the gigE chips, * it is 13 bits (since the max RX frame length is 16K). * Unfortunately, all 32 bits in the status word * were already used, so to make room for the extra * length bit, RealTek took out the 'frame alignment * error' bit and shifted the other status bits * over one slot. The OWN, EOR, FS and LS bits are * still in the same places. We have already extracted * the frame length and checked the OWN bit, so rather * than using an alternate bit mapping, we shift the * status bits one space to the right so we can evaluate * them using the 8169 status as though it was in the * same format as that of the 8139C+. */ if (sc->rl_type == RL_8169) rxstat >>= 1; /* * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be * set, but if CRC is clear, it will still be a valid frame. */ if ((rxstat & RL_RDESC_STAT_RXERRSUM) != 0) { rxerr = 1; if ((sc->rl_flags & RL_FLAG_JUMBOV2) == 0 && total_len > 8191 && (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT) rxerr = 0; if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); /* * If this is part of a multi-fragment packet, * discard all the pieces. */ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } } /* * If allocating a replacement mbuf fails, * reload the current one. */ if (jumbo != 0) rxerr = re_jumbo_newbuf(sc, i); else rxerr = re_newbuf(sc, i); if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } if (sc->rl_head != NULL) { if (jumbo != 0) m->m_len = total_len; else { m->m_len = total_len % RE_RX_DESC_BUFLEN; if (m->m_len == 0) m->m_len = RE_RX_DESC_BUFLEN; } /* * Special case: if there's 4 bytes or less * in this buffer, the mbuf can be discarded: * the last 4 bytes is the CRC, which we don't * care about anyway. */ if (m->m_len <= ETHER_CRC_LEN) { sc->rl_tail->m_len -= (ETHER_CRC_LEN - m->m_len); m_freem(m); } else { m->m_len -= ETHER_CRC_LEN; m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; } m = sc->rl_head; sc->rl_head = sc->rl_tail = NULL; m->m_pkthdr.len = total_len - ETHER_CRC_LEN; } else m->m_pkthdr.len = m->m_len = (total_len - ETHER_CRC_LEN); #ifdef RE_FIXUP_RX re_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; /* Do RX checksumming if enabled */ if (ifp->if_capenable & IFCAP_RXCSUM) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { /* Check IP header checksum */ if (rxstat & RL_RDESC_STAT_PROTOID) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; /* Check TCP/UDP checksum */ if ((RL_TCPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || (RL_UDPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } else { /* * RTL8168C/RTL816CP/RTL8111C/RTL8111CP */ if ((rxstat & RL_RDESC_STAT_PROTOID) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if (((rxstat & RL_RDESC_STAT_TCP) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || ((rxstat & RL_RDESC_STAT_UDP) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } } maxpkt--; if (rxvlan & RL_RDESC_VLANCTL_TAG) { m->m_pkthdr.ether_vtag = bswap16((rxvlan & RL_RDESC_VLANCTL_DATA)); m->m_flags |= M_VLANTAG; } RL_UNLOCK(sc); (*ifp->if_input)(ifp, m); RL_LOCK(sc); rx_npkts++; } /* Flush the RX DMA ring */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = i; if (rx_npktsp != NULL) *rx_npktsp = rx_npkts; if (maxpkt) return (EAGAIN); return (0); } static void re_txeof(struct rl_softc *sc) { struct ifnet *ifp; struct rl_txdesc *txd; u_int32_t txstat; int cons; cons = sc->rl_ldata.rl_tx_considx; if (cons == sc->rl_ldata.rl_tx_prodidx) return; ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, 0)) return; #endif /* DEV_NETMAP */ /* Invalidate the TX descriptor list */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (; cons != sc->rl_ldata.rl_tx_prodidx; cons = RL_TX_DESC_NXT(sc, cons)) { txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat); if (txstat & RL_TDESC_STAT_OWN) break; /* * We only stash mbufs in the last descriptor * in a fragment chain, which also happens to * be the only place where the TX status bits * are valid. */ if (txstat & RL_TDESC_CMD_EOF) { txd = &sc->rl_ldata.rl_tx_desc[cons]; bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); KASSERT(txd->tx_m != NULL, ("%s: freeing NULL mbufs!", __func__)); m_freem(txd->tx_m); txd->tx_m = NULL; if (txstat & (RL_TDESC_STAT_EXCESSCOL| RL_TDESC_STAT_COLCNT)) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (txstat & RL_TDESC_STAT_TXERRSUM) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } sc->rl_ldata.rl_tx_free++; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->rl_ldata.rl_tx_considx = cons; /* No changes made to the TX ring, so no flush needed */ if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) { #ifdef RE_TX_MODERATION /* * If not all descriptors have been reaped yet, reload * the timer so that we will eventually get another * interrupt that will cause us to re-enter this routine. * This is done in case the transmitter has gone idle. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif } else sc->rl_watchdog_timer = 0; } static void re_tick(void *xsc) { struct rl_softc *sc; struct mii_data *mii; sc = xsc; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_tick(mii); if ((sc->rl_flags & RL_FLAG_LINK) == 0) re_miibus_statchg(sc->rl_dev); /* * Reclaim transmitted frames here. Technically it is not * necessary to do here but it ensures periodic reclamation * regardless of Tx completion interrupt which seems to be * lost on PCIe based controllers under certain situations. */ re_txeof(sc); re_watchdog(sc); callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } #ifdef DEVICE_POLLING static int re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts = 0; RL_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rx_npkts = re_poll_locked(ifp, cmd, count); RL_UNLOCK(sc); return (rx_npkts); } static int re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts; RL_LOCK_ASSERT(sc); sc->rxcycles = count; re_rxeof(sc, &rx_npkts); re_txeof(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */ u_int16_t status; status = CSR_READ_2(sc, RL_ISR); if (status == 0xffff) return (rx_npkts); if (status) CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); /* * XXX check behaviour on receiver stalls. */ if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } } return (rx_npkts); } #endif /* DEVICE_POLLING */ static int re_intr(void *arg) { struct rl_softc *sc; uint16_t status; sc = arg; status = CSR_READ_2(sc, RL_ISR); if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0) return (FILTER_STRAY); CSR_WRITE_2(sc, RL_IMR, 0); taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask); return (FILTER_HANDLED); } static void re_int_task(void *arg, int npending) { struct rl_softc *sc; struct ifnet *ifp; u_int16_t status; int rval = 0; sc = arg; ifp = sc->rl_ifp; RL_LOCK(sc); status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->suspended || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW)) rval = re_rxeof(sc, NULL); /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. */ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & ( #ifdef RE_TX_MODERATION RL_ISR_TIMEOUT_EXPIRED| #else RL_ISR_TX_OK| #endif RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); RL_UNLOCK(sc); if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) { taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask); return; } CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); } static void re_intr_msi(void *xsc) { struct rl_softc *sc; struct ifnet *ifp; uint16_t intrs, status; sc = xsc; RL_LOCK(sc); ifp = sc->rl_ifp; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif /* Disable interrupts. */ CSR_WRITE_2(sc, RL_IMR, 0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } intrs = RL_INTRS_CPLUS; status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->rl_int_rx_act > 0) { intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); status &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); } if (status & (RL_ISR_TIMEOUT_EXPIRED | RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) { re_rxeof(sc, NULL); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (sc->rl_int_rx_mod != 0 && (status & (RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) != 0) { /* Rearm one-shot timer. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); sc->rl_int_rx_act = 1; } else { intrs |= RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN; sc->rl_int_rx_act = 0; } } } /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. */ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & (RL_ISR_TX_OK | RL_ISR_TX_ERR | RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); CSR_WRITE_2(sc, RL_IMR, intrs); } RL_UNLOCK(sc); } static int re_encap(struct rl_softc *sc, struct mbuf **m_head) { struct rl_txdesc *txd, *txd_last; bus_dma_segment_t segs[RL_NTXSEGS]; bus_dmamap_t map; struct mbuf *m_new; struct rl_desc *desc; int nsegs, prod; int i, error, ei, si; int padlen; uint32_t cmdstat, csum_flags, vlanctl; RL_LOCK_ASSERT(sc); M_ASSERTPKTHDR((*m_head)); /* * With some of the RealTek chips, using the checksum offload * support in conjunction with the autopadding feature results * in the transmission of corrupt frames. For example, if we * need to send a really small IP fragment that's less than 60 * bytes in size, and IP header checksumming is enabled, the * resulting ethernet frame that appears on the wire will * have garbled payload. To work around this, if TX IP checksum * offload is enabled, we always manually pad short frames out * to the minimum ethernet frame size. */ if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 && (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN && ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) { padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len; if (M_WRITABLE(*m_head) == 0) { /* Get a writable copy. */ m_new = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); if (m_new == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m_new; } if ((*m_head)->m_next != NULL || M_TRAILINGSPACE(*m_head) < padlen) { m_new = m_defrag(*m_head, M_NOWAIT); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } } else m_new = *m_head; /* * Manually pad short frames, and zero the pad space * to avoid leaking data. */ bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen); m_new->m_pkthdr.len += padlen; m_new->m_len = m_new->m_pkthdr.len; *m_head = m_new; } prod = sc->rl_ldata.rl_tx_prodidx; txd = &sc->rl_ldata.rl_tx_desc[prod]; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m_new = m_collapse(*m_head, M_NOWAIT, RL_NTXSEGS); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m_new; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check for number of available descriptors. */ if (sc->rl_ldata.rl_tx_free - nsegs <= 1) { bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); return (ENOBUFS); } bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Set up checksum offload. Note: checksum offload bits must * appear in all descriptors of a multi-descriptor transmit * attempt. This is according to testing done with an 8169 * chip. This is a requirement. */ vlanctl = 0; csum_flags = 0; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) != 0) { csum_flags |= RL_TDESC_CMD_LGSEND; vlanctl |= ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVALV2_SHIFT); } else { csum_flags |= RL_TDESC_CMD_LGSEND | ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVAL_SHIFT); } } else { /* * Unconditionally enable IP checksum if TCP or UDP * checksum is required. Otherwise, TCP/UDP checksum * doesn't make effects. */ if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { csum_flags |= RL_TDESC_CMD_IPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) csum_flags |= RL_TDESC_CMD_TCPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) csum_flags |= RL_TDESC_CMD_UDPCSUM; } else { vlanctl |= RL_TDESC_CMD_IPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) vlanctl |= RL_TDESC_CMD_TCPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) vlanctl |= RL_TDESC_CMD_UDPCSUMV2; } } } /* * Set up hardware VLAN tagging. Note: vlan tag info must * appear in all descriptors of a multi-descriptor * transmission attempt. */ if ((*m_head)->m_flags & M_VLANTAG) vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) | RL_TDESC_VLANCTL_TAG; si = prod; for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) { desc = &sc->rl_ldata.rl_tx_list[prod]; desc->rl_vlanctl = htole32(vlanctl); desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr)); cmdstat = segs[i].ds_len; if (i != 0) cmdstat |= RL_TDESC_CMD_OWN; if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1) cmdstat |= RL_TDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | csum_flags); sc->rl_ldata.rl_tx_free--; } /* Update producer index. */ sc->rl_ldata.rl_tx_prodidx = prod; /* Set EOF on the last descriptor. */ ei = RL_TX_DESC_PRV(sc, prod); desc = &sc->rl_ldata.rl_tx_list[ei]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF); desc = &sc->rl_ldata.rl_tx_list[si]; /* Set SOF and transfer ownership of packet to the chip. */ desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF); /* * Insure that the map for this transmission * is placed at the array index of the last descriptor * in this chain. (Swap last and first dmamaps.) */ txd_last = &sc->rl_ldata.rl_tx_desc[ei]; map = txd->tx_dmamap; txd->tx_dmamap = txd_last->tx_dmamap; txd_last->tx_dmamap = map; txd_last->tx_m = *m_head; return (0); } static void re_start(struct ifnet *ifp) { struct rl_softc *sc; sc = ifp->if_softc; RL_LOCK(sc); re_start_locked(ifp); RL_UNLOCK(sc); } /* * Main transmit routine for C+ and gigE NICs. */ static void re_start_locked(struct ifnet *ifp) { struct rl_softc *sc; struct mbuf *m_head; int queued; sc = ifp->if_softc; #ifdef DEV_NETMAP /* XXX is this necessary ? */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_kring *kring = &NA(ifp)->tx_rings[0]; if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) { /* kick the tx unit */ CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif sc->rl_watchdog_timer = 5; } return; } #endif /* DEV_NETMAP */ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) return; for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->rl_ldata.rl_tx_free > 1;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (re_encap(sc, &m_head) != 0) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); queued++; } if (queued == 0) { #ifdef RE_TX_MODERATION if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif return; } re_start_tx(sc); } static void re_start_tx(struct rl_softc *sc) { /* Flush the TX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION /* * Use the countdown timer for interrupt moderation. * 'TX done' interrupts are disabled. Instead, we reset the * countdown timer, which will begin counting until it hits * the value in the TIMERINT register, and then trigger an * interrupt. Each time we write to the TIMERCNT register, * the timer count is reset to 0. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif /* * Set a timeout in case the chip goes out to lunch. */ sc->rl_watchdog_timer = 5; } static void re_set_jumbo(struct rl_softc *sc, int jumbo) { if (sc->rl_hwrev->rl_rev == RL_HWREV_8168E_VL) { pci_set_max_read_req(sc->rl_dev, 4096); return; } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); if (jumbo != 0) { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) | RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | 0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | RL_CFG4_JUMBO_EN1); } } else { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) & ~RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~RL_CFG4_JUMBO_EN1); } } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: pci_set_max_read_req(sc->rl_dev, 4096); break; default: if (jumbo != 0) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } static void re_init(void *xsc) { struct rl_softc *sc = xsc; RL_LOCK(sc); re_init_locked(sc); RL_UNLOCK(sc); } static void re_init_locked(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mii_data *mii; uint32_t reg; uint16_t cfg; union { uint32_t align_dummy; u_char eaddr[ETHER_ADDR_LEN]; } eaddr; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel pending I/O and free all RX/TX buffers. */ re_stop(sc); /* Put controller into known state. */ re_reset(sc); /* * For C+ mode, initialize the RX descriptors and mbufs. */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { if (ifp->if_mtu > RL_MTU) { if (re_jrx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for jumbo RX buffers\n"); re_stop(sc); return; } /* Disable checksum offloading for jumbo frames. */ ifp->if_capenable &= ~(IFCAP_HWCSUM | IFCAP_TSO4); ifp->if_hwassist &= ~(RE_CSUM_FEATURES | CSUM_TSO); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } } re_set_jumbo(sc, ifp->if_mtu > RL_MTU); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && pci_get_device(sc->rl_dev) != RT_DEVICEID_8101E) { if (ifp->if_mtu > RL_MTU) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } re_tx_list_init(sc); /* * Enable C+ RX and TX mode, as well as VLAN stripping and * RX checksum offload. We must configure the C+ register * before all others. */ cfg = RL_CPLUSCMD_PCI_MRW; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cfg |= RL_CPLUSCMD_RXCSUM_ENB; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) cfg |= RL_CPLUSCMD_VLANSTRIP; if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) { cfg |= RL_CPLUSCMD_MACSTAT_DIS; /* XXX magic. */ cfg |= 0x0001; } else cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB; CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SC || sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) { reg = 0x000fff00; if ((CSR_READ_1(sc, sc->rl_cfg2) & RL_CFG2_PCI66MHZ) != 0) reg |= 0x000000ff; if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) reg |= 0x00f00000; CSR_WRITE_4(sc, 0x7c, reg); /* Disable interrupt mitigation. */ CSR_WRITE_2(sc, 0xe2, 0); } /* * Disable TSO if interface MTU size is greater than MSS * allowed in controller. */ if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } /* * Init our MAC address. Even though the chipset * documentation doesn't mention it, we need to enter "Config * register write enable" mode to modify the ID registers. */ /* Copy MAC address on stack to align. */ bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); CSR_WRITE_4(sc, RL_IDR0, htole32(*(u_int32_t *)(&eaddr.eaddr[0]))); CSR_WRITE_4(sc, RL_IDR4, htole32(*(u_int32_t *)(&eaddr.eaddr[4]))); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); /* * Load the addresses of the RX and TX lists into the chip. */ CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Disable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & ~0x00080000); } /* * Enable transmit and receive for pre-RTL8168G controllers. * RX/TX MACs should be enabled before RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) == 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); /* * Set the initial TX configuration. */ if (sc->rl_testmode) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON); else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS); } else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG); CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16); /* * Set the initial RX configuration. */ re_set_rxmode(sc); /* Configure interrupt moderation. */ if (sc->rl_type == RL_8169) { /* Magic from vendor. */ CSR_WRITE_2(sc, RL_INTRMOD, 0x5100); } /* * Enable transmit and receive for RTL8168G and later controllers. * RX/TX MACs should be enabled after RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); #ifdef DEVICE_POLLING /* * Disable interrupts if we are polling. */ if (ifp->if_capenable & IFCAP_POLLING) CSR_WRITE_2(sc, RL_IMR, 0); else /* otherwise ... */ #endif /* * Enable interrupts. */ if (sc->rl_testmode) CSR_WRITE_2(sc, RL_IMR, 0); else CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS); /* Set initial TX threshold */ sc->rl_txthresh = RL_TX_THRESH_INIT; /* Start RX/TX process. */ CSR_WRITE_4(sc, RL_MISSEDPKT, 0); /* * Initialize the timer interrupt register so that * a timer interrupt will be generated once the timer * reaches a certain number of ticks. The timer is * reloaded on each transmit. */ #ifdef RE_TX_MODERATION /* * Use timer interrupt register to moderate TX interrupt * moderation, which dramatically improves TX frame rate. */ if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800); else CSR_WRITE_4(sc, RL_TIMERINT, 0x400); #else /* * Use timer interrupt register to moderate RX interrupt * moderation. */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(sc->rl_int_rx_mod)); } else { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(0)); } #endif /* * For 8169 gigE NICs, set the max allowed RX packet * size so we can receive jumbo frames. */ if (sc->rl_type == RL_8169) { if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { /* * For controllers that use new jumbo frame scheme, * set maximum size of jumbo frame depending on * controller revisions. */ if (ifp->if_mtu > RL_MTU) CSR_WRITE_2(sc, RL_MAXRXPKTLEN, sc->rl_hwrev->rl_max_mtu + ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN + ETHER_CRC_LEN); else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && sc->rl_hwrev->rl_max_mtu == RL_MTU) { /* RTL810x has no jumbo frame support. */ CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383); } if (sc->rl_testmode) return; CSR_WRITE_1(sc, sc->rl_cfg1, CSR_READ_1(sc, sc->rl_cfg1) | RL_CFG1_DRVLOAD); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->rl_flags &= ~RL_FLAG_LINK; mii_mediachg(mii); sc->rl_watchdog_timer = 0; callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } /* * Set media options. */ static int re_ifmedia_upd(struct ifnet *ifp) { struct rl_softc *sc; struct mii_data *mii; int error; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); error = mii_mediachg(mii); RL_UNLOCK(sc); return (error); } /* * Report current media status. */ static void re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct rl_softc *sc; struct mii_data *mii; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; RL_UNLOCK(sc); } static int re_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct rl_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error = 0; switch (command) { case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > sc->rl_hwrev->rl_max_mtu || ((sc->rl_flags & RL_FLAG_FASTETHER) != 0 && ifr->ifr_mtu > RL_MTU)) { error = EINVAL; break; } RL_LOCK(sc); if (ifp->if_mtu != ifr->ifr_mtu) { ifp->if_mtu = ifr->ifr_mtu; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); ifp->if_hwassist &= ~CSUM_TSO; } VLAN_CAPABILITIES(ifp); } RL_UNLOCK(sc); break; case SIOCSIFFLAGS: RL_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (((ifp->if_flags ^ sc->rl_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) re_set_rxmode(sc); } else re_init_locked(sc); } else { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_stop(sc); } sc->rl_if_flags = ifp->if_flags; RL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: RL_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_set_rxmode(sc); RL_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->rl_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: { int mask, reinit; mask = ifr->ifr_reqcap ^ ifp->if_capenable; reinit = 0; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(re_poll, ifp); if (error) return (error); RL_LOCK(sc); /* Disable interrupts */ CSR_WRITE_2(sc, RL_IMR, 0x0000); ifp->if_capenable |= IFCAP_POLLING; RL_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupts. */ RL_LOCK(sc); CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); ifp->if_capenable &= ~IFCAP_POLLING; RL_UNLOCK(sc); } } #endif /* DEVICE_POLLING */ RL_LOCK(sc); if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= RE_CSUM_FEATURES; else ifp->if_hwassist &= ~RE_CSUM_FEATURES; reinit = 1; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; reinit = 1; } if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((IFCAP_TSO4 & ifp->if_capenable) != 0) ifp->if_hwassist |= CSUM_TSO; else ifp->if_hwassist &= ~CSUM_TSO; if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; /* TSO over VLAN requires VLAN hardware tagging. */ if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; reinit = 1; } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (mask & (IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWTSO)) != 0) reinit = 1; if ((mask & IFCAP_WOL) != 0 && (ifp->if_capabilities & IFCAP_WOL) != 0) { if ((mask & IFCAP_WOL_UCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_UCAST; if ((mask & IFCAP_WOL_MCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_MCAST; if ((mask & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } RL_UNLOCK(sc); VLAN_CAPABILITIES(ifp); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void re_watchdog(struct rl_softc *sc) { struct ifnet *ifp; RL_LOCK_ASSERT(sc); if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0) return; ifp = sc->rl_ifp; re_txeof(sc); if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) { if_printf(ifp, "watchdog timeout (missed Tx interrupts) " "-- recovering\n"); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); return; } if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); re_rxeof(sc, NULL); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void re_stop(struct rl_softc *sc) { int i; struct ifnet *ifp; struct rl_txdesc *txd; struct rl_rxdesc *rxd; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; sc->rl_watchdog_timer = 0; callout_stop(&sc->rl_stat_callout); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* * Disable accepting frames to put RX MAC into idle state. * Otherwise it's possible to get frames while stop command * execution is in progress and controller can DMA the frame * to already freed RX buffer during that period. */ CSR_WRITE_4(sc, RL_RXCFG, CSR_READ_4(sc, RL_RXCFG) & ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_MULTI | RL_RXCFG_RX_BROAD)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Enable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) | 0x00080000); } if ((sc->rl_flags & RL_FLAG_WAIT_TXPOLL) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_1(sc, sc->rl_txstart) & RL_TXSTART_START) == 0) break; DELAY(20); } if (i == 0) device_printf(sc->rl_dev, "stopping TX poll timed out!\n"); CSR_WRITE_1(sc, RL_COMMAND, 0x00); } else if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0) { CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB | RL_CMD_RX_ENB); if ((sc->rl_flags & RL_FLAG_CMDSTOP_WAIT_TXQ) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_QUEUE_EMPTY) != 0) break; DELAY(100); } if (i == 0) device_printf(sc->rl_dev, "stopping TXQ timed out!\n"); } } else CSR_WRITE_1(sc, RL_COMMAND, 0x00); DELAY(1000); CSR_WRITE_2(sc, RL_IMR, 0x0000); CSR_WRITE_2(sc, RL_ISR, 0xFFFF); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } /* Free the TX list buffers. */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { txd = &sc->rl_ldata.rl_tx_desc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } /* Free the RX list buffers. */ for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_rx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_jrx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } } } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. */ static int re_suspend(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); re_setwol(sc); sc->suspended = 1; RL_UNLOCK(sc); return (0); } /* * Device resume routine. Restore some PCI settings in case the BIOS * doesn't, re-enable busmastering, and restart the interface if * appropriate. */ static int re_resume(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); RL_LOCK(sc); ifp = sc->rl_ifp; /* Take controller out of sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); } /* * Clear WOL matching such that normal Rx filtering * wouldn't interfere with WOL patterns. */ re_clrwol(sc); /* reinitialize interface if necessary */ if (ifp->if_flags & IFF_UP) re_init_locked(sc); sc->suspended = 0; RL_UNLOCK(sc); return (0); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int re_shutdown(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); /* * Mark interface as down since otherwise we will panic if * interrupt comes in later on, which can happen in some * cases. */ sc->rl_ifp->if_flags &= ~IFF_UP; re_setwol(sc); RL_UNLOCK(sc); return (0); } static void re_set_linkspeed(struct rl_softc *sc) { struct mii_softc *miisc; struct mii_data *mii; int aneg, i, phyno; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_pollstat(mii); aneg = 0; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch IFM_SUBTYPE(mii->mii_media_active) { case IFM_10_T: case IFM_100_TX: return; case IFM_1000_T: aneg++; break; default: break; } } miisc = LIST_FIRST(&mii->mii_phys); phyno = miisc->mii_phy; LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); re_miibus_writereg(sc->rl_dev, phyno, MII_100T2CR, 0); re_miibus_writereg(sc->rl_dev, phyno, MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA); re_miibus_writereg(sc->rl_dev, phyno, MII_BMCR, BMCR_AUTOEN | BMCR_STARTNEG); DELAY(1000); if (aneg != 0) { /* * Poll link state until re(4) get a 10/100Mbps link. */ for (i = 0; i < MII_ANEGTICKS_GIGE; i++) { mii_pollstat(mii); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: return; default: break; } } RL_UNLOCK(sc); pause("relnk", hz); RL_LOCK(sc); } if (i == MII_ANEGTICKS_GIGE) device_printf(sc->rl_dev, "establishing a link failed, WOL may not work!"); } /* * No link, force MAC to have 100Mbps, full-duplex link. * MAC does not require reprogramming on resolved speed/duplex, * so this is just for completeness. */ mii->mii_media_status = IFM_AVALID | IFM_ACTIVE; mii->mii_media_active = IFM_ETHER | IFM_100_TX | IFM_FDX; } static void re_setwol(struct rl_softc *sc) { struct ifnet *ifp; int pmc; uint16_t pmstat; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; ifp = sc->rl_ifp; /* Put controller into sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } if ((ifp->if_capenable & IFCAP_WOL) != 0) { if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Disable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & ~0x00080000); } re_set_rxmode(sc); if ((sc->rl_flags & RL_FLAG_WOL_MANLINK) != 0) re_set_linkspeed(sc); if ((sc->rl_flags & RL_FLAG_WOLRXENB) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB); } /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); /* Enable PME. */ v = CSR_READ_1(sc, sc->rl_cfg1); v &= ~RL_CFG1_PME; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, v); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) v |= RL_CFG3_WOL_MAGIC; CSR_WRITE_1(sc, sc->rl_cfg3, v); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST | RL_CFG5_WOL_LANWAKE); if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0) v |= RL_CFG5_WOL_UCAST; if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((ifp->if_capenable & IFCAP_WOL) == 0 && (sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) & ~0x80); /* * It seems that hardware resets its link speed to 100Mbps in * power down mode so switching to 100Mbps in driver is not * needed. */ /* Request PME if WOL is requested. */ pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2); } static void re_clrwol(struct rl_softc *sc) { int pmc; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); CSR_WRITE_1(sc, sc->rl_cfg3, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST); v &= ~RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); } static void re_add_sysctls(struct rl_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int error; ctx = device_get_sysctl_ctx(sc->rl_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->rl_dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0, re_sysctl_stats, "I", "Statistics Information"); if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) return; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "int_rx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->rl_int_rx_mod, 0, sysctl_hw_re_int_mod, "I", "re RX interrupt moderation"); /* Pull in device tunables. */ sc->rl_int_rx_mod = RL_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->rl_dev), device_get_unit(sc->rl_dev), "int_rx_mod", &sc->rl_int_rx_mod); if (error == 0) { if (sc->rl_int_rx_mod < RL_TIMER_MIN || sc->rl_int_rx_mod > RL_TIMER_MAX) { device_printf(sc->rl_dev, "int_rx_mod value out of " "range; using default: %d\n", RL_TIMER_DEFAULT); sc->rl_int_rx_mod = RL_TIMER_DEFAULT; } } } static int re_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct rl_softc *sc; struct rl_stats *stats; int error, i, result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || req->newptr == NULL) return (error); if (result == 1) { sc = (struct rl_softc *)arg1; RL_LOCK(sc); if ((sc->rl_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); goto done; } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_PREREAD); CSR_WRITE_4(sc, RL_DUMPSTATS_HI, RL_ADDR_HI(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr | RL_DUMPSTATS_START)); for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_DUMPSTATS_LO) & RL_DUMPSTATS_START) == 0) break; DELAY(1000); } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_POSTREAD); RL_UNLOCK(sc); if (i == 0) { device_printf(sc->rl_dev, "DUMP statistics request timed out\n"); return (ETIMEDOUT); } done: stats = sc->rl_ldata.rl_stats; printf("%s statistics:\n", device_get_nameunit(sc->rl_dev)); printf("Tx frames : %ju\n", (uintmax_t)le64toh(stats->rl_tx_pkts)); printf("Rx frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_pkts)); printf("Tx errors : %ju\n", (uintmax_t)le64toh(stats->rl_tx_errs)); printf("Rx errors : %u\n", le32toh(stats->rl_rx_errs)); printf("Rx missed frames : %u\n", (uint32_t)le16toh(stats->rl_missed_pkts)); printf("Rx frame alignment errs : %u\n", (uint32_t)le16toh(stats->rl_rx_framealign_errs)); printf("Tx single collisions : %u\n", le32toh(stats->rl_tx_onecoll)); printf("Tx multiple collisions : %u\n", le32toh(stats->rl_tx_multicolls)); printf("Rx unicast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_ucasts)); printf("Rx broadcast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_bcasts)); printf("Rx multicast frames : %u\n", le32toh(stats->rl_rx_mcasts)); printf("Tx aborts : %u\n", (uint32_t)le16toh(stats->rl_tx_aborts)); printf("Tx underruns : %u\n", (uint32_t)le16toh(stats->rl_rx_underruns)); } return (error); } static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (arg1 == NULL) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_re_int_mod(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, RL_TIMER_MIN, RL_TIMER_MAX)); } #ifdef NETDUMP static void -re_netdump_init(struct ifnet *ifp, int *nmbufp, int *nclustp) +re_netdump_init(struct ifnet *ifp, int *nrxr) { struct rl_softc *sc; - sc = ifp->if_softc; - - *nmbufp += sc->rl_ldata.rl_rx_desc_cnt; - *nclustp += sc->rl_ldata.rl_rx_desc_cnt; + sc = if_getsoftc(ifp); + *nrxr = sc->rl_ldata.rl_rx_desc_cnt; } static void re_netdump_event(struct ifnet *ifp, enum netdump_ev event) { struct rl_softc *sc; - sc = ifp->if_softc; - + sc = if_getsoftc(ifp); switch (event) { case NETDUMP_START: sc->rl_flags &= ~RL_FLAG_JUMBOV2; break; default: break; } } static int re_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct rl_softc *sc; int error; - sc = ifp->if_softc; - - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) return (EBUSY); error = re_encap(sc, &m); if (error == 0) re_start_tx(sc); return (error); } static int re_netdump_poll(struct ifnet *ifp, int count) { struct rl_softc *sc; int error; - sc = ifp->if_softc; - - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || + (sc->rl_flags & RL_FLAG_LINK) == 0) return (EBUSY); re_txeof(sc); error = re_rxeof(sc, NULL); if (error != 0 && error != EAGAIN) return (error); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/dev/virtio/network/if_vtnet.c =================================================================== --- user/markj/netdump/sys/dev/virtio/network/if_vtnet.c (revision 331674) +++ user/markj/netdump/sys/dev/virtio/network/if_vtnet.c (revision 331675) @@ -1,4063 +1,4054 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Driver for VirtIO network devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "virtio_if.h" #include "opt_inet.h" #include "opt_inet6.h" static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); static int vtnet_attach(device_t); static int vtnet_detach(device_t); static int vtnet_suspend(device_t); static int vtnet_resume(device_t); static int vtnet_shutdown(device_t); static int vtnet_attach_completed(device_t); static int vtnet_config_change(device_t); static void vtnet_negotiate_features(struct vtnet_softc *); static void vtnet_setup_features(struct vtnet_softc *); static int vtnet_init_rxq(struct vtnet_softc *, int); static int vtnet_init_txq(struct vtnet_softc *, int); static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); static void vtnet_free_rxtx_queues(struct vtnet_softc *); static int vtnet_alloc_rx_filters(struct vtnet_softc *); static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); static int vtnet_setup_interface(struct vtnet_softc *); static int vtnet_change_mtu(struct vtnet_softc *, int); static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t vtnet_get_counter(struct ifnet *, ift_counter); static int vtnet_rxq_populate(struct vtnet_rxq *); static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_new_buf(struct vtnet_rxq *); static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_rxq_eof(struct vtnet_rxq *); static void vtnet_rx_vq_intr(void *); static void vtnet_rxq_tq_intr(void *, int); static int vtnet_txq_below_threshold(struct vtnet_txq *); static int vtnet_txq_notify(struct vtnet_txq *); static void vtnet_txq_free_mbufs(struct vtnet_txq *); static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, int *, int *, int *); static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, int, struct virtio_net_hdr *); static struct mbuf * vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int); #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); #else static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); static void vtnet_txq_tq_deferred(void *, int); #endif static void vtnet_txq_start(struct vtnet_txq *); static void vtnet_txq_tq_intr(void *, int); static int vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); static void vtnet_tx_start_all(struct vtnet_softc *); #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *); #endif static int vtnet_watchdog(struct vtnet_txq *); static void vtnet_accum_stats(struct vtnet_softc *, struct vtnet_rxq_stats *, struct vtnet_txq_stats *); static void vtnet_tick(void *); static void vtnet_start_taskqueues(struct vtnet_softc *); static void vtnet_free_taskqueues(struct vtnet_softc *); static void vtnet_drain_taskqueues(struct vtnet_softc *); static void vtnet_drain_rxtx_queues(struct vtnet_softc *); static void vtnet_stop_rendezvous(struct vtnet_softc *); static void vtnet_stop(struct vtnet_softc *); static int vtnet_virtio_reinit(struct vtnet_softc *); static void vtnet_init_rx_filters(struct vtnet_softc *); static int vtnet_init_rx_queues(struct vtnet_softc *); static int vtnet_init_tx_queues(struct vtnet_softc *); static int vtnet_init_rxtx_queues(struct vtnet_softc *); static void vtnet_set_active_vq_pairs(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init_locked(struct vtnet_softc *); static void vtnet_init(void *); static void vtnet_free_ctrl_vq(struct vtnet_softc *); static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); static void vtnet_attach_disable_promisc(struct vtnet_softc *); static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_rx_filter_vlan(struct vtnet_softc *); static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); static int vtnet_is_link_up(struct vtnet_softc *); static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(struct ifnet *); static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void vtnet_get_hwaddr(struct vtnet_softc *); static void vtnet_set_hwaddr(struct vtnet_softc *); static void vtnet_vlan_tag_remove(struct mbuf *); static void vtnet_set_rx_process_limit(struct vtnet_softc *); static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_rxq *); static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_txq *); static void vtnet_setup_queue_sysctl(struct vtnet_softc *); static void vtnet_setup_sysctl(struct vtnet_softc *); static int vtnet_rxq_enable_intr(struct vtnet_rxq *); static void vtnet_rxq_disable_intr(struct vtnet_rxq *); static int vtnet_txq_enable_intr(struct vtnet_txq *); static void vtnet_txq_disable_intr(struct vtnet_txq *); static void vtnet_enable_rx_interrupts(struct vtnet_softc *); static void vtnet_enable_tx_interrupts(struct vtnet_softc *); static void vtnet_enable_interrupts(struct vtnet_softc *); static void vtnet_disable_rx_interrupts(struct vtnet_softc *); static void vtnet_disable_tx_interrupts(struct vtnet_softc *); static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); NETDUMP_DEFINE(vtnet); /* Tunables. */ static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); static int vtnet_csum_disable = 0; TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable, 0, "Disables TCP Segmentation Offload"); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable, 0, "Disables TCP Large Receive Offload"); static int vtnet_mq_disable = 0; TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable, 0, "Disables Multi Queue support"); static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS; TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs"); static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &vtnet_rx_process_limit, 0, "Limits the number RX segments processed in a single pass"); static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { { VIRTIO_NET_F_CSUM, "TxChecksum" }, { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, { VIRTIO_NET_F_MAC, "MacAddress" }, { VIRTIO_NET_F_GSO, "TxAllGSO" }, { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, { VIRTIO_NET_F_STATUS, "Status" }, { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, { VIRTIO_NET_F_CTRL_RX, "RxMode" }, { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, { VIRTIO_NET_F_MQ, "Multiqueue" }, { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, { 0, NULL } }; static device_method_t vtnet_methods[] = { /* Device methods. */ DEVMETHOD(device_probe, vtnet_probe), DEVMETHOD(device_attach, vtnet_attach), DEVMETHOD(device_detach, vtnet_detach), DEVMETHOD(device_suspend, vtnet_suspend), DEVMETHOD(device_resume, vtnet_resume), DEVMETHOD(device_shutdown, vtnet_shutdown), /* VirtIO methods. */ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; #ifdef DEV_NETMAP #include #endif /* DEV_NETMAP */ static driver_t vtnet_driver = { "vtnet", vtnet_methods, sizeof(struct vtnet_softc) }; static devclass_t vtnet_devclass; DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); MODULE_VERSION(vtnet, 1); MODULE_DEPEND(vtnet, virtio, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(vtnet, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ static int vtnet_modevent(module_t mod, int type, void *unused) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded++ == 0) vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", sizeof(struct vtnet_tx_header), NULL, NULL, NULL, NULL, 0, 0); break; case MOD_QUIESCE: if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) error = EBUSY; break; case MOD_UNLOAD: if (--loaded == 0) { uma_zdestroy(vtnet_tx_header_zone); vtnet_tx_header_zone = NULL; } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static int vtnet_probe(device_t dev) { if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK) return (ENXIO); device_set_desc(dev, "VirtIO Networking Adapter"); return (BUS_PROBE_DEFAULT); } static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; /* Register our feature descriptions. */ virtio_set_feature_desc(dev, vtnet_feature_desc); VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); vtnet_setup_sysctl(sc); vtnet_setup_features(sc); error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx filters\n"); goto fail; } error = vtnet_alloc_rxtx_queues(sc); if (error) { device_printf(dev, "cannot allocate queues\n"); goto fail; } error = vtnet_alloc_virtqueues(sc); if (error) { device_printf(dev, "cannot allocate virtqueues\n"); goto fail; } error = vtnet_setup_interface(sc); if (error) { device_printf(dev, "cannot setup interface\n"); goto fail; } error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup virtqueue interrupts\n"); /* BMV: This will crash if during boot! */ ether_ifdetach(sc->vtnet_ifp); goto fail; } #ifdef DEV_NETMAP vtnet_netmap_attach(sc); #endif /* DEV_NETMAP */ vtnet_start_taskqueues(sc); fail: if (error) vtnet_detach(dev); return (error); } static int vtnet_detach(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; if (device_is_attached(dev)) { VTNET_CORE_LOCK(sc); vtnet_stop(sc); VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); vtnet_drain_taskqueues(sc); ether_ifdetach(ifp); } #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ vtnet_free_taskqueues(sc); if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; } if (sc->vtnet_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach); sc->vtnet_vlan_detach = NULL; } ifmedia_removeall(&sc->vtnet_media); if (ifp != NULL) { if_free(ifp); sc->vtnet_ifp = NULL; } vtnet_free_rxtx_queues(sc); vtnet_free_rx_filters(sc); if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); VTNET_CORE_LOCK_DESTROY(sc); return (0); } static int vtnet_suspend(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_resume(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) vtnet_init_locked(sc); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_shutdown(device_t dev) { /* * Suspend already does all of what we need to * do here; we just never expect to be resumed. */ return (vtnet_suspend(dev)); } static int vtnet_attach_completed(device_t dev) { vtnet_attach_disable_promisc(device_get_softc(dev)); return (0); } static int vtnet_config_change(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_update_link_status(sc); if (sc->vtnet_link_active != 0) vtnet_tx_start_all(sc); VTNET_CORE_UNLOCK(sc); return (0); } static void vtnet_negotiate_features(struct vtnet_softc *sc) { device_t dev; uint64_t mask, features; dev = sc->vtnet_dev; mask = 0; /* * TSO and LRO are only available when their corresponding checksum * offload feature is also negotiated. */ if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; } if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) mask |= VTNET_TSO_FEATURES; if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) mask |= VTNET_LRO_FEATURES; #ifndef VTNET_LEGACY_TX if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) mask |= VIRTIO_NET_F_MQ; #else mask |= VIRTIO_NET_F_MQ; #endif features = VTNET_FEATURES & ~mask; sc->vtnet_features = virtio_negotiate_features(dev, features); if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { /* * LRO without mergeable buffers requires special care. This * is not ideal because every receive buffer must be large * enough to hold the maximum TCP packet, the Ethernet header, * and the header. This requires up to 34 descriptors with * MCLBYTES clusters. If we do not have indirect descriptors, * LRO is disabled since the virtqueue will not contain very * many receive buffers. */ if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { device_printf(dev, "LRO disabled due to both mergeable buffers and " "indirect descriptors not negotiated\n"); features &= ~VTNET_LRO_FEATURES; sc->vtnet_features = virtio_negotiate_features(dev, features); } else sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } } static void vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; dev = sc->vtnet_dev; vtnet_negotiate_features(sc); if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) sc->vtnet_flags |= VTNET_FLAG_INDIRECT; if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { /* This feature should always be negotiated. */ sc->vtnet_flags |= VTNET_FLAG_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; else sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; else sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); } else sc->vtnet_max_vq_pairs = 1; if (sc->vtnet_max_vq_pairs > 1) { /* * Limit the maximum number of queue pairs to the lower of * the number of CPUs and the configured maximum. * The actual number of queues that get used may be less. */ int max; max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) { if (max > mp_ncpus) max = mp_ncpus; if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; if (max > 1) { sc->vtnet_requested_vq_pairs = max; sc->vtnet_flags |= VTNET_FLAG_MULTIQ; } } } } static int vtnet_init_rxq(struct vtnet_softc *sc, int id) { struct vtnet_rxq *rxq; rxq = &sc->vtnet_rxqs[id]; snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); rxq->vtnrx_sc = sc; rxq->vtnrx_id = id; rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); if (rxq->vtnrx_sg == NULL) return (ENOMEM); TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); return (rxq->vtnrx_tq == NULL ? ENOMEM : 0); } static int vtnet_init_txq(struct vtnet_softc *sc, int id) { struct vtnet_txq *txq; txq = &sc->vtnet_txqs[id]; snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); txq->vtntx_sc = sc; txq->vtntx_id = id; txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); if (txq->vtntx_sg == NULL) return (ENOMEM); #ifndef VTNET_LEGACY_TX txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vtntx_mtx); if (txq->vtntx_br == NULL) return (ENOMEM); TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); #endif TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, taskqueue_thread_enqueue, &txq->vtntx_tq); if (txq->vtntx_tq == NULL) return (ENOMEM); return (0); } static int vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) { int i, npairs, error; npairs = sc->vtnet_max_vq_pairs; sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) return (ENOMEM); for (i = 0; i < npairs; i++) { error = vtnet_init_rxq(sc, i); if (error) return (error); error = vtnet_init_txq(sc, i); if (error) return (error); } vtnet_setup_queue_sysctl(sc); return (0); } static void vtnet_destroy_rxq(struct vtnet_rxq *rxq) { rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; } if (mtx_initialized(&rxq->vtnrx_mtx) != 0) mtx_destroy(&rxq->vtnrx_mtx); } static void vtnet_destroy_txq(struct vtnet_txq *txq) { txq->vtntx_sc = NULL; txq->vtntx_id = -1; if (txq->vtntx_sg != NULL) { sglist_free(txq->vtntx_sg); txq->vtntx_sg = NULL; } #ifndef VTNET_LEGACY_TX if (txq->vtntx_br != NULL) { buf_ring_free(txq->vtntx_br, M_DEVBUF); txq->vtntx_br = NULL; } #endif if (mtx_initialized(&txq->vtntx_mtx) != 0) mtx_destroy(&txq->vtntx_mtx); } static void vtnet_free_rxtx_queues(struct vtnet_softc *sc) { int i; if (sc->vtnet_rxqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); free(sc->vtnet_rxqs, M_DEVBUF); sc->vtnet_rxqs = NULL; } if (sc->vtnet_txqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_txq(&sc->vtnet_txqs[i]); free(sc->vtnet_txqs, M_DEVBUF); sc->vtnet_txqs = NULL; } } static int vtnet_alloc_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_mac_filter == NULL) return (ENOMEM); } if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_vlan_filter == NULL) return (ENOMEM); } return (0); } static void vtnet_free_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_mac_filter != NULL) { free(sc->vtnet_mac_filter, M_DEVBUF); sc->vtnet_mac_filter = NULL; } if (sc->vtnet_vlan_filter != NULL) { free(sc->vtnet_vlan_filter, M_DEVBUF); sc->vtnet_vlan_filter = NULL; } } static int vtnet_alloc_virtqueues(struct vtnet_softc *sc) { device_t dev; struct vq_alloc_info *info; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, idx, flags, nvqs, error; dev = sc->vtnet_dev; flags = 0; nvqs = sc->vtnet_max_vq_pairs * 2; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) nvqs++; info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); if (info == NULL) return (ENOMEM); for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, vtnet_tx_vq_intr, txq, &txq->vtntx_vq, "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); } if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); } /* * Enable interrupt binding if this is multiqueue. This only matters * when per-vq MSIX is available. */ if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) flags |= 0; error = virtio_alloc_virtqueues(dev, flags, nvqs, info); free(info, M_TEMP); return (error); } static int vtnet_setup_interface(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (ENOSPC); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = IF_Gbps(10); /* Approx. */ ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vtnet_init; ifp->if_ioctl = vtnet_ioctl; ifp->if_get_counter = vtnet_get_counter; #ifndef VTNET_LEGACY_TX ifp->if_transmit = vtnet_txq_mq_start; ifp->if_qflush = vtnet_qflush; #else struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; ifp->if_start = vtnet_start; IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; IFQ_SET_READY(&ifp->if_snd); #endif ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, vtnet_ifmedia_sts); ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); /* Read (or generate) the MAC address for the adapter. */ vtnet_get_hwaddr(sc); ether_ifattach(ifp, sc->vtnet_hwaddr); if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) ifp->if_capabilities |= IFCAP_LINKSTATE; /* Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } else { if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) ifp->if_capabilities |= IFCAP_TSO4; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) ifp->if_capabilities |= IFCAP_TSO6; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } if (ifp->if_capabilities & IFCAP_TSO) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; } if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) ifp->if_capabilities |= IFCAP_LRO; } if (ifp->if_capabilities & IFCAP_HWCSUM) { /* * VirtIO does not support VLAN tagging, but we can fake * it by inserting and removing the 802.1Q header during * transmit and receive. We are then able to do checksum * offloading of VLAN frames. */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; } ifp->if_capenable = ifp->if_capabilities; /* * Capabilities after here are not enabled by default. */ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } vtnet_set_rx_process_limit(sc); vtnet_set_tx_intr_threshold(sc); NETDUMP_SET(ifp, vtnet); return (0); } static int vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) { struct ifnet *ifp; int frame_size, clsize; ifp = sc->vtnet_ifp; if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) return (EINVAL); frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + new_mtu; /* * Based on the new MTU (and hence frame size) determine which * cluster size is most appropriate for the receive queues. */ if (frame_size <= MCLBYTES) { clsize = MCLBYTES; } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { /* Avoid going past 9K jumbos. */ if (frame_size > MJUM9BYTES) return (EINVAL); clsize = MJUM9BYTES; } else clsize = MJUMPAGESIZE; ifp->if_mtu = new_mtu; sc->vtnet_rx_new_clsize = clsize; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } return (0); } static int vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vtnet_softc *sc; struct ifreq *ifr; int reinit, mask, error; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifp->if_mtu != ifr->ifr_mtu) { VTNET_CORE_LOCK(sc); error = vtnet_change_mtu(sc, ifr->ifr_mtu); VTNET_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: VTNET_CORE_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_stop(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc->vtnet_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) vtnet_rx_filter(sc); else { ifp->if_flags |= IFF_PROMISC; if ((ifp->if_flags ^ sc->vtnet_if_flags) & IFF_ALLMULTI) error = ENOTSUP; } } } else vtnet_init_locked(sc); if (error == 0) sc->vtnet_if_flags = ifp->if_flags; VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) break; VTNET_CORE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_rx_filter_mac(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); break; case SIOCSIFCAP: VTNET_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWFILTER)) { /* These Rx features require us to renegotiate. */ reinit = 1; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } else reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; vq = rxq->vtnrx_vq; error = ENOSPC; for (nbufs = 0; !virtqueue_full(vq); nbufs++) { error = vtnet_rxq_new_buf(rxq); if (error) break; } if (nbufs > 0) { virtqueue_notify(vq); /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. This is not * an error. */ if (error == EMSGSIZE) error = 0; } return (error); } static void vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) { struct virtqueue *vq; struct mbuf *m; int last; vq = rxq->vtnrx_vq; last = 0; while ((m = virtqueue_drain(vq, &last)) != NULL) m_freem(m); KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in rx queue %p", __func__, rxq)); } static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; int i, clsize; clsize = sc->vtnet_rx_clsize; KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); if (m_head == NULL) goto fail; m_head->m_len = clsize; m_tail = m_head; /* Allocate the rest of the chain. */ for (i = 1; i < nbufs; i++) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); if (m == NULL) goto fail; m->m_len = clsize; m_tail->m_next = m; m_tail = m; } if (m_tailp != NULL) *m_tailp = m_tail; return (m_head); fail: sc->vtnet_stats.mbuf_alloc_failed++; m_freem(m_head); return (NULL); } /* * Slow path for when LRO without mergeable buffers is negotiated. */ static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, int len0) { struct vtnet_softc *sc; struct mbuf *m, *m_prev; struct mbuf *m_new, *m_tail; int len, clsize, nreplace, error; sc = rxq->vtnrx_sc; clsize = sc->vtnet_rx_clsize; m_prev = NULL; m_tail = NULL; nreplace = 0; m = m0; len = len0; /* * Since these mbuf chains are so large, we avoid allocating an * entire replacement chain if possible. When the received frame * did not consume the entire chain, the unused mbufs are moved * to the replacement chain. */ while (len > 0) { /* * Something is seriously wrong if we received a frame * larger than the chain. Drop it. */ if (m == NULL) { sc->vtnet_stats.rx_frame_too_large++; return (EMSGSIZE); } /* We always allocate the same cluster size. */ KASSERT(m->m_len == clsize, ("%s: mbuf size %d is not the cluster size %d", __func__, m->m_len, clsize)); m->m_len = MIN(m->m_len, len); len -= m->m_len; m_prev = m; m = m->m_next; nreplace++; } KASSERT(nreplace <= sc->vtnet_rx_nmbufs, ("%s: too many replacement mbufs %d max %d", __func__, nreplace, sc->vtnet_rx_nmbufs)); m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); if (m_new == NULL) { m_prev->m_len = clsize; return (ENOBUFS); } /* * Move any unused mbufs from the received chain onto the end * of the new chain. */ if (m_prev->m_next != NULL) { m_tail->m_next = m_prev->m_next; m_prev->m_next = NULL; } error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * BAD! We could not enqueue the replacement mbuf chain. We * must restore the m0 chain to the original state if it was * modified so we can subsequently discard it. * * NOTE: The replacement is suppose to be an identical copy * to the one just dequeued so this is an unexpected error. */ sc->vtnet_stats.rx_enq_replacement_failed++; if (m_tail->m_next != NULL) { m_prev->m_next = m_tail->m_next; m_tail->m_next = NULL; } m_prev->m_len = clsize; m_freem(m_new); } return (error); } static int vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { struct vtnet_softc *sc; struct mbuf *m_new; int error; sc = rxq->vtnrx_sc; KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); if (m->m_next == NULL) { /* Fast-path for the common case of just one mbuf. */ if (m->m_len < len) return (EINVAL); m_new = vtnet_rx_alloc_buf(sc, 1, NULL); if (m_new == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * The new mbuf is suppose to be an identical * copy of the one just dequeued so this is an * unexpected error. */ m_freem(m_new); sc->vtnet_stats.rx_enq_replacement_failed++; } else m->m_len = len; } else error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); return (error); } static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { struct vtnet_softc *sc; struct sglist *sg; struct vtnet_rx_header *rxhdr; uint8_t *mdata; int offset, error; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; mdata = mtod(m, uint8_t *); VTNET_RXQ_LOCK_ASSERT(rxq); KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); KASSERT(m->m_len == sc->vtnet_rx_clsize, ("%s: unexpected cluster size %d/%d", __func__, m->m_len, sc->vtnet_rx_clsize)); sglist_reset(sg); if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); rxhdr = (struct vtnet_rx_header *) mdata; sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); offset = sizeof(struct vtnet_rx_header); } else offset = 0; sglist_append(sg, mdata + offset, m->m_len - offset); if (m->m_next != NULL) { error = sglist_append_mbuf(sg, m->m_next); MPASS(error == 0); } error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg); return (error); } static int vtnet_rxq_new_buf(struct vtnet_rxq *rxq) { struct vtnet_softc *sc; struct mbuf *m; int error; sc = rxq->vtnrx_sc; m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); if (m == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m); if (error) m_freem(m); return (error); } /* * Use the checksum offset in the VirtIO header to set the * correct CSUM_* flags. */ static int vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; #if defined(INET) || defined(INET6) int offset = hdr->csum_start + hdr->csum_offset; #endif sc = rxq->vtnrx_sc; /* Only do a basic sanity check on the offset. */ switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(offset < ip_start + sizeof(struct ip))) return (1); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } /* * Use the offset to determine the appropriate CSUM_* flags. This is * a bit dirty, but we can get by with it since the checksum offsets * happen to be different. We assume the host host does not do IPv4 * header checksum offloading. */ switch (hdr->csum_offset) { case offsetof(struct udphdr, uh_sum): case offsetof(struct tcphdr, th_sum): m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case offsetof(struct sctphdr, checksum): m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: sc->vtnet_stats.rx_csum_bad_offset++; return (1); } return (0); } static int vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int offset, proto; sc = rxq->vtnrx_sc; switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip; if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) return (1); ip = (struct ip *)(m->m_data + ip_start); proto = ip->ip_p; offset = ip_start + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < ip_start + sizeof(struct ip6_hdr))) return (1); offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); if (__predict_false(offset < 0)) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } switch (proto) { case IPPROTO_TCP: if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_UDP: if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_SCTP: if (__predict_false(m->m_len < offset + sizeof(struct sctphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: /* * For the remaining protocols, FreeBSD does not support * checksum offloading, so the checksum will be recomputed. */ #if 0 if_printf(sc->vtnet_ifp, "cksum offload of unsupported " "protocol eth_type=%#x proto=%d csum_start=%d " "csum_offset=%d\n", __func__, eth_type, proto, hdr->csum_start, hdr->csum_offset); #endif break; } return (0); } /* * Set the appropriate CSUM_* flags. Unfortunately, the information * provided is not directly useful to us. The VirtIO header gives the * offset of the checksum, which is all Linux needs, but this is not * how FreeBSD does things. We are forced to peek inside the packet * a bit. * * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD * could accept the offsets and let the stack figure it out. */ static int vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct ether_header *eh; struct ether_vlan_header *evh; uint16_t eth_type; int offset, error; eh = mtod(m, struct ether_header *); eth_type = ntohs(eh->ether_type); if (eth_type == ETHERTYPE_VLAN) { /* BMV: We should handle nested VLAN tags too. */ evh = mtod(m, struct ether_vlan_header *); eth_type = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else offset = sizeof(struct ether_header); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); else error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); return (error); } static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) { struct mbuf *m; while (--nbufs > 0) { m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); if (m == NULL) break; vtnet_rxq_discard_buf(rxq, m); } } static void vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) { int error; /* * Requeue the discarded mbuf. This should always be successful * since it was just dequeued. */ error = vtnet_rxq_enqueue_buf(rxq, m); KASSERT(error == 0, ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m, *m_tail; int len; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; m_tail = m_head; while (--nbufs > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) { rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } if (vtnet_rxq_new_buf(rxq) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } if (m->m_len < len) len = m->m_len; m->m_len = len; m->m_flags &= ~M_PKTHDR; m_head->m_pkthdr.len += len; m_tail->m_next = m; m_tail = m; } return (0); fail: sc->vtnet_stats.rx_mergeable_failed++; m_freem(m_head); return (1); } static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; struct ifnet *ifp; struct ether_header *eh; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { vtnet_vlan_tag_remove(m); /* * With the 802.1Q header removed, update the * checksum starting location accordingly. */ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; } } m->m_pkthdr.flowid = rxq->vtnrx_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); /* * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum * distinction that Linux does. Need to reevaluate if performing * offloading for the NEEDS_CSUM case is really appropriate. */ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; } rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; VTNET_RXQ_UNLOCK(rxq); (*ifp->if_input)(ifp, m); VTNET_RXQ_LOCK(rxq); } static int vtnet_rxq_eof(struct vtnet_rxq *rxq) { struct virtio_net_hdr lhdr, *hdr; struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m; struct virtio_net_hdr_mrg_rxbuf *mhdr; int len, deq, nbufs, adjsz, count; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; hdr = &lhdr; deq = 0; count = sc->vtnet_rx_process_limit; VTNET_RXQ_LOCK_ASSERT(rxq); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &deq)) { return (FALSE); } #endif /* DEV_NETMAP */ while (count-- > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { rxq->vtnrx_stats.vrxs_ierrors++; vtnet_rxq_discard_buf(rxq, m); continue; } if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* * Account for our pad inserted between the header * and the actual start of the frame. */ len += VTNET_RX_HEADER_PAD; } else { mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); nbufs = mhdr->num_buffers; adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; } m->m_pkthdr.len = len; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags = 0; if (nbufs > 1) { /* Dequeue the rest of chain. */ if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) continue; } /* * Save copy of header before we strip it. For both mergeable * and non-mergeable, the header is at the beginning of the * mbuf data. We no longer need num_buffers, so always use a * regular header. * * BMV: Is this memcpy() expensive? We know the mbuf data is * still valid even after the m_adj(). */ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); vtnet_rxq_input(rxq, m, hdr); /* Must recheck after dropping the Rx lock. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (deq > 0) virtqueue_notify(vq); return (count > 0 ? 0 : EAGAIN); } static void vtnet_rx_vq_intr(void *xrxq) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int tries, more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; tries = 0; if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_rxq_disable_intr(rxq); return; } VTNET_RXQ_LOCK(rxq); again: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); /* * This is an occasional condition or race (when !more), * so retry a few times before scheduling the taskqueue. */ if (tries++ < VTNET_INTR_DISABLE_RETRIES) goto again; VTNET_RXQ_UNLOCK(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } else VTNET_RXQ_UNLOCK(rxq); } static void vtnet_rxq_tq_intr(void *xrxq, int pending) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; VTNET_RXQ_LOCK(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } VTNET_RXQ_UNLOCK(rxq); } static int vtnet_txq_below_threshold(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct virtqueue *vq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); } static int vtnet_txq_notify(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; txq->vtntx_watchdog = VTNET_TX_TIMEOUT; virtqueue_notify(vq); if (vtnet_txq_enable_intr(txq) == 0) return (0); /* * Drain frames that were completed since last checked. If this * causes the queue to go above the threshold, the caller should * continue transmitting. */ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { virtqueue_disable_intr(vq); return (1); } return (0); } static void vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; int last; vq = txq->vtntx_vq; last = 0; while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in tx queue %p", __func__, txq)); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct vtnet_softc *sc; struct ether_vlan_header *evh; int offset; sc = txq->vtntx_sc; evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. */ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip, iphdr; if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = (struct ip *)(m->m_data + offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: sc->vtnet_stats.tx_csum_bad_ethtype++; return (EINVAL); } return (0); } static int vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, int offset, struct virtio_net_hdr *hdr) { static struct timeval lastecn; static int curecn; struct vtnet_softc *sc; struct tcphdr *tcp, tcphdr; sc = txq->vtntx_sc; if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else tcp = (struct tcphdr *)(m->m_data + offset); hdr->hdr_len = offset + (tcp->th_off << 2); hdr->gso_size = m->m_pkthdr.tso_segsz; hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6; if (tcp->th_flags & TH_CWR) { /* * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, * ECN support is not on a per-interface basis, but globally via * the net.inet.tcp.ecn.enable sysctl knob. The default is off. */ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { if (ppsratecheck(&lastecn, &curecn, 1)) if_printf(sc->vtnet_ifp, "TSO with ECN not negotiated with host\n"); return (ENOTSUP); } hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } txq->vtntx_stats.vtxs_tso++; return (0); } static struct mbuf * vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int flags, etype, csum_start, proto, error; sc = txq->vtntx_sc; flags = m->m_pkthdr.csum_flags; error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); if (error) goto drop; if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { /* * We could compare the IP protocol vs the CSUM_ flag too, * but that really should not be necessary. */ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = csum_start; hdr->csum_offset = m->m_pkthdr.csum_data; txq->vtntx_stats.vtxs_csum++; } if (flags & CSUM_TSO) { if (__predict_false(proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. */ sc->vtnet_stats.tx_tso_not_tcp++; goto drop; } KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, ("%s: mbuf %p TSO without checksum offload %#x", __func__, m, flags)); error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); if (error) goto drop; } return (m); drop: m_freem(m); return (NULL); } static int vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { struct vtnet_softc *sc; struct virtqueue *vq; struct sglist *sg; struct mbuf *m; int error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; sg = txq->vtntx_sg; m = *m_head; sglist_reset(sg); error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); KASSERT(error == 0 && sg->sg_nseg == 1, ("%s: error %d adding header to sglist", __func__, error)); error = sglist_append_mbuf(sg, m); if (error) { m = m_defrag(m, M_NOWAIT); if (m == NULL) goto fail; *m_head = m; sc->vtnet_stats.tx_defragged++; error = sglist_append_mbuf(sg, m); if (error) goto fail; } txhdr->vth_mbuf = m; error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); return (error); fail: sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } static int vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) { struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO); if (txhdr == NULL) { m_freem(m); *m_head = NULL; return (ENOMEM); } /* * Always use the non-mergeable header, regardless if the feature * was negotiated. For transmit, num_buffers is always zero. The * vtnet_hdr_size is used to enqueue the correct header size. */ hdr = &txhdr->vth_uhdr.hdr; if (m->m_flags & M_VLANTAG) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } m->m_flags &= ~M_VLANTAG; } if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); if (error == 0) return (0); fail: uma_zfree(vtnet_tx_header_zone, txhdr); return (error); } #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m0; int tries, enq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; tries = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) return; vtnet_txq_eof(txq); again: enq = 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (virtqueue_full(vq)) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { if (m0 != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m0); break; } enq++; ETHER_BPF_MTAP(ifp, m0); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } } static void vtnet_start(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; sc = ifp->if_softc; txq = &sc->vtnet_txqs[0]; VTNET_TXQ_LOCK(txq); vtnet_start_locked(txq, ifp); VTNET_TXQ_UNLOCK(txq); } #else /* !VTNET_LEGACY_TX */ static int vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) { struct vtnet_softc *sc; struct virtqueue *vq; struct buf_ring *br; struct ifnet *ifp; int enq, tries, error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; br = txq->vtntx_br; ifp = sc->vtnet_ifp; tries = 0; error = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL) { error = drbr_enqueue(ifp, br, m); if (error) return (error); } vtnet_txq_eof(txq); again: enq = 0; while ((m = drbr_peek(ifp, br)) != NULL) { if (virtqueue_full(vq)) { drbr_putback(ifp, br, m); break; } if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else drbr_advance(ifp, br); break; } drbr_advance(ifp, br); enq++; ETHER_BPF_MTAP(ifp, m); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } return (0); } static int vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int i, npairs, error; sc = ifp->if_softc; npairs = sc->vtnet_act_vq_pairs; /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % npairs; else i = curcpu % npairs; txq = &sc->vtnet_txqs[i]; if (VTNET_TXQ_TRYLOCK(txq) != 0) { error = vtnet_txq_mq_start_locked(txq, m); VTNET_TXQ_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->vtntx_br, m); taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); } return (error); } static void vtnet_txq_tq_deferred(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; txq = xtxq; sc = txq->vtntx_sc; VTNET_TXQ_LOCK(txq); if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); VTNET_TXQ_UNLOCK(txq); } #endif /* VTNET_LEGACY_TX */ static void vtnet_txq_start(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct ifnet *ifp; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; #ifdef VTNET_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vtnet_start_locked(txq, ifp); #else if (!drbr_empty(ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); #endif } static void vtnet_txq_tq_intr(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static int vtnet_txq_eof(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; struct mbuf *m; int deq; vq = txq->vtntx_vq; deq = 0; VTNET_TXQ_LOCK_ASSERT(txq); #ifdef DEV_NETMAP if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) { virtqueue_disable_intr(vq); // XXX luigi return 0; // XXX or 1 ? } #endif /* DEV_NETMAP */ while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { m = txhdr->vth_mbuf; deq++; txq->vtntx_stats.vtxs_opackets++; txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vtntx_stats.vtxs_omcasts++; m_freem(m); uma_zfree(vtnet_tx_header_zone, txhdr); } if (virtqueue_empty(vq)) txq->vtntx_watchdog = 0; return (deq); } static void vtnet_tx_vq_intr(void *xtxq) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_txq_disable_intr(txq); return; } VTNET_TXQ_LOCK(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static void vtnet_tx_start_all(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } } #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct mbuf *m; int i; sc = ifp->if_softc; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) m_freem(m); VTNET_TXQ_UNLOCK(txq); } if_qflush(ifp); } #endif static int vtnet_watchdog(struct vtnet_txq *txq) { struct ifnet *ifp; ifp = txq->vtntx_sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if (txq->vtntx_watchdog == 1) { /* * Only drain completed frames if the watchdog is about to * expire. If any frames were drained, there may be enough * free descriptors now available to transmit queued frames. * In that case, the timer will immediately be decremented * below, but the timeout is generous enough that should not * be a problem. */ if (vtnet_txq_eof(txq) != 0) vtnet_txq_start(txq); } if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { VTNET_TXQ_UNLOCK(txq); return (0); } VTNET_TXQ_UNLOCK(txq); if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); return (1); } static void vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, struct vtnet_txq_stats *txacc) { bzero(rxacc, sizeof(struct vtnet_rxq_stats)); bzero(txacc, sizeof(struct vtnet_txq_stats)); for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { struct vtnet_rxq_stats *rxst; struct vtnet_txq_stats *txst; rxst = &sc->vtnet_rxqs[i].vtnrx_stats; rxacc->vrxs_ipackets += rxst->vrxs_ipackets; rxacc->vrxs_ibytes += rxst->vrxs_ibytes; rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; rxacc->vrxs_csum += rxst->vrxs_csum; rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; txst = &sc->vtnet_txqs[i].vtntx_stats; txacc->vtxs_opackets += txst->vtxs_opackets; txacc->vtxs_obytes += txst->vtxs_obytes; txacc->vtxs_csum += txst->vtxs_csum; txacc->vtxs_tso += txst->vtxs_tso; txacc->vtxs_rescheduled += txst->vtxs_rescheduled; } } static uint64_t vtnet_get_counter(if_t ifp, ift_counter cnt) { struct vtnet_softc *sc; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; sc = if_getsoftc(ifp); vtnet_accum_stats(sc, &rxaccum, &txaccum); switch (cnt) { case IFCOUNTER_IPACKETS: return (rxaccum.vrxs_ipackets); case IFCOUNTER_IQDROPS: return (rxaccum.vrxs_iqdrops); case IFCOUNTER_IERRORS: return (rxaccum.vrxs_ierrors); case IFCOUNTER_OPACKETS: return (txaccum.vtxs_opackets); #ifndef VTNET_LEGACY_TX case IFCOUNTER_OBYTES: return (txaccum.vtxs_obytes); case IFCOUNTER_OMCASTS: return (txaccum.vtxs_omcasts); #endif default: return (if_get_counter_default(ifp, cnt)); } } static void vtnet_tick(void *xsc) { struct vtnet_softc *sc; struct ifnet *ifp; int i, timedout; sc = xsc; ifp = sc->vtnet_ifp; timedout = 0; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); if (timedout != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } else callout_schedule(&sc->vtnet_tick_ch, hz); } static void vtnet_start_taskqueues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, error; dev = sc->vtnet_dev; /* * Errors here are very difficult to recover from - we cannot * easily fail because, if this is during boot, we will hang * when freeing any successfully started taskqueues because * the scheduler isn't up yet. * * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); if (error) { device_printf(dev, "failed to start rx taskq %d\n", rxq->vtnrx_id); } txq = &sc->vtnet_txqs[i]; error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); if (error) { device_printf(dev, "failed to start tx taskq %d\n", txq->vtntx_id); } } } static void vtnet_free_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) { taskqueue_free(rxq->vtnrx_tq); rxq->vtnrx_vq = NULL; } txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_free(txq->vtntx_tq); txq->vtntx_tq = NULL; } } } static void vtnet_drain_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); #ifndef VTNET_LEGACY_TX taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); #endif } } } static void vtnet_drain_rxtx_queues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; #ifdef DEV_NETMAP if (nm_native_on(NA(sc->vtnet_ifp))) return; #endif /* DEV_NETMAP */ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; vtnet_rxq_free_mbufs(rxq); txq = &sc->vtnet_txqs[i]; vtnet_txq_free_mbufs(txq); } } static void vtnet_stop_rendezvous(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; /* * Lock and unlock the per-queue mutex so we known the stop * state is visible. Doing only the active queues should be * sufficient, but it does not cost much extra to do all the * queues. Note we hold the core mutex here too. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; VTNET_RXQ_LOCK(rxq); VTNET_RXQ_UNLOCK(rxq); txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); VTNET_TXQ_UNLOCK(txq); } } static void vtnet_stop(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); /* Only advisory. */ vtnet_disable_interrupts(sc); /* * Stop the host adapter. This resets it to the pre-initialized * state. It will not generate any interrupts until after it is * reinitialized. */ virtio_stop(dev); vtnet_stop_rendezvous(sc); /* Free any mbufs left in the virtqueues. */ vtnet_drain_rxtx_queues(sc); } static int vtnet_virtio_reinit(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint64_t features; int mask, error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_features; mask = 0; #if defined(INET) mask |= IFCAP_RXCSUM; #endif #if defined (INET6) mask |= IFCAP_RXCSUM_IPV6; #endif /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist. */ if (ifp->if_capabilities & mask) { /* * We require both IPv4 and IPv6 offloading to be enabled * in order to negotiated it: VirtIO does not distinguish * between the two. */ if ((ifp->if_capenable & mask) != mask) features &= ~VIRTIO_NET_F_GUEST_CSUM; } if (ifp->if_capabilities & IFCAP_LRO) { if ((ifp->if_capenable & IFCAP_LRO) == 0) features &= ~VTNET_LRO_FEATURES; } if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) { if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) features &= ~VIRTIO_NET_F_CTRL_VLAN; } error = virtio_reinit(dev, features); if (error) device_printf(dev, "virtio reinit error %d\n", error); return (error); } static void vtnet_init_rx_filters(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { /* Restore promiscuous and all-multicast modes. */ vtnet_rx_filter(sc); /* Restore filtered MAC addresses. */ vtnet_rx_filter_mac(sc); } if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) vtnet_rx_filter_vlan(sc); } static int vtnet_init_rx_queues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; int i, clsize, error; dev = sc->vtnet_dev; /* * Use the new cluster size if one has been set (via a MTU * change). Otherwise, use the standard 2K clusters. * * BMV: It might make sense to use page sized clusters as * the default (depending on the features negotiated). */ if (sc->vtnet_rx_new_clsize != 0) { clsize = sc->vtnet_rx_new_clsize; sc->vtnet_rx_new_clsize = 0; } else clsize = MCLBYTES; sc->vtnet_rx_clsize = clsize; sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, ("%s: too many rx mbufs %d for %d segments", __func__, sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); #ifdef DEV_NETMAP if (vtnet_netmap_init_rx_buffers(sc)) return 0; #endif /* DEV_NETMAP */ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; /* Hold the lock to satisfy asserts. */ VTNET_RXQ_LOCK(rxq); error = vtnet_rxq_populate(rxq); VTNET_RXQ_UNLOCK(rxq); if (error) { device_printf(dev, "cannot allocate mbufs for Rx queue %d\n", i); return (error); } } return (0); } static int vtnet_init_tx_queues(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; txq->vtntx_watchdog = 0; } return (0); } static int vtnet_init_rxtx_queues(struct vtnet_softc *sc) { int error; error = vtnet_init_rx_queues(sc); if (error) return (error); error = vtnet_init_tx_queues(sc); if (error) return (error); return (0); } static void vtnet_set_active_vq_pairs(struct vtnet_softc *sc) { device_t dev; int npairs; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { sc->vtnet_act_vq_pairs = 1; return; } npairs = sc->vtnet_requested_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, "cannot set active queue pairs to %d\n", npairs); npairs = 1; } sc->vtnet_act_vq_pairs = npairs; } static int vtnet_reinit(struct vtnet_softc *sc) { struct ifnet *ifp; int error; ifp = sc->vtnet_ifp; /* Use the current MAC address. */ bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); vtnet_set_hwaddr(sc); vtnet_set_active_vq_pairs(sc); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) vtnet_init_rx_filters(sc); error = vtnet_init_rxtx_queues(sc); if (error) return (error); vtnet_enable_interrupts(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; return (0); } static void vtnet_init_locked(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; vtnet_stop(sc); /* Reinitialize with the host. */ if (vtnet_virtio_reinit(sc) != 0) goto fail; if (vtnet_reinit(sc) != 0) goto fail; virtio_reinit_complete(dev); vtnet_update_link_status(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); return; fail: vtnet_stop(sc); } static void vtnet_init(void *xsc) { struct vtnet_softc *sc; sc = xsc; #ifdef DEV_NETMAP if (!NA(sc->vtnet_ifp)) { D("try to attach again"); vtnet_netmap_attach(sc); } #endif /* DEV_NETMAP */ VTNET_CORE_LOCK(sc); vtnet_init_locked(sc); VTNET_CORE_UNLOCK(sc); } static void vtnet_free_ctrl_vq(struct vtnet_softc *sc) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; /* * The control virtqueue is only polled and therefore it should * already be empty. */ KASSERT(virtqueue_empty(vq), ("%s: ctrl vq %p not empty", __func__, vq)); } static void vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, ("%s: CTRL_VQ feature not negotiated", __func__)); if (!virtqueue_empty(vq)) return; if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) return; /* * Poll for the response, but the command is likely already * done when we return from the notify. */ virtqueue_notify(vq); virtqueue_poll(vq, NULL); } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct sglist_seg segs[3]; struct sglist sg; uint8_t ack; int error; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding set MAC msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); return (ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; struct virtio_net_ctrl_mq mq; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; s.mq.virtqueue_pairs = npairs; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding MQ message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint8_t onoff; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = !!on; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding Rx message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_set_promisc(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); } static int vtnet_set_allmulti(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } /* * The device defaults to promiscuous mode for backwards compatibility. * Turn it off at attach time if possible. */ static void vtnet_attach_disable_promisc(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { ifp->if_flags |= IFF_PROMISC; } else if (vtnet_set_promisc(sc, 0) != 0) { ifp->if_flags |= IFF_PROMISC; device_printf(sc->vtnet_dev, "cannot disable default promiscuous mode\n"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_rx_filter(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) device_printf(dev, "cannot %s promiscuous mode\n", ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) device_printf(dev, "cannot %s all-multicast mode\n", ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); } static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct vtnet_mac_filter *filter; struct sglist_seg segs[4]; struct sglist sg; struct ifnet *ifp; struct ifaddr *ifa; struct ifmultiaddr *ifma; int ucnt, mcnt, promisc, allmulti, error; uint8_t ack; ifp = sc->vtnet_ifp; filter = sc->vtnet_mac_filter; ucnt = 0; mcnt = 0; promisc = 0; allmulti = 0; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) continue; else if (ucnt == VTNET_MAX_MAC_ENTRIES) { promisc = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); ucnt++; } if_addr_runlock(ifp); if (promisc != 0) { filter->vmf_unicast.nentries = 0; if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_unicast.nentries = ucnt; /* Multicast MAC addresses: */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; else if (mcnt == VTNET_MAX_MAC_ENTRIES) { allmulti = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (allmulti != 0) { filter->vmf_multicast.nentries = 0; if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_multicast.nentries = mcnt; if (promisc != 0 && allmulti != 0) goto out; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 4, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &filter->vmf_multicast, sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 4, ("%s: error %d adding MAC filter msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); out: if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint16_t tag; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; s.tag = tag; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding VLAN message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { uint32_t w; uint16_t tag; int i, bit; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, ("%s: VLAN_FILTER feature not negotiated", __func__)); /* Enable the filter for each configured VLAN. */ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { w = sc->vtnet_vlan_filter[i]; while ((bit = ffs(w) - 1) != -1) { w &= ~(1 << bit); tag = sizeof(w) * CHAR_BIT * i + bit; if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { device_printf(sc->vtnet_dev, "cannot enable VLAN %d filter\n", tag); } } } } static void vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VTNET_CORE_LOCK(sc); if (add) sc->vtnet_vlan_filter[idx] |= (1 << bit); else sc->vtnet_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && ifp->if_drv_flags & IFF_DRV_RUNNING && vtnet_exec_vlan_filter(sc, add, tag) != 0) { device_printf(sc->vtnet_dev, "cannot %s VLAN %d %s the host filter table\n", add ? "add" : "remove", tag, add ? "to" : "from"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 1, tag); } static void vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 0, tag); } static int vtnet_is_link_up(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint16_t status; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) status = VIRTIO_NET_S_LINK_UP; else status = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, status)); return ((status & VIRTIO_NET_S_LINK_UP) != 0); } static void vtnet_update_link_status(struct vtnet_softc *sc) { struct ifnet *ifp; int link; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); link = vtnet_is_link_up(sc); /* Notify if the link status has changed. */ if (link != 0 && sc->vtnet_link_active == 0) { sc->vtnet_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vtnet_link_active != 0) { sc->vtnet_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static int vtnet_ifmedia_upd(struct ifnet *ifp) { struct vtnet_softc *sc; struct ifmedia *ifm; sc = ifp->if_softc; ifm = &sc->vtnet_media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vtnet_softc *sc; sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= VTNET_MEDIATYPE; } else ifmr->ifm_active |= IFM_NONE; VTNET_CORE_UNLOCK(sc); } static void vtnet_set_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) device_printf(dev, "unable to set MAC address\n"); } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { for (i = 0; i < ETHER_ADDR_LEN; i++) { virtio_write_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i, sc->vtnet_hwaddr[i]); } } } static void vtnet_get_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { /* * Generate a random locally administered unicast address. * * It would be nice to generate the same MAC address across * reboots, but it seems all the hosts currently available * support the MAC feature, so this isn't too important. */ sc->vtnet_hwaddr[0] = 0xB2; arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); vtnet_set_hwaddr(sc); return; } for (i = 0; i < ETHER_ADDR_LEN; i++) { sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i); } } static void vtnet_vlan_tag_remove(struct mbuf *m) { struct ether_vlan_header *evh; evh = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); m->m_flags |= M_VLANTAG; /* Strip the 802.1Q header. */ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); } static void vtnet_set_rx_process_limit(struct vtnet_softc *sc) { int limit; limit = vtnet_tunable_int(sc, "rx_process_limit", vtnet_rx_process_limit); if (limit < 0) limit = INT_MAX; sc->vtnet_rx_process_limit = limit; } static void vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) { int size, thresh; size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); /* * The Tx interrupt is disabled until the queue free count falls * below our threshold. Completed frames are drained from the Tx * virtqueue before transmitting new frames and in the watchdog * callout, so the frequency of Tx interrupts is greatly reduced, * at the cost of not freeing mbufs as quickly as they otherwise * would be. * * N.B. We assume all the Tx queues are the same size. */ thresh = size / 4; /* * Without indirect descriptors, leave enough room for the most * segments we handle. */ if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && thresh < sc->vtnet_tx_nsegs) thresh = sc->vtnet_tx_nsegs; sc->vtnet_tx_intr_thresh = thresh; } static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_rxq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); list = SYSCTL_CHILDREN(node); stats = &rxq->vtnrx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vrxs_ierrors, "Receive errors"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vrxs_csum, "Receive checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, &stats->vrxs_csum_failed, "Receive checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); } static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_txq *txq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_txq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); list = SYSCTL_CHILDREN(node); stats = &txq->vtntx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vtxs_tso, "Transmit segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); } static void vtnet_setup_queue_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; int i; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); } } static void vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_softc *sc) { struct vtnet_statistics *stats; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; vtnet_accum_stats(sc, &rxaccum, &txaccum); stats = &sc->vtnet_stats; stats->rx_csum_offloaded = rxaccum.vrxs_csum; stats->rx_csum_failed = rxaccum.vrxs_csum_failed; stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; stats->tx_csum_offloaded = txaccum.vtxs_csum; stats->tx_tso_offloaded = txaccum.vtxs_tso; stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", CTLFLAG_RD, &stats->rx_csum_bad_proto, "Received checksum offloaded buffer with incorrect protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", CTLFLAG_RD, &stats->tx_csum_bad_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", CTLFLAG_RD, &stats->tx_tso_bad_ethtype, "Aborted transmit of TSO buffer with unknown Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", CTLFLAG_RD, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with non TCP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", CTLFLAG_RD, &stats->tx_defragged, "Transmit mbufs defragged"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", CTLFLAG_RD, &stats->tx_defrag_failed, "Aborted transmit of buffer because defrag failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", CTLFLAG_RD, &stats->tx_csum_offloaded, "Offloaded checksum of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", CTLFLAG_RD, &stats->tx_tso_offloaded, "Segmentation offload of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } static void vtnet_setup_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Maximum number of supported virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs", CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0, "Requested number of virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); vtnet_setup_stat_sysctl(ctx, child, sc); } static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { virtqueue_disable_intr(rxq->vtnrx_vq); } static int vtnet_txq_enable_intr(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; if (vtnet_txq_below_threshold(txq) != 0) return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); /* * The free count is above our threshold. Keep the Tx interrupt * disabled until the queue is fuller. */ return (0); } static void vtnet_txq_disable_intr(struct vtnet_txq *txq) { virtqueue_disable_intr(txq->vtntx_vq); } static void vtnet_enable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_enable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); } static void vtnet_enable_interrupts(struct vtnet_softc *sc) { vtnet_enable_rx_interrupts(sc); vtnet_enable_tx_interrupts(sc); } static void vtnet_disable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_disable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); } static void vtnet_disable_interrupts(struct vtnet_softc *sc) { vtnet_disable_rx_interrupts(sc); vtnet_disable_tx_interrupts(sc); } static int vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } #ifdef NETDUMP static void -vtnet_netdump_init(struct ifnet *ifp, int *nmbufp, int *nclustp) +vtnet_netdump_init(struct ifnet *ifp, int *nrxr) { struct vtnet_softc *sc; - sc = ifp->if_softc; + sc = if_getsoftc(ifp); + *nrxr = sc->vtnet_max_vq_pairs; /* - * Allocate enough packet buffers to fill an entire queue. This ought - * to be enough provided that we don't have many queues. - */ - *nmbufp += virtqueue_size(sc->vtnet_rxqs[0].vtnrx_vq); - *nclustp += virtqueue_size(sc->vtnet_rxqs[0].vtnrx_vq); - - /* * We need to allocate from this zone in the transmit path, so ensure * that we have at least one item per header available. * XXX add a separate zone like we do for mbufs? otherwise we may alloc * buckets */ uma_zone_reserve(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); uma_prealloc(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); } static void vtnet_netdump_event(struct ifnet *ifp, enum netdump_ev event) { struct vtnet_softc *sc; - sc = ifp->if_softc; - + sc = if_getsoftc(ifp); switch (event) { case NETDUMP_START: sc->vtnet_rx_clsize = MCLBYTES; break; default: break; } } static int vtnet_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int error; - sc = ifp->if_softc; - + sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &sc->vtnet_txqs[0]; error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); if (error == 0) error = vtnet_txq_notify(txq); return (error); } static int vtnet_netdump_poll(struct ifnet *ifp, int count) { struct vtnet_softc *sc; int i; - sc = ifp->if_softc; - + sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/net/iflib.c =================================================================== --- user/markj/netdump/sys/net/iflib.c (revision 331674) +++ user/markj/netdump/sys/net/iflib.c (revision 331675) @@ -1,6050 +1,6047 @@ /*- * Copyright (c) 2014-2017, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #include #include #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. */ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; int ifc_link_irq; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (&ctx->ifc_media); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define M_TOOBIG M_PROTO1 typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ uint8_t *ifsd_flags; } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ uint8_t *ifsd_flags; } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 /* bnxt supports 64 with hardware LRO enabled */ #define IFLIB_MAX_RX_SEGS 64 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define IFC_LEGACY 0x001 #define IFC_QFLUSH 0x002 #define IFC_MULTISEG 0x004 #define IFC_DMAR 0x008 #define IFC_SC_ALLOCATED 0x010 #define IFC_INIT_DONE 0x020 #define IFC_PREFETCH 0x040 #define IFC_DO_RESET 0x080 #define IFC_CHECK_HUNG 0x100 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_desc_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { /* If there is a separate completion queue - * these are the cq cidx and pidx. Otherwise * these are unused. */ qidx_t ifr_size; qidx_t ifr_cq_cidx; qidx_t ifr_cq_pidx; uint8_t ifr_cq_gen; uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; struct mbuf **ifsd_m; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ #define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else #define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; #endif } /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) #define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) #define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) #define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# tx mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# tx mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# tx mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# tx frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# rx allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; static int iflib_txq_drain_encapfail; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD, &iflib_txq_drain_encapfail, 0, "# drain encap fails"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_pad_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_intr_link; static int iflib_intr_msix; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_zero_len; static int iflib_rx_if_input; static int iflib_rx_mbuf_null; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD, &iflib_intr_link, 0, "# intr link calls"); SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD, &iflib_intr_msix, 0, "# intr msix calls"); SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# rx intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD, &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_txq_drain_encapfail = iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_intr_link = iflib_intr_msix = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input = iflib_rx_mbuf_null = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static int iflib_register(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif NETDUMP_DEFINE(iflib); #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_stop(ctx); iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) { struct netmap_adapter *na = kring->na; u_int const lim = kring->nkr_num_slots - 1; u_int head = kring->rhead; struct netmap_ring *ring = kring->ring; bus_dmamap_t *map; struct if_rxd_update iru; if_ctx_t ctx = rxq->ifr_ctx; iflib_fl_t fl = &rxq->ifr_fl[0]; uint32_t refill_pidx, nic_i; if (nm_i == head && __predict_true(!init)) return 0; iru_init(&iru, rxq, 0 /* flid */); map = fl->ifl_sds.ifsd_map; refill_pidx = netmap_idx_k2n(kring, nm_i); /* * IMPORTANT: we must leave one free slot in the ring, * so move head back by one unit */ head = nm_prev(head, lim); while (nm_i != head) { for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) { struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); uint32_t nic_i_dma = refill_pidx; nic_i = netmap_idx_k2n(kring, nm_i); MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ return netmap_ring_reinit(kring); fl->ifl_vm_addrs[tmp_pidx] = addr; if (__predict_false(init) && map) { netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); } else if (map && (slot->flags & NS_BUF_CHANGED)) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); } slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) continue; iru.iru_pidx = refill_pidx; iru.iru_count = tmp_pidx+1; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); refill_pidx = nic_i; if (map == NULL) continue; for (int n = 0; n < iru.iru_count; n++) { bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], BUS_DMASYNC_PREREAD); /* XXX - change this to not use the netmap func*/ nic_i_dma = nm_next(nic_i_dma, lim); } } } kring->nr_hwcur = head; if (map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); return (0); } /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap ring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur); pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); if (txq->ift_sds.ifsd_map) __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_len = len; pi.ipi_segs[0].ds_addr = paddr; pi.ipi_segs[0].ds_len = len; pi.ipi_nsegs = 1; pi.ipi_ndescs = 0; pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); if (txq->ift_sds.ifsd_map) { __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; /* synchronize the NIC ring */ if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. */ if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = netmap_idx_n2k(kring, kring->rhead); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { if (fl->ifl_sds.ifsd_map == NULL) continue; bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = nic_i; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = 0; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = netmap_idx_k2n(kring, nm_i); } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur); return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; if (txq->ift_sds.ifsd_map == NULL) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; uint32_t nm_i; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; nm_i = netmap_idx_n2k(kring, 0); netmap_fl_refill(rxq, kring, nm_i, true); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } static __inline void prefetch2cachelines(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); #if (CACHE_LINE_SIZE < 128) __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); #endif } #else #define prefetch(x) #define prefetch2cachelines(x) #endif static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) { iflib_fl_t fl; fl = &rxq->ifr_fl[flid]; iru->iru_paddrs = fl->ifl_bus_addrs; iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; iru->iru_idxs = fl->ifl_rxd_idxs; iru->iru_qsidx = rxq->ifr_id; iru->iru_buf_size = fl->ifl_buf_size; iru->iru_flidx = fl->ifl_id; } static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { int err; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ sctx->isc_q_align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. */ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; if_ctx_t ctx; int i, cidx; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; ctx = rxq->ifr_ctx; if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, char *name) { int rc, flags; struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int err, nsegments, ntsosegments; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* * Setup DMA descriptor areas. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_desc_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ scctx->isc_tx_tso_size_max, /* maxsize */ ntsosegments, /* nsegments */ scctx->isc_tx_tso_segsize_max, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_desc_tag))) { device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err); goto fail; } if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) return (0); if (!(txq->ift_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } #endif return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; map = NULL; if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[i]; if (map != NULL) { bus_dmamap_unload(txq->ift_desc_tag, map); bus_dmamap_destroy(txq->ift_desc_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_sds.ifsd_flags != NULL) { free(txq->ift_sds.ifsd_flags, M_IFLIB); txq->ift_sds.ifsd_flags = NULL; } if (txq->ift_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_desc_tag); txq->ift_desc_tag = NULL; } if (txq->ift_tso_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_desc_tag); txq->ift_tso_desc_tag = NULL; } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_desc_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, err); goto fail; } if (!(fl->ifl_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) continue; if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } #endif } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #ifdef ACPI_DMAR #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) #else #define IS_DMAR(ctx) (0) #endif /** * rxq_refill - refill an rxq free-buffer list * @ctx: the iflib context * @rxq: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; int idx, frag_idx = fl->ifl_fragidx; int pidx = fl->ifl_pidx; caddr_t cl, *sd_cl; struct mbuf **sd_m; uint8_t *sd_flags; struct if_rxd_update iru; bus_dmamap_t *sd_map; int n, i = 0; uint64_t bus_addr; int err; qidx_t credits; sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_flags = fl->ifl_sds.ifsd_flags; idx = pidx; credits = fl->ifl_credits; n = count; MPASS(n > 0); MPASS(credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru_init(&iru, fl->ifl_rxq, fl->ifl_id); while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if ((frag_idx < 0) || (frag_idx >= fl->ifl_size)) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); if ((cl = sd_cl[frag_idx]) == NULL) { if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); #if defined(__i386__) || defined(__amd64__) if (!IS_DMAR(ctx)) { bus_addr = pmap_kextract((vm_offset_t)cl); } else #endif { struct rxq_refill_cb_arg cb_arg; iflib_rxq_t q; cb_arg.error = 0; q = fl->ifl_rxq; MPASS(sd_map != NULL); MPASS(sd_map[frag_idx] != NULL); err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (err != 0 || cb_arg.error) { /* * !zone_pack ? */ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } bus_addr = cb_arg.seg.ds_addr; } bit_set(fl->ifl_rx_bitmap, frag_idx); sd_flags[frag_idx] |= RX_SW_DESC_INUSE; MPASS(sd_m[frag_idx] == NULL); sd_cl[frag_idx] = cl; sd_m[frag_idx] = m; fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; credits++; i++; MPASS(credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; fl->ifl_pidx = idx; fl->ifl_credits = credits; } } done: if (i) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; } DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; if (sd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); fl->ifl_fragidx = frag_idx; } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_flags & RX_SW_DESC_INUSE) { if (fl->ifl_sds.ifsd_map != NULL) { bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_unload(fl->ifl_desc_tag, sd_map); if (fl->ifl_rxq->ifr_ctx->ifc_in_detach) bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); } if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_flags[i] == 0); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= 2048) fl->ifl_buf_size = MCLBYTES; #ifndef CONTIGMALLOC_WORKS else fl->ifl_buf_size = MJUMPAGESIZE; #else else if (sctx->isc_max_frame_size <= 4096) fl->ifl_buf_size = MJUMPAGESIZE; else if (sctx->isc_max_frame_size <= 9216) fl->ifl_buf_size = MJUM9BYTES; else fl->ifl_buf_size = MJUM16BYTES; #endif if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach */ _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); if (min(128, fl->ifl_size) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_desc_tag != NULL) { bus_dma_tag_destroy(fl->ifl_desc_tag); fl->ifl_desc_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); /* XXX destroy maps first */ free(fl->ifl_sds.ifsd_map, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_map = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } } /* * MI independent logic * */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (ifmp_ring_is_stalled(txq->ift_br)) txq->ift_qstatus = IFLIB_QUEUE_HUNG; txq->ift_cleaned_prev = txq->ift_cleaned; /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: CTX_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); IFDI_WATCHDOG_RESET(ctx); ctx->ifc_watchdog_events++; ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { MPASS(rxq->ifr_id == i); iflib_netmap_rxq_init(ctx, rxq); continue; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } static void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(1000); IFDI_STOP(ctx); DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static void rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) { int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; iflib_dma_info_t di; int next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); if (fl->ifl_sds.ifsd_map != NULL) { next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; di = fl->ifl_ifdi; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_flags[next]); bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); if (unload) bus_dmamap_unload(fl->ifl_desc_tag, map); } fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; if (map != NULL) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bit_clear(fl->ifl_rx_bitmap, cidx); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) { int i, padlen , flags; struct mbuf *m, *mh, *mt; caddr_t cl; i = 0; mh = NULL; do { rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); MPASS(*sd->ifsd_cl != NULL); MPASS(*sd->ifsd_m != NULL); /* Don't include zero-length frags */ if (ri->iri_frags[i].irf_len == 0) { /* XXX we can save the cluster here, but not the mbuf */ m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); m_free(*sd->ifsd_m); *sd->ifsd_m = NULL; continue; } m = *sd->ifsd_m; *sd->ifsd_m = NULL; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) { rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); m = *sd.ifsd_m; *sd.ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri, &sd); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } #if defined(INET6) || defined(INET) static void iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) { CURVNET_SET(lc->ifp->if_vnet); #if defined(INET6) *v6 = VNET(ip6_forwarding); #endif #if defined(INET) *v4 = VNET(ipforwarding); #endif CURVNET_RESTORE(); } /* * Returns true if it's possible this packet could be LROed. * if it returns false, it is guaranteed that tcp_lro_rx() * would not return zero. */ static bool iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { struct ether_header *eh; uint16_t eh_type; eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); switch (eh_type) { #if defined(INET6) case ETHERTYPE_IPV6: return !v6_forwarding; #endif #if defined (INET) case ETHERTYPE_IP: return !v4_forwarding; #endif } return false; } #else static void iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) { } #endif static bool iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; bool lro_possible = false; bool v4_forwarding, v6_forwarding; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt, *mf; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? */ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); continue; } /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); if (lro_enabled) iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); mt = mf = NULL; while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled) { if (!lro_possible) { lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); if (lro_possible && mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); mt = mf = NULL; } } if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == (CSUM_L4_CALC|CSUM_L4_VALID)) { if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; } } #endif if (lro_possible) { ifp->if_input(ifp, m); DBG_COUNTER_INC(rx_if_input); continue; } if (mf == NULL) mf = m; if (mt != NULL) mt->m_nextpkt = m; mt = m; } if (mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail) return true; return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: CTX_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); return (false); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) static inline bool iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) { qidx_t dbval, max; bool rang; rang = false; max = TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; } return (rang); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m, *n; n = m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); n = *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; if (IS_TSO4(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; if (IS_TSO6(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; } if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). * So, set it here because the rest of the flow requires it. */ pi->ipi_csum_flags |= CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static __noinline struct mbuf * collapse_pkthdr(struct mbuf *m0) { struct mbuf *m, *m_next, *tmp; m = m0; m_next = m->m_next; while (m_next != NULL && m_next->m_len == 0) { m = m_next; m->m_next = NULL; m_free(m); m_next = m_next->m_next; } m = m0; m->m_next = m_next; if ((m_next->m_flags & M_EXT) == 0) { m = m_defrag(m, M_NOWAIT); } else { tmp = m_next->m_next; memcpy(m_next, m, MPKTHSIZE); m = m_next; m->m_next = tmp; } return (m); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif i = 1; while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; i++; } return (mh); } static int iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, int max_segs, int flags) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; int i, next, pidx, err, ntxd, count; struct mbuf *m, *tmp, **ifsd_m; m = *m0; /* * Please don't ever do this */ if (__predict_false(m->m_len == 0)) *m0 = m = collapse_pkthdr(m); ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) return (err); ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; count = 0; m = *m0; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } m = m->m_next; count++; } while (m != NULL); if (count > *nsegs) { ifsd_m[pidx] = *m0; ifsd_m[pidx]->m_flags |= M_TOOBIG; return (0); } m = *m0; count = 0; do { next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); ifsd_m[next] = m; count++; tmp = m; m = m->m_next; } while (m != NULL); } else { int buflen, sgsize, maxsegsz, max_sgsize; vm_offset_t vaddr; vm_paddr_t curaddr; count = i = 0; m = *m0; if (m->m_pkthdr.csum_flags & CSUM_TSO) maxsegsz = scctx->isc_tx_tso_segsize_max; else maxsegsz = sctx->isc_tx_maxsegsize; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } buflen = m->m_len; vaddr = (vm_offset_t)m->m_data; /* * see if we can't be smarter about physically * contiguous mappings */ next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); #if MEMORY_LOGGING txq->ift_enqueued++; #endif ifsd_m[next] = m; while (buflen > 0) { if (i >= max_segs) goto err; max_sgsize = MIN(buflen, maxsegsz); curaddr = pmap_kextract(vaddr); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); sgsize = MIN(sgsize, max_sgsize); segs[i].ds_addr = curaddr; segs[i].ds_len = sgsize; vaddr += sgsize; buflen -= sgsize; i++; } count++; tmp = m; m = m->m_next; } while (m != NULL); *nsegs = i; } return (0); err: *m0 = iflib_remove_mbuf(txq); return (EFBIG); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } /* * Pad an mbuf to ensure a minimum ethernet frame size. * min_frame_size is the frame size (less CRC) to pad the mbuf to */ static __noinline int iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) { /* * 18 is enough bytes to pad an ARP packet to 46 bytes, and * and ARP message is the smallest common payload I can think of */ static char pad[18]; /* just zeros */ int n; struct mbuf *new_head; if (!M_WRITABLE(*m_head)) { new_head = m_dup(*m_head, M_NOWAIT); if (new_head == NULL) { m_freem(*m_head); device_printf(dev, "cannot pad short frame, m_dup() failed"); DBG_COUNTER_INC(encap_pad_mbuf_fail); return ENOMEM; } m_freem(*m_head); *m_head = new_head; } for (n = min_frame_size - (*m_head)->m_pkthdr.len; n > 0; n -= sizeof(pad)) if (!m_append(*m_head, min(n, sizeof(pad)), pad)) break; if (n > 0) { m_freem(*m_head); device_printf(dev, "cannot pad short frame\n"); DBG_COUNTER_INC(encap_pad_mbuf_fail); return (ENOBUFS); } return 0; } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_segment_t *segs; struct mbuf *m_head; void *next_txd; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; bus_dma_tag_t desc_tag; segs = txq->ift_segs; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; if (ctx->ifc_flags & IFC_PREFETCH) { next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { next_txd = calc_next_txd(txq, cidx, 0); prefetch(next_txd); } /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); if (txq->ift_sds.ifsd_map != NULL) { prefetch(&txq->ift_sds.ifsd_map[next]); next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } } else if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { desc_tag = txq->ift_tso_desc_tag; max_segs = scctx->isc_tx_tso_segments_max; } else { desc_tag = txq->ift_desc_tag; max_segs = scctx->isc_tx_nsegments; } if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); if (err) return err; } m_head = *m_headp; pkt_info_zero(&pi); pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; } retry: err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); if (remap == 1) m_head = m_defrag(*m_headp, M_NOWAIT); remap++; if (__predict_false(m_head == NULL)) goto defrag_failed; txq->ift_mbuf_defrag++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); return (err); } /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } /* * On Intel cards we can greatly reduce the number of TX interrupts * we see by only setting report status on every Nth descriptor. * However, this also means that the driver will need to keep track * of the descriptors that RS was set on to check them for the DD bit. */ txq->ift_rs_pending += nsegs + 1; if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) { pi.ipi_flags |= IPI_TX_INTR; txq->ift_rs_pending = 0; } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if (map != NULL) bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE); if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { if (map != NULL) bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; } else DBG_COUNTER_INC(encap_txd_encap_fail); return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { int hasmap; uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; uint8_t *ifsd_flags; bus_dmamap_t *ifsd_map; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; ifsd_m = txq->ift_sds.ifsd_m; ifsd_map = txq->ift_sds.ifsd_map; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n-- > 0) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if (ifsd_m[cidx] != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { /* * does it matter if it's not the TSO tag? If so we'll * have to add the type to flags */ bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; } if ((m = ifsd_m[cidx]) != NULL) { /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); /* if the number of clusters exceeds the number of segments * there won't be space on the ring to save a pointer to each * cluster so we simply free the list here */ if (m->m_flags & M_TOOBIG) { m_freem(m); } else { m_free(m); } ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch2cachelines(&items[next]); prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; int reclaimed, err, in_use_prev, desc_used; bool do_prefetch, ring, rang; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); avail = TXQ_AVAIL(txq); for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { int pidx_prev, rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; reclaimed++; continue; } in_use_prev = txq->ift_in_use; pidx_prev = txq->ift_pidx; err = iflib_encap(txq, mp); if (__predict_false(err)) { DBG_COUNTER_INC(txq_drain_encapfail); /* no room - bail out */ if (err == ENOBUFS) break; consumed++; DBG_COUNTER_INC(txq_drain_encapfail); /* we can't send this packet - skip it */ continue; } consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; int rc; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; if (if_getcapenable(ifp) & IFCAP_NETMAP) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; more = true; #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { u_int work = 0; if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { more = false; } } #endif budget = ctx->ifc_sysctl_rx_budget; if (budget == 0) budget = 16; /* XXX */ if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { return; } } CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); IFDI_LINK_INTR_ENABLE(ctx); if (ctx->ifc_flags & IFC_DO_RESET) { ctx->ifc_flags &= ~IFC_DO_RESET; iflib_if_init_locked(ctx); } CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENOBUFS); } MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) */ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); } return (err); } static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; CTX_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; CTX_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); CTX_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; CTX_UNLOCK(ctx); if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); if_vlancap(ifp); /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING) iflib_stop(ctx); if_togglecapenable(ifp, setmask); if (bits & IFF_DRV_RUNNING) iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); } break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix, msix_bar; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); return (err); } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; #ifdef INVARIANTS MPASS(scctx->isc_capenable); if (scctx->isc_capenable & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #elif !(defined(__i386__) || defined(__amd64__)) /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware */ if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. */ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { device_printf(dev, "Unable to fetch CPU list\n"); CPU_COPY(&all_cpus, &ctx->ifc_cpus); } MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_intr_free; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; NETDUMP_SET(ctx->ifc_ifp, iflib); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) pci_release_msi(ctx->ifc_dev); fail_queues: /* XXX free queues */ fail: IFDI_DETACH(ctx); return (err); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } CTX_LOCK(ctx); ctx->ifc_in_detach = 1; iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } bus_generic_detach(dev); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxd_min[0]); MPASS(sctx->isc_nrxd_max[0]); MPASS(sctx->isc_nrxd_default[0]); MPASS(sctx->isc_ntxd_min[0]); MPASS(sctx->isc_ntxd_max[0]); MPASS(sctx->isc_ntxd_default[0]); } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); driver->refs++; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); ifmedia_init(&ctx->ifc_media, IFM_IMASK, iflib_media_change, iflib_media_status); return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; struct ifmp_ring **brscp; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(rxq = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; rx_fail: if (brscp != NULL) free(brscp, M_IFLIB); if (rxq != NULL) free(rxq, M_IFLIB); if (txq != NULL) free(txq, M_IFLIB); fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < ctx->ifc_nhwtxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { iflib_rx_sds_free(rxq); rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } return (err); #endif } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; if ((err = iflib_tx_structures_setup(ctx)) != 0) return (err); if ((err = iflib_rx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); } return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } #ifdef SMP static int find_nth(if_ctx_t ctx, int qid) { cpuset_t cpus; int i, cpuid, eqid, count; CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); CPU_CLR(cpuid-1, &cpus); } cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } #ifdef SCHED_ULE extern struct cpu_group *cpu_top; /* CPU topology */ static int find_child_with_core(int cpu, struct cpu_group *grp) { int i; if (grp->cg_children == 0) return -1; MPASS(grp->cg_child); for (i = 0; i < grp->cg_children; i++) { if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) return i; } return -1; } /* * Find the nth thread on the specified core */ static int find_thread(int cpu, int thread_num) { struct cpu_group *grp; int i; cpuset_t cs; grp = cpu_top; if (grp == NULL) return cpu; i = 0; while ((i = find_child_with_core(cpu, grp)) != -1) { /* If the child only has one cpu, don't descend */ if (grp->cg_child[i].cg_count <= 1) break; grp = &grp->cg_child[i]; } /* If they don't share at least an L2 cache, use the same CPU */ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) return cpu; /* Now pick one */ CPU_COPY(&grp->cg_mask, &cs); for (i = thread_num % grp->cg_count; i > 0; i--) { MPASS(CPU_FFS(&cs)); CPU_CLR(CPU_FFS(&cs) - 1, &cs); } MPASS(CPU_FFS(&cs)); return CPU_FFS(&cs) - 1; } #else static int find_thread(int cpu, int thread_num __unused) { return cpu; } #endif static int get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) { switch (type) { case IFLIB_INTR_TX: /* TX queues get threads on the same core as the corresponding RX queue */ /* XXX handle multiple RX threads per core and more than two threads per core */ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; case IFLIB_INTR_RX: case IFLIB_INTR_RXTX: /* RX queues get the first thread on their core */ return qid / CPU_COUNT(&ctx->ifc_cpus); default: return -1; } } #else #define get_thread_num(ctx, type, qid) CPU_FIRST() #define find_thread(cpuid, tid) CPU_FIRST() #define find_nth(ctx, gid) CPU_FIRST() #endif /* Just to avoid copy/paste */ static inline int iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) { int cpuid; int err, tid; cpuid = find_nth(ctx, qid); tid = get_thread_num(ctx, type, qid); MPASS(tid >= 0); cpuid = find_thread(cpuid, tid); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); if (err) { device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); return (err); } #ifdef notyet if (cpuid > ctx->ifc_cpuid_highest) ctx->ifc_cpuid_highest = cpuid; #endif return 0; } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; gtask_fn_t *fn; int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? */ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: panic("unknown net intr type"); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); if (err) return (err); } else { taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; int irq_num = -1; int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; if (irq != NULL) irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; if (irq != NULL) irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); if (irq_num != -1) { err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name); if (err) taskqgroup_attach(tqg, gtask, q, irq_num, name); } else { taskqgroup_attach(tqg, gtask, q, irq_num, name); } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, rman_get_start(irq->ii_res), "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); } void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) ctx->ifc_flags |= IFC_PREFETCH; /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif if (ctx->isc_txd_credits_update == NULL) return (0); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct mtx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_mtx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by global tuneable */ { int i; size_t len = sizeof(i); err = kernel_sysctlbyname(curthread, "hw.pci.enable_msix", &i, &len, NULL, 0, NULL, 0); if (err == 0) { if (i == 0) goto msi; } else { device_printf(dev, "unable to read hw.pci.enable_msix."); } } /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicity set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us. */ { int msix_ctrl, rid; pci_enable_busmaster(dev); rid = 0; if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) { rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } else { device_printf(dev, "PCIY_MSIX capability not found; " "or rid %d == 0.\n", rid); goto msi; } } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use msix in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { /* May not be enabled */ device_printf(dev, "Unable to map MSIX table \n"); goto msi; } } /* First try MSI/X */ if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ device_printf(dev, "System has MSIX disabled \n"); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; goto msi; } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; if (rx_queues > scctx->isc_nrxqsets) rx_queues = scctx->isc_nrxqsets; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (tx_queues > scctx->isc_ntxqsets) tx_queues = scctx->isc_ntxqsets; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the rx budget"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif #ifdef NETDUMP static void -iflib_netdump_init(struct ifnet *ifp, int *nmbufp, int *nclustp) +iflib_netdump_init(struct ifnet *ifp, int *nrxr) { if_ctx_t ctx; - ctx = ifp->if_softc; - - *nmbufp += ctx->ifc_rxqs[0].ifr_fl->ifl_size; - *nclustp += ctx->ifc_rxqs[0].ifr_fl->ifl_size; + ctx = if_getsoftc(ifp); + *nrxr = NRXQSETS(ctx); } static void iflib_netdump_event(struct ifnet *ifp, enum netdump_ev event) { if_ctx_t ctx; if_softc_ctx_t scctx; int i; - ctx = ifp->if_softc; + ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; switch (event) { case NETDUMP_START: for (i = 0; i < scctx->isc_nrxqsets; i++) ctx->ifc_rxqs[i].ifr_fl->ifl_buf_size = MCLBYTES; iflib_no_tx_batch = 1; break; default: break; } } static int iflib_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { if_ctx_t ctx; iflib_txq_t txq; int error; - ctx = ifp->if_softc; - + ctx = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; error = iflib_encap(txq, &m); if (error == 0) (void)iflib_txd_db_check(ctx, txq, true, txq->ift_in_use); return (error); } static int iflib_netdump_poll(struct ifnet *ifp, int count) { if_ctx_t ctx; if_softc_ctx_t scctx; iflib_txq_t txq; int i; - ctx = ifp->if_softc; + ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; (void)iflib_tx_credits_update(ctx, txq); (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); for (i = 0; i < scctx->isc_nrxqsets; i++) (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); return (0); } #endif /* NETDUMP */ Index: user/markj/netdump/sys/netinet/netdump/netdump.h =================================================================== --- user/markj/netdump/sys/netinet/netdump/netdump.h (revision 331674) +++ user/markj/netdump/sys/netinet/netdump/netdump.h (revision 331675) @@ -1,125 +1,125 @@ /*- * Copyright (c) 2005-2014 Sandvine Incorporated * Copyright (c) 2000 Darrell Anderson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NETINET_NETDUMP_H_ #define _NETINET_NETDUMP_H_ #include #include #include #include #include #define NETDUMP_PORT 20023 /* Server udp port number for data. */ #define NETDUMP_ACKPORT 20024 /* Client udp port number for acks. */ #define NETDUMP_HERALD 1 /* Broadcast before starting a dump. */ #define NETDUMP_FINISHED 2 /* Send after finishing a dump. */ #define NETDUMP_VMCORE 3 /* Contains dump data. */ #define NETDUMP_KDH 4 /* Contains kernel dump header. */ #define NETDUMP_EKCD_KEY 5 /* Contains kernel dump key. */ #define NETDUMP_DATASIZE 4096 /* Arbitrary packet size limit. */ struct netdump_msg_hdr { uint32_t mh_type; /* Netdump message type. */ uint32_t mh_seqno; /* Match acks with msgs. */ uint64_t mh_offset; /* vmcore offset (bytes). */ uint32_t mh_len; /* Attached data (bytes). */ uint32_t mh__pad; } __packed; struct netdump_ack { uint32_t na_seqno; /* Match acks with msgs. */ } __packed; struct netdump_conf { struct diocskerneldump_arg ndc_kda; char ndc_iface[IFNAMSIZ]; struct in_addr ndc_server; struct in_addr ndc_client; struct in_addr ndc_gateway; }; #define _PATH_NETDUMP "/dev/netdump" #define NETDUMPGCONF _IOR('n', 1, struct netdump_conf) #define NETDUMPSCONF _IOW('n', 2, struct netdump_conf) #ifdef _KERNEL #ifdef NETDUMP #define NETDUMP_MAX_IN_FLIGHT 64 enum netdump_ev { NETDUMP_START, NETDUMP_END, }; struct ifnet; struct mbuf; -typedef void netdump_init_t(struct ifnet *, int *nmbufp, int *nclustp); +typedef void netdump_init_t(struct ifnet *, int *nrxr); typedef void netdump_event_t(struct ifnet *, enum netdump_ev); typedef int netdump_transmit_t(struct ifnet *, struct mbuf *); typedef int netdump_poll_t(struct ifnet *, int); struct netdump_methods { netdump_init_t *nd_init; netdump_event_t *nd_event; netdump_transmit_t *nd_transmit; netdump_poll_t *nd_poll; }; #define NETDUMP_DEFINE(driver) \ static netdump_init_t driver##_netdump_init; \ static netdump_event_t driver##_netdump_event; \ static netdump_transmit_t driver##_netdump_transmit; \ static netdump_poll_t driver##_netdump_poll; \ \ static struct netdump_methods driver##_netdump_methods = { \ .nd_init = driver##_netdump_init, \ .nd_event = driver##_netdump_event, \ .nd_transmit = driver##_netdump_transmit, \ .nd_poll = driver##_netdump_poll, \ } #define NETDUMP_SET(ifp, driver) \ (ifp)->if_netdump_methods = &driver##_netdump_methods #else /* !NETDUMP */ #define NETDUMP_DEFINE(driver) #define NETDUMP_SET(ifp, driver) #endif /* NETDUMP */ #endif /* _KERNEL */ #endif /* _NETINET_NETDUMP_H_ */ Index: user/markj/netdump/sys/netinet/netdump/netdump_client.c =================================================================== --- user/markj/netdump/sys/netinet/netdump/netdump_client.c (revision 331674) +++ user/markj/netdump/sys/netinet/netdump/netdump_client.c (revision 331675) @@ -1,1244 +1,1246 @@ /*- * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. * Copyright (c) 2000 Darrell Anderson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * netdump_client.c * FreeBSD subsystem supporting netdump network dumps. * A dedicated server must be running to accept client dumps. */ #include __FBSDID("$FreeBSD$"); #include "opt_netdump.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NETDUMP_DEBUG #define NETDDEBUG(f, ...) \ printf(("%s: " f), __func__, ## __VA_ARGS__) #define NETDDEBUG_IF(i, f, ...) \ if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__) #if NETDUMP_DEBUG > 1 #define NETDDEBUGV(f, ...) \ printf(("%s: " f), __func__, ## __VA_ARGS__) #define NETDDEBUGV_IF(i, f, ...) \ if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__) #else #define NETDDEBUGV(f, ...) #define NETDDEBUGV_IF(i, f, ...) #endif #else #define NETDDEBUG(f, ...) #define NETDDEBUG_IF(i, f, ...) #define NETDDEBUGV(f, ...) #define NETDDEBUGV_IF(i, f, ...) #endif static int netdump_arp_gw(void); static void netdump_cleanup(void); static int netdump_configure(struct netdump_conf *); static int netdump_dumper(void *priv __unused, void *virtual, vm_offset_t physical __unused, off_t offset, size_t length); static int netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, u_short etype); static void netdump_handle_arp(struct mbuf **mb); static void netdump_handle_ip(struct mbuf **mb); static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td); static int netdump_modevent(module_t mod, int type, void *priv); static void netdump_network_poll(void); static void netdump_pkt_in(struct ifnet *ifp, struct mbuf *m); static int netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen); static int netdump_send_arp(in_addr_t dst); static int netdump_start(struct dumperinfo *di); static int netdump_udp_output(struct mbuf *m); /* Must be at least as big as the chunks dumpsys() gives us. */ static unsigned char nd_buf[MAXDUMPPGS * PAGE_SIZE]; static uint32_t nd_seqno; static int dump_failed, have_gw_mac; static void (*drv_if_input)(struct ifnet *, struct mbuf *); static int restore_gw_addr; static uint64_t rcvd_acks; CTASSERT(sizeof(rcvd_acks) * NBBY == NETDUMP_MAX_IN_FLIGHT); /* * Times to poll the NIC (0.5ms each poll) before assuming packetloss * occurred (default to 1s). */ static int nd_polls = 2000; /* Times to retransmit lost packets. */ static int nd_retries = 10; /* Number of ARP retries. */ static int nd_arp_retries = 3; /* Configuration parameters. */ static struct netdump_conf nd_conf; #define nd_server nd_conf.ndc_server #define nd_client nd_conf.ndc_client #define nd_gateway nd_conf.ndc_gateway /* General dynamic settings. */ static struct ether_addr nd_gw_mac; static struct ifnet *nd_ifp; static uint16_t nd_server_port = NETDUMP_PORT; FEATURE(netdump, "Netdump client support"); static SYSCTL_NODE(_net, OID_AUTO, netdump, CTLFLAG_RD, NULL, "netdump parameters"); static int nd_enabled; SYSCTL_INT(_net_netdump, OID_AUTO, enabled, CTLFLAG_RD, &nd_enabled, 0, "netdump configuration status"); static char nd_path[MAXPATHLEN]; SYSCTL_STRING(_net_netdump, OID_AUTO, path, CTLFLAG_RW, nd_path, sizeof(nd_path), "Server path for output files"); /* * Checks for netdump support on a network interface * * Parameters: * ifp The network interface that is being tested for support * * Returns: * int 1 if the interface is supported, 0 if not */ static bool netdump_supported_nic(struct ifnet *ifp) { return (ifp->if_netdump_methods != NULL); } /*- * Network specific primitives. * Following down the code they are divided ordered as: * - Packet buffer primitives * - Output primitives * - Input primitives * - Polling primitives */ /* * Handles creation of the ethernet header, then places outgoing packets into * the tx buffer for the NIC * * Parameters: * m The mbuf containing the packet to be sent (will be freed by * this function or the NIC driver) * ifp The interface to send on * dst The destination ethernet address (source address will be looked * up using ifp) * etype The ETHERTYPE_* value for the protocol that is being sent * * Returns: * int see errno.h, 0 for success */ static int netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, u_short etype) { struct ether_header *eh; if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { if_printf(ifp, "netdump_ether_output: interface isn't up\n"); m_freem(m); return (ENETDOWN); } /* Fill in the ethernet header. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } eh = mtod(m, struct ether_header *); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); eh->ether_type = htons(etype); return ((ifp->if_netdump_methods->nd_transmit)(ifp, m)); } /* * Unreliable transmission of an mbuf chain to the netdump server * Note: can't handle fragmentation; fails if the packet is larger than * nd_ifp->if_mtu after adding the UDP/IP headers * * Parameters: * m mbuf chain * * Returns: * int see errno.h, 0 for success */ static int netdump_udp_output(struct mbuf *m) { struct udpiphdr *ui; struct ip *ip; MPASS(nd_ifp != NULL); M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } if (m->m_pkthdr.len > nd_ifp->if_mtu) { printf("netdump_udp_output: Packet is too big: %d > MTU %u\n", m->m_pkthdr.len, nd_ifp->if_mtu); m_freem(m); return (ENOBUFS); } ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); ui->ui_pr = IPPROTO_UDP; ui->ui_len = htons(m->m_pkthdr.len - sizeof(struct ip)); ui->ui_ulen = ui->ui_len; ui->ui_src = nd_client; ui->ui_dst = nd_server; /* Use this src port so that the server can connect() the socket */ ui->ui_sport = htons(NETDUMP_ACKPORT); ui->ui_dport = htons(nd_server_port); ui->ui_sum = 0; if ((ui->ui_sum = in_cksum(m, m->m_pkthdr.len)) == 0) ui->ui_sum = 0xffff; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_tos = 0; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_id = 0; ip->ip_off = htons(IP_DF); ip->ip_ttl = 255; ip->ip_sum = 0; ip->ip_sum = in_cksum(m, sizeof(struct ip)); return (netdump_ether_output(m, nd_ifp, nd_gw_mac, ETHERTYPE_IP)); } /* * Builds and sends a single ARP request to locate the server * * Return value: * 0 on success * errno on error */ static int netdump_send_arp(in_addr_t dst) { struct ether_addr bcast; struct mbuf *m; struct arphdr *ah; int pktlen; MPASS(nd_ifp != NULL); /* Fill-up a broadcast address. */ memset(&bcast, 0xFF, ETHER_ADDR_LEN); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("netdump_send_arp: Out of mbufs\n"); return (ENOBUFS); } pktlen = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr)); m->m_len = pktlen; m->m_pkthdr.len = pktlen; MH_ALIGN(m, pktlen); ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); ah->ar_pro = htons(ETHERTYPE_IP); ah->ar_hln = ETHER_ADDR_LEN; ah->ar_pln = sizeof(struct in_addr); ah->ar_op = htons(ARPOP_REQUEST); memcpy(ar_sha(ah), IF_LLADDR(nd_ifp), ETHER_ADDR_LEN); ((struct in_addr *)ar_spa(ah))->s_addr = nd_client.s_addr; bzero(ar_tha(ah), ETHER_ADDR_LEN); ((struct in_addr *)ar_tpa(ah))->s_addr = dst; return (netdump_ether_output(m, nd_ifp, bcast, ETHERTYPE_ARP)); } /* * Sends ARP requests to locate the server and waits for a response. * We first try to ARP the server itself, and fall back to the provided * gateway if the server appears to be off-link. * * Return value: * 0 on success * errno on error */ static int netdump_arp_gw(void) { in_addr_t dst; int error, polls, retries; dst = nd_server.s_addr; restart: for (retries = 0; retries < nd_arp_retries && have_gw_mac == 0; retries++) { error = netdump_send_arp(dst); if (error != 0) return (error); for (polls = 0; polls < nd_polls && have_gw_mac == 0; polls++) { netdump_network_poll(); DELAY(500); } if (have_gw_mac == 0) printf("(ARP retry)"); } if (have_gw_mac != 0) return (0); if (dst == nd_server.s_addr && nd_server.s_addr != nd_gateway.s_addr) { printf("Failed to ARP server, trying to reach gateway...\n"); dst = nd_gateway.s_addr; goto restart; } printf("\nARP timed out.\n"); return (ETIMEDOUT); } /* * Dummy free function for EXT_NETDUMP clusters. */ static void netdump_mbuf_free(struct mbuf *m __unused) { } /* * Construct and reliably send a netdump packet. May fail from a resource * shortage or extreme number of unacknowledged retransmissions. Wait for * an acknowledgement before returning. Splits packets into chunks small * enough to be sent without fragmentation (looks up the interface MTU) * * Parameters: * type netdump packet type (HERALD, FINISHED, or VMCORE) * offset vmcore data offset (bytes) * data vmcore data * datalen vmcore data size (bytes) * * Returns: * int see errno.h, 0 for success */ static int netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen) { struct netdump_msg_hdr *nd_msg_hdr; struct mbuf *m, *m2; uint64_t want_acks; uint32_t i, pktlen, sent_so_far; int retries, polls, error; want_acks = 0; rcvd_acks = 0; retries = 0; MPASS(nd_ifp != NULL); retransmit: /* Chunks can be too big to fit in packets. */ for (i = sent_so_far = 0; sent_so_far < datalen || (i == 0 && datalen == 0); i++) { pktlen = datalen - sent_so_far; /* First bound: the packet structure. */ pktlen = min(pktlen, NETDUMP_DATASIZE); /* Second bound: the interface MTU (assume no IP options). */ pktlen = min(pktlen, nd_ifp->if_mtu - sizeof(struct udpiphdr) - sizeof(struct netdump_msg_hdr)); /* * Check if it is retransmitting and this has been ACKed * already. */ if ((rcvd_acks & (1 << i)) != 0) { sent_so_far += pktlen; continue; } /* * Get and fill a header mbuf, then chain data as an extended * mbuf. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("netdump_send: Out of mbufs\n"); return (ENOBUFS); } m->m_len = sizeof(struct netdump_msg_hdr); m->m_pkthdr.len = sizeof(struct netdump_msg_hdr); MH_ALIGN(m, sizeof(struct netdump_msg_hdr)); nd_msg_hdr = mtod(m, struct netdump_msg_hdr *); nd_msg_hdr->mh_seqno = htonl(nd_seqno + i); nd_msg_hdr->mh_type = htonl(type); nd_msg_hdr->mh_offset = htobe64(offset + sent_so_far); nd_msg_hdr->mh_len = htonl(pktlen); nd_msg_hdr->mh__pad = 0; if (pktlen != 0) { m2 = m_get(M_NOWAIT, MT_DATA); if (m2 == NULL) { m_freem(m); printf("netdump_send: Out of mbufs\n"); return (ENOBUFS); } MEXTADD(m2, data + sent_so_far, pktlen, netdump_mbuf_free, NULL, NULL, 0, EXT_NETDUMP); m2->m_len = pktlen; m_cat(m, m2); m->m_pkthdr.len += pktlen; } error = netdump_udp_output(m); if (error != 0) return (error); /* Note that we're waiting for this packet in the bitfield. */ want_acks |= (1 << i); sent_so_far += pktlen; } if (i >= NETDUMP_MAX_IN_FLIGHT) printf("Warning: Sent more than %d packets (%d). " "Acknowledgements will fail unless the size of " "rcvd_acks/want_acks is increased.\n", NETDUMP_MAX_IN_FLIGHT, i); /* * Wait for acks. A *real* window would speed things up considerably. */ polls = 0; while (rcvd_acks != want_acks) { if (polls++ > nd_polls) { if (retries++ > nd_retries) return (ETIMEDOUT); printf(". "); goto retransmit; } netdump_network_poll(); DELAY(500); } nd_seqno += i; return (0); } /* * Handler for IP packets: checks their sanity and then processes any netdump * ACK packets it finds. * * It needs to replicate partially the behaviour of ip_input() and * udp_input(). * * Parameters: * mb a pointer to an mbuf * containing the packet received * Updates *mb if m_pullup et al change the pointer * Assumes the calling function will take care of freeing the mbuf */ static void netdump_handle_ip(struct mbuf **mb) { struct ip *ip; struct udpiphdr *udp; struct netdump_ack *nd_ack; struct mbuf *m; int rcv_ackno; unsigned short hlen; /* IP processing. */ m = *mb; if (m->m_pkthdr.len < sizeof(struct ip)) { NETDDEBUG("dropping packet too small for IP header\n"); return; } if (m->m_len < sizeof(struct ip)) { m = m_pullup(m, sizeof(struct ip)); *mb = m; if (m == NULL) { NETDDEBUG("m_pullup failed\n"); return; } } ip = mtod(m, struct ip *); /* IP version. */ if (ip->ip_v != IPVERSION) { NETDDEBUG("bad IP version %d\n", ip->ip_v); return; } /* Header length. */ hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { NETDDEBUG("bad IP header length (%hu)\n", hlen); return; } if (hlen > m->m_len) { m = m_pullup(m, hlen); *mb = m; if (m == NULL) { NETDDEBUG("m_pullup failed\n"); return; } ip = mtod(m, struct ip *); } /* Ignore packets with IP options. */ if (hlen > sizeof(struct ip)) { NETDDEBUG("drop packet with IP options\n"); return; } #ifdef INVARIANTS if (((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) && (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { NETDDEBUG("Bad IP header (RFC1122)\n"); return; } #endif /* Checksum. */ if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) { if ((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0) { NETDDEBUG("bad IP checksum\n"); return; } } else { /* XXX */ ; } /* Convert fields to host byte order. */ ip->ip_len = ntohs(ip->ip_len); if (ip->ip_len < hlen) { NETDDEBUG("IP packet smaller (%hu) than header (%hu)\n", ip->ip_len, hlen); return; } if (m->m_pkthdr.len < ip->ip_len) { NETDDEBUG("IP packet bigger (%hu) than ethernet packet (%d)\n", ip->ip_len, m->m_pkthdr.len); return; } if (m->m_pkthdr.len > ip->ip_len) { /* Truncate the packet to the IP length. */ if (m->m_len == m->m_pkthdr.len) { m->m_len = ip->ip_len; m->m_pkthdr.len = ip->ip_len; } else m_adj(m, ip->ip_len - m->m_pkthdr.len); } ip->ip_off = ntohs(ip->ip_off); /* Check that the source is the server's IP. */ if (ip->ip_src.s_addr != nd_server.s_addr) { NETDDEBUG("drop packet not from server (from 0x%x)\n", ip->ip_src.s_addr); return; } /* Check if the destination IP is ours. */ if (ip->ip_dst.s_addr != nd_client.s_addr) { NETDDEBUGV("drop packet not to our IP\n"); return; } if (ip->ip_p != IPPROTO_UDP) { NETDDEBUG("drop non-UDP packet\n"); return; } /* Do not deal with fragments. */ if ((ip->ip_off & (IP_MF | IP_OFFMASK)) != 0) { NETDDEBUG("drop fragmented packet\n"); return; } /* UDP custom is to have packet length not include IP header. */ ip->ip_len -= hlen; /* UDP processing. */ /* Get IP and UDP headers together, along with the netdump packet. */ if (m->m_pkthdr.len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { NETDDEBUG("ignoring small packet\n"); return; } if (m->m_len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { m = m_pullup(m, sizeof(struct udpiphdr) + sizeof(struct netdump_ack)); *mb = m; if (m == NULL) { NETDDEBUG("m_pullup failed\n"); return; } } udp = mtod(m, struct udpiphdr *); if (ntohs(udp->ui_u.uh_dport) != NETDUMP_ACKPORT) { NETDDEBUG("not on the netdump port.\n"); return; } /* Netdump processing. */ /* * Packet is meant for us. Extract the ack sequence number and the * port number if necessary. */ nd_ack = (struct netdump_ack *)(mtod(m, caddr_t) + sizeof(struct udpiphdr)); rcv_ackno = ntohl(nd_ack->na_seqno); if (nd_server_port == NETDUMP_PORT) nd_server_port = ntohs(udp->ui_u.uh_sport); if (rcv_ackno >= nd_seqno + NETDUMP_MAX_IN_FLIGHT) printf("%s: ACK %d too far in future!\n", __func__, rcv_ackno); else if (rcv_ackno >= nd_seqno) { /* We're interested in this ack. Record it. */ rcvd_acks |= 1 << (rcv_ackno - nd_seqno); } } /* * Handler for ARP packets: checks their sanity and then * 1. If the ARP is a request for our IP, respond with our MAC address * 2. If the ARP is a response from our server, record its MAC address * * It needs to replicate partially the behaviour of arpintr() and * in_arpinput(). * * Parameters: * mb a pointer to an mbuf * containing the packet received * Updates *mb if m_pullup et al change the pointer * Assumes the calling function will take care of freeing the mbuf */ static void netdump_handle_arp(struct mbuf **mb) { char buf[INET_ADDRSTRLEN]; struct in_addr isaddr, itaddr, myaddr; struct ether_addr dst; struct mbuf *m; struct arphdr *ah; struct ifnet *ifp; uint8_t *enaddr; int req_len, op; m = *mb; ifp = m->m_pkthdr.rcvif; if (m->m_len < sizeof(struct arphdr)) { m = m_pullup(m, sizeof(struct arphdr)); *mb = m; if (m == NULL) { NETDDEBUG("runt packet: m_pullup failed\n"); return; } } ah = mtod(m, struct arphdr *); if (ntohs(ah->ar_hrd) != ARPHRD_ETHER) { NETDDEBUG("unknown hardware address 0x%2D)\n", (unsigned char *)&ah->ar_hrd, ""); return; } if (ntohs(ah->ar_pro) != ETHERTYPE_IP) { NETDDEBUG("drop ARP for unknown protocol %d\n", ntohs(ah->ar_pro)); return; } req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); if (m->m_len < req_len) { m = m_pullup(m, req_len); *mb = m; if (m == NULL) { NETDDEBUG("runt packet: m_pullup failed\n"); return; } } ah = mtod(m, struct arphdr *); op = ntohs(ah->ar_op); memcpy(&isaddr, ar_spa(ah), sizeof(isaddr)); memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr)); enaddr = (uint8_t *)IF_LLADDR(ifp); myaddr = nd_client; if (memcmp(ar_sha(ah), enaddr, ifp->if_addrlen) == 0) { NETDDEBUG("ignoring ARP from myself\n"); return; } if (isaddr.s_addr == nd_client.s_addr) { printf("%s: %*D is using my IP address %s!\n", __func__, ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa_r(isaddr, buf)); return; } if (memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen) == 0) { NETDDEBUG("ignoring ARP from broadcast address\n"); return; } if (op == ARPOP_REPLY) { if (isaddr.s_addr != nd_gateway.s_addr && isaddr.s_addr != nd_server.s_addr) { inet_ntoa_r(isaddr, buf); NETDDEBUG( "ignoring ARP reply from %s (not netdump server)\n", buf); return; } memcpy(nd_gw_mac.octet, ar_sha(ah), min(ah->ar_hln, ETHER_ADDR_LEN)); have_gw_mac = 1; NETDDEBUG("got server MAC address %6D\n", nd_gw_mac.octet, ":"); return; } if (op != ARPOP_REQUEST) { NETDDEBUG("ignoring ARP non-request/reply\n"); return; } if (itaddr.s_addr != nd_client.s_addr) { NETDDEBUG("ignoring ARP not to our IP\n"); return; } memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); memcpy(ar_sha(ah), enaddr, ah->ar_hln); memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); memcpy(ar_spa(ah), &itaddr, ah->ar_pln); ah->ar_op = htons(ARPOP_REPLY); ah->ar_pro = htons(ETHERTYPE_IP); m->m_flags &= ~(M_BCAST|M_MCAST); m->m_len = arphdr_len(ah); m->m_pkthdr.len = m->m_len; memcpy(dst.octet, ar_tha(ah), ETHER_ADDR_LEN); netdump_ether_output(m, ifp, dst, ETHERTYPE_ARP); *mb = NULL; } /* * Handler for incoming packets directly from the network adapter * Identifies the packet type (IP or ARP) and passes it along to one of the * helper functions netdump_handle_ip or netdump_handle_arp. * * It needs to replicate partially the behaviour of ether_input() and * ether_demux(). * * Parameters: * ifp the interface the packet came from (should be nd_ifp) * m an mbuf containing the packet received */ static void netdump_pkt_in(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; /* Ethernet processing. */ if ((m->m_flags & M_PKTHDR) == 0) { NETDDEBUG_IF(ifp, "discard frame without packet header\n"); goto done; } if (m->m_len < ETHER_HDR_LEN) { NETDDEBUG_IF(ifp, "discard frame without leading eth header (len %u pktlen %u)\n", m->m_len, m->m_pkthdr.len); goto done; } if ((m->m_flags & M_HASFCS) != 0) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { NETDDEBUG_IF(ifp, "ignoring vlan packets\n"); goto done; } /* XXX: Probably must also check if we're the recipient MAC address. */ /* Done ethernet processing. Strip off the ethernet header. */ m_adj(m, ETHER_HDR_LEN); switch (etype) { case ETHERTYPE_ARP: netdump_handle_arp(&m); break; case ETHERTYPE_IP: netdump_handle_ip(&m); break; default: NETDDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); break; } done: if (m != NULL) m_freem(m); } /* * After trapping, instead of assuming that most of the network stack is sane, * we just poll the driver directly for packets. */ static void netdump_network_poll(void) { MPASS(nd_ifp != NULL); nd_ifp->if_netdump_methods->nd_poll(nd_ifp, 1000); } /*- * Dumping specific primitives. */ /* * Callback from dumpsys() to dump a chunk of memory. * Copies it out to our static buffer then sends it across the network. * Detects the initial KDH and makes sure it is given a special packet type. * * Parameters: * priv Unused. Optional private pointer. * virtual Virtual address (where to read the data from) * physical Unused. Physical memory address. * offset Offset from start of core file * length Data length * * Return value: * 0 on success * errno on error */ static int netdump_dumper(void *priv __unused, void *virtual, vm_offset_t physical __unused, off_t offset, size_t length) { int error; NETDDEBUGV("netdump_dumper(NULL, %p, NULL, %ju, %zu)\n", virtual, (uintmax_t)offset, length); if (virtual == NULL) { if (dump_failed != 0) printf("failed to dump the kernel core\n"); else if (netdump_send(NETDUMP_FINISHED, 0, NULL, 0) != 0) printf("failed to close the transaction\n"); else printf("\nnetdump finished.\n"); netdump_cleanup(); return (0); } if (length > sizeof(nd_buf)) return (ENOSPC); memmove(nd_buf, virtual, length); error = netdump_send(NETDUMP_VMCORE, offset, nd_buf, length); if (error != 0) { dump_failed = 1; return (error); } return (0); } /* * Perform any initalization needed prior to transmitting the kernel core. */ static int netdump_start(struct dumperinfo *di) { char *path; char buf[INET_ADDRSTRLEN]; uint32_t len; int error; error = 0; /* Check if the dumping is allowed to continue. */ if (nd_enabled == 0) return (EINVAL); MPASS(nd_ifp != NULL); if (nd_server.s_addr == INADDR_ANY) { printf("netdump_start: can't netdump; no server IP given\n"); return (EINVAL); } if (nd_client.s_addr == INADDR_ANY) { printf("netdump_start: can't netdump; no client IP given\n"); return (EINVAL); } /* We start dumping at offset 0. */ di->dumpoff = 0; nd_seqno = 1; /* * nd_server_port could have switched after the first ack the * first time it gets called. Adjust it accordingly. */ nd_server_port = NETDUMP_PORT; /* Switch to the netdump mbuf zones. */ netdump_mbuf_dump(); nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_START); /* Make the card use *our* receive callback. */ drv_if_input = nd_ifp->if_input; nd_ifp->if_input = netdump_pkt_in; if (nd_gateway.s_addr == INADDR_ANY) { restore_gw_addr = 1; nd_gateway.s_addr = nd_server.s_addr; } printf("netdump in progress. searching for server...\n"); if (netdump_arp_gw()) { printf("failed to locate server MAC address\n"); error = EINVAL; goto trig_abort; } if (nd_path[0] != '\0') { path = nd_path; len = strlen(path) + 1; } else { path = NULL; len = 0; } if (netdump_send(NETDUMP_HERALD, 0, path, len) != 0) { printf("failed to contact netdump server\n"); error = EINVAL; goto trig_abort; } printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf), nd_gw_mac.octet, ":"); return (0); trig_abort: netdump_cleanup(); return (error); } static int netdump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh, void *key, uint32_t keysize) { int error; memcpy(nd_buf, kdh, sizeof(*kdh)); error = netdump_send(NETDUMP_KDH, 0, nd_buf, sizeof(*kdh)); if (error == 0 && keysize > 0) { if (keysize > sizeof(nd_buf)) return (EINVAL); memcpy(nd_buf, key, keysize); error = netdump_send(NETDUMP_EKCD_KEY, 0, nd_buf, keysize); } return (error); } /* * Cleanup routine for a possibly failed netdump. */ static void netdump_cleanup(void) { if (restore_gw_addr != 0) { nd_gateway.s_addr = INADDR_ANY; restore_gw_addr = 0; } if (drv_if_input != NULL) { nd_ifp->if_input = drv_if_input; drv_if_input = NULL; } nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_END); } /*- * KLD specific code. */ static struct cdevsw netdump_cdevsw = { .d_version = D_VERSION, .d_ioctl = netdump_ioctl, .d_name = "netdump", }; static struct cdev *netdump_cdev; static int netdump_configure(struct netdump_conf *conf) { struct ifnet *ifp; - int nmbuf, nclust; + int nmbuf, nclust, nrxr; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strcmp(ifp->if_xname, conf->ndc_iface) == 0) break; } /* XXX ref */ IFNET_RUNLOCK_NOSLEEP(); if (ifp == NULL) { printf("netdump: unknown interface '%s'\n", conf->ndc_iface); return (1); } else if (!netdump_supported_nic(ifp) || ifp->if_type != IFT_ETHER) { printf("netdump: unsupported interface '%s'\n", conf->ndc_iface); return (1); } + ifp->if_netdump_methods->nd_init(ifp, &nrxr); + KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); + /* - * We need two headers per message. Multiply by four to give us some - * breathing room. + * We need two headers per message on the transmit side. Multiply by + * four to give us some breathing room. */ - nmbuf = NETDUMP_MAX_IN_FLIGHT * 4; - nclust = 0; - ifp->if_netdump_methods->nd_init(ifp, &nmbuf, &nclust); + nmbuf = NETDUMP_MAX_IN_FLIGHT * (4 + nrxr); + nclust = NETDUMP_MAX_IN_FLIGHT * nrxr; netdump_mbuf_init(nmbuf, nclust); nd_ifp = ifp; memcpy(&nd_conf, conf, sizeof(nd_conf)); nd_enabled = 1; return (0); } /* * ioctl(2) handler for the netdump device. This is currently only used to * register netdump as a dump device. * * Parameters: * dev, Unused. * cmd, The ioctl to be handled. * addr, The parameter for the ioctl. * flags, Unused. * td, The thread invoking this ioctl. * * Returns: * 0 on success, and an errno value on failure. */ static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td) { struct dumperinfo dumper; struct netdump_conf *conf; int error; u_int u; error = 0; switch (cmd) { case DIOCSKERNELDUMP: u = *(u_int *)addr; if (u != 0) { error = ENXIO; break; } if (nd_enabled) { nd_enabled = 0; netdump_mbuf_drain(); } break; case NETDUMPGCONF: conf = (struct netdump_conf *)addr; if (!nd_enabled) { error = ENXIO; break; } strlcpy(conf->ndc_iface, nd_ifp->if_xname, sizeof(conf->ndc_iface)); memcpy(&conf->ndc_server, &nd_server, sizeof(nd_server)); memcpy(&conf->ndc_client, &nd_client, sizeof(nd_client)); memcpy(&conf->ndc_gateway, &nd_gateway, sizeof(nd_gateway)); break; case NETDUMPSCONF: conf = (struct netdump_conf *)addr; if (conf->ndc_kda.kda_enable == 0) { if (nd_enabled) { error = clear_dumper(td); if (error == 0) nd_enabled = 0; } break; } if (netdump_configure(conf) != 0) { error = EINVAL; break; } dumper.dumper_start = netdump_start; dumper.dumper_hdr = netdump_write_headers; dumper.dumper = netdump_dumper; dumper.priv = NULL; dumper.blocksize = NETDUMP_DATASIZE; dumper.maxiosize = MAXDUMPPGS * PAGE_SIZE; dumper.mediaoffset = 0; dumper.mediasize = 0; error = set_dumper(&dumper, conf->ndc_iface, td, conf->ndc_kda.kda_compression, conf->ndc_kda.kda_encryption, conf->ndc_kda.kda_key, conf->ndc_kda.kda_encryptedkeysize, conf->ndc_kda.kda_encryptedkey); if (error != 0) nd_enabled = 0; break; default: error = EINVAL; break; } return (error); } /* * Called upon system init or kld load. Initializes the netdump parameters to * sane defaults (locates the first available NIC and uses the first IPv4 IP on * that card as the client IP). Leaves the server IP unconfigured. * * Parameters: * mod, Unused. * what, The module event type. * priv, Unused. * * Returns: * int, An errno value if an error occured, 0 otherwise. */ static int netdump_modevent(module_t mod __unused, int what, void *priv __unused) { struct netdump_conf conf; char *arg; int error; error = 0; switch (what) { case MOD_LOAD: error = make_dev_p(MAKEDEV_WAITOK, &netdump_cdev, &netdump_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "netdump"); if (error != 0) return (error); if ((arg = kern_getenv("net.dump.iface")) != NULL) { strlcpy(conf.ndc_iface, arg, sizeof(conf.ndc_iface)); freeenv(arg); if ((arg = kern_getenv("net.dump.server")) != NULL) { inet_aton(arg, &conf.ndc_server); freeenv(arg); } if ((arg = kern_getenv("net.dump.client")) != NULL) { inet_aton(arg, &conf.ndc_server); freeenv(arg); } if ((arg = kern_getenv("net.dump.gateway")) != NULL) { inet_aton(arg, &conf.ndc_server); freeenv(arg); } /* Ignore errors; we print a message to the console. */ (void)netdump_configure(&conf); } break; case MOD_UNLOAD: destroy_dev(netdump_cdev); if (nd_enabled) { printf("netdump: disabling dump device for unload\n"); (void)clear_dumper(curthread); nd_enabled = 0; } break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t netdump_mod = { "netdump", netdump_modevent, NULL, }; MODULE_VERSION(netdump, 1); DECLARE_MODULE(netdump, netdump_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);