Index: projects/ifnet/sys/dev/bge/if_bge.c =================================================================== --- projects/ifnet/sys/dev/bge/if_bge.c (revision 280372) +++ projects/ifnet/sys/dev/bge/if_bge.c (revision 280373) @@ -1,6719 +1,6718 @@ /*- * Copyright (c) 2001 Wind River Systems * Copyright (c) 1997, 1998, 1999, 2001 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Broadcom BCM57xx(x)/BCM590x NetXtreme and NetLink family Ethernet driver * * The Broadcom BCM5700 is based on technology originally developed by * Alteon Networks as part of the Tigon I and Tigon II Gigabit Ethernet * MAC chips. The BCM5700, sometimes referred to as the Tigon III, has * two on-board MIPS R4000 CPUs and can have as much as 16MB of external * SSRAM. The BCM5700 supports TCP, UDP and IP checksum offload, jumbo * frames, highly configurable RX filtering, and 16 RX and TX queues * (which, along with RX filter rules, can be used for QOS applications). * Other features, such as TCP segmentation, may be available as part * of value-added firmware updates. Unlike the Tigon I and Tigon II, * firmware images can be stored in hardware and need not be compiled * into the driver. * * The BCM5700 supports the PCI v2.2 and PCI-X v1.0 standards, and will * function in a 32-bit/64-bit 33/66Mhz bus, or a 64-bit/133Mhz bus. * * The BCM5701 is a single-chip solution incorporating both the BCM5700 * MAC and a BCM5401 10/100/1000 PHY. Unlike the BCM5700, the BCM5701 * does not support external SSRAM. * * Broadcom also produces a variation of the BCM5700 under the "Altima" * brand name, which is functionally similar but lacks PCI-X support. * * Without external SSRAM, you can only have at most 4 TX rings, * and the use of the mini RX ring is disabled. This seems to imply * that these features are simply not available on the BCM5701. As a * result, this driver does not implement any support for the mini RX * ring. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #ifdef __sparc64__ #include #include #include #include #endif #include #include #include #define ETHER_MIN_NOPAD (ETHER_MIN_LEN - ETHER_CRC_LEN) /* i.e., 60 */ MODULE_DEPEND(bge, pci, 1, 1, 1); MODULE_DEPEND(bge, ether, 1, 1, 1); MODULE_DEPEND(bge, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* * Various supported device vendors/types and their names. Note: the * spec seems to indicate that the hardware still has Alteon's vendor * ID burned into it, though it will always be overriden by the vendor * ID in the EEPROM. Just to be safe, we cover all possibilities. */ static const struct bge_type { uint16_t bge_vid; uint16_t bge_did; } bge_devs[] = { { ALTEON_VENDORID, ALTEON_DEVICEID_BCM5700 }, { ALTEON_VENDORID, ALTEON_DEVICEID_BCM5701 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC1000 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC1002 }, { ALTIMA_VENDORID, ALTIMA_DEVICE_AC9100 }, { APPLE_VENDORID, APPLE_DEVICE_BCM5701 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5700 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5701 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5702X }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5703X }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5704S_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705K }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5705M_ALT }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5714C }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5714S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5715 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5715S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5717 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5718 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5719 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5720 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5721 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5722 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5723 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5725 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5727 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5750 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5750M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5751M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5752 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5752M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5753M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5754 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5754M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5755 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5755M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5756 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761E }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5761SE }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5762 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5764 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5780 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5780S }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5781 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5782 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5784 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5785F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5785G }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5786 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787F }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5787M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5788 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5789 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5901 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5901A2 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5903M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5906 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM5906M }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57760 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57761 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57762 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57764 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57765 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57766 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57767 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57780 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57781 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57782 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57785 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57786 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57787 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57788 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57790 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57791 }, { BCOM_VENDORID, BCOM_DEVICEID_BCM57795 }, { SK_VENDORID, SK_DEVICEID_ALTIMA }, { TC_VENDORID, TC_DEVICEID_3C996 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PW008GE4 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PW008GE5 }, { FJTSU_VENDORID, FJTSU_DEVICEID_PP250450 }, { 0, 0 } }; static const struct bge_vendor { uint16_t v_id; const char *v_name; } bge_vendors[] = { { ALTEON_VENDORID, "Alteon" }, { ALTIMA_VENDORID, "Altima" }, { APPLE_VENDORID, "Apple" }, { BCOM_VENDORID, "Broadcom" }, { SK_VENDORID, "SysKonnect" }, { TC_VENDORID, "3Com" }, { FJTSU_VENDORID, "Fujitsu" }, { 0, NULL } }; static const struct bge_revision { uint32_t br_chipid; const char *br_name; } bge_revisions[] = { { BGE_CHIPID_BCM5700_A0, "BCM5700 A0" }, { BGE_CHIPID_BCM5700_A1, "BCM5700 A1" }, { BGE_CHIPID_BCM5700_B0, "BCM5700 B0" }, { BGE_CHIPID_BCM5700_B1, "BCM5700 B1" }, { BGE_CHIPID_BCM5700_B2, "BCM5700 B2" }, { BGE_CHIPID_BCM5700_B3, "BCM5700 B3" }, { BGE_CHIPID_BCM5700_ALTIMA, "BCM5700 Altima" }, { BGE_CHIPID_BCM5700_C0, "BCM5700 C0" }, { BGE_CHIPID_BCM5701_A0, "BCM5701 A0" }, { BGE_CHIPID_BCM5701_B0, "BCM5701 B0" }, { BGE_CHIPID_BCM5701_B2, "BCM5701 B2" }, { BGE_CHIPID_BCM5701_B5, "BCM5701 B5" }, { BGE_CHIPID_BCM5703_A0, "BCM5703 A0" }, { BGE_CHIPID_BCM5703_A1, "BCM5703 A1" }, { BGE_CHIPID_BCM5703_A2, "BCM5703 A2" }, { BGE_CHIPID_BCM5703_A3, "BCM5703 A3" }, { BGE_CHIPID_BCM5703_B0, "BCM5703 B0" }, { BGE_CHIPID_BCM5704_A0, "BCM5704 A0" }, { BGE_CHIPID_BCM5704_A1, "BCM5704 A1" }, { BGE_CHIPID_BCM5704_A2, "BCM5704 A2" }, { BGE_CHIPID_BCM5704_A3, "BCM5704 A3" }, { BGE_CHIPID_BCM5704_B0, "BCM5704 B0" }, { BGE_CHIPID_BCM5705_A0, "BCM5705 A0" }, { BGE_CHIPID_BCM5705_A1, "BCM5705 A1" }, { BGE_CHIPID_BCM5705_A2, "BCM5705 A2" }, { BGE_CHIPID_BCM5705_A3, "BCM5705 A3" }, { BGE_CHIPID_BCM5750_A0, "BCM5750 A0" }, { BGE_CHIPID_BCM5750_A1, "BCM5750 A1" }, { BGE_CHIPID_BCM5750_A3, "BCM5750 A3" }, { BGE_CHIPID_BCM5750_B0, "BCM5750 B0" }, { BGE_CHIPID_BCM5750_B1, "BCM5750 B1" }, { BGE_CHIPID_BCM5750_C0, "BCM5750 C0" }, { BGE_CHIPID_BCM5750_C1, "BCM5750 C1" }, { BGE_CHIPID_BCM5750_C2, "BCM5750 C2" }, { BGE_CHIPID_BCM5714_A0, "BCM5714 A0" }, { BGE_CHIPID_BCM5752_A0, "BCM5752 A0" }, { BGE_CHIPID_BCM5752_A1, "BCM5752 A1" }, { BGE_CHIPID_BCM5752_A2, "BCM5752 A2" }, { BGE_CHIPID_BCM5714_B0, "BCM5714 B0" }, { BGE_CHIPID_BCM5714_B3, "BCM5714 B3" }, { BGE_CHIPID_BCM5715_A0, "BCM5715 A0" }, { BGE_CHIPID_BCM5715_A1, "BCM5715 A1" }, { BGE_CHIPID_BCM5715_A3, "BCM5715 A3" }, { BGE_CHIPID_BCM5717_A0, "BCM5717 A0" }, { BGE_CHIPID_BCM5717_B0, "BCM5717 B0" }, { BGE_CHIPID_BCM5719_A0, "BCM5719 A0" }, { BGE_CHIPID_BCM5720_A0, "BCM5720 A0" }, { BGE_CHIPID_BCM5755_A0, "BCM5755 A0" }, { BGE_CHIPID_BCM5755_A1, "BCM5755 A1" }, { BGE_CHIPID_BCM5755_A2, "BCM5755 A2" }, { BGE_CHIPID_BCM5722_A0, "BCM5722 A0" }, { BGE_CHIPID_BCM5761_A0, "BCM5761 A0" }, { BGE_CHIPID_BCM5761_A1, "BCM5761 A1" }, { BGE_CHIPID_BCM5762_A0, "BCM5762 A0" }, { BGE_CHIPID_BCM5784_A0, "BCM5784 A0" }, { BGE_CHIPID_BCM5784_A1, "BCM5784 A1" }, /* 5754 and 5787 share the same ASIC ID */ { BGE_CHIPID_BCM5787_A0, "BCM5754/5787 A0" }, { BGE_CHIPID_BCM5787_A1, "BCM5754/5787 A1" }, { BGE_CHIPID_BCM5787_A2, "BCM5754/5787 A2" }, { BGE_CHIPID_BCM5906_A1, "BCM5906 A1" }, { BGE_CHIPID_BCM5906_A2, "BCM5906 A2" }, { BGE_CHIPID_BCM57765_A0, "BCM57765 A0" }, { BGE_CHIPID_BCM57765_B0, "BCM57765 B0" }, { BGE_CHIPID_BCM57780_A0, "BCM57780 A0" }, { BGE_CHIPID_BCM57780_A1, "BCM57780 A1" }, { 0, NULL } }; /* * Some defaults for major revisions, so that newer steppings * that we don't know about have a shot at working. */ static const struct bge_revision bge_majorrevs[] = { { BGE_ASICREV_BCM5700, "unknown BCM5700" }, { BGE_ASICREV_BCM5701, "unknown BCM5701" }, { BGE_ASICREV_BCM5703, "unknown BCM5703" }, { BGE_ASICREV_BCM5704, "unknown BCM5704" }, { BGE_ASICREV_BCM5705, "unknown BCM5705" }, { BGE_ASICREV_BCM5750, "unknown BCM5750" }, { BGE_ASICREV_BCM5714_A0, "unknown BCM5714" }, { BGE_ASICREV_BCM5752, "unknown BCM5752" }, { BGE_ASICREV_BCM5780, "unknown BCM5780" }, { BGE_ASICREV_BCM5714, "unknown BCM5714" }, { BGE_ASICREV_BCM5755, "unknown BCM5755" }, { BGE_ASICREV_BCM5761, "unknown BCM5761" }, { BGE_ASICREV_BCM5784, "unknown BCM5784" }, { BGE_ASICREV_BCM5785, "unknown BCM5785" }, /* 5754 and 5787 share the same ASIC ID */ { BGE_ASICREV_BCM5787, "unknown BCM5754/5787" }, { BGE_ASICREV_BCM5906, "unknown BCM5906" }, { BGE_ASICREV_BCM57765, "unknown BCM57765" }, { BGE_ASICREV_BCM57766, "unknown BCM57766" }, { BGE_ASICREV_BCM57780, "unknown BCM57780" }, { BGE_ASICREV_BCM5717, "unknown BCM5717" }, { BGE_ASICREV_BCM5719, "unknown BCM5719" }, { BGE_ASICREV_BCM5720, "unknown BCM5720" }, { BGE_ASICREV_BCM5762, "unknown BCM5762" }, { 0, NULL } }; #define BGE_IS_JUMBO_CAPABLE(sc) ((sc)->bge_flags & BGE_FLAG_JUMBO) #define BGE_IS_5700_FAMILY(sc) ((sc)->bge_flags & BGE_FLAG_5700_FAMILY) #define BGE_IS_5705_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5705_PLUS) #define BGE_IS_5714_FAMILY(sc) ((sc)->bge_flags & BGE_FLAG_5714_FAMILY) #define BGE_IS_575X_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_575X_PLUS) #define BGE_IS_5755_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5755_PLUS) #define BGE_IS_5717_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5717_PLUS) #define BGE_IS_57765_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_57765_PLUS) static uint32_t bge_chipid(device_t); static const struct bge_vendor * bge_lookup_vendor(uint16_t); static const struct bge_revision * bge_lookup_rev(uint32_t); typedef int (*bge_eaddr_fcn_t)(struct bge_softc *, uint8_t[]); static int bge_probe(device_t); static int bge_attach(device_t); static int bge_detach(device_t); static int bge_suspend(device_t); static int bge_resume(device_t); static void bge_release_resources(struct bge_softc *); static void bge_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int bge_dma_alloc(struct bge_softc *); static void bge_dma_free(struct bge_softc *); static int bge_dma_ring_alloc(struct bge_softc *, bus_size_t, bus_size_t, bus_dma_tag_t *, uint8_t **, bus_dmamap_t *, bus_addr_t *, const char *); static void bge_devinfo(struct bge_softc *); static int bge_mbox_reorder(struct bge_softc *); static int bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[]); static int bge_get_eaddr_mem(struct bge_softc *, uint8_t[]); static int bge_get_eaddr_nvram(struct bge_softc *, uint8_t[]); static int bge_get_eaddr_eeprom(struct bge_softc *, uint8_t[]); static int bge_get_eaddr(struct bge_softc *, uint8_t[]); static void bge_txeof(struct bge_softc *, uint16_t); static void bge_rxcsum(struct bge_softc *, struct bge_rx_bd *, struct mbuf *); static int bge_rxeof(struct bge_softc *, uint16_t, int); static void bge_asf_driver_up (struct bge_softc *); static void bge_tick(void *); static void bge_stats_clear_regs(struct bge_softc *); static void bge_stats_update(struct bge_softc *); static void bge_stats_update_regs(struct bge_softc *); static struct mbuf *bge_check_short_dma(struct mbuf *); static struct mbuf *bge_setup_tso(struct bge_softc *, struct mbuf *, uint16_t *, uint16_t *); static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *); static void bge_intr(void *); static int bge_msi_intr(void *); static void bge_intr_task(void *, int); static int bge_start_locked(struct bge_softc *); static int bge_transmit(if_t, struct mbuf *); static int bge_ioctl(if_t, u_long, void *, struct thread *); static void bge_init(struct bge_softc *); static void bge_stop_block(struct bge_softc *, bus_size_t, uint32_t); static void bge_stop(struct bge_softc *); static void bge_watchdog(struct bge_softc *); static int bge_shutdown(device_t); static int bge_ifmedia_upd_locked(if_t); static int bge_ifmedia_upd(if_t); static void bge_ifmedia_sts(if_t, struct ifmediareq *); static uint64_t bge_get_counter(if_t, ift_counter); static uint8_t bge_nvram_getbyte(struct bge_softc *, int, uint8_t *); static int bge_read_nvram(struct bge_softc *, caddr_t, int, int); static uint8_t bge_eeprom_getbyte(struct bge_softc *, int, uint8_t *); static int bge_read_eeprom(struct bge_softc *, caddr_t, int, int); static void bge_setpromisc(struct bge_softc *); static void bge_setmulti(struct bge_softc *); static void bge_setvlan(struct bge_softc *); static __inline void bge_rxreuse_std(struct bge_softc *, int); static __inline void bge_rxreuse_jumbo(struct bge_softc *, int); static int bge_newbuf_std(struct bge_softc *, int); static int bge_newbuf_jumbo(struct bge_softc *, int); static int bge_init_rx_ring_std(struct bge_softc *); static void bge_free_rx_ring_std(struct bge_softc *); static int bge_init_rx_ring_jumbo(struct bge_softc *); static void bge_free_rx_ring_jumbo(struct bge_softc *); static void bge_free_tx_ring(struct bge_softc *); static int bge_init_tx_ring(struct bge_softc *); static int bge_chipinit(struct bge_softc *); static int bge_blockinit(struct bge_softc *); static uint32_t bge_dma_swap_options(struct bge_softc *); static int bge_has_eaddr(struct bge_softc *); static uint32_t bge_readmem_ind(struct bge_softc *, int); static void bge_writemem_ind(struct bge_softc *, int, int); static void bge_writembx(struct bge_softc *, int, int); #ifdef notdef static uint32_t bge_readreg_ind(struct bge_softc *, int); #endif static void bge_writemem_direct(struct bge_softc *, int, int); static void bge_writereg_ind(struct bge_softc *, int, int); static int bge_miibus_readreg(device_t, int, int); static int bge_miibus_writereg(device_t, int, int, int); static void bge_miibus_statchg(device_t); static uint64_t bge_miibus_readvar(device_t, int); #ifdef DEVICE_POLLING static int bge_poll(if_t ifp, enum poll_cmd cmd, int count); #endif #define BGE_RESET_SHUTDOWN 0 #define BGE_RESET_START 1 #define BGE_RESET_SUSPEND 2 static void bge_sig_post_reset(struct bge_softc *, int); static void bge_sig_legacy(struct bge_softc *, int); static void bge_sig_pre_reset(struct bge_softc *, int); static void bge_stop_fw(struct bge_softc *); static int bge_reset(struct bge_softc *); static void bge_link_upd(struct bge_softc *); static void bge_ape_lock_init(struct bge_softc *); static void bge_ape_read_fw_ver(struct bge_softc *); static int bge_ape_lock(struct bge_softc *, int); static void bge_ape_unlock(struct bge_softc *, int); static void bge_ape_send_event(struct bge_softc *, uint32_t); static void bge_ape_driver_state_change(struct bge_softc *, int); /* * The BGE_REGISTER_DEBUG option is only for low-level debugging. It may * leak information to untrusted users. It is also known to cause alignment * traps on certain architectures. */ #ifdef BGE_REGISTER_DEBUG static int bge_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int bge_sysctl_reg_read(SYSCTL_HANDLER_ARGS); static int bge_sysctl_ape_read(SYSCTL_HANDLER_ARGS); static int bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS); #endif static void bge_add_sysctls(struct bge_softc *); static void bge_add_sysctl_stats_regs(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void bge_add_sysctl_stats(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS); static device_method_t bge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, bge_probe), DEVMETHOD(device_attach, bge_attach), DEVMETHOD(device_detach, bge_detach), DEVMETHOD(device_shutdown, bge_shutdown), DEVMETHOD(device_suspend, bge_suspend), DEVMETHOD(device_resume, bge_resume), /* MII interface */ DEVMETHOD(miibus_readreg, bge_miibus_readreg), DEVMETHOD(miibus_writereg, bge_miibus_writereg), DEVMETHOD(miibus_statchg, bge_miibus_statchg), DEVMETHOD(miibus_readvar, bge_miibus_readvar), DEVMETHOD_END }; static driver_t bge_driver = { "bge", bge_methods, sizeof(struct bge_softc) }; static struct ifdriver bge_ifdrv = { .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, .ifop_ioctl = bge_ioctl, .ifop_transmit = bge_transmit, .ifop_get_counter = bge_get_counter, #ifdef DEVICE_POLLING .ifop_poll = bge_poll, #endif }, .ifdrv_name = "bge", .ifdrv_type = IFT_ETHER, .ifdrv_hdrlen = sizeof(struct ether_vlan_header), .ifdrv_maxqlen = BGE_TX_RING_CNT - 1, }; static devclass_t bge_devclass; DRIVER_MODULE(bge, pci, bge_driver, bge_devclass, 0, 0); DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0); static int bge_allow_asf = 1; static SYSCTL_NODE(_hw, OID_AUTO, bge, CTLFLAG_RD, 0, "BGE driver parameters"); SYSCTL_INT(_hw_bge, OID_AUTO, allow_asf, CTLFLAG_RDTUN, &bge_allow_asf, 0, "Allow ASF mode if available"); #define SPARC64_BLADE_1500_MODEL "SUNW,Sun-Blade-1500" #define SPARC64_BLADE_1500_PATH_BGE "/pci@1f,700000/network@2" #define SPARC64_BLADE_2500_MODEL "SUNW,Sun-Blade-2500" #define SPARC64_BLADE_2500_PATH_BGE "/pci@1c,600000/network@3" #define SPARC64_OFW_SUBVENDOR "subsystem-vendor-id" static int bge_has_eaddr(struct bge_softc *sc) { #ifdef __sparc64__ char buf[sizeof(SPARC64_BLADE_1500_PATH_BGE)]; device_t dev; uint32_t subvendor; dev = sc->bge_dev; /* * The on-board BGEs found in sun4u machines aren't fitted with * an EEPROM which means that we have to obtain the MAC address * via OFW and that some tests will always fail. We distinguish * such BGEs by the subvendor ID, which also has to be obtained * from OFW instead of the PCI configuration space as the latter * indicates Broadcom as the subvendor of the netboot interface. * For early Blade 1500 and 2500 we even have to check the OFW * device path as the subvendor ID always defaults to Broadcom * there. */ if (OF_getprop(ofw_bus_get_node(dev), SPARC64_OFW_SUBVENDOR, &subvendor, sizeof(subvendor)) == sizeof(subvendor) && (subvendor == FJTSU_VENDORID || subvendor == SUN_VENDORID)) return (0); memset(buf, 0, sizeof(buf)); if (OF_package_to_path(ofw_bus_get_node(dev), buf, sizeof(buf)) > 0) { if (strcmp(sparc64_model, SPARC64_BLADE_1500_MODEL) == 0 && strcmp(buf, SPARC64_BLADE_1500_PATH_BGE) == 0) return (0); if (strcmp(sparc64_model, SPARC64_BLADE_2500_MODEL) == 0 && strcmp(buf, SPARC64_BLADE_2500_PATH_BGE) == 0) return (0); } #endif return (1); } static uint32_t bge_readmem_ind(struct bge_softc *sc, int off) { device_t dev; uint32_t val; if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) return (0); dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); val = pci_read_config(dev, BGE_PCI_MEMWIN_DATA, 4); pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4); return (val); } static void bge_writemem_ind(struct bge_softc *sc, int off, int val) { device_t dev; if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && off >= BGE_STATS_BLOCK && off < BGE_SEND_RING_1_TO_4) return; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4); pci_write_config(dev, BGE_PCI_MEMWIN_DATA, val, 4); pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4); } #ifdef notdef static uint32_t bge_readreg_ind(struct bge_softc *sc, int off) { device_t dev; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_REG_BASEADDR, off, 4); return (pci_read_config(dev, BGE_PCI_REG_DATA, 4)); } #endif static void bge_writereg_ind(struct bge_softc *sc, int off, int val) { device_t dev; dev = sc->bge_dev; pci_write_config(dev, BGE_PCI_REG_BASEADDR, off, 4); pci_write_config(dev, BGE_PCI_REG_DATA, val, 4); } static void bge_writemem_direct(struct bge_softc *sc, int off, int val) { CSR_WRITE_4(sc, off, val); } static void bge_writembx(struct bge_softc *sc, int off, int val) { if (sc->bge_asicrev == BGE_ASICREV_BCM5906) off += BGE_LPMBX_IRQ0_HI - BGE_MBX_IRQ0_HI; CSR_WRITE_4(sc, off, val); if ((sc->bge_flags & BGE_FLAG_MBOX_REORDER) != 0) CSR_READ_4(sc, off); } /* * Clear all stale locks and select the lock for this driver instance. */ static void bge_ape_lock_init(struct bge_softc *sc) { uint32_t bit, regbase; int i; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) regbase = BGE_APE_LOCK_GRANT; else regbase = BGE_APE_PER_LOCK_GRANT; /* Clear any stale locks. */ for (i = BGE_APE_LOCK_PHY0; i <= BGE_APE_LOCK_GPIO; i++) { switch (i) { case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: bit = BGE_APE_LOCK_GRANT_DRIVER0; break; default: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); } APE_WRITE_4(sc, regbase + 4 * i, bit); } /* Select the PHY lock based on the device's function number. */ switch (sc->bge_func_addr) { case 0: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY0; break; case 1: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY1; break; case 2: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY2; break; case 3: sc->bge_phy_ape_lock = BGE_APE_LOCK_PHY3; break; default: device_printf(sc->bge_dev, "PHY lock not supported on this function\n"); } } /* * Check for APE firmware, set flags, and print version info. */ static void bge_ape_read_fw_ver(struct bge_softc *sc) { const char *fwtype; uint32_t apedata, features; /* Check for a valid APE signature in shared memory. */ apedata = APE_READ_4(sc, BGE_APE_SEG_SIG); if (apedata != BGE_APE_SEG_SIG_MAGIC) { sc->bge_mfw_flags &= ~ BGE_MFW_ON_APE; return; } /* Check if APE firmware is running. */ apedata = APE_READ_4(sc, BGE_APE_FW_STATUS); if ((apedata & BGE_APE_FW_STATUS_READY) == 0) { device_printf(sc->bge_dev, "APE signature found " "but FW status not ready! 0x%08x\n", apedata); return; } sc->bge_mfw_flags |= BGE_MFW_ON_APE; /* Fetch the APE firwmare type and version. */ apedata = APE_READ_4(sc, BGE_APE_FW_VERSION); features = APE_READ_4(sc, BGE_APE_FW_FEATURES); if ((features & BGE_APE_FW_FEATURE_NCSI) != 0) { sc->bge_mfw_flags |= BGE_MFW_TYPE_NCSI; fwtype = "NCSI"; } else if ((features & BGE_APE_FW_FEATURE_DASH) != 0) { sc->bge_mfw_flags |= BGE_MFW_TYPE_DASH; fwtype = "DASH"; } else fwtype = "UNKN"; /* Print the APE firmware version. */ device_printf(sc->bge_dev, "APE FW version: %s v%d.%d.%d.%d\n", fwtype, (apedata & BGE_APE_FW_VERSION_MAJMSK) >> BGE_APE_FW_VERSION_MAJSFT, (apedata & BGE_APE_FW_VERSION_MINMSK) >> BGE_APE_FW_VERSION_MINSFT, (apedata & BGE_APE_FW_VERSION_REVMSK) >> BGE_APE_FW_VERSION_REVSFT, (apedata & BGE_APE_FW_VERSION_BLDMSK)); } static int bge_ape_lock(struct bge_softc *sc, int locknum) { uint32_t bit, gnt, req, status; int i, off; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return (0); /* Lock request/grant registers have different bases. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5761) { req = BGE_APE_LOCK_REQ; gnt = BGE_APE_LOCK_GRANT; } else { req = BGE_APE_PER_LOCK_REQ; gnt = BGE_APE_PER_LOCK_GRANT; } off = 4 * locknum; switch (locknum) { case BGE_APE_LOCK_GPIO: /* Lock required when using GPIO. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5761) return (0); if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_GRC: /* Lock required to reset the device. */ if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_MEM: /* Lock required when accessing certain APE memory. */ if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_REQ_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: /* Lock required when accessing PHYs. */ bit = BGE_APE_LOCK_REQ_DRIVER0; break; default: return (EINVAL); } /* Request a lock. */ APE_WRITE_4(sc, req + off, bit); /* Wait up to 1 second to acquire lock. */ for (i = 0; i < 20000; i++) { status = APE_READ_4(sc, gnt + off); if (status == bit) break; DELAY(50); } /* Handle any errors. */ if (status != bit) { device_printf(sc->bge_dev, "APE lock %d request failed! " "request = 0x%04x[0x%04x], status = 0x%04x[0x%04x]\n", locknum, req + off, bit & 0xFFFF, gnt + off, status & 0xFFFF); /* Revoke the lock request. */ APE_WRITE_4(sc, gnt + off, bit); return (EBUSY); } return (0); } static void bge_ape_unlock(struct bge_softc *sc, int locknum) { uint32_t bit, gnt; int off; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) gnt = BGE_APE_LOCK_GRANT; else gnt = BGE_APE_PER_LOCK_GRANT; off = 4 * locknum; switch (locknum) { case BGE_APE_LOCK_GPIO: if (sc->bge_asicrev == BGE_ASICREV_BCM5761) return; if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_GRC: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_MEM: if (sc->bge_func_addr == 0) bit = BGE_APE_LOCK_GRANT_DRIVER0; else bit = (1 << sc->bge_func_addr); break; case BGE_APE_LOCK_PHY0: case BGE_APE_LOCK_PHY1: case BGE_APE_LOCK_PHY2: case BGE_APE_LOCK_PHY3: bit = BGE_APE_LOCK_GRANT_DRIVER0; break; default: return; } APE_WRITE_4(sc, gnt + off, bit); } /* * Send an event to the APE firmware. */ static void bge_ape_send_event(struct bge_softc *sc, uint32_t event) { uint32_t apedata; int i; /* NCSI does not support APE events. */ if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; /* Wait up to 1ms for APE to service previous event. */ for (i = 10; i > 0; i--) { if (bge_ape_lock(sc, BGE_APE_LOCK_MEM) != 0) break; apedata = APE_READ_4(sc, BGE_APE_EVENT_STATUS); if ((apedata & BGE_APE_EVENT_STATUS_EVENT_PENDING) == 0) { APE_WRITE_4(sc, BGE_APE_EVENT_STATUS, event | BGE_APE_EVENT_STATUS_EVENT_PENDING); bge_ape_unlock(sc, BGE_APE_LOCK_MEM); APE_WRITE_4(sc, BGE_APE_EVENT, BGE_APE_EVENT_1); break; } bge_ape_unlock(sc, BGE_APE_LOCK_MEM); DELAY(100); } if (i == 0) device_printf(sc->bge_dev, "APE event 0x%08x send timed out\n", event); } static void bge_ape_driver_state_change(struct bge_softc *sc, int kind) { uint32_t apedata, event; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) == 0) return; switch (kind) { case BGE_RESET_START: /* If this is the first load, clear the load counter. */ apedata = APE_READ_4(sc, BGE_APE_HOST_SEG_SIG); if (apedata != BGE_APE_HOST_SEG_SIG_MAGIC) APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, 0); else { apedata = APE_READ_4(sc, BGE_APE_HOST_INIT_COUNT); APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, ++apedata); } APE_WRITE_4(sc, BGE_APE_HOST_SEG_SIG, BGE_APE_HOST_SEG_SIG_MAGIC); APE_WRITE_4(sc, BGE_APE_HOST_SEG_LEN, BGE_APE_HOST_SEG_LEN_MAGIC); /* Add some version info if bge(4) supports it. */ APE_WRITE_4(sc, BGE_APE_HOST_DRIVER_ID, BGE_APE_HOST_DRIVER_ID_MAGIC(1, 0)); APE_WRITE_4(sc, BGE_APE_HOST_BEHAVIOR, BGE_APE_HOST_BEHAV_NO_PHYLOCK); APE_WRITE_4(sc, BGE_APE_HOST_HEARTBEAT_INT_MS, BGE_APE_HOST_HEARTBEAT_INT_DISABLE); APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE, BGE_APE_HOST_DRVR_STATE_START); event = BGE_APE_EVENT_STATUS_STATE_START; break; case BGE_RESET_SHUTDOWN: APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE, BGE_APE_HOST_DRVR_STATE_UNLOAD); event = BGE_APE_EVENT_STATUS_STATE_UNLOAD; break; case BGE_RESET_SUSPEND: event = BGE_APE_EVENT_STATUS_STATE_SUSPEND; break; default: return; } bge_ape_send_event(sc, event | BGE_APE_EVENT_STATUS_DRIVER_EVNT | BGE_APE_EVENT_STATUS_STATE_CHNGE); } /* * Map a single buffer address. */ static void bge_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct bge_dmamap_arg *ctx; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); ctx = arg; ctx->bge_busaddr = segs->ds_addr; } static uint8_t bge_nvram_getbyte(struct bge_softc *sc, int addr, uint8_t *dest) { uint32_t access, byte = 0; int i; /* Lock. */ CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1); for (i = 0; i < 8000; i++) { if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1) break; DELAY(20); } if (i == 8000) return (1); /* Enable access. */ access = CSR_READ_4(sc, BGE_NVRAM_ACCESS); CSR_WRITE_4(sc, BGE_NVRAM_ACCESS, access | BGE_NVRAMACC_ENABLE); CSR_WRITE_4(sc, BGE_NVRAM_ADDR, addr & 0xfffffffc); CSR_WRITE_4(sc, BGE_NVRAM_CMD, BGE_NVRAM_READCMD); for (i = 0; i < BGE_TIMEOUT * 10; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_NVRAM_CMD) & BGE_NVRAMCMD_DONE) { DELAY(10); break; } } if (i == BGE_TIMEOUT * 10) { if_printf(sc->bge_ifp, "nvram read timed out\n"); return (1); } /* Get result. */ byte = CSR_READ_4(sc, BGE_NVRAM_RDDATA); *dest = (bswap32(byte) >> ((addr % 4) * 8)) & 0xFF; /* Disable access. */ CSR_WRITE_4(sc, BGE_NVRAM_ACCESS, access); /* Unlock. */ CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_CLR1); CSR_READ_4(sc, BGE_NVRAM_SWARB); return (0); } /* * Read a sequence of bytes from NVRAM. */ static int bge_read_nvram(struct bge_softc *sc, caddr_t dest, int off, int cnt) { int err = 0, i; uint8_t byte = 0; if (sc->bge_asicrev != BGE_ASICREV_BCM5906) return (1); for (i = 0; i < cnt; i++) { err = bge_nvram_getbyte(sc, off + i, &byte); if (err) break; *(dest + i) = byte; } return (err ? 1 : 0); } /* * Read a byte of data stored in the EEPROM at address 'addr.' The * BCM570x supports both the traditional bitbang interface and an * auto access interface for reading the EEPROM. We use the auto * access method. */ static uint8_t bge_eeprom_getbyte(struct bge_softc *sc, int addr, uint8_t *dest) { int i; uint32_t byte = 0; /* * Enable use of auto EEPROM access so we can avoid * having to use the bitbang method. */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_AUTO_EEPROM); /* Reset the EEPROM, load the clock period. */ CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EEADDR_RESET | BGE_EEHALFCLK(BGE_HALFCLK_384SCL)); DELAY(20); /* Issue the read EEPROM command. */ CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EE_READCMD | addr); /* Wait for completion */ for(i = 0; i < BGE_TIMEOUT * 10; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_EE_ADDR) & BGE_EEADDR_DONE) break; } if (i == BGE_TIMEOUT * 10) { device_printf(sc->bge_dev, "EEPROM read timed out\n"); return (1); } /* Get result. */ byte = CSR_READ_4(sc, BGE_EE_DATA); *dest = (byte >> ((addr % 4) * 8)) & 0xFF; return (0); } /* * Read a sequence of bytes from the EEPROM. */ static int bge_read_eeprom(struct bge_softc *sc, caddr_t dest, int off, int cnt) { int i, error = 0; uint8_t byte = 0; for (i = 0; i < cnt; i++) { error = bge_eeprom_getbyte(sc, off + i, &byte); if (error) break; *(dest + i) = byte; } return (error ? 1 : 0); } static int bge_miibus_readreg(device_t dev, int phy, int reg) { struct bge_softc *sc; uint32_t val; int i; sc = device_get_softc(dev); if (bge_ape_lock(sc, sc->bge_phy_ape_lock) != 0) return (0); /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY | BGE_MIPHY(phy) | BGE_MIREG(reg)); /* Poll for the PHY register access to complete. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); val = CSR_READ_4(sc, BGE_MI_COMM); if ((val & BGE_MICOMM_BUSY) == 0) { DELAY(5); val = CSR_READ_4(sc, BGE_MI_COMM); break; } } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "PHY read timed out (phy %d, reg %d, val 0x%08x)\n", phy, reg, val); val = 0; } /* Restore the autopoll bit if necessary. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } bge_ape_unlock(sc, sc->bge_phy_ape_lock); if (val & BGE_MICOMM_READFAIL) return (0); return (val & 0xFFFF); } static int bge_miibus_writereg(device_t dev, int phy, int reg, int val) { struct bge_softc *sc; int i; sc = device_get_softc(dev); if (sc->bge_asicrev == BGE_ASICREV_BCM5906 && (reg == BRGPHY_MII_1000CTL || reg == BRGPHY_MII_AUXCTL)) return (0); if (bge_ape_lock(sc, sc->bge_phy_ape_lock) != 0) return (0); /* Clear the autopoll bit if set, otherwise may trigger PCI errors. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode & ~BGE_MIMODE_AUTOPOLL); DELAY(80); } CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY | BGE_MIPHY(phy) | BGE_MIREG(reg) | val); for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, BGE_MI_COMM) & BGE_MICOMM_BUSY)) { DELAY(5); CSR_READ_4(sc, BGE_MI_COMM); /* dummy read */ break; } } /* Restore the autopoll bit if necessary. */ if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } bge_ape_unlock(sc, sc->bge_phy_ape_lock); if (i == BGE_TIMEOUT) device_printf(sc->bge_dev, "PHY write timed out (phy %d, reg %d, val 0x%04x)\n", phy, reg, val); return (0); } static void bge_miibus_statchg(device_t dev) { struct bge_softc *sc; struct mii_data *mii; uint32_t mac_mode, rx_mode, tx_mode; sc = device_get_softc(dev); if ((sc->bge_flags & BGE_FLAG_RUNNING) == 0) return; mii = device_get_softc(sc->bge_miibus); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->bge_link = 1; break; case IFM_1000_T: case IFM_1000_SX: case IFM_2500_SX: if (sc->bge_asicrev != BGE_ASICREV_BCM5906) sc->bge_link = 1; else sc->bge_link = 0; break; default: sc->bge_link = 0; break; } } else sc->bge_link = 0; if (sc->bge_ifp != NULL) { if_setbaudrate(sc->bge_ifp, ifmedia_baudrate(mii->mii_media_active)); if_link_state_change(sc->bge_ifp, ifmedia_link_state(mii->mii_media_status)); } if (sc->bge_link == 0) return; /* * APE firmware touches these registers to keep the MAC * connected to the outside world. Try to keep the * accesses atomic. */ /* Set the port mode (MII/GMII) to match the link speed. */ mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & ~(BGE_MACMODE_PORTMODE | BGE_MACMODE_HALF_DUPLEX); tx_mode = CSR_READ_4(sc, BGE_TX_MODE); rx_mode = CSR_READ_4(sc, BGE_RX_MODE); if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T || IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX) mac_mode |= BGE_PORTMODE_GMII; else mac_mode |= BGE_PORTMODE_MII; /* Set MAC flow control behavior to match link flow control settings. */ tx_mode &= ~BGE_TXMODE_FLOWCTL_ENABLE; rx_mode &= ~BGE_RXMODE_FLOWCTL_ENABLE; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) tx_mode |= BGE_TXMODE_FLOWCTL_ENABLE; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) rx_mode |= BGE_RXMODE_FLOWCTL_ENABLE; } else mac_mode |= BGE_MACMODE_HALF_DUPLEX; CSR_WRITE_4(sc, BGE_MAC_MODE, mac_mode); DELAY(40); CSR_WRITE_4(sc, BGE_TX_MODE, tx_mode); CSR_WRITE_4(sc, BGE_RX_MODE, rx_mode); } static uint64_t bge_miibus_readvar(device_t dev, int var) { struct bge_softc *sc; sc = device_get_softc(dev); switch (var) { case MIIVAR_MTU: return (sc->bge_mtu); default: return (0); } } /* * Intialize a standard receive ring descriptor. */ static int bge_newbuf_std(struct bge_softc *sc, int i) { struct mbuf *m; struct bge_rx_bd *r; bus_dma_segment_t segs[1]; bus_dmamap_t map; int error, nsegs; if (sc->bge_flags & BGE_FLAG_JUMBO_STD && (sc->bge_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) { m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; } else { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; } if ((sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) == 0) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } if (sc->bge_cdata.bge_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); } map = sc->bge_cdata.bge_rx_std_dmamap[i]; sc->bge_cdata.bge_rx_std_dmamap[i] = sc->bge_cdata.bge_rx_std_sparemap; sc->bge_cdata.bge_rx_std_sparemap = map; sc->bge_cdata.bge_rx_std_chain[i] = m; sc->bge_cdata.bge_rx_std_seglen[i] = segs[0].ds_len; r = &sc->bge_ldata.bge_rx_std_ring[sc->bge_std]; r->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[0].ds_addr); r->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[0].ds_addr); r->bge_flags = BGE_RXBDFLAG_END; r->bge_len = segs[0].ds_len; r->bge_idx = i; bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_PREREAD); return (0); } /* * Initialize a jumbo receive ring descriptor. This allocates * a jumbo buffer from the pool managed internally by the driver. */ static int bge_newbuf_jumbo(struct bge_softc *sc, int i) { bus_dma_segment_t segs[BGE_NSEG_JUMBO]; bus_dmamap_t map; struct bge_extrx_bd *r; struct mbuf *m; int error, nsegs; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); if (m_cljget(m, M_NOWAIT, MJUM9BYTES) == NULL) { m_freem(m); return (ENOBUFS); } m->m_len = m->m_pkthdr.len = MJUM9BYTES; if ((sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) == 0) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } if (sc->bge_cdata.bge_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); } map = sc->bge_cdata.bge_rx_jumbo_dmamap[i]; sc->bge_cdata.bge_rx_jumbo_dmamap[i] = sc->bge_cdata.bge_rx_jumbo_sparemap; sc->bge_cdata.bge_rx_jumbo_sparemap = map; sc->bge_cdata.bge_rx_jumbo_chain[i] = m; sc->bge_cdata.bge_rx_jumbo_seglen[i][0] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][1] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][2] = 0; sc->bge_cdata.bge_rx_jumbo_seglen[i][3] = 0; /* * Fill in the extended RX buffer descriptor. */ r = &sc->bge_ldata.bge_rx_jumbo_ring[sc->bge_jumbo]; r->bge_flags = BGE_RXBDFLAG_JUMBO_RING | BGE_RXBDFLAG_END; r->bge_idx = i; r->bge_len3 = r->bge_len2 = r->bge_len1 = 0; switch (nsegs) { case 4: r->bge_addr3.bge_addr_lo = BGE_ADDR_LO(segs[3].ds_addr); r->bge_addr3.bge_addr_hi = BGE_ADDR_HI(segs[3].ds_addr); r->bge_len3 = segs[3].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][3] = segs[3].ds_len; case 3: r->bge_addr2.bge_addr_lo = BGE_ADDR_LO(segs[2].ds_addr); r->bge_addr2.bge_addr_hi = BGE_ADDR_HI(segs[2].ds_addr); r->bge_len2 = segs[2].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][2] = segs[2].ds_len; case 2: r->bge_addr1.bge_addr_lo = BGE_ADDR_LO(segs[1].ds_addr); r->bge_addr1.bge_addr_hi = BGE_ADDR_HI(segs[1].ds_addr); r->bge_len1 = segs[1].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][1] = segs[1].ds_len; case 1: r->bge_addr0.bge_addr_lo = BGE_ADDR_LO(segs[0].ds_addr); r->bge_addr0.bge_addr_hi = BGE_ADDR_HI(segs[0].ds_addr); r->bge_len0 = segs[0].ds_len; sc->bge_cdata.bge_rx_jumbo_seglen[i][0] = segs[0].ds_len; break; default: panic("%s: %d segments\n", __func__, nsegs); } bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_PREREAD); return (0); } static int bge_init_rx_ring_std(struct bge_softc *sc) { int error, i; bzero(sc->bge_ldata.bge_rx_std_ring, BGE_STD_RX_RING_SZ); sc->bge_std = 0; for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if ((error = bge_newbuf_std(sc, i)) != 0) return (error); BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_std = 0; bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, BGE_STD_RX_RING_CNT - 1); return (0); } static void bge_free_rx_ring_std(struct bge_softc *sc) { int i; for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); m_freem(sc->bge_cdata.bge_rx_std_chain[i]); sc->bge_cdata.bge_rx_std_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_rx_std_ring[i], sizeof(struct bge_rx_bd)); } } static int bge_init_rx_ring_jumbo(struct bge_softc *sc) { struct bge_rcb *rcb; int error, i; bzero(sc->bge_ldata.bge_rx_jumbo_ring, BGE_JUMBO_RX_RING_SZ); sc->bge_jumbo = 0; for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if ((error = bge_newbuf_jumbo(sc, i)) != 0) return (error); BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_jumbo = 0; /* Enable the jumbo receive producer ring. */ rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_USE_EXT_RX_BD); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, BGE_JUMBO_RX_RING_CNT - 1); return (0); } static void bge_free_rx_ring_jumbo(struct bge_softc *sc) { int i; for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); m_freem(sc->bge_cdata.bge_rx_jumbo_chain[i]); sc->bge_cdata.bge_rx_jumbo_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_rx_jumbo_ring[i], sizeof(struct bge_extrx_bd)); } } static void bge_free_tx_ring(struct bge_softc *sc) { int i; if (sc->bge_ldata.bge_tx_ring == NULL) return; for (i = 0; i < BGE_TX_RING_CNT; i++) { if (sc->bge_cdata.bge_tx_chain[i] != NULL) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i]); m_freem(sc->bge_cdata.bge_tx_chain[i]); sc->bge_cdata.bge_tx_chain[i] = NULL; } bzero((char *)&sc->bge_ldata.bge_tx_ring[i], sizeof(struct bge_tx_bd)); } } static int bge_init_tx_ring(struct bge_softc *sc) { sc->bge_txcnt = 0; sc->bge_tx_saved_considx = 0; bzero(sc->bge_ldata.bge_tx_ring, BGE_TX_RING_SZ); bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Initialize transmit producer index for host-memory send ring. */ sc->bge_tx_prodidx = 0; bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx); /* NIC-memory send ring not used; initialize to zero. */ bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0); return (0); } static void bge_setpromisc(struct bge_softc *sc) { BGE_LOCK_ASSERT(sc); /* Enable or disable promiscuous mode as needed. */ if (sc->bge_if_flags & IFF_PROMISC) BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC); else BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC); } static void bge_hash_maddr(void *arg, struct sockaddr *maddr) { struct sockaddr_dl *sdl = (struct sockaddr_dl *)maddr; uint32_t *hashes = arg; int h; if (sdl->sdl_family != AF_LINK) return; h = ether_crc32_le(LLADDR(sdl), ETHER_ADDR_LEN) & 0x7F; hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F); } static void bge_setmulti(struct bge_softc *sc) { uint32_t hashes[4] = { 0, 0, 0, 0 }; int i; BGE_LOCK_ASSERT(sc); if (sc->bge_if_flags & (IFF_ALLMULTI | IFF_PROMISC)) { for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF); return; } /* First, zot all the existing filters. */ for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0); if_foreach_maddr(sc->bge_ifp, bge_hash_maddr, hashes); for (i = 0; i < 4; i++) CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]); } static void bge_setvlan(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; /* Enable or disable VLAN tag stripping as needed. */ if (sc->bge_capenable & IFCAP_VLAN_HWTAGGING) BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG); else BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG); } static void bge_sig_pre_reset(struct bge_softc *sc, int type) { /* * Some chips don't like this so only do this if ASF is enabled */ if (sc->bge_asf_mode) bge_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC); if (sc->bge_asf_mode & ASF_NEW_HANDSHAKE) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START); break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD); break; case BGE_RESET_SUSPEND: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_SUSPEND); break; } } if (type == BGE_RESET_START || type == BGE_RESET_SUSPEND) bge_ape_driver_state_change(sc, type); } static void bge_sig_post_reset(struct bge_softc *sc, int type) { if (sc->bge_asf_mode & ASF_NEW_HANDSHAKE) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START_DONE); /* START DONE */ break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD_DONE); break; } } if (type == BGE_RESET_SHUTDOWN) bge_ape_driver_state_change(sc, type); } static void bge_sig_legacy(struct bge_softc *sc, int type) { if (sc->bge_asf_mode) { switch (type) { case BGE_RESET_START: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_START); break; case BGE_RESET_SHUTDOWN: bge_writemem_ind(sc, BGE_SRAM_FW_DRV_STATE_MB, BGE_FW_DRV_STATE_UNLOAD); break; } } } static void bge_stop_fw(struct bge_softc *sc) { int i; if (sc->bge_asf_mode) { bge_writemem_ind(sc, BGE_SRAM_FW_CMD_MB, BGE_FW_CMD_PAUSE); CSR_WRITE_4(sc, BGE_RX_CPU_EVENT, CSR_READ_4(sc, BGE_RX_CPU_EVENT) | BGE_RX_CPU_DRV_EVENT); for (i = 0; i < 100; i++ ) { if (!(CSR_READ_4(sc, BGE_RX_CPU_EVENT) & BGE_RX_CPU_DRV_EVENT)) break; DELAY(10); } } } static uint32_t bge_dma_swap_options(struct bge_softc *sc) { uint32_t dma_options; dma_options = BGE_MODECTL_WORDSWAP_NONFRAME | BGE_MODECTL_BYTESWAP_DATA | BGE_MODECTL_WORDSWAP_DATA; #if BYTE_ORDER == BIG_ENDIAN dma_options |= BGE_MODECTL_BYTESWAP_NONFRAME; #endif return (dma_options); } /* * Do endian, PCI and DMA initialization. */ static int bge_chipinit(struct bge_softc *sc) { uint32_t dma_rw_ctl, misc_ctl, mode_ctl; uint16_t val; int i; /* Set endianness before we access any non-PCI registers. */ misc_ctl = BGE_INIT; if (sc->bge_flags & BGE_FLAG_TAGGED_STATUS) misc_ctl |= BGE_PCIMISCCTL_TAGGED_STATUS; pci_write_config(sc->bge_dev, BGE_PCI_MISC_CTL, misc_ctl, 4); /* * Clear the MAC statistics block in the NIC's * internal memory. */ for (i = BGE_STATS_BLOCK; i < BGE_STATS_BLOCK_END + 1; i += sizeof(uint32_t)) BGE_MEMWIN_WRITE(sc, i, 0); for (i = BGE_STATUS_BLOCK; i < BGE_STATUS_BLOCK_END + 1; i += sizeof(uint32_t)) BGE_MEMWIN_WRITE(sc, i, 0); if (sc->bge_chiprev == BGE_CHIPREV_5704_BX) { /* * Fix data corruption caused by non-qword write with WB. * Fix master abort in PCI mode. * Fix PCI latency timer. */ val = pci_read_config(sc->bge_dev, BGE_PCI_MSI_DATA + 2, 2); val |= (1 << 10) | (1 << 12) | (1 << 13); pci_write_config(sc->bge_dev, BGE_PCI_MSI_DATA + 2, val, 2); } if (sc->bge_asicrev == BGE_ASICREV_BCM57765 || sc->bge_asicrev == BGE_ASICREV_BCM57766) { /* * For the 57766 and non Ax versions of 57765, bootcode * needs to setup the PCIE Fast Training Sequence (FTS) * value to prevent transmit hangs. */ if (sc->bge_chiprev != BGE_CHIPREV_57765_AX) { CSR_WRITE_4(sc, BGE_CPMU_PADRNG_CTL, CSR_READ_4(sc, BGE_CPMU_PADRNG_CTL) | BGE_CPMU_PADRNG_CTL_RDIV2); } } /* * Set up the PCI DMA control register. */ dma_rw_ctl = BGE_PCIDMARWCTL_RD_CMD_SHIFT(6) | BGE_PCIDMARWCTL_WR_CMD_SHIFT(7); if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_mps >= 256) dma_rw_ctl |= BGE_PCIDMARWCTL_WR_WAT_SHIFT(7); else dma_rw_ctl |= BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else if (sc->bge_flags & BGE_FLAG_PCIX) { if (BGE_IS_5714_FAMILY(sc)) { /* 256 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(2) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(2); dma_rw_ctl |= (sc->bge_asicrev == BGE_ASICREV_BCM5780) ? BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL : BGE_PCIDMARWCTL_ONEDMA_ATONCE_LOCAL; } else if (sc->bge_asicrev == BGE_ASICREV_BCM5703) { /* * In the BCM5703, the DMA read watermark should * be set to less than or equal to the maximum * memory read byte count of the PCI-X command * register. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(4) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { /* 1536 bytes for read, 384 bytes for write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(7) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3); } else { /* 384 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(3) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(3) | 0x0F; } if (sc->bge_asicrev == BGE_ASICREV_BCM5703 || sc->bge_asicrev == BGE_ASICREV_BCM5704) { uint32_t tmp; /* Set ONE_DMA_AT_ONCE for hardware workaround. */ tmp = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1F; if (tmp == 6 || tmp == 7) dma_rw_ctl |= BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL; /* Set PCI-X DMA write workaround. */ dma_rw_ctl |= BGE_PCIDMARWCTL_ASRT_ALL_BE; } } else { /* Conventional PCI bus: 256 bytes for read and write. */ dma_rw_ctl |= BGE_PCIDMARWCTL_RD_WAT_SHIFT(7) | BGE_PCIDMARWCTL_WR_WAT_SHIFT(7); if (sc->bge_asicrev != BGE_ASICREV_BCM5705 && sc->bge_asicrev != BGE_ASICREV_BCM5750) dma_rw_ctl |= 0x0F; } if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_asicrev == BGE_ASICREV_BCM5701) dma_rw_ctl |= BGE_PCIDMARWCTL_USE_MRM | BGE_PCIDMARWCTL_ASRT_ALL_BE; if (sc->bge_asicrev == BGE_ASICREV_BCM5703 || sc->bge_asicrev == BGE_ASICREV_BCM5704) dma_rw_ctl &= ~BGE_PCIDMARWCTL_MINDMA; if (BGE_IS_5717_PLUS(sc)) { dma_rw_ctl &= ~BGE_PCIDMARWCTL_DIS_CACHE_ALIGNMENT; if (sc->bge_chipid == BGE_CHIPID_BCM57765_A0) dma_rw_ctl &= ~BGE_PCIDMARWCTL_CRDRDR_RDMA_MRRS_MSK; /* * Enable HW workaround for controllers that misinterpret * a status tag update and leave interrupts permanently * disabled. */ if (!BGE_IS_57765_PLUS(sc) && sc->bge_asicrev != BGE_ASICREV_BCM5717 && sc->bge_asicrev != BGE_ASICREV_BCM5762) dma_rw_ctl |= BGE_PCIDMARWCTL_TAGGED_STATUS_WA; } pci_write_config(sc->bge_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4); /* * Set up general mode register. */ mode_ctl = bge_dma_swap_options(sc); if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { /* Retain Host-2-BMC settings written by APE firmware. */ mode_ctl |= CSR_READ_4(sc, BGE_MODE_CTL) & (BGE_MODECTL_BYTESWAP_B2HRX_DATA | BGE_MODECTL_WORDSWAP_B2HRX_DATA | BGE_MODECTL_B2HRX_ENABLE | BGE_MODECTL_HTX2B_ENABLE); } mode_ctl |= BGE_MODECTL_MAC_ATTN_INTR | BGE_MODECTL_HOST_SEND_BDS | BGE_MODECTL_TX_NO_PHDR_CSUM; /* * BCM5701 B5 have a bug causing data corruption when using * 64-bit DMA reads, which can be terminated early and then * completed later as 32-bit accesses, in combination with * certain bridges. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5701 && sc->bge_chipid == BGE_CHIPID_BCM5701_B5) mode_ctl |= BGE_MODECTL_FORCE_PCI32; /* * Tell the firmware the driver is running */ if (sc->bge_asf_mode & ASF_STACKUP) mode_ctl |= BGE_MODECTL_STACKUP; CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl); /* * Disable memory write invalidate. Apparently it is not supported * properly by these devices. */ PCI_CLRBIT(sc->bge_dev, BGE_PCI_CMD, PCIM_CMD_MWIEN, 4); /* Set the timer prescaler (always 66 MHz). */ CSR_WRITE_4(sc, BGE_MISC_CFG, BGE_32BITTIME_66MHZ); /* XXX: The Linux tg3 driver does this at the start of brgphy_reset. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { DELAY(40); /* XXX */ /* Put PHY into ready state */ BGE_CLRBIT(sc, BGE_MISC_CFG, BGE_MISCCFG_EPHY_IDDQ); CSR_READ_4(sc, BGE_MISC_CFG); /* Flush */ DELAY(40); } return (0); } static int bge_blockinit(struct bge_softc *sc) { struct bge_rcb *rcb; bus_size_t vrcb; bge_hostaddr taddr; uint32_t dmactl, rdmareg, val; int i, limit; /* * Initialize the memory window pointer register so that * we can access the first 32K of internal NIC RAM. This will * allow us to set up the TX send ring RCBs and the RX return * ring RCBs, plus other things which live in NIC memory. */ CSR_WRITE_4(sc, BGE_PCI_MEMWIN_BASEADDR, 0); /* Note: the BCM5704 has a smaller mbuf space than other chips. */ if (!(BGE_IS_5705_PLUS(sc))) { /* Configure mbuf memory pool */ CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_BASEADDR, BGE_BUFFPOOL_1); if (sc->bge_asicrev == BGE_ASICREV_BCM5704) CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_LEN, 0x10000); else CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_LEN, 0x18000); /* Configure DMA resource pool */ CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_BASEADDR, BGE_DMA_DESCRIPTORS); CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LEN, 0x2000); } /* Configure mbuf pool watermarks */ if (BGE_IS_5717_PLUS(sc)) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); if (sc->bge_mtu > ETHERMTU) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea); } else { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x2a); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xa0); } } else if (!BGE_IS_5705_PLUS(sc)) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x50); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x20); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x04); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x10); } else { CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x10); CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60); } /* Configure DMA resource watermarks */ CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LOWAT, 5); CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10); /* Enable buffer manager */ val = BGE_BMANMODE_ENABLE | BGE_BMANMODE_LOMBUF_ATTN; /* * Change the arbitration algorithm of TXMBUF read request to * round-robin instead of priority based for BCM5719. When * TXFIFO is almost empty, RDMA will hold its request until * TXFIFO is not almost empty. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val |= BGE_BMANMODE_NO_TX_UNDERRUN; CSR_WRITE_4(sc, BGE_BMAN_MODE, val); /* Poll for buffer manager start indication */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "buffer manager failed to start\n"); return (ENXIO); } /* Enable flow-through queues */ CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0); /* Wait until queue initialization is complete */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (CSR_READ_4(sc, BGE_FTQ_RESET) == 0) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "flow-through queue init failed\n"); return (ENXIO); } /* * Summary of rings supported by the controller: * * Standard Receive Producer Ring * - This ring is used to feed receive buffers for "standard" * sized frames (typically 1536 bytes) to the controller. * * Jumbo Receive Producer Ring * - This ring is used to feed receive buffers for jumbo sized * frames (i.e. anything bigger than the "standard" frames) * to the controller. * * Mini Receive Producer Ring * - This ring is used to feed receive buffers for "mini" * sized frames to the controller. * - This feature required external memory for the controller * but was never used in a production system. Should always * be disabled. * * Receive Return Ring * - After the controller has placed an incoming frame into a * receive buffer that buffer is moved into a receive return * ring. The driver is then responsible to passing the * buffer up to the stack. Many versions of the controller * support multiple RR rings. * * Send Ring * - This ring is used for outgoing frames. Many versions of * the controller support multiple send rings. */ /* Initialize the standard receive producer ring control block. */ rcb = &sc->bge_ldata.bge_info.bge_std_rx_rcb; rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_std_ring_paddr); rcb->bge_hostaddr.bge_addr_hi = BGE_ADDR_HI(sc->bge_ldata.bge_rx_std_ring_paddr); bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREREAD); if (BGE_IS_5717_PLUS(sc)) { /* * Bits 31-16: Programmable ring size (2048, 1024, 512, .., 32) * Bits 15-2 : Maximum RX frame size * Bit 1 : 1 = Ring Disabled, 0 = Ring ENabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, BGE_MAX_FRAMELEN << 2); } else if (BGE_IS_5705_PLUS(sc)) { /* * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32) * Bits 15-2 : Reserved (should be 0) * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0); } else { /* * Ring size is always XXX entries * Bits 31-16: Maximum RX frame size * Bits 15-2 : Reserved (should be 0) * Bit 1 : 1 = Ring Disabled, 0 = Ring Enabled * Bit 0 : Reserved */ rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN, 0); } if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) rcb->bge_nicaddr = BGE_STD_RX_RINGS_5717; else rcb->bge_nicaddr = BGE_STD_RX_RINGS; /* Write the standard receive producer ring control block. */ CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr); /* Reset the standard receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0); /* * Initialize the jumbo RX producer ring control * block. We set the 'ring disabled' bit in the * flags field until we're actually ready to start * using this ring (i.e. once we set the MTU * high enough to require it). */ if (BGE_IS_JUMBO_CAPABLE(sc)) { rcb = &sc->bge_ldata.bge_info.bge_jumbo_rx_rcb; /* Get the jumbo receive producer ring RCB parameters. */ rcb->bge_hostaddr.bge_addr_lo = BGE_ADDR_LO(sc->bge_ldata.bge_rx_jumbo_ring_paddr); rcb->bge_hostaddr.bge_addr_hi = BGE_ADDR_HI(sc->bge_ldata.bge_rx_jumbo_ring_paddr); bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREREAD); rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_USE_EXT_RX_BD | BGE_RCB_FLAG_RING_DISABLED); if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS_5717; else rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS; CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo); /* Program the jumbo receive producer ring RCB parameters. */ CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr); /* Reset the jumbo receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0); } /* Disable the mini receive producer ring RCB. */ if (BGE_IS_5700_FAMILY(sc)) { rcb = &sc->bge_ldata.bge_info.bge_mini_rx_rcb; rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED); CSR_WRITE_4(sc, BGE_RX_MINI_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags); /* Reset the mini receive producer ring producer index. */ bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0); } /* Choose de-pipeline mode for BCM5906 A0, A1 and A2. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { if (sc->bge_chipid == BGE_CHIPID_BCM5906_A0 || sc->bge_chipid == BGE_CHIPID_BCM5906_A1 || sc->bge_chipid == BGE_CHIPID_BCM5906_A2) CSR_WRITE_4(sc, BGE_ISO_PKT_TX, (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2); } /* * The BD ring replenish thresholds control how often the * hardware fetches new BD's from the producer rings in host * memory. Setting the value too low on a busy system can * starve the hardware and recue the throughpout. * * Set the BD ring replentish thresholds. The recommended * values are 1/8th the number of descriptors allocated to * each ring. * XXX The 5754 requires a lower threshold, so it might be a * requirement of all 575x family chips. The Linux driver sets * the lower threshold for all 5705 family chips as well, but there * are reports that it might not need to be so strict. * * XXX Linux does some extra fiddling here for the 5906 parts as * well. */ if (BGE_IS_5705_PLUS(sc)) val = 8; else val = BGE_STD_RX_RING_CNT / 8; CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val); if (BGE_IS_JUMBO_CAPABLE(sc)) CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH, BGE_JUMBO_RX_RING_CNT/8); if (BGE_IS_5717_PLUS(sc)) { CSR_WRITE_4(sc, BGE_STD_REPLENISH_LWM, 32); CSR_WRITE_4(sc, BGE_JMB_REPLENISH_LWM, 16); } /* * Disable all send rings by setting the 'ring disabled' bit * in the flags field of all the TX send ring control blocks, * located in NIC memory. */ if (!BGE_IS_5705_PLUS(sc)) /* 5700 to 5704 had 16 send rings. */ limit = BGE_TX_RINGS_EXTSSRAM_MAX; else if (BGE_IS_57765_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5762) limit = 2; else if (BGE_IS_5717_PLUS(sc)) limit = 4; else limit = 1; vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED)); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); vrcb += sizeof(struct bge_rcb); } /* Configure send ring RCB 0 (we use only the first ring) */ vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_tx_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_SEND_RING_5717); else RCB_WRITE_4(sc, vrcb, bge_nicaddr, BGE_NIC_TXRING_ADDR(0, BGE_TX_RING_CNT)); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0)); /* * Disable all receive return rings by setting the * 'ring diabled' bit in the flags field of all the receive * return ring control blocks, located in NIC memory. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* Should be 17, use 16 until we get an SRAM map. */ limit = 16; } else if (!BGE_IS_5705_PLUS(sc)) limit = BGE_RX_RINGS_MAX; else if (sc->bge_asicrev == BGE_ASICREV_BCM5755 || sc->bge_asicrev == BGE_ASICREV_BCM5762 || BGE_IS_57765_PLUS(sc)) limit = 4; else limit = 1; /* Disable all receive return rings. */ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; for (i = 0; i < limit; i++) { RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_FLAG_RING_DISABLED); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); bge_writembx(sc, BGE_MBX_RX_CONS0_LO + (i * (sizeof(uint64_t))), 0); vrcb += sizeof(struct bge_rcb); } /* * Set up receive return ring 0. Note that the NIC address * for RX return rings is 0x0. The return rings live entirely * within the host, so the nicaddr field in the RCB isn't used. */ vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB; BGE_HOSTADDR(taddr, sc->bge_ldata.bge_rx_return_ring_paddr); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi); RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo); RCB_WRITE_4(sc, vrcb, bge_nicaddr, 0); RCB_WRITE_4(sc, vrcb, bge_maxlen_flags, BGE_RCB_MAXLEN_FLAGS(sc->bge_return_ring_cnt, 0)); /* Set random backoff seed for TX */ CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF, (if_lladdr(sc->bge_ifp)[0] + if_lladdr(sc->bge_ifp)[1] + if_lladdr(sc->bge_ifp)[2] + if_lladdr(sc->bge_ifp)[3] + if_lladdr(sc->bge_ifp)[4] + if_lladdr(sc->bge_ifp)[5]) & BGE_TX_BACKOFF_SEED_MASK); /* Set inter-packet gap */ val = 0x2620; if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) val |= CSR_READ_4(sc, BGE_TX_LENGTHS) & (BGE_TXLEN_JMB_FRM_LEN_MSK | BGE_TXLEN_CNT_DN_VAL_MSK); CSR_WRITE_4(sc, BGE_TX_LENGTHS, val); /* * Specify which ring to use for packets that don't match * any RX rules. */ CSR_WRITE_4(sc, BGE_RX_RULES_CFG, 0x08); /* * Configure number of RX lists. One interrupt distribution * list, sixteen active lists, one bad frames class. */ CSR_WRITE_4(sc, BGE_RXLP_CFG, 0x181); /* Inialize RX list placement stats mask. */ CSR_WRITE_4(sc, BGE_RXLP_STATS_ENABLE_MASK, 0x007FFFFF); CSR_WRITE_4(sc, BGE_RXLP_STATS_CTL, 0x1); /* Disable host coalescing until we get it set up */ CSR_WRITE_4(sc, BGE_HCC_MODE, 0x00000000); /* Poll to make sure it's shut down. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, BGE_HCC_MODE) & BGE_HCCMODE_ENABLE)) break; } if (i == BGE_TIMEOUT) { device_printf(sc->bge_dev, "host coalescing engine failed to idle\n"); return (ENXIO); } /* Set up host coalescing defaults */ CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, sc->bge_rx_coal_ticks); CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, sc->bge_tx_coal_ticks); CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, sc->bge_rx_max_coal_bds); CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, sc->bge_tx_max_coal_bds); if (!(BGE_IS_5705_PLUS(sc))) { CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS_INT, 0); CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS_INT, 0); } CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, 1); CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, 1); /* Set up address of statistics block */ if (!(BGE_IS_5705_PLUS(sc))) { CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_HI, BGE_ADDR_HI(sc->bge_ldata.bge_stats_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_LO, BGE_ADDR_LO(sc->bge_ldata.bge_stats_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATS_BASEADDR, BGE_STATS_BLOCK); CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_BASEADDR, BGE_STATUS_BLOCK); CSR_WRITE_4(sc, BGE_HCC_STATS_TICKS, sc->bge_stat_ticks); } /* Set up address of status block */ CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI, BGE_ADDR_HI(sc->bge_ldata.bge_status_block_paddr)); CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO, BGE_ADDR_LO(sc->bge_ldata.bge_status_block_paddr)); /* Set up status block size. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) { val = BGE_STATBLKSZ_FULL; bzero(sc->bge_ldata.bge_status_block, BGE_STATUS_BLK_SZ); } else { val = BGE_STATBLKSZ_32BYTE; bzero(sc->bge_ldata.bge_status_block, 32); } bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Turn on host coalescing state machine */ CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE); /* Turn on RX BD completion state machine and enable attentions */ CSR_WRITE_4(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE | BGE_RBDCMODE_ATTN); /* Turn on RX list placement state machine */ CSR_WRITE_4(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE); /* Turn on RX list selector state machine. */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE); /* Turn on DMA, clear stats. */ val = BGE_MACMODE_TXDMA_ENB | BGE_MACMODE_RXDMA_ENB | BGE_MACMODE_RX_STATS_CLEAR | BGE_MACMODE_TX_STATS_CLEAR | BGE_MACMODE_RX_STATS_ENB | BGE_MACMODE_TX_STATS_ENB | BGE_MACMODE_FRMHDR_DMA_ENB; if (sc->bge_flags & BGE_FLAG_TBI) val |= BGE_PORTMODE_TBI; else if (sc->bge_flags & BGE_FLAG_MII_SERDES) val |= BGE_PORTMODE_GMII; else val |= BGE_PORTMODE_MII; /* Allow APE to send/receive frames. */ if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) val |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN; CSR_WRITE_4(sc, BGE_MAC_MODE, val); DELAY(40); /* Set misc. local control, enable interrupts on attentions */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN); #ifdef notdef /* Assert GPIO pins for PHY reset */ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUT0 | BGE_MLC_MISCIO_OUT1 | BGE_MLC_MISCIO_OUT2); BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUTEN0 | BGE_MLC_MISCIO_OUTEN1 | BGE_MLC_MISCIO_OUTEN2); #endif /* Turn on DMA completion state machine */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE); val = BGE_WDMAMODE_ENABLE | BGE_WDMAMODE_ALL_ATTNS; /* Enable host coalescing bug fix. */ if (BGE_IS_5755_PLUS(sc)) val |= BGE_WDMAMODE_STATUS_TAG_FIX; /* Request larger DMA burst size to get better performance. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5785) val |= BGE_WDMAMODE_BURST_ALL_DATA; /* Turn on write DMA state machine */ CSR_WRITE_4(sc, BGE_WDMA_MODE, val); DELAY(40); /* Turn on read DMA state machine */ val = BGE_RDMAMODE_ENABLE | BGE_RDMAMODE_ALL_ATTNS; if (sc->bge_asicrev == BGE_ASICREV_BCM5717) val |= BGE_RDMAMODE_MULT_DMA_RD_DIS; if (sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) val |= BGE_RDMAMODE_BD_SBD_CRPT_ATTN | BGE_RDMAMODE_MBUF_RBD_CRPT_ATTN | BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN; if (sc->bge_flags & BGE_FLAG_PCIE) val |= BGE_RDMAMODE_FIFO_LONG_BURST; if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) { val |= BGE_RDMAMODE_TSO4_ENABLE; if (sc->bge_flags & BGE_FLAG_TSO3 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) val |= BGE_RDMAMODE_TSO6_ENABLE; } if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { val |= CSR_READ_4(sc, BGE_RDMA_MODE) & BGE_RDMAMODE_H2BNC_VLAN_DET; /* * Allow multiple outstanding read requests from * non-LSO read DMA engine. */ val &= ~BGE_RDMAMODE_MULT_DMA_RD_DIS; } if (sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780 || BGE_IS_5717_PLUS(sc) || BGE_IS_57765_PLUS(sc)) { if (sc->bge_asicrev == BGE_ASICREV_BCM5762) rdmareg = BGE_RDMA_RSRVCTRL_REG2; else rdmareg = BGE_RDMA_RSRVCTRL; dmactl = CSR_READ_4(sc, rdmareg); /* * Adjust tx margin to prevent TX data corruption and * fix internal FIFO overflow. */ if (sc->bge_chipid == BGE_CHIPID_BCM5719_A0 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { dmactl &= ~(BGE_RDMA_RSRVCTRL_FIFO_LWM_MASK | BGE_RDMA_RSRVCTRL_FIFO_HWM_MASK | BGE_RDMA_RSRVCTRL_TXMRGN_MASK); dmactl |= BGE_RDMA_RSRVCTRL_FIFO_LWM_1_5K | BGE_RDMA_RSRVCTRL_FIFO_HWM_1_5K | BGE_RDMA_RSRVCTRL_TXMRGN_320B; } /* * Enable fix for read DMA FIFO overruns. * The fix is to limit the number of RX BDs * the hardware would fetch at a fime. */ CSR_WRITE_4(sc, rdmareg, dmactl | BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX); } if (sc->bge_asicrev == BGE_ASICREV_BCM5719) { CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* * Allow 4KB burst length reads for non-LSO frames. * Enable 512B burst length reads for buffer descriptors. */ CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_512 | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } else if (sc->bge_asicrev == BGE_ASICREV_BCM5762) { CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL_REG2, CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL_REG2) | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K | BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K); } CSR_WRITE_4(sc, BGE_RDMA_MODE, val); DELAY(40); if (sc->bge_flags & BGE_FLAG_RDMA_BUG) { for (i = 0; i < BGE_NUM_RDMA_CHANNELS / 2; i++) { val = CSR_READ_4(sc, BGE_RDMA_LENGTH + i * 4); if ((val & 0xFFFF) > BGE_FRAMELEN) break; if (((val >> 16) & 0xFFFF) > BGE_FRAMELEN) break; } if (i != BGE_NUM_RDMA_CHANNELS / 2) { val = CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL); if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val |= BGE_RDMA_TX_LENGTH_WA_5719; else val |= BGE_RDMA_TX_LENGTH_WA_5720; CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, val); } } /* Turn on RX data completion state machine */ CSR_WRITE_4(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE); /* Turn on RX BD initiator state machine */ CSR_WRITE_4(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE); /* Turn on RX data and RX BD initiator state machine */ CSR_WRITE_4(sc, BGE_RDBDI_MODE, BGE_RDBDIMODE_ENABLE); /* Turn on Mbuf cluster free state machine */ if (!(BGE_IS_5705_PLUS(sc))) CSR_WRITE_4(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE); /* Turn on send BD completion state machine */ CSR_WRITE_4(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE); /* Turn on send data completion state machine */ val = BGE_SDCMODE_ENABLE; if (sc->bge_asicrev == BGE_ASICREV_BCM5761) val |= BGE_SDCMODE_CDELAY; CSR_WRITE_4(sc, BGE_SDC_MODE, val); /* Turn on send data initiator state machine */ if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE | BGE_SDIMODE_HW_LSO_PRE_DMA); else CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE); /* Turn on send BD initiator state machine */ CSR_WRITE_4(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE); /* Turn on send BD selector state machine */ CSR_WRITE_4(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE); CSR_WRITE_4(sc, BGE_SDI_STATS_ENABLE_MASK, 0x007FFFFF); CSR_WRITE_4(sc, BGE_SDI_STATS_CTL, BGE_SDISTATSCTL_ENABLE | BGE_SDISTATSCTL_FASTER); /* ack/clear link change events */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); CSR_WRITE_4(sc, BGE_MI_STS, 0); /* * Enable attention when the link has changed state for * devices that use auto polling. */ if (sc->bge_flags & BGE_FLAG_TBI) { CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK); } else { if (sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) { CSR_WRITE_4(sc, BGE_MI_MODE, sc->bge_mi_mode); DELAY(80); } if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) CSR_WRITE_4(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_MI_INTERRUPT); } /* * Clear any pending link state attention. * Otherwise some link state change events may be lost until attention * is cleared by bge_intr() -> bge_link_upd() sequence. * It's not necessary on newer BCM chips - perhaps enabling link * state change attentions implies clearing pending attention. */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); /* Enable link state change attentions. */ BGE_SETBIT(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_LINK_CHANGED); return (0); } static const struct bge_revision * bge_lookup_rev(uint32_t chipid) { const struct bge_revision *br; for (br = bge_revisions; br->br_name != NULL; br++) { if (br->br_chipid == chipid) return (br); } for (br = bge_majorrevs; br->br_name != NULL; br++) { if (br->br_chipid == BGE_ASICREV(chipid)) return (br); } return (NULL); } static const struct bge_vendor * bge_lookup_vendor(uint16_t vid) { const struct bge_vendor *v; for (v = bge_vendors; v->v_name != NULL; v++) if (v->v_id == vid) return (v); return (NULL); } static uint32_t bge_chipid(device_t dev) { uint32_t id; id = pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >> BGE_PCIMISCCTL_ASICREV_SHIFT; if (BGE_ASICREV(id) == BGE_ASICREV_USE_PRODID_REG) { /* * Find the ASCI revision. Different chips use different * registers. */ switch (pci_get_device(dev)) { case BCOM_DEVICEID_BCM5717: case BCOM_DEVICEID_BCM5718: case BCOM_DEVICEID_BCM5719: case BCOM_DEVICEID_BCM5720: case BCOM_DEVICEID_BCM5725: case BCOM_DEVICEID_BCM5727: case BCOM_DEVICEID_BCM5762: case BCOM_DEVICEID_BCM57764: case BCOM_DEVICEID_BCM57767: case BCOM_DEVICEID_BCM57787: id = pci_read_config(dev, BGE_PCI_GEN2_PRODID_ASICREV, 4); break; case BCOM_DEVICEID_BCM57761: case BCOM_DEVICEID_BCM57762: case BCOM_DEVICEID_BCM57765: case BCOM_DEVICEID_BCM57766: case BCOM_DEVICEID_BCM57781: case BCOM_DEVICEID_BCM57782: case BCOM_DEVICEID_BCM57785: case BCOM_DEVICEID_BCM57786: case BCOM_DEVICEID_BCM57791: case BCOM_DEVICEID_BCM57795: id = pci_read_config(dev, BGE_PCI_GEN15_PRODID_ASICREV, 4); break; default: id = pci_read_config(dev, BGE_PCI_PRODID_ASICREV, 4); } } return (id); } /* * Probe for a Broadcom chip. Check the PCI vendor and device IDs * against our list and return its name if we find a match. * * Note that since the Broadcom controller contains VPD support, we * try to get the device name string from the controller itself instead * of the compiled-in string. It guarantees we'll always announce the * right product name. We fall back to the compiled-in string when * VPD is unavailable or corrupt. */ static int bge_probe(device_t dev) { char buf[96]; char model[64]; const struct bge_revision *br; const char *pname; struct bge_softc *sc; const struct bge_type *t = bge_devs; const struct bge_vendor *v; uint32_t id; uint16_t did, vid; sc = device_get_softc(dev); sc->bge_dev = dev; vid = pci_get_vendor(dev); did = pci_get_device(dev); while(t->bge_vid != 0) { if ((vid == t->bge_vid) && (did == t->bge_did)) { id = bge_chipid(dev); br = bge_lookup_rev(id); if (bge_has_eaddr(sc) && pci_get_vpd_ident(dev, &pname) == 0) snprintf(model, sizeof(model), "%s", pname); else { v = bge_lookup_vendor(vid); snprintf(model, sizeof(model), "%s %s", v != NULL ? v->v_name : "Unknown", br != NULL ? br->br_name : "NetXtreme/NetLink Ethernet Controller"); } snprintf(buf, sizeof(buf), "%s, %sASIC rev. %#08x", model, br != NULL ? "" : "unknown ", id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static void bge_dma_free(struct bge_softc *sc) { int i; /* Destroy DMA maps for RX buffers. */ for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_std_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_dmamap[i]); } if (sc->bge_cdata.bge_rx_std_sparemap) bus_dmamap_destroy(sc->bge_cdata.bge_rx_mtag, sc->bge_cdata.bge_rx_std_sparemap); /* Destroy DMA maps for jumbo RX buffers. */ for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { if (sc->bge_cdata.bge_rx_jumbo_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_dmamap[i]); } if (sc->bge_cdata.bge_rx_jumbo_sparemap) bus_dmamap_destroy(sc->bge_cdata.bge_mtag_jumbo, sc->bge_cdata.bge_rx_jumbo_sparemap); /* Destroy DMA maps for TX buffers. */ for (i = 0; i < BGE_TX_RING_CNT; i++) { if (sc->bge_cdata.bge_tx_dmamap[i]) bus_dmamap_destroy(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[i]); } if (sc->bge_cdata.bge_rx_mtag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_mtag); if (sc->bge_cdata.bge_mtag_jumbo) bus_dma_tag_destroy(sc->bge_cdata.bge_mtag_jumbo); if (sc->bge_cdata.bge_tx_mtag) bus_dma_tag_destroy(sc->bge_cdata.bge_tx_mtag); /* Destroy standard RX ring. */ if (sc->bge_ldata.bge_rx_std_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map); if (sc->bge_ldata.bge_rx_std_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_ldata.bge_rx_std_ring, sc->bge_cdata.bge_rx_std_ring_map); if (sc->bge_cdata.bge_rx_std_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_std_ring_tag); /* Destroy jumbo RX ring. */ if (sc->bge_ldata.bge_rx_jumbo_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map); if (sc->bge_ldata.bge_rx_jumbo_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_ldata.bge_rx_jumbo_ring, sc->bge_cdata.bge_rx_jumbo_ring_map); if (sc->bge_cdata.bge_rx_jumbo_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_jumbo_ring_tag); /* Destroy RX return ring. */ if (sc->bge_ldata.bge_rx_return_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map); if (sc->bge_ldata.bge_rx_return_ring) bus_dmamem_free(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_ldata.bge_rx_return_ring, sc->bge_cdata.bge_rx_return_ring_map); if (sc->bge_cdata.bge_rx_return_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_rx_return_ring_tag); /* Destroy TX ring. */ if (sc->bge_ldata.bge_tx_ring_paddr) bus_dmamap_unload(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map); if (sc->bge_ldata.bge_tx_ring) bus_dmamem_free(sc->bge_cdata.bge_tx_ring_tag, sc->bge_ldata.bge_tx_ring, sc->bge_cdata.bge_tx_ring_map); if (sc->bge_cdata.bge_tx_ring_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_tx_ring_tag); /* Destroy status block. */ if (sc->bge_ldata.bge_status_block_paddr) bus_dmamap_unload(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map); if (sc->bge_ldata.bge_status_block) bus_dmamem_free(sc->bge_cdata.bge_status_tag, sc->bge_ldata.bge_status_block, sc->bge_cdata.bge_status_map); if (sc->bge_cdata.bge_status_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_status_tag); /* Destroy statistics block. */ if (sc->bge_ldata.bge_stats_paddr) bus_dmamap_unload(sc->bge_cdata.bge_stats_tag, sc->bge_cdata.bge_stats_map); if (sc->bge_ldata.bge_stats) bus_dmamem_free(sc->bge_cdata.bge_stats_tag, sc->bge_ldata.bge_stats, sc->bge_cdata.bge_stats_map); if (sc->bge_cdata.bge_stats_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_stats_tag); if (sc->bge_cdata.bge_buffer_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_buffer_tag); /* Destroy the parent tag. */ if (sc->bge_cdata.bge_parent_tag) bus_dma_tag_destroy(sc->bge_cdata.bge_parent_tag); } static int bge_dma_ring_alloc(struct bge_softc *sc, bus_size_t alignment, bus_size_t maxsize, bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t *map, bus_addr_t *paddr, const char *msg) { struct bge_dmamap_arg ctx; int error; error = bus_dma_tag_create(sc->bge_cdata.bge_parent_tag, alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag); if (error != 0) { device_printf(sc->bge_dev, "could not create %s dma tag\n", msg); return (ENOMEM); } /* Allocate DMA'able memory for ring. */ error = bus_dmamem_alloc(*tag, (void **)ring, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, map); if (error != 0) { device_printf(sc->bge_dev, "could not allocate DMA'able memory for %s\n", msg); return (ENOMEM); } /* Load the address of the ring. */ ctx.bge_busaddr = 0; error = bus_dmamap_load(*tag, *map, *ring, maxsize, bge_dma_map_addr, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->bge_dev, "could not load DMA'able memory for %s\n", msg); return (ENOMEM); } *paddr = ctx.bge_busaddr; return (0); } static int bge_dma_alloc(struct bge_softc *sc) { bus_addr_t lowaddr; bus_size_t rxmaxsegsz, sbsz, txsegsz, txmaxsegsz; int i, error; lowaddr = BUS_SPACE_MAXADDR; if ((sc->bge_flags & BGE_FLAG_40BIT_BUG) != 0) lowaddr = BGE_DMA_MAXADDR; /* * Allocate the parent bus DMA tag appropriate for PCI. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->bge_cdata.bge_parent_tag); if (error != 0) { device_printf(sc->bge_dev, "could not allocate parent dma tag\n"); return (ENOMEM); } /* Create tag for standard RX ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_STD_RX_RING_SZ, &sc->bge_cdata.bge_rx_std_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_std_ring, &sc->bge_cdata.bge_rx_std_ring_map, &sc->bge_ldata.bge_rx_std_ring_paddr, "RX ring"); if (error) return (error); /* Create tag for RX return ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_RX_RTN_RING_SZ(sc), &sc->bge_cdata.bge_rx_return_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_return_ring, &sc->bge_cdata.bge_rx_return_ring_map, &sc->bge_ldata.bge_rx_return_ring_paddr, "RX return ring"); if (error) return (error); /* Create tag for TX ring. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_TX_RING_SZ, &sc->bge_cdata.bge_tx_ring_tag, (uint8_t **)&sc->bge_ldata.bge_tx_ring, &sc->bge_cdata.bge_tx_ring_map, &sc->bge_ldata.bge_tx_ring_paddr, "TX ring"); if (error) return (error); /* * Create tag for status block. * Because we only use single Tx/Rx/Rx return ring, use * minimum status block size except BCM5700 AX/BX which * seems to want to see full status block size regardless * of configured number of ring. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) sbsz = BGE_STATUS_BLK_SZ; else sbsz = 32; error = bge_dma_ring_alloc(sc, PAGE_SIZE, sbsz, &sc->bge_cdata.bge_status_tag, (uint8_t **)&sc->bge_ldata.bge_status_block, &sc->bge_cdata.bge_status_map, &sc->bge_ldata.bge_status_block_paddr, "status block"); if (error) return (error); /* Create tag for statistics block. */ error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_STATS_SZ, &sc->bge_cdata.bge_stats_tag, (uint8_t **)&sc->bge_ldata.bge_stats, &sc->bge_cdata.bge_stats_map, &sc->bge_ldata.bge_stats_paddr, "statistics block"); if (error) return (error); /* Create tag for jumbo RX ring. */ if (BGE_IS_JUMBO_CAPABLE(sc)) { error = bge_dma_ring_alloc(sc, PAGE_SIZE, BGE_JUMBO_RX_RING_SZ, &sc->bge_cdata.bge_rx_jumbo_ring_tag, (uint8_t **)&sc->bge_ldata.bge_rx_jumbo_ring, &sc->bge_cdata.bge_rx_jumbo_ring_map, &sc->bge_ldata.bge_rx_jumbo_ring_paddr, "jumbo RX ring"); if (error) return (error); } /* Create parent tag for buffers. */ if ((sc->bge_flags & BGE_FLAG_4G_BNDRY_BUG) != 0) { /* * XXX * watchdog timeout issue was observed on BCM5704 which * lives behind PCI-X bridge(e.g AMD 8131 PCI-X bridge). * Both limiting DMA address space to 32bits and flushing * mailbox write seem to address the issue. */ if (sc->bge_pcixcap != 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; } error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->bge_cdata.bge_buffer_tag); if (error != 0) { device_printf(sc->bge_dev, "could not allocate buffer dma tag\n"); return (ENOMEM); } /* Create tag for Tx mbufs. */ if (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) { txsegsz = BGE_TSOSEG_SZ; txmaxsegsz = 65535 + sizeof(struct ether_vlan_header); } else { txsegsz = MCLBYTES; txmaxsegsz = MCLBYTES * BGE_NSEG_NEW; } error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, txmaxsegsz, BGE_NSEG_NEW, txsegsz, 0, NULL, NULL, &sc->bge_cdata.bge_tx_mtag); if (error) { device_printf(sc->bge_dev, "could not allocate TX dma tag\n"); return (ENOMEM); } /* Create tag for Rx mbufs. */ if (sc->bge_flags & BGE_FLAG_JUMBO_STD) rxmaxsegsz = MJUM9BYTES; else rxmaxsegsz = MCLBYTES; error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, rxmaxsegsz, 1, rxmaxsegsz, 0, NULL, NULL, &sc->bge_cdata.bge_rx_mtag); if (error) { device_printf(sc->bge_dev, "could not allocate RX dma tag\n"); return (ENOMEM); } /* Create DMA maps for RX buffers. */ error = bus_dmamap_create(sc->bge_cdata.bge_rx_mtag, 0, &sc->bge_cdata.bge_rx_std_sparemap); if (error) { device_printf(sc->bge_dev, "can't create spare DMA map for RX\n"); return (ENOMEM); } for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_rx_mtag, 0, &sc->bge_cdata.bge_rx_std_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for RX\n"); return (ENOMEM); } } /* Create DMA maps for TX buffers. */ for (i = 0; i < BGE_TX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_tx_mtag, 0, &sc->bge_cdata.bge_tx_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for TX\n"); return (ENOMEM); } } /* Create tags for jumbo RX buffers. */ if (BGE_IS_JUMBO_CAPABLE(sc)) { error = bus_dma_tag_create(sc->bge_cdata.bge_buffer_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, BGE_NSEG_JUMBO, PAGE_SIZE, 0, NULL, NULL, &sc->bge_cdata.bge_mtag_jumbo); if (error) { device_printf(sc->bge_dev, "could not allocate jumbo dma tag\n"); return (ENOMEM); } /* Create DMA maps for jumbo RX buffers. */ error = bus_dmamap_create(sc->bge_cdata.bge_mtag_jumbo, 0, &sc->bge_cdata.bge_rx_jumbo_sparemap); if (error) { device_printf(sc->bge_dev, "can't create spare DMA map for jumbo RX\n"); return (ENOMEM); } for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->bge_cdata.bge_mtag_jumbo, 0, &sc->bge_cdata.bge_rx_jumbo_dmamap[i]); if (error) { device_printf(sc->bge_dev, "can't create DMA map for jumbo RX\n"); return (ENOMEM); } } } return (0); } /* * Return true if this device has more than one port. */ static int bge_has_multiple_ports(struct bge_softc *sc) { device_t dev = sc->bge_dev; u_int b, d, f, fscan, s; d = pci_get_domain(dev); b = pci_get_bus(dev); s = pci_get_slot(dev); f = pci_get_function(dev); for (fscan = 0; fscan <= PCI_FUNCMAX; fscan++) if (fscan != f && pci_find_dbsf(d, b, s, fscan) != NULL) return (1); return (0); } /* * Return true if MSI can be used with this device. */ static int bge_can_use_msi(struct bge_softc *sc) { int can_use_msi = 0; if (sc->bge_msi == 0) return (0); /* Disable MSI for polling(4). */ #ifdef DEVICE_POLLING return (0); #endif switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5714_A0: case BGE_ASICREV_BCM5714: /* * Apparently, MSI doesn't work when these chips are * configured in single-port mode. */ if (bge_has_multiple_ports(sc)) can_use_msi = 1; break; case BGE_ASICREV_BCM5750: if (sc->bge_chiprev != BGE_CHIPREV_5750_AX && sc->bge_chiprev != BGE_CHIPREV_5750_BX) can_use_msi = 1; break; default: if (BGE_IS_575X_PLUS(sc)) can_use_msi = 1; } return (can_use_msi); } static int bge_mbox_reorder(struct bge_softc *sc) { /* Lists of PCI bridges that are known to reorder mailbox writes. */ static const struct mbox_reorder { const uint16_t vendor; const uint16_t device; const char *desc; } mbox_reorder_lists[] = { { 0x1022, 0x7450, "AMD-8131 PCI-X Bridge" }, }; devclass_t pci, pcib; device_t bus, dev; int i; pci = devclass_find("pci"); pcib = devclass_find("pcib"); dev = sc->bge_dev; bus = device_get_parent(dev); for (;;) { dev = device_get_parent(bus); bus = device_get_parent(dev); if (device_get_devclass(dev) != pcib) break; for (i = 0; i < nitems(mbox_reorder_lists); i++) { if (pci_get_vendor(dev) == mbox_reorder_lists[i].vendor && pci_get_device(dev) == mbox_reorder_lists[i].device) { device_printf(sc->bge_dev, "enabling MBOX workaround for %s\n", mbox_reorder_lists[i].desc); return (1); } } if (device_get_devclass(bus) != pci) break; } return (0); } static void bge_devinfo(struct bge_softc *sc) { uint32_t cfg, clk; device_printf(sc->bge_dev, "CHIP ID 0x%08x; ASIC REV 0x%02x; CHIP REV 0x%02x; ", sc->bge_chipid, sc->bge_asicrev, sc->bge_chiprev); if (sc->bge_flags & BGE_FLAG_PCIE) printf("PCI-E\n"); else if (sc->bge_flags & BGE_FLAG_PCIX) { printf("PCI-X "); cfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID_MASK; if (cfg == BGE_MISCCFG_BOARD_ID_5704CIOBE) clk = 133; else { clk = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1F; switch (clk) { case 0: clk = 33; break; case 2: clk = 50; break; case 4: clk = 66; break; case 6: clk = 100; break; case 7: clk = 133; break; } } printf("%u MHz\n", clk); } else { if (sc->bge_pcixcap != 0) printf("PCI on PCI-X "); else printf("PCI "); cfg = pci_read_config(sc->bge_dev, BGE_PCI_PCISTATE, 4); if (cfg & BGE_PCISTATE_PCI_BUSSPEED) clk = 66; else clk = 33; if (cfg & BGE_PCISTATE_32BIT_BUS) printf("%u MHz; 32bit\n", clk); else printf("%u MHz; 64bit\n", clk); } } static int bge_attach(device_t dev) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &bge_ifdrv, .ifat_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST, .ifat_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | #ifdef DEVICE_POLLING IFCAP_POLLING | #endif IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM, }; struct bge_softc *sc; uint32_t hwcfg = 0, misccfg, pcistate; u_char eaddr[ETHER_ADDR_LEN]; int capmask, error, reg, rid, trys; sc = device_get_softc(dev); sc->bge_dev = dev; BGE_LOCK_INIT(sc, device_get_nameunit(dev)); TASK_INIT(&sc->bge_intr_task, 0, bge_intr_task, sc); callout_init_mtx(&sc->bge_stat_ch, &sc->bge_mtx, 0); pci_enable_busmaster(dev); /* * Allocate control/status registers. */ rid = PCIR_BAR(0); sc->bge_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bge_res == NULL) { device_printf (sc->bge_dev, "couldn't map BAR0 memory\n"); error = ENXIO; goto fail; } /* Save various chip information. */ sc->bge_func_addr = pci_get_function(dev); sc->bge_chipid = bge_chipid(dev); sc->bge_asicrev = BGE_ASICREV(sc->bge_chipid); sc->bge_chiprev = BGE_CHIPREV(sc->bge_chipid); /* Set default PHY address. */ sc->bge_phy_addr = 1; /* * PHY address mapping for various devices. * * | F0 Cu | F0 Sr | F1 Cu | F1 Sr | * ---------+-------+-------+-------+-------+ * BCM57XX | 1 | X | X | X | * BCM5704 | 1 | X | 1 | X | * BCM5717 | 1 | 8 | 2 | 9 | * BCM5719 | 1 | 8 | 2 | 9 | * BCM5720 | 1 | 8 | 2 | 9 | * * | F2 Cu | F2 Sr | F3 Cu | F3 Sr | * ---------+-------+-------+-------+-------+ * BCM57XX | X | X | X | X | * BCM5704 | X | X | X | X | * BCM5717 | X | X | X | X | * BCM5719 | 3 | 10 | 4 | 11 | * BCM5720 | X | X | X | X | * * Other addresses may respond but they are not * IEEE compliant PHYs and should be ignored. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5717 || sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { if (sc->bge_chipid != BGE_CHIPID_BCM5717_A0) { if (CSR_READ_4(sc, BGE_SGDIG_STS) & BGE_SGDIGSTS_IS_SERDES) sc->bge_phy_addr = sc->bge_func_addr + 8; else sc->bge_phy_addr = sc->bge_func_addr + 1; } else { if (CSR_READ_4(sc, BGE_CPMU_PHY_STRAP) & BGE_CPMU_PHY_STRAP_IS_SERDES) sc->bge_phy_addr = sc->bge_func_addr + 8; else sc->bge_phy_addr = sc->bge_func_addr + 1; } } if (bge_has_eaddr(sc)) sc->bge_flags |= BGE_FLAG_EADDR; /* Save chipset family. */ switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5762: case BGE_ASICREV_BCM57765: case BGE_ASICREV_BCM57766: sc->bge_flags |= BGE_FLAG_57765_PLUS; /* FALLTHROUGH */ case BGE_ASICREV_BCM5717: case BGE_ASICREV_BCM5719: case BGE_ASICREV_BCM5720: sc->bge_flags |= BGE_FLAG_5717_PLUS | BGE_FLAG_5755_PLUS | BGE_FLAG_575X_PLUS | BGE_FLAG_5705_PLUS | BGE_FLAG_JUMBO | BGE_FLAG_JUMBO_FRAME; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) { /* * Enable work around for DMA engine miscalculation * of TXMBUF available space. */ sc->bge_flags |= BGE_FLAG_RDMA_BUG; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 && sc->bge_chipid == BGE_CHIPID_BCM5719_A0) { /* Jumbo frame on BCM5719 A0 does not work. */ sc->bge_flags &= ~BGE_FLAG_JUMBO; } } break; case BGE_ASICREV_BCM5755: case BGE_ASICREV_BCM5761: case BGE_ASICREV_BCM5784: case BGE_ASICREV_BCM5785: case BGE_ASICREV_BCM5787: case BGE_ASICREV_BCM57780: sc->bge_flags |= BGE_FLAG_5755_PLUS | BGE_FLAG_575X_PLUS | BGE_FLAG_5705_PLUS; break; case BGE_ASICREV_BCM5700: case BGE_ASICREV_BCM5701: case BGE_ASICREV_BCM5703: case BGE_ASICREV_BCM5704: sc->bge_flags |= BGE_FLAG_5700_FAMILY | BGE_FLAG_JUMBO; break; case BGE_ASICREV_BCM5714_A0: case BGE_ASICREV_BCM5780: case BGE_ASICREV_BCM5714: sc->bge_flags |= BGE_FLAG_5714_FAMILY | BGE_FLAG_JUMBO_STD; /* FALLTHROUGH */ case BGE_ASICREV_BCM5750: case BGE_ASICREV_BCM5752: case BGE_ASICREV_BCM5906: sc->bge_flags |= BGE_FLAG_575X_PLUS; /* FALLTHROUGH */ case BGE_ASICREV_BCM5705: sc->bge_flags |= BGE_FLAG_5705_PLUS; break; } /* Identify chips with APE processor. */ switch (sc->bge_asicrev) { case BGE_ASICREV_BCM5717: case BGE_ASICREV_BCM5719: case BGE_ASICREV_BCM5720: case BGE_ASICREV_BCM5761: case BGE_ASICREV_BCM5762: sc->bge_flags |= BGE_FLAG_APE; break; } /* Chips with APE need BAR2 access for APE registers/memory. */ if ((sc->bge_flags & BGE_FLAG_APE) != 0) { rid = PCIR_BAR(2); sc->bge_res2 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bge_res2 == NULL) { device_printf (sc->bge_dev, "couldn't map BAR2 memory\n"); error = ENXIO; goto fail; } /* Enable APE register/memory access by host driver. */ pcistate = pci_read_config(dev, BGE_PCI_PCISTATE, 4); pcistate |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR | BGE_PCISTATE_ALLOW_APE_SHMEM_WR | BGE_PCISTATE_ALLOW_APE_PSPACE_WR; pci_write_config(dev, BGE_PCI_PCISTATE, pcistate, 4); bge_ape_lock_init(sc); bge_ape_read_fw_ver(sc); } /* Add SYSCTLs, requires the chipset family to be set. */ bge_add_sysctls(sc); /* Identify the chips that use an CPMU. */ if (BGE_IS_5717_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5785 || sc->bge_asicrev == BGE_ASICREV_BCM57780) sc->bge_flags |= BGE_FLAG_CPMU_PRESENT; if ((sc->bge_flags & BGE_FLAG_CPMU_PRESENT) != 0) sc->bge_mi_mode = BGE_MIMODE_500KHZ_CONST; else sc->bge_mi_mode = BGE_MIMODE_BASE; /* Enable auto polling for BCM570[0-5]. */ if (BGE_IS_5700_FAMILY(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5705) sc->bge_mi_mode |= BGE_MIMODE_AUTOPOLL; /* * All Broadcom controllers have 4GB boundary DMA bug. * Whenever an address crosses a multiple of the 4GB boundary * (including 4GB, 8Gb, 12Gb, etc.) and makes the transition * from 0xX_FFFF_FFFF to 0x(X+1)_0000_0000 an internal DMA * state machine will lockup and cause the device to hang. */ sc->bge_flags |= BGE_FLAG_4G_BNDRY_BUG; /* BCM5755 or higher and BCM5906 have short DMA bug. */ if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906) sc->bge_flags |= BGE_FLAG_SHORT_DMA_BUG; /* * BCM5719 cannot handle DMA requests for DMA segments that * have larger than 4KB in size. However the maximum DMA * segment size created in DMA tag is 4KB for TSO, so we * wouldn't encounter the issue here. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5719) sc->bge_flags |= BGE_FLAG_4K_RDMA_BUG; misccfg = CSR_READ_4(sc, BGE_MISC_CFG) & BGE_MISCCFG_BOARD_ID_MASK; if (sc->bge_asicrev == BGE_ASICREV_BCM5705) { if (misccfg == BGE_MISCCFG_BOARD_ID_5788 || misccfg == BGE_MISCCFG_BOARD_ID_5788M) sc->bge_flags |= BGE_FLAG_5788; } capmask = BMSR_DEFCAPMASK; if ((sc->bge_asicrev == BGE_ASICREV_BCM5703 && (misccfg == 0x4000 || misccfg == 0x8000)) || (sc->bge_asicrev == BGE_ASICREV_BCM5705 && pci_get_vendor(dev) == BCOM_VENDORID && (pci_get_device(dev) == BCOM_DEVICEID_BCM5901 || pci_get_device(dev) == BCOM_DEVICEID_BCM5901A2 || pci_get_device(dev) == BCOM_DEVICEID_BCM5705F)) || (pci_get_vendor(dev) == BCOM_VENDORID && (pci_get_device(dev) == BCOM_DEVICEID_BCM5751F || pci_get_device(dev) == BCOM_DEVICEID_BCM5753F || pci_get_device(dev) == BCOM_DEVICEID_BCM5787F)) || pci_get_device(dev) == BCOM_DEVICEID_BCM57790 || pci_get_device(dev) == BCOM_DEVICEID_BCM57791 || pci_get_device(dev) == BCOM_DEVICEID_BCM57795 || sc->bge_asicrev == BGE_ASICREV_BCM5906) { /* These chips are 10/100 only. */ capmask &= ~BMSR_EXTSTAT; sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; } /* * Some controllers seem to require a special firmware to use * TSO. But the firmware is not available to FreeBSD and Linux * claims that the TSO performed by the firmware is slower than * hardware based TSO. Moreover the firmware based TSO has one * known bug which can't handle TSO if Ethernet header + IP/TCP * header is greater than 80 bytes. A workaround for the TSO * bug exist but it seems it's too expensive than not using * TSO at all. Some hardwares also have the TSO bug so limit * the TSO to the controllers that are not affected TSO issues * (e.g. 5755 or higher). */ if (BGE_IS_5717_PLUS(sc)) { /* BCM5717 requires different TSO configuration. */ sc->bge_flags |= BGE_FLAG_TSO3; if (sc->bge_asicrev == BGE_ASICREV_BCM5719 && sc->bge_chipid == BGE_CHIPID_BCM5719_A0) { /* TSO on BCM5719 A0 does not work. */ sc->bge_flags &= ~BGE_FLAG_TSO3; } } else if (BGE_IS_5755_PLUS(sc)) { /* * BCM5754 and BCM5787 shares the same ASIC id so * explicit device id check is required. * Due to unknown reason TSO does not work on BCM5755M. */ if (pci_get_device(dev) != BCOM_DEVICEID_BCM5754 && pci_get_device(dev) != BCOM_DEVICEID_BCM5754M && pci_get_device(dev) != BCOM_DEVICEID_BCM5755M) sc->bge_flags |= BGE_FLAG_TSO; } /* * Check if this is a PCI-X or PCI Express device. */ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { /* * Found a PCI Express capabilities register, this * must be a PCI Express device. */ sc->bge_flags |= BGE_FLAG_PCIE; sc->bge_expcap = reg; /* Extract supported maximum payload size. */ sc->bge_mps = pci_read_config(dev, sc->bge_expcap + PCIER_DEVICE_CAP, 2); sc->bge_mps = 128 << (sc->bge_mps & PCIEM_CAP_MAX_PAYLOAD); if (sc->bge_asicrev == BGE_ASICREV_BCM5719 || sc->bge_asicrev == BGE_ASICREV_BCM5720) sc->bge_expmrq = 2048; else sc->bge_expmrq = 4096; pci_set_max_read_req(dev, sc->bge_expmrq); } else { /* * Check if the device is in PCI-X Mode. * (This bit is not valid on PCI Express controllers.) */ if (pci_find_cap(dev, PCIY_PCIX, ®) == 0) sc->bge_pcixcap = reg; if ((pci_read_config(dev, BGE_PCI_PCISTATE, 4) & BGE_PCISTATE_PCI_BUSMODE) == 0) sc->bge_flags |= BGE_FLAG_PCIX; } /* * The 40bit DMA bug applies to the 5714/5715 controllers and is * not actually a MAC controller bug but an issue with the embedded * PCIe to PCI-X bridge in the device. Use 40bit DMA workaround. */ if (BGE_IS_5714_FAMILY(sc) && (sc->bge_flags & BGE_FLAG_PCIX)) sc->bge_flags |= BGE_FLAG_40BIT_BUG; /* * Some PCI-X bridges are known to trigger write reordering to * the mailbox registers. Typical phenomena is watchdog timeouts * caused by out-of-order TX completions. Enable workaround for * PCI-X devices that live behind these bridges. * Note, PCI-X controllers can run in PCI mode so we can't use * BGE_FLAG_PCIX flag to detect PCI-X controllers. */ if (sc->bge_pcixcap != 0 && bge_mbox_reorder(sc) != 0) sc->bge_flags |= BGE_FLAG_MBOX_REORDER; /* * Allocate the interrupt, using MSI if possible. These devices * support 8 MSI messages, but only the first one is used in * normal operation. */ rid = 0; if (pci_find_cap(sc->bge_dev, PCIY_MSI, ®) == 0) { sc->bge_msicap = reg; reg = 1; if (bge_can_use_msi(sc) && pci_alloc_msi(dev, ®) == 0) { rid = 1; sc->bge_flags |= BGE_FLAG_MSI; } } /* * All controllers except BCM5700 supports tagged status but * we use tagged status only for MSI case on BCM5717. Otherwise * MSI on BCM5717 does not work. */ #ifndef DEVICE_POLLING if (sc->bge_flags & BGE_FLAG_MSI && BGE_IS_5717_PLUS(sc)) sc->bge_flags |= BGE_FLAG_TAGGED_STATUS; #endif sc->bge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->bge_irq == NULL) { device_printf(sc->bge_dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } bge_devinfo(sc); sc->bge_asf_mode = 0; /* No ASF if APE present. */ if ((sc->bge_flags & BGE_FLAG_APE) == 0) { if (bge_allow_asf && (bge_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC)) { if (bge_readmem_ind(sc, BGE_SRAM_DATA_CFG) & BGE_HWCFG_ASF) { sc->bge_asf_mode |= ASF_ENABLE; sc->bge_asf_mode |= ASF_STACKUP; if (BGE_IS_575X_PLUS(sc)) sc->bge_asf_mode |= ASF_NEW_HANDSHAKE; } } } bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_SHUTDOWN); if (bge_reset(sc)) { device_printf(sc->bge_dev, "chip reset failed\n"); error = ENXIO; goto fail; } bge_sig_legacy(sc, BGE_RESET_SHUTDOWN); bge_sig_post_reset(sc, BGE_RESET_SHUTDOWN); if (bge_chipinit(sc)) { device_printf(sc->bge_dev, "chip initialization failed\n"); error = ENXIO; goto fail; } error = bge_get_eaddr(sc, eaddr); if (error) { device_printf(sc->bge_dev, "failed to read station address\n"); error = ENXIO; goto fail; } /* 5705 limits RX return ring to 512 entries. */ if (BGE_IS_5717_PLUS(sc)) sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT; else if (BGE_IS_5705_PLUS(sc)) sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT_5705; else sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT; if (bge_dma_alloc(sc)) { device_printf(sc->bge_dev, "failed to allocate DMA resources\n"); error = ENXIO; goto fail; } /* Set default tuneable values. */ sc->bge_stat_ticks = BGE_TICKS_PER_SEC; sc->bge_rx_coal_ticks = 150; sc->bge_tx_coal_ticks = 150; sc->bge_rx_max_coal_bds = 10; sc->bge_tx_max_coal_bds = 10; /* Initialize checksum features to use. */ sc->bge_hwassist = (CSUM_IP | CSUM_TCP); if (sc->bge_forced_udpcsum != 0) sc->bge_hwassist |= CSUM_UDP; /* * Figure out what sort of media we have by checking the * hardware config word in the first 32k of NIC internal memory, * or fall back to examining the EEPROM if necessary. * Note: on some BCM5700 cards, this value appears to be unset. * If that's the case, we have to rely on identifying the NIC * by its PCI subsystem ID, as we do below for the SysKonnect * SK-9D41. */ if (bge_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC) hwcfg = bge_readmem_ind(sc, BGE_SRAM_DATA_CFG); else if ((sc->bge_flags & BGE_FLAG_EADDR) && (sc->bge_asicrev != BGE_ASICREV_BCM5906)) { if (bge_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET, sizeof(hwcfg))) { device_printf(sc->bge_dev, "failed to read EEPROM\n"); error = ENXIO; goto fail; } hwcfg = ntohl(hwcfg); } /* The SysKonnect SK-9D41 is a 1000baseSX card. */ if ((pci_read_config(dev, BGE_PCI_SUBSYS, 4) >> 16) == SK_SUBSYSID_9D41 || (hwcfg & BGE_HWCFG_MEDIA) == BGE_MEDIA_FIBER) { if (BGE_IS_5705_PLUS(sc)) { sc->bge_flags |= BGE_FLAG_MII_SERDES; sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; } else sc->bge_flags |= BGE_FLAG_TBI; } /* Set various PHY bug flags. */ if (sc->bge_chipid == BGE_CHIPID_BCM5701_A0 || sc->bge_chipid == BGE_CHIPID_BCM5701_B0) sc->bge_phy_flags |= BGE_PHY_CRC_BUG; if (sc->bge_chiprev == BGE_CHIPREV_5703_AX || sc->bge_chiprev == BGE_CHIPREV_5704_AX) sc->bge_phy_flags |= BGE_PHY_ADC_BUG; if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0) sc->bge_phy_flags |= BGE_PHY_5704_A0_BUG; if (pci_get_subvendor(dev) == DELL_VENDORID) sc->bge_phy_flags |= BGE_PHY_NO_3LED; if ((BGE_IS_5705_PLUS(sc)) && sc->bge_asicrev != BGE_ASICREV_BCM5906 && sc->bge_asicrev != BGE_ASICREV_BCM5785 && sc->bge_asicrev != BGE_ASICREV_BCM57780 && !BGE_IS_5717_PLUS(sc)) { if (sc->bge_asicrev == BGE_ASICREV_BCM5755 || sc->bge_asicrev == BGE_ASICREV_BCM5761 || sc->bge_asicrev == BGE_ASICREV_BCM5784 || sc->bge_asicrev == BGE_ASICREV_BCM5787) { if (pci_get_device(dev) != BCOM_DEVICEID_BCM5722 && pci_get_device(dev) != BCOM_DEVICEID_BCM5756) sc->bge_phy_flags |= BGE_PHY_JITTER_BUG; if (pci_get_device(dev) == BCOM_DEVICEID_BCM5755M) sc->bge_phy_flags |= BGE_PHY_ADJUST_TRIM; } else sc->bge_phy_flags |= BGE_PHY_BER_BUG; } /* * Don't enable Ethernet@WireSpeed for the 5700 or the * 5705 A0 and A1 chips. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || (sc->bge_asicrev == BGE_ASICREV_BCM5705 && (sc->bge_chipid != BGE_CHIPID_BCM5705_A0 && sc->bge_chipid != BGE_CHIPID_BCM5705_A1))) sc->bge_phy_flags |= BGE_PHY_NO_WIRESPEED; if (sc->bge_flags & BGE_FLAG_TBI) { ifmedia_init(&sc->bge_ifmedia, IFM_IMASK, bge_ifmedia_upd, bge_ifmedia_sts); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&sc->bge_ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->bge_ifmedia, IFM_ETHER | IFM_AUTO); sc->bge_ifmedia.ifm_media = sc->bge_ifmedia.ifm_cur->ifm_media; } else { /* * Do transceiver setup and tell the firmware the * driver is down so we can try to get access the * probe if ASF is running. Retry a couple of times * if we get a conflict with the ASF firmware accessing * the PHY. */ trys = 0; BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); again: bge_asf_driver_up(sc); error = mii_attach(dev, &sc->bge_miibus, (ifm_change_cb_t)bge_ifmedia_upd, (ifm_stat_cb_t)bge_ifmedia_sts, capmask, sc->bge_phy_addr, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { if (trys++ < 4) { device_printf(sc->bge_dev, "Try again\n"); bge_miibus_writereg(sc->bge_dev, sc->bge_phy_addr, MII_BMCR, BMCR_RESET); goto again; } device_printf(sc->bge_dev, "attaching PHYs failed\n"); goto fail; } /* * Now tell the firmware we are going up after probing the PHY */ if (sc->bge_asf_mode & ASF_STACKUP) BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); } /* * When using the BCM5701 in PCI-X mode, data corruption has * been observed in the first few bytes of some received packets. * Aligning the packet buffer in memory eliminates the corruption. * Unfortunately, this misaligns the packet payloads. On platforms * which do not support unaligned accesses, we will realign the * payloads by copying the received packets. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5701 && sc->bge_flags & BGE_FLAG_PCIX) sc->bge_flags |= BGE_FLAG_RX_ALIGNBUG; /* * Hookup IRQ last. */ if (BGE_IS_5755_PLUS(sc) && sc->bge_flags & BGE_FLAG_MSI) { /* Take advantage of single-shot MSI. */ CSR_WRITE_4(sc, BGE_MSI_MODE, CSR_READ_4(sc, BGE_MSI_MODE) & ~BGE_MSIMODE_ONE_SHOT_DISABLE); sc->bge_tq = taskqueue_create_fast("bge_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->bge_tq); if (sc->bge_tq == NULL) { device_printf(dev, "could not create taskqueue.\n"); error = ENOMEM; goto fail; } error = taskqueue_start_threads(&sc->bge_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->bge_dev)); if (error != 0) { device_printf(dev, "could not start threads.\n"); goto fail; } error = bus_setup_intr(dev, sc->bge_irq, INTR_TYPE_NET | INTR_MPSAFE, bge_msi_intr, NULL, sc, &sc->bge_intrhand); } else error = bus_setup_intr(dev, sc->bge_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, bge_intr, sc, &sc->bge_intrhand); if (error) { device_printf(sc->bge_dev, "couldn't set up irq\n"); goto fail; } /* Attach interface. */ ifat.ifat_softc = sc; ifat.ifat_dunit = device_get_unit(dev); ifat.ifat_lla = eaddr; ifat.ifat_hwassist = sc->bge_hwassist; if ((sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) != 0) { ifat.ifat_hwassist |= CSUM_TSO; ifat.ifat_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO; } ifat.ifat_capenable = ifat.ifat_capabilities; /* * 5700 B0 chips do not support checksumming correctly due * to hardware bugs. */ if (sc->bge_chipid == BGE_CHIPID_BCM5700_B0) { ifat.ifat_capabilities &= ~IFCAP_HWCSUM; ifat.ifat_capenable &= ~IFCAP_HWCSUM; ifat.ifat_hwassist = 0; } sc->bge_capenable = ifat.ifat_capenable & ~(IFCAP_POLLING); sc->bge_mtu = ETHERMTU; sc->bge_ifp = if_attach(&ifat); return (0); fail: bge_detach(dev); return (error); } static int bge_detach(device_t dev) { struct bge_softc *sc; if_t ifp; sc = device_get_softc(dev); ifp = sc->bge_ifp; if (device_is_attached(dev)) { if_detach(ifp); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); callout_drain(&sc->bge_stat_ch); } if (sc->bge_tq) taskqueue_drain(sc->bge_tq, &sc->bge_intr_task); if (sc->bge_flags & BGE_FLAG_TBI) ifmedia_removeall(&sc->bge_ifmedia); else if (sc->bge_miibus != NULL) { bus_generic_detach(dev); device_delete_child(dev, sc->bge_miibus); } bge_release_resources(sc); return (0); } static void bge_release_resources(struct bge_softc *sc) { device_t dev; dev = sc->bge_dev; if (sc->bge_tq != NULL) taskqueue_free(sc->bge_tq); if (sc->bge_intrhand != NULL) bus_teardown_intr(dev, sc->bge_irq, sc->bge_intrhand); if (sc->bge_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->bge_irq), sc->bge_irq); pci_release_msi(dev); } if (sc->bge_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->bge_res), sc->bge_res); if (sc->bge_res2 != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->bge_res2), sc->bge_res2); bge_dma_free(sc); if (mtx_initialized(&sc->bge_mtx)) /* XXX */ BGE_LOCK_DESTROY(sc); } static int bge_reset(struct bge_softc *sc) { device_t dev; uint32_t cachesize, command, mac_mode, mac_mode_mask, reset, val; void (*write_op)(struct bge_softc *, int, int); uint16_t devctl; int i; dev = sc->bge_dev; mac_mode_mask = BGE_MACMODE_HALF_DUPLEX | BGE_MACMODE_PORTMODE; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) mac_mode_mask |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN; mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & mac_mode_mask; if (BGE_IS_575X_PLUS(sc) && !BGE_IS_5714_FAMILY(sc) && (sc->bge_asicrev != BGE_ASICREV_BCM5906)) { if (sc->bge_flags & BGE_FLAG_PCIE) write_op = bge_writemem_direct; else write_op = bge_writemem_ind; } else write_op = bge_writereg_ind; if (sc->bge_asicrev != BGE_ASICREV_BCM5700 && sc->bge_asicrev != BGE_ASICREV_BCM5701) { CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1); for (i = 0; i < 8000; i++) { if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1) break; DELAY(20); } if (i == 8000) { if (bootverbose) device_printf(dev, "NVRAM lock timedout!\n"); } } /* Take APE lock when performing reset. */ bge_ape_lock(sc, BGE_APE_LOCK_GRC); /* Save some important PCI state. */ cachesize = pci_read_config(dev, BGE_PCI_CACHESZ, 4); command = pci_read_config(dev, BGE_PCI_CMD, 4); pci_write_config(dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_INDIRECT_ACCESS | BGE_PCIMISCCTL_MASK_PCI_INTR | BGE_HIF_SWAP_OPTIONS | BGE_PCIMISCCTL_PCISTATE_RW, 4); /* Disable fastboot on controllers that support it. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5752 || BGE_IS_5755_PLUS(sc)) { if (bootverbose) device_printf(dev, "Disabling fastboot\n"); CSR_WRITE_4(sc, BGE_FASTBOOT_PC, 0x0); } /* * Write the magic number to SRAM at offset 0xB50. * When firmware finishes its initialization it will * write ~BGE_SRAM_FW_MB_MAGIC to the same location. */ bge_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC); reset = BGE_MISCCFG_RESET_CORE_CLOCKS | BGE_32BITTIME_66MHZ; /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_asicrev != BGE_ASICREV_BCM5785 && (sc->bge_flags & BGE_FLAG_5717_PLUS) == 0) { if (CSR_READ_4(sc, 0x7E2C) == 0x60) /* PCIE 1.0 */ CSR_WRITE_4(sc, 0x7E2C, 0x20); } if (sc->bge_chipid != BGE_CHIPID_BCM5750_A0) { /* Prevent PCIE link training during global reset */ CSR_WRITE_4(sc, BGE_MISC_CFG, 1 << 29); reset |= 1 << 29; } } if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { val = CSR_READ_4(sc, BGE_VCPU_STATUS); CSR_WRITE_4(sc, BGE_VCPU_STATUS, val | BGE_VCPU_STATUS_DRV_RESET); val = CSR_READ_4(sc, BGE_VCPU_EXT_CTRL); CSR_WRITE_4(sc, BGE_VCPU_EXT_CTRL, val & ~BGE_VCPU_EXT_CTRL_HALT_CPU); } /* * Set GPHY Power Down Override to leave GPHY * powered up in D0 uninitialized. */ if (BGE_IS_5705_PLUS(sc) && (sc->bge_flags & BGE_FLAG_CPMU_PRESENT) == 0) reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE; /* Issue global reset */ write_op(sc, BGE_MISC_CFG, reset); if (sc->bge_flags & BGE_FLAG_PCIE) DELAY(100 * 1000); else DELAY(1000); /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE) { if (sc->bge_chipid == BGE_CHIPID_BCM5750_A0) { DELAY(500000); /* wait for link training to complete */ val = pci_read_config(dev, 0xC4, 4); pci_write_config(dev, 0xC4, val | (1 << 15), 4); } devctl = pci_read_config(dev, sc->bge_expcap + PCIER_DEVICE_CTL, 2); /* Clear enable no snoop and disable relaxed ordering. */ devctl &= ~(PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE); pci_write_config(dev, sc->bge_expcap + PCIER_DEVICE_CTL, devctl, 2); pci_set_max_read_req(dev, sc->bge_expmrq); /* Clear error status. */ pci_write_config(dev, sc->bge_expcap + PCIER_DEVICE_STA, PCIEM_STA_CORRECTABLE_ERROR | PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR | PCIEM_STA_UNSUPPORTED_REQ, 2); } /* Reset some of the PCI state that got zapped by reset. */ pci_write_config(dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_INDIRECT_ACCESS | BGE_PCIMISCCTL_MASK_PCI_INTR | BGE_HIF_SWAP_OPTIONS | BGE_PCIMISCCTL_PCISTATE_RW, 4); val = BGE_PCISTATE_ROM_ENABLE | BGE_PCISTATE_ROM_RETRY_ENABLE; if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0 && (sc->bge_flags & BGE_FLAG_PCIX) != 0) val |= BGE_PCISTATE_RETRY_SAME_DMA; if ((sc->bge_mfw_flags & BGE_MFW_ON_APE) != 0) val |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR | BGE_PCISTATE_ALLOW_APE_SHMEM_WR | BGE_PCISTATE_ALLOW_APE_PSPACE_WR; pci_write_config(dev, BGE_PCI_PCISTATE, val, 4); pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4); pci_write_config(dev, BGE_PCI_CMD, command, 4); /* * Disable PCI-X relaxed ordering to ensure status block update * comes first then packet buffer DMA. Otherwise driver may * read stale status block. */ if (sc->bge_flags & BGE_FLAG_PCIX) { devctl = pci_read_config(dev, sc->bge_pcixcap + PCIXR_COMMAND, 2); devctl &= ~PCIXM_COMMAND_ERO; if (sc->bge_asicrev == BGE_ASICREV_BCM5703) { devctl &= ~PCIXM_COMMAND_MAX_READ; devctl |= PCIXM_COMMAND_MAX_READ_2048; } else if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { devctl &= ~(PCIXM_COMMAND_MAX_SPLITS | PCIXM_COMMAND_MAX_READ); devctl |= PCIXM_COMMAND_MAX_READ_2048; } pci_write_config(dev, sc->bge_pcixcap + PCIXR_COMMAND, devctl, 2); } /* Re-enable MSI, if necessary, and enable the memory arbiter. */ if (BGE_IS_5714_FAMILY(sc)) { /* This chip disables MSI on reset. */ if (sc->bge_flags & BGE_FLAG_MSI) { val = pci_read_config(dev, sc->bge_msicap + PCIR_MSI_CTRL, 2); pci_write_config(dev, sc->bge_msicap + PCIR_MSI_CTRL, val | PCIM_MSICTRL_MSI_ENABLE, 2); val = CSR_READ_4(sc, BGE_MSI_MODE); CSR_WRITE_4(sc, BGE_MSI_MODE, val | BGE_MSIMODE_ENABLE); } val = CSR_READ_4(sc, BGE_MARB_MODE); CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE | val); } else CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE); /* Fix up byte swapping. */ CSR_WRITE_4(sc, BGE_MODE_CTL, bge_dma_swap_options(sc)); val = CSR_READ_4(sc, BGE_MAC_MODE); val = (val & ~mac_mode_mask) | mac_mode; CSR_WRITE_4(sc, BGE_MAC_MODE, val); DELAY(40); bge_ape_unlock(sc, BGE_APE_LOCK_GRC); if (sc->bge_asicrev == BGE_ASICREV_BCM5906) { for (i = 0; i < BGE_TIMEOUT; i++) { val = CSR_READ_4(sc, BGE_VCPU_STATUS); if (val & BGE_VCPU_STATUS_INIT_DONE) break; DELAY(100); } if (i == BGE_TIMEOUT) { device_printf(dev, "reset timed out\n"); return (1); } } else { /* * Poll until we see the 1's complement of the magic number. * This indicates that the firmware initialization is complete. * We expect this to fail if no chip containing the Ethernet * address is fitted though. */ for (i = 0; i < BGE_TIMEOUT; i++) { DELAY(10); val = bge_readmem_ind(sc, BGE_SRAM_FW_MB); if (val == ~BGE_SRAM_FW_MB_MAGIC) break; } if ((sc->bge_flags & BGE_FLAG_EADDR) && i == BGE_TIMEOUT) device_printf(dev, "firmware handshake timed out, found 0x%08x\n", val); /* BCM57765 A0 needs additional time before accessing. */ if (sc->bge_chipid == BGE_CHIPID_BCM57765_A0) DELAY(10 * 1000); /* XXX */ } /* * The 5704 in TBI mode apparently needs some special * adjustment to insure the SERDES drive level is set * to 1.2V. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5704 && sc->bge_flags & BGE_FLAG_TBI) { val = CSR_READ_4(sc, BGE_SERDES_CFG); val = (val & ~0xFFF) | 0x880; CSR_WRITE_4(sc, BGE_SERDES_CFG, val); } /* XXX: Broadcom Linux driver. */ if (sc->bge_flags & BGE_FLAG_PCIE && !BGE_IS_5717_PLUS(sc) && sc->bge_chipid != BGE_CHIPID_BCM5750_A0 && sc->bge_asicrev != BGE_ASICREV_BCM5785) { /* Enable Data FIFO protection. */ val = CSR_READ_4(sc, 0x7C00); CSR_WRITE_4(sc, 0x7C00, val | (1 << 25)); } if (sc->bge_asicrev == BGE_ASICREV_BCM5720) BGE_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE, CPMU_CLCK_ORIDE_MAC_ORIDE_EN); return (0); } static __inline void bge_rxreuse_std(struct bge_softc *sc, int i) { struct bge_rx_bd *r; r = &sc->bge_ldata.bge_rx_std_ring[sc->bge_std]; r->bge_flags = BGE_RXBDFLAG_END; r->bge_len = sc->bge_cdata.bge_rx_std_seglen[i]; r->bge_idx = i; BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } static __inline void bge_rxreuse_jumbo(struct bge_softc *sc, int i) { struct bge_extrx_bd *r; r = &sc->bge_ldata.bge_rx_jumbo_ring[sc->bge_jumbo]; r->bge_flags = BGE_RXBDFLAG_JUMBO_RING | BGE_RXBDFLAG_END; r->bge_len0 = sc->bge_cdata.bge_rx_jumbo_seglen[i][0]; r->bge_len1 = sc->bge_cdata.bge_rx_jumbo_seglen[i][1]; r->bge_len2 = sc->bge_cdata.bge_rx_jumbo_seglen[i][2]; r->bge_len3 = sc->bge_cdata.bge_rx_jumbo_seglen[i][3]; r->bge_idx = i; BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } /* * Frame reception handling. This is called if there's a frame * on the receive return list. * * Note: we have to be able to handle two possibilities here: * 1) the frame is from the jumbo receive ring * 2) the frame is from the standard receive ring */ static int bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck) { if_t ifp; int rx_npkts = 0, stdcnt = 0, jumbocnt = 0; uint16_t rx_cons; rx_cons = sc->bge_rx_saved_considx; /* Nothing to do. */ if (rx_cons == rx_prod) return (rx_npkts); ifp = sc->bge_ifp; bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_POSTWRITE); if (BGE_IS_JUMBO_CAPABLE(sc) && sc->bge_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN)) bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_POSTWRITE); while (rx_cons != rx_prod) { struct bge_rx_bd *cur_rx; uint32_t rxidx; struct mbuf *m = NULL; uint16_t vlan_tag = 0; int have_tag = 0; #ifdef DEVICE_POLLING if (sc->bge_capenable & IFCAP_POLLING) { if (sc->rxcycles <= 0) break; sc->rxcycles--; } #endif cur_rx = &sc->bge_ldata.bge_rx_return_ring[rx_cons]; rxidx = cur_rx->bge_idx; BGE_INC(rx_cons, sc->bge_return_ring_cnt); if (sc->bge_capenable & IFCAP_VLAN_HWTAGGING && cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) { have_tag = 1; vlan_tag = cur_rx->bge_vlan_tag; } if (cur_rx->bge_flags & BGE_RXBDFLAG_JUMBO_RING) { jumbocnt++; m = sc->bge_cdata.bge_rx_jumbo_chain[rxidx]; if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { bge_rxreuse_jumbo(sc, rxidx); continue; } if (bge_newbuf_jumbo(sc, rxidx) != 0) { bge_rxreuse_jumbo(sc, rxidx); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT); } else { stdcnt++; m = sc->bge_cdata.bge_rx_std_chain[rxidx]; if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) { bge_rxreuse_std(sc, rxidx); continue; } if (bge_newbuf_std(sc, rxidx) != 0) { bge_rxreuse_std(sc, rxidx); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT); } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); #ifndef __NO_STRICT_ALIGNMENT /* * For architectures with strict alignment we must make sure * the payload is aligned. */ if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) { bcopy(m->m_data, m->m_data + ETHER_ALIGN, cur_rx->bge_len); m->m_data += ETHER_ALIGN; } #endif m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN; m->m_pkthdr.rcvif = ifp; if (sc->bge_capenable & IFCAP_RXCSUM) bge_rxcsum(sc, cur_rx, m); /* * If we received a packet with a vlan tag, * attach that information to the packet. */ if (have_tag) { m->m_pkthdr.ether_vtag = vlan_tag; m->m_flags |= M_VLANTAG; } if (holdlck != 0) { BGE_UNLOCK(sc); if_input(ifp, m); BGE_LOCK(sc); } else if_input(ifp, m); rx_npkts++; if (!(sc->bge_flags & BGE_FLAG_RUNNING)) return (rx_npkts); } bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_PREREAD); if (stdcnt > 0) bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_PREWRITE); if (jumbocnt > 0) bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag, sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); sc->bge_rx_saved_considx = rx_cons; bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); if (stdcnt) bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, (sc->bge_std + BGE_STD_RX_RING_CNT - 1) % BGE_STD_RX_RING_CNT); if (jumbocnt) bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, (sc->bge_jumbo + BGE_JUMBO_RX_RING_CNT - 1) % BGE_JUMBO_RX_RING_CNT); #ifdef notyet /* * This register wraps very quickly under heavy packet drops. * If you need correct statistics, you can enable this check. */ if (BGE_IS_5705_PLUS(sc)) if_incierrors(ifp, CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS)); #endif return (rx_npkts); } static void bge_rxcsum(struct bge_softc *sc, struct bge_rx_bd *cur_rx, struct mbuf *m) { if (BGE_IS_5717_PLUS(sc)) { if ((cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) { if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->bge_error_flag & BGE_RXERRFLAG_IP_CSUM_NOK) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) { m->m_pkthdr.csum_data = cur_rx->bge_tcp_udp_csum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; } } } else { if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->bge_ip_csum ^ 0xFFFF) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM && m->m_pkthdr.len >= ETHER_MIN_NOPAD) { m->m_pkthdr.csum_data = cur_rx->bge_tcp_udp_csum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; } } } static void bge_txeof(struct bge_softc *sc, uint16_t tx_cons) { struct bge_tx_bd *cur_tx; if_t ifp; BGE_LOCK_ASSERT(sc); /* Nothing to do. */ if (sc->bge_tx_saved_considx == tx_cons) return; ifp = sc->bge_ifp; bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ while (sc->bge_tx_saved_considx != tx_cons) { uint32_t idx; idx = sc->bge_tx_saved_considx; cur_tx = &sc->bge_ldata.bge_tx_ring[idx]; if (cur_tx->bge_flags & BGE_TXBDFLAG_END) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (sc->bge_cdata.bge_tx_chain[idx] != NULL) { struct mbuf *m; bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[idx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, sc->bge_cdata.bge_tx_dmamap[idx]); m = sc->bge_cdata.bge_tx_chain[idx]; sc->bge_cdata.bge_tx_chain[idx] = NULL; if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); m_freem(m); } sc->bge_txcnt--; BGE_INC(sc->bge_tx_saved_considx, BGE_TX_RING_CNT); } if (sc->bge_txcnt == 0) sc->bge_timer = 0; } #ifdef DEVICE_POLLING static int bge_poll(if_t ifp, enum poll_cmd cmd, int count) { struct bge_softc *sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); uint16_t rx_prod, tx_cons; uint32_t statusword; int rx_npkts = 0; BGE_LOCK(sc); if (!(sc->bge_flags & BGE_FLAG_RUNNING)) { BGE_UNLOCK(sc); return (rx_npkts); } bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Fetch updates from the status block. */ rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; statusword = sc->bge_ldata.bge_status_block->bge_status; /* Clear the status so the next pass only sees the changes. */ sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Note link event. It will be processed by POLL_AND_CHECK_STATUS. */ if (statusword & BGE_STATFLAG_LINKSTATE_CHANGED) sc->bge_link_evt++; if (cmd == POLL_AND_CHECK_STATUS) if ((sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) || sc->bge_link_evt || (sc->bge_flags & BGE_FLAG_TBI)) bge_link_upd(sc); sc->rxcycles = count; rx_npkts = bge_rxeof(sc, rx_prod, 1); if (!(sc->bge_flags & BGE_FLAG_RUNNING)) { BGE_UNLOCK(sc); return (rx_npkts); } bge_txeof(sc, tx_cons); if (if_snd_len(ifp)) bge_start_locked(sc); BGE_UNLOCK(sc); return (rx_npkts); } #endif /* DEVICE_POLLING */ static int bge_msi_intr(void *arg) { struct bge_softc *sc; sc = (struct bge_softc *)arg; /* * This interrupt is not shared and controller already * disabled further interrupt. */ taskqueue_enqueue(sc->bge_tq, &sc->bge_intr_task); return (FILTER_HANDLED); } static void bge_intr_task(void *arg, int pending) { struct bge_softc *sc; if_t ifp; uint32_t status, status_tag; uint16_t rx_prod, tx_cons; sc = (struct bge_softc *)arg; ifp = sc->bge_ifp; BGE_LOCK(sc); if ((sc->bge_flags & BGE_FLAG_RUNNING) == 0) { BGE_UNLOCK(sc); return; } /* Get updated status block. */ bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Save producer/consumer indices. */ rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; status = sc->bge_ldata.bge_status_block->bge_status; status_tag = sc->bge_ldata.bge_status_block->bge_status_tag << 24; /* Dirty the status flag. */ sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if ((sc->bge_flags & BGE_FLAG_TAGGED_STATUS) == 0) status_tag = 0; if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) != 0) bge_link_upd(sc); /* Let controller work. */ bge_writembx(sc, BGE_MBX_IRQ0_LO, status_tag); if (sc->bge_flags & BGE_FLAG_RUNNING && sc->bge_rx_saved_considx != rx_prod) { /* Check RX return ring producer/consumer. */ BGE_UNLOCK(sc); bge_rxeof(sc, rx_prod, 0); BGE_LOCK(sc); } if (sc->bge_flags & BGE_FLAG_RUNNING) { /* Check TX ring producer/consumer. */ bge_txeof(sc, tx_cons); if (if_snd_len(ifp)) bge_start_locked(sc); } BGE_UNLOCK(sc); } static void bge_intr(void *xsc) { struct bge_softc *sc; if_t ifp; uint32_t statusword; uint16_t rx_prod, tx_cons; sc = xsc; BGE_LOCK(sc); ifp = sc->bge_ifp; #ifdef DEVICE_POLLING if (sc->bge_capenable & IFCAP_POLLING) { BGE_UNLOCK(sc); return; } #endif /* * Ack the interrupt by writing something to BGE_MBX_IRQ0_LO. Don't * disable interrupts by writing nonzero like we used to, since with * our current organization this just gives complications and * pessimizations for re-enabling interrupts. We used to have races * instead of the necessary complications. Disabling interrupts * would just reduce the chance of a status update while we are * running (by switching to the interrupt-mode coalescence * parameters), but this chance is already very low so it is more * efficient to get another interrupt than prevent it. * * We do the ack first to ensure another interrupt if there is a * status update after the ack. We don't check for the status * changing later because it is more efficient to get another * interrupt than prevent it, not quite as above (not checking is * a smaller optimization than not toggling the interrupt enable, * since checking doesn't involve PCI accesses and toggling require * the status check). So toggling would probably be a pessimization * even with MSI. It would only be needed for using a task queue. */ bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); /* * Do the mandatory PCI flush as well as get the link status. */ statusword = CSR_READ_4(sc, BGE_MAC_STS) & BGE_MACSTAT_LINK_CHANGED; /* Make sure the descriptor ring indexes are coherent. */ bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; sc->bge_ldata.bge_status_block->bge_status = 0; bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if ((sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) || statusword || sc->bge_link_evt) bge_link_upd(sc); if (sc->bge_flags & BGE_FLAG_RUNNING) { /* Check RX return ring producer/consumer. */ bge_rxeof(sc, rx_prod, 1); } if (sc->bge_flags & BGE_FLAG_RUNNING) { /* Check TX ring producer/consumer. */ bge_txeof(sc, tx_cons); } if (sc->bge_flags & BGE_FLAG_RUNNING && if_snd_len(ifp)) bge_start_locked(sc); BGE_UNLOCK(sc); } static void bge_asf_driver_up(struct bge_softc *sc) { if (sc->bge_asf_mode & ASF_STACKUP) { /* Send ASF heartbeat aprox. every 2s */ if (sc->bge_asf_count) sc->bge_asf_count --; else { sc->bge_asf_count = 2; bge_writemem_ind(sc, BGE_SRAM_FW_CMD_MB, BGE_FW_CMD_DRV_ALIVE); bge_writemem_ind(sc, BGE_SRAM_FW_CMD_LEN_MB, 4); bge_writemem_ind(sc, BGE_SRAM_FW_CMD_DATA_MB, BGE_FW_HB_TIMEOUT_SEC); CSR_WRITE_4(sc, BGE_RX_CPU_EVENT, CSR_READ_4(sc, BGE_RX_CPU_EVENT) | BGE_RX_CPU_DRV_EVENT); } } } static void bge_tick(void *xsc) { struct bge_softc *sc = xsc; struct mii_data *mii = NULL; BGE_LOCK_ASSERT(sc); /* Synchronize with possible callout reset/stop. */ if (callout_pending(&sc->bge_stat_ch) || !callout_active(&sc->bge_stat_ch)) return; if (BGE_IS_5705_PLUS(sc)) bge_stats_update_regs(sc); else bge_stats_update(sc); /* XXX Add APE heartbeat check here? */ if ((sc->bge_flags & BGE_FLAG_TBI) == 0) { mii = device_get_softc(sc->bge_miibus); /* * Do not touch PHY if we have link up. This could break * IPMI/ASF mode or produce extra input errors * (extra errors was reported for bcm5701 & bcm5704). */ if (!sc->bge_link) mii_tick(mii); } else { /* * Since in TBI mode auto-polling can't be used we should poll * link status manually. Here we register pending link event * and trigger interrupt. */ #ifdef DEVICE_POLLING /* In polling mode we poll link state in bge_poll(). */ if (!(sc->bge_capenable & IFCAP_POLLING)) #endif { sc->bge_link_evt++; if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_flags & BGE_FLAG_5788) BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET); else BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW); } } bge_asf_driver_up(sc); bge_watchdog(sc); callout_reset(&sc->bge_stat_ch, hz, bge_tick, sc); } static void bge_stats_update_regs(struct bge_softc *sc) { if_t ifp; struct bge_mac_stats *stats; uint32_t val; ifp = sc->bge_ifp; stats = &sc->bge_mac_stats; stats->ifHCOutOctets += CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); stats->etherStatsCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); stats->outXonSent += CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); stats->outXoffSent += CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); stats->dot3StatsInternalMacTransmitErrors += CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); stats->dot3StatsSingleCollisionFrames += CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); stats->dot3StatsMultipleCollisionFrames += CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); stats->dot3StatsDeferredTransmissions += CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); stats->dot3StatsExcessiveCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); stats->dot3StatsLateCollisions += CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); stats->ifHCOutUcastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); stats->ifHCOutMulticastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); stats->ifHCOutBroadcastPkts += CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); stats->ifHCInOctets += CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); stats->etherStatsFragments += CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); stats->ifHCInUcastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); stats->ifHCInMulticastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); stats->ifHCInBroadcastPkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); stats->dot3StatsFCSErrors += CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); stats->dot3StatsAlignmentErrors += CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); stats->xonPauseFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); stats->xoffPauseFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); stats->macControlFramesReceived += CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); stats->xoffStateEntered += CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); stats->dot3StatsFramesTooLong += CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); stats->etherStatsJabbers += CSR_READ_4(sc, BGE_RX_MAC_STATS_JABBERS); stats->etherStatsUndersizePkts += CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); stats->FramesDroppedDueToFilters += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); stats->DmaWriteQueueFull += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); stats->DmaWriteHighPriQueueFull += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); stats->NoMoreRxBDs += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); /* * XXX * Unlike other controllers, BGE_RXLP_LOCSTAT_IFIN_DROPS * counter of BCM5717, BCM5718, BCM5719 A0 and BCM5720 A0 * includes number of unwanted multicast frames. This comes * from silicon bug and known workaround to get rough(not * exact) counter is to enable interrupt on MBUF low water * attention. This can be accomplished by setting * BGE_HCCMODE_ATTN bit of BGE_HCC_MODE, * BGE_BMANMODE_LOMBUF_ATTN bit of BGE_BMAN_MODE and * BGE_MODECTL_FLOWCTL_ATTN_INTR bit of BGE_MODE_CTL. * However that change would generate more interrupts and * there are still possibilities of losing multiple frames * during BGE_MODECTL_FLOWCTL_ATTN_INTR interrupt handling. * Given that the workaround still would not get correct * counter I don't think it's worth to implement it. So * ignore reading the counter on controllers that have the * silicon bug. */ if (sc->bge_asicrev != BGE_ASICREV_BCM5717 && sc->bge_chipid != BGE_CHIPID_BCM5719_A0 && sc->bge_chipid != BGE_CHIPID_BCM5720_A0) stats->InputDiscards += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); stats->InputErrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); stats->RecvThresholdHit += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); if (sc->bge_flags & BGE_FLAG_RDMA_BUG) { /* * If controller transmitted more than BGE_NUM_RDMA_CHANNELS * frames, it's safe to disable workaround for DMA engine's * miscalculation of TXMBUF space. */ if (stats->ifHCOutUcastPkts + stats->ifHCOutMulticastPkts + stats->ifHCOutBroadcastPkts > BGE_NUM_RDMA_CHANNELS) { val = CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL); if (sc->bge_asicrev == BGE_ASICREV_BCM5719) val &= ~BGE_RDMA_TX_LENGTH_WA_5719; else val &= ~BGE_RDMA_TX_LENGTH_WA_5720; CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL, val); sc->bge_flags &= ~BGE_FLAG_RDMA_BUG; } } } static void bge_stats_clear_regs(struct bge_softc *sc) { CSR_READ_4(sc, BGE_TX_MAC_STATS_OCTETS); CSR_READ_4(sc, BGE_TX_MAC_STATS_COLLS); CSR_READ_4(sc, BGE_TX_MAC_STATS_XON_SENT); CSR_READ_4(sc, BGE_TX_MAC_STATS_XOFF_SENT); CSR_READ_4(sc, BGE_TX_MAC_STATS_ERRORS); CSR_READ_4(sc, BGE_TX_MAC_STATS_SINGLE_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_MULTI_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_DEFERRED); CSR_READ_4(sc, BGE_TX_MAC_STATS_EXCESS_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_LATE_COLL); CSR_READ_4(sc, BGE_TX_MAC_STATS_UCAST); CSR_READ_4(sc, BGE_TX_MAC_STATS_MCAST); CSR_READ_4(sc, BGE_TX_MAC_STATS_BCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_OCTESTS); CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAGMENTS); CSR_READ_4(sc, BGE_RX_MAC_STATS_UCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_MCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_BCAST); CSR_READ_4(sc, BGE_RX_MAC_STATS_FCS_ERRORS); CSR_READ_4(sc, BGE_RX_MAC_STATS_ALGIN_ERRORS); CSR_READ_4(sc, BGE_RX_MAC_STATS_XON_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_CTRL_RCVD); CSR_READ_4(sc, BGE_RX_MAC_STATS_XOFF_ENTERED); CSR_READ_4(sc, BGE_RX_MAC_STATS_FRAME_TOO_LONG); CSR_READ_4(sc, BGE_RX_MAC_STATS_JABBERS); CSR_READ_4(sc, BGE_RX_MAC_STATS_UNDERSIZE); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_FILTDROP); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_WRQ_FULL); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_DMA_HPWRQ_FULL); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_ERRORS); CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT); } static void bge_stats_update(struct bge_softc *sc) { if_t ifp; bus_size_t stats; uint32_t cnt; /* current register value */ ifp = sc->bge_ifp; stats = BGE_MEMWIN_START + BGE_STATS_BLOCK; #define READ_STAT(sc, stats, stat) \ CSR_READ_4(sc, stats + offsetof(struct bge_stats, stat)) cnt = READ_STAT(sc, stats, txstats.etherStatsCollisions.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, cnt - sc->bge_tx_collisions); sc->bge_tx_collisions = cnt; cnt = READ_STAT(sc, stats, nicNoMoreRxBDs.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_nobds); sc->bge_rx_nobds = cnt; cnt = READ_STAT(sc, stats, ifInErrors.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_inerrs); sc->bge_rx_inerrs = cnt; cnt = READ_STAT(sc, stats, ifInDiscards.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_discards); sc->bge_rx_discards = cnt; cnt = READ_STAT(sc, stats, txstats.ifOutDiscards.bge_addr_lo); if_inc_counter(ifp, IFCOUNTER_OERRORS, cnt - sc->bge_tx_discards); sc->bge_tx_discards = cnt; #undef READ_STAT } /* * Pad outbound frame to ETHER_MIN_NOPAD for an unusual reason. * The bge hardware will pad out Tx runts to ETHER_MIN_NOPAD, * but when such padded frames employ the bge IP/TCP checksum offload, * the hardware checksum assist gives incorrect results (possibly * from incorporating its own padding into the UDP/TCP checksum; who knows). * If we pad such runts with zeros, the onboard checksum comes out correct. */ static __inline int bge_cksum_pad(struct mbuf *m) { int padlen = ETHER_MIN_NOPAD - m->m_pkthdr.len; struct mbuf *last; /* If there's only the packet-header and we can pad there, use it. */ if (m->m_pkthdr.len == m->m_len && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= padlen) { last = m; } else { /* * Walk packet chain to find last mbuf. We will either * pad there, or append a new mbuf and pad it. */ for (last = m; last->m_next != NULL; last = last->m_next); if (!(M_WRITABLE(last) && M_TRAILINGSPACE(last) >= padlen)) { /* Allocate new empty mbuf, pad it. Compact later. */ struct mbuf *n; MGET(n, M_NOWAIT, MT_DATA); if (n == NULL) return (ENOBUFS); n->m_len = 0; last->m_next = n; last = n; } } /* Now zero the pad area, to avoid the bge cksum-assist bug. */ memset(mtod(last, caddr_t) + last->m_len, 0, padlen); last->m_len += padlen; m->m_pkthdr.len += padlen; return (0); } static struct mbuf * bge_check_short_dma(struct mbuf *m) { struct mbuf *n; int found; /* * If device receive two back-to-back send BDs with less than * or equal to 8 total bytes then the device may hang. The two * back-to-back send BDs must in the same frame for this failure * to occur. Scan mbuf chains and see whether two back-to-back * send BDs are there. If this is the case, allocate new mbuf * and copy the frame to workaround the silicon bug. */ for (n = m, found = 0; n != NULL; n = n->m_next) { if (n->m_len < 8) { found++; if (found > 1) break; continue; } found = 0; } if (found > 1) { n = m_defrag(m, M_NOWAIT); if (n == NULL) m_freem(m); } else n = m; return (n); } static struct mbuf * bge_setup_tso(struct bge_softc *sc, struct mbuf *m, uint16_t *mss, uint16_t *flags) { struct ip *ip; struct tcphdr *tcp; struct mbuf *n; uint16_t hlen; uint32_t poff; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ n = m_dup(m, M_NOWAIT); m_freem(m); if (n == NULL) return (NULL); m = n; } m = m_pullup(m, sizeof(struct ether_header) + sizeof(struct ip)); if (m == NULL) return (NULL); ip = (struct ip *)(mtod(m, char *) + sizeof(struct ether_header)); poff = sizeof(struct ether_header) + (ip->ip_hl << 2); m = m_pullup(m, poff + sizeof(struct tcphdr)); if (m == NULL) return (NULL); tcp = (struct tcphdr *)(mtod(m, char *) + poff); m = m_pullup(m, poff + (tcp->th_off << 2)); if (m == NULL) return (NULL); /* * It seems controller doesn't modify IP length and TCP pseudo * checksum. These checksum computed by upper stack should be 0. */ *mss = m->m_pkthdr.tso_segsz; ip = (struct ip *)(mtod(m, char *) + sizeof(struct ether_header)); ip->ip_sum = 0; ip->ip_len = htons(*mss + (ip->ip_hl << 2) + (tcp->th_off << 2)); /* Clear pseudo checksum computed by TCP stack. */ tcp = (struct tcphdr *)(mtod(m, char *) + poff); tcp->th_sum = 0; /* * Broadcom controllers uses different descriptor format for * TSO depending on ASIC revision. Due to TSO-capable firmware * license issue and lower performance of firmware based TSO * we only support hardware based TSO. */ /* Calculate header length, incl. TCP/IP options, in 32 bit units. */ hlen = ((ip->ip_hl << 2) + (tcp->th_off << 2)) >> 2; if (sc->bge_flags & BGE_FLAG_TSO3) { /* * For BCM5717 and newer controllers, hardware based TSO * uses the 14 lower bits of the bge_mss field to store the * MSS and the upper 2 bits to store the lowest 2 bits of * the IP/TCP header length. The upper 6 bits of the header * length are stored in the bge_flags[14:10,4] field. Jumbo * frames are supported. */ *mss |= ((hlen & 0x3) << 14); *flags |= ((hlen & 0xF8) << 7) | ((hlen & 0x4) << 2); } else { /* * For BCM5755 and newer controllers, hardware based TSO uses * the lower 11 bits to store the MSS and the upper 5 bits to * store the IP/TCP header length. Jumbo frames are not * supported. */ *mss |= (hlen << 11); } return (m); } /* * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data * pointers to descriptors. */ static int bge_encap(struct bge_softc *sc, struct mbuf **m_head, uint32_t *txidx) { bus_dma_segment_t segs[BGE_NSEG_NEW]; bus_dmamap_t map; struct bge_tx_bd *d; struct mbuf *m = *m_head; uint32_t idx = *txidx; uint16_t csum_flags, mss, vlan_tag; int nsegs, i, error; csum_flags = 0; mss = 0; vlan_tag = 0; if ((sc->bge_flags & BGE_FLAG_SHORT_DMA_BUG) != 0 && m->m_next != NULL) { *m_head = bge_check_short_dma(m); if (*m_head == NULL) return (ENOBUFS); m = *m_head; } if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { *m_head = m = bge_setup_tso(sc, m, &mss, &csum_flags); if (*m_head == NULL) return (ENOBUFS); csum_flags |= BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA; } else if ((m->m_pkthdr.csum_flags & sc->bge_hwassist) != 0) { if (m->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= BGE_TXBDFLAG_IP_CSUM; if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)) { csum_flags |= BGE_TXBDFLAG_TCP_UDP_CSUM; if (m->m_pkthdr.len < ETHER_MIN_NOPAD && (error = bge_cksum_pad(m)) != 0) { m_freem(m); *m_head = NULL; return (error); } } } if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { if (sc->bge_flags & BGE_FLAG_JUMBO_FRAME && m->m_pkthdr.len > ETHER_MAX_LEN) csum_flags |= BGE_TXBDFLAG_JUMBO_FRAME; if (sc->bge_forced_collapse > 0 && (sc->bge_flags & BGE_FLAG_PCIE) != 0 && m->m_next != NULL) { /* * Forcedly collapse mbuf chains to overcome hardware * limitation which only support a single outstanding * DMA read operation. */ if (sc->bge_forced_collapse == 1) m = m_defrag(m, M_NOWAIT); else m = m_collapse(m, M_NOWAIT, sc->bge_forced_collapse); if (m == NULL) m = *m_head; *m_head = m; } } map = sc->bge_cdata.bge_tx_dmamap[idx]; error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(m, M_NOWAIT, BGE_NSEG_NEW); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { m_freem(m); *m_head = NULL; return (error); } } else if (error != 0) return (error); /* Check if we have enough free send BDs. */ if (sc->bge_txcnt + nsegs >= BGE_TX_RING_CNT) { bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, map); return (ENOBUFS); } bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, map, BUS_DMASYNC_PREWRITE); if (m->m_flags & M_VLANTAG) { csum_flags |= BGE_TXBDFLAG_VLAN_TAG; vlan_tag = m->m_pkthdr.ether_vtag; } if (sc->bge_asicrev == BGE_ASICREV_BCM5762 && (m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { /* * 5725 family of devices corrupts TSO packets when TSO DMA * buffers cross into regions which are within MSS bytes of * a 4GB boundary. If we encounter the condition, drop the * packet. */ for (i = 0; ; i++) { d = &sc->bge_ldata.bge_tx_ring[idx]; d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr); d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr); d->bge_len = segs[i].ds_len; if (d->bge_addr.bge_addr_lo + segs[i].ds_len + mss < d->bge_addr.bge_addr_lo) break; d->bge_flags = csum_flags; d->bge_vlan_tag = vlan_tag; d->bge_mss = mss; if (i == nsegs - 1) break; BGE_INC(idx, BGE_TX_RING_CNT); } if (i != nsegs - 1) { bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag, map); m_freem(*m_head); *m_head = NULL; return (EIO); } } else { for (i = 0; ; i++) { d = &sc->bge_ldata.bge_tx_ring[idx]; d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr); d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr); d->bge_len = segs[i].ds_len; d->bge_flags = csum_flags; d->bge_vlan_tag = vlan_tag; d->bge_mss = mss; if (i == nsegs - 1) break; BGE_INC(idx, BGE_TX_RING_CNT); } } /* Mark the last segment as end of packet... */ d->bge_flags |= BGE_TXBDFLAG_END; /* * Insure that the map for this transmission * is placed at the array index of the last descriptor * in this chain. */ sc->bge_cdata.bge_tx_dmamap[*txidx] = sc->bge_cdata.bge_tx_dmamap[idx]; sc->bge_cdata.bge_tx_dmamap[idx] = map; sc->bge_cdata.bge_tx_chain[idx] = m; sc->bge_txcnt += nsegs; BGE_INC(idx, BGE_TX_RING_CNT); *txidx = idx; return (0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. */ static int bge_start_locked(struct bge_softc *sc) { if_t ifp; struct mbuf *m; uint32_t prodidx; int error, count; BGE_LOCK_ASSERT(sc); if (!sc->bge_link || (sc->bge_flags & BGE_FLAG_RUNNING) == 0) return (ENETDOWN); ifp = sc->bge_ifp; prodidx = sc->bge_tx_prodidx; error = count = 0; while (sc->bge_txcnt <= BGE_TX_RING_CNT - 16 && (m = if_snd_dequeue(ifp)) != NULL) { /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (bge_encap(sc, &m, &prodidx)) { if (m == NULL) break; if_snd_prepend(ifp, m); break; } ++count; if_mtap(ifp, m, NULL, 0); } if (count > 0) { bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Transmit. */ bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); /* 5700 b2 errata */ if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); sc->bge_tx_prodidx = prodidx; /* * Set a timeout in case the chip goes out to lunch. */ sc->bge_timer = BGE_TX_TIMEOUT; } return (0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. */ static int bge_transmit(if_t ifp, struct mbuf *m) { struct bge_softc *sc; int error; if ((error = if_snd_enqueue(ifp, m)) != 0) return (error); sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); BGE_LOCK(sc); error = bge_start_locked(sc); BGE_UNLOCK(sc); return (error); } static void bge_init(struct bge_softc *sc) { if_t ifp; uint16_t *m; uint32_t mode; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; if (sc->bge_flags & BGE_FLAG_RUNNING) return; /* Cancel pending I/O and flush buffers. */ bge_stop(sc); bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_START); bge_reset(sc); bge_sig_legacy(sc, BGE_RESET_START); bge_sig_post_reset(sc, BGE_RESET_START); bge_chipinit(sc); /* * Init the various state machines, ring * control blocks and firmware. */ if (bge_blockinit(sc)) { device_printf(sc->bge_dev, "initialization failure\n"); return; } ifp = sc->bge_ifp; /* Specify MTU. */ CSR_WRITE_4(sc, BGE_RX_MTU, sc->bge_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + (sc->bge_capenable & IFCAP_VLAN_MTU ? ETHER_VLAN_ENCAP_LEN : 0)); /* Load our MAC address. */ m = (uint16_t *)if_lladdr(sc->bge_ifp); CSR_WRITE_4(sc, BGE_MAC_ADDR1_LO, htons(m[0])); CSR_WRITE_4(sc, BGE_MAC_ADDR1_HI, (htons(m[1]) << 16) | htons(m[2])); /* Program promiscuous mode. */ bge_setpromisc(sc); /* Program multicast filter. */ bge_setmulti(sc); /* Program VLAN tag stripping. */ bge_setvlan(sc); /* Override UDP checksum offloading. */ if (sc->bge_forced_udpcsum == 0) sc->bge_hwassist &= ~CSUM_UDP; else sc->bge_hwassist |= CSUM_UDP; /* Init RX ring. */ if (bge_init_rx_ring_std(sc) != 0) { device_printf(sc->bge_dev, "no memory for std Rx buffers.\n"); bge_stop(sc); return; } /* * Workaround for a bug in 5705 ASIC rev A0. Poll the NIC's * memory to insure that the chip has in fact read the first * entry of the ring. */ if (sc->bge_chipid == BGE_CHIPID_BCM5705_A0) { uint32_t v, i; for (i = 0; i < 10; i++) { DELAY(20); v = bge_readmem_ind(sc, BGE_STD_RX_RINGS + 8); if (v == (MCLBYTES - ETHER_ALIGN)) break; } if (i == 10) device_printf (sc->bge_dev, "5705 A0 chip failed to load RX ring\n"); } /* Init jumbo RX ring. */ if (BGE_IS_JUMBO_CAPABLE(sc) && sc->bge_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN)) { if (bge_init_rx_ring_jumbo(sc) != 0) { device_printf(sc->bge_dev, "no memory for jumbo Rx buffers.\n"); bge_stop(sc); return; } } /* Init our RX return ring index. */ sc->bge_rx_saved_considx = 0; /* Init our RX/TX stat counters. */ sc->bge_rx_discards = sc->bge_tx_discards = sc->bge_tx_collisions = 0; /* Init TX ring. */ bge_init_tx_ring(sc); /* Enable TX MAC state machine lockup fix. */ mode = CSR_READ_4(sc, BGE_TX_MODE); if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906) mode |= BGE_TXMODE_MBUF_LOCKUP_FIX; if (sc->bge_asicrev == BGE_ASICREV_BCM5720 || sc->bge_asicrev == BGE_ASICREV_BCM5762) { mode &= ~(BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE); mode |= CSR_READ_4(sc, BGE_TX_MODE) & (BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE); } /* Turn on transmitter. */ CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE); DELAY(100); /* Turn on receiver. */ mode = CSR_READ_4(sc, BGE_RX_MODE); if (BGE_IS_5755_PLUS(sc)) mode |= BGE_RXMODE_IPV6_ENABLE; if (sc->bge_asicrev == BGE_ASICREV_BCM5762) mode |= BGE_RXMODE_IPV4_FRAG_FIX; CSR_WRITE_4(sc,BGE_RX_MODE, mode | BGE_RXMODE_ENABLE); DELAY(10); /* * Set the number of good frames to receive after RX MBUF * Low Watermark has been reached. After the RX MAC receives * this number of frames, it will drop subsequent incoming * frames until the MBUF High Watermark is reached. */ if (BGE_IS_57765_PLUS(sc)) CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1); else CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2); /* Clear MAC statistics. */ if (BGE_IS_5705_PLUS(sc)) bge_stats_clear_regs(sc); /* Tell firmware we're alive. */ BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); #ifdef DEVICE_POLLING /* Disable interrupts if we are polling. */ if (sc->bge_capenable & IFCAP_POLLING) { BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); } else #endif /* Enable host interrupts. */ { BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA); BGE_CLRBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); } sc->bge_flags |= BGE_FLAG_RUNNING; bge_ifmedia_upd_locked(ifp); callout_reset(&sc->bge_stat_ch, hz, bge_tick, sc); } /* * Set media options. */ static int bge_ifmedia_upd(if_t ifp) { struct bge_softc *sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); int res; BGE_LOCK(sc); res = bge_ifmedia_upd_locked(ifp); BGE_UNLOCK(sc); return (res); } static int bge_ifmedia_upd_locked(if_t ifp) { struct bge_softc *sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct mii_data *mii; struct mii_softc *miisc; struct ifmedia *ifm; BGE_LOCK_ASSERT(sc); ifm = &sc->bge_ifmedia; /* If this is a 1000baseX NIC, enable the TBI port. */ if (sc->bge_flags & BGE_FLAG_TBI) { if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch(IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: /* * The BCM5704 ASIC appears to have a special * mechanism for programming the autoneg * advertisement registers in TBI mode. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { uint32_t sgdig; sgdig = CSR_READ_4(sc, BGE_SGDIG_STS); if (sgdig & BGE_SGDIGSTS_DONE) { CSR_WRITE_4(sc, BGE_TX_TBI_AUTONEG, 0); sgdig = CSR_READ_4(sc, BGE_SGDIG_CFG); sgdig |= BGE_SGDIGCFG_AUTO | BGE_SGDIGCFG_PAUSE_CAP | BGE_SGDIGCFG_ASYM_PAUSE; CSR_WRITE_4(sc, BGE_SGDIG_CFG, sgdig | BGE_SGDIGCFG_SEND); DELAY(5); CSR_WRITE_4(sc, BGE_SGDIG_CFG, sgdig); } } break; case IFM_1000_SX: if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); } else { BGE_SETBIT(sc, BGE_MAC_MODE, BGE_MACMODE_HALF_DUPLEX); } DELAY(40); break; default: return (EINVAL); } return (0); } sc->bge_link_evt++; mii = device_get_softc(sc->bge_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); mii_mediachg(mii); /* * Force an interrupt so that we will call bge_link_upd * if needed and clear any pending link state attention. * Without this we are not getting any further interrupts * for link state changes and thus will not UP the link and * not be able to send in bge_start_locked. The only * way to get things working was to receive a packet and * get an RX intr. * bge_tick should help for fiber cards and we might not * need to do this here if BGE_FLAG_TBI is set but as * we poll for fiber anyway it should not harm. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 || sc->bge_flags & BGE_FLAG_5788) BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET); else BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW); return (0); } /* * Report current media status. */ static void bge_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct bge_softc *sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct mii_data *mii; BGE_LOCK(sc); if ((sc->bge_if_flags & IFF_UP) == 0) { BGE_UNLOCK(sc); return; } if (sc->bge_flags & BGE_FLAG_TBI) { ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (CSR_READ_4(sc, BGE_MAC_STS) & BGE_MACSTAT_TBI_PCS_SYNCHED) ifmr->ifm_status |= IFM_ACTIVE; else { ifmr->ifm_active |= IFM_NONE; BGE_UNLOCK(sc); return; } ifmr->ifm_active |= IFM_1000_SX; if (CSR_READ_4(sc, BGE_MAC_MODE) & BGE_MACMODE_HALF_DUPLEX) ifmr->ifm_active |= IFM_HDX; else ifmr->ifm_active |= IFM_FDX; BGE_UNLOCK(sc); return; } mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; BGE_UNLOCK(sc); } static int bge_ioctl(if_t ifp, u_long command, void *data, struct thread *td) { struct bge_softc *sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int oflags, mask, error = 0; switch (command) { case SIOCSIFMTU: if (BGE_IS_JUMBO_CAPABLE(sc) || (sc->bge_flags & BGE_FLAG_JUMBO_STD)) { if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > BGE_JUMBO_MTU) { error = EINVAL; break; } } else if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > ETHERMTU) { error = EINVAL; break; } BGE_LOCK(sc); sc->bge_mtu = ifr->ifr_mtu; if (sc->bge_flags & BGE_FLAG_RUNNING) { sc->bge_flags &= ~BGE_FLAG_RUNNING; bge_init(sc); } BGE_UNLOCK(sc); break; case SIOCSIFFLAGS: BGE_LOCK(sc); oflags = sc->bge_if_flags; sc->bge_if_flags = ifr->ifr_flags; if (sc->bge_if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. Similarly for ALLMULTI. */ if (sc->bge_flags & BGE_FLAG_RUNNING) { if ((oflags ^ sc->bge_if_flags) & IFF_PROMISC) bge_setpromisc(sc); if ((oflags ^ sc->bge_if_flags) & IFF_ALLMULTI) bge_setmulti(sc); } else bge_init(sc); } else if (sc->bge_flags & BGE_FLAG_RUNNING) bge_stop(sc); BGE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if (sc->bge_flags & BGE_FLAG_RUNNING) { BGE_LOCK(sc); bge_setmulti(sc); BGE_UNLOCK(sc); error = 0; } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: if (sc->bge_flags & BGE_FLAG_TBI) { error = ifmedia_ioctl(ifp, ifr, &sc->bge_ifmedia, command); } else { mii = device_get_softc(sc->bge_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifr->ifr_curcap; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { BGE_LOCK(sc); if (ifr->ifr_reqcap & IFCAP_POLLING) { BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); } else { BGE_CLRBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 0); } BGE_UNLOCK(sc); } #endif sc->bge_capenable = ifr->ifr_reqcap; ifr->ifr_hwassist = 0; if ((sc->bge_capenable & IFCAP_TXCSUM) != 0) ifr->ifr_hwassist = sc->bge_hwassist; if ((sc->bge_capenable & IFCAP_TSO4) != 0 && (sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) != 0) ifr->ifr_hwassist |= CSUM_TSO; if (mask & IFCAP_VLAN_MTU) { BGE_LOCK(sc); sc->bge_flags &= ~BGE_FLAG_RUNNING; bge_init(sc); BGE_UNLOCK(sc); } if ((mask & IFCAP_VLAN_HWTAGGING) != 0) { BGE_LOCK(sc); bge_setvlan(sc); BGE_UNLOCK(sc); } break; default: error = EOPNOTSUPP; break; } return (error); } static void bge_watchdog(struct bge_softc *sc) { if_t ifp; uint32_t status; BGE_LOCK_ASSERT(sc); if (sc->bge_timer == 0 || --sc->bge_timer) return; /* If pause frames are active then don't reset the hardware. */ if ((CSR_READ_4(sc, BGE_RX_MODE) & BGE_RXMODE_FLOWCTL_ENABLE) != 0) { status = CSR_READ_4(sc, BGE_RX_STS); if ((status & BGE_RXSTAT_REMOTE_XOFFED) != 0) { /* * If link partner has us in XOFF state then wait for * the condition to clear. */ CSR_WRITE_4(sc, BGE_RX_STS, status); sc->bge_timer = BGE_TX_TIMEOUT; return; } else if ((status & BGE_RXSTAT_RCVD_XOFF) != 0 && (status & BGE_RXSTAT_RCVD_XON) != 0) { /* * If link partner has us in XOFF state then wait for * the condition to clear. */ CSR_WRITE_4(sc, BGE_RX_STS, status); sc->bge_timer = BGE_TX_TIMEOUT; return; } /* * Any other condition is unexpected and the controller * should be reset. */ } ifp = sc->bge_ifp; if_printf(ifp, "watchdog timeout -- resetting\n"); sc->bge_flags &= ~BGE_FLAG_RUNNING; bge_init(sc); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } static void bge_stop_block(struct bge_softc *sc, bus_size_t reg, uint32_t bit) { int i; BGE_CLRBIT(sc, reg, bit); for (i = 0; i < BGE_TIMEOUT; i++) { if ((CSR_READ_4(sc, reg) & bit) == 0) return; DELAY(100); } } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void bge_stop(struct bge_softc *sc) { if_t ifp; BGE_LOCK_ASSERT(sc); ifp = sc->bge_ifp; callout_stop(&sc->bge_stat_ch); /* Disable host interrupts. */ BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR); bge_writembx(sc, BGE_MBX_IRQ0_LO, 1); /* * Tell firmware we're shutting down. */ bge_stop_fw(sc); bge_sig_pre_reset(sc, BGE_RESET_SHUTDOWN); /* * Disable all of the receiver blocks. */ bge_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE); bge_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE); bge_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE); bge_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE); bge_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE); bge_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE); /* * Disable all of the transmit blocks. */ bge_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE); bge_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE); bge_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE); bge_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE); bge_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE); bge_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE); /* * Shut down all of the memory managers and related * state machines. */ bge_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE); bge_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE); if (BGE_IS_5700_FAMILY(sc)) bge_stop_block(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF); CSR_WRITE_4(sc, BGE_FTQ_RESET, 0); if (!(BGE_IS_5705_PLUS(sc))) { BGE_CLRBIT(sc, BGE_BMAN_MODE, BGE_BMANMODE_ENABLE); BGE_CLRBIT(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE); } /* Update MAC statistics. */ if (BGE_IS_5705_PLUS(sc)) bge_stats_update_regs(sc); bge_reset(sc); bge_sig_legacy(sc, BGE_RESET_SHUTDOWN); bge_sig_post_reset(sc, BGE_RESET_SHUTDOWN); /* * Keep the ASF firmware running if up. */ if (sc->bge_asf_mode & ASF_STACKUP) BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); else BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP); /* Free the RX lists. */ bge_free_rx_ring_std(sc); /* Free jumbo RX list. */ if (BGE_IS_JUMBO_CAPABLE(sc)) bge_free_rx_ring_jumbo(sc); /* Free TX buffers. */ bge_free_tx_ring(sc); sc->bge_tx_saved_considx = BGE_TXCONS_UNSET; /* Clear MAC's link state (PHY may still have link UP). */ if (bootverbose && sc->bge_link) if_printf(sc->bge_ifp, "link DOWN\n"); sc->bge_link = 0; sc->bge_flags &= ~BGE_FLAG_RUNNING; } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int bge_shutdown(device_t dev) { struct bge_softc *sc; sc = device_get_softc(dev); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); return (0); } static int bge_suspend(device_t dev) { struct bge_softc *sc; sc = device_get_softc(dev); BGE_LOCK(sc); bge_stop(sc); BGE_UNLOCK(sc); return (0); } static int bge_resume(device_t dev) { struct bge_softc *sc; sc = device_get_softc(dev); BGE_LOCK(sc); if (sc->bge_if_flags & IFF_UP) { bge_init(sc); if (sc->bge_flags & BGE_FLAG_RUNNING) bge_start_locked(sc); } BGE_UNLOCK(sc); return (0); } static void bge_link_upd(struct bge_softc *sc) { struct mii_data *mii; uint32_t link, status; BGE_LOCK_ASSERT(sc); /* Clear 'pending link event' flag. */ sc->bge_link_evt = 0; /* * Process link state changes. * Grrr. The link status word in the status block does * not work correctly on the BCM5700 rev AX and BX chips, * according to all available information. Hence, we have * to enable MII interrupts in order to properly obtain * async link changes. Unfortunately, this also means that * we have to read the MAC status register to detect link * changes, thereby adding an additional register access to * the interrupt handler. * * XXX: perhaps link state detection procedure used for * BGE_CHIPID_BCM5700_B2 can be used for others BCM5700 revisions. */ if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_B2) { status = CSR_READ_4(sc, BGE_MAC_STS); if (status & BGE_MACSTAT_MI_INTERRUPT) { mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->bge_link++; if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); } else if (sc->bge_link && (!(mii->mii_media_status & IFM_ACTIVE) || IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); } /* Clear the interrupt. */ CSR_WRITE_4(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_MI_INTERRUPT); bge_miibus_readreg(sc->bge_dev, sc->bge_phy_addr, BRGPHY_MII_ISR); bge_miibus_writereg(sc->bge_dev, sc->bge_phy_addr, BRGPHY_MII_IMR, BRGPHY_INTRS); } return; } if (sc->bge_flags & BGE_FLAG_TBI) { status = CSR_READ_4(sc, BGE_MAC_STS); if (status & BGE_MACSTAT_TBI_PCS_SYNCHED) { if (!sc->bge_link) { sc->bge_link++; if (sc->bge_asicrev == BGE_ASICREV_BCM5704) { BGE_CLRBIT(sc, BGE_MAC_MODE, BGE_MACMODE_TBI_SEND_CFGS); DELAY(40); } CSR_WRITE_4(sc, BGE_MAC_STS, 0xFFFFFFFF); if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); if_link_state_change(sc->bge_ifp, LINK_STATE_UP); } } else if (sc->bge_link) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); if_link_state_change(sc->bge_ifp, LINK_STATE_DOWN); } } else if ((sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) != 0) { /* * Some broken BCM chips have BGE_STATFLAG_LINKSTATE_CHANGED bit * in status word always set. Workaround this bug by reading * PHY link status directly. */ link = (CSR_READ_4(sc, BGE_MI_STS) & BGE_MISTS_LINK) ? 1 : 0; if (link != sc->bge_link || sc->bge_asicrev == BGE_ASICREV_BCM5700) { mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); if (!sc->bge_link && mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->bge_link++; if (bootverbose) if_printf(sc->bge_ifp, "link UP\n"); } else if (sc->bge_link && (!(mii->mii_media_status & IFM_ACTIVE) || IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) { sc->bge_link = 0; if (bootverbose) if_printf(sc->bge_ifp, "link DOWN\n"); } } } else { /* * For controllers that call mii_tick, we have to poll * link status. */ mii = device_get_softc(sc->bge_miibus); mii_pollstat(mii); bge_miibus_statchg(sc->bge_dev); } /* Disable MAC attention when link is up. */ CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED | BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE | BGE_MACSTAT_LINK_CHANGED); } static void bge_add_sysctls(struct bge_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int unit; ctx = device_get_sysctl_ctx(sc->bge_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->bge_dev)); #ifdef BGE_REGISTER_DEBUG SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "debug_info", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "reg_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_reg_read, "I", "MAC Register Read"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ape_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_ape_read, "I", "APE Register Read"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mem_read", CTLTYPE_INT | CTLFLAG_RW, sc, 0, bge_sysctl_mem_read, "I", "Memory Read"); #endif unit = device_get_unit(sc->bge_dev); /* * A common design characteristic for many Broadcom client controllers * is that they only support a single outstanding DMA read operation * on the PCIe bus. This means that it will take twice as long to fetch * a TX frame that is split into header and payload buffers as it does * to fetch a single, contiguous TX frame (2 reads vs. 1 read). For * these controllers, coalescing buffers to reduce the number of memory * reads is effective way to get maximum performance(about 940Mbps). * Without collapsing TX buffers the maximum TCP bulk transfer * performance is about 850Mbps. However forcing coalescing mbufs * consumes a lot of CPU cycles, so leave it off by default. */ sc->bge_forced_collapse = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_collapse", CTLFLAG_RWTUN, &sc->bge_forced_collapse, 0, "Number of fragmented TX buffers of a frame allowed before " "forced collapsing"); sc->bge_msi = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "msi", CTLFLAG_RDTUN, &sc->bge_msi, 0, "Enable MSI"); /* * It seems all Broadcom controllers have a bug that can generate UDP * datagrams with checksum value 0 when TX UDP checksum offloading is * enabled. Generating UDP checksum value 0 is RFC 768 violation. * Even though the probability of generating such UDP datagrams is * low, I don't want to see FreeBSD boxes to inject such datagrams * into network so disable UDP checksum offloading by default. Users * still override this behavior by setting a sysctl variable, * dev.bge.0.forced_udpcsum. */ sc->bge_forced_udpcsum = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_udpcsum", CTLFLAG_RWTUN, &sc->bge_forced_udpcsum, 0, "Enable UDP checksum offloading even if controller can " "generate UDP checksum value 0"); if (BGE_IS_5705_PLUS(sc)) bge_add_sysctl_stats_regs(sc, ctx, children); else bge_add_sysctl_stats(sc, ctx, children); } #define BGE_SYSCTL_STAT(sc, ctx, desc, parent, node, oid) \ SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, oid, CTLTYPE_UINT|CTLFLAG_RD, \ sc, offsetof(struct bge_stats, node), bge_sysctl_stats, "IU", \ desc) static void bge_add_sysctl_stats(struct bge_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent) { struct sysctl_oid *tree; struct sysctl_oid_list *children, *schildren; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, NULL, "BGE Statistics"); schildren = children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Frames Dropped Due To Filters", children, COSFramesDroppedDueToFilters, "FramesDroppedDueToFilters"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Write Queue Full", children, nicDmaWriteQueueFull, "DmaWriteQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Write High Priority Queue Full", children, nicDmaWriteHighPriQueueFull, "DmaWriteHighPriQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC No More RX Buffer Descriptors", children, nicNoMoreRxBDs, "NoMoreRxBDs"); BGE_SYSCTL_STAT(sc, ctx, "Discarded Input Frames", children, ifInDiscards, "InputDiscards"); BGE_SYSCTL_STAT(sc, ctx, "Input Errors", children, ifInErrors, "InputErrors"); BGE_SYSCTL_STAT(sc, ctx, "NIC Recv Threshold Hit", children, nicRecvThresholdHit, "RecvThresholdHit"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Read Queue Full", children, nicDmaReadQueueFull, "DmaReadQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC DMA Read High Priority Queue Full", children, nicDmaReadHighPriQueueFull, "DmaReadHighPriQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC Send Data Complete Queue Full", children, nicSendDataCompQueueFull, "SendDataCompQueueFull"); BGE_SYSCTL_STAT(sc, ctx, "NIC Ring Set Send Producer Index", children, nicRingSetSendProdIndex, "RingSetSendProdIndex"); BGE_SYSCTL_STAT(sc, ctx, "NIC Ring Status Update", children, nicRingStatusUpdate, "RingStatusUpdate"); BGE_SYSCTL_STAT(sc, ctx, "NIC Interrupts", children, nicInterrupts, "Interrupts"); BGE_SYSCTL_STAT(sc, ctx, "NIC Avoided Interrupts", children, nicAvoidedInterrupts, "AvoidedInterrupts"); BGE_SYSCTL_STAT(sc, ctx, "NIC Send Threshold Hit", children, nicSendThresholdHit, "SendThresholdHit"); tree = SYSCTL_ADD_NODE(ctx, schildren, OID_AUTO, "rx", CTLFLAG_RD, NULL, "BGE RX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Inbound Octets", children, rxstats.ifHCInOctets, "ifHCInOctets"); BGE_SYSCTL_STAT(sc, ctx, "Fragments", children, rxstats.etherStatsFragments, "Fragments"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Unicast Packets", children, rxstats.ifHCInUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Multicast Packets", children, rxstats.ifHCInMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "FCS Errors", children, rxstats.dot3StatsFCSErrors, "FCSErrors"); BGE_SYSCTL_STAT(sc, ctx, "Alignment Errors", children, rxstats.dot3StatsAlignmentErrors, "AlignmentErrors"); BGE_SYSCTL_STAT(sc, ctx, "XON Pause Frames Received", children, rxstats.xonPauseFramesReceived, "xonPauseFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "XOFF Pause Frames Received", children, rxstats.xoffPauseFramesReceived, "xoffPauseFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "MAC Control Frames Received", children, rxstats.macControlFramesReceived, "ControlFramesReceived"); BGE_SYSCTL_STAT(sc, ctx, "XOFF State Entered", children, rxstats.xoffStateEntered, "xoffStateEntered"); BGE_SYSCTL_STAT(sc, ctx, "Frames Too Long", children, rxstats.dot3StatsFramesTooLong, "FramesTooLong"); BGE_SYSCTL_STAT(sc, ctx, "Jabbers", children, rxstats.etherStatsJabbers, "Jabbers"); BGE_SYSCTL_STAT(sc, ctx, "Undersized Packets", children, rxstats.etherStatsUndersizePkts, "UndersizePkts"); BGE_SYSCTL_STAT(sc, ctx, "Inbound Range Length Errors", children, rxstats.inRangeLengthError, "inRangeLengthError"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Range Length Errors", children, rxstats.outRangeLengthError, "outRangeLengthError"); tree = SYSCTL_ADD_NODE(ctx, schildren, OID_AUTO, "tx", CTLFLAG_RD, NULL, "BGE TX Statistics"); children = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT(sc, ctx, "Outbound Octets", children, txstats.ifHCOutOctets, "ifHCOutOctets"); BGE_SYSCTL_STAT(sc, ctx, "TX Collisions", children, txstats.etherStatsCollisions, "Collisions"); BGE_SYSCTL_STAT(sc, ctx, "XON Sent", children, txstats.outXonSent, "XonSent"); BGE_SYSCTL_STAT(sc, ctx, "XOFF Sent", children, txstats.outXoffSent, "XoffSent"); BGE_SYSCTL_STAT(sc, ctx, "Flow Control Done", children, txstats.flowControlDone, "flowControlDone"); BGE_SYSCTL_STAT(sc, ctx, "Internal MAC TX errors", children, txstats.dot3StatsInternalMacTransmitErrors, "InternalMacTransmitErrors"); BGE_SYSCTL_STAT(sc, ctx, "Single Collision Frames", children, txstats.dot3StatsSingleCollisionFrames, "SingleCollisionFrames"); BGE_SYSCTL_STAT(sc, ctx, "Multiple Collision Frames", children, txstats.dot3StatsMultipleCollisionFrames, "MultipleCollisionFrames"); BGE_SYSCTL_STAT(sc, ctx, "Deferred Transmissions", children, txstats.dot3StatsDeferredTransmissions, "DeferredTransmissions"); BGE_SYSCTL_STAT(sc, ctx, "Excessive Collisions", children, txstats.dot3StatsExcessiveCollisions, "ExcessiveCollisions"); BGE_SYSCTL_STAT(sc, ctx, "Late Collisions", children, txstats.dot3StatsLateCollisions, "LateCollisions"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Unicast Packets", children, txstats.ifHCOutUcastPkts, "UnicastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Multicast Packets", children, txstats.ifHCOutMulticastPkts, "MulticastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Broadcast Packets", children, txstats.ifHCOutBroadcastPkts, "BroadcastPkts"); BGE_SYSCTL_STAT(sc, ctx, "Carrier Sense Errors", children, txstats.dot3StatsCarrierSenseErrors, "CarrierSenseErrors"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Discards", children, txstats.ifOutDiscards, "Discards"); BGE_SYSCTL_STAT(sc, ctx, "Outbound Errors", children, txstats.ifOutErrors, "Errors"); } #undef BGE_SYSCTL_STAT #define BGE_SYSCTL_STAT_ADD64(c, h, n, p, d) \ SYSCTL_ADD_UQUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d) static void bge_add_sysctl_stats_regs(struct bge_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent) { struct sysctl_oid *tree; struct sysctl_oid_list *child, *schild; struct bge_mac_stats *stats; stats = &sc->bge_mac_stats; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "stats", CTLFLAG_RD, NULL, "BGE Statistics"); schild = child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesDroppedDueToFilters", &stats->FramesDroppedDueToFilters, "Frames Dropped Due to Filters"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteQueueFull", &stats->DmaWriteQueueFull, "NIC DMA Write Queue Full"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DmaWriteHighPriQueueFull", &stats->DmaWriteHighPriQueueFull, "NIC DMA Write High Priority Queue Full"); BGE_SYSCTL_STAT_ADD64(ctx, child, "NoMoreRxBDs", &stats->NoMoreRxBDs, "NIC No More RX Buffer Descriptors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InputDiscards", &stats->InputDiscards, "Discarded Input Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InputErrors", &stats->InputErrors, "Input Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "RecvThresholdHit", &stats->RecvThresholdHit, "NIC Recv Threshold Hit"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "rx", CTLFLAG_RD, NULL, "BGE RX Statistics"); child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCInOctets", &stats->ifHCInOctets, "Inbound Octets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Fragments", &stats->etherStatsFragments, "Fragments"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", &stats->ifHCInUcastPkts, "Inbound Unicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", &stats->ifHCInMulticastPkts, "Inbound Multicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", &stats->ifHCInBroadcastPkts, "Inbound Broadcast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "FCSErrors", &stats->dot3StatsFCSErrors, "FCS Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "AlignmentErrors", &stats->dot3StatsAlignmentErrors, "Alignment Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xonPauseFramesReceived", &stats->xonPauseFramesReceived, "XON Pause Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffPauseFramesReceived", &stats->xoffPauseFramesReceived, "XOFF Pause Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "ControlFramesReceived", &stats->macControlFramesReceived, "MAC Control Frames Received"); BGE_SYSCTL_STAT_ADD64(ctx, child, "xoffStateEntered", &stats->xoffStateEntered, "XOFF State Entered"); BGE_SYSCTL_STAT_ADD64(ctx, child, "FramesTooLong", &stats->dot3StatsFramesTooLong, "Frames Too Long"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Jabbers", &stats->etherStatsJabbers, "Jabbers"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UndersizePkts", &stats->etherStatsUndersizePkts, "Undersized Packets"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "tx", CTLFLAG_RD, NULL, "BGE TX Statistics"); child = SYSCTL_CHILDREN(tree); BGE_SYSCTL_STAT_ADD64(ctx, child, "ifHCOutOctets", &stats->ifHCOutOctets, "Outbound Octets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "Collisions", &stats->etherStatsCollisions, "TX Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "XonSent", &stats->outXonSent, "XON Sent"); BGE_SYSCTL_STAT_ADD64(ctx, child, "XoffSent", &stats->outXoffSent, "XOFF Sent"); BGE_SYSCTL_STAT_ADD64(ctx, child, "InternalMacTransmitErrors", &stats->dot3StatsInternalMacTransmitErrors, "Internal MAC TX Errors"); BGE_SYSCTL_STAT_ADD64(ctx, child, "SingleCollisionFrames", &stats->dot3StatsSingleCollisionFrames, "Single Collision Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MultipleCollisionFrames", &stats->dot3StatsMultipleCollisionFrames, "Multiple Collision Frames"); BGE_SYSCTL_STAT_ADD64(ctx, child, "DeferredTransmissions", &stats->dot3StatsDeferredTransmissions, "Deferred Transmissions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "ExcessiveCollisions", &stats->dot3StatsExcessiveCollisions, "Excessive Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "LateCollisions", &stats->dot3StatsLateCollisions, "Late Collisions"); BGE_SYSCTL_STAT_ADD64(ctx, child, "UnicastPkts", &stats->ifHCOutUcastPkts, "Outbound Unicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "MulticastPkts", &stats->ifHCOutMulticastPkts, "Outbound Multicast Packets"); BGE_SYSCTL_STAT_ADD64(ctx, child, "BroadcastPkts", &stats->ifHCOutBroadcastPkts, "Outbound Broadcast Packets"); } #undef BGE_SYSCTL_STAT_ADD64 static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; uint32_t result; int offset; sc = (struct bge_softc *)arg1; offset = arg2; result = CSR_READ_4(sc, BGE_MEMWIN_START + BGE_STATS_BLOCK + offset + offsetof(bge_hostaddr, bge_addr_lo)); return (sysctl_handle_int(oidp, &result, 0, req)); } #ifdef BGE_REGISTER_DEBUG static int bge_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; uint16_t *sbdata; int error, result, sbsz; int i, j; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result == 1) { sc = (struct bge_softc *)arg1; if (sc->bge_asicrev == BGE_ASICREV_BCM5700 && sc->bge_chipid != BGE_CHIPID_BCM5700_C0) sbsz = BGE_STATUS_BLK_SZ; else sbsz = 32; sbdata = (uint16_t *)sc->bge_ldata.bge_status_block; printf("Status Block:\n"); BGE_LOCK(sc); bus_dmamap_sync(sc->bge_cdata.bge_status_tag, sc->bge_cdata.bge_status_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = 0x0; i < sbsz / sizeof(uint16_t); ) { printf("%06x:", i); for (j = 0; j < 8; j++) printf(" %04x", sbdata[i++]); printf("\n"); } printf("Registers:\n"); for (i = 0x800; i < 0xA00; ) { printf("%06x:", i); for (j = 0; j < 8; j++) { printf(" %08x", CSR_READ_4(sc, i)); i += 4; } printf("\n"); } BGE_UNLOCK(sc); printf("Hardware Flags:\n"); if (BGE_IS_5717_PLUS(sc)) printf(" - 5717 Plus\n"); if (BGE_IS_5755_PLUS(sc)) printf(" - 5755 Plus\n"); if (BGE_IS_575X_PLUS(sc)) printf(" - 575X Plus\n"); if (BGE_IS_5705_PLUS(sc)) printf(" - 5705 Plus\n"); if (BGE_IS_5714_FAMILY(sc)) printf(" - 5714 Family\n"); if (BGE_IS_5700_FAMILY(sc)) printf(" - 5700 Family\n"); if (sc->bge_flags & BGE_FLAG_JUMBO) printf(" - Supports Jumbo Frames\n"); if (sc->bge_flags & BGE_FLAG_PCIX) printf(" - PCI-X Bus\n"); if (sc->bge_flags & BGE_FLAG_PCIE) printf(" - PCI Express Bus\n"); if (sc->bge_phy_flags & BGE_PHY_NO_3LED) printf(" - No 3 LEDs\n"); if (sc->bge_flags & BGE_FLAG_RX_ALIGNBUG) printf(" - RX Alignment Bug\n"); } return (error); } static int bge_sysctl_reg_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = CSR_READ_4(sc, result); printf("reg 0x%06X = 0x%08X\n", result, val); } return (error); } static int bge_sysctl_ape_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = APE_READ_4(sc, result); printf("reg 0x%06X = 0x%08X\n", result, val); } return (error); } static int bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS) { struct bge_softc *sc; int error; uint16_t result; uint32_t val; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || (req->newptr == NULL)) return (error); if (result < 0x8000) { sc = (struct bge_softc *)arg1; val = bge_readmem_ind(sc, result); printf("mem 0x%06X = 0x%08X\n", result, val); } return (error); } #endif static int bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[]) { if (sc->bge_flags & BGE_FLAG_EADDR) return (1); #ifdef __sparc64__ OF_getetheraddr(sc->bge_dev, ether_addr); return (0); #endif return (1); } static int bge_get_eaddr_mem(struct bge_softc *sc, uint8_t ether_addr[]) { uint32_t mac_addr; mac_addr = bge_readmem_ind(sc, BGE_SRAM_MAC_ADDR_HIGH_MB); if ((mac_addr >> 16) == 0x484b) { ether_addr[0] = (uint8_t)(mac_addr >> 8); ether_addr[1] = (uint8_t)mac_addr; mac_addr = bge_readmem_ind(sc, BGE_SRAM_MAC_ADDR_LOW_MB); ether_addr[2] = (uint8_t)(mac_addr >> 24); ether_addr[3] = (uint8_t)(mac_addr >> 16); ether_addr[4] = (uint8_t)(mac_addr >> 8); ether_addr[5] = (uint8_t)mac_addr; return (0); } return (1); } static int bge_get_eaddr_nvram(struct bge_softc *sc, uint8_t ether_addr[]) { int mac_offset = BGE_EE_MAC_OFFSET; if (sc->bge_asicrev == BGE_ASICREV_BCM5906) mac_offset = BGE_EE_MAC_OFFSET_5906; return (bge_read_nvram(sc, ether_addr, mac_offset + 2, ETHER_ADDR_LEN)); } static int bge_get_eaddr_eeprom(struct bge_softc *sc, uint8_t ether_addr[]) { if (sc->bge_asicrev == BGE_ASICREV_BCM5906) return (1); return (bge_read_eeprom(sc, ether_addr, BGE_EE_MAC_OFFSET + 2, ETHER_ADDR_LEN)); } static int bge_get_eaddr(struct bge_softc *sc, uint8_t eaddr[]) { static const bge_eaddr_fcn_t bge_eaddr_funcs[] = { /* NOTE: Order is critical */ bge_get_eaddr_fw, bge_get_eaddr_mem, bge_get_eaddr_nvram, bge_get_eaddr_eeprom, NULL }; const bge_eaddr_fcn_t *func; for (func = bge_eaddr_funcs; *func != NULL; ++func) { if ((*func)(sc, eaddr) == 0) break; } return (*func == NULL ? ENXIO : 0); } static uint64_t bge_get_counter(if_t ifp, ift_counter cnt) { struct bge_softc *sc; struct bge_mac_stats *stats; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); if (!BGE_IS_5705_PLUS(sc)) return (if_get_counter_default(ifp, cnt)); stats = &sc->bge_mac_stats; switch (cnt) { case IFCOUNTER_IERRORS: return (stats->NoMoreRxBDs + stats->InputDiscards + stats->InputErrors); case IFCOUNTER_COLLISIONS: return (stats->etherStatsCollisions); default: return (if_get_counter_default(ifp, cnt)); } } Index: projects/ifnet/sys/dev/e1000/if_igb.c =================================================================== --- projects/ifnet/sys/dev/e1000/if_igb.c (revision 280372) +++ projects/ifnet/sys/dev/e1000/if_igb.c (revision 280373) @@ -1,6080 +1,6079 @@ /****************************************************************************** Copyright (c) 2001-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_altq.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82575.h" #include "if_igb.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int igb_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ char igb_driver_version[] = "version - 2.4.0"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static igb_vendor_info_t igb_vendor_info_array[] = { { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_VF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_SFP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_VF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER_IT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I211_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I354_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *igb_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int igb_probe(device_t); static int igb_attach(device_t); static int igb_detach(device_t); static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); static int igb_mq_start(if_t, struct mbuf *); static int igb_mq_start_locked(if_t, struct tx_ring *); static void igb_qflush(if_t); static void igb_deferred_mq_start(void *, int); static int igb_ioctl(if_t, u_long, void *, struct thread *); static uint64_t igb_get_counter(if_t, ift_counter); static void igb_init(struct adapter *); static void igb_stop(void *); static void igb_media_status(if_t, struct ifmediareq *); static int igb_media_change(if_t); static void igb_identify_hardware(struct adapter *); static int igb_allocate_pci_resources(struct adapter *); static int igb_allocate_msix(struct adapter *); static int igb_allocate_legacy(struct adapter *); static int igb_setup_msix(struct adapter *); static void igb_free_pci_resources(struct adapter *); static void igb_local_timer(void *); static void igb_reset(struct adapter *); static void igb_setup_interface(device_t, struct adapter *); static int igb_allocate_queues(struct adapter *); static void igb_configure_queues(struct adapter *); static int igb_allocate_transmit_buffers(struct tx_ring *); static void igb_setup_transmit_structures(struct adapter *); static void igb_setup_transmit_ring(struct tx_ring *); static void igb_initialize_transmit_units(struct adapter *); static void igb_free_transmit_structures(struct adapter *); static void igb_free_transmit_buffers(struct tx_ring *); static int igb_allocate_receive_buffers(struct rx_ring *); static int igb_setup_receive_structures(struct adapter *); static int igb_setup_receive_ring(struct rx_ring *); static void igb_initialize_receive_units(struct adapter *); static void igb_free_receive_structures(struct adapter *); static void igb_free_receive_buffers(struct rx_ring *); static void igb_free_receive_ring(struct rx_ring *); static void igb_enable_intr(struct adapter *); static void igb_disable_intr(struct adapter *); static void igb_update_stats_counters(struct adapter *); static bool igb_txeof(struct tx_ring *); static __inline void igb_rx_discard(struct rx_ring *, int); static __inline void igb_rx_input(struct rx_ring *, struct adapter *, struct mbuf *, u32); static bool igb_rxeof(struct igb_queue *, int, int *); static void igb_rx_checksum(u32, struct mbuf *, u32); static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static int igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void igb_set_promisc(struct adapter *); static void igb_disable_promisc(struct adapter *); static void igb_set_multi(struct adapter *); static void igb_update_link_status(struct adapter *); static void igb_refresh_mbufs(struct rx_ring *, int); static void igb_register_vlan(void *, if_t, u16); static void igb_unregister_vlan(void *, if_t, u16); static void igb_setup_vlan_hw_support(struct adapter *); static int igb_xmit(struct tx_ring *, struct mbuf **); static int igb_dma_malloc(struct adapter *, bus_size_t, struct igb_dma_alloc *, int); static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void igb_print_nvm_info(struct adapter *); static int igb_is_valid_ether_addr(u8 *); static void igb_add_hw_stats(struct adapter *); static void igb_vf_init_stats(struct adapter *); static void igb_update_vf_stats_counters(struct adapter *); /* Management and WOL Support */ static void igb_init_manageability(struct adapter *); static void igb_release_manageability(struct adapter *); static void igb_get_hw_control(struct adapter *); static void igb_release_hw_control(struct adapter *); static void igb_enable_wakeup(device_t); static void igb_led_func(void *, int); static int igb_irq_fast(void *); static void igb_msix_que(void *); static void igb_msix_link(void *); static void igb_handle_que(void *context, int pending); static void igb_handle_link(void *context, int pending); static void igb_handle_link_locked(struct adapter *); static void igb_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); #ifdef DEVICE_POLLING static int igb_poll(if_t, enum poll_cmd, int); #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, igb_probe), DEVMETHOD(device_attach, igb_attach), DEVMETHOD(device_detach, igb_detach), DEVMETHOD(device_shutdown, igb_shutdown), DEVMETHOD(device_suspend, igb_suspend), DEVMETHOD(device_resume, igb_resume), DEVMETHOD_END }; static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static struct ifdriver igb_ifdrv = { .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, .ifop_ioctl = igb_ioctl, .ifop_get_counter = igb_get_counter, .ifop_transmit = igb_mq_start, .ifop_qflush = igb_qflush, #ifdef DEVICE_POLLING .ifop_poll = igb_poll, #endif }, .ifdrv_name = "igb", .ifdrv_type = IFT_ETHER, .ifdrv_hdrlen = sizeof(struct ether_vlan_header), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); /********************************************************************* * Tunable default values. *********************************************************************/ static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); /* Descriptor defaults */ static int igb_rxd = IGB_DEFAULT_RXD; static int igb_txd = IGB_DEFAULT_TXD; SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0, "Number of receive descriptors per queue"); SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0, "Number of transmit descriptors per queue"); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int igb_enable_aim = TRUE; SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0, "Enable adaptive interrupt moderation"); /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int igb_enable_msix = 1; SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, "Enable MSI-X interrupts"); /* ** Tuneable Interrupt rate */ static int igb_max_interrupt_rate = 8000; SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); /* ** Tuneable number of buffers in the buf-ring */ static int igb_buf_ring_size = IGB_BR_SIZE; SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, &igb_buf_ring_size, 0, "Size of the bufring"); /* ** Header split causes the packet header to ** be dma'd to a seperate mbuf from the payload. ** this can have memory alignment benefits. But ** another plus is that small packets often fit ** into the header and thus use no cluster. Its ** a very workload dependent type feature. */ static int igb_header_split = FALSE; SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, "Enable receive mbuf header split"); /* ** This will autoconfigure based on the ** number of CPUs and max supported ** MSIX messages if left at 0. */ static int igb_num_queues = 0; SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0, "Number of queues to configure, 0 indicates autoconfigure"); /* ** Global variable to store last used CPU when binding queues ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a ** queue is bound to a cpu. */ static int igb_last_bind_cpu = -1; /* How many packets rxeof tries to clean at a time */ static int igb_rx_process_limit = 100; SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &igb_rx_process_limit, 0, "Maximum number of received packets to process at a time, -1 means unlimited"); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * igb_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int igb_probe(device_t dev) { char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; igb_vendor_info_t *ent; INIT_DEBUGOUT("igb_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IGB_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = igb_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", igb_strings[ent->index], igb_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int igb_attach(device_t dev) { struct adapter *adapter; int error = 0; u16 eeprom_data; INIT_DEBUGOUT("igb_attach: begin"); if (resource_disabled("igb", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_nvm_info, "I", "NVM Information"); igb_set_sysctl_value(adapter, "enable_aim", "Interrupt Moderation", &adapter->enable_aim, igb_enable_aim); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ igb_identify_hardware(adapter); /* Setup PCI resources */ if (igb_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(&adapter->hw); /* Sysctl for limiting the amount of work done in the taskqueue */ igb_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, igb_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", IGB_DEFAULT_TXD, igb_txd); adapter->num_tx_desc = IGB_DEFAULT_TXD; } else adapter->num_tx_desc = igb_txd; if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", IGB_DEFAULT_RXD, igb_rxd); adapter->num_rx_desc = IGB_DEFAULT_RXD; } else adapter->num_rx_desc = igb_rxd; adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = FALSE; adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* ** Allocate and Setup Queues */ if (igb_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate the appropriate stats memory */ if (adapter->vf_ifp) { adapter->stats = (struct e1000_vf_stats *)malloc(sizeof \ (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); igb_vf_init_stats(adapter); } else adapter->stats = (struct e1000_hw_stats *)malloc(sizeof \ (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->stats == NULL) { device_printf(dev, "Can not allocate stats memory\n"); error = ENOMEM; goto err_late; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Some adapter-specific advanced features */ if (adapter->hw.mac.type >= e1000_i350) { SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw); else e1000_set_eee_i350(&adapter->hw); } } /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (((adapter->hw.mac.type != e1000_i210) && (adapter->hw.mac.type != e1000_i211)) && (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* ** Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(&adapter->hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } /* Check its sanity */ if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Now get a good starting state */ igb_reset(adapter); /* Initialize statistics */ igb_update_stats_counters(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); /* Indicate SOL/IDER usage */ if (e1000_check_reset_block(&adapter->hw)) device_printf(dev, "PHY reset is blocked due to SOL/IDER session.\n"); /* Determine if we have to control management hardware */ adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); /* * Setup Wake-on-Lan */ /* APME bit in EEPROM is mapped to WUC.APME */ eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; if (eeprom_data) adapter->wol = E1000_WUFC_MAG; /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); igb_add_hw_stats(adapter); /* Tell the stack that the interface is not active */ adapter->flags &= ~IGB_RUNNING; adapter->led_dev = led_create(igb_led_func, adapter, device_get_nameunit(dev)); /* ** Configure Interrupts */ if ((adapter->msix > 1) && (igb_enable_msix)) error = igb_allocate_msix(adapter); else /* MSI or Legacy */ error = igb_allocate_legacy(adapter); if (error) goto err_late; /* Setup OS specific network interface */ igb_setup_interface(dev, adapter); #ifdef DEV_NETMAP igb_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("igb_attach: end"); return (0); err_late: igb_detach(dev); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); igb_release_hw_control(adapter); err_pci: igb_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int igb_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); INIT_DEBUGOUT("igb_detach: begin"); if (adapter->ifp) { #ifdef DEV_NETMAP netmap_detach(adapter->ifp); #endif /* DEV_NETMAP */ if_detach(adapter->ifp); } if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); IGB_CORE_LOCK(adapter); adapter->in_detach = 1; igb_stop(adapter); IGB_CORE_UNLOCK(adapter); e1000_phy_hw_reset(&adapter->hw); /* Give control back to firmware */ igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); callout_drain(&adapter->timer); igb_free_pci_resources(adapter); bus_generic_detach(dev); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); if (adapter->mta != NULL) free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int igb_shutdown(device_t dev) { return igb_suspend(dev); } /* * Suspend/resume device methods. */ static int igb_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); IGB_CORE_LOCK(adapter); igb_stop(adapter); igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } IGB_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int igb_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; IGB_CORE_LOCK(adapter); igb_init(adapter); igb_init_manageability(adapter); if ((adapter->if_flags & IFF_UP) && (adapter->flags & IGB_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !buf_ring_empty(txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } } IGB_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } /* ** Multiqueue Transmit Entry: ** quick turnaround to the stack ** */ static int igb_mq_start(if_t ifp, struct mbuf *m) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct igb_queue *que; struct tx_ring *txr; int i, err = 0; #ifdef RSS uint32_t bucket_id; #endif /* Which queue to use */ /* * When doing RSS, map it to the same outbound queue * as the incoming flow would be mapped to. * * If everything is setup correctly, it should be the * same bucket that the current CPU we're on is. */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &bucket_id) == 0) { /* XXX TODO: spit out something if bucket_id > num_queues? */ i = bucket_id % adapter->num_queues; } else { #endif i = m->m_pkthdr.flowid % adapter->num_queues; #ifdef RSS } #endif } else { i = curcpu % adapter->num_queues; } txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; err = buf_ring_enqueue(txr->br, m); if (err) return (err); if (IGB_TX_TRYLOCK(txr)) { igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } else taskqueue_enqueue(que->tq, &txr->txq_task); return (0); } static int igb_mq_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; IGB_TX_LOCK_ASSERT(txr); if (((adapter->flags & IGB_RUNNING) == 0) || adapter->link_active == 0) return (ENETDOWN); /* Process the queue */ while ((next = buf_ring_peek(txr->br)) != NULL) { if ((err = igb_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ buf_ring_advance_sc(txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. */ buf_ring_putback_sc(txr->br, next); } break; } buf_ring_advance_sc(txr->br); enq++; if_mtap(ifp, next, NULL, 0); if ((adapter->flags & IGB_RUNNING) == 0) break; } if (enq > 0) { /* Set the watchdog */ txr->queue_status |= IGB_QUEUE_WORKING; txr->watchdog_time = ticks; } if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); if (txr->tx_avail <= IGB_MAX_SCATTER) txr->queue_status |= IGB_QUEUE_DEPLETED; return (err); } /* * Called from a taskqueue to drain queued transmit packets. */ static void igb_deferred_mq_start(void *arg, int pending) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; IGB_TX_LOCK(txr); if (!buf_ring_empty(txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } /* ** Flush all ring buffers */ static void igb_qflush(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IGB_TX_UNLOCK(txr); } } /********************************************************************* * Ioctl entry point * * igb_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int igb_ioctl(if_t ifp, u_long command, void *data, struct thread *td) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct ifreq *ifr = (struct ifreq *)data; uint32_t oflags, mask; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > 9234 - ETHER_HDR_LEN - ETHER_CRC_LEN) return (EINVAL); IGB_CORE_LOCK(adapter); adapter->max_frame_size = ifr->ifr_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; igb_init(adapter); IGB_CORE_UNLOCK(adapter); break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); IGB_CORE_LOCK(adapter); oflags = adapter->if_flags; adapter->if_flags = ifr->ifr_flags; if (adapter->if_flags & IFF_UP) { if ((adapter->flags & IGB_RUNNING)) { if ((oflags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { igb_disable_promisc(adapter); igb_set_promisc(adapter); } } else igb_init(adapter); } else if (adapter->flags & IGB_RUNNING) igb_stop(adapter); IGB_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (adapter->flags & IGB_RUNNING) { IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); igb_set_multi(adapter); #ifdef DEVICE_POLLING if (!(adapter->if_capenable & IFCAP_POLLING)) #endif igb_enable_intr(adapter); IGB_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ IGB_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { IGB_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } IGB_CORE_UNLOCK(adapter); case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); mask = ifr->ifr_reqcap ^ ifr->ifr_curcap; IGB_CORE_LOCK(adapter); #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) igb_disable_intr(adapter); else igb_enable_intr(adapter); } #endif ifr->ifr_hwassist = 0; if (ifr->ifr_reqcap & IFCAP_TXCSUM) { ifr->ifr_hwassist |= (CSUM_TCP | CSUM_UDP); if (adapter->hw.mac.type == e1000_82576) ifr->ifr_hwassist |= CSUM_SCTP; } if (ifr->ifr_reqcap & IFCAP_TSO) ifr->ifr_hwassist |= CSUM_TSO; adapter->if_capenable = ifr->ifr_reqcap; if ((mask & (IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO | IFCAP_LRO)) && (adapter->flags & IGB_RUNNING)) igb_init(adapter); IGB_CORE_UNLOCK(adapter); break; default: error = EOPNOTSUPP; break; } return (error); } /********************************************************************* * Init entry point * * It is used by the driver as a hw/sw initialization routine to get * to a consistent state. **********************************************************************/ static void igb_init(struct adapter *adapter) { device_t dev = adapter->dev; INIT_DEBUGOUT("igb_init: begin"); IGB_CORE_LOCK_ASSERT(adapter); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(if_lladdr(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); igb_reset(adapter); igb_update_link_status(adapter); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Configure for OS presence */ igb_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ igb_setup_transmit_structures(adapter); igb_initialize_transmit_units(adapter); /* Setup Multicast table */ igb_set_multi(adapter); /* ** Figure out the desired mbuf pool ** for doing jumbo/packetsplit */ if (adapter->max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (igb_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); return; } igb_initialize_receive_units(adapter); /* Enable VLAN support */ if (adapter->if_capenable & IFCAP_VLAN_HWTAGGING) igb_setup_vlan_hw_support(adapter); /* Don't lose promiscuous settings */ igb_set_promisc(adapter); adapter->flags |= IGB_RUNNING; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); if (adapter->msix > 1) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. */ if (adapter->if_capenable & IFCAP_POLLING) igb_disable_intr(adapter); else #endif /* DEVICE_POLLING */ { igb_enable_intr(adapter); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); } /* Set Energy Efficient Ethernet */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw); else e1000_set_eee_i350(&adapter->hw); } } static void igb_handle_que(void *context, int pending) { struct igb_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; if_t ifp = adapter->ifp; if (adapter->flags & IGB_RUNNING) { bool more; more = igb_rxeof(que, adapter->rx_process_limit, NULL); IGB_TX_LOCK(txr); igb_txeof(txr); /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !buf_ring_empty(txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); /* Do we need another? */ if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } } #ifdef DEVICE_POLLING if (adapter->if_capenable & IFCAP_POLLING) return; #endif /* Reenable this interrupt */ if (que->eims) E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); else igb_enable_intr(adapter); } /* Deal with link in a sleepable context */ static void igb_handle_link(void *context, int pending) { struct adapter *adapter = context; IGB_CORE_LOCK(adapter); igb_handle_link_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_link_locked(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; IGB_CORE_LOCK_ASSERT(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); if ((adapter->flags & IGB_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !buf_ring_empty(txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } } } /********************************************************************* * * MSI/Legacy Deferred * Interrupt Service routine * *********************************************************************/ static int igb_irq_fast(void *arg) { struct adapter *adapter = arg; struct igb_queue *que = adapter->queues; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Mask interrupts until the taskqueue is finished running. This is * cheap, just assume that it is needed. This also works around the * MSI message reordering errata on certain systems. */ igb_disable_intr(adapter); taskqueue_enqueue(que->tq, &que->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) taskqueue_enqueue(que->tq, &adapter->link_task); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } #ifdef DEVICE_POLLING static int igb_poll(if_t ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct igb_queue *que; struct tx_ring *txr; u32 reg_icr, rx_done = 0; u32 loop = IGB_MAX_LOOP; bool more; IGB_CORE_LOCK(adapter); if ((adapter->flags & IGB_RUNNING) == 0) { IGB_CORE_UNLOCK(adapter); return (rx_done); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) igb_handle_link_locked(adapter); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; } IGB_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; txr = que->txr; igb_rxeof(que, count, &rx_done); IGB_TX_LOCK(txr); do { more = igb_txeof(txr); } while (loop-- && more); if (!buf_ring_empty(txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * MSIX Que Interrupt Service routine * **********************************************************************/ static void igb_msix_que(void *arg) { struct igb_queue *que = arg; struct adapter *adapter = que->adapter; if_t ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; u32 newitr = 0; bool more_rx; /* Ignore spurious interrupts */ if ((adapter->flags & IGB_RUNNING) == 0) return; E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); ++que->irqs; IGB_TX_LOCK(txr); igb_txeof(txr); /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !buf_ring_empty(txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); if (adapter->enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. */ if (que->eitr_setting) E1000_WRITE_REG(&adapter->hw, E1000_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; /* Used half Default if sub-gig */ if (adapter->link_speed != 1000) newitr = IGB_DEFAULT_ITR / 2; else { if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); } newitr &= 0x7FFC; /* Mask invalid bits */ if (adapter->hw.mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: /* Schedule a clean task if needed*/ if (more_rx) taskqueue_enqueue(que->tq, &que->que_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); return; } /********************************************************************* * * MSIX Link Interrupt Service routine * **********************************************************************/ static void igb_msix_link(void *arg) { struct adapter *adapter = arg; u32 icr; ++adapter->link_irq; icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (!(icr & E1000_ICR_LSC)) goto spurious; igb_handle_link(adapter, 0); spurious: /* Rearm */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void igb_media_status(if_t ifp, struct ifmediareq *ifmr) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); INIT_DEBUGOUT("igb_media_status: begin"); IGB_CORE_LOCK(adapter); igb_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IGB_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: /* ** Support for 100Mb SFP - these are Fiber ** but the media type appears as serdes */ if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) ifmr->ifm_active |= IFM_100_FX; else ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; case 2500: ifmr->ifm_active |= IFM_2500_SX; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; IGB_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int igb_media_change(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("igb_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); IGB_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } igb_init(adapter); IGB_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to Advanced TX descriptors. * **********************************************************************/ static int igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; u32 olinfo_status = 0, cmd_type_len; int i, j, error, nsegs; int first; bool remap = TRUE; struct mbuf *m_head; bus_dma_segment_t segs[IGB_MAX_SCATTER]; bus_dmamap_t map; struct igb_tx_buf *txbuf; union e1000_adv_tx_desc *txd = NULL; m_head = *m_headp; /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (m_head->m_flags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; /* * Important to capture the first descriptor * used because it will contain the index of * the one we tell the hardware to report back */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; map = txbuf->map; /* * Map the packet for DMA. */ retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error)) { struct mbuf *m; switch (error) { case EFBIG: /* Try it again? - one try */ if (remap == TRUE) { remap = FALSE; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; goto retry; } else return (error); case ENOMEM: txr->no_tx_dma_setup++; return (error); default: txr->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } /* Make certain there are enough descriptors */ if (nsegs > txr->tx_avail - 2) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* ** Set up the appropriate offload context ** this will consume the first descriptor */ error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); if (__predict_false(error)) { m_freem(*m_headp); *m_headp = NULL; return (error); } /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; i = txr->next_avail_desc; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txbuf = &txr->tx_buffers[i]; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); if (++i == txr->num_desc) i = 0; } txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); txr->tx_avail -= nsegs; txr->next_avail_desc = i; txbuf->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* Set the EOP descriptor that will be marked done */ txbuf = &txr->tx_buffers[first]; txbuf->eop = txd; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ ++txr->total_packets; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void igb_set_promisc(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_enabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); if (adapter->if_flags & IFF_PROMISC) { reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } else if (adapter->if_flags & IFF_ALLMULTI) { reg |= E1000_RCTL_MPE; reg &= ~E1000_RCTL_UPE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } } static void igb_count_maddr(void *arg, struct sockaddr *maddr) { struct sockaddr_dl *sdl = (struct sockaddr_dl *)maddr; int *mcnt = arg; if (sdl->sdl_family == AF_LINK) (*mcnt)++; } static void igb_disable_promisc(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if_t ifp = adapter->ifp; u32 reg; int mcnt = 0; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_disabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); reg &= (~E1000_RCTL_UPE); if (adapter->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else if_foreach_maddr(ifp, igb_count_maddr, &mcnt); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg &= (~E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void igb_copy_maddr(void *arg, struct sockaddr *maddr) { struct sockaddr_dl *sdl = (struct sockaddr_dl *)maddr; struct adapter *adapter = arg; if (sdl->sdl_family != AF_LINK) return; if (adapter->mcnt == MAX_NUM_MULTICAST_ADDRESSES) return; bcopy(LLADDR(sdl), &adapter->mta[adapter->mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); adapter->mcnt++; } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void igb_set_multi(struct adapter *adapter) { u32 reg_rctl = 0; IOCTL_DEBUGOUT("igb_set_multi: begin"); bzero(adapter->mta, sizeof(uint8_t) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); adapter->mcnt = 0; if_foreach_maddr(adapter->ifp, igb_copy_maddr, adapter); if (adapter->mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, adapter->mta, adapter->mcnt); } /********************************************************************* * Timer routine: * This routine checks for link status, * updates statistics, and does the watchdog. * **********************************************************************/ static void igb_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct igb_queue *que = adapter->queues; int hung = 0, busy = 0; IGB_CORE_LOCK_ASSERT(adapter); igb_update_link_status(adapter); igb_update_stats_counters(adapter); /* ** Check the TX queues status ** - central locked handling of OACTIVE ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if ((txr->queue_status & IGB_QUEUE_HUNG) && (adapter->pause_frames == 0)) ++hung; if (txr->queue_status & IGB_QUEUE_DEPLETED) ++busy; if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) taskqueue_enqueue(que->tq, &que->que_task); } if (hung == adapter->num_queues) goto timeout; adapter->pause_frames = 0; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); #ifndef DEVICE_POLLING /* Schedule all queue interrupts - deadlock protection */ E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); #endif return; timeout: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->flags &= ~IGB_RUNNING; adapter->watchdog_events++; igb_init(adapter); } static void igb_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_fc_info *fc = &hw->fc; if_t ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check, thstat, ctrl; char *flowctl = NULL; link_check = thstat = ctrl = 0; /* Get the cached link value or read for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* Fall thru */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Get the flow control for display */ switch (fc->current_mode) { case e1000_fc_rx_pause: flowctl = "RX"; break; case e1000_fc_tx_pause: flowctl = "TX"; break; case e1000_fc_full: flowctl = "Full"; break; case e1000_fc_none: default: flowctl = "None"; break; } /* Now we check if a transition has happened */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); if (bootverbose) device_printf(dev, "Link is up %d Mbps %s," " Flow Control: %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex"), flowctl); adapter->link_active = 1; if_setbaudrate(ifp, adapter->link_speed * 1000000); if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if (hw->dev_spec._82575.media_changed) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; igb_reset(adapter); } /* This can sleep */ if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_PWR_DOWN)) device_printf(dev, "Link: thermal shutdown\n"); adapter->link_active = 0; /* This can sleep */ if_link_state_change(ifp, LINK_STATE_DOWN); /* Reset queue state */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->queue_status = IGB_QUEUE_IDLE; } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void igb_stop(void *arg) { struct adapter *adapter = arg; struct tx_ring *txr = adapter->tx_rings; IGB_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("igb_stop: begin"); igb_disable_intr(adapter); callout_stop(&adapter->timer); adapter->flags &= ~IGB_RUNNING; /* Disarm watchdog timer. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); txr->queue_status = IGB_QUEUE_IDLE; IGB_TX_UNLOCK(txr); } e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void igb_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Set MAC type early for PCI setup */ e1000_set_mac_type(&adapter->hw); /* Are we a VF device? */ if ((adapter->hw.mac.type == e1000_vfadapt) || (adapter->hw.mac.type == e1000_vfadapt_i350)) adapter->vf_ifp = 1; else adapter->vf_ifp = 0; } static int igb_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->pci_mem == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->num_queues = 1; /* Defaults for Legacy or MSI */ /* This will setup either MSI/X or MSI */ adapter->msix = igb_setup_msix(adapter); adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ static int igb_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* MSI RID is 1 */ if (adapter->msix == 1) rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); /* * Try allocating a fast interrupt and the associated deferred * processing contexts. */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); /* Make tasklet for deferred link handling */ TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(que->tq); que->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Queue Interrupt handlers: * **********************************************************************/ static int igb_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; int error, rid, vector = 0; int cpu_id = 0; #ifdef RSS cpuset_t cpu_mask; #endif /* Be sure to start with all interrupts disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ if (adapter->num_queues != rss_getnumbuckets()) { device_printf(dev, "%s: number of queues (%d) != number of RSS buckets (%d)" "; performance will be impacted.\n", __func__, adapter->num_queues, rss_getnumbuckets()); } #endif for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { rid = vector +1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Queue Interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register Queue handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; if (adapter->hw.mac.type == e1000_82575) que->eims = E1000_EICR_TX_QUEUE0 << i; else que->eims = 1 << vector; #ifdef RSS /* * The queue ID is used as the RSS layer bucket ID. * We look up the queue ID -> RSS CPU ID and select * that. */ cpu_id = rss_getcpu(i % rss_getnumbuckets()); #else /* * Bind the msix vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS round-robin * bucket -> queue -> CPU allocation. */ if (adapter->num_queues > 1) { if (igb_last_bind_cpu < 0) igb_last_bind_cpu = CPU_FIRST(); cpu_id = igb_last_bind_cpu; } #endif if (adapter->num_queues > 1) { bus_bind_intr(dev, que->res, cpu_id); #ifdef RSS device_printf(dev, "Bound queue %d to RSS bucket %d\n", i, cpu_id); #else device_printf(dev, "Bound queue %d to cpu %d\n", i, cpu_id); #endif } TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr); /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); que->tq = taskqueue_create("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); if (adapter->num_queues > 1) { /* * Only pin the taskqueue thread to a CPU if * RSS is in use. * * This again just happens to match the default RSS * round-robin bucket -> queue -> CPU allocation. */ #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s que (bucket %d)", device_get_nameunit(adapter->dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que (qid %d)", device_get_nameunit(adapter->dev), cpu_id); #endif } else { taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* Finally update the last bound CPU id */ if (adapter->num_queues > 1) igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); } /* And Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Link Interrupt\n"); return (ENXIO); } if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register Link handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; return (0); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct igb_queue *que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, que->eims); adapter->que_mask |= que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (igb_max_interrupt_rate > 0) newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); } return; } static void igb_free_pci_resources(struct adapter *adapter) { struct igb_queue *que = adapter->queues; device_t dev = adapter->dev; int rid; /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* * First release all the interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); que = adapter->queues; if (adapter->tag != NULL) { taskqueue_drain(que->tq, &adapter->link_task); bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); for (int i = 0; i < adapter->num_queues; i++, que++) { if (que->tq != NULL) { taskqueue_drain(que->tq, &que->txr->txq_task); taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, adapter->memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); } /* * Setup Either MSI/X or MSI */ static int igb_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int bar, want, queues, msgs, maxqueues; /* tuneable override */ if (igb_enable_msix == 0) goto msi; /* First try MSI/X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. */ adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); bar = pci_read_config(dev, adapter->memrid, 4); if (bar == 0) /* use next bar */ adapter->memrid += 4; adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; /* Override via tuneable */ if (igb_num_queues != 0) queues = igb_num_queues; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82575: maxqueues = 4; break; case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: maxqueues = 4; break; case e1000_i211: maxqueues = 2; break; default: /* VF interfaces */ maxqueues = 1; break; } /* Final clamp on the actual hardware capability */ if (queues > maxqueues) queues = maxqueues; /* ** One vector (RX/TX pair) per queue ** plus an additional for Link interrupt */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors configured, but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { device_printf(adapter->dev," Using an MSI interrupt\n"); return (msgs); } device_printf(adapter->dev," Using a Legacy interrupt\n"); return (0); } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct adapter *adapter, u32 pba) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; if (hw->mac.type == e1000_i211) return; if (hw->mac.type > e1000_82580) { if (adapter->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - adapter->max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - adapter->max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check if status is 2.5Gb backplane connection * before configuration of watchdog timer, which is * in msec values in 12.8usec intervals * watchdog timer= msec values in 32usec intervals * for non 2.5Gb connection */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= ((adapter->dmac * 5) >> 6); else reg |= (adapter->dmac >> 5); } else { reg |= (adapter->dmac >> 5); } E1000_WRITE_REG(hw, E1000_DMACR, reg); #ifdef I210_OBFF_SUPPORT /* * Set the OBFF Rx threshold to DMA Coalescing Rx * threshold - 2KB and enable the feature in the * hardware for I210. */ if (hw->mac.type == e1000_i210) { int obff = dmac - 2; reg = E1000_READ_REG(hw, E1000_DOBFFCTL); reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK; reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK) | E1000_DOBFFCTL_EXIT_ACT_MASK; E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg); } #endif E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); /* Set the interval before transition */ reg = E1000_READ_REG(hw, E1000_DMCTLX); if (hw->mac.type == e1000_i350) reg |= IGB_DMCTLX_DCFLUSH_DIS; /* ** in 2.5Gb connection, TTLX unit is 0.4 usec ** which is 0x4*2 = 0xA. But delay is still 4 usec */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= 0xA; else reg |= 0x4; } else { reg |= 0x4; } E1000_WRITE_REG(hw, E1000_DMCTLX, reg); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - (2 * adapter->max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); reg &= ~E1000_PCIEMISC_LX_DECISION; E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); } else if (hw->mac.type == e1000_82580) { u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); E1000_WRITE_REG(hw, E1000_DMACR, 0); } } /********************************************************************* * * Set up an fresh starting state * **********************************************************************/ static void igb_reset(struct adapter *adapter) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; struct e1000_fc_info *fc = &hw->fc; u32 pba = 0; u16 hwm; INIT_DEBUGOUT("igb_reset: begin"); /* Let the firmware know the OS is in control */ igb_get_hw_control(adapter); /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; default: break; } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (adapter->max_frame_size > ETHER_MAX_LEN)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } INIT_DEBUGOUT1("igb_init: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. */ hwm = min(((pba << 10) * 9 / 10), ((pba << 10) - 2 * adapter->max_frame_size)); if (hw->mac.type < e1000_82576) { fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ fc->low_water = fc->high_water - 8; } else { fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; } fc->pause_time = IGB_FC_PAUSE_TIME; fc->send_xon = TRUE; if (adapter->fc) fc->requested_mode = adapter->fc; else fc->requested_mode = e1000_fc_default; /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); /* Reset for AutoMediaDetect */ if (adapter->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, TRUE); e1000_get_bus_info(hw); adapter->flags &= ~IGB_MEDIA_RESET; } if (e1000_init_hw(hw) < 0) device_printf(dev, "Hardware Initialization Failed\n"); /* Setup DMA Coalescing */ igb_init_dmac(adapter, pba); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static void igb_setup_interface(device_t dev, struct adapter *adapter) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &igb_ifdrv, .ifat_softc = adapter, .ifat_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST, .ifat_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | #ifdef DEVICE_POLLING IFCAP_POLLING | #endif IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | IFCAP_VLAN_HWFILTER, }; INIT_DEBUGOUT("igb_setup_interface: begin"); ifat.ifat_dunit = device_get_unit(dev); ifat.ifat_lla = adapter->hw.mac.addr; /* ** Don't turn IFCAP_VLAN_HWFILTER on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the igb driver you can ** enable this and get full hardware tag filtering. ** ** Don't enable LRO by default. */ adapter->if_capenable = ifat.ifat_capenable = ifat.ifat_capabilities & ~(IFCAP_LRO | IFCAP_VLAN_HWFILTER | IFCAP_POLLING); adapter->ifp = if_attach(&ifat); /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, igb_media_change, igb_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); } /* * Manage DMA'able memory. */ static void igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int igb_dma_malloc(struct adapter *adapter, bus_size_t size, struct igb_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ IGB_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int igb_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = NULL; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* First allocate the top level queue structs */ if (!(adapter->queues = (struct igb_queue *) malloc(sizeof(struct igb_queue) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } /* Next allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto tx_fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; txr->num_desc = adapter->num_tx_desc; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (igb_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); /* Now allocate transmit buffers for the ring */ if (igb_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } /* Allocate a buf ring */ txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, M_WAITOK, &txr->tx_mtx); } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (igb_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (igb_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } /* ** Finally set up the queue holding structs */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; que->adapter = adapter; que->txr = &adapter->tx_rings[i]; que->rxr = &adapter->rx_rings[i]; } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) igb_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) igb_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: buf_ring_free(txr->br, M_DEVBUF); free(adapter->tx_rings, M_DEVBUF); tx_fail: free(adapter->queues, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int igb_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct igb_tx_buf *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IGB_TSO_SIZE, /* maxsize */ IGB_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ igb_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void igb_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *txbuf; int i; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ IGB_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); /* no need to set the address */ netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->eop = NULL; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IGB_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void igb_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) igb_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void igb_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl; INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); tctl = txdctl = 0; /* Setup the Tx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr); /* Setup the HW Tx Head and Tail descriptor pointers */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(hw, E1000_TDBAL(i)), E1000_READ_REG(hw, E1000_TDLEN(i))); txr->queue_status = IGB_QUEUE_IDLE; txdctl |= IGB_TX_PTHRESH; txdctl |= IGB_TX_HTHRESH << 8; txdctl |= IGB_TX_WTHRESH << 16; txdctl |= E1000_TXDCTL_QUEUE_ENABLE; E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } if (adapter->vf_ifp) return; e1000_config_collision_dist(hw); /* Program the Transmit Control Register */ tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(hw, E1000_TCTL, tctl); } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void igb_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); igb_free_transmit_buffers(txr); igb_dma_free(adapter, &txr->txdma); IGB_TX_UNLOCK(txr); IGB_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void igb_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct adapter *adapter = txr->adapter; struct e1000_adv_tx_context_desc *TXD; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, paylen; u16 vtag = 0, eh_type; int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif struct tcphdr *th; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; eh_type = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; eh_type = eh->evl_encap_proto; } switch (ntohs(eh_type)) { #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); /* XXX-BZ For now we do not pretend to support ext. hdrs. */ if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); ip_hlen = sizeof(struct ip6_hdr); ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; break; #endif default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(eh_type)); break; } ctxd = txr->next_avail_desc; TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; tcp_hlen = th->th_off << 2; /* This is used in the transmit desc in encap */ paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); if (++ctxd == txr->num_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; ++txr->tso_tx; return (0); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; int ehdrlen, ip_hlen = 0; u16 etype; u8 ipproto = 0; int offload = TRUE; int ctxd = txr->next_avail_desc; u16 vtag = 0; /* First check if TSO is to be used */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) /* ... no offload to do */ return (0); /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* Set the ether header length */ vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; ipproto = ip->ip_p; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); /* XXX-BZ this will go badly in case of ext hdrs. */ ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & CSUM_UDP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & CSUM_SCTP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } if (offload) /* For the TX descriptor setup */ *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); /* We've consumed the first desc, adjust counters */ if (++ctxd == txr->num_desc) ctxd = 0; txr->next_avail_desc = ctxd; --txr->tx_avail; return (0); } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * * TRUE return means there's work in the ring to clean, FALSE its empty. **********************************************************************/ static bool igb_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; u32 work, processed = 0; u16 limit = txr->process_limit; struct igb_tx_buf *buf; union e1000_adv_tx_desc *txd; mtx_assert(&txr->tx_mtx, MA_OWNED); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txr->me)) return (FALSE); #endif /* DEV_NETMAP */ if (txr->tx_avail == txr->num_desc) { txr->queue_status = IGB_QUEUE_IDLE; return FALSE; } /* Get work starting point */ work = txr->next_to_clean; buf = &txr->tx_buffers[work]; txd = &txr->tx_base[work]; work -= txr->num_desc; /* The distance to ring end */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); do { union e1000_adv_tx_desc *eop = buf->eop; if (eop == NULL) /* No work */ break; if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) break; /* I/O not complete */ if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); if_inc_txcounters(ifp, buf->m_head); m_freem(buf->m_head); buf->m_head = NULL; } buf->eop = NULL; ++txr->tx_avail; /* We clean the range if multi segment */ while (txd != eop) { ++txd; ++buf; ++work; /* wrap the ring? */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } ++txr->tx_avail; buf->eop = NULL; } ++txr->packets; ++processed; txr->watchdog_time = ticks; /* Try the next packet */ ++txd; ++buf; ++work; /* reset with a wrap */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } prefetch(txd); } while (__predict_true(--limit)); bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); work += txr->num_desc; txr->next_to_clean = work; /* ** Watchdog calculation, we know there's ** work outstanding or the first return ** would have been taken, so none processed ** for too long indicates a hang. */ if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) txr->queue_status |= IGB_QUEUE_HUNG; if (txr->tx_avail >= IGB_QUEUE_THRESHOLD) txr->queue_status &= ~IGB_QUEUE_DEPLETED; if (txr->tx_avail == txr->num_desc) { txr->queue_status = IGB_QUEUE_IDLE; return (FALSE); } return (TRUE); } /********************************************************************* * * Refresh mbuf buffers for RX descriptor rings * - now keeps its own state so discards due to resource * exhaustion are unnecessary, if an mbuf cannot be obtained * it just returns, keeping its placeholder, thus it can simply * be recalled to try again. * **********************************************************************/ static void igb_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; bus_dma_segment_t hseg[1]; bus_dma_segment_t pseg[1]; struct igb_rx_buf *rxbuf; struct mbuf *mh, *mp; int i, j, nsegs, error; bool refreshed = FALSE; i = j = rxr->next_to_refresh; /* ** Get one descriptor beyond ** our work mark to control ** the loop. */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; /* No hdr mbuf used with header split off */ if (rxr->hdr_split == FALSE) goto no_split; if (rxbuf->m_head == NULL) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) goto update; } else mh = rxbuf->m_head; mh->m_pkthdr.len = mh->m_len = MHLEN; mh->m_len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(mh); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = mh; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr); no_split: if (rxbuf->m_pack == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (mp == NULL) goto update; } else mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: payload dmamap load" " failure - %d\n", error); m_free(mp); rxbuf->m_pack = NULL; goto update; } rxbuf->m_pack = mp; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr); refreshed = TRUE; /* I feel wefreshed :) */ i = j; /* our next is precalculated */ rxr->next_to_refresh = i; if (++j == adapter->num_rx_desc) j = 0; } update: if (refreshed) /* update tail */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int igb_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct igb_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; if (!(rxr->rx_buffers = (struct igb_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSIZE, /* maxsize */ 1, /* nsegments */ MSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->htag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX payload DMA tag\n"); goto fail; } for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); if (error) { device_printf(dev, "Unable to create RX head DMA maps\n"); goto fail; } error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX packet DMA maps\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ igb_free_receive_structures(adapter); return (error); } static void igb_free_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; } } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int igb_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; if_t ifp; device_t dev; struct igb_rx_buf *rxbuf; bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(rxr->adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ adapter = rxr->adapter; dev = adapter->dev; ifp = adapter->ifp; /* Clear the ring contents */ IGB_RX_LOCK(rxr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* DEV_NETMAP */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* ** Free current RX buffer structures and their mbufs */ igb_free_receive_ring(rxr); /* Configure for header split? */ if (igb_header_split) rxr->hdr_split = TRUE; /* Now replenish the ring mbufs */ for (int j = 0; j < adapter->num_rx_desc; ++j) { struct mbuf *mh, *mp; rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { /* slot sj is mapped to the j-th NIC-ring entry */ int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ if (rxr->hdr_split == FALSE) goto skip_head; /* First the header */ rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } m_adj(rxbuf->m_head, ETHER_ALIGN); mh = rxbuf->m_head; mh->m_len = mh->m_pkthdr.len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, rxbuf->m_head, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) /* Nothing elegant to do here */ goto fail; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); skip_head: /* Now the payload cluster */ rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_pack == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = adapter->num_rx_desc - 1; rxr->lro_enabled = FALSE; rxr->rx_split_packets = 0; rxr->rx_bytes = 0; rxr->fmp = NULL; rxr->lmp = NULL; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface, we ** also only do head split when LRO ** is enabled, since so often they ** are undesireable in similar setups. */ if (adapter->if_capenable & IFCAP_LRO) { error = tcp_lro_init(lro); if (error) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IGB_RX_UNLOCK(rxr); return (0); fail: igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int igb_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int i; for (i = 0; i < adapter->num_queues; i++, rxr++) if (igb_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'i' is the endpoint. */ for (int j = 0; j < i; ++j) { rxr = &adapter->rx_rings[j]; IGB_RX_LOCK(rxr); igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); } return (ENOBUFS); } /* * Initialise the RSS mapping for NICs that support multiple transmit/ * receive rings. */ static void igb_initialise_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? */ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->num_queues; #else queue_id = (i % adapter->num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* XXX This means RSS enable + 8 queues for my igb (82580.) */ mrqc = E1000_MRQC_ENABLE_RSS_4Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void igb_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum, psize, srrctl = 0; INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* ** Set up for header split */ if (igb_header_split) { /* Use a standard mbuf for the header */ srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; } else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; /* ** Set up for jumbo frames */ if (adapter->max_frame_size > ETHER_MAX_LEN) { rctl |= E1000_RCTL_LPE; if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } /* Set maximum packet len */ psize = adapter->max_frame_size; /* are we on a vlan? */ if (if_getsoftc(adapter->ifp, IF_VLAN) != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { rctl &= ~E1000_RCTL_LPE; srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 bus_addr = rxr->rxdma.dma_paddr; u32 rxdctl; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } /* ** Setup for RX MultiQueue */ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (adapter->num_queues >1) { /* rss setup */ igb_initialise_rss_mapping(adapter); /* ** NOTE: Receive Full-Packet Checksum Offload ** is mutually exclusive with Multiqueue. However ** this is not the same as TCP/IP checksums which ** still work. */ rxcsum |= E1000_RXCSUM_PCSD; /* For SCTP Offload */ if ((hw->mac.type == e1000_82576) && (adapter->if_capenable & IFCAP_RXCSUM)) rxcsum |= E1000_RXCSUM_CRCOFL; } else { /* Non RSS setup */ if (adapter->if_capenable & IFCAP_RXCSUM) { rxcsum |= E1000_RXCSUM_IPPCSE; if (adapter->hw.mac.type == e1000_82576) rxcsum |= E1000_RXCSUM_CRCOFL; } else rxcsum &= ~E1000_RXCSUM_TUOFL; } E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Strip CRC bytes. */ rctl |= E1000_RCTL_SECRC; /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; /* Don't store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Receives */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); /* * Setup the HW Rx Head and Tail Descriptor Pointers * - needs to be after enable */ for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. * In this driver it means we adjust RDT to * something different from next_to_refresh * (which is not used in netmap mode). */ if (adapter->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = rxr->next_to_refresh - nm_kr_rxspace(kring); if (t >= adapter->num_rx_desc) t -= adapter->num_rx_desc; else if (t < 0) t += adapter->num_rx_desc; E1000_WRITE_REG(hw, E1000_RDT(i), t); } else #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh); } return; } /********************************************************************* * * Free receive rings. * **********************************************************************/ static void igb_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; igb_free_receive_buffers(rxr); tcp_lro_free(lro); igb_dma_free(adapter, &rxr->rxdma); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures. * **********************************************************************/ static void igb_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; int i; INIT_DEBUGOUT("free_receive_structures: begin"); /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; if (rxbuf->hmap != NULL) { bus_dmamap_destroy(rxr->htag, rxbuf->hmap); rxbuf->hmap = NULL; } if (rxbuf->pmap != NULL) { bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; } } if (rxr->htag != NULL) { bus_dma_tag_destroy(rxr->htag); rxr->htag = NULL; } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } } static __inline void igb_rx_discard(struct rx_ring *rxr, int i) { struct igb_rx_buf *rbuf; rbuf = &rxr->rx_buffers[i]; /* Partially received? Free the chain */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** With advanced descriptors the writeback ** clobbers the buffer addrs, so its easier ** to just free the existing mbufs and take ** the normal refresh path to get new buffers ** and mapping. */ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; bus_dmamap_unload(rxr->htag, rbuf->hmap); } if (rbuf->m_pack) { m_free(rbuf->m_pack); rbuf->m_pack = NULL; bus_dmamap_unload(rxr->ptag, rbuf->pmap); } return; } static __inline void igb_rx_input(struct rx_ring *rxr, struct adapter *adapter, struct mbuf *m, u32 ptype) { /* * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. */ if (rxr->lro_enabled && (adapter->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } IGB_RX_UNLOCK(rxr); if_input(adapter->ifp, m); IGB_RX_LOCK(rxr); } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * Return TRUE if more to clean, FALSE otherwise *********************************************************************/ static bool igb_rxeof(struct igb_queue *que, int count, int *done) { struct adapter *adapter = que->adapter; struct rx_ring *rxr = que->rxr; if_t ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; struct lro_entry *queued; int i, processed = 0, rxdone = 0; u32 ptype, staterr = 0; union e1000_adv_rx_desc *cur; IGB_RX_LOCK(rxr); /* Sync the ring. */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { IGB_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ /* Main clean loop */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mh, *mp; struct igb_rx_buf *rxbuf; u16 hlen, plen, hdr, vtag, pkt_info; bool eop = FALSE; cur = &rxr->rx_base[i]; staterr = le32toh(cur->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if ((adapter->flags & IGB_RUNNING) == 0) break; count--; sendmp = mh = mp = NULL; cur->wb.upper.status_error = 0; rxbuf = &rxr->rx_buffers[i]; plen = le16toh(cur->wb.upper.length); ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; if (((adapter->hw.mac.type == e1000_i350) || (adapter->hw.mac.type == e1000_i354)) && (staterr & E1000_RXDEXT_STATERR_LB)) vtag = be16toh(cur->wb.upper.vlan); else vtag = le16toh(cur->wb.upper.vlan); hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); /* * Free the frame (all segments) if we're at EOP and * it's an error. * * The datasheet states that EOP + status is only valid for * the final segment in a multi-segment frame. */ if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { adapter->dropped_pkts++; ++rxr->rx_discarded; igb_rx_discard(rxr, i); goto next_desc; } /* ** The way the hardware is configured to ** split, it will ONLY use the header buffer ** when header split is enabled, otherwise we ** get normal behavior, ie, both header and ** payload are DMA'd into the payload buffer. ** ** The fmp test is to catch the case where a ** packet spans multiple descriptors, in that ** case only the first header is valid. */ if (rxr->hdr_split && rxr->fmp == NULL) { bus_dmamap_unload(rxr->htag, rxbuf->hmap); hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; if (hlen > IGB_HDR_BUF) hlen = IGB_HDR_BUF; mh = rxr->rx_buffers[i].m_head; mh->m_len = hlen; /* clear buf pointer for refresh */ rxbuf->m_head = NULL; /* ** Get the payload length, this ** could be zero if its a small ** packet. */ if (plen > 0) { mp = rxr->rx_buffers[i].m_pack; mp->m_len = plen; mh->m_next = mp; /* clear buf pointer */ rxbuf->m_pack = NULL; rxr->rx_split_packets++; } } else { /* ** Either no header split, or a ** secondary piece of a fragmented ** split packet. */ mh = rxr->rx_buffers[i].m_pack; mh->m_len = plen; /* clear buf info for refresh */ rxbuf->m_pack = NULL; } bus_dmamap_unload(rxr->ptag, rxbuf->pmap); ++processed; /* So we know when to refresh */ /* Initial frame - setup */ if (rxr->fmp == NULL) { mh->m_pkthdr.len = mh->m_len; /* Save the head of the chain */ rxr->fmp = mh; rxr->lmp = mh; if (mp != NULL) { /* Add payload if split */ mh->m_pkthdr.len += mp->m_len; rxr->lmp = mh->m_next; } } else { /* Chain mbuf's together */ rxr->lmp->m_next = mh; rxr->lmp = rxr->lmp->m_next; rxr->fmp->m_pkthdr.len += mh->m_len; } if (eop) { rxr->fmp->m_pkthdr.rcvif = ifp; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->rx_packets++; /* capture data for AIM */ rxr->packets++; rxr->bytes += rxr->fmp->m_pkthdr.len; rxr->rx_bytes += rxr->fmp->m_pkthdr.len; if ((adapter->if_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, rxr->fmp, ptype); if ((adapter->if_capenable & IFCAP_VLAN_HWTAGGING) && (staterr & E1000_RXD_STAT_VP) != 0) { rxr->fmp->m_pkthdr.ether_vtag = vtag; rxr->fmp->m_flags |= M_VLANTAG; } #ifdef RSS /* XXX set flowtype once this works right */ rxr->fmp->m_pkthdr.flowid = le32toh(cur->wb.lower.hi_dword.rss); switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV4); break; case E1000_RXDADV_RSSTYPE_IPV4: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV4); break; case E1000_RXDADV_RSSTYPE_IPV6_TCP: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6); break; case E1000_RXDADV_RSSTYPE_IPV6_EX: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6_EX); break; case E1000_RXDADV_RSSTYPE_IPV6: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6); break; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6_EX); break; /* XXX no UDP support in RSS just yet */ #ifdef notyet case E1000_RXDADV_RSSTYPE_IPV4_UDP: case E1000_RXDADV_RSSTYPE_IPV6_UDP: case E1000_RXDADV_RSSTYPE_IPV6_UDP_EX: #endif default: /* XXX fallthrough */ M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); } #else rxr->fmp->m_pkthdr.flowid = que->msix; M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); #endif sendmp = rxr->fmp; /* Make sure to set M_PKTHDR. */ sendmp->m_flags |= M_PKTHDR; rxr->fmp = NULL; rxr->lmp = NULL; } next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* ** Send to the stack or LRO */ if (sendmp != NULL) { rxr->next_to_check = i; igb_rx_input(rxr, adapter, sendmp, ptype); i = rxr->next_to_check; rxdone++; } /* Every 8 descriptors we go to refresh mbufs */ if (processed == 8) { igb_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remainders */ if (igb_rx_unrefreshed(rxr)) igb_refresh_mbufs(rxr, i); rxr->next_to_check = i; /* * Flush any outstanding LRO work */ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } if (done != NULL) *done += rxdone; IGB_RX_UNLOCK(rxr); return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype) { u16 status = (u16)staterr; u8 errors = (u8) (staterr >> 24); int sctp; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) sctp = 1; else sctp = 0; if (status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else mp->m_pkthdr.csum_flags = 0; } if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) /* reassign */ type = CSUM_SCTP_VALID; #endif /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= type; if (sctp == 0) mp->m_pkthdr.csum_data = htons(0xffff); } } return; } /* * This routine is run via an vlan * config EVENT */ static void igb_register_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); u32 index, bit; if (if_getsoftc(ifp, IF_DRIVER_SOFTC) != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Change hw filter setting */ if (adapter->if_capenable & IFCAP_VLAN_HWFILTER) igb_setup_vlan_hw_support(adapter); IGB_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void igb_unregister_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); u32 index, bit; if (if_getsoftc(ifp, IF_DRIVER_SOFTC) != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Change hw filter setting */ if (adapter->if_capenable & IFCAP_VLAN_HWFILTER) igb_setup_vlan_hw_support(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; if (adapter->vf_ifp) { e1000_rlpml_set_vf(hw, adapter->max_frame_size + VLAN_TAG_SIZE); return; } reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ if (adapter->if_capenable & IFCAP_VLAN_HWFILTER) { reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } /* Update the frame size */ E1000_WRITE_REG(&adapter->hw, E1000_RLPML, adapter->max_frame_size + VLAN_TAG_SIZE); /* Don't bother with table if no vlans */ if ((adapter->num_vlans == 0) || ((adapter->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IGB_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) { if (adapter->vf_ifp) e1000_vfta_set_vf(hw, adapter->shadow_vfta[i], TRUE); else e1000_write_vfta(hw, i, adapter->shadow_vfta[i]); } } static void igb_enable_intr(struct adapter *adapter) { /* With RSS set up what to auto clear */ if (adapter->msix_mem) { u32 mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, IMS_ENABLE_MASK); } E1000_WRITE_FLUSH(&adapter->hw); return; } static void igb_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) { E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); return; } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void igb_init_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; manc2h |= 1 << 5; /* Mng Port 623 */ manc2h |= 1 << 6; /* Mng Port 664 */ E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void igb_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is loaded. * */ static void igb_get_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->vf_ifp) return; /* Let firmware know the driver has taken over */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that the * driver is no longer loaded. * */ static void igb_release_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->vf_ifp) return; /* Let firmware taken over control of h/w */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); } static int igb_is_valid_ether_addr(uint8_t *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* * Enable PCI Wake On Lan capability */ static void igb_enable_wakeup(device_t dev) { u16 cap, status; u8 id; /* First find the capabilities pointer*/ cap = pci_read_config(dev, PCIR_CAP_PTR, 2); /* Read the PM Capabilities */ id = pci_read_config(dev, cap, 1); if (id != PCIY_PMG) /* Something wrong */ return; /* OK, we have the power capabilities, so now get the status register */ cap += PCIR_POWER_STATUS; status = pci_read_config(dev, cap, 2); status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, cap, status, 2); return; } static void igb_led_func(void *arg, int onoff) { struct adapter *adapter = arg; IGB_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } IGB_CORE_UNLOCK(adapter); } static uint64_t igb_get_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; struct e1000_hw_stats *stats; adapter = if_getsoftc(ifp, IF_DRIVER_SOFTC); stats = (struct e1000_hw_stats *)adapter->stats; switch (cnt) { case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + stats->rxerrc + stats->crcerrs + stats->algnerrc + stats->ruc + stats->roc + stats->mpc + stats->cexterr); case IFCOUNTER_OERRORS: return (stats->ecol + stats->latecol + adapter->watchdog_events); case IFCOUNTER_COLLISIONS: return (stats->colc); default: return (if_get_counter_default(ifp, cnt)); } } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void igb_update_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_hw_stats *stats; /* ** The virtual function adapter has only a ** small controlled set of stats, do only ** those and return. */ if (adapter->vf_ifp) { igb_update_vf_stats_counters(adapter); return; } stats = (struct e1000_hw_stats *)adapter->stats; if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { stats->symerrs += E1000_READ_REG(hw,E1000_SYMERRS); stats->sec += E1000_READ_REG(hw, E1000_SEC); } stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); stats->mpc += E1000_READ_REG(hw, E1000_MPC); stats->scc += E1000_READ_REG(hw, E1000_SCC); stats->ecol += E1000_READ_REG(hw, E1000_ECOL); stats->mcc += E1000_READ_REG(hw, E1000_MCC); stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); stats->colc += E1000_READ_REG(hw, E1000_COLC); stats->dc += E1000_READ_REG(hw, E1000_DC); stats->rlec += E1000_READ_REG(hw, E1000_RLEC); stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); stats->xoffrxc += adapter->pause_frames; stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); stats->gprc += E1000_READ_REG(hw, E1000_GPRC); stats->bprc += E1000_READ_REG(hw, E1000_BPRC); stats->mprc += E1000_READ_REG(hw, E1000_MPRC); stats->gptc += E1000_READ_REG(hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); stats->ruc += E1000_READ_REG(hw, E1000_RUC); stats->rfc += E1000_READ_REG(hw, E1000_RFC); stats->roc += E1000_READ_REG(hw, E1000_ROC); stats->rjc += E1000_READ_REG(hw, E1000_RJC); stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC); stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC); stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC); stats->tor += E1000_READ_REG(hw, E1000_TORL) + ((u64)E1000_READ_REG(hw, E1000_TORH) << 32); stats->tot += E1000_READ_REG(hw, E1000_TOTL) + ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32); stats->tpr += E1000_READ_REG(hw, E1000_TPR); stats->tpt += E1000_READ_REG(hw, E1000_TPT); stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); stats->mptc += E1000_READ_REG(hw, E1000_MPTC); stats->bptc += E1000_READ_REG(hw, E1000_BPTC); /* Interrupt Counts */ stats->iac += E1000_READ_REG(hw, E1000_IAC); stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); /* Host to Card Statistics */ stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); /* Driver specific counters */ adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); adapter->packet_buf_alloc_tx = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); adapter->packet_buf_alloc_rx = (E1000_READ_REG(hw, E1000_PBA) & 0xffff); } /********************************************************************** * * Initialize the VF board statistics counters. * **********************************************************************/ static void igb_vf_init_stats(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; stats = (struct e1000_vf_stats *)adapter->stats; if (stats == NULL) return; stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); } /********************************************************************** * * Update the VF board statistics counters. * **********************************************************************/ static void igb_update_vf_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; if (adapter->link_speed == 0) return; stats = (struct e1000_vf_stats *)adapter->stats; UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); } /* Export a single 32-bit register via a read-only sysctl. */ static int igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* ** Tuneable interrupt rate handler */ static int igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); int error; u32 reg, usec, rate; reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); usec = ((reg & 0x7FFC) >> 2); if (usec > 0) rate = 1000000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; return 0; } /* * Add sysctl variables, one per statistic, to the system. */ static void igb_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", CTLFLAG_RD, &adapter->device_control, "Device Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", CTLFLAG_RD, &adapter->rx_control, "Receiver Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", CTLFLAG_RD, &adapter->int_mask, "Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", CTLFLAG_RD, &adapter->eint_mask, "Extended Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_tx, "Transmit Buffer Packet Allocation"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_rx, "Receive Buffer Packet Allocation"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i], sizeof(&adapter->queues[i]), igb_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue Descriptors Unavailable"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); /* ** VF adapter has a very limited set of stats ** since its not managing the metal, so to speak. */ if (adapter->vf_ifp) { SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); return; } SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &stats->symerrs, "Symbol Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &stats->sec, "Sequence Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &stats->dc, "Defer Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &stats->mpc, "Missed Packets"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &stats->rnbc, "Receive No Buffers"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &stats->ruc, "Receive Undersize"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &stats->rjc, "Recevied Jabber"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &stats->rxerrc, "Receive Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &stats->algnerrc, "Alignment Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs", CTLFLAG_RD, &stats->tncrs, "Transmit with No CRS"); /* On 82575 these are collision counts */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &stats->cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->xonrxc, "XON Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->xontxc, "XON Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->xoffrxc, "XOFF Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->xofftxc, "XOFF Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd", CTLFLAG_RD, &stats->fcruc, "Unsupported Flow Control Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd", CTLFLAG_RD, &stats->mgprc, "Management Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop", CTLFLAG_RD, &stats->mgpdc, "Management Packets Dropped"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd", CTLFLAG_RD, &stats->mgptc, "Management Packets Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", CTLFLAG_RD, &stats->tot, "Total Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &stats->tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &stats->tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &stats->iac, "Interrupt Assertion Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &stats->icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &stats->icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &stats->ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &stats->ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &stats->ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &stats->ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &stats->icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &stats->icrxoc, "Interrupt Cause Receiver Overrun Count"); /* Host to Card Stats */ host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", CTLFLAG_RD, NULL, "Host to Card Statistics"); host_list = SYSCTL_CHILDREN(host_node); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", CTLFLAG_RD, &stats->cbtmpc, "Circuit Breaker Tx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", CTLFLAG_RD, &stats->htdpmc, "Host Transmit Discarded Packets"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", CTLFLAG_RD, &stats->rpthc, "Rx Packets To Host"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", CTLFLAG_RD, &stats->cbrmpc, "Circuit Breaker Rx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", CTLFLAG_RD, &stats->cbrdpc, "Circuit Breaker Rx Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", CTLFLAG_RD, &stats->hgptc, "Host Good Packets Tx Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", CTLFLAG_RD, &stats->htcbdpc, "Host Tx Circuit Breaker Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", CTLFLAG_RD, &stats->hgorc, "Host Good Octets Received Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", CTLFLAG_RD, &stats->hgotc, "Host Good Octets Transmit Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", CTLFLAG_RD, &stats->lenerrs, "Length Errors"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", CTLFLAG_RD, &stats->scvpc, "SerDes/SGMII Code Violation Pkt Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", CTLFLAG_RD, &stats->hrmpc, "Header Redirection Missed Packet Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) { adapter = (struct adapter *)arg1; igb_print_nvm_info(adapter); } return (error); } static void igb_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static void igb_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); /* XXX TODO: update DROP_EN on each RX queue if appropriate */ return (error); } /* ** Manage DMA Coalesce: ** Control values: ** 0/1 - off/on ** Legal timer values are: ** 250,500,1000-10000 in thousands */ static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (adapter->dmac) { case 0: /*Disabling */ break; case 1: /* Just enable and use default */ adapter->dmac = 1000; break; case 250: case 500: case 1000: case 2000: case 3000: case 4000: case 5000: case 6000: case 7000: case 8000: case 9000: case 10000: /* Legal values - allow */ break; default: /* Do nothing, illegal value */ adapter->dmac = 0; return (EINVAL); } /* Reinit the interface */ igb_init(adapter); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec._82575.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); IGB_CORE_LOCK(adapter); adapter->hw.dev_spec._82575.eee_disable = (value != 0); igb_init(adapter); IGB_CORE_UNLOCK(adapter); return (0); } Index: projects/ifnet/sys/dev/msk/if_msk.c =================================================================== --- projects/ifnet/sys/dev/msk/if_msk.c (revision 280372) +++ projects/ifnet/sys/dev/msk/if_msk.c (revision 280373) @@ -1,4546 +1,4545 @@ /****************************************************************************** * * Name : sky2.c * Project: Gigabit Ethernet Driver for FreeBSD 5.x/6.x * Version: $Revision: 1.23 $ * Date : $Date: 2005/12/22 09:04:11 $ * Purpose: Main driver source file * *****************************************************************************/ /****************************************************************************** * * LICENSE: * Copyright (C) Marvell International Ltd. and/or its affiliates * * The computer program files contained in this folder ("Files") * are provided to you under the BSD-type license terms provided * below, and any use of such Files and any derivative works * thereof created by you shall be governed by the following terms * and conditions: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * - Neither the name of Marvell nor the names of its contributors * may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * /LICENSE * *****************************************************************************/ /*- * Copyright (c) 1997, 1998, 1999, 2000 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2003 Nathan L. Binkert * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * Device driver for the Marvell Yukon II Ethernet controller. * Due to lack of documentation, this driver is based on the code from * sk(4) and Marvell's myk(4) driver for FreeBSD 5.x. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(msk, pci, 1, 1, 1); MODULE_DEPEND(msk, ether, 1, 1, 1); MODULE_DEPEND(msk, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* Tunables. */ static int msi_disable = 0; TUNABLE_INT("hw.msk.msi_disable", &msi_disable); static int legacy_intr = 0; TUNABLE_INT("hw.msk.legacy_intr", &legacy_intr); static int jumbo_disable = 0; TUNABLE_INT("hw.msk.jumbo_disable", &jumbo_disable); #define MSK_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define MSK_DEFAULT_FRAMESIZE \ (ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* * Devices supported by this driver. */ static const struct msk_product { uint16_t msk_vendorid; uint16_t msk_deviceid; const char *msk_name; } msk_products[] = { { VENDORID_SK, DEVICEID_SK_YUKON2, "SK-9Sxx Gigabit Ethernet" }, { VENDORID_SK, DEVICEID_SK_YUKON2_EXPR, "SK-9Exx Gigabit Ethernet"}, { VENDORID_MARVELL, DEVICEID_MRVL_8021CU, "Marvell Yukon 88E8021CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8021X, "Marvell Yukon 88E8021 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8022CU, "Marvell Yukon 88E8022CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8022X, "Marvell Yukon 88E8022 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8061CU, "Marvell Yukon 88E8061CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8061X, "Marvell Yukon 88E8061 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8062CU, "Marvell Yukon 88E8062CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8062X, "Marvell Yukon 88E8062 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8035, "Marvell Yukon 88E8035 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8036, "Marvell Yukon 88E8036 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8038, "Marvell Yukon 88E8038 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8039, "Marvell Yukon 88E8039 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8040, "Marvell Yukon 88E8040 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8040T, "Marvell Yukon 88E8040T Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8042, "Marvell Yukon 88E8042 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8048, "Marvell Yukon 88E8048 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4361, "Marvell Yukon 88E8050 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4360, "Marvell Yukon 88E8052 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4362, "Marvell Yukon 88E8053 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4363, "Marvell Yukon 88E8055 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4364, "Marvell Yukon 88E8056 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4365, "Marvell Yukon 88E8070 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436A, "Marvell Yukon 88E8058 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436B, "Marvell Yukon 88E8071 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436C, "Marvell Yukon 88E8072 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436D, "Marvell Yukon 88E8055 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4370, "Marvell Yukon 88E8075 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4380, "Marvell Yukon 88E8057 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4381, "Marvell Yukon 88E8059 Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE550SX, "D-Link 550SX Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE560SX, "D-Link 560SX Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE560T, "D-Link 560T Gigabit Ethernet" } }; static const char *model_name[] = { "Yukon XL", "Yukon EC Ultra", "Yukon EX", "Yukon EC", "Yukon FE", "Yukon FE+", "Yukon Supreme", "Yukon Ultra 2", "Yukon Unknown", "Yukon Optima", }; static int mskc_probe(device_t); static int mskc_attach(device_t); static int mskc_detach(device_t); static int mskc_shutdown(device_t); static int mskc_setup_rambuffer(struct msk_softc *); static int mskc_suspend(device_t); static int mskc_resume(device_t); static bus_dma_tag_t mskc_get_dma_tag(device_t, device_t); static void mskc_reset(struct msk_softc *); static int msk_probe(device_t); static int msk_attach(device_t); static int msk_detach(device_t); static void msk_tick(void *); static void msk_intr(void *); static void msk_intr_phy(struct msk_if_softc *); static void msk_intr_gmac(struct msk_if_softc *); static __inline void msk_rxput(struct msk_if_softc *); static int msk_handle_events(struct msk_softc *); static void msk_handle_hwerr(struct msk_if_softc *, uint32_t); static void msk_intr_hwerr(struct msk_softc *); #ifndef __NO_STRICT_ALIGNMENT static __inline void msk_fixup_rx(struct mbuf *); #endif static __inline void msk_rxcsum(struct msk_if_softc *, uint32_t, struct mbuf *); static void msk_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_jumbo_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_txeof(struct msk_if_softc *, int); static int msk_encap(struct msk_if_softc *, struct mbuf **); static int msk_transmit(if_t, struct mbuf *); static int msk_start(struct msk_if_softc *); static int msk_ioctl(if_t, u_long, void *, struct thread *); static void msk_set_prefetch(struct msk_softc *, int, bus_addr_t, uint32_t); static void msk_set_rambuffer(struct msk_if_softc *); static void msk_set_tx_stfwd(struct msk_if_softc *); static void msk_init(struct msk_if_softc *); static void msk_stop(struct msk_if_softc *); static void msk_watchdog(struct msk_if_softc *); static int msk_mediachange(if_t); static void msk_mediastatus(if_t, struct ifmediareq *); static void msk_phy_power(struct msk_softc *, int); static void msk_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int msk_status_dma_alloc(struct msk_softc *); static void msk_status_dma_free(struct msk_softc *); static int msk_txrx_dma_alloc(struct msk_if_softc *); static int msk_rx_dma_jalloc(struct msk_if_softc *); static void msk_txrx_dma_free(struct msk_if_softc *); static void msk_rx_dma_jfree(struct msk_if_softc *); static int msk_rx_fill(struct msk_if_softc *, int); static int msk_init_rx_ring(struct msk_if_softc *); static int msk_init_jumbo_rx_ring(struct msk_if_softc *); static void msk_init_tx_ring(struct msk_if_softc *); static __inline void msk_discard_rxbuf(struct msk_if_softc *, int); static __inline void msk_discard_jumbo_rxbuf(struct msk_if_softc *, int); static int msk_newbuf(struct msk_if_softc *, int); static int msk_jumbo_newbuf(struct msk_if_softc *, int); static int msk_phy_readreg(struct msk_if_softc *, int, int); static int msk_phy_writereg(struct msk_if_softc *, int, int, int); static int msk_miibus_readreg(device_t, int, int); static int msk_miibus_writereg(device_t, int, int, int); static void msk_miibus_statchg(device_t); static void msk_rxfilter(struct msk_if_softc *); static void msk_setvlan(struct msk_if_softc *); static void msk_stats_clear(struct msk_if_softc *); static void msk_stats_update(struct msk_if_softc *); static int msk_sysctl_stat32(SYSCTL_HANDLER_ARGS); static int msk_sysctl_stat64(SYSCTL_HANDLER_ARGS); static void msk_sysctl_node(struct msk_if_softc *); static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_msk_proc_limit(SYSCTL_HANDLER_ARGS); static device_method_t mskc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mskc_probe), DEVMETHOD(device_attach, mskc_attach), DEVMETHOD(device_detach, mskc_detach), DEVMETHOD(device_suspend, mskc_suspend), DEVMETHOD(device_resume, mskc_resume), DEVMETHOD(device_shutdown, mskc_shutdown), DEVMETHOD(bus_get_dma_tag, mskc_get_dma_tag), DEVMETHOD_END }; static driver_t mskc_driver = { "mskc", mskc_methods, sizeof(struct msk_softc) }; static devclass_t mskc_devclass; static device_method_t msk_methods[] = { /* Device interface */ DEVMETHOD(device_probe, msk_probe), DEVMETHOD(device_attach, msk_attach), DEVMETHOD(device_detach, msk_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, msk_miibus_readreg), DEVMETHOD(miibus_writereg, msk_miibus_writereg), DEVMETHOD(miibus_statchg, msk_miibus_statchg), DEVMETHOD_END }; static driver_t msk_driver = { "msk", msk_methods, sizeof(struct msk_if_softc) }; static devclass_t msk_devclass; DRIVER_MODULE(mskc, pci, mskc_driver, mskc_devclass, NULL, NULL); DRIVER_MODULE(msk, mskc, msk_driver, msk_devclass, NULL, NULL); DRIVER_MODULE(miibus, msk, miibus_driver, miibus_devclass, NULL, NULL); static struct resource_spec msk_res_spec_io[] = { { SYS_RES_IOPORT, PCIR_BAR(1), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec msk_res_spec_mem[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec msk_irq_spec_legacy[] = { { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0, 0 } }; static struct resource_spec msk_irq_spec_msi[] = { { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0, 0 } }; static struct ifdriver msk_ifdrv = { .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, .ifop_ioctl = msk_ioctl, .ifop_transmit = msk_transmit, }, .ifdrv_name = "msk", .ifdrv_type = IFT_ETHER, .ifdrv_hdrlen = sizeof(struct ether_vlan_header), .ifdrv_maxqlen = MSK_TX_RING_CNT - 1, }; static int msk_miibus_readreg(device_t dev, int phy, int reg) { struct msk_if_softc *sc_if; sc_if = device_get_softc(dev); return (msk_phy_readreg(sc_if, phy, reg)); } static int msk_phy_readreg(struct msk_if_softc *sc_if, int phy, int reg) { struct msk_softc *sc; int i, val; sc = sc_if->msk_softc; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_CTRL, GM_SMI_CT_PHY_AD(phy) | GM_SMI_CT_REG_AD(reg) | GM_SMI_CT_OP_RD); for (i = 0; i < MSK_TIMEOUT; i++) { DELAY(1); val = GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_CTRL); if ((val & GM_SMI_CT_RD_VAL) != 0) { val = GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_DATA); break; } } if (i == MSK_TIMEOUT) { if_printf(sc_if->msk_ifp, "phy failed to come ready\n"); val = 0; } return (val); } static int msk_miibus_writereg(device_t dev, int phy, int reg, int val) { struct msk_if_softc *sc_if; sc_if = device_get_softc(dev); return (msk_phy_writereg(sc_if, phy, reg, val)); } static int msk_phy_writereg(struct msk_if_softc *sc_if, int phy, int reg, int val) { struct msk_softc *sc; int i; sc = sc_if->msk_softc; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_DATA, val); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_CTRL, GM_SMI_CT_PHY_AD(phy) | GM_SMI_CT_REG_AD(reg)); for (i = 0; i < MSK_TIMEOUT; i++) { DELAY(1); if ((GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_CTRL) & GM_SMI_CT_BUSY) == 0) break; } if (i == MSK_TIMEOUT) if_printf(sc_if->msk_ifp, "phy write timeout\n"); return (0); } static void msk_miibus_statchg(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; struct mii_data *mii; if_t ifp; uint32_t gmac; sc_if = device_get_softc(dev); sc = sc_if->msk_softc; MSK_IF_LOCK_ASSERT(sc_if); mii = device_get_softc(sc_if->msk_miibus); ifp = sc_if->msk_ifp; if (mii == NULL || ifp == NULL || (sc_if->msk_flags & MSK_FLAG_RUNNING) == 0) return; sc_if->msk_flags &= ~MSK_FLAG_LINK; if ((mii->mii_media_status & (IFM_AVALID | IFM_ACTIVE)) == (IFM_AVALID | IFM_ACTIVE)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc_if->msk_flags |= MSK_FLAG_LINK; break; case IFM_1000_T: case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: if ((sc_if->msk_flags & MSK_FLAG_FASTETHER) == 0) sc_if->msk_flags |= MSK_FLAG_LINK; break; default: break; } } if ((sc_if->msk_flags & MSK_FLAG_LINK) != 0) { /* Enable Tx FIFO Underrun. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_MSK), GM_IS_TX_FF_UR | GM_IS_RX_FF_OR); /* * Because mii(4) notify msk(4) that it detected link status * change, there is no need to enable automatic * speed/flow-control/duplex updates. */ gmac = GM_GPCR_AU_ALL_DIS; switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_SX: case IFM_1000_T: gmac |= GM_GPCR_SPEED_1000; break; case IFM_100_TX: gmac |= GM_GPCR_SPEED_100; break; case IFM_10_T: break; } if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) == 0) gmac |= GM_GPCR_FC_RX_DIS; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) == 0) gmac |= GM_GPCR_FC_TX_DIS; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) gmac |= GM_GPCR_DUP_FULL; else gmac |= GM_GPCR_FC_RX_DIS | GM_GPCR_FC_TX_DIS; gmac |= GM_GPCR_RX_ENA | GM_GPCR_TX_ENA; GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, gmac); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); gmac = GMC_PAUSE_OFF; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) gmac = GMC_PAUSE_ON; } CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), gmac); /* Enable PHY interrupt for FIFO underrun/overflow. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, PHY_M_IS_FIFO_ERROR); } else { /* * Link state changed to down. * Disable PHY interrupts. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, 0); /* Disable Rx/Tx MAC. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); if ((gmac & (GM_GPCR_RX_ENA | GM_GPCR_TX_ENA)) != 0) { gmac &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, gmac); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); } } if_setbaudrate(ifp, ifmedia_baudrate(mii->mii_media_active)); if_link_state_change(ifp, ifmedia_link_state(mii->mii_media_status)); } static void msk_hash_maddr(void *arg, struct sockaddr *maddr) { struct sockaddr_dl *sdl = (struct sockaddr_dl *)maddr; uint32_t *mchash, crc; if (sdl->sdl_family != AF_LINK) return; mchash = arg; crc = ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN); /* Just want the 6 least significant bits. */ crc &= 0x3f; /* Set the corresponding bit in the hash table. */ mchash[crc >> 5] |= 1 << (crc & 0x1f); } static void msk_rxfilter(struct msk_if_softc *sc_if) { struct msk_softc *sc; if_t ifp; uint32_t mchash[2]; uint16_t mode; sc = sc_if->msk_softc; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; bzero(mchash, sizeof(mchash)); mode = GMAC_READ_2(sc, sc_if->msk_port, GM_RX_CTRL); if ((sc_if->msk_if_flags & IFF_PROMISC) != 0) mode &= ~(GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA); else if ((sc_if->msk_if_flags & IFF_ALLMULTI) != 0) { mode |= GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA; mchash[0] = 0xffff; mchash[1] = 0xffff; } else { mode |= GM_RXCR_UCF_ENA; if_foreach_maddr(ifp, msk_hash_maddr, mchash); if (mchash[0] != 0 || mchash[1] != 0) mode |= GM_RXCR_MCF_ENA; } GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H1, mchash[0] & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H2, (mchash[0] >> 16) & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H3, mchash[1] & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H4, (mchash[1] >> 16) & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_CTRL, mode); } static void msk_setvlan(struct msk_if_softc *sc_if) { struct msk_softc *sc; sc = sc_if->msk_softc; if (sc_if->msk_capenable & IFCAP_VLAN_HWTAGGING) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_VLAN_TAG_ON); } else { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), RX_VLAN_STRIP_OFF); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_VLAN_TAG_OFF); } } static int msk_rx_fill(struct msk_if_softc *sc_if, int jumbo) { uint16_t idx; int i; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_capenable & IFCAP_RXCSUM) != 0) { /* Wait until controller executes OP_TCPSTART command. */ for (i = 100; i > 0; i--) { DELAY(100); idx = CSR_READ_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_GET_IDX_REG)); if (idx != 0) break; } if (i == 0) { device_printf(sc_if->msk_if_dev, "prefetch unit stuck?\n"); return (ETIMEDOUT); } /* * Fill consumed LE with free buffer. This can be done * in Rx handler but we don't want to add special code * in fast handler. */ if (jumbo > 0) { if (msk_jumbo_newbuf(sc_if, 0) != 0) return (ENOBUFS); bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } else { if (msk_newbuf(sc_if, 0) != 0) return (ENOBUFS); bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } sc_if->msk_cdata.msk_rx_prod = 0; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); } return (0); } static int msk_init_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); sc_if->msk_cdata.msk_rx_cons = 0; sc_if->msk_cdata.msk_rx_prod = 0; sc_if->msk_cdata.msk_rx_putwm = MSK_PUT_WM; rd = &sc_if->msk_rdata; bzero(rd->msk_rx_ring, sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT); for (i = prod = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; MSK_INC(prod, MSK_RX_RING_CNT); } nbuf = MSK_RX_BUF_CNT; prod = 0; /* Have controller know how to compute Rx checksum. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_capenable & IFCAP_RXCSUM)) { #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); nbuf--; } for (i = 0; i < nbuf; i++) { if (msk_newbuf(sc_if, prod) != 0) return (ENOBUFS); MSK_RX_INC(prod, MSK_RX_RING_CNT); } bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Update prefetch unit. */ sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), (sc_if->msk_cdata.msk_rx_prod + MSK_RX_RING_CNT - 1) % MSK_RX_RING_CNT); if (msk_rx_fill(sc_if, 0) != 0) return (ENOBUFS); return (0); } static int msk_init_jumbo_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); sc_if->msk_cdata.msk_rx_cons = 0; sc_if->msk_cdata.msk_rx_prod = 0; sc_if->msk_cdata.msk_rx_putwm = MSK_PUT_WM; rd = &sc_if->msk_rdata; bzero(rd->msk_jumbo_rx_ring, sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT); for (i = prod = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); } nbuf = MSK_RX_BUF_CNT; prod = 0; /* Have controller know how to compute Rx checksum. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_capenable & IFCAP_RXCSUM) != 0) { #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); nbuf--; } for (i = 0; i < nbuf; i++) { if (msk_jumbo_newbuf(sc_if, prod) != 0) return (ENOBUFS); MSK_RX_INC(prod, MSK_JUMBO_RX_RING_CNT); } bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Update prefetch unit. */ sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), (sc_if->msk_cdata.msk_rx_prod + MSK_JUMBO_RX_RING_CNT - 1) % MSK_JUMBO_RX_RING_CNT); if (msk_rx_fill(sc_if, 1) != 0) return (ENOBUFS); return (0); } static void msk_init_tx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_txdesc *txd; int i; sc_if->msk_cdata.msk_tso_mtu = 0; sc_if->msk_cdata.msk_last_csum = 0; sc_if->msk_cdata.msk_tx_prod = 0; sc_if->msk_cdata.msk_tx_cons = 0; sc_if->msk_cdata.msk_tx_cnt = 0; sc_if->msk_cdata.msk_tx_high_addr = 0; rd = &sc_if->msk_rdata; bzero(rd->msk_tx_ring, sizeof(struct msk_tx_desc) * MSK_TX_RING_CNT); for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; txd->tx_m = NULL; txd->tx_le = &rd->msk_tx_ring[i]; } bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static __inline void msk_discard_rxbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; rx_le = rxd->rx_le; rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; rx_le->msk_control = htole32(m->m_len | OP_PACKET | HW_OWNER); } static __inline void msk_discard_jumbo_rxbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; rx_le = rxd->rx_le; rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; rx_le->msk_control = htole32(m->m_len | OP_PACKET | HW_OWNER); } static int msk_newbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) m_adj(m, ETHER_ALIGN); #ifndef __NO_STRICT_ALIGNMENT else m_adj(m, MSK_RX_BUF_ALIGN); #endif if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_rx_tag, sc_if->msk_cdata.msk_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; #ifdef MSK_64BIT_DMA rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_RX_RING_CNT); rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; #endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); rxd->rx_m = NULL; } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_rx_sparemap; sc_if->msk_cdata.msk_rx_sparemap = map; bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_LO(segs[0].ds_addr)); rx_le->msk_control = htole32(segs[0].ds_len | OP_PACKET | HW_OWNER); return (0); } static int msk_jumbo_newbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) m_adj(m, ETHER_ALIGN); #ifndef __NO_STRICT_ALIGNMENT else m_adj(m, MSK_RX_BUF_ALIGN); #endif if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_jumbo_rx_tag, sc_if->msk_cdata.msk_jumbo_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; #ifdef MSK_64BIT_DMA rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; #endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap); rxd->rx_m = NULL; } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_jumbo_rx_sparemap; sc_if->msk_cdata.msk_jumbo_rx_sparemap = map; bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_LO(segs[0].ds_addr)); rx_le->msk_control = htole32(segs[0].ds_len | OP_PACKET | HW_OWNER); return (0); } /* * Set media options. */ static int msk_mediachange(if_t ifp) { struct msk_if_softc *sc_if; struct mii_data *mii; int error; sc_if = if_getsoftc(ifp, IF_DRIVER_SOFTC); MSK_IF_LOCK(sc_if); mii = device_get_softc(sc_if->msk_miibus); error = mii_mediachg(mii); MSK_IF_UNLOCK(sc_if); return (error); } /* * Report current media status. */ static void msk_mediastatus(if_t ifp, struct ifmediareq *ifmr) { struct msk_if_softc *sc_if; struct mii_data *mii; sc_if = if_getsoftc(ifp, IF_DRIVER_SOFTC); MSK_IF_LOCK(sc_if); if ((sc_if->msk_if_flags & IFF_UP) == 0) { MSK_IF_UNLOCK(sc_if); return; } mii = device_get_softc(sc_if->msk_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; MSK_IF_UNLOCK(sc_if); } static int msk_ioctl(if_t ifp, u_long command, void *data, struct thread *td) { struct msk_if_softc *sc_if; struct ifreq *ifr; struct mii_data *mii; int error, reinit, setvlan; uint32_t oflags, mask; sc_if = if_getsoftc(ifp, IF_DRIVER_SOFTC); ifr = (struct ifreq *)data; error = 0; switch(command) { case SIOCSIFMTU: if (ifr->ifr_mtu > MSK_JUMBO_MTU || ifr->ifr_mtu < ETHERMIN) { error = EINVAL; break; } MSK_IF_LOCK(sc_if); if (ifr->ifr_mtu > ETHERMTU) { if ((sc_if->msk_flags & MSK_FLAG_JUMBO) == 0) { error = EINVAL; MSK_IF_UNLOCK(sc_if); break; } if ((sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { struct ifreq tmp; MSK_IF_UNLOCK(sc_if); if_drvioctl(ifp, SIOCGIFCAP, &tmp, td); tmp.ifr_reqcap = tmp.ifr_curcap & ~(MSK_CSUM_FEATURES | CSUM_TSO); if_drvioctl(ifp, SIOCSIFCAP, &tmp, td); MSK_IF_LOCK(sc_if); } } sc_if->msk_framesize = ifr->ifr_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; if ((sc_if->msk_flags & MSK_FLAG_RUNNING) != 0) { sc_if->msk_flags &= ~MSK_FLAG_RUNNING; msk_init(sc_if); } MSK_IF_UNLOCK(sc_if); break; case SIOCSIFFLAGS: MSK_IF_LOCK(sc_if); oflags = sc_if->msk_if_flags; sc_if->msk_if_flags = ifr->ifr_flags; if ((sc_if->msk_if_flags & IFF_UP) != 0) { if ((sc_if->msk_flags & MSK_FLAG_RUNNING) != 0 && ((oflags ^ sc_if->msk_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) msk_rxfilter(sc_if); else if ((sc_if->msk_flags & MSK_FLAG_DETACH) == 0) msk_init(sc_if); } else if ((sc_if->msk_flags & MSK_FLAG_RUNNING) != 0) msk_stop(sc_if); MSK_IF_UNLOCK(sc_if); break; case SIOCADDMULTI: case SIOCDELMULTI: MSK_IF_LOCK(sc_if); if ((sc_if->msk_flags & MSK_FLAG_RUNNING) != 0) msk_rxfilter(sc_if); MSK_IF_UNLOCK(sc_if); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc_if->msk_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: reinit = 0; setvlan = 0; ifr->ifr_hwassist = 0; mask = ifr->ifr_reqcap ^ ifr->ifr_curcap; if ((IFCAP_TXCSUM & ifr->ifr_reqcap) != 0) ifr->ifr_hwassist |= MSK_CSUM_FEATURES; if ((mask & IFCAP_RXCSUM) != 0 && (sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) reinit = 1; if ((IFCAP_TSO4 & ifr->ifr_reqcap) != 0) ifr->ifr_hwassist |= CSUM_TSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0) setvlan = 1; if (sc_if->msk_framesize > MSK_DEFAULT_FRAMESIZE && (sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { ifr->ifr_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifr->ifr_reqcap &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); } MSK_IF_LOCK(sc_if); sc_if->msk_capenable = ifr->ifr_reqcap; if (setvlan) msk_setvlan(sc_if); if (reinit && (sc_if->msk_flags & MSK_FLAG_RUNNING) != 0) { sc_if->msk_flags &= ~MSK_FLAG_RUNNING; msk_init(sc_if); } MSK_IF_UNLOCK(sc_if); break; default: error = EOPNOTSUPP; break; } return (error); } static int mskc_probe(device_t dev) { const struct msk_product *mp; uint16_t vendor, devid; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); mp = msk_products; for (i = 0; i < nitems(msk_products); i++, mp++) { if (vendor == mp->msk_vendorid && devid == mp->msk_deviceid) { device_set_desc(dev, mp->msk_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int mskc_setup_rambuffer(struct msk_softc *sc) { int next; int i; /* Get adapter SRAM size. */ sc->msk_ramsize = CSR_READ_1(sc, B2_E_0) * 4; if (bootverbose) device_printf(sc->msk_dev, "RAM buffer size : %dKB\n", sc->msk_ramsize); if (sc->msk_ramsize == 0) return (0); sc->msk_pflags |= MSK_FLAG_RAMBUF; /* * Give receiver 2/3 of memory and round down to the multiple * of 1024. Tx/Rx RAM buffer size of Yukon II should be multiple * of 1024. */ sc->msk_rxqsize = rounddown((sc->msk_ramsize * 1024 * 2) / 3, 1024); sc->msk_txqsize = (sc->msk_ramsize * 1024) - sc->msk_rxqsize; for (i = 0, next = 0; i < sc->msk_num_port; i++) { sc->msk_rxqstart[i] = next; sc->msk_rxqend[i] = next + sc->msk_rxqsize - 1; next = sc->msk_rxqend[i] + 1; sc->msk_txqstart[i] = next; sc->msk_txqend[i] = next + sc->msk_txqsize - 1; next = sc->msk_txqend[i] + 1; if (bootverbose) { device_printf(sc->msk_dev, "Port %d : Rx Queue %dKB(0x%08x:0x%08x)\n", i, sc->msk_rxqsize / 1024, sc->msk_rxqstart[i], sc->msk_rxqend[i]); device_printf(sc->msk_dev, "Port %d : Tx Queue %dKB(0x%08x:0x%08x)\n", i, sc->msk_txqsize / 1024, sc->msk_txqstart[i], sc->msk_txqend[i]); } } return (0); } static void msk_phy_power(struct msk_softc *sc, int mode) { uint32_t our, val; int i; switch (mode) { case MSK_PHY_POWERUP: /* Switch power to VCC (WA for VAUX problem). */ CSR_WRITE_1(sc, B0_POWER_CTRL, PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_OFF | PC_VCC_ON); /* Disable Core Clock Division, set Clock Select to 0. */ CSR_WRITE_4(sc, B2_Y2_CLK_CTRL, Y2_CLK_DIV_DIS); val = 0; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Enable bits are inverted. */ val = Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS; } /* * Enable PCI & Core Clock, enable clock gating for both Links. */ CSR_WRITE_1(sc, B2_Y2_CLK_GATE, val); our = CSR_PCI_READ_4(sc, PCI_OUR_REG_1); our &= ~(PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD); if (sc->msk_hw_id == CHIP_ID_YUKON_XL) { if (sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Deassert Low Power for 1st PHY. */ our |= PCI_Y2_PHY1_COMA; if (sc->msk_num_port > 1) our |= PCI_Y2_PHY2_COMA; } } if (sc->msk_hw_id == CHIP_ID_YUKON_EC_U || sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id >= CHIP_ID_YUKON_FE_P) { val = CSR_PCI_READ_4(sc, PCI_OUR_REG_4); val &= (PCI_FORCE_ASPM_REQUEST | PCI_ASPM_GPHY_LINK_DOWN | PCI_ASPM_INT_FIFO_EMPTY | PCI_ASPM_CLKRUN_REQUEST); /* Set all bits to 0 except bits 15..12. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_4, val); val = CSR_PCI_READ_4(sc, PCI_OUR_REG_5); val &= PCI_CTL_TIM_VMAIN_AV_MSK; CSR_PCI_WRITE_4(sc, PCI_OUR_REG_5, val); CSR_PCI_WRITE_4(sc, PCI_CFG_REG_1, 0); CSR_WRITE_2(sc, B0_CTST, Y2_HW_WOL_ON); /* * Disable status race, workaround for * Yukon EC Ultra & Yukon EX. */ val = CSR_READ_4(sc, B2_GP_IO); val |= GLB_GPIO_STAT_RACE_DIS; CSR_WRITE_4(sc, B2_GP_IO, val); CSR_READ_4(sc, B2_GP_IO); } /* Release PHY from PowerDown/COMA mode. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_1, our); for (i = 0; i < sc->msk_num_port; i++) { CSR_WRITE_2(sc, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_SET); CSR_WRITE_2(sc, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_CLR); } break; case MSK_PHY_POWERDOWN: val = CSR_PCI_READ_4(sc, PCI_OUR_REG_1); val |= PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { val &= ~PCI_Y2_PHY1_COMA; if (sc->msk_num_port > 1) val &= ~PCI_Y2_PHY2_COMA; } CSR_PCI_WRITE_4(sc, PCI_OUR_REG_1, val); val = Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Enable bits are inverted. */ val = 0; } /* * Disable PCI & Core Clock, disable clock gating for * both Links. */ CSR_WRITE_1(sc, B2_Y2_CLK_GATE, val); CSR_WRITE_1(sc, B0_POWER_CTRL, PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_ON | PC_VCC_OFF); break; default: break; } } static void mskc_reset(struct msk_softc *sc) { bus_addr_t addr; uint16_t status; uint32_t val; int i, initram; /* Disable ASF. */ if (sc->msk_hw_id >= CHIP_ID_YUKON_XL && sc->msk_hw_id <= CHIP_ID_YUKON_SUPR) { if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) { CSR_WRITE_4(sc, B28_Y2_CPU_WDOG, 0); status = CSR_READ_2(sc, B28_Y2_ASF_HCU_CCSR); /* Clear AHB bridge & microcontroller reset. */ status &= ~(Y2_ASF_HCU_CCSR_AHB_RST | Y2_ASF_HCU_CCSR_CPU_RST_MODE); /* Clear ASF microcontroller state. */ status &= ~Y2_ASF_HCU_CCSR_UC_STATE_MSK; status &= ~Y2_ASF_HCU_CCSR_CPU_CLK_DIVIDE_MSK; CSR_WRITE_2(sc, B28_Y2_ASF_HCU_CCSR, status); CSR_WRITE_4(sc, B28_Y2_CPU_WDOG, 0); } else CSR_WRITE_1(sc, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); CSR_WRITE_2(sc, B0_CTST, Y2_ASF_DISABLE); /* * Since we disabled ASF, S/W reset is required for * Power Management. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); CSR_WRITE_2(sc, B0_CTST, CS_RST_CLR); } /* Clear all error bits in the PCI status register. */ status = pci_read_config(sc->msk_dev, PCIR_STATUS, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); pci_write_config(sc->msk_dev, PCIR_STATUS, status | PCIM_STATUS_PERR | PCIM_STATUS_SERR | PCIM_STATUS_RMABORT | PCIM_STATUS_RTABORT | PCIM_STATUS_MDPERR, 2); CSR_WRITE_2(sc, B0_CTST, CS_MRST_CLR); switch (sc->msk_bustype) { case MSK_PEX_BUS: /* Clear all PEX errors. */ CSR_PCI_WRITE_4(sc, PEX_UNC_ERR_STAT, 0xffffffff); val = CSR_PCI_READ_4(sc, PEX_UNC_ERR_STAT); if ((val & PEX_RX_OV) != 0) { sc->msk_intrmask &= ~Y2_IS_HW_ERR; sc->msk_intrhwemask &= ~Y2_IS_PCI_EXP; } break; case MSK_PCI_BUS: case MSK_PCIX_BUS: /* Set Cache Line Size to 2(8bytes) if configured to 0. */ val = pci_read_config(sc->msk_dev, PCIR_CACHELNSZ, 1); if (val == 0) pci_write_config(sc->msk_dev, PCIR_CACHELNSZ, 2, 1); if (sc->msk_bustype == MSK_PCIX_BUS) { /* Set Cache Line Size opt. */ val = pci_read_config(sc->msk_dev, PCI_OUR_REG_1, 4); val |= PCI_CLS_OPT; pci_write_config(sc->msk_dev, PCI_OUR_REG_1, val, 4); } break; } /* Set PHY power state. */ msk_phy_power(sc, MSK_PHY_POWERUP); /* Reset GPHY/GMAC Control */ for (i = 0; i < sc->msk_num_port; i++) { /* GPHY Control reset. */ CSR_WRITE_1(sc, MR_ADDR(i, GPHY_CTRL), GPC_RST_SET); CSR_WRITE_1(sc, MR_ADDR(i, GPHY_CTRL), GPC_RST_CLR); /* GMAC Control reset. */ CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_RST_SET); CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_F_LOOPB_OFF); if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_BYP_MACSECRX_ON | GMC_BYP_MACSECTX_ON | GMC_BYP_RETR_ON); } if (sc->msk_hw_id == CHIP_ID_YUKON_SUPR && sc->msk_hw_rev > CHIP_REV_YU_SU_B0) CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, PCI_CLK_MACSEC_DIS); if (sc->msk_hw_id == CHIP_ID_YUKON_OPT && sc->msk_hw_rev == 0) { /* Disable PCIe PHY powerdown(reg 0x80, bit7). */ CSR_WRITE_4(sc, Y2_PEX_PHY_DATA, (0x0080 << 16) | 0x0080); } CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); /* LED On. */ CSR_WRITE_2(sc, B0_CTST, Y2_LED_STAT_ON); /* Clear TWSI IRQ. */ CSR_WRITE_4(sc, B2_I2C_IRQ, I2C_CLR_IRQ); /* Turn off hardware timer. */ CSR_WRITE_1(sc, B2_TI_CTRL, TIM_STOP); CSR_WRITE_1(sc, B2_TI_CTRL, TIM_CLR_IRQ); /* Turn off descriptor polling. */ CSR_WRITE_1(sc, B28_DPT_CTRL, DPT_STOP); /* Turn off time stamps. */ CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_STOP); CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ); initram = 0; if (sc->msk_hw_id == CHIP_ID_YUKON_XL || sc->msk_hw_id == CHIP_ID_YUKON_EC || sc->msk_hw_id == CHIP_ID_YUKON_FE) initram++; /* Configure timeout values. */ for (i = 0; initram > 0 && i < sc->msk_num_port; i++) { CSR_WRITE_2(sc, SELECT_RAM_BUFFER(i, B3_RI_CTRL), RI_RST_SET); CSR_WRITE_2(sc, SELECT_RAM_BUFFER(i, B3_RI_CTRL), RI_RST_CLR); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_R1), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA1), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS1), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_R1), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA1), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS1), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_R2), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA2), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS2), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_R2), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA2), MSK_RI_TO_53); CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS2), MSK_RI_TO_53); } /* Disable all interrupts. */ CSR_WRITE_4(sc, B0_HWE_IMSK, 0); CSR_READ_4(sc, B0_HWE_IMSK); CSR_WRITE_4(sc, B0_IMSK, 0); CSR_READ_4(sc, B0_IMSK); /* * On dual port PCI-X card, there is an problem where status * can be received out of order due to split transactions. */ if (sc->msk_pcixcap != 0 && sc->msk_num_port > 1) { uint16_t pcix_cmd; pcix_cmd = pci_read_config(sc->msk_dev, sc->msk_pcixcap + PCIXR_COMMAND, 2); /* Clear Max Outstanding Split Transactions. */ pcix_cmd &= ~PCIXM_COMMAND_MAX_SPLITS; CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); pci_write_config(sc->msk_dev, sc->msk_pcixcap + PCIXR_COMMAND, pcix_cmd, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } if (sc->msk_expcap != 0) { /* Change Max. Read Request Size to 2048 bytes. */ if (pci_get_max_read_req(sc->msk_dev) == 512) pci_set_max_read_req(sc->msk_dev, 2048); } /* Clear status list. */ bzero(sc->msk_stat_ring, sizeof(struct msk_stat_desc) * sc->msk_stat_count); sc->msk_stat_cons = 0; bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_RST_SET); CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_RST_CLR); /* Set the status list base address. */ addr = sc->msk_stat_ring_paddr; CSR_WRITE_4(sc, STAT_LIST_ADDR_LO, MSK_ADDR_LO(addr)); CSR_WRITE_4(sc, STAT_LIST_ADDR_HI, MSK_ADDR_HI(addr)); /* Set the status list last index. */ CSR_WRITE_2(sc, STAT_LAST_IDX, sc->msk_stat_count - 1); if (sc->msk_hw_id == CHIP_ID_YUKON_EC && sc->msk_hw_rev == CHIP_REV_YU_EC_A1) { /* WA for dev. #4.3 */ CSR_WRITE_2(sc, STAT_TX_IDX_TH, ST_TXTH_IDX_MASK); /* WA for dev. #4.18 */ CSR_WRITE_1(sc, STAT_FIFO_WM, 0x21); CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x07); } else { CSR_WRITE_2(sc, STAT_TX_IDX_TH, 0x0a); CSR_WRITE_1(sc, STAT_FIFO_WM, 0x10); if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev == CHIP_REV_YU_XL_A0) CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x04); else CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x10); CSR_WRITE_4(sc, STAT_ISR_TIMER_INI, 0x0190); } /* * Use default value for STAT_ISR_TIMER_INI, STAT_LEV_TIMER_INI. */ CSR_WRITE_4(sc, STAT_TX_TIMER_INI, MSK_USECS(sc, 1000)); /* Enable status unit. */ CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_OP_ON); CSR_WRITE_1(sc, STAT_TX_TIMER_CTRL, TIM_START); CSR_WRITE_1(sc, STAT_LEV_TIMER_CTRL, TIM_START); CSR_WRITE_1(sc, STAT_ISR_TIMER_CTRL, TIM_START); } static int msk_probe(device_t dev) { struct msk_softc *sc; char desc[100]; sc = device_get_softc(device_get_parent(dev)); /* * Not much to do here. We always know there will be * at least one GMAC present, and if there are two, * mskc_attach() will create a second device instance * for us. */ snprintf(desc, sizeof(desc), "Marvell Technology Group Ltd. %s Id 0x%02x Rev 0x%02x", model_name[sc->msk_hw_id - CHIP_ID_YUKON_XL], sc->msk_hw_id, sc->msk_hw_rev); device_set_desc_copy(dev, desc); return (BUS_PROBE_DEFAULT); } static int msk_attach(device_t dev) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &msk_ifdrv, .ifat_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST, .ifat_capabilities = IFCAP_TXCSUM | IFCAP_TSO4 | IFCAP_LINKSTATE, }; struct msk_softc *sc; struct msk_if_softc *sc_if; struct msk_mii_data *mmd; struct mii_data *mii; int i, port, error; uint8_t eaddr[6]; if_t ifp; if (dev == NULL) return (EINVAL); error = 0; sc_if = device_get_softc(dev); sc = device_get_softc(device_get_parent(dev)); mmd = device_get_ivars(dev); port = mmd->port; sc_if->msk_if_dev = dev; sc_if->msk_port = port; sc_if->msk_softc = sc; sc_if->msk_flags = sc->msk_pflags; sc->msk_if[port] = sc_if; /* Setup Tx/Rx queue register offsets. */ if (port == MSK_PORT_A) { sc_if->msk_txq = Q_XA1; sc_if->msk_txsq = Q_XS1; sc_if->msk_rxq = Q_R1; } else { sc_if->msk_txq = Q_XA2; sc_if->msk_txsq = Q_XS2; sc_if->msk_rxq = Q_R2; } callout_init_mtx(&sc_if->msk_tick_ch, &sc_if->msk_softc->msk_mtx, 0); msk_sysctl_node(sc_if); if ((error = msk_txrx_dma_alloc(sc_if) != 0)) goto fail; msk_rx_dma_jalloc(sc_if); /* * Do miibus setup. */ error = mii_attach(dev, &sc_if->msk_miibus, msk_mediachange, msk_mediastatus, BMSR_DEFCAPMASK, PHY_ADDR_MARV, MII_OFFSET_ANY, mmd->mii_flags); if (error) goto fail; mii = device_get_softc(sc_if->msk_miibus); /* * Enable Rx checksum offloading if controller supports * new descriptor formant and controller is not Yukon XL. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && sc->msk_hw_id != CHIP_ID_YUKON_XL) ifat.ifat_capabilities |= IFCAP_RXCSUM; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0 && (sc_if->msk_flags & MSK_FLAG_NORX_CSUM) == 0) ifat.ifat_capabilities |= IFCAP_RXCSUM; /* VLAN capability setup */ ifat.ifat_capabilities |= IFCAP_VLAN_MTU; if ((sc_if->msk_flags & MSK_FLAG_NOHWVLAN) == 0) { /* * Due to Tx checksum offload hardware bugs, msk(4) manually * computes checksum for short frames. For VLAN tagged frames * this workaround does not work so disable checksum offload * for VLAN interface. */ ifat.ifat_capabilities |= IFCAP_VLAN_HWTAGGING; ifat.ifat_capabilities |= IFCAP_VLAN_HWTSO; /* * Enable Rx checksum offloading for VLAN tagged frames * if controller support new descriptor format. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0 && (sc_if->msk_flags & MSK_FLAG_NORX_CSUM) == 0) ifat.ifat_capabilities |= IFCAP_VLAN_HWCSUM; } ifat.ifat_hwassist = MSK_CSUM_FEATURES | CSUM_TSO; ifat.ifat_capenable = ifat.ifat_capabilities; ifat.ifat_baudrate = ifmedia_baudrate(mii->mii_media_active); /* * Disable RX checksum offloading on controllers that don't use * new descriptor format but give chance to enable it. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) ifat.ifat_capenable &= ~IFCAP_RXCSUM; /* * Get station address for this interface. Note that * dual port cards actually come with three station * addresses: one for each port, plus an extra. The * extra one is used by the SysKonnect driver software * as a 'virtual' station address for when both ports * are operating in failover mode. Currently we don't * use this extra address. */ for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, B2_MAC_1 + (port * 8) + i); ifat.ifat_lla = eaddr; ifat.ifat_softc = sc_if; ifat.ifat_dunit = device_get_unit(dev); ifp = sc_if->msk_ifp = if_attach(&ifat); sc_if->msk_capenable = ifat.ifat_capenable; sc_if->msk_framesize = MSK_DEFAULT_FRAMESIZE; return (0); fail: /* Access should be ok even though lock has been dropped */ sc->msk_if[port] = NULL; msk_detach(dev); return (error); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int mskc_attach(device_t dev) { struct msk_softc *sc; struct msk_mii_data *mmd; int error, msic, msir, reg; sc = device_get_softc(dev); sc->msk_dev = dev; mtx_init(&sc->msk_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); /* * Map control/status registers. */ pci_enable_busmaster(dev); /* Allocate I/O resource */ #ifdef MSK_USEIOSPACE sc->msk_res_spec = msk_res_spec_io; #else sc->msk_res_spec = msk_res_spec_mem; #endif sc->msk_irq_spec = msk_irq_spec_legacy; error = bus_alloc_resources(dev, sc->msk_res_spec, sc->msk_res); if (error) { if (sc->msk_res_spec == msk_res_spec_mem) sc->msk_res_spec = msk_res_spec_io; else sc->msk_res_spec = msk_res_spec_mem; error = bus_alloc_resources(dev, sc->msk_res_spec, sc->msk_res); if (error) { device_printf(dev, "couldn't allocate %s resources\n", sc->msk_res_spec == msk_res_spec_mem ? "memory" : "I/O"); mtx_destroy(&sc->msk_mtx); return (ENXIO); } } /* Enable all clocks before accessing any registers. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, 0); CSR_WRITE_2(sc, B0_CTST, CS_RST_CLR); sc->msk_hw_id = CSR_READ_1(sc, B2_CHIP_ID); sc->msk_hw_rev = (CSR_READ_1(sc, B2_MAC_CFG) >> 4) & 0x0f; /* Bail out if chip is not recognized. */ if (sc->msk_hw_id < CHIP_ID_YUKON_XL || sc->msk_hw_id > CHIP_ID_YUKON_OPT || sc->msk_hw_id == CHIP_ID_YUKON_UNKNOWN) { device_printf(dev, "unknown device: id=0x%02x, rev=0x%02x\n", sc->msk_hw_id, sc->msk_hw_rev); mtx_destroy(&sc->msk_mtx); return (ENXIO); } SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "process_limit", CTLTYPE_INT | CTLFLAG_RW, &sc->msk_process_limit, 0, sysctl_hw_msk_proc_limit, "I", "max number of Rx events to process"); sc->msk_process_limit = MSK_PROC_DEFAULT; error = resource_int_value(device_get_name(dev), device_get_unit(dev), "process_limit", &sc->msk_process_limit); if (error == 0) { if (sc->msk_process_limit < MSK_PROC_MIN || sc->msk_process_limit > MSK_PROC_MAX) { device_printf(dev, "process_limit value out of range; " "using default: %d\n", MSK_PROC_DEFAULT); sc->msk_process_limit = MSK_PROC_DEFAULT; } } sc->msk_int_holdoff = MSK_INT_HOLDOFF_DEFAULT; SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "int_holdoff", CTLFLAG_RW, &sc->msk_int_holdoff, 0, "Maximum number of time to delay interrupts"); resource_int_value(device_get_name(dev), device_get_unit(dev), "int_holdoff", &sc->msk_int_holdoff); sc->msk_pmd = CSR_READ_1(sc, B2_PMD_TYP); /* Check number of MACs. */ sc->msk_num_port = 1; if ((CSR_READ_1(sc, B2_Y2_HW_RES) & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK) { if (!(CSR_READ_1(sc, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC)) sc->msk_num_port++; } /* Check bus type. */ if (pci_find_cap(sc->msk_dev, PCIY_EXPRESS, ®) == 0) { sc->msk_bustype = MSK_PEX_BUS; sc->msk_expcap = reg; } else if (pci_find_cap(sc->msk_dev, PCIY_PCIX, ®) == 0) { sc->msk_bustype = MSK_PCIX_BUS; sc->msk_pcixcap = reg; } else sc->msk_bustype = MSK_PCI_BUS; switch (sc->msk_hw_id) { case CHIP_ID_YUKON_EC: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_EC_U: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_JUMBO_NOCSUM; break; case CHIP_ID_YUKON_EX: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; /* * Yukon Extreme seems to have silicon bug for * automatic Tx checksum calculation capability. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_B0) sc->msk_pflags &= ~MSK_FLAG_AUTOTX_CSUM; /* * Yukon Extreme A0 could not use store-and-forward * for jumbo frames, so disable Tx checksum * offloading for jumbo frames. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_A0) sc->msk_pflags |= MSK_FLAG_JUMBO_NOCSUM; break; case CHIP_ID_YUKON_FE: sc->msk_clock = 100; /* 100 MHz */ sc->msk_pflags |= MSK_FLAG_FASTETHER; break; case CHIP_ID_YUKON_FE_P: sc->msk_clock = 50; /* 50 MHz */ sc->msk_pflags |= MSK_FLAG_FASTETHER | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; if (sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { /* * XXX * FE+ A0 has status LE writeback bug so msk(4) * does not rely on status word of received frame * in msk_rxeof() which in turn disables all * hardware assistance bits reported by the status * word as well as validity of the received frame. * Just pass received frames to upper stack with * minimal test and let upper stack handle them. */ sc->msk_pflags |= MSK_FLAG_NOHWVLAN | MSK_FLAG_NORXCHK | MSK_FLAG_NORX_CSUM; } break; case CHIP_ID_YUKON_XL: sc->msk_clock = 156; /* 156 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_SUPR: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; break; case CHIP_ID_YUKON_UL_2: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_OPT: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2; break; default: sc->msk_clock = 156; /* 156 MHz */ break; } /* Allocate IRQ resources. */ msic = pci_msi_count(dev); if (bootverbose) device_printf(dev, "MSI count : %d\n", msic); if (legacy_intr != 0) msi_disable = 1; if (msi_disable == 0 && msic > 0) { msir = 1; if (pci_alloc_msi(dev, &msir) == 0) { if (msir == 1) { sc->msk_pflags |= MSK_FLAG_MSI; sc->msk_irq_spec = msk_irq_spec_msi; } else pci_release_msi(dev); } } error = bus_alloc_resources(dev, sc->msk_irq_spec, sc->msk_irq); if (error) { device_printf(dev, "couldn't allocate IRQ resources\n"); goto fail; } if ((error = msk_status_dma_alloc(sc)) != 0) goto fail; /* Set base interrupt mask. */ sc->msk_intrmask = Y2_IS_HW_ERR | Y2_IS_STAT_BMU; sc->msk_intrhwemask = Y2_IS_TIST_OV | Y2_IS_MST_ERR | Y2_IS_IRQ_STAT | Y2_IS_PCI_EXP | Y2_IS_PCI_NEXP; /* Reset the adapter. */ mskc_reset(sc); if ((error = mskc_setup_rambuffer(sc)) != 0) goto fail; sc->msk_devs[MSK_PORT_A] = device_add_child(dev, "msk", -1); if (sc->msk_devs[MSK_PORT_A] == NULL) { device_printf(dev, "failed to add child for PORT_A\n"); error = ENXIO; goto fail; } mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | M_ZERO); if (mmd == NULL) { device_printf(dev, "failed to allocate memory for " "ivars of PORT_A\n"); error = ENXIO; goto fail; } mmd->port = MSK_PORT_A; mmd->pmd = sc->msk_pmd; mmd->mii_flags |= MIIF_DOPAUSE; if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') mmd->mii_flags |= MIIF_HAVEFIBER; if (sc->msk_pmd == 'P') mmd->mii_flags |= MIIF_HAVEFIBER | MIIF_MACPRIV0; device_set_ivars(sc->msk_devs[MSK_PORT_A], mmd); if (sc->msk_num_port > 1) { sc->msk_devs[MSK_PORT_B] = device_add_child(dev, "msk", -1); if (sc->msk_devs[MSK_PORT_B] == NULL) { device_printf(dev, "failed to add child for PORT_B\n"); error = ENXIO; goto fail; } mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | M_ZERO); if (mmd == NULL) { device_printf(dev, "failed to allocate memory for " "ivars of PORT_B\n"); error = ENXIO; goto fail; } mmd->port = MSK_PORT_B; mmd->pmd = sc->msk_pmd; if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') mmd->mii_flags |= MIIF_HAVEFIBER; if (sc->msk_pmd == 'P') mmd->mii_flags |= MIIF_HAVEFIBER | MIIF_MACPRIV0; device_set_ivars(sc->msk_devs[MSK_PORT_B], mmd); } error = bus_generic_attach(dev); if (error) { device_printf(dev, "failed to attach port(s)\n"); goto fail; } /* Hook interrupt last to avoid having to lock softc. */ error = bus_setup_intr(dev, sc->msk_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, msk_intr, sc, &sc->msk_intrhand); if (error != 0) { device_printf(dev, "couldn't set up interrupt handler\n"); goto fail; } fail: if (error != 0) mskc_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int msk_detach(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; if_t ifp; sc_if = device_get_softc(dev); KASSERT(mtx_initialized(&sc_if->msk_softc->msk_mtx), ("msk mutex not initialized in msk_detach")); MSK_IF_LOCK(sc_if); ifp = sc_if->msk_ifp; if (device_is_attached(dev)) { /* XXX */ sc_if->msk_flags |= MSK_FLAG_DETACH; msk_stop(sc_if); /* Can't hold locks while calling detach. */ MSK_IF_UNLOCK(sc_if); callout_drain(&sc_if->msk_tick_ch); if (ifp) if_detach(ifp); MSK_IF_LOCK(sc_if); } /* * We're generally called from mskc_detach() which is using * device_delete_child() to get to here. It's already trashed * miibus for us, so don't do it here or we'll panic. * * if (sc_if->msk_miibus != NULL) { * device_delete_child(dev, sc_if->msk_miibus); * sc_if->msk_miibus = NULL; * } */ msk_rx_dma_jfree(sc_if); msk_txrx_dma_free(sc_if); bus_generic_detach(dev); sc = sc_if->msk_softc; sc->msk_if[sc_if->msk_port] = NULL; MSK_IF_UNLOCK(sc_if); return (0); } static int mskc_detach(device_t dev) { struct msk_softc *sc; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->msk_mtx), ("msk mutex not initialized")); if (device_is_alive(dev)) { if (sc->msk_devs[MSK_PORT_A] != NULL) { free(device_get_ivars(sc->msk_devs[MSK_PORT_A]), M_DEVBUF); device_delete_child(dev, sc->msk_devs[MSK_PORT_A]); } if (sc->msk_devs[MSK_PORT_B] != NULL) { free(device_get_ivars(sc->msk_devs[MSK_PORT_B]), M_DEVBUF); device_delete_child(dev, sc->msk_devs[MSK_PORT_B]); } bus_generic_detach(dev); } /* Disable all interrupts. */ CSR_WRITE_4(sc, B0_IMSK, 0); CSR_READ_4(sc, B0_IMSK); CSR_WRITE_4(sc, B0_HWE_IMSK, 0); CSR_READ_4(sc, B0_HWE_IMSK); /* LED Off. */ CSR_WRITE_2(sc, B0_CTST, Y2_LED_STAT_OFF); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); msk_status_dma_free(sc); if (sc->msk_intrhand) { bus_teardown_intr(dev, sc->msk_irq[0], sc->msk_intrhand); sc->msk_intrhand = NULL; } bus_release_resources(dev, sc->msk_irq_spec, sc->msk_irq); if ((sc->msk_pflags & MSK_FLAG_MSI) != 0) pci_release_msi(dev); bus_release_resources(dev, sc->msk_res_spec, sc->msk_res); mtx_destroy(&sc->msk_mtx); return (0); } static bus_dma_tag_t mskc_get_dma_tag(device_t bus, device_t child __unused) { return (bus_get_dma_tag(bus)); } struct msk_dmamap_arg { bus_addr_t msk_busaddr; }; static void msk_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct msk_dmamap_arg *ctx; if (error != 0) return; ctx = arg; ctx->msk_busaddr = segs[0].ds_addr; } /* Create status DMA region. */ static int msk_status_dma_alloc(struct msk_softc *sc) { struct msk_dmamap_arg ctx; bus_size_t stat_sz; int count, error; /* * It seems controller requires number of status LE entries * is power of 2 and the maximum number of status LE entries * is 4096. For dual-port controllers, the number of status * LE entries should be large enough to hold both port's * status updates. */ count = 3 * MSK_RX_RING_CNT + MSK_TX_RING_CNT; count = imin(4096, roundup2(count, 1024)); sc->msk_stat_count = count; stat_sz = count * sizeof(struct msk_stat_desc); error = bus_dma_tag_create( bus_get_dma_tag(sc->msk_dev), /* parent */ MSK_STAT_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ stat_sz, /* maxsize */ 1, /* nsegments */ stat_sz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->msk_stat_tag); if (error != 0) { device_printf(sc->msk_dev, "failed to create status DMA tag\n"); return (error); } /* Allocate DMA'able memory and load the DMA map for status ring. */ error = bus_dmamem_alloc(sc->msk_stat_tag, (void **)&sc->msk_stat_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->msk_stat_map); if (error != 0) { device_printf(sc->msk_dev, "failed to allocate DMA'able memory for status ring\n"); return (error); } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc->msk_stat_tag, sc->msk_stat_map, sc->msk_stat_ring, stat_sz, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->msk_dev, "failed to load DMA'able memory for status ring\n"); return (error); } sc->msk_stat_ring_paddr = ctx.msk_busaddr; return (0); } static void msk_status_dma_free(struct msk_softc *sc) { /* Destroy status block. */ if (sc->msk_stat_tag) { if (sc->msk_stat_ring_paddr) { bus_dmamap_unload(sc->msk_stat_tag, sc->msk_stat_map); sc->msk_stat_ring_paddr = 0; } if (sc->msk_stat_ring) { bus_dmamem_free(sc->msk_stat_tag, sc->msk_stat_ring, sc->msk_stat_map); sc->msk_stat_ring = NULL; } bus_dma_tag_destroy(sc->msk_stat_tag); sc->msk_stat_tag = NULL; } } static int msk_txrx_dma_alloc(struct msk_if_softc *sc_if) { struct msk_dmamap_arg ctx; struct msk_txdesc *txd; struct msk_rxdesc *rxd; bus_size_t rxalign; int error, i; /* Create parent DMA tag. */ error = bus_dma_tag_create( bus_get_dma_tag(sc_if->msk_if_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_parent_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create parent DMA tag\n"); goto fail; } /* Create tag for Tx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_TX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_TX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_tx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx ring DMA tag\n"); goto fail; } /* Create tag for Rx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_rx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx ring DMA tag\n"); goto fail; } /* Create tag for Tx buffers. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_TSO_MAXSIZE, /* maxsize */ MSK_MAXTXSEGS, /* nsegments */ MSK_TSO_MAXSGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_tx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx DMA tag\n"); goto fail; } rxalign = 1; /* * Workaround hardware hang which seems to happen when Rx buffer * is not aligned on multiple of FIFO word(8 bytes). */ if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) rxalign = MSK_RX_BUF_ALIGN; /* Create tag for Rx buffers. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ rxalign, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_rx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx DMA tag\n"); goto fail; } /* Allocate DMA'able memory and load the DMA map for Tx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_tx_ring_tag, (void **)&sc_if->msk_rdata.msk_tx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_tx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for Tx ring\n"); goto fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, sc_if->msk_rdata.msk_tx_ring, MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Tx ring\n"); goto fail; } sc_if->msk_rdata.msk_tx_ring_paddr = ctx.msk_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_rx_ring_tag, (void **)&sc_if->msk_rdata.msk_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_rx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for Rx ring\n"); goto fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, sc_if->msk_rdata.msk_rx_ring, MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Rx ring\n"); goto fail; } sc_if->msk_rdata.msk_rx_ring_paddr = ctx.msk_busaddr; /* Create DMA maps for Tx buffers. */ for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx dmamap\n"); goto fail; } } /* Create DMA maps for Rx buffers. */ if ((error = bus_dmamap_create(sc_if->msk_cdata.msk_rx_tag, 0, &sc_if->msk_cdata.msk_rx_sparemap)) != 0) { device_printf(sc_if->msk_if_dev, "failed to create spare Rx dmamap\n"); goto fail; } for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx dmamap\n"); goto fail; } } fail: return (error); } static int msk_rx_dma_jalloc(struct msk_if_softc *sc_if) { struct msk_dmamap_arg ctx; struct msk_rxdesc *jrxd; bus_size_t rxalign; int error, i; if (jumbo_disable != 0 || (sc_if->msk_flags & MSK_FLAG_JUMBO) == 0) { sc_if->msk_flags &= ~MSK_FLAG_JUMBO; device_printf(sc_if->msk_if_dev, "disabling jumbo frame support\n"); return (0); } /* Create tag for jumbo Rx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_JUMBO_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_JUMBO_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_jumbo_rx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx ring DMA tag\n"); goto jumbo_fail; } rxalign = 1; /* * Workaround hardware hang which seems to happen when Rx buffer * is not aligned on multiple of FIFO word(8 bytes). */ if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) rxalign = MSK_RX_BUF_ALIGN; /* Create tag for jumbo Rx buffers. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ rxalign, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_jumbo_rx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx DMA tag\n"); goto jumbo_fail; } /* Allocate DMA'able memory and load the DMA map for jumbo Rx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, (void **)&sc_if->msk_rdata.msk_jumbo_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_jumbo_rx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for jumbo Rx ring\n"); goto jumbo_fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, sc_if->msk_rdata.msk_jumbo_rx_ring, MSK_JUMBO_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for jumbo Rx ring\n"); goto jumbo_fail; } sc_if->msk_rdata.msk_jumbo_rx_ring_paddr = ctx.msk_busaddr; /* Create DMA maps for jumbo Rx buffers. */ if ((error = bus_dmamap_create(sc_if->msk_cdata.msk_jumbo_rx_tag, 0, &sc_if->msk_cdata.msk_jumbo_rx_sparemap)) != 0) { device_printf(sc_if->msk_if_dev, "failed to create spare jumbo Rx dmamap\n"); goto jumbo_fail; } for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; jrxd->rx_m = NULL; jrxd->rx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_jumbo_rx_tag, 0, &jrxd->rx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx dmamap\n"); goto jumbo_fail; } } return (0); jumbo_fail: msk_rx_dma_jfree(sc_if); device_printf(sc_if->msk_if_dev, "disabling jumbo frame support " "due to resource shortage\n"); sc_if->msk_flags &= ~MSK_FLAG_JUMBO; return (error); } static void msk_txrx_dma_free(struct msk_if_softc *sc_if) { struct msk_txdesc *txd; struct msk_rxdesc *rxd; int i; /* Tx ring. */ if (sc_if->msk_cdata.msk_tx_ring_tag) { if (sc_if->msk_rdata.msk_tx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map); if (sc_if->msk_rdata.msk_tx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_rdata.msk_tx_ring, sc_if->msk_cdata.msk_tx_ring_map); sc_if->msk_rdata.msk_tx_ring = NULL; sc_if->msk_rdata.msk_tx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_tx_ring_tag); sc_if->msk_cdata.msk_tx_ring_tag = NULL; } /* Rx ring. */ if (sc_if->msk_cdata.msk_rx_ring_tag) { if (sc_if->msk_rdata.msk_rx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map); if (sc_if->msk_rdata.msk_rx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_rdata.msk_rx_ring, sc_if->msk_cdata.msk_rx_ring_map); sc_if->msk_rdata.msk_rx_ring = NULL; sc_if->msk_rdata.msk_rx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_rx_ring_tag); sc_if->msk_cdata.msk_rx_ring_tag = NULL; } /* Tx buffers. */ if (sc_if->msk_cdata.msk_tx_tag) { for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); txd->tx_dmamap = NULL; } } bus_dma_tag_destroy(sc_if->msk_cdata.msk_tx_tag); sc_if->msk_cdata.msk_tx_tag = NULL; } /* Rx buffers. */ if (sc_if->msk_cdata.msk_rx_tag) { for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; if (rxd->rx_dmamap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = NULL; } } if (sc_if->msk_cdata.msk_rx_sparemap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_rx_tag, sc_if->msk_cdata.msk_rx_sparemap); sc_if->msk_cdata.msk_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->msk_cdata.msk_rx_tag); sc_if->msk_cdata.msk_rx_tag = NULL; } if (sc_if->msk_cdata.msk_parent_tag) { bus_dma_tag_destroy(sc_if->msk_cdata.msk_parent_tag); sc_if->msk_cdata.msk_parent_tag = NULL; } } static void msk_rx_dma_jfree(struct msk_if_softc *sc_if) { struct msk_rxdesc *jrxd; int i; /* Jumbo Rx ring. */ if (sc_if->msk_cdata.msk_jumbo_rx_ring_tag) { if (sc_if->msk_rdata.msk_jumbo_rx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map); if (sc_if->msk_rdata.msk_jumbo_rx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_rdata.msk_jumbo_rx_ring, sc_if->msk_cdata.msk_jumbo_rx_ring_map); sc_if->msk_rdata.msk_jumbo_rx_ring = NULL; sc_if->msk_rdata.msk_jumbo_rx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_jumbo_rx_ring_tag); sc_if->msk_cdata.msk_jumbo_rx_ring_tag = NULL; } /* Jumbo Rx buffers. */ if (sc_if->msk_cdata.msk_jumbo_rx_tag) { for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; if (jrxd->rx_dmamap) { bus_dmamap_destroy( sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap); jrxd->rx_dmamap = NULL; } } if (sc_if->msk_cdata.msk_jumbo_rx_sparemap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_jumbo_rx_tag, sc_if->msk_cdata.msk_jumbo_rx_sparemap); sc_if->msk_cdata.msk_jumbo_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->msk_cdata.msk_jumbo_rx_tag); sc_if->msk_cdata.msk_jumbo_rx_tag = NULL; } } static int msk_encap(struct msk_if_softc *sc_if, struct mbuf **m_head) { struct msk_txdesc *txd, *txd_last; struct msk_tx_desc *tx_le; struct mbuf *m; bus_dmamap_t map; bus_dma_segment_t txsegs[MSK_MAXTXSEGS]; uint32_t control, csum, prod, si; uint16_t offset, tcp_offset, tso_mtu; int error, i, nseg, tso; MSK_IF_LOCK_ASSERT(sc_if); tcp_offset = offset = 0; m = *m_head; if (((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && (m->m_pkthdr.csum_flags & MSK_CSUM_FEATURES) != 0) || ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (m->m_pkthdr.csum_flags & CSUM_TSO) != 0)) { /* * Since mbuf has no protocol specific structure information * in it we have to inspect protocol information here to * setup TSO and checksum offload. I don't know why Marvell * made a such decision in chip design because other GigE * hardwares normally takes care of all these chores in * hardware. However, TSO performance of Yukon II is very * good such that it's worth to implement it. */ struct ether_header *eh; struct ip *ip; struct tcphdr *tcp; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ m = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } offset = sizeof(struct ether_header); m = m_pullup(m, offset); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } eh = mtod(m, struct ether_header *); /* Check if hardware VLAN insertion is off. */ if (eh->ether_type == htons(ETHERTYPE_VLAN)) { offset = sizeof(struct ether_vlan_header); m = m_pullup(m, offset); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } } m = m_pullup(m, offset + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, char *) + offset); offset += (ip->ip_hl << 2); tcp_offset = offset; if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { m = m_pullup(m, offset + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } tcp = (struct tcphdr *)(mtod(m, char *) + offset); offset += (tcp->th_off << 2); } else if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && (m->m_pkthdr.len < MSK_MIN_FRAMELEN) && (m->m_pkthdr.csum_flags & CSUM_TCP) != 0) { /* * It seems that Yukon II has Tx checksum offload bug * for small TCP packets that's less than 60 bytes in * size (e.g. TCP window probe packet, pure ACK packet). * Common work around like padding with zeros to make * the frame minimum ethernet frame size didn't work at * all. * Instead of disabling checksum offload completely we * resort to S/W checksum routine when we encounter * short TCP frames. * Short UDP packets appear to be handled correctly by * Yukon II. Also I assume this bug does not happen on * controllers that use newer descriptor format or * automatic Tx checksum calculation. */ m = m_pullup(m, offset + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *(uint16_t *)(m->m_data + offset + m->m_pkthdr.csum_data) = in_cksum_skip(m, m->m_pkthdr.len, offset); m->m_pkthdr.csum_flags &= ~CSUM_TCP; } *m_head = m; } prod = sc_if->msk_cdata.msk_tx_prod; txd = &sc_if->msk_cdata.msk_txdesc[prod]; txd_last = txd; map = txd->tx_dmamap; error = bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_tx_tag, map, *m_head, txsegs, &nseg, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(*m_head, M_NOWAIT, MSK_MAXTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_tx_tag, map, *m_head, txsegs, &nseg, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check number of available descriptors. */ if (sc_if->msk_cdata.msk_tx_cnt + nseg >= (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT)) { bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, map); return (ENOBUFS); } control = 0; tso = 0; tx_le = NULL; /* Check TSO support. */ if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) tso_mtu = m->m_pkthdr.tso_segsz; else tso_mtu = offset + m->m_pkthdr.tso_segsz; if (tso_mtu != sc_if->msk_cdata.msk_tso_mtu) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(tso_mtu); if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) tx_le->msk_control = htole32(OP_MSS | HW_OWNER); else tx_le->msk_control = htole32(OP_LRGLEN | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); sc_if->msk_cdata.msk_tso_mtu = tso_mtu; } tso++; } /* Check if we have a VLAN tag to insert. */ if ((m->m_flags & M_VLANTAG) != 0) { if (tx_le == NULL) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(0); tx_le->msk_control = htole32(OP_VLAN | HW_OWNER | htons(m->m_pkthdr.ether_vtag)); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } else { tx_le->msk_control |= htole32(OP_VLAN | htons(m->m_pkthdr.ether_vtag)); } control |= INS_VLAN; } /* Check if we have to handle checksum offload. */ if (tso == 0 && (m->m_pkthdr.csum_flags & MSK_CSUM_FEATURES) != 0) { if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) != 0) control |= CALSUM; else { control |= CALSUM | WR_SUM | INIT_SUM | LOCK_SUM; if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) control |= UDPTCP; /* Checksum write position. */ csum = (tcp_offset + m->m_pkthdr.csum_data) & 0xffff; /* Checksum start position. */ csum |= (uint32_t)tcp_offset << 16; if (csum != sc_if->msk_cdata.msk_last_csum) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(csum); tx_le->msk_control = htole32(1 << 16 | (OP_TCPLISW | HW_OWNER)); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); sc_if->msk_cdata.msk_last_csum = csum; } } } #ifdef MSK_64BIT_DMA if (MSK_ADDR_HI(txsegs[0].ds_addr) != sc_if->msk_cdata.msk_tx_high_addr) { sc_if->msk_cdata.msk_tx_high_addr = MSK_ADDR_HI(txsegs[0].ds_addr); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[0].ds_addr)); tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } #endif si = prod; tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[0].ds_addr)); if (tso == 0) tx_le->msk_control = htole32(txsegs[0].ds_len | control | OP_PACKET); else tx_le->msk_control = htole32(txsegs[0].ds_len | control | OP_LARGESEND); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); for (i = 1; i < nseg; i++) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; #ifdef MSK_64BIT_DMA if (MSK_ADDR_HI(txsegs[i].ds_addr) != sc_if->msk_cdata.msk_tx_high_addr) { sc_if->msk_cdata.msk_tx_high_addr = MSK_ADDR_HI(txsegs[i].ds_addr); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[i].ds_addr)); tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; } #endif tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[i].ds_addr)); tx_le->msk_control = htole32(txsegs[i].ds_len | control | OP_BUFFER | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } /* Update producer index. */ sc_if->msk_cdata.msk_tx_prod = prod; /* Set EOP on the last descriptor. */ prod = (prod + MSK_TX_RING_CNT - 1) % MSK_TX_RING_CNT; tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_control |= htole32(EOP); /* Turn the first descriptor ownership to hardware. */ tx_le = &sc_if->msk_rdata.msk_tx_ring[si]; tx_le->msk_control |= htole32(HW_OWNER); txd = &sc_if->msk_cdata.msk_txdesc[prod]; map = txd_last->tx_dmamap; txd_last->tx_dmamap = txd->tx_dmamap; txd->tx_dmamap = map; txd->tx_m = m; /* Sync descriptors. */ bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } static int msk_transmit(if_t ifp, struct mbuf *m) { struct msk_if_softc *sc_if; int error; if ((error = if_snd_enqueue(ifp, m)) != 0) return (error); sc_if = if_getsoftc(ifp, IF_DRIVER_SOFTC); MSK_IF_LOCK(sc_if); error = msk_start(sc_if); MSK_IF_UNLOCK(sc_if); return (error); } static int msk_start(struct msk_if_softc *sc_if) { struct mbuf *m; int error, enq; MSK_IF_LOCK_ASSERT(sc_if); if ((sc_if->msk_flags & MSK_FLAG_LINK) == 0) return (ENETDOWN); error = enq = 0; while (sc_if->msk_cdata.msk_tx_cnt < (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT) && (m = if_snd_dequeue(sc_if->msk_ifp)) != NULL) { if ((error = msk_encap(sc_if, &m)) != 0) { if (m == NULL) break; if_snd_prepend(sc_if->msk_ifp, m); break; } enq++; if_mtap(sc_if->msk_ifp, m, NULL, 0); } if (enq > 0) { /* Transmit */ CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_txq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_tx_prod); /* Set a timeout in case the chip goes out to lunch. */ sc_if->msk_watchdog_timer = MSK_TX_TIMEOUT; } return (0); } static void msk_watchdog(struct msk_if_softc *sc_if) { if_t ifp; MSK_IF_LOCK_ASSERT(sc_if); if (sc_if->msk_watchdog_timer == 0 || --sc_if->msk_watchdog_timer) return; ifp = sc_if->msk_ifp; if ((sc_if->msk_flags & MSK_FLAG_LINK) == 0) { if (bootverbose) if_printf(sc_if->msk_ifp, "watchdog timeout " "(missed link)\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); sc_if->msk_flags &= ~MSK_FLAG_RUNNING; msk_init(sc_if); return; } if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); sc_if->msk_flags &= ~MSK_FLAG_RUNNING; msk_init(sc_if); msk_start(sc_if); } static int mskc_shutdown(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && ((sc->msk_if[i]->msk_flags & MSK_FLAG_RUNNING) != 0)) msk_stop(sc->msk_if[i]); } MSK_UNLOCK(sc); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); return (0); } static int mskc_suspend(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && ((sc->msk_if[i]->msk_flags & MSK_FLAG_RUNNING) != 0)) msk_stop(sc->msk_if[i]); } /* Disable all interrupts. */ CSR_WRITE_4(sc, B0_IMSK, 0); CSR_READ_4(sc, B0_IMSK); CSR_WRITE_4(sc, B0_HWE_IMSK, 0); CSR_READ_4(sc, B0_HWE_IMSK); msk_phy_power(sc, MSK_PHY_POWERDOWN); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); sc->msk_pflags |= MSK_FLAG_SUSPEND; MSK_UNLOCK(sc); return (0); } static int mskc_resume(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, 0); mskc_reset(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && (sc->msk_if[i]->msk_if_flags & IFF_UP)) { sc->msk_if[i]->msk_flags &= ~MSK_FLAG_RUNNING; msk_init(sc->msk_if[i]); } } sc->msk_pflags &= ~MSK_FLAG_SUSPEND; MSK_UNLOCK(sc); return (0); } #ifndef __NO_STRICT_ALIGNMENT static __inline void msk_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - 3; for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= (MSK_RX_BUF_ALIGN - ETHER_ALIGN); } #endif static __inline void msk_rxcsum(struct msk_if_softc *sc_if, uint32_t control, struct mbuf *m) { struct ether_header *eh; struct ip *ip; struct udphdr *uh; int32_t hlen, len, pktlen, temp32; uint16_t csum, *opts; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) { if ((control & (CSS_IPV4 | CSS_IPFRAG)) == CSS_IPV4) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((control & CSS_IPV4_CSUM_OK) != 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((control & (CSS_TCP | CSS_UDP)) != 0 && (control & (CSS_TCPUDP_CSUM_OK)) != 0) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } return; } /* * Marvell Yukon controllers that support OP_RXCHKS has known * to have various Rx checksum offloading bugs. These * controllers can be configured to compute simple checksum * at two different positions. So we can compute IP and TCP/UDP * checksum at the same time. We intentionally have controller * compute TCP/UDP checksum twice by specifying the same * checksum start position and compare the result. If the value * is different it would indicate the hardware logic was wrong. */ if ((sc_if->msk_csum & 0xFFFF) != (sc_if->msk_csum >> 16)) { if (bootverbose) device_printf(sc_if->msk_if_dev, "Rx checksum value mismatch!\n"); return; } pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) return; /* can't handle fragmented packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (pktlen < (hlen + sizeof(struct tcphdr))) return; break; case IPPROTO_UDP: if (pktlen < (hlen + sizeof(struct udphdr))) return; uh = (struct udphdr *)((caddr_t)ip + hlen); if (uh->uh_sum == 0) return; /* no checksum */ break; default: return; } csum = bswap16(sc_if->msk_csum & 0xFFFF); /* Checksum fixup for IP options. */ len = hlen - sizeof(struct ip); if (len > 0) { opts = (uint16_t *)(ip + 1); for (; len > 0; len -= sizeof(uint16_t), opts++) { temp32 = csum - *opts; temp32 = (temp32 >> 16) + (temp32 & 65535); csum = temp32 & 65535; } } m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; m->m_pkthdr.csum_data = csum; } static void msk_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, int len) { struct mbuf *m; if_t ifp; struct msk_rxdesc *rxd; int cons, rxlen; ifp = sc_if->msk_ifp; MSK_IF_LOCK_ASSERT(sc_if); cons = sc_if->msk_cdata.msk_rx_cons; do { rxlen = status >> 16; if ((status & GMR_FS_VLAN) != 0 && (sc_if->msk_capenable & IFCAP_VLAN_HWTAGGING) != 0) rxlen -= ETHER_VLAN_ENCAP_LEN; if ((sc_if->msk_flags & MSK_FLAG_NORXCHK) != 0) { /* * For controllers that returns bogus status code * just do minimal check and let upper stack * handle this frame. */ if (len > MSK_MAX_FRAMELEN || len < ETHER_HDR_LEN) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_rxbuf(sc_if, cons); break; } } else if (len > sc_if->msk_framesize || ((status & GMR_FS_ANY_ERR) != 0) || ((status & GMR_FS_RX_OK) == 0) || (rxlen != len)) { /* Don't count flow-control packet as errors. */ if ((status & GMR_FS_GOOD_FC) == 0) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_rxbuf(sc_if, cons); break; } #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[(cons + 1) % MSK_RX_RING_CNT]; #else rxd = &sc_if->msk_cdata.msk_rxdesc[cons]; #endif m = rxd->rx_m; if (msk_newbuf(sc_if, cons) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse old buffer. */ msk_discard_rxbuf(sc_if, cons); break; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; #ifndef __NO_STRICT_ALIGNMENT if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) msk_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((sc_if->msk_capenable & IFCAP_RXCSUM) != 0) msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (sc_if->msk_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m->m_pkthdr.ether_vtag = sc_if->msk_vtag; m->m_flags |= M_VLANTAG; } MSK_IF_UNLOCK(sc_if); if_input(ifp, m); MSK_IF_LOCK(sc_if); } while (0); MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT); } static void msk_jumbo_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, int len) { struct mbuf *m; if_t ifp; struct msk_rxdesc *jrxd; int cons, rxlen; ifp = sc_if->msk_ifp; MSK_IF_LOCK_ASSERT(sc_if); cons = sc_if->msk_cdata.msk_rx_cons; do { rxlen = status >> 16; if ((status & GMR_FS_VLAN) != 0 && (sc_if->msk_capenable & IFCAP_VLAN_HWTAGGING) != 0) rxlen -= ETHER_VLAN_ENCAP_LEN; if (len > sc_if->msk_framesize || ((status & GMR_FS_ANY_ERR) != 0) || ((status & GMR_FS_RX_OK) == 0) || (rxlen != len)) { /* Don't count flow-control packet as errors. */ if ((status & GMR_FS_GOOD_FC) == 0) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_jumbo_rxbuf(sc_if, cons); break; } #ifdef MSK_64BIT_DMA jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[(cons + 1) % MSK_JUMBO_RX_RING_CNT]; #else jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[cons]; #endif m = jrxd->rx_m; if (msk_jumbo_newbuf(sc_if, cons) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse old buffer. */ msk_discard_jumbo_rxbuf(sc_if, cons); break; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; #ifndef __NO_STRICT_ALIGNMENT if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) msk_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if (sc_if->msk_capenable & IFCAP_RXCSUM) msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (sc_if->msk_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m->m_pkthdr.ether_vtag = sc_if->msk_vtag; m->m_flags |= M_VLANTAG; } MSK_IF_UNLOCK(sc_if); if_input(ifp, m); MSK_IF_LOCK(sc_if); } while (0); MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT); } static void msk_txeof(struct msk_if_softc *sc_if, int idx) { struct msk_txdesc *txd; struct msk_tx_desc *cur_tx; if_t ifp; uint32_t control; int cons, prog; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ cons = sc_if->msk_cdata.msk_tx_cons; prog = 0; for (; cons != idx; MSK_INC(cons, MSK_TX_RING_CNT)) { if (sc_if->msk_cdata.msk_tx_cnt <= 0) break; prog++; cur_tx = &sc_if->msk_rdata.msk_tx_ring[cons]; control = le32toh(cur_tx->msk_control); sc_if->msk_cdata.msk_tx_cnt--; if ((control & EOP) == 0) continue; txd = &sc_if->msk_cdata.msk_txdesc[cons]; bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); if_inc_txcounters(ifp, txd->tx_m); m_freem(txd->tx_m); txd->tx_m = NULL; } if (prog > 0) { sc_if->msk_cdata.msk_tx_cons = cons; if (sc_if->msk_cdata.msk_tx_cnt == 0) sc_if->msk_watchdog_timer = 0; /* No need to sync LEs as we didn't update LEs. */ } } static void msk_tick(void *xsc_if) { struct msk_if_softc *sc_if; struct mii_data *mii; sc_if = xsc_if; MSK_IF_LOCK_ASSERT(sc_if); mii = device_get_softc(sc_if->msk_miibus); mii_tick(mii); if ((sc_if->msk_flags & MSK_FLAG_LINK) == 0) msk_miibus_statchg(sc_if->msk_if_dev); msk_handle_events(sc_if->msk_softc); msk_watchdog(sc_if); callout_reset(&sc_if->msk_tick_ch, hz, msk_tick, sc_if); } static void msk_intr_phy(struct msk_if_softc *sc_if) { uint16_t status; msk_phy_readreg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_STAT); status = msk_phy_readreg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_STAT); /* Handle FIFO Underrun/Overflow? */ if ((status & PHY_M_IS_FIFO_ERROR)) device_printf(sc_if->msk_if_dev, "PHY FIFO underrun/overflow.\n"); } static void msk_intr_gmac(struct msk_if_softc *sc_if) { struct msk_softc *sc; uint8_t status; sc = sc_if->msk_softc; status = CSR_READ_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_SRC)); /* GMAC Rx FIFO overrun. */ if ((status & GM_IS_RX_FF_OR) != 0) CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_CLI_RX_FO); /* GMAC Tx FIFO underrun. */ if ((status & GM_IS_TX_FF_UR) != 0) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_CLI_TX_FU); device_printf(sc_if->msk_if_dev, "Tx FIFO underrun!\n"); /* * XXX * In case of Tx underrun, we may need to flush/reset * Tx MAC but that would also require resynchronization * with status LEs. Reinitializing status LEs would * affect other port in dual MAC configuration so it * should be avoided as possible as we can. * Due to lack of documentation it's all vague guess but * it needs more investigation. */ } } static void msk_handle_hwerr(struct msk_if_softc *sc_if, uint32_t status) { struct msk_softc *sc; sc = sc_if->msk_softc; if ((status & Y2_IS_PAR_RD1) != 0) { device_printf(sc_if->msk_if_dev, "RAM buffer read parity error\n"); /* Clear IRQ. */ CSR_WRITE_2(sc, SELECT_RAM_BUFFER(sc_if->msk_port, B3_RI_CTRL), RI_CLR_RD_PERR); } if ((status & Y2_IS_PAR_WR1) != 0) { device_printf(sc_if->msk_if_dev, "RAM buffer write parity error\n"); /* Clear IRQ. */ CSR_WRITE_2(sc, SELECT_RAM_BUFFER(sc_if->msk_port, B3_RI_CTRL), RI_CLR_WR_PERR); } if ((status & Y2_IS_PAR_MAC1) != 0) { device_printf(sc_if->msk_if_dev, "Tx MAC parity error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_CLI_TX_PE); } if ((status & Y2_IS_PAR_RX1) != 0) { device_printf(sc_if->msk_if_dev, "Rx parity error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_CLR_IRQ_PAR); } if ((status & (Y2_IS_TCP_TXS1 | Y2_IS_TCP_TXA1)) != 0) { device_printf(sc_if->msk_if_dev, "TCP segmentation error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_CLR_IRQ_TCP); } } static void msk_intr_hwerr(struct msk_softc *sc) { uint32_t status; uint32_t tlphead[4]; status = CSR_READ_4(sc, B0_HWE_ISRC); /* Time Stamp timer overflow. */ if ((status & Y2_IS_TIST_OV) != 0) CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ); if ((status & Y2_IS_PCI_NEXP) != 0) { /* * PCI Express Error occured which is not described in PEX * spec. * This error is also mapped either to Master Abort( * Y2_IS_MST_ERR) or Target Abort (Y2_IS_IRQ_STAT) bit and * can only be cleared there. */ device_printf(sc->msk_dev, "PCI Express protocol violation error\n"); } if ((status & (Y2_IS_MST_ERR | Y2_IS_IRQ_STAT)) != 0) { uint16_t v16; if ((status & Y2_IS_MST_ERR) != 0) device_printf(sc->msk_dev, "unexpected IRQ Status error\n"); else device_printf(sc->msk_dev, "unexpected IRQ Master error\n"); /* Reset all bits in the PCI status register. */ v16 = pci_read_config(sc->msk_dev, PCIR_STATUS, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); pci_write_config(sc->msk_dev, PCIR_STATUS, v16 | PCIM_STATUS_PERR | PCIM_STATUS_SERR | PCIM_STATUS_RMABORT | PCIM_STATUS_RTABORT | PCIM_STATUS_MDPERR, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } /* Check for PCI Express Uncorrectable Error. */ if ((status & Y2_IS_PCI_EXP) != 0) { uint32_t v32; /* * On PCI Express bus bridges are called root complexes (RC). * PCI Express errors are recognized by the root complex too, * which requests the system to handle the problem. After * error occurrence it may be that no access to the adapter * may be performed any longer. */ v32 = CSR_PCI_READ_4(sc, PEX_UNC_ERR_STAT); if ((v32 & PEX_UNSUP_REQ) != 0) { /* Ignore unsupported request error. */ device_printf(sc->msk_dev, "Uncorrectable PCI Express error\n"); } if ((v32 & (PEX_FATAL_ERRORS | PEX_POIS_TLP)) != 0) { int i; /* Get TLP header form Log Registers. */ for (i = 0; i < 4; i++) tlphead[i] = CSR_PCI_READ_4(sc, PEX_HEADER_LOG + i * 4); /* Check for vendor defined broadcast message. */ if (!(tlphead[0] == 0x73004001 && tlphead[1] == 0x7f)) { sc->msk_intrhwemask &= ~Y2_IS_PCI_EXP; CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); } } /* Clear the interrupt. */ CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); CSR_PCI_WRITE_4(sc, PEX_UNC_ERR_STAT, 0xffffffff); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } if ((status & Y2_HWE_L1_MASK) != 0 && sc->msk_if[MSK_PORT_A] != NULL) msk_handle_hwerr(sc->msk_if[MSK_PORT_A], status); if ((status & Y2_HWE_L2_MASK) != 0 && sc->msk_if[MSK_PORT_B] != NULL) msk_handle_hwerr(sc->msk_if[MSK_PORT_B], status >> 8); } static __inline void msk_rxput(struct msk_if_softc *sc_if) { struct msk_softc *sc; sc = sc_if->msk_softc; if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) bus_dmamap_sync( sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); else bus_dmamap_sync( sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_2(sc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); } static int msk_handle_events(struct msk_softc *sc) { struct msk_if_softc *sc_if; int rxput[2]; struct msk_stat_desc *sd; uint32_t control, status; int cons, len, port, rxprog; if (sc->msk_stat_cons == CSR_READ_2(sc, STAT_PUT_IDX)) return (0); /* Sync status LEs. */ bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rxput[MSK_PORT_A] = rxput[MSK_PORT_B] = 0; rxprog = 0; cons = sc->msk_stat_cons; for (;;) { sd = &sc->msk_stat_ring[cons]; control = le32toh(sd->msk_control); if ((control & HW_OWNER) == 0) break; control &= ~HW_OWNER; sd->msk_control = htole32(control); status = le32toh(sd->msk_status); len = control & STLE_LEN_MASK; port = (control >> 16) & 0x01; sc_if = sc->msk_if[port]; if (sc_if == NULL) { device_printf(sc->msk_dev, "invalid port opcode " "0x%08x\n", control & STLE_OP_MASK); continue; } switch (control & STLE_OP_MASK) { case OP_RXVLAN: sc_if->msk_vtag = ntohs(len); break; case OP_RXCHKSVLAN: sc_if->msk_vtag = ntohs(len); /* FALLTHROUGH */ case OP_RXCHKS: sc_if->msk_csum = status; break; case OP_RXSTAT: if (!(sc_if->msk_flags & MSK_FLAG_RUNNING)) break; if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) msk_jumbo_rxeof(sc_if, status, control, len); else msk_rxeof(sc_if, status, control, len); rxprog++; /* * Because there is no way to sync single Rx LE * put the DMA sync operation off until the end of * event processing. */ rxput[port]++; /* Update prefetch unit if we've passed water mark. */ if (rxput[port] >= sc_if->msk_cdata.msk_rx_putwm) { msk_rxput(sc_if); rxput[port] = 0; } break; case OP_TXINDEXLE: if (sc->msk_if[MSK_PORT_A] != NULL) msk_txeof(sc->msk_if[MSK_PORT_A], status & STLE_TXA1_MSKL); if (sc->msk_if[MSK_PORT_B] != NULL) msk_txeof(sc->msk_if[MSK_PORT_B], ((status & STLE_TXA2_MSKL) >> STLE_TXA2_SHIFTL) | ((len & STLE_TXA2_MSKH) << STLE_TXA2_SHIFTH)); break; default: device_printf(sc->msk_dev, "unhandled opcode 0x%08x\n", control & STLE_OP_MASK); break; } MSK_INC(cons, sc->msk_stat_count); if (rxprog > sc->msk_process_limit) break; } sc->msk_stat_cons = cons; bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (rxput[MSK_PORT_A] > 0) msk_rxput(sc->msk_if[MSK_PORT_A]); if (rxput[MSK_PORT_B] > 0) msk_rxput(sc->msk_if[MSK_PORT_B]); return (sc->msk_stat_cons != CSR_READ_2(sc, STAT_PUT_IDX)); } static void msk_intr(void *xsc) { struct msk_softc *sc; struct msk_if_softc *sc_if0, *sc_if1; uint32_t status; int domore; sc = xsc; MSK_LOCK(sc); /* Reading B0_Y2_SP_ISRC2 masks further interrupts. */ status = CSR_READ_4(sc, B0_Y2_SP_ISRC2); if (status == 0 || status == 0xffffffff || (sc->msk_pflags & MSK_FLAG_SUSPEND) != 0 || (status & sc->msk_intrmask) == 0) { CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2); MSK_UNLOCK(sc); return; } sc_if0 = sc->msk_if[MSK_PORT_A]; sc_if1 = sc->msk_if[MSK_PORT_B]; if ((status & Y2_IS_IRQ_PHY1) != 0 && sc_if0 != NULL) msk_intr_phy(sc_if0); if ((status & Y2_IS_IRQ_PHY2) != 0 && sc_if1 != NULL) msk_intr_phy(sc_if1); if ((status & Y2_IS_IRQ_MAC1) != 0 && sc_if0 != NULL) msk_intr_gmac(sc_if0); if ((status & Y2_IS_IRQ_MAC2) != 0 && sc_if1 != NULL) msk_intr_gmac(sc_if1); if ((status & (Y2_IS_CHK_RX1 | Y2_IS_CHK_RX2)) != 0) { device_printf(sc->msk_dev, "Rx descriptor error\n"); sc->msk_intrmask &= ~(Y2_IS_CHK_RX1 | Y2_IS_CHK_RX2); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); } if ((status & (Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXA2)) != 0) { device_printf(sc->msk_dev, "Tx descriptor error\n"); sc->msk_intrmask &= ~(Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXA2); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); } if ((status & Y2_IS_HW_ERR) != 0) msk_intr_hwerr(sc); domore = msk_handle_events(sc); if ((status & Y2_IS_STAT_BMU) != 0 && domore == 0) CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_CLR_IRQ); /* Reenable interrupts. */ CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2); if (sc_if0 != NULL && (sc_if0->msk_flags & MSK_FLAG_RUNNING) != 0) msk_start(sc_if0); if (sc_if1 != NULL && (sc_if1->msk_flags & MSK_FLAG_RUNNING) != 0) msk_start(sc_if1); MSK_UNLOCK(sc); } static void msk_set_tx_stfwd(struct msk_if_softc *sc_if) { struct msk_softc *sc; if_t ifp; ifp = sc_if->msk_ifp; sc = sc_if->msk_softc; if ((sc->msk_hw_id == CHIP_ID_YUKON_EX && sc->msk_hw_rev != CHIP_REV_YU_EX_A0) || sc->msk_hw_id >= CHIP_ID_YUKON_SUPR) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_ENA); } else { if (sc_if->msk_framesize > MSK_DEFAULT_FRAMESIZE) { /* Set Tx GMAC FIFO Almost Empty Threshold. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_AE_THR), MSK_ECU_JUMBO_WM << 16 | MSK_ECU_AE_THR); /* Disable Store & Forward mode for Tx. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_DIS); } else { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_ENA); } } } static void msk_init(struct msk_if_softc *sc_if) { struct msk_softc *sc; if_t ifp; struct mii_data *mii; uint8_t *eaddr; uint16_t gmac; uint32_t reg; int error; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; sc = sc_if->msk_softc; mii = device_get_softc(sc_if->msk_miibus); if ((sc_if->msk_flags & MSK_FLAG_RUNNING) != 0) return; error = 0; /* Cancel pending I/O and free all Rx/Tx buffers. */ msk_stop(sc_if); /* GMAC Control reset. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_F_LOOPB_OFF); if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_BYP_MACSECRX_ON | GMC_BYP_MACSECTX_ON | GMC_BYP_RETR_ON); /* * Initialize GMAC first such that speed/duplex/flow-control * parameters are renegotiated when interface is brought up. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, 0); /* Dummy read the Interrupt Source Register. */ CSR_READ_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_SRC)); /* Clear MIB stats. */ msk_stats_clear(sc_if); /* Disable FCS. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_CTRL, GM_RXCR_CRC_DIS); /* Setup Transmit Control Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_CTRL, TX_COL_THR(TX_COL_DEF)); /* Setup Transmit Flow Control Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_FLOW_CTRL, 0xffff); /* Setup Transmit Parameter Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_PARAM, TX_JAM_LEN_VAL(TX_JAM_LEN_DEF) | TX_JAM_IPG_VAL(TX_JAM_IPG_DEF) | TX_IPG_JAM_DATA(TX_IPG_JAM_DEF) | TX_BACK_OFF_LIM(TX_BOF_LIM_DEF)); gmac = DATA_BLIND_VAL(DATA_BLIND_DEF) | GM_SMOD_VLAN_ENA | IPG_DATA_VAL(IPG_DATA_DEF); if (sc_if->msk_framesize > MSK_DEFAULT_FRAMESIZE) gmac |= GM_SMOD_JUMBO_ENA; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SERIAL_MODE, gmac); /* Set station address. */ eaddr = if_lladdr(ifp); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1L, eaddr[0] | (eaddr[1] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1M, eaddr[2] | (eaddr[3] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1H, eaddr[4] | (eaddr[5] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2L, eaddr[0] | (eaddr[1] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2M, eaddr[2] | (eaddr[3] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2H, eaddr[4] | (eaddr[5] << 8)); /* Disable interrupts for counter overflows. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_IRQ_MSK, 0); GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_IRQ_MSK, 0); GMAC_WRITE_2(sc, sc_if->msk_port, GM_TR_IRQ_MSK, 0); /* Configure Rx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_CLR); reg = GMF_OPER_ON | GMF_RX_F_FL_ON; if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P || sc->msk_hw_id == CHIP_ID_YUKON_EX) reg |= GMF_RX_OVER_ON; CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), reg); /* Set receive filter. */ msk_rxfilter(sc_if); if (sc->msk_hw_id == CHIP_ID_YUKON_XL) { /* Clear flush mask - HW bug. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_MSK), 0); } else { /* Flush Rx MAC FIFO on any flow control or error. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_MSK), GMR_FS_ANY_ERR); } /* * Set Rx FIFO flush threshold to 64 bytes + 1 FIFO word * due to hardware hang on receipt of pause frames. */ reg = RX_GMF_FL_THR_DEF + 1; /* Another magic for Yukon FE+ - From Linux. */ if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) reg = 0x178; CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_THR), reg); /* Configure Tx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_OPER_ON); /* Configure hardware VLAN tag insertion/stripping. */ msk_setvlan(sc_if); if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) { /* Set Rx Pause threshold. */ CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_LP_THR), MSK_ECU_LLPP); CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_UP_THR), MSK_ECU_ULPP); /* Configure store-and-forward for Tx. */ msk_set_tx_stfwd(sc_if); } if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { /* Disable dynamic watermark - from Linux. */ reg = CSR_READ_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA)); reg &= ~0x03; CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA), reg); } /* * Disable Force Sync bit and Alloc bit in Tx RAM interface * arbiter as we don't use Sync Tx queue. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_DIS_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC); /* Enable the RAM Interface Arbiter. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_ENA_ARB); /* Setup RAM buffer. */ msk_set_rambuffer(sc_if); /* Disable Tx sync Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txsq, RB_CTRL), RB_RST_SET); /* Setup Tx Queue Bus Memory Interface. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_CLR_RESET); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_OPER_INIT); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_FIFO_OP_ON); CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_txq, Q_WM), MSK_BMU_TX_WM); switch (sc->msk_hw_id) { case CHIP_ID_YUKON_EC_U: if (sc->msk_hw_rev == CHIP_REV_YU_EC_U_A0) { /* Fix for Yukon-EC Ultra: set BMU FIFO level */ CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_txq, Q_AL), MSK_ECU_TXFF_LEV); } break; case CHIP_ID_YUKON_EX: /* * Yukon Extreme seems to have silicon bug for * automatic Tx checksum calculation capability. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_B0) CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_F), F_TX_CHK_AUTO_OFF); break; } /* Setup Rx Queue Bus Memory Interface. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_CLR_RESET); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_OPER_INIT); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_FIFO_OP_ON); CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_rxq, Q_WM), MSK_BMU_RX_WM); if (sc->msk_hw_id == CHIP_ID_YUKON_EC_U && sc->msk_hw_rev >= CHIP_REV_YU_EC_U_A1) { /* MAC Rx RAM Read is controlled by hardware. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_F), F_M_RX_RAM_DIS); } msk_set_prefetch(sc, sc_if->msk_txq, sc_if->msk_rdata.msk_tx_ring_paddr, MSK_TX_RING_CNT - 1); msk_init_tx_ring(sc_if); /* Disable Rx checksum offload and RSS hash. */ reg = BMU_DIS_RX_RSS_HASH; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_capenable & IFCAP_RXCSUM) != 0) reg |= BMU_ENA_RX_CHKSUM; else reg |= BMU_DIS_RX_CHKSUM; CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), reg); if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) { msk_set_prefetch(sc, sc_if->msk_rxq, sc_if->msk_rdata.msk_jumbo_rx_ring_paddr, MSK_JUMBO_RX_RING_CNT - 1); error = msk_init_jumbo_rx_ring(sc_if); } else { msk_set_prefetch(sc, sc_if->msk_rxq, sc_if->msk_rdata.msk_rx_ring_paddr, MSK_RX_RING_CNT - 1); error = msk_init_rx_ring(sc_if); } if (error != 0) { device_printf(sc_if->msk_if_dev, "initialization failed: no memory for Rx buffers\n"); msk_stop(sc_if); return; } if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) { /* Disable flushing of non-ASF packets. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RX_MACSEC_FLUSH_OFF); } /* Configure interrupt handling. */ if (sc_if->msk_port == MSK_PORT_A) { sc->msk_intrmask |= Y2_IS_PORT_A; sc->msk_intrhwemask |= Y2_HWE_L1_MASK; } else { sc->msk_intrmask |= Y2_IS_PORT_B; sc->msk_intrhwemask |= Y2_HWE_L2_MASK; } /* Configure IRQ moderation mask. */ CSR_WRITE_4(sc, B2_IRQM_MSK, sc->msk_intrmask); if (sc->msk_int_holdoff > 0) { /* Configure initial IRQ moderation timer value. */ CSR_WRITE_4(sc, B2_IRQM_INI, MSK_USECS(sc, sc->msk_int_holdoff)); CSR_WRITE_4(sc, B2_IRQM_VAL, MSK_USECS(sc, sc->msk_int_holdoff)); /* Start IRQ moderation. */ CSR_WRITE_1(sc, B2_IRQM_CTRL, TIM_START); } CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); sc_if->msk_flags |= MSK_FLAG_RUNNING; sc_if->msk_flags &= ~MSK_FLAG_LINK; mii_mediachg(mii); callout_reset(&sc_if->msk_tick_ch, hz, msk_tick, sc_if); } static void msk_set_rambuffer(struct msk_if_softc *sc_if) { struct msk_softc *sc; int ltpp, utpp; sc = sc_if->msk_softc; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) return; /* Setup Rx Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_RST_CLR); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_START), sc->msk_rxqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_END), sc->msk_rxqend[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_WP), sc->msk_rxqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RP), sc->msk_rxqstart[sc_if->msk_port] / 8); utpp = (sc->msk_rxqend[sc_if->msk_port] + 1 - sc->msk_rxqstart[sc_if->msk_port] - MSK_RB_ULPP) / 8; ltpp = (sc->msk_rxqend[sc_if->msk_port] + 1 - sc->msk_rxqstart[sc_if->msk_port] - MSK_RB_LLPP_B) / 8; if (sc->msk_rxqsize < MSK_MIN_RXQ_SIZE) ltpp += (MSK_RB_LLPP_B - MSK_RB_LLPP_S) / 8; CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RX_UTPP), utpp); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RX_LTPP), ltpp); /* Set Rx priority(RB_RX_UTHP/RB_RX_LTHP) thresholds? */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_ENA_OP_MD); CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL)); /* Setup Tx Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_CLR); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_START), sc->msk_txqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_END), sc->msk_txqend[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_WP), sc->msk_txqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_RP), sc->msk_txqstart[sc_if->msk_port] / 8); /* Enable Store & Forward for Tx side. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_ENA_STFWD); CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_ENA_OP_MD); CSR_READ_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL)); } static void msk_set_prefetch(struct msk_softc *sc, int qaddr, bus_addr_t addr, uint32_t count) { /* Reset the prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_CLR); /* Set LE base address. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_ADDR_LOW_REG), MSK_ADDR_LO(addr)); CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_ADDR_HI_REG), MSK_ADDR_HI(addr)); /* Set the list last index. */ CSR_WRITE_2(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_LAST_IDX_REG), count); /* Turn on prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_OP_ON); /* Dummy read to ensure write. */ CSR_READ_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG)); } static void msk_stop(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct msk_txdesc *txd; struct msk_rxdesc *rxd; struct msk_rxdesc *jrxd; if_t ifp; uint32_t val; int i; MSK_IF_LOCK_ASSERT(sc_if); sc = sc_if->msk_softc; ifp = sc_if->msk_ifp; callout_stop(&sc_if->msk_tick_ch); sc_if->msk_watchdog_timer = 0; /* Disable interrupts. */ if (sc_if->msk_port == MSK_PORT_A) { sc->msk_intrmask &= ~Y2_IS_PORT_A; sc->msk_intrhwemask &= ~Y2_HWE_L1_MASK; } else { sc->msk_intrmask &= ~Y2_IS_PORT_B; sc->msk_intrhwemask &= ~Y2_HWE_L2_MASK; } CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); /* Disable Tx/Rx MAC. */ val = GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); val &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, val); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); /* Update stats and clear counters. */ msk_stats_update(sc_if); /* Stop Tx BMU. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP); val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR)); for (i = 0; i < MSK_TIMEOUT; i++) { if ((val & (BMU_STOP | BMU_IDLE)) == 0) { CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP); val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR)); } else break; DELAY(1); } if (i == MSK_TIMEOUT) device_printf(sc_if->msk_if_dev, "Tx BMU stop failed\n"); CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_SET | RB_DIS_OP_MD); /* Disable all GMAC interrupt. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_MSK), 0); /* Disable PHY interrupt. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, 0); /* Disable the RAM Interface Arbiter. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_DIS_ARB); /* Reset the PCI FIFO of the async Tx queue */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_RST_SET | BMU_FIFO_RST); /* Reset the Tx prefetch units. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(sc_if->msk_txq, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); /* Reset the RAM Buffer async Tx queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_SET); /* Reset Tx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_SET); /* Set Pause Off. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_PAUSE_OFF); /* * The Rx Stop command will not work for Yukon-2 if the BMU does not * reach the end of packet and since we can't make sure that we have * incoming data, we must reset the BMU while it is not during a DMA * transfer. Since it is possible that the Rx path is still active, * the Rx RAM buffer will be stopped first, so any possible incoming * data will not trigger a DMA. After the RAM buffer is stopped, the * BMU is polled until any DMA in progress is ended and only then it * will be reset. */ /* Disable the RAM Buffer receive queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_DIS_OP_MD); for (i = 0; i < MSK_TIMEOUT; i++) { if (CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, Q_RSL)) == CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, Q_RL))) break; DELAY(1); } if (i == MSK_TIMEOUT) device_printf(sc_if->msk_if_dev, "Rx BMU stop failed\n"); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_RST_SET | BMU_FIFO_RST); /* Reset the Rx prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); /* Reset the RAM Buffer receive queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_RST_SET); /* Reset Rx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_SET); /* Free Rx and Tx mbufs still in the queues. */ for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; if (jrxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap); m_freem(jrxd->rx_m); jrxd->rx_m = NULL; } } for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } /* * Mark the interface down. */ sc_if->msk_flags &= ~MSK_FLAG_RUNNING; sc_if->msk_flags &= ~MSK_FLAG_LINK; } /* * When GM_PAR_MIB_CLR bit of GM_PHY_ADDR is set, reading lower * counter clears high 16 bits of the counter such that accessing * lower 16 bits should be the last operation. */ #define MSK_READ_MIB32(x, y) \ (((uint32_t)GMAC_READ_2(sc, x, (y) + 4)) << 16) + \ (uint32_t)GMAC_READ_2(sc, x, y) #define MSK_READ_MIB64(x, y) \ (((uint64_t)MSK_READ_MIB32(x, (y) + 8)) << 32) + \ (uint64_t)MSK_READ_MIB32(x, y) static void msk_stats_clear(struct msk_if_softc *sc_if) { struct msk_softc *sc; uint32_t reg; uint16_t gmac; int i; MSK_IF_LOCK_ASSERT(sc_if); sc = sc_if->msk_softc; /* Set MIB Clear Counter Mode. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_PHY_ADDR); GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac | GM_PAR_MIB_CLR); /* Read all MIB Counters with Clear Mode set. */ for (i = GM_RXF_UC_OK; i <= GM_TXE_FIFO_UR; i += sizeof(uint32_t)) reg = MSK_READ_MIB32(sc_if->msk_port, i); /* Clear MIB Clear Counter Mode. */ gmac &= ~GM_PAR_MIB_CLR; GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac); } static void msk_stats_update(struct msk_if_softc *sc_if) { struct msk_softc *sc; if_t ifp; struct msk_hw_stats *stats; uint16_t gmac; uint32_t reg; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; if ((sc_if->msk_flags & MSK_FLAG_RUNNING) == 0) return; sc = sc_if->msk_softc; stats = &sc_if->msk_stats; /* Set MIB Clear Counter Mode. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_PHY_ADDR); GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac | GM_PAR_MIB_CLR); /* Rx stats. */ stats->rx_ucast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_UC_OK); stats->rx_bcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_BC_OK); stats->rx_pause_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MPAUSE); stats->rx_mcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MC_OK); stats->rx_crc_errs += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_FCS_ERR); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE1); stats->rx_good_octets += MSK_READ_MIB64(sc_if->msk_port, GM_RXO_OK_LO); stats->rx_bad_octets += MSK_READ_MIB64(sc_if->msk_port, GM_RXO_ERR_LO); stats->rx_runts += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SHT); stats->rx_runt_errs += MSK_READ_MIB32(sc_if->msk_port, GM_RXE_FRAG); stats->rx_pkts_64 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_64B); stats->rx_pkts_65_127 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_127B); stats->rx_pkts_128_255 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_255B); stats->rx_pkts_256_511 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_511B); stats->rx_pkts_512_1023 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_1023B); stats->rx_pkts_1024_1518 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_1518B); stats->rx_pkts_1519_max += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MAX_SZ); stats->rx_pkts_too_long += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_LNG_ERR); stats->rx_pkts_jabbers += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_JAB_PKT); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE2); stats->rx_fifo_oflows += MSK_READ_MIB32(sc_if->msk_port, GM_RXE_FIFO_OV); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE3); /* Tx stats. */ stats->tx_ucast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_UC_OK); stats->tx_bcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_BC_OK); stats->tx_pause_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MPAUSE); stats->tx_mcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MC_OK); stats->tx_octets += MSK_READ_MIB64(sc_if->msk_port, GM_TXO_OK_LO); stats->tx_pkts_64 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_64B); stats->tx_pkts_65_127 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_127B); stats->tx_pkts_128_255 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_255B); stats->tx_pkts_256_511 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_511B); stats->tx_pkts_512_1023 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_1023B); stats->tx_pkts_1024_1518 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_1518B); stats->tx_pkts_1519_max += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MAX_SZ); reg = MSK_READ_MIB32(sc_if->msk_port, GM_TXF_SPARE1); stats->tx_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_COL); stats->tx_late_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_LAT_COL); stats->tx_excess_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_ABO_COL); stats->tx_multi_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MUL_COL); stats->tx_single_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_SNG_COL); stats->tx_underflows += MSK_READ_MIB32(sc_if->msk_port, GM_TXE_FIFO_UR); /* Clear MIB Clear Counter Mode. */ gmac &= ~GM_PAR_MIB_CLR; GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac); } static int msk_sysctl_stat32(SYSCTL_HANDLER_ARGS) { struct msk_softc *sc; struct msk_if_softc *sc_if; uint32_t result, *stat; int off; sc_if = (struct msk_if_softc *)arg1; sc = sc_if->msk_softc; off = arg2; stat = (uint32_t *)((uint8_t *)&sc_if->msk_stats + off); MSK_IF_LOCK(sc_if); result = MSK_READ_MIB32(sc_if->msk_port, GM_MIB_CNT_BASE + off * 2); result += *stat; MSK_IF_UNLOCK(sc_if); return (sysctl_handle_int(oidp, &result, 0, req)); } static int msk_sysctl_stat64(SYSCTL_HANDLER_ARGS) { struct msk_softc *sc; struct msk_if_softc *sc_if; uint64_t result, *stat; int off; sc_if = (struct msk_if_softc *)arg1; sc = sc_if->msk_softc; off = arg2; stat = (uint64_t *)((uint8_t *)&sc_if->msk_stats + off); MSK_IF_LOCK(sc_if); result = MSK_READ_MIB64(sc_if->msk_port, GM_MIB_CNT_BASE + off * 2); result += *stat; MSK_IF_UNLOCK(sc_if); return (sysctl_handle_64(oidp, &result, 0, req)); } #undef MSK_READ_MIB32 #undef MSK_READ_MIB64 #define MSK_SYSCTL_STAT32(sc, c, o, p, n, d) \ SYSCTL_ADD_PROC(c, p, OID_AUTO, o, CTLTYPE_UINT | CTLFLAG_RD, \ sc, offsetof(struct msk_hw_stats, n), msk_sysctl_stat32, \ "IU", d) #define MSK_SYSCTL_STAT64(sc, c, o, p, n, d) \ SYSCTL_ADD_PROC(c, p, OID_AUTO, o, CTLTYPE_U64 | CTLFLAG_RD, \ sc, offsetof(struct msk_hw_stats, n), msk_sysctl_stat64, \ "QU", d) static void msk_sysctl_node(struct msk_if_softc *sc_if) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child, *schild; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(sc_if->msk_if_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc_if->msk_if_dev)); tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "MSK Statistics"); schild = child = SYSCTL_CHILDREN(tree); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "rx", CTLFLAG_RD, NULL, "MSK RX Statistics"); child = SYSCTL_CHILDREN(tree); MSK_SYSCTL_STAT32(sc_if, ctx, "ucast_frames", child, rx_ucast_frames, "Good unicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "bcast_frames", child, rx_bcast_frames, "Good broadcast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "pause_frames", child, rx_pause_frames, "Pause frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "mcast_frames", child, rx_mcast_frames, "Multicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "crc_errs", child, rx_crc_errs, "CRC errors"); MSK_SYSCTL_STAT64(sc_if, ctx, "good_octets", child, rx_good_octets, "Good octets"); MSK_SYSCTL_STAT64(sc_if, ctx, "bad_octets", child, rx_bad_octets, "Bad octets"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_64", child, rx_pkts_64, "64 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_65_127", child, rx_pkts_65_127, "65 to 127 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_128_255", child, rx_pkts_128_255, "128 to 255 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_256_511", child, rx_pkts_256_511, "256 to 511 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_512_1023", child, rx_pkts_512_1023, "512 to 1023 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1024_1518", child, rx_pkts_1024_1518, "1024 to 1518 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1519_max", child, rx_pkts_1519_max, "1519 to max frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_too_long", child, rx_pkts_too_long, "frames too long"); MSK_SYSCTL_STAT32(sc_if, ctx, "jabbers", child, rx_pkts_jabbers, "Jabber errors"); MSK_SYSCTL_STAT32(sc_if, ctx, "overflows", child, rx_fifo_oflows, "FIFO overflows"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "tx", CTLFLAG_RD, NULL, "MSK TX Statistics"); child = SYSCTL_CHILDREN(tree); MSK_SYSCTL_STAT32(sc_if, ctx, "ucast_frames", child, tx_ucast_frames, "Unicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "bcast_frames", child, tx_bcast_frames, "Broadcast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "pause_frames", child, tx_pause_frames, "Pause frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "mcast_frames", child, tx_mcast_frames, "Multicast frames"); MSK_SYSCTL_STAT64(sc_if, ctx, "octets", child, tx_octets, "Octets"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_64", child, tx_pkts_64, "64 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_65_127", child, tx_pkts_65_127, "65 to 127 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_128_255", child, tx_pkts_128_255, "128 to 255 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_256_511", child, tx_pkts_256_511, "256 to 511 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_512_1023", child, tx_pkts_512_1023, "512 to 1023 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1024_1518", child, tx_pkts_1024_1518, "1024 to 1518 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1519_max", child, tx_pkts_1519_max, "1519 to max frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "colls", child, tx_colls, "Collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "late_colls", child, tx_late_colls, "Late collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "excess_colls", child, tx_excess_colls, "Excessive collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "multi_colls", child, tx_multi_colls, "Multiple collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "single_colls", child, tx_single_colls, "Single collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "underflows", child, tx_underflows, "FIFO underflows"); } #undef MSK_SYSCTL_STAT32 #undef MSK_SYSCTL_STAT64 static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (!arg1) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || !req->newptr) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_msk_proc_limit(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, MSK_PROC_MIN, MSK_PROC_MAX)); } Index: projects/ifnet/sys/dev/usb/usb_pf.c =================================================================== --- projects/ifnet/sys/dev/usb/usb_pf.c (revision 280372) +++ projects/ifnet/sys/dev/usb/usb_pf.c (revision 280373) @@ -1,507 +1,504 @@ /* $FreeBSD$ */ /*- * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef USB_GLOBAL_INCLUDE_FILE #include USB_GLOBAL_INCLUDE_FILE #else #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* USB_GLOBAL_INCLUDE_FILE */ static void usbpf_init(void *); static void usbpf_uninit(void *); static int usbpf_clone_match(struct if_clone *, const char *); static int usbpf_clone_create(struct if_clone *, char *, size_t, caddr_t); static int usbpf_clone_destroy(struct if_clone *, if_t); static struct usb_bus *usbpf_ifname2ubus(const char *); static uint32_t usbpf_aggregate_xferflags(struct usb_xfer_flags *); static uint32_t usbpf_aggregate_status(struct usb_xfer_flags_int *); static int usbpf_xfer_frame_is_read(struct usb_xfer *, uint32_t); static uint32_t usbpf_xfer_precompute_size(struct usb_xfer *, int); static const char usbusname[] = "usbus"; static struct ifdriver usbpf_ifdrv = { - .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, - }, .ifdrv_name = usbusname, .ifdrv_type = IFT_USB, /* * XXX According to the specification of DLT_USB, it indicates * packets beginning with USB setup header. But not sure all * packets would be. */ .ifdrv_dlt = DLT_USB, .ifdrv_dlt_hdrlen = USBPF_HDR_LEN, }; SYSINIT(usbpf_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, usbpf_init, NULL); SYSUNINIT(usbpf_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, usbpf_uninit, NULL); static void usbpf_init(void *arg) { usbpf_ifdrv.ifdrv_clone = if_clone_advanced(usbusname, 0, usbpf_clone_match, usbpf_clone_create, usbpf_clone_destroy); } static void usbpf_uninit(void *arg) { int devlcnt; device_t *devlp; devclass_t dc; struct usb_bus *ubus; int error; int i; if_clone_detach(usbpf_ifdrv.ifdrv_clone); dc = devclass_find(usbusname); if (dc == NULL) return; error = devclass_get_devices(dc, &devlp, &devlcnt); if (error) return; for (i = 0; i < devlcnt; i++) { ubus = device_get_softc(devlp[i]); if (ubus != NULL && ubus->ifp != NULL) usbpf_clone_destroy(usbpf_ifdrv.ifdrv_clone, ubus->ifp); } free(devlp, M_TEMP); } static struct usb_bus * usbpf_ifname2ubus(const char *ifname) { device_t dev; devclass_t dc; int unit; int error; if (strncmp(ifname, usbusname, sizeof(usbusname) - 1) != 0) return (NULL); error = ifc_name2unit(ifname, &unit); if (error || unit < 0) return (NULL); dc = devclass_find(usbusname); if (dc == NULL) return (NULL); dev = devclass_get_device(dc, unit); if (dev == NULL) return (NULL); return (device_get_softc(dev)); } static int usbpf_clone_match(struct if_clone *ifc, const char *name) { struct usb_bus *ubus; ubus = usbpf_ifname2ubus(name); if (ubus == NULL) return (0); if (ubus->ifp != NULL) return (0); return (1); } static int usbpf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &usbpf_ifdrv, .ifat_flags = IFF_UP, }; struct usb_bus *ubus; int error; error = ifc_name2unit(name, &ifat.ifat_dunit); if (error) return (error); if (ifat.ifat_dunit < 0) return (EINVAL); ubus = usbpf_ifname2ubus(name); if (ubus == NULL) return (1); if (ubus->ifp != NULL) return (1); ifat.ifat_softc = ubus; ubus->ifp = if_attach(&ifat); if (ubus->ifp == NULL) { device_printf(ubus->parent, "usbpf: Could not allocate " "instance\n"); return (ifat.ifat_error); } return (0); } static int usbpf_clone_destroy(struct if_clone *ifc, if_t ifp) { struct usb_bus *ubus; ubus = if_getsoftc(ifp, IF_DRIVER_SOFTC); ubus->ifp = NULL; if_detach(ifp); return (0); } void usbpf_attach(struct usb_bus *ubus) { if (bootverbose) device_printf(ubus->parent, "usbpf: Attached\n"); } void usbpf_detach(struct usb_bus *ubus) { if (ubus->ifp != NULL) usbpf_clone_destroy(usbpf_ifdrv.ifdrv_clone, ubus->ifp); if (bootverbose) device_printf(ubus->parent, "usbpf: Detached\n"); } static uint32_t usbpf_aggregate_xferflags(struct usb_xfer_flags *flags) { uint32_t val = 0; if (flags->force_short_xfer == 1) val |= USBPF_FLAG_FORCE_SHORT_XFER; if (flags->short_xfer_ok == 1) val |= USBPF_FLAG_SHORT_XFER_OK; if (flags->short_frames_ok == 1) val |= USBPF_FLAG_SHORT_FRAMES_OK; if (flags->pipe_bof == 1) val |= USBPF_FLAG_PIPE_BOF; if (flags->proxy_buffer == 1) val |= USBPF_FLAG_PROXY_BUFFER; if (flags->ext_buffer == 1) val |= USBPF_FLAG_EXT_BUFFER; if (flags->manual_status == 1) val |= USBPF_FLAG_MANUAL_STATUS; if (flags->no_pipe_ok == 1) val |= USBPF_FLAG_NO_PIPE_OK; if (flags->stall_pipe == 1) val |= USBPF_FLAG_STALL_PIPE; return (val); } static uint32_t usbpf_aggregate_status(struct usb_xfer_flags_int *flags) { uint32_t val = 0; if (flags->open == 1) val |= USBPF_STATUS_OPEN; if (flags->transferring == 1) val |= USBPF_STATUS_TRANSFERRING; if (flags->did_dma_delay == 1) val |= USBPF_STATUS_DID_DMA_DELAY; if (flags->did_close == 1) val |= USBPF_STATUS_DID_CLOSE; if (flags->draining == 1) val |= USBPF_STATUS_DRAINING; if (flags->started == 1) val |= USBPF_STATUS_STARTED; if (flags->bandwidth_reclaimed == 1) val |= USBPF_STATUS_BW_RECLAIMED; if (flags->control_xfr == 1) val |= USBPF_STATUS_CONTROL_XFR; if (flags->control_hdr == 1) val |= USBPF_STATUS_CONTROL_HDR; if (flags->control_act == 1) val |= USBPF_STATUS_CONTROL_ACT; if (flags->control_stall == 1) val |= USBPF_STATUS_CONTROL_STALL; if (flags->short_frames_ok == 1) val |= USBPF_STATUS_SHORT_FRAMES_OK; if (flags->short_xfer_ok == 1) val |= USBPF_STATUS_SHORT_XFER_OK; #if USB_HAVE_BUSDMA if (flags->bdma_enable == 1) val |= USBPF_STATUS_BDMA_ENABLE; if (flags->bdma_no_post_sync == 1) val |= USBPF_STATUS_BDMA_NO_POST_SYNC; if (flags->bdma_setup == 1) val |= USBPF_STATUS_BDMA_SETUP; #endif if (flags->isochronous_xfr == 1) val |= USBPF_STATUS_ISOCHRONOUS_XFR; if (flags->curr_dma_set == 1) val |= USBPF_STATUS_CURR_DMA_SET; if (flags->can_cancel_immed == 1) val |= USBPF_STATUS_CAN_CANCEL_IMMED; if (flags->doing_callback == 1) val |= USBPF_STATUS_DOING_CALLBACK; return (val); } static int usbpf_xfer_frame_is_read(struct usb_xfer *xfer, uint32_t frame) { int isread; if ((frame == 0) && (xfer->flags_int.control_xfr != 0) && (xfer->flags_int.control_hdr != 0)) { /* special case */ if (xfer->flags_int.usb_mode == USB_MODE_DEVICE) { /* The device controller writes to memory */ isread = 1; } else { /* The host controller reads from memory */ isread = 0; } } else { isread = USB_GET_DATA_ISREAD(xfer); } return (isread); } static uint32_t usbpf_xfer_precompute_size(struct usb_xfer *xfer, int type) { uint32_t totlen; uint32_t x; uint32_t nframes; if (type == USBPF_XFERTAP_SUBMIT) nframes = xfer->nframes; else nframes = xfer->aframes; totlen = USBPF_HDR_LEN + (USBPF_FRAME_HDR_LEN * nframes); /* precompute all trace lengths */ for (x = 0; x != nframes; x++) { if (usbpf_xfer_frame_is_read(xfer, x)) { if (type != USBPF_XFERTAP_SUBMIT) { totlen += USBPF_FRAME_ALIGN( xfer->frlengths[x]); } } else { if (type == USBPF_XFERTAP_SUBMIT) { totlen += USBPF_FRAME_ALIGN( xfer->frlengths[x]); } } } return (totlen); } void usbpf_xfertap(struct usb_xfer *xfer, int type) { struct usb_bus *bus; struct usbpf_pkthdr *up; struct usbpf_framehdr *uf; struct bpf_if *bpf; usb_frlength_t offset; uint32_t totlen; uint32_t frame; uint32_t temp; uint32_t nframes; uint32_t x; uint8_t *buf; uint8_t *ptr; bus = xfer->xroot->bus; /* sanity checks */ if (bus->ifp == NULL) return; bpf = if_getsoftc(bus->ifp, IF_BPF); if (!bpf_peers_present(bpf)) return; totlen = usbpf_xfer_precompute_size(xfer, type); if (type == USBPF_XFERTAP_SUBMIT) nframes = xfer->nframes; else nframes = xfer->aframes; /* * XXX TODO XXX * * When BPF supports it we could pass a fragmented array of * buffers avoiding the data copy operation here. */ buf = ptr = malloc(totlen, M_TEMP, M_NOWAIT); if (buf == NULL) { device_printf(bus->parent, "usbpf: Out of memory\n"); return; } up = (struct usbpf_pkthdr *)ptr; ptr += USBPF_HDR_LEN; /* fill out header */ temp = device_get_unit(bus->bdev); up->up_totlen = htole32(totlen); up->up_busunit = htole32(temp); up->up_address = xfer->xroot->udev->device_index; if (xfer->flags_int.usb_mode == USB_MODE_DEVICE) up->up_mode = USBPF_MODE_DEVICE; else up->up_mode = USBPF_MODE_HOST; up->up_type = type; up->up_xfertype = xfer->endpoint->edesc->bmAttributes & UE_XFERTYPE; temp = usbpf_aggregate_xferflags(&xfer->flags); up->up_flags = htole32(temp); temp = usbpf_aggregate_status(&xfer->flags_int); up->up_status = htole32(temp); temp = xfer->error; up->up_error = htole32(temp); temp = xfer->interval; up->up_interval = htole32(temp); up->up_frames = htole32(nframes); temp = xfer->max_packet_size; up->up_packet_size = htole32(temp); temp = xfer->max_packet_count; up->up_packet_count = htole32(temp); temp = xfer->endpointno; up->up_endpoint = htole32(temp); up->up_speed = xfer->xroot->udev->speed; /* clear reserved area */ memset(up->up_reserved, 0, sizeof(up->up_reserved)); /* init offset and frame */ offset = 0; frame = 0; /* iterate all the USB frames and copy data, if any */ for (x = 0; x != nframes; x++) { uint32_t length; int isread; /* get length */ length = xfer->frlengths[x]; /* get frame header pointer */ uf = (struct usbpf_framehdr *)ptr; ptr += USBPF_FRAME_HDR_LEN; /* fill out packet header */ uf->length = htole32(length); uf->flags = 0; /* get information about data read/write */ isread = usbpf_xfer_frame_is_read(xfer, x); /* check if we need to copy any data */ if (isread) { if (type == USBPF_XFERTAP_SUBMIT) length = 0; else { uf->flags |= htole32( USBPF_FRAMEFLAG_DATA_FOLLOWS); } } else { if (type != USBPF_XFERTAP_SUBMIT) length = 0; else { uf->flags |= htole32( USBPF_FRAMEFLAG_DATA_FOLLOWS); } } /* check if data is read direction */ if (isread) uf->flags |= htole32(USBPF_FRAMEFLAG_READ); /* copy USB data, if any */ if (length != 0) { /* copy data */ usbd_copy_out(&xfer->frbuffers[frame], offset, ptr, length); /* align length */ temp = USBPF_FRAME_ALIGN(length); /* zero pad */ if (temp != length) memset(ptr + length, 0, temp - length); ptr += temp; } if (xfer->flags_int.isochronous_xfr) { offset += usbd_xfer_old_frame_length(xfer, x); } else { frame ++; } } bpf_tap(bpf, buf, totlen); free(buf, M_TEMP); } Index: projects/ifnet/sys/dev/virtio/network/if_vtnet.c =================================================================== --- projects/ifnet/sys/dev/virtio/network/if_vtnet.c (revision 280372) +++ projects/ifnet/sys/dev/virtio/network/if_vtnet.c (revision 280373) @@ -1,3780 +1,3779 @@ /*- * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Driver for VirtIO network devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_NETMAP #include #endif /* DEV_NETMAP */ #include "virtio_if.h" #include "opt_inet.h" #include "opt_inet6.h" static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); static int vtnet_attach(device_t); static int vtnet_detach(device_t); static int vtnet_suspend(device_t); static int vtnet_resume(device_t); static int vtnet_shutdown(device_t); static int vtnet_attach_completed(device_t); static int vtnet_config_change(device_t); static void vtnet_negotiate_features(struct vtnet_softc *); static void vtnet_setup_features(struct vtnet_softc *); static int vtnet_init_rxq(struct vtnet_softc *, int); static int vtnet_init_txq(struct vtnet_softc *, int); static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); static void vtnet_free_rxtx_queues(struct vtnet_softc *); static int vtnet_alloc_rx_filters(struct vtnet_softc *); static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); static void vtnet_setup_interface(struct vtnet_softc *); static int vtnet_change_mtu(struct vtnet_softc *, int); static int vtnet_ioctl(if_t, u_long, void *, struct thread *); static uint64_t vtnet_get_counter(if_t, ift_counter); static int vtnet_rxq_populate(struct vtnet_rxq *); static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_new_buf(struct vtnet_rxq *); static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_rxq_eof(struct vtnet_rxq *); static void vtnet_rx_vq_intr(void *); static void vtnet_rxq_tq_intr(void *, int); static int vtnet_txq_below_threshold(struct vtnet_txq *); static int vtnet_txq_notify(struct vtnet_txq *); static void vtnet_txq_free_mbufs(struct vtnet_txq *); static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, int *, int *, int *); static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, int, struct virtio_net_hdr *); static struct mbuf * vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); static int vtnet_txq_mq_start(if_t, struct mbuf *); static void vtnet_txq_tq_deferred(void *, int); static void vtnet_txq_start(struct vtnet_txq *); static void vtnet_txq_tq_intr(void *, int); static int vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); static void vtnet_tx_start_all(struct vtnet_softc *); static void vtnet_qflush(if_t); static int vtnet_watchdog(struct vtnet_txq *); static void vtnet_accum_stats(struct vtnet_softc *, struct vtnet_rxq_stats *, struct vtnet_txq_stats *); static void vtnet_tick(void *); static void vtnet_start_taskqueues(struct vtnet_softc *); static void vtnet_free_taskqueues(struct vtnet_softc *); static void vtnet_drain_taskqueues(struct vtnet_softc *); static void vtnet_drain_rxtx_queues(struct vtnet_softc *); static void vtnet_stop_rendezvous(struct vtnet_softc *); static void vtnet_stop(struct vtnet_softc *); static int vtnet_virtio_reinit(struct vtnet_softc *); static void vtnet_init_rx_filters(struct vtnet_softc *); static int vtnet_init_rx_queues(struct vtnet_softc *); static int vtnet_init_tx_queues(struct vtnet_softc *); static int vtnet_init_rxtx_queues(struct vtnet_softc *); static void vtnet_set_active_vq_pairs(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init(struct vtnet_softc *); static void vtnet_free_ctrl_vq(struct vtnet_softc *); static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); static void vtnet_attach_disable_promisc(struct vtnet_softc *); static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_rx_filter_vlan(struct vtnet_softc *); static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_register_vlan(void *, if_t, uint16_t); static void vtnet_unregister_vlan(void *, if_t, uint16_t); static int vtnet_is_link_up(struct vtnet_softc *); static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(if_t); static void vtnet_ifmedia_sts(if_t, struct ifmediareq *); static void vtnet_get_hwaddr(struct vtnet_softc *); static void vtnet_set_hwaddr(struct vtnet_softc *); static void vtnet_vlan_tag_remove(struct mbuf *); static void vtnet_set_rx_process_limit(struct vtnet_softc *); static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_rxq *); static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_txq *); static void vtnet_setup_queue_sysctl(struct vtnet_softc *); static void vtnet_setup_sysctl(struct vtnet_softc *); static int vtnet_rxq_enable_intr(struct vtnet_rxq *); static void vtnet_rxq_disable_intr(struct vtnet_rxq *); static int vtnet_txq_enable_intr(struct vtnet_txq *); static void vtnet_txq_disable_intr(struct vtnet_txq *); static void vtnet_enable_rx_interrupts(struct vtnet_softc *); static void vtnet_enable_tx_interrupts(struct vtnet_softc *); static void vtnet_enable_interrupts(struct vtnet_softc *); static void vtnet_disable_rx_interrupts(struct vtnet_softc *); static void vtnet_disable_tx_interrupts(struct vtnet_softc *); static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); /* Tunables. */ static int vtnet_csum_disable = 0; TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); static int vtnet_mq_disable = 0; TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); static int vtnet_mq_max_pairs = 0; TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { { VIRTIO_NET_F_CSUM, "TxChecksum" }, { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, { VIRTIO_NET_F_MAC, "MacAddress" }, { VIRTIO_NET_F_GSO, "TxAllGSO" }, { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, { VIRTIO_NET_F_STATUS, "Status" }, { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, { VIRTIO_NET_F_CTRL_RX, "RxMode" }, { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, { VIRTIO_NET_F_MQ, "Multiqueue" }, { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, { 0, NULL } }; static device_method_t vtnet_methods[] = { /* Device methods. */ DEVMETHOD(device_probe, vtnet_probe), DEVMETHOD(device_attach, vtnet_attach), DEVMETHOD(device_detach, vtnet_detach), DEVMETHOD(device_suspend, vtnet_suspend), DEVMETHOD(device_resume, vtnet_resume), DEVMETHOD(device_shutdown, vtnet_shutdown), /* VirtIO methods. */ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; static driver_t vtnet_driver = { "vtnet", vtnet_methods, sizeof(struct vtnet_softc) }; static devclass_t vtnet_devclass; DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); MODULE_VERSION(vtnet, 1); MODULE_DEPEND(vtnet, virtio, 1, 1, 1); static struct ifdriver vtnet_ifdrv = { .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, .ifop_ioctl = vtnet_ioctl, .ifop_get_counter = vtnet_get_counter, .ifop_transmit = vtnet_txq_mq_start, .ifop_qflush = vtnet_qflush, }, .ifdrv_name = "vtnet", .ifdrv_type = IFT_ETHER, .ifdrv_hdrlen = sizeof(struct ether_vlan_header), }; static int vtnet_modevent(module_t mod, int type, void *unused) { int error; error = 0; switch (type) { case MOD_LOAD: vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", sizeof(struct vtnet_tx_header), NULL, NULL, NULL, NULL, 0, 0); break; case MOD_QUIESCE: case MOD_UNLOAD: if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) error = EBUSY; else if (type == MOD_UNLOAD) { uma_zdestroy(vtnet_tx_header_zone); vtnet_tx_header_zone = NULL; } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static int vtnet_probe(device_t dev) { if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK) return (ENXIO); device_set_desc(dev, "VirtIO Networking Adapter"); return (BUS_PROBE_DEFAULT); } static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; /* Register our feature descriptions. */ virtio_set_feature_desc(dev, vtnet_feature_desc); VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); vtnet_setup_sysctl(sc); vtnet_setup_features(sc); error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx filters\n"); goto fail; } error = vtnet_alloc_rxtx_queues(sc); if (error) { device_printf(dev, "cannot allocate queues\n"); goto fail; } error = vtnet_alloc_virtqueues(sc); if (error) { device_printf(dev, "cannot allocate virtqueues\n"); goto fail; } error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup virtqueue interrupts\n"); goto fail; } vtnet_setup_interface(sc); #ifdef DEV_NETMAP vtnet_netmap_attach(sc); #endif /* DEV_NETMAP */ vtnet_start_taskqueues(sc); fail: if (error) vtnet_detach(dev); return (error); } static int vtnet_detach(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); if (device_is_attached(dev)) { VTNET_CORE_LOCK(sc); vtnet_stop(sc); VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); vtnet_drain_taskqueues(sc); #ifdef DEV_NETMAP netmap_detach(sc->vtnet_ifp); #endif /* DEV_NETMAP */ if_detach(sc->vtnet_ifp); } vtnet_free_taskqueues(sc); if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; } if (sc->vtnet_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfg, sc->vtnet_vlan_detach); sc->vtnet_vlan_detach = NULL; } ifmedia_removeall(&sc->vtnet_media); vtnet_free_rxtx_queues(sc); vtnet_free_rx_filters(sc); if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); VTNET_CORE_LOCK_DESTROY(sc); return (0); } static int vtnet_suspend(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_resume(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); if (sc->vtnet_if_flags & IFF_UP) vtnet_init(sc); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_shutdown(device_t dev) { /* * Suspend already does all of what we need to * do here; we just never expect to be resumed. */ return (vtnet_suspend(dev)); } static int vtnet_attach_completed(device_t dev) { vtnet_attach_disable_promisc(device_get_softc(dev)); return (0); } static int vtnet_config_change(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_update_link_status(sc); if (sc->vtnet_link_active != 0) vtnet_tx_start_all(sc); VTNET_CORE_UNLOCK(sc); return (0); } static void vtnet_negotiate_features(struct vtnet_softc *sc) { device_t dev; uint64_t mask, features; dev = sc->vtnet_dev; mask = 0; /* * TSO and LRO are only available when their corresponding checksum * offload feature is also negotiated. */ if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; } if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) mask |= VTNET_TSO_FEATURES; if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) mask |= VTNET_LRO_FEATURES; if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) mask |= VIRTIO_NET_F_MQ; features = VTNET_FEATURES & ~mask; sc->vtnet_features = virtio_negotiate_features(dev, features); if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { /* * LRO without mergeable buffers requires special care. This * is not ideal because every receive buffer must be large * enough to hold the maximum TCP packet, the Ethernet header, * and the header. This requires up to 34 descriptors with * MCLBYTES clusters. If we do not have indirect descriptors, * LRO is disabled since the virtqueue will not contain very * many receive buffers. */ if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { device_printf(dev, "LRO disabled due to both mergeable buffers and " "indirect descriptors not negotiated\n"); features &= ~VTNET_LRO_FEATURES; sc->vtnet_features = virtio_negotiate_features(dev, features); } else sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } } static void vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; int max_pairs, max; dev = sc->vtnet_dev; vtnet_negotiate_features(sc); if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) sc->vtnet_flags |= VTNET_FLAG_INDIRECT; if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { /* This feature should always be negotiated. */ sc->vtnet_flags |= VTNET_FLAG_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; else sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; else sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { max_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) max_pairs = 1; } else max_pairs = 1; if (max_pairs > 1) { /* * Limit the maximum number of queue pairs to the number of * CPUs or the configured maximum. The actual number of * queues that get used may be less. */ max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); if (max > 0 && max_pairs > max) max_pairs = max; if (max_pairs > mp_ncpus) max_pairs = mp_ncpus; if (max_pairs > VTNET_MAX_QUEUE_PAIRS) max_pairs = VTNET_MAX_QUEUE_PAIRS; if (max_pairs > 1) sc->vtnet_flags |= VTNET_FLAG_MULTIQ; } sc->vtnet_max_vq_pairs = max_pairs; } static int vtnet_init_rxq(struct vtnet_softc *sc, int id) { struct vtnet_rxq *rxq; rxq = &sc->vtnet_rxqs[id]; snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); rxq->vtnrx_sc = sc; rxq->vtnrx_id = id; rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); if (rxq->vtnrx_sg == NULL) return (ENOMEM); TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); return (rxq->vtnrx_tq == NULL ? ENOMEM : 0); } static int vtnet_init_txq(struct vtnet_softc *sc, int id) { struct vtnet_txq *txq; txq = &sc->vtnet_txqs[id]; snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); txq->vtntx_sc = sc; txq->vtntx_id = id; txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); if (txq->vtntx_sg == NULL) return (ENOMEM); txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vtntx_mtx); if (txq->vtntx_br == NULL) return (ENOMEM); TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, taskqueue_thread_enqueue, &txq->vtntx_tq); if (txq->vtntx_tq == NULL) return (ENOMEM); return (0); } static int vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) { int i, npairs, error; npairs = sc->vtnet_max_vq_pairs; sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) return (ENOMEM); for (i = 0; i < npairs; i++) { error = vtnet_init_rxq(sc, i); if (error) return (error); error = vtnet_init_txq(sc, i); if (error) return (error); } vtnet_setup_queue_sysctl(sc); return (0); } static void vtnet_destroy_rxq(struct vtnet_rxq *rxq) { rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; } if (mtx_initialized(&rxq->vtnrx_mtx) != 0) mtx_destroy(&rxq->vtnrx_mtx); } static void vtnet_destroy_txq(struct vtnet_txq *txq) { txq->vtntx_sc = NULL; txq->vtntx_id = -1; if (txq->vtntx_sg != NULL) { sglist_free(txq->vtntx_sg); txq->vtntx_sg = NULL; } if (txq->vtntx_br != NULL) { buf_ring_free(txq->vtntx_br, M_DEVBUF); txq->vtntx_br = NULL; } if (mtx_initialized(&txq->vtntx_mtx) != 0) mtx_destroy(&txq->vtntx_mtx); } static void vtnet_free_rxtx_queues(struct vtnet_softc *sc) { int i; if (sc->vtnet_rxqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); free(sc->vtnet_rxqs, M_DEVBUF); sc->vtnet_rxqs = NULL; } if (sc->vtnet_txqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_txq(&sc->vtnet_txqs[i]); free(sc->vtnet_txqs, M_DEVBUF); sc->vtnet_txqs = NULL; } } static int vtnet_alloc_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_mac_filter == NULL) return (ENOMEM); } if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_vlan_filter == NULL) return (ENOMEM); } return (0); } static void vtnet_free_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_mac_filter != NULL) { free(sc->vtnet_mac_filter, M_DEVBUF); sc->vtnet_mac_filter = NULL; } if (sc->vtnet_vlan_filter != NULL) { free(sc->vtnet_vlan_filter, M_DEVBUF); sc->vtnet_vlan_filter = NULL; } } static int vtnet_alloc_virtqueues(struct vtnet_softc *sc) { device_t dev; struct vq_alloc_info *info; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, idx, flags, nvqs, error; dev = sc->vtnet_dev; flags = 0; nvqs = sc->vtnet_max_vq_pairs * 2; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) nvqs++; info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); if (info == NULL) return (ENOMEM); for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, vtnet_tx_vq_intr, txq, &txq->vtntx_vq, "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); } if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); } /* * Enable interrupt binding if this is multiqueue. This only matters * when per-vq MSIX is available. */ if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) flags |= 0; error = virtio_alloc_virtqueues(dev, flags, nvqs, info); free(info, M_TEMP); return (error); } static void vtnet_setup_interface(struct vtnet_softc *sc) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &vtnet_ifdrv, .ifat_softc = sc, .ifat_baudrate = IF_Gbps(10), /* Approx. */ .ifat_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST, .ifat_capabilities = IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU, }; device_t dev; dev = sc->vtnet_dev; /* Read (or generate) the MAC address for the adapter. */ vtnet_get_hwaddr(sc); ifat.ifat_dunit = device_get_unit(dev); ifat.ifat_lla = sc->vtnet_hwaddr; if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) ifat.ifat_capabilities |= IFCAP_LINKSTATE; if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { ifat.ifat_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { ifat.ifat_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } else { if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) ifat.ifat_capabilities |= IFCAP_TSO4; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) ifat.ifat_capabilities |= IFCAP_TSO6; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } if (ifat.ifat_capabilities & IFCAP_TSO) ifat.ifat_capabilities |= IFCAP_VLAN_HWTSO; } if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { ifat.ifat_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) ifat.ifat_capabilities |= IFCAP_LRO; } if (ifat.ifat_capabilities & IFCAP_HWCSUM) { /* * VirtIO does not support VLAN tagging, but we can fake * it by inserting and removing the 802.1Q header during * transmit and receive. We are then able to do checksum * offloading of VLAN frames. */ ifat.ifat_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; } ifat.ifat_capenable = ifat.ifat_capabilities; /* * Capabilities after here are not enabled by default. */ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { ifat.ifat_capabilities |= IFCAP_VLAN_HWFILTER; sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } vtnet_set_rx_process_limit(sc); vtnet_set_tx_intr_threshold(sc); ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, vtnet_ifmedia_sts); ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); sc->vtnet_ifp = if_attach(&ifat); } static int vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) { if_t ifp; int frame_size, clsize; ifp = sc->vtnet_ifp; if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) return (EINVAL); frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + new_mtu; /* * Based on the new MTU (and hence frame size) determine which * cluster size is most appropriate for the receive queues. */ if (frame_size <= MCLBYTES) { clsize = MCLBYTES; } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { /* Avoid going past 9K jumbos. */ if (frame_size > MJUM9BYTES) return (EINVAL); clsize = MJUM9BYTES; } else clsize = MJUMPAGESIZE; sc->vtnet_rx_new_clsize = clsize; if (sc->vtnet_flags & VTNET_FLAG_RUNNING) { sc->vtnet_flags &= ~VTNET_FLAG_RUNNING; vtnet_init(sc); } return (0); } static int vtnet_ioctl(if_t ifp, u_long cmd, void *data, struct thread *td) { struct vtnet_softc *sc; struct ifreq *ifr; int oflags, error; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: VTNET_CORE_LOCK(sc); error = vtnet_change_mtu(sc, ifr->ifr_mtu); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFFLAGS: if ((ifr->ifr_flags & (IFF_PROMISC | IFF_ALLMULTI)) && (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { error = EINVAL; break; } VTNET_CORE_LOCK(sc); oflags = sc->vtnet_if_flags; sc->vtnet_if_flags = ifr->ifr_flags; if ((sc->vtnet_if_flags & IFF_UP) == 0) { if (sc->vtnet_flags & VTNET_FLAG_RUNNING) vtnet_stop(sc); } else if (sc->vtnet_flags & VTNET_FLAG_RUNNING) { if ((oflags ^ sc->vtnet_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) vtnet_rx_filter(sc); } else vtnet_init(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) break; VTNET_CORE_LOCK(sc); if (sc->vtnet_flags & VTNET_FLAG_RUNNING) vtnet_rx_filter_mac(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); break; case SIOCSIFCAP: sc->vtnet_capenable = ifr->ifr_reqcap; /* These Rx features require us to renegotiate. */ if ((ifr->ifr_reqcap ^ ifr->ifr_curcap) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWFILTER) && (sc->vtnet_flags & VTNET_FLAG_RUNNING)) { VTNET_CORE_LOCK(sc); sc->vtnet_flags &= ~VTNET_FLAG_RUNNING; vtnet_init(sc); VTNET_CORE_UNLOCK(sc); } ifr->ifr_hwassist = 0; if (ifr->ifr_reqcap & IFCAP_TXCSUM) ifr->ifr_hwassist |= VTNET_CSUM_OFFLOAD; if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) ifr->ifr_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; if (ifr->ifr_reqcap & IFCAP_TSO4) ifr->ifr_hwassist |= CSUM_TSO; if (ifr->ifr_reqcap & IFCAP_TSO6) ifr->ifr_hwassist |= CSUM_IP6_TSO; break; default: error = EOPNOTSUPP; break; } VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; vq = rxq->vtnrx_vq; error = ENOSPC; for (nbufs = 0; !virtqueue_full(vq); nbufs++) { error = vtnet_rxq_new_buf(rxq); if (error) break; } if (nbufs > 0) { virtqueue_notify(vq); /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. This is not * an error. */ if (error == EMSGSIZE) error = 0; } return (error); } static void vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) { struct virtqueue *vq; struct mbuf *m; int last; vq = rxq->vtnrx_vq; last = 0; while ((m = virtqueue_drain(vq, &last)) != NULL) m_freem(m); KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in rx queue %p", __func__, rxq)); } static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; int i, clsize; clsize = sc->vtnet_rx_clsize; KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); if (m_head == NULL) goto fail; m_head->m_len = clsize; m_tail = m_head; /* Allocate the rest of the chain. */ for (i = 1; i < nbufs; i++) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); if (m == NULL) goto fail; m->m_len = clsize; m_tail->m_next = m; m_tail = m; } if (m_tailp != NULL) *m_tailp = m_tail; return (m_head); fail: sc->vtnet_stats.mbuf_alloc_failed++; m_freem(m_head); return (NULL); } /* * Slow path for when LRO without mergeable buffers is negotiated. */ static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, int len0) { struct vtnet_softc *sc; struct mbuf *m, *m_prev; struct mbuf *m_new, *m_tail; int len, clsize, nreplace, error; sc = rxq->vtnrx_sc; clsize = sc->vtnet_rx_clsize; m_prev = NULL; m_tail = NULL; nreplace = 0; m = m0; len = len0; /* * Since these mbuf chains are so large, we avoid allocating an * entire replacement chain if possible. When the received frame * did not consume the entire chain, the unused mbufs are moved * to the replacement chain. */ while (len > 0) { /* * Something is seriously wrong if we received a frame * larger than the chain. Drop it. */ if (m == NULL) { sc->vtnet_stats.rx_frame_too_large++; return (EMSGSIZE); } /* We always allocate the same cluster size. */ KASSERT(m->m_len == clsize, ("%s: mbuf size %d is not the cluster size %d", __func__, m->m_len, clsize)); m->m_len = MIN(m->m_len, len); len -= m->m_len; m_prev = m; m = m->m_next; nreplace++; } KASSERT(nreplace <= sc->vtnet_rx_nmbufs, ("%s: too many replacement mbufs %d max %d", __func__, nreplace, sc->vtnet_rx_nmbufs)); m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); if (m_new == NULL) { m_prev->m_len = clsize; return (ENOBUFS); } /* * Move any unused mbufs from the received chain onto the end * of the new chain. */ if (m_prev->m_next != NULL) { m_tail->m_next = m_prev->m_next; m_prev->m_next = NULL; } error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * BAD! We could not enqueue the replacement mbuf chain. We * must restore the m0 chain to the original state if it was * modified so we can subsequently discard it. * * NOTE: The replacement is suppose to be an identical copy * to the one just dequeued so this is an unexpected error. */ sc->vtnet_stats.rx_enq_replacement_failed++; if (m_tail->m_next != NULL) { m_prev->m_next = m_tail->m_next; m_tail->m_next = NULL; } m_prev->m_len = clsize; m_freem(m_new); } return (error); } static int vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { struct vtnet_softc *sc; struct mbuf *m_new; int error; sc = rxq->vtnrx_sc; KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); if (m->m_next == NULL) { /* Fast-path for the common case of just one mbuf. */ if (m->m_len < len) return (EINVAL); m_new = vtnet_rx_alloc_buf(sc, 1, NULL); if (m_new == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * The new mbuf is suppose to be an identical * copy of the one just dequeued so this is an * unexpected error. */ m_freem(m_new); sc->vtnet_stats.rx_enq_replacement_failed++; } else m->m_len = len; } else error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); return (error); } static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { struct vtnet_softc *sc; struct sglist *sg; struct vtnet_rx_header *rxhdr; uint8_t *mdata; int offset, error; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; mdata = mtod(m, uint8_t *); VTNET_RXQ_LOCK_ASSERT(rxq); KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); KASSERT(m->m_len == sc->vtnet_rx_clsize, ("%s: unexpected cluster size %d/%d", __func__, m->m_len, sc->vtnet_rx_clsize)); sglist_reset(sg); if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); rxhdr = (struct vtnet_rx_header *) mdata; sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); offset = sizeof(struct vtnet_rx_header); } else offset = 0; sglist_append(sg, mdata + offset, m->m_len - offset); if (m->m_next != NULL) { error = sglist_append_mbuf(sg, m->m_next); MPASS(error == 0); } error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg); return (error); } static int vtnet_rxq_new_buf(struct vtnet_rxq *rxq) { struct vtnet_softc *sc; struct mbuf *m; int error; sc = rxq->vtnrx_sc; m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); if (m == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m); if (error) m_freem(m); return (error); } /* * Use the checksum offset in the VirtIO header to set the * correct CSUM_* flags. */ static int vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; #if defined(INET) || defined(INET6) int offset = hdr->csum_start + hdr->csum_offset; #endif sc = rxq->vtnrx_sc; /* Only do a basic sanity check on the offset. */ switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(offset < ip_start + sizeof(struct ip))) return (1); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } /* * Use the offset to determine the appropriate CSUM_* flags. This is * a bit dirty, but we can get by with it since the checksum offsets * happen to be different. We assume the host host does not do IPv4 * header checksum offloading. */ switch (hdr->csum_offset) { case offsetof(struct udphdr, uh_sum): case offsetof(struct tcphdr, th_sum): m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case offsetof(struct sctphdr, checksum): m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: sc->vtnet_stats.rx_csum_bad_offset++; return (1); } return (0); } static int vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int offset, proto; sc = rxq->vtnrx_sc; switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip; if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) return (1); ip = (struct ip *)(m->m_data + ip_start); proto = ip->ip_p; offset = ip_start + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < ip_start + sizeof(struct ip6_hdr))) return (1); offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); if (__predict_false(offset < 0)) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } switch (proto) { case IPPROTO_TCP: if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_UDP: if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_SCTP: if (__predict_false(m->m_len < offset + sizeof(struct sctphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: /* * For the remaining protocols, FreeBSD does not support * checksum offloading, so the checksum will be recomputed. */ #if 0 if_printf(sc->vtnet_ifp, "cksum offload of unsupported " "protocol eth_type=%#x proto=%d csum_start=%d " "csum_offset=%d\n", __func__, eth_type, proto, hdr->csum_start, hdr->csum_offset); #endif break; } return (0); } /* * Set the appropriate CSUM_* flags. Unfortunately, the information * provided is not directly useful to us. The VirtIO header gives the * offset of the checksum, which is all Linux needs, but this is not * how FreeBSD does things. We are forced to peek inside the packet * a bit. * * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD * could accept the offsets and let the stack figure it out. */ static int vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct ether_header *eh; struct ether_vlan_header *evh; uint16_t eth_type; int offset, error; eh = mtod(m, struct ether_header *); eth_type = ntohs(eh->ether_type); if (eth_type == ETHERTYPE_VLAN) { /* BMV: We should handle nested VLAN tags too. */ evh = mtod(m, struct ether_vlan_header *); eth_type = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else offset = sizeof(struct ether_header); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); else error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); return (error); } static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) { struct mbuf *m; while (--nbufs > 0) { m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); if (m == NULL) break; vtnet_rxq_discard_buf(rxq, m); } } static void vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) { int error; /* * Requeue the discarded mbuf. This should always be successful * since it was just dequeued. */ error = vtnet_rxq_enqueue_buf(rxq, m); KASSERT(error == 0, ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m, *m_tail; int len; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; m_tail = m_head; while (--nbufs > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) { rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } if (vtnet_rxq_new_buf(rxq) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } if (m->m_len < len) len = m->m_len; m->m_len = len; m->m_flags &= ~M_PKTHDR; m_head->m_pkthdr.len += len; m_tail->m_next = m; m_tail = m; } return (0); fail: sc->vtnet_stats.rx_mergeable_failed++; m_freem(m_head); return (1); } static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; if_t ifp; struct ether_header *eh; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; if (sc->vtnet_capenable & IFCAP_VLAN_HWTAGGING) { eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { vtnet_vlan_tag_remove(m); /* * With the 802.1Q header removed, update the * checksum starting location accordingly. */ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; } } m->m_pkthdr.flowid = rxq->vtnrx_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); /* * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum * distinction that Linux does. Need to reevaluate if performing * offloading for the NEEDS_CSUM case is really appropriate. */ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; } rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; VTNET_RXQ_UNLOCK(rxq); if_input(ifp, m); VTNET_RXQ_LOCK(rxq); } static int vtnet_rxq_eof(struct vtnet_rxq *rxq) { struct virtio_net_hdr lhdr, *hdr; struct vtnet_softc *sc; if_t ifp; struct virtqueue *vq; struct mbuf *m; struct virtio_net_hdr_mrg_rxbuf *mhdr; int len, deq, nbufs, adjsz, count; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; hdr = &lhdr; deq = 0; count = sc->vtnet_rx_process_limit; VTNET_RXQ_LOCK_ASSERT(rxq); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &deq)) { return (FALSE); } #endif /* DEV_NETMAP */ while (count-- > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { rxq->vtnrx_stats.vrxs_ierrors++; vtnet_rxq_discard_buf(rxq, m); continue; } if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* * Account for our pad inserted between the header * and the actual start of the frame. */ len += VTNET_RX_HEADER_PAD; } else { mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); nbufs = mhdr->num_buffers; adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; } m->m_pkthdr.len = len; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags = 0; if (nbufs > 1) { /* Dequeue the rest of chain. */ if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) continue; } /* * Save copy of header before we strip it. For both mergeable * and non-mergeable, the header is at the beginning of the * mbuf data. We no longer need num_buffers, so always use a * regular header. * * BMV: Is this memcpy() expensive? We know the mbuf data is * still valid even after the m_adj(). */ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); vtnet_rxq_input(rxq, m, hdr); /* Must recheck after dropping the Rx lock. */ if ((sc->vtnet_flags & VTNET_FLAG_RUNNING) == 0) break; } if (deq > 0) virtqueue_notify(vq); return (count > 0 ? 0 : EAGAIN); } static void vtnet_rx_vq_intr(void *xrxq) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; if_t ifp; int tries, more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; tries = 0; if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_rxq_disable_intr(rxq); return; } VTNET_RXQ_LOCK(rxq); again: if ((sc->vtnet_flags & VTNET_FLAG_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); /* * This is an occasional condition or race (when !more), * so retry a few times before scheduling the taskqueue. */ if (tries++ < VTNET_INTR_DISABLE_RETRIES) goto again; VTNET_RXQ_UNLOCK(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } else VTNET_RXQ_UNLOCK(rxq); } static void vtnet_rxq_tq_intr(void *xrxq, int pending) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; if_t ifp; int more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; VTNET_RXQ_LOCK(rxq); if ((sc->vtnet_flags & VTNET_FLAG_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } VTNET_RXQ_UNLOCK(rxq); } static int vtnet_txq_below_threshold(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct virtqueue *vq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); } static int vtnet_txq_notify(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; txq->vtntx_watchdog = VTNET_TX_TIMEOUT; virtqueue_notify(vq); if (vtnet_txq_enable_intr(txq) == 0) return (0); /* * Drain frames that were completed since last checked. If this * causes the queue to go above the threshold, the caller should * continue transmitting. */ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { virtqueue_disable_intr(vq); return (1); } return (0); } static void vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; int last; vq = txq->vtntx_vq; last = 0; while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in tx queue %p", __func__, txq)); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct vtnet_softc *sc; struct ether_vlan_header *evh; int offset; sc = txq->vtntx_sc; evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. */ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip, iphdr; if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = (struct ip *)(m->m_data + offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: sc->vtnet_stats.tx_csum_bad_ethtype++; return (EINVAL); } return (0); } static int vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, int offset, struct virtio_net_hdr *hdr) { static struct timeval lastecn; static int curecn; struct vtnet_softc *sc; struct tcphdr *tcp, tcphdr; sc = txq->vtntx_sc; if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else tcp = (struct tcphdr *)(m->m_data + offset); hdr->hdr_len = offset + (tcp->th_off << 2); hdr->gso_size = m->m_pkthdr.tso_segsz; hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6; if (tcp->th_flags & TH_CWR) { /* * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, * ECN support is not on a per-interface basis, but globally via * the net.inet.tcp.ecn.enable sysctl knob. The default is off. */ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { if (ppsratecheck(&lastecn, &curecn, 1)) if_printf(sc->vtnet_ifp, "TSO with ECN not negotiated with host\n"); return (ENOTSUP); } hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } txq->vtntx_stats.vtxs_tso++; return (0); } static struct mbuf * vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int flags, etype, csum_start, proto, error; sc = txq->vtntx_sc; flags = m->m_pkthdr.csum_flags; error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); if (error) goto drop; if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { /* * We could compare the IP protocol vs the CSUM_ flag too, * but that really should not be necessary. */ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = csum_start; hdr->csum_offset = m->m_pkthdr.csum_data; txq->vtntx_stats.vtxs_csum++; } if (flags & CSUM_TSO) { if (__predict_false(proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. */ sc->vtnet_stats.tx_tso_not_tcp++; goto drop; } KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, ("%s: mbuf %p TSO without checksum offload %#x", __func__, m, flags)); error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); if (error) goto drop; } return (m); drop: m_freem(m); return (NULL); } static int vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { struct vtnet_softc *sc; struct virtqueue *vq; struct sglist *sg; struct mbuf *m; int error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; sg = txq->vtntx_sg; m = *m_head; sglist_reset(sg); error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); KASSERT(error == 0 && sg->sg_nseg == 1, ("%s: error %d adding header to sglist", __func__, error)); error = sglist_append_mbuf(sg, m); if (error) { m = m_defrag(m, M_NOWAIT); if (m == NULL) goto fail; *m_head = m; sc->vtnet_stats.tx_defragged++; error = sglist_append_mbuf(sg, m); if (error) goto fail; } txhdr->vth_mbuf = m; error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); return (error); fail: sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } static int vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) { struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); if (txhdr == NULL) { m_freem(m); *m_head = NULL; return (ENOMEM); } /* * Always use the non-mergeable header, regardless if the feature * was negotiated. For transmit, num_buffers is always zero. The * vtnet_hdr_size is used to enqueue the correct header size. */ hdr = &txhdr->vth_uhdr.hdr; if (m->m_flags & M_VLANTAG) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } m->m_flags &= ~M_VLANTAG; } if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); if (error == 0) return (0); fail: uma_zfree(vtnet_tx_header_zone, txhdr); return (error); } static int vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) { struct vtnet_softc *sc; struct virtqueue *vq; struct buf_ring *br; if_t ifp; int enq, tries, error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; br = txq->vtntx_br; ifp = sc->vtnet_ifp; tries = 0; error = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((sc->vtnet_flags & VTNET_FLAG_RUNNING) == 0 || sc->vtnet_link_active == 0) { if (m != NULL) error = buf_ring_enqueue(br, m); return (error); } if (m != NULL) { error = buf_ring_enqueue(br, m); if (error) return (error); } vtnet_txq_eof(txq); again: enq = 0; while ((m = buf_ring_peek(br)) != NULL) { if (virtqueue_full(vq)) { buf_ring_putback_sc(br, m); break; } if (vtnet_txq_encap(txq, &m) != 0) { if (m != NULL) buf_ring_putback_sc(br, m); else buf_ring_advance_sc(br); break; } buf_ring_advance_sc(br); enq++; if_mtap(ifp, m, NULL, 0); } if (enq > 0 && vtnet_txq_notify(txq) != 0) { if (tries++ < VTNET_NOTIFY_RETRIES) goto again; txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } return (0); } static int vtnet_txq_mq_start(if_t ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int i, npairs, error; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); npairs = sc->vtnet_act_vq_pairs; /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % npairs; else i = curcpu % npairs; txq = &sc->vtnet_txqs[i]; if (VTNET_TXQ_TRYLOCK(txq) != 0) { error = vtnet_txq_mq_start_locked(txq, m); VTNET_TXQ_UNLOCK(txq); } else { error = buf_ring_enqueue(txq->vtntx_br, m); taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); } return (error); } static void vtnet_txq_tq_deferred(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; txq = xtxq; sc = txq->vtntx_sc; VTNET_TXQ_LOCK(txq); if (!buf_ring_empty(txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); VTNET_TXQ_UNLOCK(txq); } static void vtnet_txq_start(struct vtnet_txq *txq) { struct vtnet_softc *sc; sc = txq->vtntx_sc; if (!buf_ring_empty(txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); } static void vtnet_txq_tq_intr(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; if_t ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if ((sc->vtnet_flags & VTNET_FLAG_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static int vtnet_txq_eof(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; struct mbuf *m; int deq; vq = txq->vtntx_vq; deq = 0; VTNET_TXQ_LOCK_ASSERT(txq); #ifdef DEV_NETMAP if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) { virtqueue_disable_intr(vq); // XXX luigi return 0; // XXX or 1 ? } #endif /* DEV_NETMAP */ while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { m = txhdr->vth_mbuf; deq++; txq->vtntx_stats.vtxs_opackets++; txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vtntx_stats.vtxs_omcasts++; m_freem(m); uma_zfree(vtnet_tx_header_zone, txhdr); } if (virtqueue_empty(vq)) txq->vtntx_watchdog = 0; return (deq); } static void vtnet_tx_vq_intr(void *xtxq) { struct vtnet_softc *sc; struct vtnet_txq *txq; if_t ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_txq_disable_intr(txq); return; } VTNET_TXQ_LOCK(txq); if ((sc->vtnet_flags & VTNET_FLAG_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } static void vtnet_tx_start_all(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } } static void vtnet_qflush(if_t ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct mbuf *m; int i; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) m_freem(m); VTNET_TXQ_UNLOCK(txq); } } static int vtnet_watchdog(struct vtnet_txq *txq) { if_t ifp; ifp = txq->vtntx_sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if (txq->vtntx_watchdog == 1) { /* * Only drain completed frames if the watchdog is about to * expire. If any frames were drained, there may be enough * free descriptors now available to transmit queued frames. * In that case, the timer will immediately be decremented * below, but the timeout is generous enough that should not * be a problem. */ if (vtnet_txq_eof(txq) != 0) vtnet_txq_start(txq); } if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { VTNET_TXQ_UNLOCK(txq); return (0); } VTNET_TXQ_UNLOCK(txq); if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); return (1); } static void vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, struct vtnet_txq_stats *txacc) { bzero(rxacc, sizeof(struct vtnet_rxq_stats)); bzero(txacc, sizeof(struct vtnet_txq_stats)); for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { struct vtnet_rxq_stats *rxst; struct vtnet_txq_stats *txst; rxst = &sc->vtnet_rxqs[i].vtnrx_stats; rxacc->vrxs_ipackets += rxst->vrxs_ipackets; rxacc->vrxs_ibytes += rxst->vrxs_ibytes; rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; rxacc->vrxs_csum += rxst->vrxs_csum; rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; txst = &sc->vtnet_txqs[i].vtntx_stats; txacc->vtxs_opackets += txst->vtxs_opackets; txacc->vtxs_obytes += txst->vtxs_obytes; txacc->vtxs_csum += txst->vtxs_csum; txacc->vtxs_tso += txst->vtxs_tso; txacc->vtxs_rescheduled += txst->vtxs_rescheduled; } } static uint64_t vtnet_get_counter(if_t ifp, ift_counter cnt) { struct vtnet_softc *sc; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); vtnet_accum_stats(sc, &rxaccum, &txaccum); switch (cnt) { case IFCOUNTER_IPACKETS: return (rxaccum.vrxs_ipackets); case IFCOUNTER_IQDROPS: return (rxaccum.vrxs_iqdrops); case IFCOUNTER_IERRORS: return (rxaccum.vrxs_ierrors); case IFCOUNTER_OPACKETS: return (txaccum.vtxs_opackets); case IFCOUNTER_OBYTES: return (txaccum.vtxs_obytes); case IFCOUNTER_OMCASTS: return (txaccum.vtxs_omcasts); default: return (if_get_counter_default(ifp, cnt)); } } static void vtnet_tick(void *xsc) { struct vtnet_softc *sc; if_t ifp; int i, timedout; sc = xsc; ifp = sc->vtnet_ifp; timedout = 0; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); if (timedout != 0) { sc->vtnet_flags &= ~VTNET_FLAG_RUNNING; vtnet_init(sc); } else callout_schedule(&sc->vtnet_tick_ch, hz); } static void vtnet_start_taskqueues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, error; dev = sc->vtnet_dev; /* * Errors here are very difficult to recover from - we cannot * easily fail because, if this is during boot, we will hang * when freeing any successfully started taskqueues because * the scheduler isn't up yet. * * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); if (error) { device_printf(dev, "failed to start rx taskq %d\n", rxq->vtnrx_id); } txq = &sc->vtnet_txqs[i]; error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); if (error) { device_printf(dev, "failed to start tx taskq %d\n", txq->vtntx_id); } } } static void vtnet_free_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) { taskqueue_free(rxq->vtnrx_tq); rxq->vtnrx_vq = NULL; } txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_free(txq->vtntx_tq); txq->vtntx_tq = NULL; } } } static void vtnet_drain_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); } } } static void vtnet_drain_rxtx_queues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; vtnet_rxq_free_mbufs(rxq); txq = &sc->vtnet_txqs[i]; vtnet_txq_free_mbufs(txq); } } static void vtnet_stop_rendezvous(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; /* * Lock and unlock the per-queue mutex so we known the stop * state is visible. Doing only the active queues should be * sufficient, but it does not cost much extra to do all the * queues. Note we hold the core mutex here too. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; VTNET_RXQ_LOCK(rxq); VTNET_RXQ_UNLOCK(rxq); txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); VTNET_TXQ_UNLOCK(txq); } } static void vtnet_stop(struct vtnet_softc *sc) { device_t dev; if_t ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); sc->vtnet_flags &= ~VTNET_FLAG_RUNNING; sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); /* Only advisory. */ vtnet_disable_interrupts(sc); /* * Stop the host adapter. This resets it to the pre-initialized * state. It will not generate any interrupts until after it is * reinitialized. */ virtio_stop(dev); vtnet_stop_rendezvous(sc); /* Free any mbufs left in the virtqueues. */ vtnet_drain_rxtx_queues(sc); } static int vtnet_virtio_reinit(struct vtnet_softc *sc) { device_t dev; if_t ifp; uint64_t features; uint32_t mask; int error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_features; mask = 0; #if defined(INET) mask |= IFCAP_RXCSUM; #endif #if defined (INET6) mask |= IFCAP_RXCSUM_IPV6; #endif /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist. * * We require both IPv4 and IPv6 offloading to be enabled * in order to negotiated it: VirtIO does not distinguish * between the two. */ if ((sc->vtnet_capenable & mask) != mask) features &= ~VIRTIO_NET_F_GUEST_CSUM; if ((sc->vtnet_capenable & IFCAP_LRO) == 0) features &= ~VTNET_LRO_FEATURES; if ((sc->vtnet_capenable & IFCAP_VLAN_HWFILTER) == 0) features &= ~VIRTIO_NET_F_CTRL_VLAN; error = virtio_reinit(dev, features); if (error) device_printf(dev, "virtio reinit error %d\n", error); return (error); } static void vtnet_init_rx_filters(struct vtnet_softc *sc) { if_t ifp; ifp = sc->vtnet_ifp; if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { /* Restore promiscuous and all-multicast modes. */ vtnet_rx_filter(sc); /* Restore filtered MAC addresses. */ vtnet_rx_filter_mac(sc); } if (sc->vtnet_capenable & IFCAP_VLAN_HWFILTER) vtnet_rx_filter_vlan(sc); } static int vtnet_init_rx_queues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; int i, clsize, error; dev = sc->vtnet_dev; /* * Use the new cluster size if one has been set (via a MTU * change). Otherwise, use the standard 2K clusters. * * BMV: It might make sense to use page sized clusters as * the default (depending on the features negotiated). */ if (sc->vtnet_rx_new_clsize != 0) { clsize = sc->vtnet_rx_new_clsize; sc->vtnet_rx_new_clsize = 0; } else clsize = MCLBYTES; sc->vtnet_rx_clsize = clsize; sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, ("%s: too many rx mbufs %d for %d segments", __func__, sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); #ifdef DEV_NETMAP if (vtnet_netmap_init_rx_buffers(sc)) return 0; #endif /* DEV_NETMAP */ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; /* Hold the lock to satisfy asserts. */ VTNET_RXQ_LOCK(rxq); error = vtnet_rxq_populate(rxq); VTNET_RXQ_UNLOCK(rxq); if (error) { device_printf(dev, "cannot allocate mbufs for Rx queue %d\n", i); return (error); } } return (0); } static int vtnet_init_tx_queues(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; txq->vtntx_watchdog = 0; } return (0); } static int vtnet_init_rxtx_queues(struct vtnet_softc *sc) { int error; error = vtnet_init_rx_queues(sc); if (error) return (error); error = vtnet_init_tx_queues(sc); if (error) return (error); return (0); } static void vtnet_set_active_vq_pairs(struct vtnet_softc *sc) { device_t dev; int npairs; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { MPASS(sc->vtnet_max_vq_pairs == 1); sc->vtnet_act_vq_pairs = 1; return; } /* BMV: Just use the maximum configured for now. */ npairs = sc->vtnet_max_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, "cannot set active queue pairs to %d\n", npairs); npairs = 1; } sc->vtnet_act_vq_pairs = npairs; } static int vtnet_reinit(struct vtnet_softc *sc) { if_t ifp; int error; ifp = sc->vtnet_ifp; /* Use the current MAC address. */ bcopy(if_lladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); vtnet_set_hwaddr(sc); vtnet_set_active_vq_pairs(sc); if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) vtnet_init_rx_filters(sc); error = vtnet_init_rxtx_queues(sc); if (error) return (error); vtnet_enable_interrupts(sc); sc->vtnet_flags |= VTNET_FLAG_RUNNING; return (0); } static void vtnet_init(struct vtnet_softc *sc) { device_t dev; if_t ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (sc->vtnet_flags & VTNET_FLAG_RUNNING) return; vtnet_stop(sc); /* Reinitialize with the host. */ if (vtnet_virtio_reinit(sc) != 0) goto fail; if (vtnet_reinit(sc) != 0) goto fail; virtio_reinit_complete(dev); vtnet_update_link_status(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); return; fail: vtnet_stop(sc); } static void vtnet_free_ctrl_vq(struct vtnet_softc *sc) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; /* * The control virtqueue is only polled and therefore it should * already be empty. */ KASSERT(virtqueue_empty(vq), ("%s: ctrl vq %p not empty", __func__, vq)); } static void vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, ("%s: CTRL_VQ feature not negotiated", __func__)); if (!virtqueue_empty(vq)) return; if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) return; /* * Poll for the response, but the command is likely already * done when we return from the notify. */ virtqueue_notify(vq); virtqueue_poll(vq, NULL); } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct sglist_seg segs[3]; struct sglist sg; uint8_t ack; int error; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding set MAC msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); return (ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; struct virtio_net_ctrl_mq mq; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; s.mq.virtqueue_pairs = npairs; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding MQ message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint8_t onoff; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = !!on; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding Rx message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_set_promisc(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); } static int vtnet_set_allmulti(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } /* * The device defaults to promiscuous mode for backwards compatibility. * Turn it off at attach time if possible. */ static void vtnet_attach_disable_promisc(struct vtnet_softc *sc) { struct ifreq ifr; if_t ifp; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { (void )if_drvioctl(ifp, SIOCGIFFLAGS, &ifr, curthread); ifr.ifr_flags |= IFF_PROMISC; (void )if_drvioctl(ifp, SIOCSIFFLAGS, &ifr, curthread); } else if (vtnet_set_promisc(sc, 0) != 0) { (void )if_drvioctl(ifp, SIOCGIFFLAGS, &ifr, curthread); ifr.ifr_flags |= IFF_PROMISC; (void )if_drvioctl(ifp, SIOCSIFFLAGS, &ifr, curthread); device_printf(sc->vtnet_dev, "cannot disable default promiscuous mode\n"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_rx_filter(struct vtnet_softc *sc) { device_t dev; if_t ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (vtnet_set_promisc(sc, sc->vtnet_if_flags & IFF_PROMISC) != 0) device_printf(dev, "cannot %s promiscuous mode\n", sc->vtnet_if_flags & IFF_PROMISC ? "enable" : "disable"); if (vtnet_set_allmulti(sc, sc->vtnet_if_flags & IFF_ALLMULTI) != 0) device_printf(dev, "cannot %s all-multicast mode\n", sc->vtnet_if_flags & IFF_ALLMULTI ? "enable" : "disable"); } static void vtnet_copy_unicast_mac(void *arg, struct sockaddr *addr, struct sockaddr *dstaddr, struct sockaddr *mask) { struct vtnet_softc *sc = arg; struct vtnet_mac_filter *filter = sc->vtnet_mac_filter; struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; if (addr->sa_family != AF_LINK) return; if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) return; if (filter->vmf_unicast.nentries == VTNET_MAX_MAC_ENTRIES) { filter->vmf_unicast.nentries++; return; } bcopy(LLADDR(sdl), &filter->vmf_unicast.macs[filter->vmf_unicast.nentries++], ETHER_ADDR_LEN); } static void vtnet_copy_multicast_mac(void *arg, struct sockaddr *maddr) { struct vtnet_softc *sc = arg; struct vtnet_mac_filter *filter = sc->vtnet_mac_filter; struct sockaddr_dl *sdl = (struct sockaddr_dl *)maddr; if (maddr->sa_family != AF_LINK) return; if (filter->vmf_multicast.nentries == VTNET_MAX_MAC_ENTRIES) { filter->vmf_multicast.nentries++; return; } bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[filter->vmf_multicast.nentries++], ETHER_ADDR_LEN); } static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { struct virtio_net_ctrl_hdr hdr __aligned(2); struct vtnet_mac_filter *filter; struct sglist_seg segs[4]; struct sglist sg; if_t ifp; int promisc, allmulti, error; uint8_t ack; ifp = sc->vtnet_ifp; filter = sc->vtnet_mac_filter; promisc = 0; allmulti = 0; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ if_foreach_addr(ifp, vtnet_copy_unicast_mac, sc); if (filter->vmf_unicast.nentries > VTNET_MAX_MAC_ENTRIES) { promisc = 1; filter->vmf_unicast.nentries = 0; if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); } /* Multicast MAC addresses: */ if_foreach_maddr(ifp, vtnet_copy_multicast_mac, sc); if (filter->vmf_multicast.nentries > VTNET_MAX_MAC_ENTRIES) { allmulti = 1; filter->vmf_multicast.nentries = 0; if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } if (promisc != 0 && allmulti != 0) goto out; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 4, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &filter->vmf_multicast, sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 4, ("%s: error %d adding MAC filter msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); out: if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint16_t tag; uint8_t pad2; uint8_t ack; } s __aligned(2); int error; s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; s.tag = tag; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding VLAN message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { uint32_t w; uint16_t tag; int i, bit; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, ("%s: VLAN_FILTER feature not negotiated", __func__)); /* Enable the filter for each configured VLAN. */ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { w = sc->vtnet_vlan_filter[i]; while ((bit = ffs(w) - 1) != -1) { w &= ~(1 << bit); tag = sizeof(w) * CHAR_BIT * i + bit; if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { device_printf(sc->vtnet_dev, "cannot enable VLAN %d filter\n", tag); } } } } static void vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { if_t ifp; int idx, bit; ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VTNET_CORE_LOCK(sc); if (add) sc->vtnet_vlan_filter[idx] |= (1 << bit); else sc->vtnet_vlan_filter[idx] &= ~(1 << bit); if ((sc->vtnet_capenable & IFCAP_VLAN_HWFILTER) && vtnet_exec_vlan_filter(sc, add, tag) != 0) { device_printf(sc->vtnet_dev, "cannot %s VLAN %d %s the host filter table\n", add ? "add" : "remove", tag, add ? "to" : "from"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag) { if (if_getsoftc(ifp, IF_DRIVER_SOFTC) != arg) return; vtnet_update_vlan_filter(arg, 1, tag); } static void vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag) { if (if_getsoftc(ifp, IF_DRIVER_SOFTC) != arg) return; vtnet_update_vlan_filter(arg, 0, tag); } static int vtnet_is_link_up(struct vtnet_softc *sc) { device_t dev; if_t ifp; uint16_t status; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; if (!virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) status = VIRTIO_NET_S_LINK_UP; else status = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, status)); return ((status & VIRTIO_NET_S_LINK_UP) != 0); } static void vtnet_update_link_status(struct vtnet_softc *sc) { if_t ifp; int link; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); link = vtnet_is_link_up(sc); /* Notify if the link status has changed. */ if (link != 0 && sc->vtnet_link_active == 0) { sc->vtnet_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vtnet_link_active != 0) { sc->vtnet_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static int vtnet_ifmedia_upd(if_t ifp) { struct vtnet_softc *sc; struct ifmedia *ifm; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); ifm = &sc->vtnet_media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct vtnet_softc *sc; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= VTNET_MEDIATYPE; } else ifmr->ifm_active |= IFM_NONE; VTNET_CORE_UNLOCK(sc); } static void vtnet_set_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) device_printf(dev, "unable to set MAC address\n"); } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { for (i = 0; i < ETHER_ADDR_LEN; i++) { virtio_write_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i, sc->vtnet_hwaddr[i]); } } } static void vtnet_get_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { /* * Generate a random locally administered unicast address. * * It would be nice to generate the same MAC address across * reboots, but it seems all the hosts currently available * support the MAC feature, so this isn't too important. */ sc->vtnet_hwaddr[0] = 0xB2; arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); vtnet_set_hwaddr(sc); return; } for (i = 0; i < ETHER_ADDR_LEN; i++) { sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i); } } static void vtnet_vlan_tag_remove(struct mbuf *m) { struct ether_vlan_header *evh; evh = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); m->m_flags |= M_VLANTAG; /* Strip the 802.1Q header. */ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); } static void vtnet_set_rx_process_limit(struct vtnet_softc *sc) { int limit; limit = vtnet_tunable_int(sc, "rx_process_limit", vtnet_rx_process_limit); if (limit < 0) limit = INT_MAX; sc->vtnet_rx_process_limit = limit; } static void vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) { device_t dev; int size, thresh; dev = sc->vtnet_dev; size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); /* * The Tx interrupt is disabled until the queue free count falls * below our threshold. Completed frames are drained from the Tx * virtqueue before transmitting new frames and in the watchdog * callout, so the frequency of Tx interrupts is greatly reduced, * at the cost of not freeing mbufs as quickly as they otherwise * would be. * * N.B. We assume all the Tx queues are the same size. */ thresh = size / 4; /* * Without indirect descriptors, leave enough room for the most * segments we handle. */ if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && thresh < sc->vtnet_tx_nsegs) thresh = sc->vtnet_tx_nsegs; sc->vtnet_tx_intr_thresh = thresh; } static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_rxq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); list = SYSCTL_CHILDREN(node); stats = &rxq->vtnrx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vrxs_ierrors, "Receive errors"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vrxs_csum, "Receive checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, &stats->vrxs_csum_failed, "Receive checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); } static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_txq *txq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_txq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); list = SYSCTL_CHILDREN(node); stats = &txq->vtntx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vtxs_tso, "Transmit segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); } static void vtnet_setup_queue_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; int i; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); } } static void vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_softc *sc) { struct vtnet_statistics *stats; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; vtnet_accum_stats(sc, &rxaccum, &txaccum); stats = &sc->vtnet_stats; stats->rx_csum_offloaded = rxaccum.vrxs_csum; stats->rx_csum_failed = rxaccum.vrxs_csum_failed; stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; stats->tx_csum_offloaded = txaccum.vtxs_csum; stats->tx_tso_offloaded = txaccum.vtxs_tso; stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", CTLFLAG_RD, &stats->rx_csum_bad_proto, "Received checksum offloaded buffer with incorrect protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", CTLFLAG_RD, &stats->tx_csum_bad_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", CTLFLAG_RD, &stats->tx_tso_bad_ethtype, "Aborted transmit of TSO buffer with unknown Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", CTLFLAG_RD, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with non TCP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", CTLFLAG_RD, &stats->tx_defragged, "Transmit mbufs defragged"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", CTLFLAG_RD, &stats->tx_defrag_failed, "Aborted transmit of buffer because defrag failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", CTLFLAG_RD, &stats->tx_csum_offloaded, "Offloaded checksum of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", CTLFLAG_RD, &stats->tx_tso_offloaded, "Segmentation offload of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } static void vtnet_setup_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Maximum number of supported virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); vtnet_setup_stat_sysctl(ctx, child, sc); } static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { virtqueue_disable_intr(rxq->vtnrx_vq); } static int vtnet_txq_enable_intr(struct vtnet_txq *txq) { struct virtqueue *vq; vq = txq->vtntx_vq; if (vtnet_txq_below_threshold(txq) != 0) return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); /* * The free count is above our threshold. Keep the Tx interrupt * disabled until the queue is fuller. */ return (0); } static void vtnet_txq_disable_intr(struct vtnet_txq *txq) { virtqueue_disable_intr(txq->vtntx_vq); } static void vtnet_enable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_enable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); } static void vtnet_enable_interrupts(struct vtnet_softc *sc) { vtnet_enable_rx_interrupts(sc); vtnet_enable_tx_interrupts(sc); } static void vtnet_disable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_disable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); } static void vtnet_disable_interrupts(struct vtnet_softc *sc) { vtnet_disable_rx_interrupts(sc); vtnet_disable_tx_interrupts(sc); } static int vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } Index: projects/ifnet/sys/dev/xl/if_xl.c =================================================================== --- projects/ifnet/sys/dev/xl/if_xl.c (revision 280372) +++ projects/ifnet/sys/dev/xl/if_xl.c (revision 280373) @@ -1,3222 +1,3221 @@ /*- * Copyright (c) 1997, 1998, 1999 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * 3Com 3c90x Etherlink XL PCI NIC driver * * Supports the 3Com "boomerang", "cyclone" and "hurricane" PCI * bus-master chips (3c90x cards and embedded controllers) including * the following: * * 3Com 3c900-TPO 10Mbps/RJ-45 * 3Com 3c900-COMBO 10Mbps/RJ-45,AUI,BNC * 3Com 3c905-TX 10/100Mbps/RJ-45 * 3Com 3c905-T4 10/100Mbps/RJ-45 * 3Com 3c900B-TPO 10Mbps/RJ-45 * 3Com 3c900B-COMBO 10Mbps/RJ-45,AUI,BNC * 3Com 3c900B-TPC 10Mbps/RJ-45,BNC * 3Com 3c900B-FL 10Mbps/Fiber-optic * 3Com 3c905B-COMBO 10/100Mbps/RJ-45,AUI,BNC * 3Com 3c905B-TX 10/100Mbps/RJ-45 * 3Com 3c905B-FL/FX 10/100Mbps/Fiber-optic * 3Com 3c905C-TX 10/100Mbps/RJ-45 (Tornado ASIC) * 3Com 3c980-TX 10/100Mbps server adapter (Hurricane ASIC) * 3Com 3c980C-TX 10/100Mbps server adapter (Tornado ASIC) * 3Com 3cSOHO100-TX 10/100Mbps/RJ-45 (Hurricane ASIC) * 3Com 3c450-TX 10/100Mbps/RJ-45 (Tornado ASIC) * 3Com 3c555 10/100Mbps/RJ-45 (MiniPCI, Laptop Hurricane) * 3Com 3c556 10/100Mbps/RJ-45 (MiniPCI, Hurricane ASIC) * 3Com 3c556B 10/100Mbps/RJ-45 (MiniPCI, Hurricane ASIC) * 3Com 3c575TX 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3c575B 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3c575C 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3cxfem656 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3cxfem656b 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3cxfem656c 10/100Mbps/RJ-45 (Cardbus, Tornado ASIC) * Dell Optiplex GX1 on-board 3c918 10/100Mbps/RJ-45 * Dell on-board 3c920 10/100Mbps/RJ-45 * Dell Precision on-board 3c905B 10/100Mbps/RJ-45 * Dell Latitude laptop docking station embedded 3c905-TX * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * The 3c90x series chips use a bus-master DMA interface for transfering * packets to and from the controller chip. Some of the "vortex" cards * (3c59x) also supported a bus master mode, however for those chips * you could only DMA packets to/from a contiguous memory buffer. For * transmission this would mean copying the contents of the queued mbuf * chain into an mbuf cluster and then DMAing the cluster. This extra * copy would sort of defeat the purpose of the bus master support for * any packet that doesn't fit into a single mbuf. * * By contrast, the 3c90x cards support a fragment-based bus master * mode where mbuf chains can be encapsulated using TX descriptors. * This is similar to other PCI chips such as the Texas Instruments * ThunderLAN and the Intel 82557/82558. * * The "vortex" driver (if_vx.c) happens to work for the "boomerang" * bus master chips because they maintain the old PIO interface for * backwards compatibility, but starting with the 3c905B and the * "cyclone" chips, the compatibility interface has been dropped. * Since using bus master DMA is a big win, we use this driver to * support the PCI "boomerang" chips even though they work with the * "vortex" driver in order to obtain better performance. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(xl, pci, 1, 1, 1); MODULE_DEPEND(xl, ether, 1, 1, 1); MODULE_DEPEND(xl, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #include /* * TX Checksumming is disabled by default for two reasons: * - TX Checksumming will occasionally produce corrupt packets * - TX Checksumming seems to reduce performance * * Only 905B/C cards were reported to have this problem, it is possible * that later chips _may_ be immune. */ #define XL905B_TXCSUM_BROKEN 1 #ifdef XL905B_TXCSUM_BROKEN #define XL905B_CSUM_FEATURES 0 #else #define XL905B_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) #endif /* * Various supported device vendors/types and their names. */ static const struct xl_type xl_devs[] = { { TC_VENDORID, TC_DEVICEID_BOOMERANG_10BT, "3Com 3c900-TPO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_BOOMERANG_10BT_COMBO, "3Com 3c900-COMBO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_BOOMERANG_10_100BT, "3Com 3c905-TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_BOOMERANG_100BT4, "3Com 3c905-T4 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_KRAKATOA_10BT, "3Com 3c900B-TPO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_KRAKATOA_10BT_COMBO, "3Com 3c900B-COMBO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_KRAKATOA_10BT_TPC, "3Com 3c900B-TPC Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10FL, "3Com 3c900B-FL Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_10_100BT, "3Com 3c905B-TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10_100BT4, "3Com 3c905B-T4 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10_100FX, "3Com 3c905B-FX/SC Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10_100_COMBO, "3Com 3c905B-COMBO Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT, "3Com 3c905C-TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT_920B, "3Com 3c920B-EMB Integrated Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT_920B_WNM, "3Com 3c920B-EMB-WNM Integrated Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_10_100BT_SERV, "3Com 3c980 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT_SERV, "3Com 3c980C Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_SOHO100TX, "3Com 3cSOHO100-TX OfficeConnect" }, { TC_VENDORID, TC_DEVICEID_TORNADO_HOMECONNECT, "3Com 3c450-TX HomeConnect" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_555, "3Com 3c555 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_556, "3Com 3c556 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_556B, "3Com 3c556B Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_575A, "3Com 3c575TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_575B, "3Com 3c575B Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_575C, "3Com 3c575C Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_656, "3Com 3c656 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_656B, "3Com 3c656B Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_656C, "3Com 3c656C Fast Etherlink XL" }, { 0, 0, NULL } }; static int xl_probe(device_t); static int xl_attach(device_t); static int xl_detach(device_t); static int xl_newbuf(struct xl_softc *, struct xl_chain_onefrag *); static void xl_tick(void *); static void xl_stats_update(struct xl_softc *); static int xl_encap(struct xl_softc *, struct xl_chain *, struct mbuf **); static int xl_rxeof(struct xl_softc *); static void xl_rxeof_task(void *, int); static int xl_rx_resync(struct xl_softc *); static void xl_txeof(struct xl_softc *); static void xl_txeof_90xB(struct xl_softc *); static void xl_txeoc(struct xl_softc *); static void xl_intr(void *); static int xl_transmit(if_t, struct mbuf *); static void xl_start_locked(struct xl_softc *); static void xl_start_90xB_locked(struct xl_softc *); static int xl_ioctl(if_t, u_long, void *, struct thread *); static void xl_init(struct xl_softc *); static void xl_stop(struct xl_softc *); static int xl_watchdog(struct xl_softc *); static int xl_shutdown(device_t); static int xl_suspend(device_t); static int xl_resume(device_t); static void xl_setwol(struct xl_softc *); #ifdef DEVICE_POLLING static int xl_poll(if_t, enum poll_cmd cmd, int count); #endif static int xl_ifmedia_upd(if_t); static void xl_ifmedia_sts(if_t, struct ifmediareq *); static int xl_eeprom_wait(struct xl_softc *); static int xl_read_eeprom(struct xl_softc *, caddr_t, int, int, int); static void xl_rxfilter(struct xl_softc *); static void xl_rxfilter_90x(struct xl_softc *); static void xl_rxfilter_90xB(struct xl_softc *); static void xl_setcfg(struct xl_softc *); static void xl_setmode(struct xl_softc *, int); static void xl_reset(struct xl_softc *); static int xl_list_rx_init(struct xl_softc *); static int xl_list_tx_init(struct xl_softc *); static int xl_list_tx_init_90xB(struct xl_softc *); static void xl_wait(struct xl_softc *); static void xl_mediacheck(struct xl_softc *); static void xl_choose_media(struct xl_softc *sc, int *media); static void xl_choose_xcvr(struct xl_softc *, int); static void xl_dma_map_addr(void *, bus_dma_segment_t *, int, int); #ifdef notdef static void xl_testpacket(struct xl_softc *); #endif static int xl_miibus_readreg(device_t, int, int); static int xl_miibus_writereg(device_t, int, int, int); static void xl_miibus_statchg(device_t); static void xl_miibus_mediainit(device_t); /* * MII bit-bang glue */ static uint32_t xl_mii_bitbang_read(device_t); static void xl_mii_bitbang_write(device_t, uint32_t); static const struct mii_bitbang_ops xl_mii_bitbang_ops = { xl_mii_bitbang_read, xl_mii_bitbang_write, { XL_MII_DATA, /* MII_BIT_MDO */ XL_MII_DATA, /* MII_BIT_MDI */ XL_MII_CLK, /* MII_BIT_MDC */ XL_MII_DIR, /* MII_BIT_DIR_HOST_PHY */ 0, /* MII_BIT_DIR_PHY_HOST */ } }; static device_method_t xl_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xl_probe), DEVMETHOD(device_attach, xl_attach), DEVMETHOD(device_detach, xl_detach), DEVMETHOD(device_shutdown, xl_shutdown), DEVMETHOD(device_suspend, xl_suspend), DEVMETHOD(device_resume, xl_resume), /* MII interface */ DEVMETHOD(miibus_readreg, xl_miibus_readreg), DEVMETHOD(miibus_writereg, xl_miibus_writereg), DEVMETHOD(miibus_statchg, xl_miibus_statchg), DEVMETHOD(miibus_mediainit, xl_miibus_mediainit), DEVMETHOD_END }; static driver_t xl_driver = { "xl", xl_methods, sizeof(struct xl_softc) }; static devclass_t xl_devclass; DRIVER_MODULE_ORDERED(xl, pci, xl_driver, xl_devclass, NULL, NULL, SI_ORDER_ANY); DRIVER_MODULE(miibus, xl, miibus_driver, miibus_devclass, NULL, NULL); static struct ifdriver xl_ifdrv = { .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, .ifop_ioctl = xl_ioctl, .ifop_transmit = xl_transmit, #ifdef DEVICE_POLLING .ifop_poll = xl_poll, #endif }, .ifdrv_name = "xl", .ifdrv_type = IFT_ETHER, .ifdrv_maxqlen = XL_TX_LIST_CNT - 1, }; static void xl_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { u_int32_t *paddr; paddr = arg; *paddr = segs->ds_addr; } /* * Murphy's law says that it's possible the chip can wedge and * the 'command in progress' bit may never clear. Hence, we wait * only a finite amount of time to avoid getting caught in an * infinite loop. Normally this delay routine would be a macro, * but it isn't called during normal operation so we can afford * to make it a function. Suppress warning when card gone. */ static void xl_wait(struct xl_softc *sc) { register int i; for (i = 0; i < XL_TIMEOUT; i++) { if ((CSR_READ_2(sc, XL_STATUS) & XL_STAT_CMDBUSY) == 0) break; } if (i == XL_TIMEOUT && bus_child_present(sc->xl_dev)) device_printf(sc->xl_dev, "command never completed!\n"); } /* * MII access routines are provided for adapters with external * PHYs (3c905-TX, 3c905-T4, 3c905B-T4) and those with built-in * autoneg logic that's faked up to look like a PHY (3c905B-TX). * Note: if you don't perform the MDIO operations just right, * it's possible to end up with code that works correctly with * some chips/CPUs/processor speeds/bus speeds/etc but not * with others. */ /* * Read the MII serial port for the MII bit-bang module. */ static uint32_t xl_mii_bitbang_read(device_t dev) { struct xl_softc *sc; uint32_t val; sc = device_get_softc(dev); /* We're already in window 4. */ val = CSR_READ_2(sc, XL_W4_PHY_MGMT); CSR_BARRIER(sc, XL_W4_PHY_MGMT, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (val); } /* * Write the MII serial port for the MII bit-bang module. */ static void xl_mii_bitbang_write(device_t dev, uint32_t val) { struct xl_softc *sc; sc = device_get_softc(dev); /* We're already in window 4. */ CSR_WRITE_2(sc, XL_W4_PHY_MGMT, val); CSR_BARRIER(sc, XL_W4_PHY_MGMT, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } static int xl_miibus_readreg(device_t dev, int phy, int reg) { struct xl_softc *sc; sc = device_get_softc(dev); /* Select the window 4. */ XL_SEL_WIN(4); return (mii_bitbang_readreg(dev, &xl_mii_bitbang_ops, phy, reg)); } static int xl_miibus_writereg(device_t dev, int phy, int reg, int data) { struct xl_softc *sc; sc = device_get_softc(dev); /* Select the window 4. */ XL_SEL_WIN(4); mii_bitbang_writereg(dev, &xl_mii_bitbang_ops, phy, reg, data); return (0); } static void xl_miibus_statchg(device_t dev) { struct xl_softc *sc; struct mii_data *mii; uint8_t macctl; sc = device_get_softc(dev); mii = device_get_softc(sc->xl_miibus); xl_setcfg(sc); /* Set ASIC's duplex mode to match the PHY. */ XL_SEL_WIN(3); macctl = CSR_READ_1(sc, XL_W3_MAC_CTRL); if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { macctl |= XL_MACCTRL_DUPLEX; if (sc->xl_type == XL_TYPE_905B) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) macctl |= XL_MACCTRL_FLOW_CONTROL_ENB; else macctl &= ~XL_MACCTRL_FLOW_CONTROL_ENB; } } else { macctl &= ~XL_MACCTRL_DUPLEX; if (sc->xl_type == XL_TYPE_905B) macctl &= ~XL_MACCTRL_FLOW_CONTROL_ENB; } CSR_WRITE_1(sc, XL_W3_MAC_CTRL, macctl); if (sc->xl_ifp != NULL) { if_setbaudrate(sc->xl_ifp, ifmedia_baudrate(mii->mii_media_active)); if_link_state_change(sc->xl_ifp, ifmedia_link_state(mii->mii_media_status)); } } /* * Special support for the 3c905B-COMBO. This card has 10/100 support * plus BNC and AUI ports. This means we will have both an miibus attached * plus some non-MII media settings. In order to allow this, we have to * add the extra media to the miibus's ifmedia struct, but we can't do * that during xl_attach() because the miibus hasn't been attached yet. * So instead, we wait until the miibus probe/attach is done, at which * point we will get a callback telling is that it's safe to add our * extra media. */ static void xl_miibus_mediainit(device_t dev) { struct xl_softc *sc; struct mii_data *mii; struct ifmedia *ifm; sc = device_get_softc(dev); mii = device_get_softc(sc->xl_miibus); ifm = &mii->mii_media; if (sc->xl_media & (XL_MEDIAOPT_AUI | XL_MEDIAOPT_10FL)) { /* * Check for a 10baseFL board in disguise. */ if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { if (bootverbose) device_printf(sc->xl_dev, "found 10baseFL\n"); ifmedia_add(ifm, IFM_ETHER | IFM_10_FL, 0, NULL); ifmedia_add(ifm, IFM_ETHER | IFM_10_FL|IFM_HDX, 0, NULL); if (sc->xl_caps & XL_CAPS_FULL_DUPLEX) ifmedia_add(ifm, IFM_ETHER | IFM_10_FL | IFM_FDX, 0, NULL); } else { if (bootverbose) device_printf(sc->xl_dev, "found AUI\n"); ifmedia_add(ifm, IFM_ETHER | IFM_10_5, 0, NULL); } } if (sc->xl_media & XL_MEDIAOPT_BNC) { if (bootverbose) device_printf(sc->xl_dev, "found BNC\n"); ifmedia_add(ifm, IFM_ETHER | IFM_10_2, 0, NULL); } } /* * The EEPROM is slow: give it time to come ready after issuing * it a command. */ static int xl_eeprom_wait(struct xl_softc *sc) { int i; for (i = 0; i < 100; i++) { if (CSR_READ_2(sc, XL_W0_EE_CMD) & XL_EE_BUSY) DELAY(162); else break; } if (i == 100) { device_printf(sc->xl_dev, "eeprom failed to come ready\n"); return (1); } return (0); } /* * Read a sequence of words from the EEPROM. Note that ethernet address * data is stored in the EEPROM in network byte order. */ static int xl_read_eeprom(struct xl_softc *sc, caddr_t dest, int off, int cnt, int swap) { int err = 0, i; u_int16_t word = 0, *ptr; #define EEPROM_5BIT_OFFSET(A) ((((A) << 2) & 0x7F00) | ((A) & 0x003F)) #define EEPROM_8BIT_OFFSET(A) ((A) & 0x003F) /* * XXX: WARNING! DANGER! * It's easy to accidentally overwrite the rom content! * Note: the 3c575 uses 8bit EEPROM offsets. */ XL_SEL_WIN(0); if (xl_eeprom_wait(sc)) return (1); if (sc->xl_flags & XL_FLAG_EEPROM_OFFSET_30) off += 0x30; for (i = 0; i < cnt; i++) { if (sc->xl_flags & XL_FLAG_8BITROM) CSR_WRITE_2(sc, XL_W0_EE_CMD, XL_EE_8BIT_READ | EEPROM_8BIT_OFFSET(off + i)); else CSR_WRITE_2(sc, XL_W0_EE_CMD, XL_EE_READ | EEPROM_5BIT_OFFSET(off + i)); err = xl_eeprom_wait(sc); if (err) break; word = CSR_READ_2(sc, XL_W0_EE_DATA); ptr = (u_int16_t *)(dest + (i * 2)); if (swap) *ptr = ntohs(word); else *ptr = word; } return (err ? 1 : 0); } static void xl_rxfilter(struct xl_softc *sc) { if (sc->xl_type == XL_TYPE_905B) xl_rxfilter_90xB(sc); else xl_rxfilter_90x(sc); } /* * NICs older than the 3c905B have only one multicast option, which * is to enable reception of all multicast frames. */ static void xl_check_maddr_90x(void *arg, struct sockaddr *maddr) { struct sockaddr *sa = (struct sockaddr *)maddr; uint8_t *rxfilt = arg; if (sa->sa_family == AF_LINK) *rxfilt |= XL_RXFILTER_ALLMULTI; } static void xl_rxfilter_90x(struct xl_softc *sc) { uint8_t rxfilt; XL_LOCK_ASSERT(sc); XL_SEL_WIN(5); rxfilt = CSR_READ_1(sc, XL_W5_RX_FILTER); rxfilt &= ~(XL_RXFILTER_ALLFRAMES | XL_RXFILTER_ALLMULTI | XL_RXFILTER_BROADCAST | XL_RXFILTER_INDIVIDUAL); /* Set the individual bit to receive frames for this host only. */ rxfilt |= XL_RXFILTER_INDIVIDUAL; /* Set capture broadcast bit to capture broadcast frames. */ rxfilt |= XL_RXFILTER_BROADCAST; /* If we want promiscuous mode, set the allframes bit. */ if (sc->xl_if_flags & (IFF_PROMISC | IFF_ALLMULTI)) { if (sc->xl_if_flags & IFF_PROMISC) rxfilt |= XL_RXFILTER_ALLFRAMES; if (sc->xl_if_flags & IFF_ALLMULTI) rxfilt |= XL_RXFILTER_ALLMULTI; } else if_foreach_maddr(sc->xl_ifp, xl_check_maddr_90x, &rxfilt); CSR_WRITE_2(sc, XL_COMMAND, rxfilt | XL_CMD_RX_SET_FILT); XL_SEL_WIN(7); } /* * 3c905B adapters have a hash filter that we can program. * Note: the 3c905B currently only supports a 64-bit hash table, which means * we really only need 6 bits, but the manual indicates that future chip * revisions will have a 256-bit hash table, hence the routine is set up to * calculate 8 bits of position info in case we need it some day. * Note II, The Sequel: _CURRENT_ versions of the 3c905B have a 256 bit hash * table. This means we have to use all 8 bits regardless. On older cards, * the upper 2 bits will be ignored. Grrrr.... */ static void xl_check_maddr_90xB(void *arg, struct sockaddr *maddr) { struct sockaddr_dl *sdl = (struct sockaddr_dl *)maddr; struct xl_softc *sc = arg; uint16_t h; if (sdl->sdl_family != AF_LINK) return; h = ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN) & 0xFF; CSR_WRITE_2(sc, XL_COMMAND, h | XL_CMD_RX_SET_HASH | XL_HASH_SET); } static void xl_rxfilter_90xB(struct xl_softc *sc) { uint8_t rxfilt; XL_LOCK_ASSERT(sc); XL_SEL_WIN(5); rxfilt = CSR_READ_1(sc, XL_W5_RX_FILTER); rxfilt &= ~(XL_RXFILTER_ALLFRAMES | XL_RXFILTER_ALLMULTI | XL_RXFILTER_BROADCAST | XL_RXFILTER_INDIVIDUAL | XL_RXFILTER_MULTIHASH); /* Set the individual bit to receive frames for this host only. */ rxfilt |= XL_RXFILTER_INDIVIDUAL; /* Set capture broadcast bit to capture broadcast frames. */ rxfilt |= XL_RXFILTER_BROADCAST; /* If we want promiscuous mode, set the allframes bit. */ if (sc->xl_if_flags & (IFF_PROMISC | IFF_ALLMULTI)) { if (sc->xl_if_flags & IFF_PROMISC) rxfilt |= XL_RXFILTER_ALLFRAMES; if (sc->xl_if_flags & IFF_ALLMULTI) rxfilt |= XL_RXFILTER_ALLMULTI; } else { /* First, zot all the existing hash bits. */ for (int i = 0; i < XL_HASHFILT_SIZE; i++) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_SET_HASH | i); /* Now program new ones. */ if_foreach_maddr(sc->xl_ifp, xl_check_maddr_90xB, sc); /* * XXXGL: a bit dirty, but easier then make a context * containing softc and rxfilt. */ if_foreach_maddr(sc->xl_ifp, xl_check_maddr_90x, &rxfilt); } CSR_WRITE_2(sc, XL_COMMAND, rxfilt | XL_CMD_RX_SET_FILT); XL_SEL_WIN(7); } static void xl_setcfg(struct xl_softc *sc) { u_int32_t icfg; /*XL_LOCK_ASSERT(sc);*/ XL_SEL_WIN(3); icfg = CSR_READ_4(sc, XL_W3_INTERNAL_CFG); icfg &= ~XL_ICFG_CONNECTOR_MASK; if (sc->xl_media & XL_MEDIAOPT_MII || sc->xl_media & XL_MEDIAOPT_BT4) icfg |= (XL_XCVR_MII << XL_ICFG_CONNECTOR_BITS); if (sc->xl_media & XL_MEDIAOPT_BTX) icfg |= (XL_XCVR_AUTO << XL_ICFG_CONNECTOR_BITS); CSR_WRITE_4(sc, XL_W3_INTERNAL_CFG, icfg); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); } static void xl_setmode(struct xl_softc *sc, int media) { u_int32_t icfg; u_int16_t mediastat; char *pmsg = "", *dmsg = ""; XL_LOCK_ASSERT(sc); XL_SEL_WIN(4); mediastat = CSR_READ_2(sc, XL_W4_MEDIA_STATUS); XL_SEL_WIN(3); icfg = CSR_READ_4(sc, XL_W3_INTERNAL_CFG); if (sc->xl_media & XL_MEDIAOPT_BT) { if (IFM_SUBTYPE(media) == IFM_10_T) { pmsg = "10baseT transceiver"; sc->xl_xcvr = XL_XCVR_10BT; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_10BT << XL_ICFG_CONNECTOR_BITS); mediastat |= XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD; mediastat &= ~XL_MEDIASTAT_SQEENB; } } if (sc->xl_media & XL_MEDIAOPT_BFX) { if (IFM_SUBTYPE(media) == IFM_100_FX) { pmsg = "100baseFX port"; sc->xl_xcvr = XL_XCVR_100BFX; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_100BFX << XL_ICFG_CONNECTOR_BITS); mediastat |= XL_MEDIASTAT_LINKBEAT; mediastat &= ~XL_MEDIASTAT_SQEENB; } } if (sc->xl_media & (XL_MEDIAOPT_AUI|XL_MEDIAOPT_10FL)) { if (IFM_SUBTYPE(media) == IFM_10_5) { pmsg = "AUI port"; sc->xl_xcvr = XL_XCVR_AUI; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_AUI << XL_ICFG_CONNECTOR_BITS); mediastat &= ~(XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD); mediastat |= ~XL_MEDIASTAT_SQEENB; } if (IFM_SUBTYPE(media) == IFM_10_FL) { pmsg = "10baseFL transceiver"; sc->xl_xcvr = XL_XCVR_AUI; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_AUI << XL_ICFG_CONNECTOR_BITS); mediastat &= ~(XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD); mediastat |= ~XL_MEDIASTAT_SQEENB; } } if (sc->xl_media & XL_MEDIAOPT_BNC) { if (IFM_SUBTYPE(media) == IFM_10_2) { pmsg = "AUI port"; sc->xl_xcvr = XL_XCVR_COAX; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_COAX << XL_ICFG_CONNECTOR_BITS); mediastat &= ~(XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD | XL_MEDIASTAT_SQEENB); } } if ((media & IFM_GMASK) == IFM_FDX || IFM_SUBTYPE(media) == IFM_100_FX) { dmsg = "full"; XL_SEL_WIN(3); CSR_WRITE_1(sc, XL_W3_MAC_CTRL, XL_MACCTRL_DUPLEX); } else { dmsg = "half"; XL_SEL_WIN(3); CSR_WRITE_1(sc, XL_W3_MAC_CTRL, (CSR_READ_1(sc, XL_W3_MAC_CTRL) & ~XL_MACCTRL_DUPLEX)); } if (IFM_SUBTYPE(media) == IFM_10_2) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_START); else CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); CSR_WRITE_4(sc, XL_W3_INTERNAL_CFG, icfg); XL_SEL_WIN(4); CSR_WRITE_2(sc, XL_W4_MEDIA_STATUS, mediastat); DELAY(800); XL_SEL_WIN(7); device_printf(sc->xl_dev, "selecting %s, %s duplex\n", pmsg, dmsg); } static void xl_reset(struct xl_softc *sc) { register int i; XL_LOCK_ASSERT(sc); XL_SEL_WIN(0); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RESET | ((sc->xl_flags & XL_FLAG_WEIRDRESET) ? XL_RESETOPT_DISADVFD:0)); /* * If we're using memory mapped register mode, pause briefly * after issuing the reset command before trying to access any * other registers. With my 3c575C CardBus card, failing to do * this results in the system locking up while trying to poll * the command busy bit in the status register. */ if (sc->xl_flags & XL_FLAG_USE_MMIO) DELAY(100000); for (i = 0; i < XL_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_2(sc, XL_STATUS) & XL_STAT_CMDBUSY)) break; } if (i == XL_TIMEOUT) device_printf(sc->xl_dev, "reset didn't complete\n"); /* Reset TX and RX. */ /* Note: the RX reset takes an absurd amount of time * on newer versions of the Tornado chips such as those * on the 3c905CX and newer 3c908C cards. We wait an * extra amount of time so that xl_wait() doesn't complain * and annoy the users. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); DELAY(100000); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); if (sc->xl_flags & XL_FLAG_INVERT_LED_PWR || sc->xl_flags & XL_FLAG_INVERT_MII_PWR) { XL_SEL_WIN(2); CSR_WRITE_2(sc, XL_W2_RESET_OPTIONS, CSR_READ_2(sc, XL_W2_RESET_OPTIONS) | ((sc->xl_flags & XL_FLAG_INVERT_LED_PWR) ? XL_RESETOPT_INVERT_LED : 0) | ((sc->xl_flags & XL_FLAG_INVERT_MII_PWR) ? XL_RESETOPT_INVERT_MII : 0)); } /* Wait a little while for the chip to get its brains in order. */ DELAY(100000); } /* * Probe for a 3Com Etherlink XL chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int xl_probe(device_t dev) { const struct xl_type *t; t = xl_devs; while (t->xl_name != NULL) { if ((pci_get_vendor(dev) == t->xl_vid) && (pci_get_device(dev) == t->xl_did)) { device_set_desc(dev, t->xl_name); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } /* * This routine is a kludge to work around possible hardware faults * or manufacturing defects that can cause the media options register * (or reset options register, as it's called for the first generation * 3c90x adapters) to return an incorrect result. I have encountered * one Dell Latitude laptop docking station with an integrated 3c905-TX * which doesn't have any of the 'mediaopt' bits set. This screws up * the attach routine pretty badly because it doesn't know what media * to look for. If we find ourselves in this predicament, this routine * will try to guess the media options values and warn the user of a * possible manufacturing defect with his adapter/system/whatever. */ static void xl_mediacheck(struct xl_softc *sc) { /* * If some of the media options bits are set, assume they are * correct. If not, try to figure it out down below. * XXX I should check for 10baseFL, but I don't have an adapter * to test with. */ if (sc->xl_media & (XL_MEDIAOPT_MASK & ~XL_MEDIAOPT_VCO)) { /* * Check the XCVR value. If it's not in the normal range * of values, we need to fake it up here. */ if (sc->xl_xcvr <= XL_XCVR_AUTO) return; else { device_printf(sc->xl_dev, "bogus xcvr value in EEPROM (%x)\n", sc->xl_xcvr); device_printf(sc->xl_dev, "choosing new default based on card type\n"); } } else { if (sc->xl_type == XL_TYPE_905B && sc->xl_media & XL_MEDIAOPT_10FL) return; device_printf(sc->xl_dev, "WARNING: no media options bits set in the media options register!!\n"); device_printf(sc->xl_dev, "this could be a manufacturing defect in your adapter or system\n"); device_printf(sc->xl_dev, "attempting to guess media type; you should probably consult your vendor\n"); } xl_choose_xcvr(sc, 1); } static void xl_choose_xcvr(struct xl_softc *sc, int verbose) { u_int16_t devid; /* * Read the device ID from the EEPROM. * This is what's loaded into the PCI device ID register, so it has * to be correct otherwise we wouldn't have gotten this far. */ xl_read_eeprom(sc, (caddr_t)&devid, XL_EE_PRODID, 1, 0); switch (devid) { case TC_DEVICEID_BOOMERANG_10BT: /* 3c900-TPO */ case TC_DEVICEID_KRAKATOA_10BT: /* 3c900B-TPO */ sc->xl_media = XL_MEDIAOPT_BT; sc->xl_xcvr = XL_XCVR_10BT; if (verbose) device_printf(sc->xl_dev, "guessing 10BaseT transceiver\n"); break; case TC_DEVICEID_BOOMERANG_10BT_COMBO: /* 3c900-COMBO */ case TC_DEVICEID_KRAKATOA_10BT_COMBO: /* 3c900B-COMBO */ sc->xl_media = XL_MEDIAOPT_BT|XL_MEDIAOPT_BNC|XL_MEDIAOPT_AUI; sc->xl_xcvr = XL_XCVR_10BT; if (verbose) device_printf(sc->xl_dev, "guessing COMBO (AUI/BNC/TP)\n"); break; case TC_DEVICEID_KRAKATOA_10BT_TPC: /* 3c900B-TPC */ sc->xl_media = XL_MEDIAOPT_BT|XL_MEDIAOPT_BNC; sc->xl_xcvr = XL_XCVR_10BT; if (verbose) device_printf(sc->xl_dev, "guessing TPC (BNC/TP)\n"); break; case TC_DEVICEID_CYCLONE_10FL: /* 3c900B-FL */ sc->xl_media = XL_MEDIAOPT_10FL; sc->xl_xcvr = XL_XCVR_AUI; if (verbose) device_printf(sc->xl_dev, "guessing 10baseFL\n"); break; case TC_DEVICEID_BOOMERANG_10_100BT: /* 3c905-TX */ case TC_DEVICEID_HURRICANE_555: /* 3c555 */ case TC_DEVICEID_HURRICANE_556: /* 3c556 */ case TC_DEVICEID_HURRICANE_556B: /* 3c556B */ case TC_DEVICEID_HURRICANE_575A: /* 3c575TX */ case TC_DEVICEID_HURRICANE_575B: /* 3c575B */ case TC_DEVICEID_HURRICANE_575C: /* 3c575C */ case TC_DEVICEID_HURRICANE_656: /* 3c656 */ case TC_DEVICEID_HURRICANE_656B: /* 3c656B */ case TC_DEVICEID_TORNADO_656C: /* 3c656C */ case TC_DEVICEID_TORNADO_10_100BT_920B: /* 3c920B-EMB */ case TC_DEVICEID_TORNADO_10_100BT_920B_WNM: /* 3c920B-EMB-WNM */ sc->xl_media = XL_MEDIAOPT_MII; sc->xl_xcvr = XL_XCVR_MII; if (verbose) device_printf(sc->xl_dev, "guessing MII\n"); break; case TC_DEVICEID_BOOMERANG_100BT4: /* 3c905-T4 */ case TC_DEVICEID_CYCLONE_10_100BT4: /* 3c905B-T4 */ sc->xl_media = XL_MEDIAOPT_BT4; sc->xl_xcvr = XL_XCVR_MII; if (verbose) device_printf(sc->xl_dev, "guessing 100baseT4/MII\n"); break; case TC_DEVICEID_HURRICANE_10_100BT: /* 3c905B-TX */ case TC_DEVICEID_HURRICANE_10_100BT_SERV:/*3c980-TX */ case TC_DEVICEID_TORNADO_10_100BT_SERV: /* 3c980C-TX */ case TC_DEVICEID_HURRICANE_SOHO100TX: /* 3cSOHO100-TX */ case TC_DEVICEID_TORNADO_10_100BT: /* 3c905C-TX */ case TC_DEVICEID_TORNADO_HOMECONNECT: /* 3c450-TX */ sc->xl_media = XL_MEDIAOPT_BTX; sc->xl_xcvr = XL_XCVR_AUTO; if (verbose) device_printf(sc->xl_dev, "guessing 10/100 internal\n"); break; case TC_DEVICEID_CYCLONE_10_100_COMBO: /* 3c905B-COMBO */ sc->xl_media = XL_MEDIAOPT_BTX|XL_MEDIAOPT_BNC|XL_MEDIAOPT_AUI; sc->xl_xcvr = XL_XCVR_AUTO; if (verbose) device_printf(sc->xl_dev, "guessing 10/100 plus BNC/AUI\n"); break; default: device_printf(sc->xl_dev, "unknown device ID: %x -- defaulting to 10baseT\n", devid); sc->xl_media = XL_MEDIAOPT_BT; break; } } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int xl_attach(device_t dev) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &xl_ifdrv, .ifat_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST, .ifat_capabilities = IFCAP_VLAN_MTU | IFCAP_LINKSTATE, }; u_char eaddr[ETHER_ADDR_LEN]; u_int16_t sinfo2, xcvr[2]; struct xl_softc *sc; if_t ifp; int media, pmcap; int error = 0, rid, res, unit; uint16_t did; sc = device_get_softc(dev); sc->xl_dev = dev; unit = device_get_unit(dev); mtx_init(&sc->xl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); ifmedia_init(&sc->ifmedia, 0, xl_ifmedia_upd, xl_ifmedia_sts); did = pci_get_device(dev); sc->xl_flags = 0; if (did == TC_DEVICEID_HURRICANE_555) sc->xl_flags |= XL_FLAG_EEPROM_OFFSET_30 | XL_FLAG_PHYOK; if (did == TC_DEVICEID_HURRICANE_556 || did == TC_DEVICEID_HURRICANE_556B) sc->xl_flags |= XL_FLAG_FUNCREG | XL_FLAG_PHYOK | XL_FLAG_EEPROM_OFFSET_30 | XL_FLAG_WEIRDRESET | XL_FLAG_INVERT_LED_PWR | XL_FLAG_INVERT_MII_PWR; if (did == TC_DEVICEID_HURRICANE_555 || did == TC_DEVICEID_HURRICANE_556) sc->xl_flags |= XL_FLAG_8BITROM; if (did == TC_DEVICEID_HURRICANE_556B) sc->xl_flags |= XL_FLAG_NO_XCVR_PWR; if (did == TC_DEVICEID_HURRICANE_575B || did == TC_DEVICEID_HURRICANE_575C || did == TC_DEVICEID_HURRICANE_656B || did == TC_DEVICEID_TORNADO_656C) sc->xl_flags |= XL_FLAG_FUNCREG; if (did == TC_DEVICEID_HURRICANE_575A || did == TC_DEVICEID_HURRICANE_575B || did == TC_DEVICEID_HURRICANE_575C || did == TC_DEVICEID_HURRICANE_656B || did == TC_DEVICEID_TORNADO_656C) sc->xl_flags |= XL_FLAG_PHYOK | XL_FLAG_EEPROM_OFFSET_30 | XL_FLAG_8BITROM; if (did == TC_DEVICEID_HURRICANE_656) sc->xl_flags |= XL_FLAG_FUNCREG | XL_FLAG_PHYOK; if (did == TC_DEVICEID_HURRICANE_575B) sc->xl_flags |= XL_FLAG_INVERT_LED_PWR; if (did == TC_DEVICEID_HURRICANE_575C) sc->xl_flags |= XL_FLAG_INVERT_MII_PWR; if (did == TC_DEVICEID_TORNADO_656C) sc->xl_flags |= XL_FLAG_INVERT_MII_PWR; if (did == TC_DEVICEID_HURRICANE_656 || did == TC_DEVICEID_HURRICANE_656B) sc->xl_flags |= XL_FLAG_INVERT_MII_PWR | XL_FLAG_INVERT_LED_PWR; if (did == TC_DEVICEID_TORNADO_10_100BT_920B || did == TC_DEVICEID_TORNADO_10_100BT_920B_WNM) sc->xl_flags |= XL_FLAG_PHYOK; switch (did) { case TC_DEVICEID_BOOMERANG_10_100BT: /* 3c905-TX */ case TC_DEVICEID_HURRICANE_575A: case TC_DEVICEID_HURRICANE_575B: case TC_DEVICEID_HURRICANE_575C: sc->xl_flags |= XL_FLAG_NO_MMIO; break; default: break; } /* * Map control/status registers. */ pci_enable_busmaster(dev); if ((sc->xl_flags & XL_FLAG_NO_MMIO) == 0) { rid = XL_PCI_LOMEM; res = SYS_RES_MEMORY; sc->xl_res = bus_alloc_resource_any(dev, res, &rid, RF_ACTIVE); } if (sc->xl_res != NULL) { sc->xl_flags |= XL_FLAG_USE_MMIO; if (bootverbose) device_printf(dev, "using memory mapped I/O\n"); } else { rid = XL_PCI_LOIO; res = SYS_RES_IOPORT; sc->xl_res = bus_alloc_resource_any(dev, res, &rid, RF_ACTIVE); if (sc->xl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } if (bootverbose) device_printf(dev, "using port I/O\n"); } sc->xl_btag = rman_get_bustag(sc->xl_res); sc->xl_bhandle = rman_get_bushandle(sc->xl_res); if (sc->xl_flags & XL_FLAG_FUNCREG) { rid = XL_PCI_FUNCMEM; sc->xl_fres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->xl_fres == NULL) { device_printf(dev, "couldn't map funcreg memory\n"); error = ENXIO; goto fail; } sc->xl_ftag = rman_get_bustag(sc->xl_fres); sc->xl_fhandle = rman_get_bushandle(sc->xl_fres); } /* Allocate interrupt */ rid = 0; sc->xl_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->xl_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Reset the adapter. */ XL_LOCK(sc); xl_reset(sc); XL_UNLOCK(sc); /* * Get station address from the EEPROM. */ if (xl_read_eeprom(sc, (caddr_t)&eaddr, XL_EE_OEM_ADR0, 3, 1)) { device_printf(dev, "failed to read station address\n"); error = ENXIO; goto fail; } callout_init_mtx(&sc->xl_tick_callout, &sc->xl_mtx, 0); TASK_INIT(&sc->xl_task, 0, xl_rxeof_task, sc); /* * Now allocate a tag for the DMA descriptor lists and a chunk * of DMA-able memory based on the tag. Also obtain the DMA * addresses of the RX and TX ring, which we'll need later. * All of our lists are allocated as a contiguous block * of memory. */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 8, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, XL_RX_LIST_SZ, 1, XL_RX_LIST_SZ, 0, NULL, NULL, &sc->xl_ldata.xl_rx_tag); if (error) { device_printf(dev, "failed to allocate rx dma tag\n"); goto fail; } error = bus_dmamem_alloc(sc->xl_ldata.xl_rx_tag, (void **)&sc->xl_ldata.xl_rx_list, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->xl_ldata.xl_rx_dmamap); if (error) { device_printf(dev, "no memory for rx list buffers!\n"); bus_dma_tag_destroy(sc->xl_ldata.xl_rx_tag); sc->xl_ldata.xl_rx_tag = NULL; goto fail; } error = bus_dmamap_load(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, sc->xl_ldata.xl_rx_list, XL_RX_LIST_SZ, xl_dma_map_addr, &sc->xl_ldata.xl_rx_dmaaddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "cannot get dma address of the rx ring!\n"); bus_dmamem_free(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_list, sc->xl_ldata.xl_rx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_rx_tag); sc->xl_ldata.xl_rx_tag = NULL; goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(dev), 8, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, XL_TX_LIST_SZ, 1, XL_TX_LIST_SZ, 0, NULL, NULL, &sc->xl_ldata.xl_tx_tag); if (error) { device_printf(dev, "failed to allocate tx dma tag\n"); goto fail; } error = bus_dmamem_alloc(sc->xl_ldata.xl_tx_tag, (void **)&sc->xl_ldata.xl_tx_list, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->xl_ldata.xl_tx_dmamap); if (error) { device_printf(dev, "no memory for list buffers!\n"); bus_dma_tag_destroy(sc->xl_ldata.xl_tx_tag); sc->xl_ldata.xl_tx_tag = NULL; goto fail; } error = bus_dmamap_load(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, sc->xl_ldata.xl_tx_list, XL_TX_LIST_SZ, xl_dma_map_addr, &sc->xl_ldata.xl_tx_dmaaddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "cannot get dma address of the tx ring!\n"); bus_dmamem_free(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_list, sc->xl_ldata.xl_tx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_tx_tag); sc->xl_ldata.xl_tx_tag = NULL; goto fail; } /* * Allocate a DMA tag for the mapping of mbufs. */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * XL_MAXFRAGS, XL_MAXFRAGS, MCLBYTES, 0, NULL, NULL, &sc->xl_mtag); if (error) { device_printf(dev, "failed to allocate mbuf dma tag\n"); goto fail; } /* We need a spare DMA map for the RX ring. */ error = bus_dmamap_create(sc->xl_mtag, 0, &sc->xl_tmpmap); if (error) goto fail; /* * Figure out the card type. 3c905B adapters have the * 'supportsNoTxLength' bit set in the capabilities * word in the EEPROM. * Note: my 3c575C CardBus card lies. It returns a value * of 0x1578 for its capabilities word, which is somewhat * nonsensical. Another way to distinguish a 3c90x chip * from a 3c90xB/C chip is to check for the 'supportsLargePackets' * bit. This will only be set for 3c90x boomerage chips. */ xl_read_eeprom(sc, (caddr_t)&sc->xl_caps, XL_EE_CAPS, 1, 0); if (sc->xl_caps & XL_CAPS_NO_TXLENGTH || !(sc->xl_caps & XL_CAPS_LARGE_PKTS)) sc->xl_type = XL_TYPE_905B; else sc->xl_type = XL_TYPE_90X; /* Check availability of WOL. */ if ((sc->xl_caps & XL_CAPS_PWRMGMT) != 0 && pci_find_cap(dev, PCIY_PMG, &pmcap) == 0) { sc->xl_pmcap = pmcap; sc->xl_flags |= XL_FLAG_WOL; sinfo2 = 0; xl_read_eeprom(sc, (caddr_t)&sinfo2, XL_EE_SOFTINFO2, 1, 0); if ((sinfo2 & XL_SINFO2_AUX_WOL_CON) == 0 && bootverbose) device_printf(dev, "No auxiliary remote wakeup connector!\n"); } /* Set the TX start threshold for best performance. */ sc->xl_tx_thresh = XL_MIN_FRAMELEN; /* * Now we have to see what sort of media we have. * This includes probing for an MII interace and a * possible PHY. */ XL_SEL_WIN(3); sc->xl_media = CSR_READ_2(sc, XL_W3_MEDIA_OPT); if (bootverbose) device_printf(dev, "media options word: %x\n", sc->xl_media); xl_read_eeprom(sc, (char *)&xcvr, XL_EE_ICFG_0, 2, 0); sc->xl_xcvr = xcvr[0] | xcvr[1] << 16; sc->xl_xcvr &= XL_ICFG_CONNECTOR_MASK; sc->xl_xcvr >>= XL_ICFG_CONNECTOR_BITS; xl_mediacheck(sc); if (sc->xl_media & XL_MEDIAOPT_MII || sc->xl_media & XL_MEDIAOPT_BTX || sc->xl_media & XL_MEDIAOPT_BT4) { struct mii_data *mii; int phy; if (bootverbose) device_printf(dev, "found MII/AUTO\n"); xl_setcfg(sc); /* * Attach PHYs only at MII address 24 if !XL_FLAG_PHYOK. * This is to guard against problems with certain 3Com ASIC * revisions that incorrectly map the internal transceiver * control registers at all MII addresses. */ phy = MII_PHY_ANY; if ((sc->xl_flags & XL_FLAG_PHYOK) == 0) phy = 24; error = mii_attach(dev, &sc->xl_miibus, xl_ifmedia_upd, xl_ifmedia_sts, BMSR_DEFCAPMASK, phy, MII_OFFSET_ANY, sc->xl_type == XL_TYPE_905B ? MIIF_DOPAUSE : 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } mii = device_get_softc(sc->xl_miibus); ifat.ifat_baudrate = ifmedia_baudrate(mii->mii_media_active); goto media_done; } /* * Sanity check. If the user has selected "auto" and this isn't * a 10/100 card of some kind, we need to force the transceiver * type to something sane. */ if (sc->xl_xcvr == XL_XCVR_AUTO) xl_choose_xcvr(sc, bootverbose); /* * Do ifmedia setup. */ if (sc->xl_media & XL_MEDIAOPT_BT) { if (bootverbose) device_printf(dev, "found 10baseT\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_HDX, 0, NULL); if (sc->xl_caps & XL_CAPS_FULL_DUPLEX) ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); } if (sc->xl_media & (XL_MEDIAOPT_AUI|XL_MEDIAOPT_10FL)) { /* * Check for a 10baseFL board in disguise. */ if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { if (bootverbose) device_printf(dev, "found 10baseFL\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_FL, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_FL|IFM_HDX, 0, NULL); if (sc->xl_caps & XL_CAPS_FULL_DUPLEX) ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_FL|IFM_FDX, 0, NULL); } else { if (bootverbose) device_printf(dev, "found AUI\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_5, 0, NULL); } } if (sc->xl_media & XL_MEDIAOPT_BNC) { if (bootverbose) device_printf(dev, "found BNC\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_2, 0, NULL); } if (sc->xl_media & XL_MEDIAOPT_BFX) { if (bootverbose) device_printf(dev, "found 100baseFX\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_FX, 0, NULL); } media = IFM_ETHER|IFM_100_TX|IFM_FDX; xl_choose_media(sc, &media); if (sc->xl_miibus == NULL) ifmedia_set(&sc->ifmedia, media); ifat.ifat_baudrate = ifmedia_baudrate(sc->ifmedia.ifm_media); media_done: if (sc->xl_flags & XL_FLAG_NO_XCVR_PWR) { XL_SEL_WIN(0); CSR_WRITE_2(sc, XL_W0_MFG_ID, XL_NO_XCVR_PWR_MAGICBITS); } error = bus_setup_intr(dev, sc->xl_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, xl_intr, sc, &sc->xl_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); goto fail; } /* Initialize interface. */ ifat.ifat_softc = sc; ifat.ifat_dunit = device_get_unit(dev); ifat.ifat_lla = eaddr; if (sc->xl_type == XL_TYPE_905B) { ifat.ifat_hwassist = XL905B_CSUM_FEATURES; #ifdef XL905B_TXCSUM_BROKEN ifat.ifat_capabilities |= IFCAP_RXCSUM; #else ifat.ifat_capabilities |= IFCAP_HWCSUM; #endif } if ((sc->xl_flags & XL_FLAG_WOL) != 0) ifat.ifat_capabilities |= IFCAP_WOL_MAGIC; ifat.ifat_capenable = ifat.ifat_capabilities; #ifdef DEVICE_POLLING ifat.ifat_capabilities |= IFCAP_POLLING; #endif ifp = sc->xl_ifp = if_attach(&ifat); return (0); fail: xl_detach(dev); return (error); } /* * Choose a default media. * XXX This is a leaf function only called by xl_attach() and * acquires/releases the non-recursible driver mutex to * satisfy lock assertions. */ static void xl_choose_media(struct xl_softc *sc, int *media) { XL_LOCK(sc); switch (sc->xl_xcvr) { case XL_XCVR_10BT: *media = IFM_ETHER|IFM_10_T; xl_setmode(sc, *media); break; case XL_XCVR_AUI: if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { *media = IFM_ETHER|IFM_10_FL; xl_setmode(sc, *media); } else { *media = IFM_ETHER|IFM_10_5; xl_setmode(sc, *media); } break; case XL_XCVR_COAX: *media = IFM_ETHER|IFM_10_2; xl_setmode(sc, *media); break; case XL_XCVR_AUTO: case XL_XCVR_100BTX: case XL_XCVR_MII: /* Chosen by miibus */ break; case XL_XCVR_100BFX: *media = IFM_ETHER|IFM_100_FX; break; default: device_printf(sc->xl_dev, "unknown XCVR type: %d\n", sc->xl_xcvr); /* * This will probably be wrong, but it prevents * the ifmedia code from panicking. */ *media = IFM_ETHER|IFM_10_T; break; } XL_UNLOCK(sc); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int xl_detach(device_t dev) { struct xl_softc *sc; if_t ifp; int rid, res; sc = device_get_softc(dev); ifp = sc->xl_ifp; KASSERT(mtx_initialized(&sc->xl_mtx), ("xl mutex not initialized")); if (sc->xl_flags & XL_FLAG_USE_MMIO) { rid = XL_PCI_LOMEM; res = SYS_RES_MEMORY; } else { rid = XL_PCI_LOIO; res = SYS_RES_IOPORT; } /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { XL_LOCK(sc); xl_stop(sc); XL_UNLOCK(sc); taskqueue_drain(taskqueue_swi, &sc->xl_task); callout_drain(&sc->xl_tick_callout); if_detach(ifp); } if (sc->xl_miibus) device_delete_child(dev, sc->xl_miibus); bus_generic_detach(dev); ifmedia_removeall(&sc->ifmedia); if (sc->xl_intrhand) bus_teardown_intr(dev, sc->xl_irq, sc->xl_intrhand); if (sc->xl_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->xl_irq); if (sc->xl_fres != NULL) bus_release_resource(dev, SYS_RES_MEMORY, XL_PCI_FUNCMEM, sc->xl_fres); if (sc->xl_res) bus_release_resource(dev, res, rid, sc->xl_res); if (sc->xl_mtag) { bus_dmamap_destroy(sc->xl_mtag, sc->xl_tmpmap); bus_dma_tag_destroy(sc->xl_mtag); } if (sc->xl_ldata.xl_rx_tag) { bus_dmamap_unload(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap); bus_dmamem_free(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_list, sc->xl_ldata.xl_rx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_rx_tag); } if (sc->xl_ldata.xl_tx_tag) { bus_dmamap_unload(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap); bus_dmamem_free(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_list, sc->xl_ldata.xl_tx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_tx_tag); } mtx_destroy(&sc->xl_mtx); return (0); } /* * Initialize the transmit descriptors. */ static int xl_list_tx_init(struct xl_softc *sc) { struct xl_chain_data *cd; struct xl_list_data *ld; int error, i; XL_LOCK_ASSERT(sc); cd = &sc->xl_cdata; ld = &sc->xl_ldata; for (i = 0; i < XL_TX_LIST_CNT; i++) { cd->xl_tx_chain[i].xl_ptr = &ld->xl_tx_list[i]; error = bus_dmamap_create(sc->xl_mtag, 0, &cd->xl_tx_chain[i].xl_map); if (error) return (error); cd->xl_tx_chain[i].xl_phys = ld->xl_tx_dmaaddr + i * sizeof(struct xl_list); if (i == (XL_TX_LIST_CNT - 1)) cd->xl_tx_chain[i].xl_next = NULL; else cd->xl_tx_chain[i].xl_next = &cd->xl_tx_chain[i + 1]; } cd->xl_tx_free = &cd->xl_tx_chain[0]; cd->xl_tx_tail = cd->xl_tx_head = NULL; bus_dmamap_sync(ld->xl_tx_tag, ld->xl_tx_dmamap, BUS_DMASYNC_PREWRITE); return (0); } /* * Initialize the transmit descriptors. */ static int xl_list_tx_init_90xB(struct xl_softc *sc) { struct xl_chain_data *cd; struct xl_list_data *ld; int error, i; XL_LOCK_ASSERT(sc); cd = &sc->xl_cdata; ld = &sc->xl_ldata; for (i = 0; i < XL_TX_LIST_CNT; i++) { cd->xl_tx_chain[i].xl_ptr = &ld->xl_tx_list[i]; error = bus_dmamap_create(sc->xl_mtag, 0, &cd->xl_tx_chain[i].xl_map); if (error) return (error); cd->xl_tx_chain[i].xl_phys = ld->xl_tx_dmaaddr + i * sizeof(struct xl_list); if (i == (XL_TX_LIST_CNT - 1)) cd->xl_tx_chain[i].xl_next = &cd->xl_tx_chain[0]; else cd->xl_tx_chain[i].xl_next = &cd->xl_tx_chain[i + 1]; if (i == 0) cd->xl_tx_chain[i].xl_prev = &cd->xl_tx_chain[XL_TX_LIST_CNT - 1]; else cd->xl_tx_chain[i].xl_prev = &cd->xl_tx_chain[i - 1]; } bzero(ld->xl_tx_list, XL_TX_LIST_SZ); ld->xl_tx_list[0].xl_status = htole32(XL_TXSTAT_EMPTY); cd->xl_tx_prod = 1; cd->xl_tx_cons = 1; cd->xl_tx_cnt = 0; bus_dmamap_sync(ld->xl_tx_tag, ld->xl_tx_dmamap, BUS_DMASYNC_PREWRITE); return (0); } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. */ static int xl_list_rx_init(struct xl_softc *sc) { struct xl_chain_data *cd; struct xl_list_data *ld; int error, i, next; u_int32_t nextptr; XL_LOCK_ASSERT(sc); cd = &sc->xl_cdata; ld = &sc->xl_ldata; for (i = 0; i < XL_RX_LIST_CNT; i++) { cd->xl_rx_chain[i].xl_ptr = &ld->xl_rx_list[i]; error = bus_dmamap_create(sc->xl_mtag, 0, &cd->xl_rx_chain[i].xl_map); if (error) return (error); error = xl_newbuf(sc, &cd->xl_rx_chain[i]); if (error) return (error); if (i == (XL_RX_LIST_CNT - 1)) next = 0; else next = i + 1; nextptr = ld->xl_rx_dmaaddr + next * sizeof(struct xl_list_onefrag); cd->xl_rx_chain[i].xl_next = &cd->xl_rx_chain[next]; ld->xl_rx_list[i].xl_next = htole32(nextptr); } bus_dmamap_sync(ld->xl_rx_tag, ld->xl_rx_dmamap, BUS_DMASYNC_PREWRITE); cd->xl_rx_head = &cd->xl_rx_chain[0]; return (0); } /* * Initialize an RX descriptor and attach an MBUF cluster. * If we fail to do so, we need to leave the old mbuf and * the old DMA map untouched so that it can be reused. */ static int xl_newbuf(struct xl_softc *sc, struct xl_chain_onefrag *c) { struct mbuf *m_new = NULL; bus_dmamap_t map; bus_dma_segment_t segs[1]; int error, nseg; XL_LOCK_ASSERT(sc); m_new = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m_new == NULL) return (ENOBUFS); m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; /* Force longword alignment for packet payload. */ m_adj(m_new, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->xl_mtag, sc->xl_tmpmap, m_new, segs, &nseg, BUS_DMA_NOWAIT); if (error) { m_freem(m_new); device_printf(sc->xl_dev, "can't map mbuf (error %d)\n", error); return (error); } KASSERT(nseg == 1, ("%s: too many DMA segments (%d)", __func__, nseg)); bus_dmamap_unload(sc->xl_mtag, c->xl_map); map = c->xl_map; c->xl_map = sc->xl_tmpmap; sc->xl_tmpmap = map; c->xl_mbuf = m_new; c->xl_ptr->xl_frag.xl_len = htole32(m_new->m_len | XL_LAST_FRAG); c->xl_ptr->xl_frag.xl_addr = htole32(segs->ds_addr); c->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_mtag, c->xl_map, BUS_DMASYNC_PREREAD); return (0); } static int xl_rx_resync(struct xl_softc *sc) { struct xl_chain_onefrag *pos; int i; XL_LOCK_ASSERT(sc); pos = sc->xl_cdata.xl_rx_head; for (i = 0; i < XL_RX_LIST_CNT; i++) { if (pos->xl_ptr->xl_status) break; pos = pos->xl_next; } if (i == XL_RX_LIST_CNT) return (0); sc->xl_cdata.xl_rx_head = pos; return (EAGAIN); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. */ static int xl_rxeof(struct xl_softc *sc) { struct mbuf *m; if_t ifp = sc->xl_ifp; struct xl_chain_onefrag *cur_rx; int total_len; int rx_npkts = 0; u_int32_t rxstat; XL_LOCK_ASSERT(sc); again: bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_POSTREAD); while ((rxstat = le32toh(sc->xl_cdata.xl_rx_head->xl_ptr->xl_status))) { #ifdef DEVICE_POLLING if (sc->xl_capenable & IFCAP_POLLING) { if (sc->rxcycles <= 0) break; sc->rxcycles--; } #endif cur_rx = sc->xl_cdata.xl_rx_head; sc->xl_cdata.xl_rx_head = cur_rx->xl_next; total_len = rxstat & XL_RXSTAT_LENMASK; rx_npkts++; /* * Since we have told the chip to allow large frames, * we need to trap giant frame errors in software. We allow * a little more than the normal frame size to account for * frames with VLAN tags. */ if (total_len > XL_MAX_FRAMELEN) rxstat |= (XL_RXSTAT_UP_ERROR|XL_RXSTAT_OVERSIZE); /* * If an error occurs, update stats, clear the * status word and leave the mbuf cluster in place: * it should simply get re-used next time this descriptor * comes up in the ring. */ if (rxstat & XL_RXSTAT_UP_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); continue; } /* * If the error bit was not set, the upload complete * bit should be set which means we have a valid packet. * If not, something truly strange has happened. */ if (!(rxstat & XL_RXSTAT_UP_CMPLT)) { device_printf(sc->xl_dev, "bad receive status -- packet dropped\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); continue; } /* No errors; receive the packet. */ bus_dmamap_sync(sc->xl_mtag, cur_rx->xl_map, BUS_DMASYNC_POSTREAD); m = cur_rx->xl_mbuf; /* * Try to conjure up a new mbuf cluster. If that * fails, it means we have an out of memory condition and * should leave the buffer in place and continue. This will * result in a lost packet, but there's little else we * can do in this situation. */ if (xl_newbuf(sc, cur_rx)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); continue; } bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = total_len; if (sc->xl_capenable & IFCAP_RXCSUM) { /* Do IP checksum checking. */ if (rxstat & XL_RXSTAT_IPCKOK) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & XL_RXSTAT_IPCKERR)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((rxstat & XL_RXSTAT_TCPCOK && !(rxstat & XL_RXSTAT_TCPCKERR)) || (rxstat & XL_RXSTAT_UDPCKOK && !(rxstat & XL_RXSTAT_UDPCKERR))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } XL_UNLOCK(sc); if_input(ifp, m); XL_LOCK(sc); /* * If we are running from the taskqueue, the interface * might have been stopped while we were passing the last * packet up the network stack. */ if (!(sc->xl_flags & XL_FLAG_RUNNING)) return (rx_npkts); } /* * Handle the 'end of channel' condition. When the upload * engine hits the end of the RX ring, it will stall. This * is our cue to flush the RX ring, reload the uplist pointer * register and unstall the engine. * XXX This is actually a little goofy. With the ThunderLAN * chip, you get an interrupt when the receiver hits the end * of the receive ring, which tells you exactly when you * you need to reload the ring pointer. Here we have to * fake it. I'm mad at myself for not being clever enough * to avoid the use of a goto here. */ if (CSR_READ_4(sc, XL_UPLIST_PTR) == 0 || CSR_READ_4(sc, XL_UPLIST_STATUS) & XL_PKTSTAT_UP_STALLED) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_STALL); xl_wait(sc); CSR_WRITE_4(sc, XL_UPLIST_PTR, sc->xl_ldata.xl_rx_dmaaddr); sc->xl_cdata.xl_rx_head = &sc->xl_cdata.xl_rx_chain[0]; CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_UNSTALL); goto again; } return (rx_npkts); } /* * Taskqueue wrapper for xl_rxeof(). */ static void xl_rxeof_task(void *arg, int pending) { struct xl_softc *sc = (struct xl_softc *)arg; XL_LOCK(sc); if (sc->xl_flags & XL_FLAG_RUNNING) xl_rxeof(sc); XL_UNLOCK(sc); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void xl_txeof(struct xl_softc *sc) { struct xl_chain *cur_tx; XL_LOCK_ASSERT(sc); /* * Go through our tx list and free mbufs for those * frames that have been uploaded. Note: the 3c905B * sets a special bit in the status word to let us * know that a frame has been downloaded, but the * original 3c900/3c905 adapters don't do that. * Consequently, we have to use a different test if * xl_type != XL_TYPE_905B. */ while (sc->xl_cdata.xl_tx_head != NULL) { cur_tx = sc->xl_cdata.xl_tx_head; if (CSR_READ_4(sc, XL_DOWNLIST_PTR)) break; sc->xl_cdata.xl_tx_head = cur_tx->xl_next; bus_dmamap_sync(sc->xl_mtag, cur_tx->xl_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->xl_mtag, cur_tx->xl_map); if_inc_txcounters(sc->xl_ifp, cur_tx->xl_mbuf); m_freem(cur_tx->xl_mbuf); cur_tx->xl_mbuf = NULL; cur_tx->xl_next = sc->xl_cdata.xl_tx_free; sc->xl_cdata.xl_tx_free = cur_tx; } if (sc->xl_cdata.xl_tx_head == NULL) { sc->xl_wdog_timer = 0; sc->xl_cdata.xl_tx_tail = NULL; } else { if (CSR_READ_4(sc, XL_DMACTL) & XL_DMACTL_DOWN_STALLED || !CSR_READ_4(sc, XL_DOWNLIST_PTR)) { CSR_WRITE_4(sc, XL_DOWNLIST_PTR, sc->xl_cdata.xl_tx_head->xl_phys); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); } } } static void xl_txeof_90xB(struct xl_softc *sc) { struct xl_chain *cur_tx = NULL; int idx; XL_LOCK_ASSERT(sc); bus_dmamap_sync(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, BUS_DMASYNC_POSTREAD); idx = sc->xl_cdata.xl_tx_cons; while (idx != sc->xl_cdata.xl_tx_prod) { cur_tx = &sc->xl_cdata.xl_tx_chain[idx]; if (!(le32toh(cur_tx->xl_ptr->xl_status) & XL_TXSTAT_DL_COMPLETE)) break; if (cur_tx->xl_mbuf != NULL) { bus_dmamap_sync(sc->xl_mtag, cur_tx->xl_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->xl_mtag, cur_tx->xl_map); if_inc_txcounters(sc->xl_ifp, cur_tx->xl_mbuf); m_freem(cur_tx->xl_mbuf); cur_tx->xl_mbuf = NULL; } sc->xl_cdata.xl_tx_cnt--; XL_INC(idx, XL_TX_LIST_CNT); } if (sc->xl_cdata.xl_tx_cnt == 0) sc->xl_wdog_timer = 0; sc->xl_cdata.xl_tx_cons = idx; } /* * TX 'end of channel' interrupt handler. Actually, we should * only get a 'TX complete' interrupt if there's a transmit error, * so this is really TX error handler. */ static void xl_txeoc(struct xl_softc *sc) { u_int8_t txstat; XL_LOCK_ASSERT(sc); while ((txstat = CSR_READ_1(sc, XL_TX_STATUS))) { if (txstat & XL_TXSTATUS_UNDERRUN || txstat & XL_TXSTATUS_JABBER || txstat & XL_TXSTATUS_RECLAIM) { device_printf(sc->xl_dev, "transmission error: 0x%02x\n", txstat); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); if (sc->xl_type == XL_TYPE_905B) { if (sc->xl_cdata.xl_tx_cnt) { int i; struct xl_chain *c; i = sc->xl_cdata.xl_tx_cons; c = &sc->xl_cdata.xl_tx_chain[i]; CSR_WRITE_4(sc, XL_DOWNLIST_PTR, c->xl_phys); CSR_WRITE_1(sc, XL_DOWN_POLL, 64); sc->xl_wdog_timer = 5; } } else { if (sc->xl_cdata.xl_tx_head != NULL) { CSR_WRITE_4(sc, XL_DOWNLIST_PTR, sc->xl_cdata.xl_tx_head->xl_phys); sc->xl_wdog_timer = 5; } } /* * Remember to set this for the * first generation 3c90X chips. */ CSR_WRITE_1(sc, XL_TX_FREETHRESH, XL_PACKET_SIZE >> 8); if (txstat & XL_TXSTATUS_UNDERRUN && sc->xl_tx_thresh < XL_PACKET_SIZE) { sc->xl_tx_thresh += XL_MIN_FRAMELEN; device_printf(sc->xl_dev, "tx underrun, increasing tx start threshold to %d bytes\n", sc->xl_tx_thresh); } CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_SET_START|sc->xl_tx_thresh); if (sc->xl_type == XL_TYPE_905B) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_SET_TX_RECLAIM|(XL_PACKET_SIZE >> 4)); } CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_ENABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); } else { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_ENABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); } /* * Write an arbitrary byte to the TX_STATUS register * to clear this interrupt/error and advance to the next. */ CSR_WRITE_1(sc, XL_TX_STATUS, 0x01); } } static void xl_intr(void *arg) { struct xl_softc *sc = arg; if_t ifp = sc->xl_ifp; u_int16_t status; XL_LOCK(sc); #ifdef DEVICE_POLLING if (sc->xl_capenable & IFCAP_POLLING) { XL_UNLOCK(sc); return; } #endif for (;;) { status = CSR_READ_2(sc, XL_STATUS); if ((status & XL_INTRS) == 0 || status == 0xFFFF) break; CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|(status & XL_INTRS)); if ((sc->xl_flags & XL_FLAG_RUNNING) == 0) break; if (status & XL_STAT_UP_COMPLETE) { if (xl_rxeof(sc) == 0) { while (xl_rx_resync(sc)) xl_rxeof(sc); } } if (status & XL_STAT_DOWN_COMPLETE) { if (sc->xl_type == XL_TYPE_905B) xl_txeof_90xB(sc); else xl_txeof(sc); } if (status & XL_STAT_TX_COMPLETE) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); xl_txeoc(sc); } if (status & XL_STAT_ADFAIL) { sc->xl_flags &= ~XL_FLAG_RUNNING; xl_init(sc); break; } if (status & XL_STAT_STATSOFLOW) xl_stats_update(sc); } if (if_snd_len(ifp) && sc->xl_flags & XL_FLAG_RUNNING) { if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(sc); else xl_start_locked(sc); } XL_UNLOCK(sc); } #ifdef DEVICE_POLLING static int xl_poll(if_t ifp, enum poll_cmd cmd, int count) { struct xl_softc *sc; int rx_npkts = 0; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); XL_LOCK(sc); if (sc->xl_flags & XL_FLAG_RUNNING) { XL_UNLOCK(sc); return (0); } sc->rxcycles = count; rx_npkts = xl_rxeof(sc); if (sc->xl_type == XL_TYPE_905B) xl_txeof_90xB(sc); else xl_txeof(sc); if (if_snd_len(ifp)) { if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(sc); else xl_start_locked(sc); } if (cmd == POLL_AND_CHECK_STATUS) { u_int16_t status; status = CSR_READ_2(sc, XL_STATUS); if (status & XL_INTRS && status != 0xFFFF) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|(status & XL_INTRS)); if (status & XL_STAT_TX_COMPLETE) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); xl_txeoc(sc); } if (status & XL_STAT_ADFAIL) { sc->xl_flags &= ~XL_FLAG_RUNNING; xl_init(sc); } if (status & XL_STAT_STATSOFLOW) xl_stats_update(sc); } } XL_UNLOCK(sc); return (rx_npkts); } #endif /* DEVICE_POLLING */ static void xl_tick(void *xsc) { struct xl_softc *sc = xsc; struct mii_data *mii; XL_LOCK_ASSERT(sc); if (sc->xl_miibus != NULL) { mii = device_get_softc(sc->xl_miibus); mii_tick(mii); } xl_stats_update(sc); if (xl_watchdog(sc) == EJUSTRETURN) return; callout_reset(&sc->xl_tick_callout, hz, xl_tick, sc); } static void xl_stats_update(struct xl_softc *sc) { if_t ifp = sc->xl_ifp; struct xl_stats xl_stats; u_int8_t *p; int i; XL_LOCK_ASSERT(sc); bzero((char *)&xl_stats, sizeof(struct xl_stats)); p = (u_int8_t *)&xl_stats; /* Read all the stats registers. */ XL_SEL_WIN(6); for (i = 0; i < 16; i++) *p++ = CSR_READ_1(sc, XL_W6_CARRIER_LOST + i); if_inc_counter(ifp, IFCOUNTER_IERRORS, xl_stats.xl_rx_overrun); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, xl_stats.xl_tx_multi_collision + xl_stats.xl_tx_single_collision + xl_stats.xl_tx_late_collision); /* * Boomerang and cyclone chips have an extra stats counter * in window 4 (BadSSD). We have to read this too in order * to clear out all the stats registers and avoid a statsoflow * interrupt. */ XL_SEL_WIN(4); CSR_READ_1(sc, XL_W4_BADSSD); XL_SEL_WIN(7); } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int xl_encap(struct xl_softc *sc, struct xl_chain *c, struct mbuf **m_head) { struct mbuf *m_new; if_t ifp = sc->xl_ifp; int error, i, nseg, total_len; u_int32_t status; XL_LOCK_ASSERT(sc); error = bus_dmamap_load_mbuf_sg(sc->xl_mtag, c->xl_map, *m_head, sc->xl_cdata.xl_tx_segs, &nseg, BUS_DMA_NOWAIT); if (error && error != EFBIG) { if_printf(ifp, "can't map mbuf (error %d)\n", error); return (error); } /* * Handle special case: we used up all 63 fragments, * but we have more mbufs left in the chain. Copy the * data into an mbuf cluster. Note that we don't * bother clearing the values in the other fragment * pointers/counters; it wouldn't gain us anything, * and would waste cycles. */ if (error) { m_new = m_collapse(*m_head, M_NOWAIT, XL_MAXFRAGS); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m_new; error = bus_dmamap_load_mbuf_sg(sc->xl_mtag, c->xl_map, *m_head, sc->xl_cdata.xl_tx_segs, &nseg, BUS_DMA_NOWAIT); if (error) { m_freem(*m_head); *m_head = NULL; if_printf(ifp, "can't map mbuf (error %d)\n", error); return (error); } } KASSERT(nseg <= XL_MAXFRAGS, ("%s: too many DMA segments (%d)", __func__, nseg)); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } bus_dmamap_sync(sc->xl_mtag, c->xl_map, BUS_DMASYNC_PREWRITE); total_len = 0; for (i = 0; i < nseg; i++) { KASSERT(sc->xl_cdata.xl_tx_segs[i].ds_len <= MCLBYTES, ("segment size too large")); c->xl_ptr->xl_frag[i].xl_addr = htole32(sc->xl_cdata.xl_tx_segs[i].ds_addr); c->xl_ptr->xl_frag[i].xl_len = htole32(sc->xl_cdata.xl_tx_segs[i].ds_len); total_len += sc->xl_cdata.xl_tx_segs[i].ds_len; } c->xl_ptr->xl_frag[nseg - 1].xl_len |= htole32(XL_LAST_FRAG); if (sc->xl_type == XL_TYPE_905B) { status = XL_TXSTAT_RND_DEFEAT; #ifndef XL905B_TXCSUM_BROKEN if ((*m_head)->m_pkthdr.csum_flags) { if ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) status |= XL_TXSTAT_IPCKSUM; if ((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) status |= XL_TXSTAT_TCPCKSUM; if ((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) status |= XL_TXSTAT_UDPCKSUM; } #endif } else status = total_len; c->xl_ptr->xl_status = htole32(status); c->xl_ptr->xl_next = 0; c->xl_mbuf = *m_head; return (0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static int xl_transmit(if_t ifp, struct mbuf *m) { struct xl_softc *sc; int error; if ((error = if_snd_enqueue(ifp, m)) != 0) return (error); sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); XL_LOCK(sc); if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(sc); else xl_start_locked(sc); XL_UNLOCK(sc); return (0); } static void xl_start_locked(struct xl_softc *sc) { struct mbuf *m_head; struct xl_chain *prev = NULL, *cur_tx = NULL, *start_tx; struct xl_chain *prev_tx; int error; XL_LOCK_ASSERT(sc); if ((sc->xl_flags & XL_FLAG_RUNNING) != XL_FLAG_RUNNING) return; /* * Check for an available queue slot. If there are none, * punt. */ if (sc->xl_cdata.xl_tx_free == NULL) { xl_txeoc(sc); xl_txeof(sc); if (sc->xl_cdata.xl_tx_free == NULL) return; } start_tx = sc->xl_cdata.xl_tx_free; while (sc->xl_cdata.xl_tx_free != NULL && ((m_head = if_snd_dequeue(sc->xl_ifp)) != NULL)) { /* Pick a descriptor off the free list. */ prev_tx = cur_tx; cur_tx = sc->xl_cdata.xl_tx_free; /* Pack the data into the descriptor. */ error = xl_encap(sc, cur_tx, &m_head); if (error) { cur_tx = prev_tx; if (m_head == NULL) break; if_snd_prepend(sc->xl_ifp, m_head); break; } sc->xl_cdata.xl_tx_free = cur_tx->xl_next; cur_tx->xl_next = NULL; /* Chain it together. */ if (prev != NULL) { prev->xl_next = cur_tx; prev->xl_ptr->xl_next = htole32(cur_tx->xl_phys); } prev = cur_tx; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if_mtap(sc->xl_ifp, cur_tx->xl_mbuf, NULL, 0); } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) return; /* * Place the request for the upload interrupt * in the last descriptor in the chain. This way, if * we're chaining several packets at once, we'll only * get an interrupt once for the whole chain rather than * once for each packet. */ cur_tx->xl_ptr->xl_status |= htole32(XL_TXSTAT_DL_INTR); /* * Queue the packets. If the TX channel is clear, update * the downlist pointer register. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_STALL); xl_wait(sc); if (sc->xl_cdata.xl_tx_head != NULL) { sc->xl_cdata.xl_tx_tail->xl_next = start_tx; sc->xl_cdata.xl_tx_tail->xl_ptr->xl_next = htole32(start_tx->xl_phys); sc->xl_cdata.xl_tx_tail->xl_ptr->xl_status &= htole32(~XL_TXSTAT_DL_INTR); sc->xl_cdata.xl_tx_tail = cur_tx; } else { sc->xl_cdata.xl_tx_head = start_tx; sc->xl_cdata.xl_tx_tail = cur_tx; } bus_dmamap_sync(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, BUS_DMASYNC_PREWRITE); if (!CSR_READ_4(sc, XL_DOWNLIST_PTR)) CSR_WRITE_4(sc, XL_DOWNLIST_PTR, start_tx->xl_phys); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); XL_SEL_WIN(7); /* * Set a timeout in case the chip goes out to lunch. */ sc->xl_wdog_timer = 5; /* * XXX Under certain conditions, usually on slower machines * where interrupts may be dropped, it's possible for the * adapter to chew up all the buffers in the receive ring * and stall, without us being able to do anything about it. * To guard against this, we need to make a pass over the * RX queue to make sure there aren't any packets pending. * Doing it here means we can flush the receive ring at the * same time the chip is DMAing the transmit descriptors we * just gave it. * * 3Com goes to some lengths to emphasize the Parallel Tasking (tm) * nature of their chips in all their marketing literature; * we may as well take advantage of it. :) */ taskqueue_enqueue(taskqueue_swi, &sc->xl_task); } static void xl_start_90xB_locked(struct xl_softc *sc) { struct mbuf *m_head; struct xl_chain *prev = NULL, *cur_tx = NULL, *start_tx; struct xl_chain *prev_tx; int error, idx; XL_LOCK_ASSERT(sc); if ((sc->xl_flags & XL_FLAG_RUNNING) != XL_FLAG_RUNNING) return; idx = sc->xl_cdata.xl_tx_prod; start_tx = &sc->xl_cdata.xl_tx_chain[idx]; while (sc->xl_cdata.xl_tx_chain[idx].xl_mbuf == NULL && ((XL_TX_LIST_CNT - sc->xl_cdata.xl_tx_cnt) >= 3) && ((m_head = if_snd_dequeue(sc->xl_ifp)) != NULL)) { prev_tx = cur_tx; cur_tx = &sc->xl_cdata.xl_tx_chain[idx]; /* Pack the data into the descriptor. */ error = xl_encap(sc, cur_tx, &m_head); if (error) { cur_tx = prev_tx; if (m_head == NULL) break; if_snd_prepend(sc->xl_ifp, m_head); break; } /* Chain it together. */ if (prev != NULL) prev->xl_ptr->xl_next = htole32(cur_tx->xl_phys); prev = cur_tx; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if_mtap(sc->xl_ifp, cur_tx->xl_mbuf, NULL, 0); XL_INC(idx, XL_TX_LIST_CNT); sc->xl_cdata.xl_tx_cnt++; } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) return; /* * Place the request for the upload interrupt * in the last descriptor in the chain. This way, if * we're chaining several packets at once, we'll only * get an interrupt once for the whole chain rather than * once for each packet. */ cur_tx->xl_ptr->xl_status |= htole32(XL_TXSTAT_DL_INTR); /* Start transmission */ sc->xl_cdata.xl_tx_prod = idx; start_tx->xl_prev->xl_ptr->xl_next = htole32(start_tx->xl_phys); bus_dmamap_sync(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Set a timeout in case the chip goes out to lunch. */ sc->xl_wdog_timer = 5; } static void xl_init(struct xl_softc *sc) { int error, i; struct mii_data *mii = NULL; XL_LOCK_ASSERT(sc); if ((sc->xl_flags & XL_FLAG_RUNNING) != 0) return; /* * Cancel pending I/O and free all RX/TX buffers. */ xl_stop(sc); /* Reset the chip to a known state. */ xl_reset(sc); if (sc->xl_miibus == NULL) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); xl_wait(sc); } CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); DELAY(10000); if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); /* * Clear WOL status and disable all WOL feature as WOL * would interfere Rx operation under normal environments. */ if ((sc->xl_flags & XL_FLAG_WOL) != 0) { XL_SEL_WIN(7); CSR_READ_2(sc, XL_W7_BM_PME); CSR_WRITE_2(sc, XL_W7_BM_PME, 0); } /* Init our MAC address */ XL_SEL_WIN(2); for (i = 0; i < ETHER_ADDR_LEN; i++) { CSR_WRITE_1(sc, XL_W2_STATION_ADDR_LO + i, if_lladdr(sc->xl_ifp)[i]); } /* Clear the station mask. */ for (i = 0; i < 3; i++) CSR_WRITE_2(sc, XL_W2_STATION_MASK_LO + (i * 2), 0); #ifdef notdef /* Reset TX and RX. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); #endif /* Init circular RX list. */ error = xl_list_rx_init(sc); if (error) { device_printf(sc->xl_dev, "initialization of the rx ring failed (%d)\n", error); xl_stop(sc); return; } /* Init TX descriptors. */ if (sc->xl_type == XL_TYPE_905B) error = xl_list_tx_init_90xB(sc); else error = xl_list_tx_init(sc); if (error) { device_printf(sc->xl_dev, "initialization of the tx ring failed (%d)\n", error); xl_stop(sc); return; } /* * Set the TX freethresh value. * Note that this has no effect on 3c905B "cyclone" * cards but is required for 3c900/3c905 "boomerang" * cards in order to enable the download engine. */ CSR_WRITE_1(sc, XL_TX_FREETHRESH, XL_PACKET_SIZE >> 8); /* Set the TX start threshold for best performance. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_SET_START|sc->xl_tx_thresh); /* * If this is a 3c905B, also set the tx reclaim threshold. * This helps cut down on the number of tx reclaim errors * that could happen on a busy network. The chip multiplies * the register value by 16 to obtain the actual threshold * in bytes, so we divide by 16 when setting the value here. * The existing threshold value can be examined by reading * the register at offset 9 in window 5. */ if (sc->xl_type == XL_TYPE_905B) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_SET_TX_RECLAIM|(XL_PACKET_SIZE >> 4)); } /* Set RX filter bits. */ xl_rxfilter(sc); /* * Load the address of the RX list. We have to * stall the upload engine before we can manipulate * the uplist pointer register, then unstall it when * we're finished. We also have to wait for the * stall command to complete before proceeding. * Note that we have to do this after any RX resets * have completed since the uplist register is cleared * by a reset. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_STALL); xl_wait(sc); CSR_WRITE_4(sc, XL_UPLIST_PTR, sc->xl_ldata.xl_rx_dmaaddr); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_UNSTALL); xl_wait(sc); if (sc->xl_type == XL_TYPE_905B) { /* Set polling interval */ CSR_WRITE_1(sc, XL_DOWN_POLL, 64); /* Load the address of the TX list */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_STALL); xl_wait(sc); CSR_WRITE_4(sc, XL_DOWNLIST_PTR, sc->xl_cdata.xl_tx_chain[0].xl_phys); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); xl_wait(sc); } /* * If the coax transceiver is on, make sure to enable * the DC-DC converter. */ XL_SEL_WIN(3); if (sc->xl_xcvr == XL_XCVR_COAX) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_START); else CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); /* * increase packet size to allow reception of 802.1q or ISL packets. * For the 3c90x chip, set the 'allow large packets' bit in the MAC * control register. For 3c90xB/C chips, use the RX packet size * register. */ if (sc->xl_type == XL_TYPE_905B) CSR_WRITE_2(sc, XL_W3_MAXPKTSIZE, XL_PACKET_SIZE); else { u_int8_t macctl; macctl = CSR_READ_1(sc, XL_W3_MAC_CTRL); macctl |= XL_MACCTRL_ALLOW_LARGE_PACK; CSR_WRITE_1(sc, XL_W3_MAC_CTRL, macctl); } /* Clear out the stats counters. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STATS_DISABLE); xl_stats_update(sc); XL_SEL_WIN(4); CSR_WRITE_2(sc, XL_W4_NET_DIAG, XL_NETDIAG_UPPER_BYTES_ENABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STATS_ENABLE); /* * Enable interrupts. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|0xFF); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STAT_ENB|XL_INTRS); #ifdef DEVICE_POLLING /* Disable interrupts if we are polling. */ if (sc->xl_capenable & IFCAP_POLLING) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|0); else #endif CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|XL_INTRS); if (sc->xl_flags & XL_FLAG_FUNCREG) bus_space_write_4(sc->xl_ftag, sc->xl_fhandle, 4, 0x8000); /* Set the RX early threshold */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_SET_THRESH|(XL_PACKET_SIZE >>2)); CSR_WRITE_4(sc, XL_DMACTL, XL_DMACTL_UP_RX_EARLY); /* Enable receiver and transmitter. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_ENABLE); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_ENABLE); xl_wait(sc); /* XXX Downcall to miibus. */ if (mii != NULL) mii_mediachg(mii); /* Select window 7 for normal operations. */ XL_SEL_WIN(7); sc->xl_flags |= XL_FLAG_RUNNING; sc->xl_wdog_timer = 0; callout_reset(&sc->xl_tick_callout, hz, xl_tick, sc); } /* * Set media options. */ static int xl_ifmedia_upd(if_t ifp) { struct xl_softc *sc; struct ifmedia *ifm = NULL; struct mii_data *mii = NULL; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); XL_LOCK(sc); if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); if (mii == NULL) ifm = &sc->ifmedia; else ifm = &mii->mii_media; switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_100_FX: case IFM_10_FL: case IFM_10_2: case IFM_10_5: xl_setmode(sc, ifm->ifm_media); XL_UNLOCK(sc); return (0); } if (sc->xl_media & XL_MEDIAOPT_MII || sc->xl_media & XL_MEDIAOPT_BTX || sc->xl_media & XL_MEDIAOPT_BT4) { sc->xl_flags &= ~XL_FLAG_RUNNING; xl_init(sc); } else { xl_setmode(sc, ifm->ifm_media); } XL_UNLOCK(sc); return (0); } /* * Report current media status. */ static void xl_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct xl_softc *sc; u_int32_t icfg; u_int16_t status = 0; struct mii_data *mii = NULL; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); XL_LOCK(sc); if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); XL_SEL_WIN(4); status = CSR_READ_2(sc, XL_W4_MEDIA_STATUS); XL_SEL_WIN(3); icfg = CSR_READ_4(sc, XL_W3_INTERNAL_CFG) & XL_ICFG_CONNECTOR_MASK; icfg >>= XL_ICFG_CONNECTOR_BITS; ifmr->ifm_active = IFM_ETHER; ifmr->ifm_status = IFM_AVALID; if ((status & XL_MEDIASTAT_CARRIER) == 0) ifmr->ifm_status |= IFM_ACTIVE; switch (icfg) { case XL_XCVR_10BT: ifmr->ifm_active = IFM_ETHER|IFM_10_T; if (CSR_READ_1(sc, XL_W3_MAC_CTRL) & XL_MACCTRL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; break; case XL_XCVR_AUI: if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { ifmr->ifm_active = IFM_ETHER|IFM_10_FL; if (CSR_READ_1(sc, XL_W3_MAC_CTRL) & XL_MACCTRL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } else ifmr->ifm_active = IFM_ETHER|IFM_10_5; break; case XL_XCVR_COAX: ifmr->ifm_active = IFM_ETHER|IFM_10_2; break; /* * XXX MII and BTX/AUTO should be separate cases. */ case XL_XCVR_100BTX: case XL_XCVR_AUTO: case XL_XCVR_MII: if (mii != NULL) { mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; } break; case XL_XCVR_100BFX: ifmr->ifm_active = IFM_ETHER|IFM_100_FX; break; default: if_printf(ifp, "unknown XCVR type: %d\n", icfg); break; } XL_UNLOCK(sc); } static int xl_ioctl(if_t ifp, u_long command, void *data, struct thread *td) { struct xl_softc *sc; struct ifreq *ifr = (struct ifreq *) data; uint32_t oflags; int error = 0; struct mii_data *mii = NULL; sc = if_getsoftc(ifp, IF_DRIVER_SOFTC); switch (command) { case SIOCSIFFLAGS: XL_LOCK(sc); oflags = sc->xl_if_flags; sc->xl_if_flags = ifr->ifr_flags; if (sc->xl_if_flags & IFF_UP) { if (sc->xl_flags & XL_FLAG_RUNNING && (oflags ^ sc->xl_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) xl_rxfilter(sc); else xl_init(sc); } else if (sc->xl_flags & XL_FLAG_RUNNING) xl_stop(sc); XL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* XXX Downcall from if_addmulti() possibly with locks held. */ XL_LOCK(sc); if (sc->xl_flags & XL_FLAG_RUNNING) xl_rxfilter(sc); XL_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); if (mii == NULL) error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); else error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: #ifdef DEVICE_POLLING if (((ifr->ifr_reqcap ^ ifr->ifr_curcap) & IFCAP_POLLING)) { XL_LOCK(sc); if ((ifr->ifr_reqcap & IFCAP_POLLING) != 0) { /* Disable interrupts */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|0); } else { /* Enable interrupts. */ XL_LOCK(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK | 0xFF); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB | XL_INTRS); if (sc->xl_flags & XL_FLAG_FUNCREG) bus_space_write_4(sc->xl_ftag, sc->xl_fhandle, 4, 0x8000); } XL_UNLOCK(sc); } #endif /* DEVICE_POLLING */ sc->xl_capenable = ifr->ifr_reqcap; if ((ifr->ifr_reqcap & IFCAP_TXCSUM) != 0) ifr->ifr_hwassist = XL905B_CSUM_FEATURES; else ifr->ifr_hwassist = 0; break; default: error = EOPNOTSUPP; break; } return (error); } static int xl_watchdog(struct xl_softc *sc) { if_t ifp = sc->xl_ifp; u_int16_t status = 0; int misintr; XL_LOCK_ASSERT(sc); if (sc->xl_wdog_timer == 0 || --sc->xl_wdog_timer != 0) return (0); xl_rxeof(sc); xl_txeoc(sc); misintr = 0; if (sc->xl_type == XL_TYPE_905B) { xl_txeof_90xB(sc); if (sc->xl_cdata.xl_tx_cnt == 0) misintr++; } else { xl_txeof(sc); if (sc->xl_cdata.xl_tx_head == NULL) misintr++; } if (misintr != 0) { device_printf(sc->xl_dev, "watchdog timeout (missed Tx interrupts) -- recovering\n"); return (0); } if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); XL_SEL_WIN(4); status = CSR_READ_2(sc, XL_W4_MEDIA_STATUS); device_printf(sc->xl_dev, "watchdog timeout\n"); if (status & XL_MEDIASTAT_CARRIER) device_printf(sc->xl_dev, "no carrier - transceiver cable problem?\n"); sc->xl_flags &= ~XL_FLAG_RUNNING; xl_init(sc); if (if_snd_len(ifp)) { if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(sc); else xl_start_locked(sc); } return (EJUSTRETURN); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void xl_stop(struct xl_softc *sc) { register int i; XL_LOCK_ASSERT(sc); sc->xl_wdog_timer = 0; CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_DISABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STATS_DISABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_DISCARD); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_DISABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); DELAY(800); #ifdef foo CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); #endif CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|XL_STAT_INTLATCH); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STAT_ENB|0); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|0); if (sc->xl_flags & XL_FLAG_FUNCREG) bus_space_write_4(sc->xl_ftag, sc->xl_fhandle, 4, 0x8000); /* Stop the stats updater. */ callout_stop(&sc->xl_tick_callout); /* * Free data in the RX lists. */ for (i = 0; i < XL_RX_LIST_CNT; i++) { if (sc->xl_cdata.xl_rx_chain[i].xl_mbuf != NULL) { bus_dmamap_unload(sc->xl_mtag, sc->xl_cdata.xl_rx_chain[i].xl_map); bus_dmamap_destroy(sc->xl_mtag, sc->xl_cdata.xl_rx_chain[i].xl_map); m_freem(sc->xl_cdata.xl_rx_chain[i].xl_mbuf); sc->xl_cdata.xl_rx_chain[i].xl_mbuf = NULL; } } if (sc->xl_ldata.xl_rx_list != NULL) bzero(sc->xl_ldata.xl_rx_list, XL_RX_LIST_SZ); /* * Free the TX list buffers. */ for (i = 0; i < XL_TX_LIST_CNT; i++) { if (sc->xl_cdata.xl_tx_chain[i].xl_mbuf != NULL) { bus_dmamap_unload(sc->xl_mtag, sc->xl_cdata.xl_tx_chain[i].xl_map); bus_dmamap_destroy(sc->xl_mtag, sc->xl_cdata.xl_tx_chain[i].xl_map); m_freem(sc->xl_cdata.xl_tx_chain[i].xl_mbuf); sc->xl_cdata.xl_tx_chain[i].xl_mbuf = NULL; } } if (sc->xl_ldata.xl_tx_list != NULL) bzero(sc->xl_ldata.xl_tx_list, XL_TX_LIST_SZ); sc->xl_flags &= ~XL_FLAG_RUNNING; } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int xl_shutdown(device_t dev) { return (xl_suspend(dev)); } static int xl_suspend(device_t dev) { struct xl_softc *sc; sc = device_get_softc(dev); XL_LOCK(sc); xl_stop(sc); xl_setwol(sc); XL_UNLOCK(sc); return (0); } static int xl_resume(device_t dev) { struct xl_softc *sc; sc = device_get_softc(dev); XL_LOCK(sc); if (sc->xl_if_flags & IFF_UP) { sc->xl_flags &= ~XL_FLAG_RUNNING; xl_init(sc); } XL_UNLOCK(sc); return (0); } static void xl_setwol(struct xl_softc *sc) { if_t ifp; u_int16_t cfg, pmstat; if ((sc->xl_flags & XL_FLAG_WOL) == 0) return; ifp = sc->xl_ifp; XL_SEL_WIN(7); /* Clear any pending PME events. */ CSR_READ_2(sc, XL_W7_BM_PME); cfg = 0; if ((sc->xl_capenable & IFCAP_WOL_MAGIC) != 0) cfg |= XL_BM_PME_MAGIC; CSR_WRITE_2(sc, XL_W7_BM_PME, cfg); /* Enable RX. */ if ((sc->xl_capenable & IFCAP_WOL_MAGIC) != 0) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_ENABLE); /* Request PME. */ pmstat = pci_read_config(sc->xl_dev, sc->xl_pmcap + PCIR_POWER_STATUS, 2); if ((sc->xl_capenable & IFCAP_WOL_MAGIC) != 0) pmstat |= PCIM_PSTAT_PMEENABLE; else pmstat &= ~PCIM_PSTAT_PMEENABLE; pci_write_config(sc->xl_dev, sc->xl_pmcap + PCIR_POWER_STATUS, pmstat, 2); } Index: projects/ifnet/sys/net/if.c =================================================================== --- projects/ifnet/sys/net/if.c (revision 280372) +++ projects/ifnet/sys/net/if.c (revision 280373) @@ -1,3646 +1,3647 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_device_polling.h" #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include #ifdef COMPAT_FREEBSD32 #include #include #endif SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); int ifqmaxlen = IFQ_MAXLEN; SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*bridge_linkstate_p)(struct ifnet *ifp); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. */ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_freemulti(struct ifmultiaddr *); static void if_grow(void); static int if_setflag(struct ifnet *, int, int, int *, int); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int if_rtdel(struct radix_node *, void *); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static void if_detach_internal(struct ifnet *, int, struct if_clone **); static struct ifqueue * if_snd_alloc(int); static void if_snd_free(struct ifqueue *); static void if_snd_qflush(if_t); #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif VNET_DEFINE(int, if_index); VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); static VNET_DEFINE(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) static struct iftsomax default_tsomax = { /* * The TSO defaults need to be such that an NFS mbuf list of 35 * mbufs totalling just below 64K works and that a chain of mbufs * can be defragged into at most 32 segments. */ .tsomax_bytes = MIN(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)), .tsomax_segcount = 35, .tsomax_segsize = 2048, }; /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and * an rwlock. Either may be acquired shared to stablize the list, but both * must be acquired writable to modify the list. This model allows us to * both stablize the interface list during interrupt thread processing, but * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for * use, so will use this pointer value to indicate reservation. */ #define IFNET_HOLD (void *)(uintptr_t)(-1) static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); static struct ifops ifdead_ops; struct ifnet * ifnet_byindex_locked(u_short idx) { if (idx > V_if_index) return (NULL); if (V_ifindex_table[idx] == IFNET_HOLD) return (NULL); return (V_ifindex_table[idx]); } struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) { IFNET_RUNLOCK_NOSLEEP(); return (NULL); } if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } /* * Allocate an ifindex array entry. */ static void ifindex_alloc(struct ifnet *ifp) { u_short idx; IFNET_WLOCK(); retry: /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. */ if (idx >= V_if_indexlim) { if_grow(); goto retry; } if (idx > V_if_index) V_if_index = idx; V_ifindex_table[idx] = ifp; ifp->if_index = idx; IFNET_WUNLOCK(); } static void ifindex_free(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } struct ifaddr * ifaddr_byindex(u_short idx) { struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); ifa = ifnet_byindex_locked(idx)->if_addr; if (ifa != NULL) ifa_ref(ifa); IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void vnet_if_init(const void *unused __unused) { TAILQ_INIT(&V_ifnet); TAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); if_grow(); /* create initial table */ IFNET_WUNLOCK(); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); #endif static void if_grow(void) { int oldlim; u_int n; struct ifnet **e; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; IFNET_WUNLOCK(); n = (oldlim << 1) * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); IFNET_WLOCK(); if (V_if_indexlim != oldlim) { free(e, M_IFNET); return; } if (V_ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); free((caddr_t)V_ifindex_table, M_IFNET); } V_if_indexlim <<= 1; V_ifindex_table = e; } /* * Registration/deregistration of interface types. A type can carry * common methods. Certain drivers depend on types to be loaded. */ static SLIST_HEAD(, iftype) iftypehead = SLIST_HEAD_INITIALIZER(iftypehead); void iftype_register(struct iftype *ift) { IFNET_WLOCK(); SLIST_INSERT_HEAD(&iftypehead, ift, ift_next); IFNET_WUNLOCK(); } void iftype_unregister(struct iftype *ift) { IFNET_WLOCK(); SLIST_REMOVE(&iftypehead, ift, iftype, ift_next); IFNET_WUNLOCK(); } static struct iftype * iftype_find(ifType type) { struct iftype *ift; IFNET_RLOCK(); SLIST_FOREACH(ift, &iftypehead, ift_next) if (ift->ift_type == type) break; IFNET_RUNLOCK(); return (ift); } #define ifdrv_flags __ifdrv_stack_owned #define IFDRV_BLESSED 0x00000001 static void ifdriver_bless(struct ifdriver *ifdrv, struct iftype *ift) { if (ift != NULL) { #define COPY(op) if (ifdrv->ifdrv_ops.op == NULL) \ ifdrv->ifdrv_ops.op = ift->ift_ops.op COPY(ifop_input); COPY(ifop_transmit); COPY(ifop_output); COPY(ifop_ioctl); COPY(ifop_get_counter); COPY(ifop_qflush); COPY(ifop_resolvemulti); COPY(ifop_reassign); #undef COPY #define COPY(f) if (ifdrv->ifdrv_ ## f == 0) \ ifdrv->ifdrv_ ## f = ift->ift_ ## f COPY(hdrlen); COPY(addrlen); COPY(dlt); COPY(dlt_hdrlen); #undef COPY } /* * If driver has ifdrv_maxqlen defined, then it opts-in * for * generic software queue, and thus for default * ifop_qflush. */ if (ifdrv->ifdrv_maxqlen > 0) { KASSERT(ifdrv->ifdrv_ops.ifop_qflush == NULL, ("%s: fdrv_maxqlen > 0 and ifop_qflush", ifdrv->ifdrv_name)); ifdrv->ifdrv_ops.ifop_qflush = if_snd_qflush; } if (ifdrv->ifdrv_ops.ifop_get_counter == NULL) ifdrv->ifdrv_ops.ifop_get_counter = if_get_counter_default; #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set. */ if (ifdrv->ifdrv_tsomax == NULL) ifdrv->ifdrv_tsomax = &default_tsomax; else KASSERT(ifdrv->ifdrv_tsomax->tsomax_bytes == 0 || ifdrv->ifdrv_tsomax->tsomax_bytes >= (IP_MAXPACKET / 8), ("%s: tsomax_bytes is outside of range", ifdrv->ifdrv_name)); #endif + ifdrv->ifdrv_ops.ifop_origin = IFOP_ORIGIN_DRIVER; ifdrv->ifdrv_flags |= IFDRV_BLESSED; } /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. * * The only reason for this function to fail is failure to allocate a * unit number, which is possible only if driver does cloning. */ if_t if_attach(struct if_attach_args *ifat) { struct ifdriver *ifdrv; struct iftype *ift; struct ifnet *ifp; struct ifaddr *ifa; struct sockaddr_dl *sdl; int socksize, ifasize, namelen, masklen; KASSERT(ifat->ifat_version == IF_ATTACH_VERSION, ("%s: version %d, expected %d", __func__, ifat->ifat_version, IF_ATTACH_VERSION)); ifdrv = ifat->ifat_drv; ift = iftype_find(ifdrv->ifdrv_type); if ((ifdrv->ifdrv_flags & IFDRV_BLESSED) == 0) ifdriver_bless(ifdrv, ift); if (ifdrv->ifdrv_clone != NULL) { int error; error = ifc_alloc_unit(ifdrv->ifdrv_clone, &ifat->ifat_dunit); if (error) { log(LOG_WARNING, "%s unit allocation failure: %d\n", ifdrv->ifdrv_name, error); ifat->ifat_error = error; return (NULL); } } ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO); for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); #ifdef MAC mac_ifnet_init(ifp); mac_ifnet_create(ifp); #endif ifp->if_ops = &ifdrv->ifdrv_ops; ifp->if_drv = ifdrv; ifp->if_type = ift; #define COPY(f) ifp->if_ ## f = ifat->ifat_ ## f COPY(softc); COPY(mtu); COPY(flags); COPY(capabilities); COPY(capenable); COPY(hwassist); COPY(baudrate); #undef COPY if (ifat->ifat_tsomax) { /* * Driver wants dynamic tsomax on this interface, we * will allocate one and are responsible for freeing * it on detach. */ KASSERT(ifat->ifat_tsomax->tsomax_bytes == 0 || ifat->ifat_tsomax->tsomax_bytes >= (IP_MAXPACKET / 8), ("%s: tsomax_bytes is outside of range", ifdrv->ifdrv_name)); ifp->if_tsomax = malloc(sizeof(struct iftsomax), M_IFNET, M_WAITOK); bcopy(ifat->ifat_tsomax, ifp->if_tsomax, sizeof(struct iftsomax)); } else ifp->if_tsomax = ifdrv->ifdrv_tsomax; if (ifdrv->ifdrv_maxqlen > 0) ifp->if_snd = if_snd_alloc(ifdrv->ifdrv_maxqlen); IF_ADDR_LOCK_INIT(ifp); IF_AFDATA_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); TAILQ_INIT(&ifp->if_addrhead); TAILQ_INIT(&ifp->if_multiaddrs); TAILQ_INIT(&ifp->if_groups); /* XXXGL: there is no check that name is unique. */ ifp->if_dunit = ifat->ifat_dunit; if (ifat->ifat_name) strlcpy(ifp->if_xname, ifat->ifat_name, IFNAMSIZ); else if (ifat->ifat_dunit != IFAT_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", ifdrv->ifdrv_name, ifat->ifat_dunit); else strlcpy(ifp->if_xname, ifdrv->ifdrv_name, IFNAMSIZ); ifindex_alloc(ifp); refcount_init(&ifp->if_refcount, 1); /* * Allocate ifaddr to store link level address and name for this * interface. Always save enough space for any possiable name so * we can do a rename in place later. */ namelen = strlen(ifp->if_xname); masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifdrv->ifdrv_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifdrv->ifdrv_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_rtrequest = link_rtrequest; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (char *)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); if (ift) ift->ift_attach(ifp, ifat); bpfattach(ifp, ifdrv->ifdrv_dlt, ifdrv->ifdrv_dlt_hdrlen); if_attach_internal(ifp, 0, NULL); return (ifp); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the last reference to an * interface is released. */ static void if_free_internal(struct ifnet *ifp) { KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ if (ifp->if_description != NULL) free(ifp->if_description, M_IFDESCR); IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); if (ifp->if_snd) if_snd_free(ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); if (ifp->if_tsomax != ifp->if_drv->ifdrv_tsomax) free(ifp->if_tsomax, M_IFNET); free(ifp, M_IFNET); } void if_mtap(if_t ifp, struct mbuf *m, void *data, u_int dlen) { if (!bpf_peers_present(ifp->if_bpf)) return; if (dlen == 0) { if (m->m_flags & M_VLANTAG) ether_vlan_mtap(ifp->if_bpf, m, NULL, 0); else bpf_mtap(ifp->if_bpf, m); } else bpf_mtap2(ifp->if_bpf, data, dlen, m); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { /* We don't assert the ifnet list lock here, but arguably should. */ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; if_free_internal(ifp); } /* * Compute the least common TSO limit. */ void if_tsomax_common(const struct iftsomax *from, struct iftsomax *to) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (to->tsomax_bytes == 0 || (from->tsomax_bytes != 0 && from->tsomax_bytes < to->tsomax_bytes)) { to->tsomax_bytes = from->tsomax_bytes; } if (to->tsomax_segcount == 0 || (from->tsomax_segcount != 0 && from->tsomax_segcount < to->tsomax_segcount)) { to->tsomax_segcount = from->tsomax_segcount; } if (to->tsomax_segsize == 0 || (from->tsomax_segsize != 0 && from->tsomax_segsize < to->tsomax_segsize)) { to->tsomax_segsize = from->tsomax_segsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_tsomax_update(if_t ifp, const struct iftsomax *new) { int retval = 0; KASSERT(ifp->if_tsomax != ifp->if_drv->ifdrv_tsomax, ("%s: interface %s (driver %s) has static if_tsomax", __func__, ifp->if_xname, ifp->if_drv->ifdrv_name)); if (ifp->if_tsomax->tsomax_bytes != new->tsomax_bytes) { ifp->if_tsomax->tsomax_bytes = new->tsomax_bytes; retval++; } if (ifp->if_tsomax->tsomax_segsize != new->tsomax_segsize) { ifp->if_tsomax->tsomax_segsize = new->tsomax_segsize; retval++; } if (ifp->if_tsomax->tsomax_segcount != new->tsomax_segcount) { ifp->if_tsomax->tsomax_segcount = new->tsomax_segcount; retval++; } KASSERT(ifp->if_tsomax->tsomax_bytes == 0 || ifp->if_tsomax->tsomax_bytes >= (IP_MAXPACKET / 8), ("%s: tsomax_bytes is outside of range", ifp->if_xname)); return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. */ if (vmove && ifc != NULL) if_clone_addgroup(ifp, ifc); getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; #ifdef VIMAGE /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } #endif IFNET_WLOCK(); TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_attachdomain(void *dummy) { struct ifnet *ifp; TAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. */ if (IF_AFDATA_TRYLOCK(ifp) == 0) return; if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa, *next; TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family == AF_LINK) continue; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); ifa_free(ifa); } } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 1); IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(if_t ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ bpfdetach(ifp); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) if_poll_deregister(ifp); #endif CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0, NULL); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex_locked(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); ifindex_free(ifp->if_index); IFNET_WUNLOCK(); if (ifp->if_drv->ifdrv_clone != NULL) ifc_free_unit(ifp->if_drv->ifdrv_clone, ifp->if_dunit); if (refcount_release(&ifp->if_refcount)) if_free_internal(ifp); CURVNET_RESTORE(); } static void if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; struct radix_node_head *rnh; int i, j; struct domain *dp; struct ifnet *iter; int found = 0; IFNET_WLOCK(); TAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { TAILQ_REMOVE(&V_ifnet, ifp, if_link); found = 1; break; } #ifdef VIMAGE if (found) curvnet->vnet_ifcnt--; #endif IFNET_WUNLOCK(); if (!found) { if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ } /* Check if this is a cloned interface or not. */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); /* * Remove/wait for pending events. */ taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* * Remove routes and flush queues. */ if_down(ifp); #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { struct iftype *ift = ifp->if_type; if (ift != NULL && ift->ift_detach != NULL) ift->ift_detach(ifp); /* * Prevent further calls into the device driver via ifnet. */ ifp->if_ops = &ifdead_ops; /* * Remove link ifaddr pointer and maybe decrement if_index. * Clean up all addresses. */ ifp->if_addr = NULL; /* We can now free link ifaddr. */ if (!TAILQ_EMPTY(&ifp->if_addrhead)) { ifa = TAILQ_FIRST(&ifp->if_addrhead); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); ifa_free(ifa); } } /* * Delete all remaining routes using this interface * Unfortuneatly the only way to do this is to slog through * the entire routing table looking for routes which point * to this interface...oh well... */ for (i = 1; i <= AF_MAX; i++) { for (j = 0; j < rt_numfibs; j++) { rnh = rt_tables_get_rnh(j, i); if (rnh == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); RADIX_NODE_HEAD_UNLOCK(rnh); } } if_delgroups(ifp); /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the * theoretical race with re-attaching. */ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); } } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in current * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. * An attempt is made to shrink if_index in current vnet, find an * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. */ if_detach_internal(ifp, 1, &ifc); /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink * the if_index for that vnet if possible. * * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, * or we'd lock on one vnet and unlock on another. */ IFNET_WLOCK(); ifindex_free(ifp->if_index); IFNET_WUNLOCK(); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); IFNET_WLOCK(); ifp->if_index = ifindex_alloc(); ifnet_setbyindex_locked(ifp->if_index, ifp); IFNET_WUNLOCK(); if_attach_internal(ifp, 1, ifc); CURVNET_RESTORE(); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exists in the dst. prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); CURVNET_RESTORE(); if (difp != NULL) { prison_free(pr); return (EEXIST); } /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; TAILQ_INIT(&ifg->ifg_members); TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_member *ifgm; IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; struct ifg_member *ifgm; char groupname[IFNAMSIZ]; IFNET_WLOCK(); while (!TAILQ_EMPTY(&ifp->if_groups)) { ifgl = TAILQ_FIRST(&ifp->if_groups); strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } /* * Stores all groups from an interface in memory pointed * to by data */ static int if_getgroup(struct ifgroupreq *data, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; struct ifgroupreq *ifgr = data; if (ifgr->ifgr_len == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); IF_ADDR_RUNLOCK(ifp); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; /* XXX: wire */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IF_ADDR_RUNLOCK(ifp); return (error); } len -= sizeof(ifgrq); ifgp++; } IF_ADDR_RUNLOCK(ifp); return (0); } /* * Stores all members of a group in memory pointed to by data */ static int if_getgroupmembers(struct ifgroupreq *data) { struct ifgroupreq *ifgr = data; struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, ifgr->ifgr_name)) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rnh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. * * Arguments: * rn pointer to node in the routing table * arg argument passed to rnh->rnh_walktree() - detaching interface * * Returns: * 0 successful * errno failed - reason indicated * */ static int if_rtdel(struct radix_node *rn, void *arg) { struct rtentry *rt = (struct rtentry *)rn; struct ifnet *ifp = arg; int err; if (rt->rt_ifp == ifp) { /* * Protect (sorta) against walktree recursion problems * with cloned routes */ if ((rt->rt_flags & RTF_UP) == 0) return (0); err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags|RTF_RNH_LOCKED|RTF_PINNED, (struct rtentry **) NULL, rt->rt_fibnum); if (err) { log(LOG_WARNING, "if_rtdel: error %d\n", err); } } return (0); } /* * Returning different software contexts associated with ifnet. */ void * if_getsoftc(struct ifnet *ifp, ift_feature f) { switch (f) { case IF_DRIVER_SOFTC: return (ifp->if_softc); case IF_LLADDR: return (LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr))); case IF_BPF: return (ifp->if_bpf); case IF_NAME: return (ifp->if_xname); case IF_VLAN: return (ifp->if_vlantrunk); default: panic("%s: unknown feature %d", __func__, f); }; } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Account successful transmission of an mbuf. */ void if_inc_txcounters(struct ifnet *ifp, struct mbuf *m) { counter_u64_add(ifp->if_counters[IFCOUNTER_OBYTES], m->m_pkthdr.len); counter_u64_add(ifp->if_counters[IFCOUNTER_OPACKETS], 1); if (m->m_flags & M_MCAST) counter_u64_add(ifp->if_counters[IFCOUNTER_OMCASTS], 1); } /* * Set the baudrate. */ void if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { ifp->if_baudrate = baudrate; } /* * Copy data from ifnet to userland API structure if_data. */ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = if_type(ifp); ifd->ifi_physical = 0; ifd->ifi_addrlen = if_addrlen(ifp); ifd->ifi_hdrlen = ifp->if_drv->ifdrv_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Initialization, destruction and refcounting functions for ifaddrs. */ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { refcount_acquire(&ifa->ifa_refcnt); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) { counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } } int ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { int error = 0; struct rtentry *rt = NULL; struct rt_addrinfo info; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; bzero(&info, sizeof(info)); info.rti_ifp = V_loif; info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; error = rtrequest1_fib(RTM_ADD, &info, &rt, ifa->ifa_ifp->if_fib); if (error == 0 && rt != NULL) { RT_LOCK(rt); ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = if_type(ifa->ifa_ifp); ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = ifa->ifa_ifp->if_index; RT_REMREF(rt); RT_UNLOCK(rt); } else if (error != 0) log(LOG_DEBUG, "%s: insertion failed: %u\n", __func__, error); return (error); } int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { int error = 0; struct rt_addrinfo info; struct sockaddr_dl null_sdl; bzero(&null_sdl, sizeof(null_sdl)); null_sdl.sdl_len = sizeof(null_sdl); null_sdl.sdl_family = AF_LINK; null_sdl.sdl_type = if_type(ifa->ifa_ifp); null_sdl.sdl_index = ifa->ifa_ifp->if_index; bzero(&info, sizeof(info)); info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; error = rtrequest1_fib(RTM_DELETE, &info, NULL, ifa->ifa_ifp->if_fib); if (error != 0) log(LOG_DEBUG, "%s: deletion failed: %u\n", __func__, error); return (error); } int ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *sa, int fib) { struct rtentry *rt; rt = rtalloc1_fib(sa, 0, 0, fib); if (rt == NULL) { log(LOG_DEBUG, "%s: fail", __func__); return (EHOSTUNREACH); } ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = if_type(ifa->ifa_ifp); ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = ifa->ifa_ifp->if_index; RTFREE_LOCKED(rt); return (0); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((struct sockaddr_dl *)(a1))->sdl_len == \ ((struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(LLADDR((struct sockaddr_dl *)(a1)), \ LLADDR((struct sockaddr_dl *)(a2)), \ ((struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ static struct ifaddr * ifa_ifwithaddr_internal(struct sockaddr *addr, int getref) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } struct ifaddr * ifa_ifwithaddr(struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 1)); } int ifa_ifwithaddr_check(struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 0) != NULL); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; char *addr_data = addr->sa_data, *cplim; /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. Maintain a reference * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that * kept it stable when we move onto the next interface. */ IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } else { /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { if (ifa_maybe != NULL) ifa_free(ifa_maybe); ifa_maybe = ifa; ifa_ref(ifa_maybe); } } } IF_ADDR_RUNLOCK(ifp); } ifa = ifa_maybe; ifa_maybe = NULL; done: IFNET_RUNLOCK_NOSLEEP(); if (ifa_maybe != NULL) ifa_free(ifa_maybe); return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } #include /* * Default action when installing a route with a Link Level gateway. * Lookup an appropriate real ifa to point to. * This should be moved to /sys/net/link.c eventually. */ static void link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { struct ifaddr *ifa, *oifa; struct sockaddr *dst; struct ifnet *ifp; if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) || ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0)) return; ifa = ifaof_ifpforaddr(dst, ifp); if (ifa) { oifa = rt->rt_ifa; rt->rt_ifa = ifa; ifa_free(oifa); if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) ifa->ifa_rtrequest(cmd, rt, info); } } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp = (struct ifnet *)arg; int link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); /* Notify that the link state has changed. */ rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); /* XXXGL: make ng_ether softc pointer */ if ((if_type(ifp) == IFT_ETHER || if_type(ifp) == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) (*bridge_linkstate_p)(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname, (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, ifp->if_link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { struct ifaddr *ifa; ifp->if_flags &= ~IFF_UP; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { struct ifaddr *ifa; ifp->if_flags |= IFF_UP; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) pfctlinput(PRC_IFUP, ifa->ifa_addr); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } /* * Map interface name to interface structure pointer, with or without * returning a reference. */ struct ifnet * ifunit_ref(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifunit(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } IFNET_RUNLOCK_NOSLEEP(); return (ifp); } /* * Hardware specific interface ioctls. */ int if_drvioctl(struct ifnet *ifp, u_long cmd, void *data, struct thread *td) { struct ifreq *ifr; size_t namelen, onamelen; size_t descrlen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; struct ifaddr *ifa; struct sockaddr_dl *sdl; uint32_t flags, oflags; int error = 0; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: ifr->ifr_flags = ifp->if_flags & 0xffff; ifr->ifr_flagshigh = ifp->if_flags >> 16; /* * Some software may care about IFF_RUNNING, so make * it happy. */ if (ifp->if_flags & IFF_UP) ifr->ifr_flags |= IFF_RUNNING; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr->ifr_buffer.length < descrlen) ifr->ifr_buffer.buffer = NULL; else error = copyout(ifp->if_description, ifr->ifr_buffer.buffer, descrlen); ifr->ifr_buffer.length = descrlen; } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. */ if (ifr->ifr_buffer.length > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr->ifr_buffer.length == 0) descrbuf = NULL; else { descrbuf = malloc(ifr->ifr_buffer.length, M_IFDESCR, M_WAITOK | M_ZERO); error = copyin(ifr->ifr_buffer.buffer, descrbuf, ifr->ifr_buffer.length - 1); if (error) { free(descrbuf, M_IFDESCR); break; } } sx_xlock(&ifdescr_sx); odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); getmicrotime(&ifp->if_lastchange); free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; (void )if_ioctl(ifp, cmd, data, td); break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Historically if_flags were 16-bit, and thus * they come from userland in two parts, that * we need to swap. Clear IFF_RUNNING that is * no longer used in kernel. */ ifr->ifr_flags &= ~IFF_RUNNING; flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if ((flags & IFF_CANTCHANGE) != (ifp->if_flags & IFF_CANTCHANGE)) return (EINVAL); /* * Pass new flags down to driver and see if it accepts them. */ error = if_ioctl(ifp, cmd, data, td); if (error) return (error); flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); oflags = ifp->if_flags; ifp->if_flags = flags; getmicrotime(&ifp->if_lastchange); /* * Manage IFF_UP flip. */ if (oflags & IFF_UP && (flags & IFF_UP) == 0) if_down(ifp); else if (flags & IFF_UP && (oflags & IFF_UP) == 0) if_up(ifp); /* See if permanently promiscuous mode bit is about to flip. */ if ((oflags ^ flags) & IFF_PPROMISC) { if (flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; log(LOG_INFO, "%s: permanently promiscuous mode %s\n", ifp->if_xname, (flags & IFF_PPROMISC) ? "enabled" : "disabled"); } break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error) return (error); if ((ifr->ifr_reqcap & IFCAP_VLAN_HWTSO) != 0) ifr->ifr_reqcap |= IFCAP_VLAN_HWTAGGING; if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); if (ifr->ifr_reqcap == ifp->if_capenable) return (0); ifr->ifr_curcap = ifp->if_capenable; error = if_ioctl(ifp, cmd, data, td); if (error != 0) break; #ifdef DEVICE_POLLING if ((ifr->ifr_reqcap ^ ifr->ifr_curcap) & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) if_poll_register(ifp); else if_poll_deregister(ifp); } #endif ifp->if_capenable = ifr->ifr_reqcap; ifp->if_hwassist = ifr->ifr_hwassist; getmicrotime(&ifp->if_lastchange); if (ifp->if_vlantrunk != NULL) (*vlan_trunk_cap_p)(ifp); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; /* Announce the departure of the interface. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); log(LOG_INFO, "%s: changing name to '%s'\n", ifp->if_xname, new_name); IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). */ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); ifp->if_flags &= ~IFF_RENAMING; break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); error = if_ioctl(ifp, cmd, data, td); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifr->ifr_mtu == ifp->if_mtu) return (0); error = if_ioctl(ifp, cmd, data, td); if (error == 0) { ifp->if_mtu = ifr->ifr_mtu; getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. */ IF_ADDR_RLOCK(ifp); ifma = if_findmulti(ifp, &ifr->ifr_addr); IF_ADDR_RUNLOCK(ifp); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); error = if_ioctl(ifp, cmd, data, td); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFGENERIC: error = if_ioctl(ifp, cmd, data, td); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); EVENTHANDLER_INVOKE(iflladdr_event, ifp); break; case SIOCAIFGROUP: { struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr; error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); if ((error = if_addgroup(ifp, ifgr->ifgr_group))) return (error); break; } case SIOCGIFGROUP: if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp))) return (error); break; case SIOCDIFGROUP: { struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr; error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); if ((error = if_delgroup(ifp, ifgr->ifgr_group))) return (error); break; } default: error = ENOIOCTL; break; } return (error); } #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) #endif /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; CURVNET_SET(so->so_vnet); switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); CURVNET_RESTORE(); return (error); #ifdef COMPAT_FREEBSD32 case SIOCGIFCONF32: { struct ifconf32 *ifc32; struct ifconf ifc; ifc32 = (struct ifconf32 *)data; ifc.ifc_len = ifc32->ifc_len; ifc.ifc_buf = PTRIN(ifc32->ifc_buf); error = ifconf(SIOCGIFCONF, (void *)&ifc); CURVNET_RESTORE(); if (error == 0) ifc32->ifc_len = ifc.ifc_len; return (error); } #endif } ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); CURVNET_RESTORE(); return (error); #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL); CURVNET_RESTORE(); return (error); case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) error = if_clone_destroy(ifr->ifr_name); CURVNET_RESTORE(); return (error); case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); CURVNET_RESTORE(); return (error); case SIOCGIFGMEMB: error = if_getgroupmembers((struct ifgroupreq *)data); CURVNET_RESTORE(); return (error); #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); CURVNET_RESTORE(); return (error); #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { CURVNET_RESTORE(); return (ENXIO); } error = if_drvioctl(ifp, cmd, data, td); if (error != ENOIOCTL) { if_rele(ifp); CURVNET_RESTORE(); return (error); } oif_flags = ifp->if_flags; if (so->so_proto == NULL) { if_rele(ifp); CURVNET_RESTORE(); return (EOPNOTSUPP); } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. */ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); if (error == EOPNOTSUPP) error = if_ioctl(ifp, cmd, data, td); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } if_rele(ifp); CURVNET_RESTORE(); return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). * The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. */ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, curthread); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC)) log(LOG_INFO, "%s: promiscuous mode %s\n", ifp->if_xname, (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { int addrs; /* * Zero the ifr_name buffer to make sure we don't * disclose the contents of the stack. */ memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } IF_ADDR_RUNLOCK(ifp); if (addrs == 0) { bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ static void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = if_resolvemulti(ifp, &llsa, sa); if (error == EOPNOTSUPP) llsa = NULL; else if (error) goto unlock_out; /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. */ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. */ if_ioctl(ifp, SIOCADDMULTI, 0, curthread); if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; #ifdef INVARIANTS struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) ifp = NULL; IFNET_RUNLOCK_NOSLEEP(); KASSERT(ifp != NULL, ("%s: ifnet went away", __func__)); #endif if (ifp == NULL) return (ENOENT); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref) if_ioctl(ifp, SIOCDELMULTI, 0, curthread); return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma(struct ifmultiaddr *ifma) { struct ifnet *ifp; int lastref; ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) { printf("%s: ifnet %p disappeared\n", __func__, ifp); ifp = NULL; } IFNET_RUNLOCK_NOSLEEP(); } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, 0); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref) if_ioctl(ifp, SIOCDELMULTI, 0, curthread); } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. */ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link); } if_freemulti(ll_ifma); } } if (ifp != NULL) TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. */ int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; if (ifa == NULL) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) { ifa_free(ifa); return (EINVAL); } if (len != sdl->sdl_alen) { /* don't allow length to change */ ifa_free(ifa); return (EINVAL); } switch (if_type(ifp)) { case IFT_ETHER: case IFT_FDDI: case IFT_XETHER: case IFT_ISO88025: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_ARCNET: case IFT_IEEE8023ADLAG: case IFT_IEEE80211: bcopy(lladdr, LLADDR(sdl), len); ifa_free(ifa); break; default: ifa_free(ifa); return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; if_ioctl(ifp, SIOCSIFFLAGS, &ifr, curthread); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; if_ioctl(ifp, SIOCSIFFLAGS, &ifr, curthread); #ifdef INET /* * Also send gratuitous ARPs to notify other nodes about * the address change. */ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(ifp, ifa); } #endif } return (0); } int if_printf(struct ifnet *ifp, const char * fmt, ...) { va_list ap; int retval; retval = printf("%s: ", ifp->if_xname); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } int if_getmtu_family(if_t ifp, int family) { struct domain *dp; for (dp = domains; dp; dp = dp->dom_next) if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu(ifp)); return (ifp->if_mtu); } /* * Methods for drivers to access interface unicast and multicast * addresses. Driver do not know 'struct ifaddr' neither 'struct ifmultiaddr'. */ void if_foreach_addr(if_t ifp, ifaddr_cb_t cb, void *cb_arg) { struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) (*cb)(cb_arg, ifa->ifa_addr, ifa->ifa_dstaddr, ifa->ifa_netmask); IF_ADDR_RUNLOCK(ifp); } void if_foreach_maddr(if_t ifp, ifmaddr_cb_t cb, void *cb_arg) { struct ifmultiaddr *ifma; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) (*cb)(cb_arg, ifma->ifma_addr); IF_ADDR_RUNLOCK(ifp); } /* * Generic software queue, that many non-high-end drivers use. For now * it is minimalistic version of classic BSD ifqueue, but we can swap it * to any other implementation later. */ struct ifqueue { struct mbufq ifq_mbq; struct mtx ifq_mtx; }; static struct ifqueue * if_snd_alloc(int maxlen) { struct ifqueue *ifq; ifq = malloc(sizeof(struct ifqueue), M_IFNET, M_WAITOK); mbufq_init(&ifq->ifq_mbq, maxlen); mtx_init(&ifq->ifq_mtx, "ifqueue", NULL, MTX_DEF | MTX_NEW); return (ifq); } static void if_snd_free(struct ifqueue *ifq) { mtx_destroy(&ifq->ifq_mtx); free(ifq, M_IFNET); } /* * Flush software interface queue. */ static void if_snd_qflush(if_t ifp) { struct ifqueue *ifq = ifp->if_snd; mtx_lock(&ifq->ifq_mtx); mbufq_drain(&ifq->ifq_mbq); mtx_unlock(&ifq->ifq_mtx); } int if_snd_len(if_t ifp) { struct ifqueue *ifq = ifp->if_snd; return (mbufq_len(&ifq->ifq_mbq)); } int if_snd_enqueue(struct ifnet *ifp, struct mbuf *m) { struct ifqueue *ifq = ifp->if_snd; int error; mtx_lock(&ifq->ifq_mtx); error = mbufq_enqueue(&ifq->ifq_mbq, m); mtx_unlock(&ifq->ifq_mtx); if (error) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); } return (error); } struct mbuf * if_snd_dequeue(if_t ifp) { struct ifqueue *ifq = ifp->if_snd; struct mbuf *m; mtx_lock(&ifq->ifq_mtx); m = mbufq_dequeue(&ifq->ifq_mbq); mtx_unlock(&ifq->ifq_mtx); return (m); } void if_snd_prepend(if_t ifp, struct mbuf *m) { struct ifqueue *ifq = ifp->if_snd; mtx_lock(&ifq->ifq_mtx); mbufq_prepend(&ifq->ifq_mbq, m); mtx_unlock(&ifq->ifq_mtx); } /* * Implementation of if ops, that can be called from drivers. */ void if_input_noinline(if_t ifp, struct mbuf *m) { return (if_input(ifp, m)); } int if_transmit_noinline(if_t ifp, struct mbuf *m) { return (if_transmit(ifp, m)); } Index: projects/ifnet/sys/net/if_loop.c =================================================================== --- projects/ifnet/sys/net/if_loop.c (revision 280372) +++ projects/ifnet/sys/net/if_loop.c (revision 280373) @@ -1,379 +1,378 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_loop.c 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ /* * Loopback interface driver for protocol testing and timing. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #ifdef INET6 #ifndef INET #include #endif #include #endif #include #ifdef TINY_LOMTU #define LOMTU (1024+512) #elif defined(LARGE_LOMTU) #define LOMTU 131072 #else #define LOMTU 16384 #endif #define LO_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) #define LO_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) #define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ CSUM_PSEUDO_HDR | \ CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) int if_simloop(if_t, struct mbuf *, int, int); static int loioctl(if_t, u_long, void *, struct thread *); static int looutput(if_t, struct mbuf *, const struct sockaddr *, struct route *); static int lo_clone_create(struct if_clone *, int, caddr_t); static void lo_clone_destroy(if_t); VNET_DEFINE(if_t, loif); /* Used externally. */ #define V_loif VNET(loif) #ifdef VIMAGE static VNET_DEFINE(struct if_clone *, lo_cloner); #define V_lo_cloner VNET(lo_cloner) #endif static struct if_clone *lo_cloner; static const char loname[] = "lo"; static struct ifdriver lo_ifdrv = { .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, .ifop_ioctl = loioctl, .ifop_output = looutput, }, .ifdrv_name = loname, .ifdrv_type = IFT_LOOP, .ifdrv_dlt = DLT_NULL, .ifdrv_dlt_hdrlen = sizeof(uint32_t), }; static void lo_clone_destroy(if_t ifp) { #ifndef VIMAGE /* XXX: destroying lo0 will lead to panics. */ KASSERT(V_loif != ifp, ("%s: destroying lo0", __func__)); #endif if_detach(ifp); } static int lo_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &lo_ifdrv, .ifat_dunit = unit, .ifat_mtu = LOMTU, .ifat_flags = IFF_LOOPBACK | IFF_MULTICAST, .ifat_capabilities = IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6, .ifat_capenable = IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6, .ifat_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6, }; if_t ifp; ifp = if_attach(&ifat); if (V_loif == NULL) V_loif = ifp; return (0); } static void vnet_loif_init(const void *unused __unused) { #ifdef VIMAGE lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy, 1); V_lo_cloner = lo_cloner; #else lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy, 1); #endif } VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_loif_init, NULL); #ifdef VIMAGE static void vnet_loif_uninit(const void *unused __unused) { if_clone_detach(V_lo_cloner); V_loif = NULL; } VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_loif_uninit, NULL); #endif static int loop_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: printf("loop module unload - not possible for this module type\n"); return (EINVAL); default: return (EOPNOTSUPP); } return (0); } static moduledata_t loop_mod = { "if_lo", loop_modevent, 0 }; DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); int looutput(if_t ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int32_t af; struct rtentry *rt = NULL; #ifdef MAC int error; #endif M_ASSERTPKTHDR(m); /* check if we have the packet header */ if (ro != NULL) rt = ro->ro_rt; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); return (error); } #endif if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { m_freem(m); return (rt->rt_flags & RTF_BLACKHOLE ? 0 : rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; switch (af) { case AF_INET: m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = LO_CSUM_SET; m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES; break; case AF_INET6: m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = LO_CSUM_SET; m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6; break; default: printf("looutput: af=%d unexpected\n", af); m_freem(m); return (EAFNOSUPPORT); } return (if_simloop(ifp, m, af, 0)); } /* * if_simloop() * * This function is to support software emulation of hardware loopback, * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't * hear their own broadcasts, we create a copy of the packet that we * would normally receive via a hardware loopback. * * This function expects the packet to include the media header of length hlen. */ int if_simloop(if_t ifp, struct mbuf *m, int af, int hlen) { int isr; M_ASSERTPKTHDR(m); m_tag_delete_nonpersistent(m); m->m_pkthdr.rcvif = ifp; #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Let BPF see incoming packet in the following manner: * - Emulated packet loopback for a simplex interface * (net/if_ethersubr.c) * -> passes it to ifp's BPF * - IPv4/v6 multicast packet loopback (netinet(6)/ip(6)_output.c) * -> not passes it to any BPF * - Normal packet loopback from myself to myself (net/if_loop.c) * -> passes to lo0's BPF (even in case of IPv6, where ifp!=lo0) */ if (hlen > 0) if_mtap(ifp, m, NULL, 0); else if ((m->m_flags & M_MCAST) == 0 || V_loif == ifp) if_mtap(V_loif, m, &af, sizeof(af)); /* Strip away media header */ if (hlen > 0) { m_adj(m, hlen); #ifndef __NO_STRICT_ALIGNMENT /* * Some archs do not like unaligned data, so * we move data down in the first mbuf. */ if (mtod(m, vm_offset_t) & 3) { KASSERT(hlen >= 3, ("if_simloop: hlen too small")); bcopy(m->m_data, (char *)(mtod(m, vm_offset_t) - (mtod(m, vm_offset_t) & 3)), m->m_len); m->m_data -= (mtod(m,vm_offset_t) & 3); } #endif } /* Deliver to upper layer protocol */ switch (af) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: m->m_flags |= M_LOOP; isr = NETISR_IPV6; break; #endif default: printf("if_simloop: can't handle af=%d\n", af); m_freem(m); return (EAFNOSUPPORT); } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); netisr_queue(isr, m); /* mbuf is free'd on failure. */ return (0); } /* * Process an ioctl request. */ /* ARGSUSED */ int loioctl(if_t ifp, u_long cmd, void *data, struct thread *td) { struct ifreq *ifr = (struct ifreq *)data; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == NULL) return (EAFNOSUPPORT); /* XXX */ switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: return (EAFNOSUPPORT); } break; case SIOCSIFMTU: case SIOCSIFFLAGS: break; default: return (EOPNOTSUPP); } return (0); } Index: projects/ifnet/sys/netgraph/ng_iface.c =================================================================== --- projects/ifnet/sys/netgraph/ng_iface.c (revision 280372) +++ projects/ifnet/sys/netgraph/ng_iface.c (revision 280373) @@ -1,732 +1,731 @@ /* * ng_iface.c */ /*- * Copyright (c) 1996-1999 Whistle Communications, Inc. * All rights reserved. * * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Author: Archie Cobbs * * $FreeBSD$ * $Whistle: ng_iface.c,v 1.33 1999/11/01 09:24:51 julian Exp $ */ /* * This node is also a system networking interface. It has * a hook for each protocol (IP, AppleTalk, etc). Packets * are simply relayed between the interface and the hooks. * * Interfaces are named ng0, ng1, etc. New nodes take the * first available interface name. * * This node also includes Berkeley packet filter support. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_IFACE, "netgraph_iface", "netgraph iface node"); #else #define M_NETGRAPH_IFACE M_NETGRAPH #endif /* This struct describes one address family */ struct iffam { sa_family_t family; /* Address family */ const char *hookname; /* Name for hook */ }; typedef const struct iffam *iffam_p; /* List of address families supported by our interface */ const static struct iffam gFamilies[] = { { AF_INET, NG_IFACE_HOOK_INET }, { AF_INET6, NG_IFACE_HOOK_INET6 }, { AF_ATM, NG_IFACE_HOOK_ATM }, { AF_NATM, NG_IFACE_HOOK_NATM }, }; #define NUM_FAMILIES (sizeof(gFamilies) / sizeof(*gFamilies)) /* Node private data */ struct ng_iface_private { if_t ifp; /* Our interface */ u_int fib; /* Interface fib */ int unit; /* Interface unit number */ node_p node; /* Our netgraph node */ hook_p hooks[NUM_FAMILIES]; /* Hook for each address family */ }; typedef struct ng_iface_private *priv_p; /* Interface methods */ static int ng_iface_transmit(if_t, struct mbuf *); static int ng_iface_ioctl(if_t, u_long, void *, struct thread *); static int ng_iface_output(if_t, struct mbuf *m0, const struct sockaddr *dst, struct route *ro); static int ng_iface_send(if_t, struct mbuf *m, sa_family_t sa); #ifdef DEBUG static void ng_iface_print_ioctl(if_t, int cmd, caddr_t data); #endif /* Netgraph methods */ static int ng_iface_mod_event(module_t, int, void *); static ng_constructor_t ng_iface_constructor; static ng_rcvmsg_t ng_iface_rcvmsg; static ng_shutdown_t ng_iface_shutdown; static ng_newhook_t ng_iface_newhook; static ng_rcvdata_t ng_iface_rcvdata; static ng_disconnect_t ng_iface_disconnect; /* Helper stuff */ static iffam_p get_iffam_from_af(sa_family_t family); static iffam_p get_iffam_from_hook(priv_p priv, hook_p hook); static iffam_p get_iffam_from_name(const char *name); static hook_p *get_hook_from_iffam(priv_p priv, iffam_p iffam); /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_iface_cmds[] = { { NGM_IFACE_COOKIE, NGM_IFACE_GET_IFNAME, "getifname", NULL, &ng_parse_string_type }, { NGM_IFACE_COOKIE, NGM_IFACE_POINT2POINT, "point2point", NULL, NULL }, { NGM_IFACE_COOKIE, NGM_IFACE_BROADCAST, "broadcast", NULL, NULL }, { NGM_IFACE_COOKIE, NGM_IFACE_GET_IFINDEX, "getifindex", NULL, &ng_parse_uint32_type }, { 0 } }; /* Node type descriptor */ static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_IFACE_NODE_TYPE, .mod_event = ng_iface_mod_event, .constructor = ng_iface_constructor, .rcvmsg = ng_iface_rcvmsg, .shutdown = ng_iface_shutdown, .newhook = ng_iface_newhook, .rcvdata = ng_iface_rcvdata, .disconnect = ng_iface_disconnect, .cmdlist = ng_iface_cmds, }; NETGRAPH_INIT(iface, &typestruct); static VNET_DEFINE(struct unrhdr *, ng_iface_unit); #define V_ng_iface_unit VNET(ng_iface_unit) static struct ifdriver ng_ifdrv = { .ifdrv_ops = { - .ifop_origin = IFOP_ORIGIN_DRIVER, .ifop_output = ng_iface_output, .ifop_transmit = ng_iface_transmit, .ifop_ioctl = ng_iface_ioctl, }, .ifdrv_name = NG_IFACE_IFACE_NAME, .ifdrv_type = IFT_PROPVIRTUAL, .ifdrv_dlt = DLT_NULL, .ifdrv_dlt_hdrlen = sizeof(uint32_t), }; /************************************************************************ HELPER STUFF ************************************************************************/ /* * Get the family descriptor from the family ID */ static __inline iffam_p get_iffam_from_af(sa_family_t family) { iffam_p iffam; int k; for (k = 0; k < NUM_FAMILIES; k++) { iffam = &gFamilies[k]; if (iffam->family == family) return (iffam); } return (NULL); } /* * Get the family descriptor from the hook */ static __inline iffam_p get_iffam_from_hook(priv_p priv, hook_p hook) { int k; for (k = 0; k < NUM_FAMILIES; k++) if (priv->hooks[k] == hook) return (&gFamilies[k]); return (NULL); } /* * Get the hook from the iffam descriptor */ static __inline hook_p * get_hook_from_iffam(priv_p priv, iffam_p iffam) { return (&priv->hooks[iffam - gFamilies]); } /* * Get the iffam descriptor from the name */ static __inline iffam_p get_iffam_from_name(const char *name) { iffam_p iffam; int k; for (k = 0; k < NUM_FAMILIES; k++) { iffam = &gFamilies[k]; if (!strcmp(iffam->hookname, name)) return (iffam); } return (NULL); } /************************************************************************ INTERFACE STUFF ************************************************************************/ /* * Process an ioctl for the virtual interface */ static int ng_iface_ioctl(if_t ifp, u_long command, void *data, struct thread *td) { const priv_p priv = if_getsoftc(ifp, IF_DRIVER_SOFTC); struct ifreq *const ifr = (struct ifreq *) data; int error = 0; #ifdef DEBUG ng_iface_print_ioctl(ifp, command, data); #endif switch (command) { /* These two are mostly handled at a higher layer */ case SIOCSIFADDR: if_addflags(ifp, IF_FLAGS, IFF_UP); break; /* Set the interface MTU */ case SIOCSIFMTU: if (ifr->ifr_mtu > NG_IFACE_MTU_MAX || ifr->ifr_mtu < NG_IFACE_MTU_MIN) error = EINVAL; break; /* Update interface FIB */ case SIOCSIFFIB: priv->fib = ifr->ifr_fib; break; /* Stuff that's not supported */ case SIOCGIFADDR: case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: break; default: error = EOPNOTSUPP; break; } return (error); } /* * This routine is called to deliver a packet out the interface. * We simply look at the address family and relay the packet to * the corresponding hook, if it exists and is connected. */ static int ng_iface_output(if_t ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct m_tag *mtag; uint32_t af; /* Check interface flags */ if ((if_get(ifp, IF_FLAGS) & IFF_UP) == 0) { m_freem(m); return (ENETDOWN); } /* Protect from deadly infinite recursion. */ mtag = NULL; while ((mtag = m_tag_locate(m, MTAG_NGIF, MTAG_NGIF_CALLED, mtag))) { if (*(if_t *)(mtag + 1) == ifp) { log(LOG_NOTICE, "Loop detected on %s\n", if_name(ifp)); m_freem(m); return (EDEADLK); } } mtag = m_tag_alloc(MTAG_NGIF, MTAG_NGIF_CALLED, sizeof(if_t), M_NOWAIT); if (mtag == NULL) { m_freem(m); return (ENOMEM); } *(if_t *)(mtag + 1) = ifp; m_tag_prepend(m, mtag); /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; M_PREPEND(m, sizeof(sa_family_t), M_NOWAIT); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return (ENOBUFS); } *(sa_family_t *)m->m_data = af; return (if_transmit(ifp, m)); } /* * Start method is used only when ALTQ is enabled. */ static int ng_iface_transmit(if_t ifp, struct mbuf *m) { sa_family_t af; af = *mtod(m, sa_family_t *); KASSERT(af != AF_UNSPEC, ("%s: af = AF_UNSPEC", __func__)); m_adj(m, sizeof(sa_family_t)); if_mtap(ifp, m, &af, sizeof(af)); return (ng_iface_send(ifp, m, af)); } /* * This routine does actual delivery of the packet into the * netgraph(4). It is called from ng_iface_start() and * ng_iface_output(). */ static int ng_iface_send(if_t ifp, struct mbuf *m, sa_family_t sa) { const priv_p priv = if_getsoftc(ifp, IF_DRIVER_SOFTC); const iffam_p iffam = get_iffam_from_af(sa); int error; int len; /* Check address family to determine hook (if known) */ if (iffam == NULL) { m_freem(m); log(LOG_WARNING, "%s: can't handle af%d\n", if_name(ifp), sa); return (EAFNOSUPPORT); } /* Copy length before the mbuf gets invalidated. */ len = m->m_pkthdr.len; /* Send packet. If hook is not connected, mbuf will get freed. */ NG_OUTBOUND_THREAD_REF(); NG_SEND_DATA_ONLY(error, *get_hook_from_iffam(priv, iffam), m); NG_OUTBOUND_THREAD_UNREF(); /* Update stats. */ if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } #ifdef DEBUG /* * Display an ioctl to the virtual interface */ static void ng_iface_print_ioctl(if_t ifp, int command, caddr_t data) { char *str; switch (command & IOC_DIRMASK) { case IOC_VOID: str = "IO"; break; case IOC_OUT: str = "IOR"; break; case IOC_IN: str = "IOW"; break; case IOC_INOUT: str = "IORW"; break; default: str = "IO??"; } log(LOG_DEBUG, "%s: %s('%c', %d, char[%d])\n", if_name(ifp), str, IOCGROUP(command), command & 0xff, IOCPARM_LEN(command)); } #endif /* DEBUG */ /************************************************************************ NETGRAPH NODE STUFF ************************************************************************/ /* * Constructor for a node */ static int ng_iface_constructor(node_p node) { struct if_attach_args ifat = { .ifat_version = IF_ATTACH_VERSION, .ifat_drv = &ng_ifdrv, .ifat_mtu = NG_IFACE_MTU_DEFAULT, .ifat_flags = IFF_SIMPLEX | IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST, .ifat_baudrate = 64000, /* XXX */ }; priv_p priv; priv = malloc(sizeof(*priv), M_NETGRAPH_IFACE, M_WAITOK | M_ZERO); NG_NODE_SET_PRIVATE(node, priv); priv->node = node; ifat.ifat_softc = priv; ifat.ifat_dunit = priv->unit = alloc_unr(V_ng_iface_unit); priv->ifp = if_attach(&ifat); /* Give this node the same name as the interface (if possible) */ if (ng_name_node(node, if_name(priv->ifp)) != 0) log(LOG_WARNING, "%s: can't acquire netgraph name\n", if_name(priv->ifp)); return (0); } /* * Give our ok for a hook to be added */ static int ng_iface_newhook(node_p node, hook_p hook, const char *name) { const iffam_p iffam = get_iffam_from_name(name); hook_p *hookptr; if (iffam == NULL) return (EPFNOSUPPORT); hookptr = get_hook_from_iffam(NG_NODE_PRIVATE(node), iffam); if (*hookptr != NULL) return (EISCONN); *hookptr = hook; NG_HOOK_HI_STACK(hook); NG_HOOK_SET_TO_INBOUND(hook); return (0); } /* * Receive a control message */ static int ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook) { const priv_p priv = NG_NODE_PRIVATE(node); const if_t ifp = priv->ifp; struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { case NGM_IFACE_COOKIE: switch (msg->header.cmd) { case NGM_IFACE_GET_IFNAME: NG_MKRESPONSE(resp, msg, IFNAMSIZ, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } strlcpy(resp->data, if_name(ifp), IFNAMSIZ); break; case NGM_IFACE_POINT2POINT: case NGM_IFACE_BROADCAST: { /* Deny request if interface is UP */ if ((if_get(ifp, IF_FLAGS) & IFF_UP) != 0) return (EBUSY); /* Change flags */ switch (msg->header.cmd) { case NGM_IFACE_POINT2POINT: if_addflags(ifp, IF_FLAGS, IFF_POINTOPOINT); if_clrflags(ifp, IF_FLAGS, IFF_BROADCAST); break; case NGM_IFACE_BROADCAST: if_clrflags(ifp, IF_FLAGS, IFF_POINTOPOINT); if_addflags(ifp, IF_FLAGS, IFF_BROADCAST); break; } break; } case NGM_IFACE_GET_IFINDEX: { struct ifreq ifr; NG_MKRESPONSE(resp, msg, sizeof(uint32_t), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } if_drvioctl(ifp, SIOCGIFINDEX, &ifr, curthread); *((uint32_t *)resp->data) = ifr.ifr_index; break; } default: error = EINVAL; break; } break; case NGM_FLOW_COOKIE: switch (msg->header.cmd) { case NGM_LINK_IS_UP: if_link_state_change(ifp, LINK_STATE_UP); break; case NGM_LINK_IS_DOWN: if_link_state_change(ifp, LINK_STATE_DOWN); break; default: break; } break; default: error = EINVAL; break; } NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); return (error); } /* * Recive data from a hook. Pass the packet to the correct input routine. */ static int ng_iface_rcvdata(hook_p hook, item_p item) { const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); const iffam_p iffam = get_iffam_from_hook(priv, hook); const if_t ifp = priv->ifp; struct mbuf *m; sa_family_t af; int isr; NGI_GET_M(item, m); NG_FREE_ITEM(item); /* Sanity checks */ KASSERT(iffam != NULL, ("%s: iffam", __func__)); M_ASSERTPKTHDR(m); if ((if_get(ifp, IF_FLAGS) & IFF_UP) == 0) { NG_FREE_M(m); return (ENETDOWN); } /* Update interface stats */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Note receiving interface */ m->m_pkthdr.rcvif = ifp; /* Berkeley packet filter */ af = iffam->family; if_mtap(ifp, m, &af, sizeof(af)); /* Send packet */ switch (iffam->family) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif default: m_freem(m); return (EAFNOSUPPORT); } random_harvest(&(m->m_data), 12, 2, RANDOM_NET_NG); M_SETFIB(m, priv->fib); netisr_dispatch(isr, m); return (0); } /* * Shutdown and remove the node and its associated interface. */ static int ng_iface_shutdown(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); /* * The ifnet may be in a different vnet than the netgraph node, * hence we have to change the current vnet context here. */ CURVNET_SET_QUIET(priv->ifp->if_vnet); if_detach(priv->ifp); CURVNET_RESTORE(); priv->ifp = NULL; free_unr(V_ng_iface_unit, priv->unit); free(priv, M_NETGRAPH_IFACE); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(node); return (0); } /* * Hook disconnection. Note that we do *not* shutdown when all * hooks have been disconnected. */ static int ng_iface_disconnect(hook_p hook) { const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); const iffam_p iffam = get_iffam_from_hook(priv, hook); if (iffam == NULL) panic("%s", __func__); *get_hook_from_iffam(priv, iffam) = NULL; return (0); } /* * Handle loading and unloading for this node type. */ static int ng_iface_mod_event(module_t mod, int event, void *data) { int error = 0; switch (event) { case MOD_LOAD: case MOD_UNLOAD: break; default: error = EOPNOTSUPP; break; } return (error); } static void vnet_ng_iface_init(const void *unused) { V_ng_iface_unit = new_unrhdr(0, 0xffff, NULL); } VNET_SYSINIT(vnet_ng_iface_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_ng_iface_init, NULL); static void vnet_ng_iface_uninit(const void *unused) { delete_unrhdr(V_ng_iface_unit); } VNET_SYSUNINIT(vnet_ng_iface_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_ng_iface_uninit, NULL);