Index: sys/amd64/conf/GENERIC =================================================================== --- sys/amd64/conf/GENERIC +++ sys/amd64/conf/GENERIC @@ -367,3 +367,5 @@ # The crypto framework is required by IPSEC device crypto # Required by IPSEC + +options IFLIB Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -1479,6 +1479,8 @@ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_igb.c optional igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" +dev/e1000/iflib_igb_txrx.c optional igb iflib \ + compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_80003es2lan.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82540.c optional em | igb \ Index: sys/dev/e1000/if_igb.h =================================================================== --- sys/dev/e1000/if_igb.h +++ sys/dev/e1000/if_igb.h @@ -32,603 +32,12 @@ ******************************************************************************/ /*$FreeBSD$*/ -#ifndef _IF_IGB_H_ -#define _IF_IGB_H_ - -#ifdef ALTQ -#define IGB_LEGACY_TX -#endif - -#include -#include -#ifndef IGB_LEGACY_TX -#include +#ifndef KLD_MODULE +#include "opt_iflib.h" #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#ifdef RSS -#include -#include -#endif - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "e1000_api.h" -#include "e1000_82575.h" - -/* Tunables */ -/* - * IGB_TXD: Maximum number of Transmit Descriptors - * - * This value is the number of transmit descriptors allocated by the driver. - * Increasing this value allows the driver to queue more transmits. Each - * descriptor is 16 bytes. - * Since TDLEN should be multiple of 128bytes, the number of transmit - * desscriptors should meet the following condition. - * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 - */ -#define IGB_MIN_TXD 256 -#define IGB_DEFAULT_TXD 1024 -#define IGB_MAX_TXD 4096 - -/* - * IGB_RXD: Maximum number of Receive Descriptors - * - * This value is the number of receive descriptors allocated by the driver. - * Increasing this value allows the driver to buffer more incoming packets. - * Each descriptor is 16 bytes. A receive buffer is also allocated for each - * descriptor. The maximum MTU size is 16110. - * Since TDLEN should be multiple of 128bytes, the number of transmit - * desscriptors should meet the following condition. - * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 - */ -#define IGB_MIN_RXD 256 -#define IGB_DEFAULT_RXD 1024 -#define IGB_MAX_RXD 4096 - -/* - * IGB_TIDV - Transmit Interrupt Delay Value - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value delays the generation of transmit interrupts in units of - * 1.024 microseconds. Transmit interrupt reduction can improve CPU - * efficiency if properly tuned for specific network traffic. If the - * system is reporting dropped transmits, this value may be set too high - * causing the driver to run out of available transmit descriptors. - */ -#define IGB_TIDV 64 - -/* - * IGB_TADV - Transmit Absolute Interrupt Delay Value - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value, in units of 1.024 microseconds, limits the delay in which a - * transmit interrupt is generated. Useful only if IGB_TIDV is non-zero, - * this value ensures that an interrupt is generated after the initial - * packet is sent on the wire within the set amount of time. Proper tuning, - * along with IGB_TIDV, may improve traffic throughput in specific - * network conditions. - */ -#define IGB_TADV 64 - -/* - * IGB_RDTR - Receive Interrupt Delay Timer (Packet Timer) - * Valid Range: 0-65535 (0=off) - * Default Value: 0 - * This value delays the generation of receive interrupts in units of 1.024 - * microseconds. Receive interrupt reduction can improve CPU efficiency if - * properly tuned for specific network traffic. Increasing this value adds - * extra latency to frame reception and can end up decreasing the throughput - * of TCP traffic. If the system is reporting dropped receives, this value - * may be set too high, causing the driver to run out of available receive - * descriptors. - * - * CAUTION: When setting IGB_RDTR to a value other than 0, adapters - * may hang (stop transmitting) under certain network conditions. - * If this occurs a WATCHDOG message is logged in the system - * event log. In addition, the controller is automatically reset, - * restoring the network connection. To eliminate the potential - * for the hang ensure that IGB_RDTR is set to 0. - */ -#define IGB_RDTR 0 - -/* - * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value, in units of 1.024 microseconds, limits the delay in which a - * receive interrupt is generated. Useful only if IGB_RDTR is non-zero, - * this value ensures that an interrupt is generated after the initial - * packet is received within the set amount of time. Proper tuning, - * along with IGB_RDTR, may improve traffic throughput in specific network - * conditions. - */ -#define IGB_RADV 64 - -/* - * This parameter controls the duration of transmit watchdog timer. - */ -#define IGB_WATCHDOG (10 * hz) - -/* - * This parameter controls when the driver calls the routine to reclaim - * transmit descriptors. Cleaning earlier seems a win. - */ -#define IGB_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 2) - -/* - * This parameter controls whether or not autonegotation is enabled. - * 0 - Disable autonegotiation - * 1 - Enable autonegotiation - */ -#define DO_AUTO_NEG 1 - -/* - * This parameter control whether or not the driver will wait for - * autonegotiation to complete. - * 1 - Wait for autonegotiation to complete - * 0 - Don't wait for autonegotiation to complete - */ -#define WAIT_FOR_AUTO_NEG_DEFAULT 0 - -/* Tunables -- End */ - -#define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ - ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ - ADVERTISE_1000_FULL) -#define AUTO_ALL_MODES 0 - -/* PHY master/slave setting */ -#define IGB_MASTER_SLAVE e1000_ms_hw_default - -/* Support AutoMediaDetect for Marvell M88 PHY in i354 */ -#define IGB_MEDIA_RESET (1 << 0) - -/* - * Micellaneous constants - */ -#define IGB_INTEL_VENDOR_ID 0x8086 - -#define IGB_JUMBO_PBA 0x00000028 -#define IGB_DEFAULT_PBA 0x00000030 -#define IGB_SMARTSPEED_DOWNSHIFT 3 -#define IGB_SMARTSPEED_MAX 15 -#define IGB_MAX_LOOP 10 - -#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ - ((hw->mac.type <= e1000_82576) ? 16 : 8)) -#define IGB_RX_HTHRESH 8 -#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ - adapter->msix_mem) ? 1 : 4) - -#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) -#define IGB_TX_HTHRESH 1 -#define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ - adapter->msix_mem) ? 1 : 16) - -#define MAX_NUM_MULTICAST_ADDRESSES 128 -#define PCI_ANY_ID (~0U) -#define ETHER_ALIGN 2 -#define IGB_TX_BUFFER_SIZE ((uint32_t) 1514) -#define IGB_FC_PAUSE_TIME 0x0680 -#define IGB_EEPROM_APME 0x400; -/* Queue minimum free for use */ -#define IGB_QUEUE_THRESHOLD (adapter->num_tx_desc / 8) - -/* - * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be - * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will - * also optimize cache line size effect. H/W supports up to cache line size 128. - */ -#define IGB_DBA_ALIGN 128 - -#define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */ - -/* PCI Config defines */ -#define IGB_MSIX_BAR 3 - -/* Defines for printing debug information */ -#define DEBUG_INIT 0 -#define DEBUG_IOCTL 0 -#define DEBUG_HW 0 - -#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") -#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) -#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) -#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") -#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) -#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) -#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") -#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) -#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) - -#define IGB_MAX_SCATTER 40 -#define IGB_VFTA_SIZE 128 -#define IGB_BR_SIZE 4096 /* ring buf size */ -#define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) -#define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */ -#define IGB_TXPBSIZE 20408 -#define IGB_HDR_BUF 128 -#define IGB_PKTTYPE_MASK 0x0000FFF0 -#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ -#define ETH_ZLEN 60 -#define ETH_ADDR_LEN 6 - -/* Offload bits in mbuf flag */ -#if __FreeBSD_version >= 1000000 -#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP|CSUM_IP_SCTP) -#define CSUM_OFFLOAD_IPV6 (CSUM_IP6_TCP|CSUM_IP6_UDP|CSUM_IP6_SCTP) -#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6) -#elif __FreeBSD_version >= 800000 -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) +#ifdef IFLIB +#include #else -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) +#include #endif - -/* Define the starting Interrupt rate per Queue */ -#define IGB_INTS_PER_SEC 8000 -#define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) - -#define IGB_LINK_ITR 2000 -#define I210_LINK_DELAY 1000 - -/* Precision Time Sync (IEEE 1588) defines */ -#define ETHERTYPE_IEEE1588 0x88F7 -#define PICOSECS_PER_TICK 20833 -#define TSYNC_PORT 319 /* UDP port for the protocol */ - -/* - * Bus dma allocation structure used by - * e1000_dma_malloc and e1000_dma_free. - */ -struct igb_dma_alloc { - bus_addr_t dma_paddr; - caddr_t dma_vaddr; - bus_dma_tag_t dma_tag; - bus_dmamap_t dma_map; - bus_dma_segment_t dma_seg; - int dma_nseg; -}; - - -/* -** Driver queue struct: this is the interrupt container -** for the associated tx and rx ring. -*/ -struct igb_queue { - struct adapter *adapter; - u32 msix; /* This queue's MSIX vector */ - u32 eims; /* This queue's EIMS bit */ - u32 eitr_setting; - struct resource *res; - void *tag; - struct tx_ring *txr; - struct rx_ring *rxr; - struct task que_task; - struct taskqueue *tq; - u64 irqs; -}; - -/* - * The transmit ring, one per queue - */ -struct tx_ring { - struct adapter *adapter; - struct mtx tx_mtx; - u32 me; - int watchdog_time; - union e1000_adv_tx_desc *tx_base; - struct igb_tx_buf *tx_buffers; - struct igb_dma_alloc txdma; - volatile u16 tx_avail; - u16 next_avail_desc; - u16 next_to_clean; - u16 num_desc; - enum { - IGB_QUEUE_IDLE = 1, - IGB_QUEUE_WORKING = 2, - IGB_QUEUE_HUNG = 4, - IGB_QUEUE_DEPLETED = 8, - } queue_status; - u32 txd_cmd; - bus_dma_tag_t txtag; - char mtx_name[16]; -#ifndef IGB_LEGACY_TX - struct buf_ring *br; - struct task txq_task; -#endif - u32 bytes; /* used for AIM */ - u32 packets; - /* Soft Stats */ - unsigned long tso_tx; - unsigned long no_tx_map_avail; - unsigned long no_tx_dma_setup; - u64 no_desc_avail; - u64 total_packets; -}; - -/* - * Receive ring: one per queue - */ -struct rx_ring { - struct adapter *adapter; - u32 me; - struct igb_dma_alloc rxdma; - union e1000_adv_rx_desc *rx_base; - struct lro_ctrl lro; - bool lro_enabled; - bool hdr_split; - struct mtx rx_mtx; - char mtx_name[16]; - u32 next_to_refresh; - u32 next_to_check; - struct igb_rx_buf *rx_buffers; - bus_dma_tag_t htag; /* dma tag for rx head */ - bus_dma_tag_t ptag; /* dma tag for rx packet */ - /* - * First/last mbuf pointers, for - * collecting multisegment RX packets. - */ - struct mbuf *fmp; - struct mbuf *lmp; - - u32 bytes; - u32 packets; - int rdt; - int rdh; - - /* Soft stats */ - u64 rx_split_packets; - u64 rx_discarded; - u64 rx_packets; - u64 rx_bytes; -}; - -struct adapter { - struct ifnet *ifp; - struct e1000_hw hw; - - struct e1000_osdep osdep; - device_t dev; - struct cdev *led_dev; - - struct resource *pci_mem; - struct resource *msix_mem; - int memrid; - - /* - * Interrupt resources: this set is - * either used for legacy, or for Link - * when doing MSIX - */ - void *tag; - struct resource *res; - - struct ifmedia media; - struct callout timer; - int msix; - int if_flags; - int pause_frames; - - struct mtx core_mtx; - - eventhandler_tag vlan_attach; - eventhandler_tag vlan_detach; - - u16 num_vlans; - u16 num_queues; - - /* - ** Shadow VFTA table, this is needed because - ** the real vlan filter table gets cleared during - ** a soft reset and the driver needs to be able - ** to repopulate it. - */ - u32 shadow_vfta[IGB_VFTA_SIZE]; - - /* Info about the interface */ - u32 optics; - u32 fc; /* local flow ctrl setting */ - int advertise; /* link speeds */ - bool link_active; - u16 max_frame_size; - u16 num_segs; - u16 link_speed; - bool link_up; - u32 linkvec; - u16 link_duplex; - u32 dmac; - int link_mask; - - /* Flags */ - u32 flags; - - /* Mbuf cluster size */ - u32 rx_mbuf_sz; - - /* Support for pluggable optics */ - bool sfp_probe; - struct task link_task; /* Link tasklet */ - struct task mod_task; /* SFP tasklet */ - struct task msf_task; /* Multispeed Fiber */ - struct taskqueue *tq; - - /* - ** Queues: - ** This is the irq holder, it has - ** and RX/TX pair or rings associated - ** with it. - */ - struct igb_queue *queues; - - /* - * Transmit rings: - * Allocated at run time, an array of rings. - */ - struct tx_ring *tx_rings; - u32 num_tx_desc; - - /* - * Receive rings: - * Allocated at run time, an array of rings. - */ - struct rx_ring *rx_rings; - u64 que_mask; - u32 num_rx_desc; - - /* Multicast array memory */ - u8 *mta; - - /* Misc stats maintained by the driver */ - unsigned long device_control; - unsigned long dropped_pkts; - unsigned long eint_mask; - unsigned long int_mask; - unsigned long link_irq; - unsigned long mbuf_defrag_failed; - unsigned long no_tx_dma_setup; - unsigned long packet_buf_alloc_rx; - unsigned long packet_buf_alloc_tx; - unsigned long rx_control; - unsigned long rx_overruns; - unsigned long watchdog_events; - - /* Used in pf and vf */ - void *stats; - - int enable_aim; - int has_manage; - int wol; - int rx_process_limit; - int tx_process_limit; - u16 vf_ifp; /* a VF interface */ - bool in_detach; /* Used only in igb_ioctl */ - -}; - -/* ****************************************************************************** - * vendor_info_array - * - * This array contains the list of Subvendor/Subdevice IDs on which the driver - * should load. - * - * ******************************************************************************/ -typedef struct _igb_vendor_info_t { - unsigned int vendor_id; - unsigned int device_id; - unsigned int subvendor_id; - unsigned int subdevice_id; - unsigned int index; -} igb_vendor_info_t; - -struct igb_tx_buf { - union e1000_adv_tx_desc *eop; - struct mbuf *m_head; - bus_dmamap_t map; -}; - -struct igb_rx_buf { - struct mbuf *m_head; - struct mbuf *m_pack; - bus_dmamap_t hmap; /* bus_dma map for header */ - bus_dmamap_t pmap; /* bus_dma map for packet */ -}; - -/* -** Find the number of unrefreshed RX descriptors -*/ -static inline u16 -igb_rx_unrefreshed(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - - if (rxr->next_to_check > rxr->next_to_refresh) - return (rxr->next_to_check - rxr->next_to_refresh - 1); - else - return ((adapter->num_rx_desc + rxr->next_to_check) - - rxr->next_to_refresh - 1); -} - -#define IGB_CORE_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF) -#define IGB_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) -#define IGB_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) -#define IGB_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) -#define IGB_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) - -#define IGB_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) -#define IGB_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) -#define IGB_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) -#define IGB_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) -#define IGB_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) - -#define IGB_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) -#define IGB_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) -#define IGB_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) -#define IGB_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) - -#define UPDATE_VF_REG(reg, last, cur) \ -{ \ - u32 new = E1000_READ_REG(hw, reg); \ - if (new < last) \ - cur += 0x100000000LL; \ - last = new; \ - cur &= 0xFFFFFFFF00000000LL; \ - cur |= new; \ -} - -#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504 -static __inline int -drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (1); -#endif - return (!buf_ring_empty(br)); -} -#endif - -#endif /* _IF_IGB_H_ */ - - Index: sys/dev/e1000/if_igb.c =================================================================== --- sys/dev/e1000/if_igb.c +++ sys/dev/e1000/if_igb.c @@ -32,6410 +32,13 @@ ******************************************************************************/ /*$FreeBSD$*/ - -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_rss.h" - -#ifdef HAVE_KERNEL_OPTION_HEADERS -#include "opt_device_polling.h" -#include "opt_altq.h" -#endif - -#include "if_igb.h" - -/********************************************************************* - * Driver version: - *********************************************************************/ -char igb_driver_version[] = "2.5.3-k"; - - -/********************************************************************* - * PCI Device ID Table - * - * Used by probe to select devices to load on - * Last field stores an index into e1000_strings - * Last entry must be all 0s - * - * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } - *********************************************************************/ - -static igb_vendor_info_t igb_vendor_info_array[] = -{ - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0}, - /* required last entry */ - {0, 0, 0, 0, 0} -}; - -/********************************************************************* - * Table of branding strings for all supported NICs. - *********************************************************************/ - -static char *igb_strings[] = { - "Intel(R) PRO/1000 Network Connection" -}; - -/********************************************************************* - * Function prototypes - *********************************************************************/ -static int igb_probe(device_t); -static int igb_attach(device_t); -static int igb_detach(device_t); -static int igb_shutdown(device_t); -static int igb_suspend(device_t); -static int igb_resume(device_t); -#ifndef IGB_LEGACY_TX -static int igb_mq_start(struct ifnet *, struct mbuf *); -static int igb_mq_start_locked(struct ifnet *, struct tx_ring *); -static void igb_qflush(struct ifnet *); -static void igb_deferred_mq_start(void *, int); -#else -static void igb_start(struct ifnet *); -static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); -#endif -static int igb_ioctl(struct ifnet *, u_long, caddr_t); -static uint64_t igb_get_counter(if_t, ift_counter); -static void igb_init(void *); -static void igb_init_locked(struct adapter *); -static void igb_stop(void *); -static void igb_media_status(struct ifnet *, struct ifmediareq *); -static int igb_media_change(struct ifnet *); -static void igb_identify_hardware(struct adapter *); -static int igb_allocate_pci_resources(struct adapter *); -static int igb_allocate_msix(struct adapter *); -static int igb_allocate_legacy(struct adapter *); -static int igb_setup_msix(struct adapter *); -static void igb_free_pci_resources(struct adapter *); -static void igb_local_timer(void *); -static void igb_reset(struct adapter *); -static int igb_setup_interface(device_t, struct adapter *); -static int igb_allocate_queues(struct adapter *); -static void igb_configure_queues(struct adapter *); - -static int igb_allocate_transmit_buffers(struct tx_ring *); -static void igb_setup_transmit_structures(struct adapter *); -static void igb_setup_transmit_ring(struct tx_ring *); -static void igb_initialize_transmit_units(struct adapter *); -static void igb_free_transmit_structures(struct adapter *); -static void igb_free_transmit_buffers(struct tx_ring *); - -static int igb_allocate_receive_buffers(struct rx_ring *); -static int igb_setup_receive_structures(struct adapter *); -static int igb_setup_receive_ring(struct rx_ring *); -static void igb_initialize_receive_units(struct adapter *); -static void igb_free_receive_structures(struct adapter *); -static void igb_free_receive_buffers(struct rx_ring *); -static void igb_free_receive_ring(struct rx_ring *); - -static void igb_enable_intr(struct adapter *); -static void igb_disable_intr(struct adapter *); -static void igb_update_stats_counters(struct adapter *); -static bool igb_txeof(struct tx_ring *); - -static __inline void igb_rx_discard(struct rx_ring *, int); -static __inline void igb_rx_input(struct rx_ring *, - struct ifnet *, struct mbuf *, u32); - -static bool igb_rxeof(struct igb_queue *, int, int *); -static void igb_rx_checksum(u32, struct mbuf *, u32); -static int igb_tx_ctx_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static int igb_tso_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static void igb_set_promisc(struct adapter *); -static void igb_disable_promisc(struct adapter *); -static void igb_set_multi(struct adapter *); -static void igb_update_link_status(struct adapter *); -static void igb_refresh_mbufs(struct rx_ring *, int); - -static void igb_register_vlan(void *, struct ifnet *, u16); -static void igb_unregister_vlan(void *, struct ifnet *, u16); -static void igb_setup_vlan_hw_support(struct adapter *); - -static int igb_xmit(struct tx_ring *, struct mbuf **); -static int igb_dma_malloc(struct adapter *, bus_size_t, - struct igb_dma_alloc *, int); -static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); -static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); -static void igb_print_nvm_info(struct adapter *); -static int igb_is_valid_ether_addr(u8 *); -static void igb_add_hw_stats(struct adapter *); - -static void igb_vf_init_stats(struct adapter *); -static void igb_update_vf_stats_counters(struct adapter *); - -/* Management and WOL Support */ -static void igb_init_manageability(struct adapter *); -static void igb_release_manageability(struct adapter *); -static void igb_get_hw_control(struct adapter *); -static void igb_release_hw_control(struct adapter *); -static void igb_enable_wakeup(device_t); -static void igb_led_func(void *, int); - -static int igb_irq_fast(void *); -static void igb_msix_que(void *); -static void igb_msix_link(void *); -static void igb_handle_que(void *context, int pending); -static void igb_handle_link(void *context, int pending); -static void igb_handle_link_locked(struct adapter *); - -static void igb_set_sysctl_value(struct adapter *, const char *, - const char *, int *, int); -static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); -static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); -static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); - -#ifdef DEVICE_POLLING -static poll_handler_t igb_poll; -#endif /* POLLING */ - -/********************************************************************* - * FreeBSD Device Interface Entry Points - *********************************************************************/ - -static device_method_t igb_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, igb_probe), - DEVMETHOD(device_attach, igb_attach), - DEVMETHOD(device_detach, igb_detach), - DEVMETHOD(device_shutdown, igb_shutdown), - DEVMETHOD(device_suspend, igb_suspend), - DEVMETHOD(device_resume, igb_resume), - DEVMETHOD_END -}; - -static driver_t igb_driver = { - "igb", igb_methods, sizeof(struct adapter), -}; - -static devclass_t igb_devclass; -DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); -MODULE_DEPEND(igb, pci, 1, 1, 1); -MODULE_DEPEND(igb, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(igb, netmap, 1, 1, 1); -#endif /* DEV_NETMAP */ - -/********************************************************************* - * Tunable default values. - *********************************************************************/ - -static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); - -/* Descriptor defaults */ -static int igb_rxd = IGB_DEFAULT_RXD; -static int igb_txd = IGB_DEFAULT_TXD; -SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0, - "Number of receive descriptors per queue"); -SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0, - "Number of transmit descriptors per queue"); - -/* -** AIM: Adaptive Interrupt Moderation -** which means that the interrupt rate -** is varied over time based on the -** traffic for that interrupt vector -*/ -static int igb_enable_aim = TRUE; -SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0, - "Enable adaptive interrupt moderation"); - -/* - * MSIX should be the default for best performance, - * but this allows it to be forced off for testing. - */ -static int igb_enable_msix = 1; -SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, - "Enable MSI-X interrupts"); - -/* -** Tuneable Interrupt rate -*/ -static int igb_max_interrupt_rate = 8000; -SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, - &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); - -#ifndef IGB_LEGACY_TX -/* -** Tuneable number of buffers in the buf-ring (drbr_xxx) -*/ -static int igb_buf_ring_size = IGB_BR_SIZE; -SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, - &igb_buf_ring_size, 0, "Size of the bufring"); -#endif - -/* -** Header split causes the packet header to -** be dma'd to a separate mbuf from the payload. -** this can have memory alignment benefits. But -** another plus is that small packets often fit -** into the header and thus use no cluster. Its -** a very workload dependent type feature. -*/ -static int igb_header_split = FALSE; -SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, - "Enable receive mbuf header split"); - -/* -** This will autoconfigure based on the -** number of CPUs and max supported -** MSIX messages if left at 0. -*/ -static int igb_num_queues = 0; -SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0, - "Number of queues to configure, 0 indicates autoconfigure"); - -/* -** Global variable to store last used CPU when binding queues -** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a -** queue is bound to a cpu. -*/ -static int igb_last_bind_cpu = -1; - -/* How many packets rxeof tries to clean at a time */ -static int igb_rx_process_limit = 100; -SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, - &igb_rx_process_limit, 0, - "Maximum number of received packets to process at a time, -1 means unlimited"); - -/* How many packets txeof tries to clean at a time */ -static int igb_tx_process_limit = -1; -SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, - &igb_tx_process_limit, 0, - "Maximum number of sent packets to process at a time, -1 means unlimited"); - -#ifdef DEV_NETMAP /* see ixgbe.c for details */ -#include -#endif /* DEV_NETMAP */ -/********************************************************************* - * Device identification routine - * - * igb_probe determines if the driver should be loaded on - * adapter based on PCI vendor/device id of the adapter. - * - * return BUS_PROBE_DEFAULT on success, positive on failure - *********************************************************************/ - -static int -igb_probe(device_t dev) -{ - char adapter_name[256]; - uint16_t pci_vendor_id = 0; - uint16_t pci_device_id = 0; - uint16_t pci_subvendor_id = 0; - uint16_t pci_subdevice_id = 0; - igb_vendor_info_t *ent; - - INIT_DEBUGOUT("igb_probe: begin"); - - pci_vendor_id = pci_get_vendor(dev); - if (pci_vendor_id != IGB_INTEL_VENDOR_ID) - return (ENXIO); - - pci_device_id = pci_get_device(dev); - pci_subvendor_id = pci_get_subvendor(dev); - pci_subdevice_id = pci_get_subdevice(dev); - - ent = igb_vendor_info_array; - while (ent->vendor_id != 0) { - if ((pci_vendor_id == ent->vendor_id) && - (pci_device_id == ent->device_id) && - - ((pci_subvendor_id == ent->subvendor_id) || - (ent->subvendor_id == 0)) && - - ((pci_subdevice_id == ent->subdevice_id) || - (ent->subdevice_id == 0))) { - sprintf(adapter_name, "%s, Version - %s", - igb_strings[ent->index], - igb_driver_version); - device_set_desc_copy(dev, adapter_name); - return (BUS_PROBE_DEFAULT); - } - ent++; - } - return (ENXIO); -} - -/********************************************************************* - * Device initialization routine - * - * The attach entry point is called when the driver is being loaded. - * This routine identifies the type of hardware, allocates all resources - * and initializes the hardware. - * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -igb_attach(device_t dev) -{ - struct adapter *adapter; - int error = 0; - u16 eeprom_data; - - INIT_DEBUGOUT("igb_attach: begin"); - - if (resource_disabled("igb", device_get_unit(dev))) { - device_printf(dev, "Disabled by device hint\n"); - return (ENXIO); - } - - adapter = device_get_softc(dev); - adapter->dev = adapter->osdep.dev = dev; - IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); - - /* SYSCTLs */ - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, - igb_sysctl_nvm_info, "I", "NVM Information"); - - igb_set_sysctl_value(adapter, "enable_aim", - "Interrupt Moderation", &adapter->enable_aim, - igb_enable_aim); - - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, - adapter, 0, igb_set_flowcntl, "I", "Flow Control"); - - callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); - - /* Determine hardware and mac info */ - igb_identify_hardware(adapter); - - /* Setup PCI resources */ - if (igb_allocate_pci_resources(adapter)) { - device_printf(dev, "Allocation of PCI resources failed\n"); - error = ENXIO; - goto err_pci; - } - - /* Do Shared Code initialization */ - if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { - device_printf(dev, "Setup of Shared code failed\n"); - error = ENXIO; - goto err_pci; - } - - e1000_get_bus_info(&adapter->hw); - - /* Sysctls for limiting the amount of work done in the taskqueues */ - igb_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", - &adapter->rx_process_limit, igb_rx_process_limit); - - igb_set_sysctl_value(adapter, "tx_processing_limit", - "max number of tx packets to process", - &adapter->tx_process_limit, igb_tx_process_limit); - - /* - * Validate number of transmit and receive descriptors. It - * must not exceed hardware maximum, and must be multiple - * of E1000_DBA_ALIGN. - */ - if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || - (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { - device_printf(dev, "Using %d TX descriptors instead of %d!\n", - IGB_DEFAULT_TXD, igb_txd); - adapter->num_tx_desc = IGB_DEFAULT_TXD; - } else - adapter->num_tx_desc = igb_txd; - if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || - (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { - device_printf(dev, "Using %d RX descriptors instead of %d!\n", - IGB_DEFAULT_RXD, igb_rxd); - adapter->num_rx_desc = IGB_DEFAULT_RXD; - } else - adapter->num_rx_desc = igb_rxd; - - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_wait_to_complete = FALSE; - adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; - - /* Copper options */ - if (adapter->hw.phy.media_type == e1000_media_type_copper) { - adapter->hw.phy.mdix = AUTO_ALL_MODES; - adapter->hw.phy.disable_polarity_correction = FALSE; - adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; - } - - /* - * Set the frame limits assuming - * standard ethernet sized frames. - */ - adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; - - /* - ** Allocate and Setup Queues - */ - if (igb_allocate_queues(adapter)) { - error = ENOMEM; - goto err_pci; - } - - /* Allocate the appropriate stats memory */ - if (adapter->vf_ifp) { - adapter->stats = - (struct e1000_vf_stats *)malloc(sizeof \ - (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); - igb_vf_init_stats(adapter); - } else - adapter->stats = - (struct e1000_hw_stats *)malloc(sizeof \ - (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->stats == NULL) { - device_printf(dev, "Can not allocate stats memory\n"); - error = ENOMEM; - goto err_late; - } - - /* Allocate multicast array memory. */ - adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * - MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); - if (adapter->mta == NULL) { - device_printf(dev, "Can not allocate multicast setup array\n"); - error = ENOMEM; - goto err_late; - } - - /* Some adapter-specific advanced features */ - if (adapter->hw.mac.type >= e1000_i350) { - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, - adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, - adapter, 0, igb_sysctl_eee, "I", - "Disable Energy Efficient Ethernet"); - if (adapter->hw.phy.media_type == e1000_media_type_copper) { - if (adapter->hw.mac.type == e1000_i354) - e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); - else - e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); - } - } - - /* - ** Start from a known state, this is - ** important in reading the nvm and - ** mac from that. - */ - e1000_reset_hw(&adapter->hw); - - /* Make sure we have a good EEPROM before we read from it */ - if (((adapter->hw.mac.type != e1000_i210) && - (adapter->hw.mac.type != e1000_i211)) && - (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { - /* - ** Some PCI-E parts fail the first check due to - ** the link being in sleep state, call it again, - ** if it fails a second time its a real issue. - */ - if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { - device_printf(dev, - "The EEPROM Checksum Is Not Valid\n"); - error = EIO; - goto err_late; - } - } - - /* - ** Copy the permanent MAC address out of the EEPROM - */ - if (e1000_read_mac_addr(&adapter->hw) < 0) { - device_printf(dev, "EEPROM read error while reading MAC" - " address\n"); - error = EIO; - goto err_late; - } - /* Check its sanity */ - if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { - device_printf(dev, "Invalid MAC address\n"); - error = EIO; - goto err_late; - } - - /* Setup OS specific network interface */ - if (igb_setup_interface(dev, adapter) != 0) - goto err_late; - - /* Now get a good starting state */ - igb_reset(adapter); - - /* Initialize statistics */ - igb_update_stats_counters(adapter); - - adapter->hw.mac.get_link_status = 1; - igb_update_link_status(adapter); - - /* Indicate SOL/IDER usage */ - if (e1000_check_reset_block(&adapter->hw)) - device_printf(dev, - "PHY reset is blocked due to SOL/IDER session.\n"); - - /* Determine if we have to control management hardware */ - adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); - - /* - * Setup Wake-on-Lan - */ - /* APME bit in EEPROM is mapped to WUC.APME */ - eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; - if (eeprom_data) - adapter->wol = E1000_WUFC_MAG; - - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); - - igb_add_hw_stats(adapter); - - /* Tell the stack that the interface is not active */ - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE; - - adapter->led_dev = led_create(igb_led_func, adapter, - device_get_nameunit(dev)); - - /* - ** Configure Interrupts - */ - if ((adapter->msix > 1) && (igb_enable_msix)) - error = igb_allocate_msix(adapter); - else /* MSI or Legacy */ - error = igb_allocate_legacy(adapter); - if (error) - goto err_late; - -#ifdef DEV_NETMAP - igb_netmap_attach(adapter); -#endif /* DEV_NETMAP */ - INIT_DEBUGOUT("igb_attach: end"); - - return (0); - -err_late: - if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */ - return(error); - igb_free_transmit_structures(adapter); - igb_free_receive_structures(adapter); - igb_release_hw_control(adapter); -err_pci: - igb_free_pci_resources(adapter); - if (adapter->ifp != NULL) - if_free(adapter->ifp); - free(adapter->mta, M_DEVBUF); - IGB_CORE_LOCK_DESTROY(adapter); - - return (error); -} - -/********************************************************************* - * Device removal routine - * - * The detach entry point is called when the driver is being removed. - * This routine stops the adapter and deallocates all the resources - * that were allocated for driver operation. - * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -igb_detach(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - struct ifnet *ifp = adapter->ifp; - - INIT_DEBUGOUT("igb_detach: begin"); - - /* Make sure VLANS are not using driver */ - if (adapter->ifp->if_vlantrunk != NULL) { - device_printf(dev,"Vlan in use, detach first\n"); - return (EBUSY); - } - - ether_ifdetach(adapter->ifp); - - if (adapter->led_dev != NULL) - led_destroy(adapter->led_dev); - -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) - ether_poll_deregister(ifp); -#endif - - IGB_CORE_LOCK(adapter); - adapter->in_detach = 1; - igb_stop(adapter); - IGB_CORE_UNLOCK(adapter); - - e1000_phy_hw_reset(&adapter->hw); - - /* Give control back to firmware */ - igb_release_manageability(adapter); - igb_release_hw_control(adapter); - - if (adapter->wol) { - E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); - E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); - igb_enable_wakeup(dev); - } - - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - callout_drain(&adapter->timer); - -#ifdef DEV_NETMAP - netmap_detach(adapter->ifp); -#endif /* DEV_NETMAP */ - igb_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(ifp); - - igb_free_transmit_structures(adapter); - igb_free_receive_structures(adapter); - if (adapter->mta != NULL) - free(adapter->mta, M_DEVBUF); - - IGB_CORE_LOCK_DESTROY(adapter); - - return (0); -} - -/********************************************************************* - * - * Shutdown entry point - * - **********************************************************************/ - -static int -igb_shutdown(device_t dev) -{ - return igb_suspend(dev); -} - -/* - * Suspend/resume device methods. - */ -static int -igb_suspend(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - - IGB_CORE_LOCK(adapter); - - igb_stop(adapter); - - igb_release_manageability(adapter); - igb_release_hw_control(adapter); - - if (adapter->wol) { - E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); - E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); - igb_enable_wakeup(dev); - } - - IGB_CORE_UNLOCK(adapter); - - return bus_generic_suspend(dev); -} - -static int -igb_resume(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - struct tx_ring *txr = adapter->tx_rings; - struct ifnet *ifp = adapter->ifp; - - IGB_CORE_LOCK(adapter); - igb_init_locked(adapter); - igb_init_manageability(adapter); - - if ((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - } - } - IGB_CORE_UNLOCK(adapter); - - return bus_generic_resume(dev); -} - - -#ifdef IGB_LEGACY_TX - -/********************************************************************* - * Transmit entry point - * - * igb_start is called by the stack to initiate a transmit. - * The driver will remain in this routine as long as there are - * packets to transmit and transmit resources are available. - * In case resources are not available stack is notified and - * the packet is requeued. - **********************************************************************/ - -static void -igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct mbuf *m_head; - - IGB_TX_LOCK_ASSERT(txr); - - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; - if (!adapter->link_active) - return; - - /* Call cleanup if number of TX descriptors low */ - if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) - igb_txeof(txr); - - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - if (txr->tx_avail <= IGB_MAX_SCATTER) { - txr->queue_status |= IGB_QUEUE_DEPLETED; - break; - } - IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; - /* - * Encapsulation can modify our pointer, and or make it - * NULL on failure. In that event, we can't requeue. - */ - if (igb_xmit(txr, &m_head)) { - if (m_head != NULL) - IFQ_DRV_PREPEND(&ifp->if_snd, m_head); - if (txr->tx_avail <= IGB_MAX_SCATTER) - txr->queue_status |= IGB_QUEUE_DEPLETED; - break; - } - - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, m_head); - - /* Set watchdog on */ - txr->watchdog_time = ticks; - txr->queue_status |= IGB_QUEUE_WORKING; - } -} - -/* - * Legacy TX driver routine, called from the - * stack, always uses tx[0], and spins for it. - * Should not be used with multiqueue tx - */ -static void -igb_start(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IGB_TX_LOCK(txr); - igb_start_locked(txr, ifp); - IGB_TX_UNLOCK(txr); - } - return; -} - -#else /* ~IGB_LEGACY_TX */ - -/* -** Multiqueue Transmit Entry: -** quick turnaround to the stack -** -*/ -static int -igb_mq_start(struct ifnet *ifp, struct mbuf *m) -{ - struct adapter *adapter = ifp->if_softc; - struct igb_queue *que; - struct tx_ring *txr; - int i, err = 0; -#ifdef RSS - uint32_t bucket_id; -#endif - - /* Which queue to use */ - /* - * When doing RSS, map it to the same outbound queue - * as the incoming flow would be mapped to. - * - * If everything is setup correctly, it should be the - * same bucket that the current CPU we're on is. - */ - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { -#ifdef RSS - if (rss_hash2bucket(m->m_pkthdr.flowid, - M_HASHTYPE_GET(m), &bucket_id) == 0) { - /* XXX TODO: spit out something if bucket_id > num_queues? */ - i = bucket_id % adapter->num_queues; - } else { -#endif - i = m->m_pkthdr.flowid % adapter->num_queues; -#ifdef RSS - } -#endif - } else { - i = curcpu % adapter->num_queues; - } - txr = &adapter->tx_rings[i]; - que = &adapter->queues[i]; - - err = drbr_enqueue(ifp, txr->br, m); - if (err) - return (err); - if (IGB_TX_TRYLOCK(txr)) { - igb_mq_start_locked(ifp, txr); - IGB_TX_UNLOCK(txr); - } else - taskqueue_enqueue(que->tq, &txr->txq_task); - - return (0); -} - -static int -igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct mbuf *next; - int err = 0, enq = 0; - - IGB_TX_LOCK_ASSERT(txr); - - if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || - adapter->link_active == 0) - return (ENETDOWN); - - /* Process the queue */ - while ((next = drbr_peek(ifp, txr->br)) != NULL) { - if ((err = igb_xmit(txr, &next)) != 0) { - if (next == NULL) { - /* It was freed, move forward */ - drbr_advance(ifp, txr->br); - } else { - /* - * Still have one left, it may not be - * the same since the transmit function - * may have changed it. - */ - drbr_putback(ifp, txr->br, next); - } - break; - } - drbr_advance(ifp, txr->br); - enq++; - if (next->m_flags & M_MCAST && adapter->vf_ifp) - if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); - ETHER_BPF_MTAP(ifp, next); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - } - if (enq > 0) { - /* Set the watchdog */ - txr->queue_status |= IGB_QUEUE_WORKING; - txr->watchdog_time = ticks; - } - if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) - igb_txeof(txr); - if (txr->tx_avail <= IGB_MAX_SCATTER) - txr->queue_status |= IGB_QUEUE_DEPLETED; - return (err); -} - -/* - * Called from a taskqueue to drain queued transmit packets. - */ -static void -igb_deferred_mq_start(void *arg, int pending) -{ - struct tx_ring *txr = arg; - struct adapter *adapter = txr->adapter; - struct ifnet *ifp = adapter->ifp; - - IGB_TX_LOCK(txr); - if (!drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); - IGB_TX_UNLOCK(txr); -} - -/* -** Flush all ring buffers -*/ -static void -igb_qflush(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - struct mbuf *m; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); - while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) - m_freem(m); - IGB_TX_UNLOCK(txr); - } - if_qflush(ifp); -} -#endif /* ~IGB_LEGACY_TX */ - -/********************************************************************* - * Ioctl entry point - * - * igb_ioctl is called when the user wants to configure the - * interface. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static int -igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *)data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *)data; -#endif - bool avoid_reset = FALSE; - int error = 0; - - if (adapter->in_detach) - return (error); - - switch (command) { - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif - /* - ** Calling init results in link renegotiation, - ** so we avoid doing it when possible. - */ - if (avoid_reset) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - igb_init(adapter); -#ifdef INET - if (!(ifp->if_flags & IFF_NOARP)) - arp_ifinit(ifp, ifa); -#endif - } else - error = ether_ioctl(ifp, command, data); - break; - case SIOCSIFMTU: - { - int max_frame_size; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); - - IGB_CORE_LOCK(adapter); - max_frame_size = 9234; - if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - - ETHER_CRC_LEN) { - IGB_CORE_UNLOCK(adapter); - error = EINVAL; - break; - } - - ifp->if_mtu = ifr->ifr_mtu; - adapter->max_frame_size = - ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); - break; - } - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl rcv'd:\ - SIOCSIFFLAGS (Set Interface Flags)"); - IGB_CORE_LOCK(adapter); - if (ifp->if_flags & IFF_UP) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if ((ifp->if_flags ^ adapter->if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - igb_disable_promisc(adapter); - igb_set_promisc(adapter); - } - } else - igb_init_locked(adapter); - } else - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - igb_stop(adapter); - adapter->if_flags = ifp->if_flags; - IGB_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IGB_CORE_LOCK(adapter); - igb_disable_intr(adapter); - igb_set_multi(adapter); -#ifdef DEVICE_POLLING - if (!(ifp->if_capenable & IFCAP_POLLING)) -#endif - igb_enable_intr(adapter); - IGB_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - /* Check SOL/IDER usage */ - IGB_CORE_LOCK(adapter); - if (e1000_check_reset_block(&adapter->hw)) { - IGB_CORE_UNLOCK(adapter); - device_printf(adapter->dev, "Media change is" - " blocked due to SOL/IDER session.\n"); - break; - } - IGB_CORE_UNLOCK(adapter); - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl rcv'd: \ - SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - int mask, reinit; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); - reinit = 0; - mask = ifr->ifr_reqcap ^ ifp->if_capenable; -#ifdef DEVICE_POLLING - if (mask & IFCAP_POLLING) { - if (ifr->ifr_reqcap & IFCAP_POLLING) { - error = ether_poll_register(igb_poll, ifp); - if (error) - return (error); - IGB_CORE_LOCK(adapter); - igb_disable_intr(adapter); - ifp->if_capenable |= IFCAP_POLLING; - IGB_CORE_UNLOCK(adapter); - } else { - error = ether_poll_deregister(ifp); - /* Enable interrupt even in error case */ - IGB_CORE_LOCK(adapter); - igb_enable_intr(adapter); - ifp->if_capenable &= ~IFCAP_POLLING; - IGB_CORE_UNLOCK(adapter); - } - } -#endif -#if __FreeBSD_version >= 1000000 - /* HW cannot turn these on/off separately */ - if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { - ifp->if_capenable ^= IFCAP_RXCSUM; - ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; - reinit = 1; - } - if (mask & IFCAP_TXCSUM) { - ifp->if_capenable ^= IFCAP_TXCSUM; - reinit = 1; - } - if (mask & IFCAP_TXCSUM_IPV6) { - ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; - reinit = 1; - } -#else - if (mask & IFCAP_HWCSUM) { - ifp->if_capenable ^= IFCAP_HWCSUM; - reinit = 1; - } -#endif - if (mask & IFCAP_TSO4) { - ifp->if_capenable ^= IFCAP_TSO4; - reinit = 1; - } - if (mask & IFCAP_TSO6) { - ifp->if_capenable ^= IFCAP_TSO6; - reinit = 1; - } - if (mask & IFCAP_VLAN_HWTAGGING) { - ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - reinit = 1; - } - if (mask & IFCAP_VLAN_HWFILTER) { - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - reinit = 1; - } - if (mask & IFCAP_VLAN_HWTSO) { - ifp->if_capenable ^= IFCAP_VLAN_HWTSO; - reinit = 1; - } - if (mask & IFCAP_LRO) { - ifp->if_capenable ^= IFCAP_LRO; - reinit = 1; - } - if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) - igb_init(adapter); - VLAN_CAPABILITIES(ifp); - break; - } - - default: - error = ether_ioctl(ifp, command, data); - break; - } - - return (error); -} - - -/********************************************************************* - * Init entry point - * - * This routine is used in two ways. It is used by the stack as - * init entry point in network interface structure. It is also used - * by the driver as a hw/sw initialization routine to get to a - * consistent state. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static void -igb_init_locked(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - - INIT_DEBUGOUT("igb_init: begin"); - - IGB_CORE_LOCK_ASSERT(adapter); - - igb_disable_intr(adapter); - callout_stop(&adapter->timer); - - /* Get the latest mac address, User can use a LAA */ - bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, - ETHER_ADDR_LEN); - - /* Put the address into the Receive Address Array */ - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - - igb_reset(adapter); - igb_update_link_status(adapter); - - E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - - /* Set hardware offload abilities */ - ifp->if_hwassist = 0; - if (ifp->if_capenable & IFCAP_TXCSUM) { -#if __FreeBSD_version >= 1000000 - ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP); - if (adapter->hw.mac.type != e1000_82575) - ifp->if_hwassist |= CSUM_IP_SCTP; -#else - ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); -#if __FreeBSD_version >= 800000 - if (adapter->hw.mac.type != e1000_82575) - ifp->if_hwassist |= CSUM_SCTP; -#endif -#endif - } - -#if __FreeBSD_version >= 1000000 - if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { - ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP); - if (adapter->hw.mac.type != e1000_82575) - ifp->if_hwassist |= CSUM_IP6_SCTP; - } -#endif - if (ifp->if_capenable & IFCAP_TSO) - ifp->if_hwassist |= CSUM_TSO; - - /* Clear bad data from Rx FIFOs */ - e1000_rx_fifo_flush_82575(&adapter->hw); - - /* Configure for OS presence */ - igb_init_manageability(adapter); - - /* Prepare transmit descriptors and buffers */ - igb_setup_transmit_structures(adapter); - igb_initialize_transmit_units(adapter); - - /* Setup Multicast table */ - igb_set_multi(adapter); - - /* - ** Figure out the desired mbuf pool - ** for doing jumbo/packetsplit - */ - if (adapter->max_frame_size <= 2048) - adapter->rx_mbuf_sz = MCLBYTES; - else if (adapter->max_frame_size <= 4096) - adapter->rx_mbuf_sz = MJUMPAGESIZE; - else - adapter->rx_mbuf_sz = MJUM9BYTES; - - /* Prepare receive descriptors and buffers */ - if (igb_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - return; - } - igb_initialize_receive_units(adapter); - - /* Enable VLAN support */ - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) - igb_setup_vlan_hw_support(adapter); - - /* Don't lose promiscuous settings */ - igb_set_promisc(adapter); - - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - - callout_reset(&adapter->timer, hz, igb_local_timer, adapter); - e1000_clear_hw_cntrs_base_generic(&adapter->hw); - - if (adapter->msix > 1) /* Set up queue routing */ - igb_configure_queues(adapter); - - /* this clears any pending interrupts */ - E1000_READ_REG(&adapter->hw, E1000_ICR); -#ifdef DEVICE_POLLING - /* - * Only enable interrupts if we are not polling, make sure - * they are off otherwise. - */ - if (ifp->if_capenable & IFCAP_POLLING) - igb_disable_intr(adapter); - else -#endif /* DEVICE_POLLING */ - { - igb_enable_intr(adapter); - E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); - } - - /* Set Energy Efficient Ethernet */ - if (adapter->hw.phy.media_type == e1000_media_type_copper) { - if (adapter->hw.mac.type == e1000_i354) - e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); - else - e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); - } -} - -static void -igb_init(void *arg) -{ - struct adapter *adapter = arg; - - IGB_CORE_LOCK(adapter); - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); -} - - -static void -igb_handle_que(void *context, int pending) -{ - struct igb_queue *que = context; - struct adapter *adapter = que->adapter; - struct tx_ring *txr = que->txr; - struct ifnet *ifp = adapter->ifp; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - bool more; - - more = igb_rxeof(que, adapter->rx_process_limit, NULL); - - IGB_TX_LOCK(txr); - igb_txeof(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - /* Do we need another? */ - if (more) { - taskqueue_enqueue(que->tq, &que->que_task); - return; - } - } - -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) - return; -#endif - /* Reenable this interrupt */ - if (que->eims) - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); - else - igb_enable_intr(adapter); -} - -/* Deal with link in a sleepable context */ -static void -igb_handle_link(void *context, int pending) -{ - struct adapter *adapter = context; - - IGB_CORE_LOCK(adapter); - igb_handle_link_locked(adapter); - IGB_CORE_UNLOCK(adapter); -} - -static void -igb_handle_link_locked(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - struct ifnet *ifp = adapter->ifp; - - IGB_CORE_LOCK_ASSERT(adapter); - adapter->hw.mac.get_link_status = 1; - igb_update_link_status(adapter); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - } - } -} - -/********************************************************************* - * - * MSI/Legacy Deferred - * Interrupt Service routine - * - *********************************************************************/ -static int -igb_irq_fast(void *arg) -{ - struct adapter *adapter = arg; - struct igb_queue *que = adapter->queues; - u32 reg_icr; - - - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - - /* Hot eject? */ - if (reg_icr == 0xffffffff) - return FILTER_STRAY; - - /* Definitely not our interrupt. */ - if (reg_icr == 0x0) - return FILTER_STRAY; - - if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) - return FILTER_STRAY; - - /* - * Mask interrupts until the taskqueue is finished running. This is - * cheap, just assume that it is needed. This also works around the - * MSI message reordering errata on certain systems. - */ - igb_disable_intr(adapter); - taskqueue_enqueue(que->tq, &que->que_task); - - /* Link status change */ - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) - taskqueue_enqueue(que->tq, &adapter->link_task); - - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; - return FILTER_HANDLED; -} - -#ifdef DEVICE_POLLING -#if __FreeBSD_version >= 800000 -#define POLL_RETURN_COUNT(a) (a) -static int -#else -#define POLL_RETURN_COUNT(a) -static void -#endif -igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) -{ - struct adapter *adapter = ifp->if_softc; - struct igb_queue *que; - struct tx_ring *txr; - u32 reg_icr, rx_done = 0; - u32 loop = IGB_MAX_LOOP; - bool more; - - IGB_CORE_LOCK(adapter); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - IGB_CORE_UNLOCK(adapter); - return POLL_RETURN_COUNT(rx_done); - } - - if (cmd == POLL_AND_CHECK_STATUS) { - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - /* Link status change */ - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) - igb_handle_link_locked(adapter); - - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; - } - IGB_CORE_UNLOCK(adapter); - - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - txr = que->txr; - - igb_rxeof(que, count, &rx_done); - - IGB_TX_LOCK(txr); - do { - more = igb_txeof(txr); - } while (loop-- && more); -#ifndef IGB_LEGACY_TX - if (!drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - } - - return POLL_RETURN_COUNT(rx_done); -} -#endif /* DEVICE_POLLING */ - -/********************************************************************* - * - * MSIX Que Interrupt Service routine - * - **********************************************************************/ -static void -igb_msix_que(void *arg) -{ - struct igb_queue *que = arg; - struct adapter *adapter = que->adapter; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; - u32 newitr = 0; - bool more_rx; - - /* Ignore spurious interrupts */ - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - return; - - E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); - ++que->irqs; - - IGB_TX_LOCK(txr); - igb_txeof(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - - more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); - - if (adapter->enable_aim == FALSE) - goto no_calc; - /* - ** Do Adaptive Interrupt Moderation: - ** - Write out last calculated setting - ** - Calculate based on average size over - ** the last interval. - */ - if (que->eitr_setting) - E1000_WRITE_REG(&adapter->hw, - E1000_EITR(que->msix), que->eitr_setting); - - que->eitr_setting = 0; - - /* Idle, do nothing */ - if ((txr->bytes == 0) && (rxr->bytes == 0)) - goto no_calc; - - /* Used half Default if sub-gig */ - if (adapter->link_speed != 1000) - newitr = IGB_DEFAULT_ITR / 2; - else { - if ((txr->bytes) && (txr->packets)) - newitr = txr->bytes/txr->packets; - if ((rxr->bytes) && (rxr->packets)) - newitr = max(newitr, - (rxr->bytes / rxr->packets)); - newitr += 24; /* account for hardware frame, crc */ - /* set an upper boundary */ - newitr = min(newitr, 3000); - /* Be nice to the mid range */ - if ((newitr > 300) && (newitr < 1200)) - newitr = (newitr / 3); - else - newitr = (newitr / 2); - } - newitr &= 0x7FFC; /* Mask invalid bits */ - if (adapter->hw.mac.type == e1000_82575) - newitr |= newitr << 16; - else - newitr |= E1000_EITR_CNT_IGNR; - - /* save for next interrupt */ - que->eitr_setting = newitr; - - /* Reset state */ - txr->bytes = 0; - txr->packets = 0; - rxr->bytes = 0; - rxr->packets = 0; - -no_calc: - /* Schedule a clean task if needed*/ - if (more_rx) - taskqueue_enqueue(que->tq, &que->que_task); - else - /* Reenable this interrupt */ - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); - return; -} - - -/********************************************************************* - * - * MSIX Link Interrupt Service routine - * - **********************************************************************/ - -static void -igb_msix_link(void *arg) -{ - struct adapter *adapter = arg; - u32 icr; - - ++adapter->link_irq; - icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (!(icr & E1000_ICR_LSC)) - goto spurious; - igb_handle_link(adapter, 0); - -spurious: - /* Rearm */ - E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); - return; -} - - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called whenever the user queries the status of - * the interface using ifconfig. - * - **********************************************************************/ -static void -igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) -{ - struct adapter *adapter = ifp->if_softc; - - INIT_DEBUGOUT("igb_media_status: begin"); - - IGB_CORE_LOCK(adapter); - igb_update_link_status(adapter); - - ifmr->ifm_status = IFM_AVALID; - ifmr->ifm_active = IFM_ETHER; - - if (!adapter->link_active) { - IGB_CORE_UNLOCK(adapter); - return; - } - - ifmr->ifm_status |= IFM_ACTIVE; - - switch (adapter->link_speed) { - case 10: - ifmr->ifm_active |= IFM_10_T; - break; - case 100: - /* - ** Support for 100Mb SFP - these are Fiber - ** but the media type appears as serdes - */ - if (adapter->hw.phy.media_type == - e1000_media_type_internal_serdes) - ifmr->ifm_active |= IFM_100_FX; - else - ifmr->ifm_active |= IFM_100_TX; - break; - case 1000: - ifmr->ifm_active |= IFM_1000_T; - break; - case 2500: - ifmr->ifm_active |= IFM_2500_SX; - break; - } - - if (adapter->link_duplex == FULL_DUPLEX) - ifmr->ifm_active |= IFM_FDX; - else - ifmr->ifm_active |= IFM_HDX; - - IGB_CORE_UNLOCK(adapter); -} - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called when the user changes speed/duplex using - * media/mediopt option with ifconfig. - * - **********************************************************************/ -static int -igb_media_change(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct ifmedia *ifm = &adapter->media; - - INIT_DEBUGOUT("igb_media_change: begin"); - - if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) - return (EINVAL); - - IGB_CORE_LOCK(adapter); - switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_AUTO: - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; - break; - case IFM_1000_LX: - case IFM_1000_SX: - case IFM_1000_T: - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; - break; - case IFM_100_TX: - adapter->hw.mac.autoneg = FALSE; - adapter->hw.phy.autoneg_advertised = 0; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) - adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; - else - adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; - break; - case IFM_10_T: - adapter->hw.mac.autoneg = FALSE; - adapter->hw.phy.autoneg_advertised = 0; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) - adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; - else - adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; - break; - default: - device_printf(adapter->dev, "Unsupported media type\n"); - } - - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); - - return (0); -} - - -/********************************************************************* - * - * This routine maps the mbufs to Advanced TX descriptors. - * - **********************************************************************/ -static int -igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) -{ - struct adapter *adapter = txr->adapter; - u32 olinfo_status = 0, cmd_type_len; - int i, j, error, nsegs; - int first; - bool remap = TRUE; - struct mbuf *m_head; - bus_dma_segment_t segs[IGB_MAX_SCATTER]; - bus_dmamap_t map; - struct igb_tx_buf *txbuf; - union e1000_adv_tx_desc *txd = NULL; - - m_head = *m_headp; - - /* Basic descriptor defines */ - cmd_type_len = (E1000_ADVTXD_DTYP_DATA | - E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); - - if (m_head->m_flags & M_VLANTAG) - cmd_type_len |= E1000_ADVTXD_DCMD_VLE; - - /* - * Important to capture the first descriptor - * used because it will contain the index of - * the one we tell the hardware to report back - */ - first = txr->next_avail_desc; - txbuf = &txr->tx_buffers[first]; - map = txbuf->map; - - /* - * Map the packet for DMA. - */ -retry: - error = bus_dmamap_load_mbuf_sg(txr->txtag, map, - *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); - - if (__predict_false(error)) { - struct mbuf *m; - - switch (error) { - case EFBIG: - /* Try it again? - one try */ - if (remap == TRUE) { - remap = FALSE; - m = m_collapse(*m_headp, M_NOWAIT, - IGB_MAX_SCATTER); - if (m == NULL) { - adapter->mbuf_defrag_failed++; - m_freem(*m_headp); - *m_headp = NULL; - return (ENOBUFS); - } - *m_headp = m; - goto retry; - } else - return (error); - default: - txr->no_tx_dma_setup++; - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - } - - /* Make certain there are enough descriptors */ - if (txr->tx_avail < (nsegs + 2)) { - txr->no_desc_avail++; - bus_dmamap_unload(txr->txtag, map); - return (ENOBUFS); - } - m_head = *m_headp; - - /* - ** Set up the appropriate offload context - ** this will consume the first descriptor - */ - error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); - if (__predict_false(error)) { - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - - /* 82575 needs the queue index added */ - if (adapter->hw.mac.type == e1000_82575) - olinfo_status |= txr->me << 4; - - i = txr->next_avail_desc; - for (j = 0; j < nsegs; j++) { - bus_size_t seglen; - bus_addr_t segaddr; - - txbuf = &txr->tx_buffers[i]; - txd = &txr->tx_base[i]; - seglen = segs[j].ds_len; - segaddr = htole64(segs[j].ds_addr); - - txd->read.buffer_addr = segaddr; - txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | - cmd_type_len | seglen); - txd->read.olinfo_status = htole32(olinfo_status); - - if (++i == txr->num_desc) - i = 0; - } - - txd->read.cmd_type_len |= - htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); - txr->tx_avail -= nsegs; - txr->next_avail_desc = i; - - txbuf->m_head = m_head; - /* - ** Here we swap the map so the last descriptor, - ** which gets the completion interrupt has the - ** real map, and the first descriptor gets the - ** unused map from this descriptor. - */ - txr->tx_buffers[first].map = txbuf->map; - txbuf->map = map; - bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); - - /* Set the EOP descriptor that will be marked done */ - txbuf = &txr->tx_buffers[first]; - txbuf->eop = txd; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * Advance the Transmit Descriptor Tail (Tdt), this tells the - * hardware that this frame is available to transmit. - */ - ++txr->total_packets; - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); - - return (0); -} -static void -igb_set_promisc(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - struct e1000_hw *hw = &adapter->hw; - u32 reg; - - if (adapter->vf_ifp) { - e1000_promisc_set_vf(hw, e1000_promisc_enabled); - return; - } - - reg = E1000_READ_REG(hw, E1000_RCTL); - if (ifp->if_flags & IFF_PROMISC) { - reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - E1000_WRITE_REG(hw, E1000_RCTL, reg); - } else if (ifp->if_flags & IFF_ALLMULTI) { - reg |= E1000_RCTL_MPE; - reg &= ~E1000_RCTL_UPE; - E1000_WRITE_REG(hw, E1000_RCTL, reg); - } -} - -static void -igb_disable_promisc(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 reg; - int mcnt = 0; - - if (adapter->vf_ifp) { - e1000_promisc_set_vf(hw, e1000_promisc_disabled); - return; - } - reg = E1000_READ_REG(hw, E1000_RCTL); - reg &= (~E1000_RCTL_UPE); - if (ifp->if_flags & IFF_ALLMULTI) - mcnt = MAX_NUM_MULTICAST_ADDRESSES; - else { - struct ifmultiaddr *ifma; -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif - } - /* Don't disable if in MAX groups */ - if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) - reg &= (~E1000_RCTL_MPE); - E1000_WRITE_REG(hw, E1000_RCTL, reg); -} - - -/********************************************************************* - * Multicast Update - * - * This routine is called whenever multicast address list is updated. - * - **********************************************************************/ - -static void -igb_set_multi(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - struct ifmultiaddr *ifma; - u32 reg_rctl = 0; - u8 *mta; - - int mcnt = 0; - - IOCTL_DEBUGOUT("igb_set_multi: begin"); - - mta = adapter->mta; - bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * - MAX_NUM_MULTICAST_ADDRESSES); - -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - - bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), - &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif - - if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { - reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - reg_rctl |= E1000_RCTL_MPE; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - } else - e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); -} - - -/********************************************************************* - * Timer routine: - * This routine checks for link status, - * updates statistics, and does the watchdog. - * - **********************************************************************/ - -static void -igb_local_timer(void *arg) -{ - struct adapter *adapter = arg; - device_t dev = adapter->dev; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - struct igb_queue *que = adapter->queues; - int hung = 0, busy = 0; - - - IGB_CORE_LOCK_ASSERT(adapter); - - igb_update_link_status(adapter); - igb_update_stats_counters(adapter); - - /* - ** Check the TX queues status - ** - central locked handling of OACTIVE - ** - watchdog only if all queues show hung - */ - for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { - if ((txr->queue_status & IGB_QUEUE_HUNG) && - (adapter->pause_frames == 0)) - ++hung; - if (txr->queue_status & IGB_QUEUE_DEPLETED) - ++busy; - if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) - taskqueue_enqueue(que->tq, &que->que_task); - } - if (hung == adapter->num_queues) - goto timeout; - if (busy == adapter->num_queues) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && - (busy < adapter->num_queues)) - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - - adapter->pause_frames = 0; - callout_reset(&adapter->timer, hz, igb_local_timer, adapter); -#ifndef DEVICE_POLLING - /* Schedule all queue interrupts - deadlock protection */ - E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); -#endif - return; - -timeout: - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, - E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), - E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); - device_printf(dev,"TX(%d) desc avail = %d," - "Next TX to Clean = %d\n", - txr->me, txr->tx_avail, txr->next_to_clean); - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->watchdog_events++; - igb_init_locked(adapter); -} - -static void -igb_update_link_status(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_fc_info *fc = &hw->fc; - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - u32 link_check, thstat, ctrl; - char *flowctl = NULL; - - link_check = thstat = ctrl = 0; - - /* Get the cached link value or read for real */ - switch (hw->phy.media_type) { - case e1000_media_type_copper: - if (hw->mac.get_link_status) { - /* Do the work to read phy */ - e1000_check_for_link(hw); - link_check = !hw->mac.get_link_status; - } else - link_check = TRUE; - break; - case e1000_media_type_fiber: - e1000_check_for_link(hw); - link_check = (E1000_READ_REG(hw, E1000_STATUS) & - E1000_STATUS_LU); - break; - case e1000_media_type_internal_serdes: - e1000_check_for_link(hw); - link_check = adapter->hw.mac.serdes_has_link; - break; - /* VF device is type_unknown */ - case e1000_media_type_unknown: - e1000_check_for_link(hw); - link_check = !hw->mac.get_link_status; - /* Fall thru */ - default: - break; - } - - /* Check for thermal downshift or shutdown */ - if (hw->mac.type == e1000_i350) { - thstat = E1000_READ_REG(hw, E1000_THSTAT); - ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); - } - - /* Get the flow control for display */ - switch (fc->current_mode) { - case e1000_fc_rx_pause: - flowctl = "RX"; - break; - case e1000_fc_tx_pause: - flowctl = "TX"; - break; - case e1000_fc_full: - flowctl = "Full"; - break; - case e1000_fc_none: - default: - flowctl = "None"; - break; - } - - /* Now we check if a transition has happened */ - if (link_check && (adapter->link_active == 0)) { - e1000_get_speed_and_duplex(&adapter->hw, - &adapter->link_speed, &adapter->link_duplex); - if (bootverbose) - device_printf(dev, "Link is up %d Mbps %s," - " Flow Control: %s\n", - adapter->link_speed, - ((adapter->link_duplex == FULL_DUPLEX) ? - "Full Duplex" : "Half Duplex"), flowctl); - adapter->link_active = 1; - ifp->if_baudrate = adapter->link_speed * 1000000; - if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && - (thstat & E1000_THSTAT_LINK_THROTTLE)) - device_printf(dev, "Link: thermal downshift\n"); - /* Delay Link Up for Phy update */ - if (((hw->mac.type == e1000_i210) || - (hw->mac.type == e1000_i211)) && - (hw->phy.id == I210_I_PHY_ID)) - msec_delay(I210_LINK_DELAY); - /* Reset if the media type changed. */ - if (hw->dev_spec._82575.media_changed) { - hw->dev_spec._82575.media_changed = false; - adapter->flags |= IGB_MEDIA_RESET; - igb_reset(adapter); - } - /* This can sleep */ - if_link_state_change(ifp, LINK_STATE_UP); - } else if (!link_check && (adapter->link_active == 1)) { - ifp->if_baudrate = adapter->link_speed = 0; - adapter->link_duplex = 0; - if (bootverbose) - device_printf(dev, "Link is Down\n"); - if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && - (thstat & E1000_THSTAT_PWR_DOWN)) - device_printf(dev, "Link: thermal shutdown\n"); - adapter->link_active = 0; - /* This can sleep */ - if_link_state_change(ifp, LINK_STATE_DOWN); - /* Reset queue state */ - for (int i = 0; i < adapter->num_queues; i++, txr++) - txr->queue_status = IGB_QUEUE_IDLE; - } -} - -/********************************************************************* - * - * This routine disables all traffic on the adapter by issuing a - * global reset on the MAC and deallocates TX/RX buffers. - * - **********************************************************************/ - -static void -igb_stop(void *arg) -{ - struct adapter *adapter = arg; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - - IGB_CORE_LOCK_ASSERT(adapter); - - INIT_DEBUGOUT("igb_stop: begin"); - - igb_disable_intr(adapter); - - callout_stop(&adapter->timer); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - - /* Disarm watchdog timer. */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); - txr->queue_status = IGB_QUEUE_IDLE; - IGB_TX_UNLOCK(txr); - } - - e1000_reset_hw(&adapter->hw); - E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); - - e1000_led_off(&adapter->hw); - e1000_cleanup_led(&adapter->hw); -} - - -/********************************************************************* - * - * Determine hardware revision. - * - **********************************************************************/ -static void -igb_identify_hardware(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - /* Make sure our PCI config space has the necessary stuff set */ - pci_enable_busmaster(dev); - adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); - - /* Save off the information about this board */ - adapter->hw.vendor_id = pci_get_vendor(dev); - adapter->hw.device_id = pci_get_device(dev); - adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); - adapter->hw.subsystem_vendor_id = - pci_read_config(dev, PCIR_SUBVEND_0, 2); - adapter->hw.subsystem_device_id = - pci_read_config(dev, PCIR_SUBDEV_0, 2); - - /* Set MAC type early for PCI setup */ - e1000_set_mac_type(&adapter->hw); - - /* Are we a VF device? */ - if ((adapter->hw.mac.type == e1000_vfadapt) || - (adapter->hw.mac.type == e1000_vfadapt_i350)) - adapter->vf_ifp = 1; - else - adapter->vf_ifp = 0; -} - -static int -igb_allocate_pci_resources(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int rid; - - rid = PCIR_BAR(0); - adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, - &rid, RF_ACTIVE); - if (adapter->pci_mem == NULL) { - device_printf(dev, "Unable to allocate bus resource: memory\n"); - return (ENXIO); - } - adapter->osdep.mem_bus_space_tag = - rman_get_bustag(adapter->pci_mem); - adapter->osdep.mem_bus_space_handle = - rman_get_bushandle(adapter->pci_mem); - adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - - adapter->num_queues = 1; /* Defaults for Legacy or MSI */ - - /* This will setup either MSI/X or MSI */ - adapter->msix = igb_setup_msix(adapter); - adapter->hw.back = &adapter->osdep; - - return (0); -} - -/********************************************************************* - * - * Setup the Legacy or MSI Interrupt handler - * - **********************************************************************/ -static int -igb_allocate_legacy(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct igb_queue *que = adapter->queues; -#ifndef IGB_LEGACY_TX - struct tx_ring *txr = adapter->tx_rings; -#endif - int error, rid = 0; - - /* Turn off all interrupts */ - E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); - - /* MSI RID is 1 */ - if (adapter->msix == 1) - rid = 1; - - /* We allocate a single interrupt resource */ - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res == NULL) { - device_printf(dev, "Unable to allocate bus resource: " - "interrupt\n"); - return (ENXIO); - } - -#ifndef IGB_LEGACY_TX - TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); -#endif - - /* - * Try allocating a fast interrupt and the associated deferred - * processing contexts. - */ - TASK_INIT(&que->que_task, 0, igb_handle_que, que); - /* Make tasklet for deferred link handling */ - TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); - que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", - device_get_nameunit(adapter->dev)); - if ((error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, - adapter, &adapter->tag)) != 0) { - device_printf(dev, "Failed to register fast interrupt " - "handler: %d\n", error); - taskqueue_free(que->tq); - que->tq = NULL; - return (error); - } - - return (0); -} - - -/********************************************************************* - * - * Setup the MSIX Queue Interrupt handlers: - * - **********************************************************************/ -static int -igb_allocate_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct igb_queue *que = adapter->queues; - int error, rid, vector = 0; - int cpu_id = 0; -#ifdef RSS - cpuset_t cpu_mask; -#endif - - /* Be sure to start with all interrupts disabled */ - E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); - E1000_WRITE_FLUSH(&adapter->hw); - -#ifdef RSS - /* - * If we're doing RSS, the number of queues needs to - * match the number of RSS buckets that are configured. - * - * + If there's more queues than RSS buckets, we'll end - * up with queues that get no traffic. - * - * + If there's more RSS buckets than queues, we'll end - * up having multiple RSS buckets map to the same queue, - * so there'll be some contention. - */ - if (adapter->num_queues != rss_getnumbuckets()) { - device_printf(dev, - "%s: number of queues (%d) != number of RSS buckets (%d)" - "; performance will be impacted.\n", - __func__, - adapter->num_queues, - rss_getnumbuckets()); - } -#endif - - for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { - rid = vector +1; - que->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (que->res == NULL) { - device_printf(dev, - "Unable to allocate bus resource: " - "MSIX Queue Interrupt\n"); - return (ENXIO); - } - error = bus_setup_intr(dev, que->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - igb_msix_que, que, &que->tag); - if (error) { - que->res = NULL; - device_printf(dev, "Failed to register Queue handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, que->res, que->tag, "que %d", i); -#endif - que->msix = vector; - if (adapter->hw.mac.type == e1000_82575) - que->eims = E1000_EICR_TX_QUEUE0 << i; - else - que->eims = 1 << vector; - -#ifdef RSS - /* - * The queue ID is used as the RSS layer bucket ID. - * We look up the queue ID -> RSS CPU ID and select - * that. - */ - cpu_id = rss_getcpu(i % rss_getnumbuckets()); -#else - /* - * Bind the msix vector, and thus the - * rings to the corresponding cpu. - * - * This just happens to match the default RSS round-robin - * bucket -> queue -> CPU allocation. - */ - if (adapter->num_queues > 1) { - if (igb_last_bind_cpu < 0) - igb_last_bind_cpu = CPU_FIRST(); - cpu_id = igb_last_bind_cpu; - } -#endif - - if (adapter->num_queues > 1) { - bus_bind_intr(dev, que->res, cpu_id); -#ifdef RSS - device_printf(dev, - "Bound queue %d to RSS bucket %d\n", - i, cpu_id); -#else - device_printf(dev, - "Bound queue %d to cpu %d\n", - i, cpu_id); -#endif - } - -#ifndef IGB_LEGACY_TX - TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, - que->txr); -#endif - /* Make tasklet for deferred handling */ - TASK_INIT(&que->que_task, 0, igb_handle_que, que); - que->tq = taskqueue_create("igb_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - if (adapter->num_queues > 1) { - /* - * Only pin the taskqueue thread to a CPU if - * RSS is in use. - * - * This again just happens to match the default RSS - * round-robin bucket -> queue -> CPU allocation. - */ -#ifdef RSS - CPU_SETOF(cpu_id, &cpu_mask); - taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, - &cpu_mask, - "%s que (bucket %d)", - device_get_nameunit(adapter->dev), - cpu_id); -#else - taskqueue_start_threads(&que->tq, 1, PI_NET, - "%s que (qid %d)", - device_get_nameunit(adapter->dev), - cpu_id); -#endif - } else { - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", - device_get_nameunit(adapter->dev)); - } - - /* Finally update the last bound CPU id */ - if (adapter->num_queues > 1) - igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); - } - - /* And Link */ - rid = vector + 1; - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res == NULL) { - device_printf(dev, - "Unable to allocate bus resource: " - "MSIX Link Interrupt\n"); - return (ENXIO); - } - if ((error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - igb_msix_link, adapter, &adapter->tag)) != 0) { - device_printf(dev, "Failed to register Link handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "link"); -#endif - adapter->linkvec = vector; - - return (0); -} - - -static void -igb_configure_queues(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct igb_queue *que; - u32 tmp, ivar = 0, newitr = 0; - - /* First turn on RSS capability */ - if (adapter->hw.mac.type != e1000_82575) - E1000_WRITE_REG(hw, E1000_GPIE, - E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | - E1000_GPIE_PBA | E1000_GPIE_NSICR); - - /* Turn on MSIX */ - switch (adapter->hw.mac.type) { - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - case e1000_vfadapt: - case e1000_vfadapt_i350: - /* RX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i >> 1; - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i & 1) { - ivar &= 0xFF00FFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 16; - } else { - ivar &= 0xFFFFFF00; - ivar |= que->msix | E1000_IVAR_VALID; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - } - /* TX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i >> 1; - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i & 1) { - ivar &= 0x00FFFFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 24; - } else { - ivar &= 0xFFFF00FF; - ivar |= (que->msix | E1000_IVAR_VALID) << 8; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - adapter->que_mask |= que->eims; - } - - /* And for the link interrupt */ - ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; - adapter->link_mask = 1 << adapter->linkvec; - E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); - break; - case e1000_82576: - /* RX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i & 0x7; /* Each IVAR has two entries */ - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i < 8) { - ivar &= 0xFFFFFF00; - ivar |= que->msix | E1000_IVAR_VALID; - } else { - ivar &= 0xFF00FFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 16; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - adapter->que_mask |= que->eims; - } - /* TX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i & 0x7; /* Each IVAR has two entries */ - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i < 8) { - ivar &= 0xFFFF00FF; - ivar |= (que->msix | E1000_IVAR_VALID) << 8; - } else { - ivar &= 0x00FFFFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 24; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - adapter->que_mask |= que->eims; - } - - /* And for the link interrupt */ - ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; - adapter->link_mask = 1 << adapter->linkvec; - E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); - break; - - case e1000_82575: - /* enable MSI-X support*/ - tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); - tmp |= E1000_CTRL_EXT_PBA_CLR; - /* Auto-Mask interrupts upon ICR read. */ - tmp |= E1000_CTRL_EXT_EIAME; - tmp |= E1000_CTRL_EXT_IRCA; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); - - /* Queues */ - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - tmp = E1000_EICR_RX_QUEUE0 << i; - tmp |= E1000_EICR_TX_QUEUE0 << i; - que->eims = tmp; - E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), - i, que->eims); - adapter->que_mask |= que->eims; - } - - /* Link */ - E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), - E1000_EIMS_OTHER); - adapter->link_mask |= E1000_EIMS_OTHER; - default: - break; - } - - /* Set the starting interrupt rate */ - if (igb_max_interrupt_rate > 0) - newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; - - if (hw->mac.type == e1000_82575) - newitr |= newitr << 16; - else - newitr |= E1000_EITR_CNT_IGNR; - - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); - } - - return; -} - - -static void -igb_free_pci_resources(struct adapter *adapter) -{ - struct igb_queue *que = adapter->queues; - device_t dev = adapter->dev; - int rid; - - /* - ** There is a slight possibility of a failure mode - ** in attach that will result in entering this function - ** before interrupt resources have been initialized, and - ** in that case we do not want to execute the loops below - ** We can detect this reliably by the state of the adapter - ** res pointer. - */ - if (adapter->res == NULL) - goto mem; - - /* - * First release all the interrupt resources: - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - rid = que->msix + 1; - if (que->tag != NULL) { - bus_teardown_intr(dev, que->res, que->tag); - que->tag = NULL; - } - if (que->res != NULL) - bus_release_resource(dev, - SYS_RES_IRQ, rid, que->res); - } - - /* Clean the Legacy or Link interrupt last */ - if (adapter->linkvec) /* we are doing MSIX */ - rid = adapter->linkvec + 1; - else - (adapter->msix != 0) ? (rid = 1):(rid = 0); - - que = adapter->queues; - if (adapter->tag != NULL) { - taskqueue_drain(que->tq, &adapter->link_task); - bus_teardown_intr(dev, adapter->res, adapter->tag); - adapter->tag = NULL; - } - if (adapter->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); - - for (int i = 0; i < adapter->num_queues; i++, que++) { - if (que->tq != NULL) { -#ifndef IGB_LEGACY_TX - taskqueue_drain(que->tq, &que->txr->txq_task); -#endif - taskqueue_drain(que->tq, &que->que_task); - taskqueue_free(que->tq); - } - } -mem: - if (adapter->msix) - pci_release_msi(dev); - - if (adapter->msix_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - adapter->memrid, adapter->msix_mem); - - if (adapter->pci_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); - -} - -/* - * Setup Either MSI/X or MSI - */ -static int -igb_setup_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int bar, want, queues, msgs, maxqueues; - - /* tuneable override */ - if (igb_enable_msix == 0) - goto msi; - - /* First try MSI/X */ - msgs = pci_msix_count(dev); - if (msgs == 0) - goto msi; - /* - ** Some new devices, as with ixgbe, now may - ** use a different BAR, so we need to keep - ** track of which is used. - */ - adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); - bar = pci_read_config(dev, adapter->memrid, 4); - if (bar == 0) /* use next bar */ - adapter->memrid += 4; - adapter->msix_mem = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); - if (adapter->msix_mem == NULL) { - /* May not be enabled */ - device_printf(adapter->dev, - "Unable to map MSIX table \n"); - goto msi; - } - - queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; - - /* Override via tuneable */ - if (igb_num_queues != 0) - queues = igb_num_queues; - -#ifdef RSS - /* If we're doing RSS, clamp at the number of RSS buckets */ - if (queues > rss_getnumbuckets()) - queues = rss_getnumbuckets(); -#endif - - - /* Sanity check based on HW */ - switch (adapter->hw.mac.type) { - case e1000_82575: - maxqueues = 4; - break; - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - maxqueues = 8; - break; - case e1000_i210: - maxqueues = 4; - break; - case e1000_i211: - maxqueues = 2; - break; - default: /* VF interfaces */ - maxqueues = 1; - break; - } - - /* Final clamp on the actual hardware capability */ - if (queues > maxqueues) - queues = maxqueues; - - /* - ** One vector (RX/TX pair) per queue - ** plus an additional for Link interrupt - */ - want = queues + 1; - if (msgs >= want) - msgs = want; - else { - device_printf(adapter->dev, - "MSIX Configuration Problem, " - "%d vectors configured, but %d queues wanted!\n", - msgs, want); - goto msi; - } - if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { - device_printf(adapter->dev, - "Using MSIX interrupts with %d vectors\n", msgs); - adapter->num_queues = queues; - return (msgs); - } - /* - ** If MSIX alloc failed or provided us with - ** less than needed, free and fall through to MSI - */ - pci_release_msi(dev); - -msi: - if (adapter->msix_mem != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); - adapter->msix_mem = NULL; - } - msgs = 1; - if (pci_alloc_msi(dev, &msgs) == 0) { - device_printf(adapter->dev," Using an MSI interrupt\n"); - return (msgs); - } - device_printf(adapter->dev," Using a Legacy interrupt\n"); - return (0); -} - -/********************************************************************* - * - * Initialize the DMA Coalescing feature - * - **********************************************************************/ -static void -igb_init_dmac(struct adapter *adapter, u32 pba) -{ - device_t dev = adapter->dev; - struct e1000_hw *hw = &adapter->hw; - u32 dmac, reg = ~E1000_DMACR_DMAC_EN; - u16 hwm; - - if (hw->mac.type == e1000_i211) - return; - - if (hw->mac.type > e1000_82580) { - - if (adapter->dmac == 0) { /* Disabling it */ - E1000_WRITE_REG(hw, E1000_DMACR, reg); - return; - } else - device_printf(dev, "DMA Coalescing enabled\n"); - - /* Set starting threshold */ - E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); - - hwm = 64 * pba - adapter->max_frame_size / 16; - if (hwm < 64 * (pba - 6)) - hwm = 64 * (pba - 6); - reg = E1000_READ_REG(hw, E1000_FCRTC); - reg &= ~E1000_FCRTC_RTH_COAL_MASK; - reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) - & E1000_FCRTC_RTH_COAL_MASK); - E1000_WRITE_REG(hw, E1000_FCRTC, reg); - - - dmac = pba - adapter->max_frame_size / 512; - if (dmac < pba - 10) - dmac = pba - 10; - reg = E1000_READ_REG(hw, E1000_DMACR); - reg &= ~E1000_DMACR_DMACTHR_MASK; - reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) - & E1000_DMACR_DMACTHR_MASK); - - /* transition to L0x or L1 if available..*/ - reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); - - /* Check if status is 2.5Gb backplane connection - * before configuration of watchdog timer, which is - * in msec values in 12.8usec intervals - * watchdog timer= msec values in 32usec intervals - * for non 2.5Gb connection - */ - if (hw->mac.type == e1000_i354) { - int status = E1000_READ_REG(hw, E1000_STATUS); - if ((status & E1000_STATUS_2P5_SKU) && - (!(status & E1000_STATUS_2P5_SKU_OVER))) - reg |= ((adapter->dmac * 5) >> 6); - else - reg |= (adapter->dmac >> 5); - } else { - reg |= (adapter->dmac >> 5); - } - - E1000_WRITE_REG(hw, E1000_DMACR, reg); - - E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); - - /* Set the interval before transition */ - reg = E1000_READ_REG(hw, E1000_DMCTLX); - if (hw->mac.type == e1000_i350) - reg |= IGB_DMCTLX_DCFLUSH_DIS; - /* - ** in 2.5Gb connection, TTLX unit is 0.4 usec - ** which is 0x4*2 = 0xA. But delay is still 4 usec - */ - if (hw->mac.type == e1000_i354) { - int status = E1000_READ_REG(hw, E1000_STATUS); - if ((status & E1000_STATUS_2P5_SKU) && - (!(status & E1000_STATUS_2P5_SKU_OVER))) - reg |= 0xA; - else - reg |= 0x4; - } else { - reg |= 0x4; - } - - E1000_WRITE_REG(hw, E1000_DMCTLX, reg); - - /* free space in tx packet buffer to wake from DMA coal */ - E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - - (2 * adapter->max_frame_size)) >> 6); - - /* make low power state decision controlled by DMA coal */ - reg = E1000_READ_REG(hw, E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; - E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); - - } else if (hw->mac.type == e1000_82580) { - u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); - E1000_WRITE_REG(hw, E1000_PCIEMISC, - reg & ~E1000_PCIEMISC_LX_DECISION); - E1000_WRITE_REG(hw, E1000_DMACR, 0); - } -} - - -/********************************************************************* - * - * Set up an fresh starting state - * - **********************************************************************/ -static void -igb_reset(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct e1000_hw *hw = &adapter->hw; - struct e1000_fc_info *fc = &hw->fc; - struct ifnet *ifp = adapter->ifp; - u32 pba = 0; - u16 hwm; - - INIT_DEBUGOUT("igb_reset: begin"); - - /* Let the firmware know the OS is in control */ - igb_get_hw_control(adapter); - - /* - * Packet Buffer Allocation (PBA) - * Writing PBA sets the receive portion of the buffer - * the remainder is used for the transmit buffer. - */ - switch (hw->mac.type) { - case e1000_82575: - pba = E1000_PBA_32K; - break; - case e1000_82576: - case e1000_vfadapt: - pba = E1000_READ_REG(hw, E1000_RXPBS); - pba &= E1000_RXPBS_SIZE_MASK_82576; - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_vfadapt_i350: - pba = E1000_READ_REG(hw, E1000_RXPBS); - pba = e1000_rxpbs_adjust_82580(pba); - break; - case e1000_i210: - case e1000_i211: - pba = E1000_PBA_34K; - default: - break; - } - - /* Special needs in case of Jumbo frames */ - if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { - u32 tx_space, min_tx, min_rx; - pba = E1000_READ_REG(hw, E1000_PBA); - tx_space = pba >> 16; - pba &= 0xffff; - min_tx = (adapter->max_frame_size + - sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; - min_tx = roundup2(min_tx, 1024); - min_tx >>= 10; - min_rx = adapter->max_frame_size; - min_rx = roundup2(min_rx, 1024); - min_rx >>= 10; - if (tx_space < min_tx && - ((min_tx - tx_space) < pba)) { - pba = pba - (min_tx - tx_space); - /* - * if short on rx space, rx wins - * and must trump tx adjustment - */ - if (pba < min_rx) - pba = min_rx; - } - E1000_WRITE_REG(hw, E1000_PBA, pba); - } - - INIT_DEBUGOUT1("igb_init: pba=%dK",pba); - - /* - * These parameters control the automatic generation (Tx) and - * response (Rx) to Ethernet PAUSE frames. - * - High water mark should allow for at least two frames to be - * received after sending an XOFF. - * - Low water mark works best when it is very near the high water mark. - * This allows the receiver to restart by sending XON when it has - * drained a bit. - */ - hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - 2 * adapter->max_frame_size)); - - if (hw->mac.type < e1000_82576) { - fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ - fc->low_water = fc->high_water - 8; - } else { - fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ - fc->low_water = fc->high_water - 16; - } - - fc->pause_time = IGB_FC_PAUSE_TIME; - fc->send_xon = TRUE; - if (adapter->fc) - fc->requested_mode = adapter->fc; - else - fc->requested_mode = e1000_fc_default; - - /* Issue a global reset */ - e1000_reset_hw(hw); - E1000_WRITE_REG(hw, E1000_WUC, 0); - - /* Reset for AutoMediaDetect */ - if (adapter->flags & IGB_MEDIA_RESET) { - e1000_setup_init_funcs(hw, TRUE); - e1000_get_bus_info(hw); - adapter->flags &= ~IGB_MEDIA_RESET; - } - - if (e1000_init_hw(hw) < 0) - device_printf(dev, "Hardware Initialization Failed\n"); - - /* Setup DMA Coalescing */ - igb_init_dmac(adapter, pba); - - E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - e1000_get_phy_info(hw); - e1000_check_for_link(hw); - return; -} - -/********************************************************************* - * - * Setup networking device structure and register an interface. - * - **********************************************************************/ -static int -igb_setup_interface(device_t dev, struct adapter *adapter) -{ - struct ifnet *ifp; - - INIT_DEBUGOUT("igb_setup_interface: begin"); - - ifp = adapter->ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "can not allocate ifnet structure\n"); - return (-1); - } - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_init = igb_init; - ifp->if_softc = adapter; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = igb_ioctl; - ifp->if_get_counter = igb_get_counter; - - /* TSO parameters */ - ifp->if_hw_tsomax = IP_MAXPACKET; - ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER; - ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE; - -#ifndef IGB_LEGACY_TX - ifp->if_transmit = igb_mq_start; - ifp->if_qflush = igb_qflush; -#else - ifp->if_start = igb_start; - IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); - ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; - IFQ_SET_READY(&ifp->if_snd); -#endif - - ether_ifattach(ifp, adapter->hw.mac.addr); - - ifp->if_capabilities = ifp->if_capenable = 0; - - ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; -#if __FreeBSD_version >= 1000000 - ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; -#endif - ifp->if_capabilities |= IFCAP_TSO; - ifp->if_capabilities |= IFCAP_JUMBO_MTU; - ifp->if_capenable = ifp->if_capabilities; - - /* Don't enable LRO by default */ - ifp->if_capabilities |= IFCAP_LRO; - -#ifdef DEVICE_POLLING - ifp->if_capabilities |= IFCAP_POLLING; -#endif - - /* - * Tell the upper layer(s) we - * support full VLAN capability. - */ - ifp->if_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_MTU; - ifp->if_capenable |= IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_MTU; - - /* - ** Don't turn this on by default, if vlans are - ** created on another pseudo device (eg. lagg) - ** then vlan events are not passed thru, breaking - ** operation, but with HW FILTER off it works. If - ** using vlans directly on the igb driver you can - ** enable this and get full hardware tag filtering. - */ - ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; - - /* - * Specify the media types supported by this adapter and register - * callbacks to update media and link information - */ - ifmedia_init(&adapter->media, IFM_IMASK, - igb_media_change, igb_media_status); - if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || - (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); - } else { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, - 0, NULL); - if (adapter->hw.phy.type != e1000_phy_ife) { - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T, 0, NULL); - } - } - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); - return (0); -} - - -/* - * Manage DMA'able memory. - */ -static void -igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) -{ - if (error) - return; - *(bus_addr_t *) arg = segs[0].ds_addr; -} - -static int -igb_dma_malloc(struct adapter *adapter, bus_size_t size, - struct igb_dma_alloc *dma, int mapflags) -{ - int error; - - error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ - IGB_DBA_ALIGN, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - size, /* maxsize */ - 1, /* nsegments */ - size, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &dma->dma_tag); - if (error) { - device_printf(adapter->dev, - "%s: bus_dma_tag_create failed: %d\n", - __func__, error); - goto fail_0; - } - - error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, - BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); - if (error) { - device_printf(adapter->dev, - "%s: bus_dmamem_alloc(%ju) failed: %d\n", - __func__, (uintmax_t)size, error); - goto fail_2; - } - - dma->dma_paddr = 0; - error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, - size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); - if (error || dma->dma_paddr == 0) { - device_printf(adapter->dev, - "%s: bus_dmamap_load failed: %d\n", - __func__, error); - goto fail_3; - } - - return (0); - -fail_3: - bus_dmamap_unload(dma->dma_tag, dma->dma_map); -fail_2: - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - bus_dma_tag_destroy(dma->dma_tag); -fail_0: - dma->dma_tag = NULL; - - return (error); -} - -static void -igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) -{ - if (dma->dma_tag == NULL) - return; - if (dma->dma_paddr != 0) { - bus_dmamap_sync(dma->dma_tag, dma->dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(dma->dma_tag, dma->dma_map); - dma->dma_paddr = 0; - } - if (dma->dma_vaddr != NULL) { - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - dma->dma_vaddr = NULL; - } - bus_dma_tag_destroy(dma->dma_tag); - dma->dma_tag = NULL; -} - - -/********************************************************************* - * - * Allocate memory for the transmit and receive rings, and then - * the descriptors associated with each, called only once at attach. - * - **********************************************************************/ -static int -igb_allocate_queues(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct igb_queue *que = NULL; - struct tx_ring *txr = NULL; - struct rx_ring *rxr = NULL; - int rsize, tsize, error = E1000_SUCCESS; - int txconf = 0, rxconf = 0; - - /* First allocate the top level queue structs */ - if (!(adapter->queues = - (struct igb_queue *) malloc(sizeof(struct igb_queue) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate queue memory\n"); - error = ENOMEM; - goto fail; - } - - /* Next allocate the TX ring struct memory */ - if (!(adapter->tx_rings = - (struct tx_ring *) malloc(sizeof(struct tx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate TX ring memory\n"); - error = ENOMEM; - goto tx_fail; - } - - /* Now allocate the RX */ - if (!(adapter->rx_rings = - (struct rx_ring *) malloc(sizeof(struct rx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate RX ring memory\n"); - error = ENOMEM; - goto rx_fail; - } - - tsize = roundup2(adapter->num_tx_desc * - sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); - /* - * Now set up the TX queues, txconf is needed to handle the - * possibility that things fail midcourse and we need to - * undo memory gracefully - */ - for (int i = 0; i < adapter->num_queues; i++, txconf++) { - /* Set up some basics */ - txr = &adapter->tx_rings[i]; - txr->adapter = adapter; - txr->me = i; - txr->num_desc = adapter->num_tx_desc; - - /* Initialize the TX lock */ - snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); - - if (igb_dma_malloc(adapter, tsize, - &txr->txdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate TX Descriptor memory\n"); - error = ENOMEM; - goto err_tx_desc; - } - txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr; - bzero((void *)txr->tx_base, tsize); - - /* Now allocate transmit buffers for the ring */ - if (igb_allocate_transmit_buffers(txr)) { - device_printf(dev, - "Critical Failure setting up transmit buffers\n"); - error = ENOMEM; - goto err_tx_desc; - } -#ifndef IGB_LEGACY_TX - /* Allocate a buf ring */ - txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, - M_WAITOK, &txr->tx_mtx); -#endif - } - - /* - * Next the RX queues... - */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); - for (int i = 0; i < adapter->num_queues; i++, rxconf++) { - rxr = &adapter->rx_rings[i]; - rxr->adapter = adapter; - rxr->me = i; - - /* Initialize the RX lock */ - snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); - - if (igb_dma_malloc(adapter, rsize, - &rxr->rxdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate RxDescriptor memory\n"); - error = ENOMEM; - goto err_rx_desc; - } - rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr; - bzero((void *)rxr->rx_base, rsize); - - /* Allocate receive buffers for the ring*/ - if (igb_allocate_receive_buffers(rxr)) { - device_printf(dev, - "Critical Failure setting up receive buffers\n"); - error = ENOMEM; - goto err_rx_desc; - } - } - - /* - ** Finally set up the queue holding structs - */ - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - que->adapter = adapter; - que->txr = &adapter->tx_rings[i]; - que->rxr = &adapter->rx_rings[i]; - } - - return (0); - -err_rx_desc: - for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) - igb_dma_free(adapter, &rxr->rxdma); -err_tx_desc: - for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) - igb_dma_free(adapter, &txr->txdma); - free(adapter->rx_rings, M_DEVBUF); -rx_fail: -#ifndef IGB_LEGACY_TX - buf_ring_free(txr->br, M_DEVBUF); -#endif - free(adapter->tx_rings, M_DEVBUF); -tx_fail: - free(adapter->queues, M_DEVBUF); -fail: - return (error); -} - -/********************************************************************* - * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. This is - * called only once at attach, setup is done every reset. - * - **********************************************************************/ -static int -igb_allocate_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - device_t dev = adapter->dev; - struct igb_tx_buf *txbuf; - int error, i; - - /* - * Setup DMA descriptor areas. - */ - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - IGB_TSO_SIZE, /* maxsize */ - IGB_MAX_SCATTER, /* nsegments */ - PAGE_SIZE, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &txr->txtag))) { - device_printf(dev,"Unable to allocate TX DMA tag\n"); - goto fail; - } - - if (!(txr->tx_buffers = - (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate tx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - /* Create the descriptor buffer dma maps */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); - if (error != 0) { - device_printf(dev, "Unable to create TX DMA map\n"); - goto fail; - } - } - - return 0; -fail: - /* We free all, it handles case where we are in the middle */ - igb_free_transmit_structures(adapter); - return (error); -} - -/********************************************************************* - * - * Initialize a transmit ring. - * - **********************************************************************/ -static void -igb_setup_transmit_ring(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct igb_tx_buf *txbuf; - int i; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - /* Clear the old descriptor contents */ - IGB_TX_LOCK(txr); -#ifdef DEV_NETMAP - slot = netmap_reset(na, NR_TX, txr->me, 0); -#endif /* DEV_NETMAP */ - bzero((void *)txr->tx_base, - (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); - /* Reset indices */ - txr->next_avail_desc = 0; - txr->next_to_clean = 0; - - /* Free any existing tx buffers. */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - if (txbuf->m_head != NULL) { - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, txbuf->map); - m_freem(txbuf->m_head); - txbuf->m_head = NULL; - } -#ifdef DEV_NETMAP - if (slot) { - int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); - /* no need to set the address */ - netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); - } -#endif /* DEV_NETMAP */ - /* clear the watch index */ - txbuf->eop = NULL; - } - - /* Set number of descriptors available */ - txr->tx_avail = adapter->num_tx_desc; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - IGB_TX_UNLOCK(txr); -} - -/********************************************************************* - * - * Initialize all transmit rings. - * - **********************************************************************/ -static void -igb_setup_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) - igb_setup_transmit_ring(txr); - - return; -} - -/********************************************************************* - * - * Enable transmit unit. - * - **********************************************************************/ -static void -igb_initialize_transmit_units(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - struct e1000_hw *hw = &adapter->hw; - u32 tctl, txdctl; - - INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); - tctl = txdctl = 0; - - /* Setup the Tx Descriptor Rings */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 bus_addr = txr->txdma.dma_paddr; - - E1000_WRITE_REG(hw, E1000_TDLEN(i), - adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); - E1000_WRITE_REG(hw, E1000_TDBAH(i), - (uint32_t)(bus_addr >> 32)); - E1000_WRITE_REG(hw, E1000_TDBAL(i), - (uint32_t)bus_addr); - - /* Setup the HW Tx Head and Tail descriptor pointers */ - E1000_WRITE_REG(hw, E1000_TDT(i), 0); - E1000_WRITE_REG(hw, E1000_TDH(i), 0); - - HW_DEBUGOUT2("Base = %x, Length = %x\n", - E1000_READ_REG(hw, E1000_TDBAL(i)), - E1000_READ_REG(hw, E1000_TDLEN(i))); - - txr->queue_status = IGB_QUEUE_IDLE; - - txdctl |= IGB_TX_PTHRESH; - txdctl |= IGB_TX_HTHRESH << 8; - txdctl |= IGB_TX_WTHRESH << 16; - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); - } - - if (adapter->vf_ifp) - return; - - e1000_config_collision_dist(hw); - - /* Program the Transmit Control Register */ - tctl = E1000_READ_REG(hw, E1000_TCTL); - tctl &= ~E1000_TCTL_CT; - tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | - (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); - - /* This write will effectively turn on the transmit unit. */ - E1000_WRITE_REG(hw, E1000_TCTL, tctl); -} - -/********************************************************************* - * - * Free all transmit rings. - * - **********************************************************************/ -static void -igb_free_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); - igb_free_transmit_buffers(txr); - igb_dma_free(adapter, &txr->txdma); - IGB_TX_UNLOCK(txr); - IGB_TX_LOCK_DESTROY(txr); - } - free(adapter->tx_rings, M_DEVBUF); -} - -/********************************************************************* - * - * Free transmit ring related data structures. - * - **********************************************************************/ -static void -igb_free_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct igb_tx_buf *tx_buffer; - int i; - - INIT_DEBUGOUT("free_transmit_ring: begin"); - - if (txr->tx_buffers == NULL) - return; - - tx_buffer = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { - if (tx_buffer->m_head != NULL) { - bus_dmamap_sync(txr->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - if (tx_buffer->map != NULL) { - bus_dmamap_destroy(txr->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } else if (tx_buffer->map != NULL) { - bus_dmamap_unload(txr->txtag, - tx_buffer->map); - bus_dmamap_destroy(txr->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } -#ifndef IGB_LEGACY_TX - if (txr->br != NULL) - buf_ring_free(txr->br, M_DEVBUF); -#endif - if (txr->tx_buffers != NULL) { - free(txr->tx_buffers, M_DEVBUF); - txr->tx_buffers = NULL; - } - if (txr->txtag != NULL) { - bus_dma_tag_destroy(txr->txtag); - txr->txtag = NULL; - } - return; -} - -/********************************************************************** - * - * Setup work for hardware segmentation offload (TSO) on - * adapters using advanced tx descriptors - * - **********************************************************************/ -static int -igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) -{ - struct adapter *adapter = txr->adapter; - struct e1000_adv_tx_context_desc *TXD; - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; - u32 mss_l4len_idx = 0, paylen; - u16 vtag = 0, eh_type; - int ctxd, ehdrlen, ip_hlen, tcp_hlen; - struct ether_vlan_header *eh; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif -#ifdef INET - struct ip *ip; -#endif - struct tcphdr *th; - - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - eh_type = eh->evl_proto; - } else { - ehdrlen = ETHER_HDR_LEN; - eh_type = eh->evl_encap_proto; - } - - switch (ntohs(eh_type)) { -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - /* XXX-BZ For now we do not pretend to support ext. hdrs. */ - if (ip6->ip6_nxt != IPPROTO_TCP) - return (ENXIO); - ip_hlen = sizeof(struct ip6_hdr); - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); - th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; - break; -#endif -#ifdef INET - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return (ENXIO); - ip->ip_sum = 0; - ip_hlen = ip->ip_hl << 2; - th = (struct tcphdr *)((caddr_t)ip + ip_hlen); - th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; - /* Tell transmit desc to also do IPv4 checksum. */ - *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; - break; -#endif - default: - panic("%s: CSUM_TSO but no supported IP version (0x%04x)", - __func__, ntohs(eh_type)); - break; - } - - ctxd = txr->next_avail_desc; - TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; - - tcp_hlen = th->th_off << 2; - - /* This is used in the transmit desc in encap */ - paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; - - /* VLAN MACLEN IPLEN */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); - } - - vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= ip_hlen; - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - - /* ADV DTYPE TUCMD */ - type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - - /* MSS L4LEN IDX */ - mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); - mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); - /* 82575 needs the queue index added */ - if (adapter->hw.mac.type == e1000_82575) - mss_l4len_idx |= txr->me << 4; - TXD->mss_l4len_idx = htole32(mss_l4len_idx); - - TXD->seqnum_seed = htole32(0); - - if (++ctxd == txr->num_desc) - ctxd = 0; - - txr->tx_avail--; - txr->next_avail_desc = ctxd; - *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; - *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; - *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; - ++txr->tso_tx; - return (0); -} - -/********************************************************************* - * - * Advanced Context Descriptor setup for VLAN, CSUM or TSO - * - **********************************************************************/ - -static int -igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) -{ - struct e1000_adv_tx_context_desc *TXD; - struct adapter *adapter = txr->adapter; - struct ether_vlan_header *eh; - struct ip *ip; - struct ip6_hdr *ip6; - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; - int ehdrlen, ip_hlen = 0; - u16 etype; - u8 ipproto = 0; - int offload = TRUE; - int ctxd = txr->next_avail_desc; - u16 vtag = 0; - - /* First check if TSO is to be used */ - if (mp->m_pkthdr.csum_flags & CSUM_TSO) - return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); - - if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) - offload = FALSE; - - /* Indicate the whole packet as payload when not doing TSO */ - *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; - - /* Now ready a context descriptor */ - TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; - - /* - ** In advanced descriptors the vlan tag must - ** be placed into the context descriptor. Hence - ** we need to make one even if not doing offloads. - */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); - } else if (offload == FALSE) /* ... no offload to do */ - return (0); - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present, - * helpful for QinQ too. - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - etype = ntohs(eh->evl_proto); - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { - etype = ntohs(eh->evl_encap_proto); - ehdrlen = ETHER_HDR_LEN; - } - - /* Set the ether header length */ - vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; - - switch (etype) { - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - ip_hlen = ip->ip_hl << 2; - ipproto = ip->ip_p; - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; - break; - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - ip_hlen = sizeof(struct ip6_hdr); - /* XXX-BZ this will go badly in case of ext hdrs. */ - ipproto = ip6->ip6_nxt; - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; - break; - default: - offload = FALSE; - break; - } - - vlan_macip_lens |= ip_hlen; - type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; - - switch (ipproto) { - case IPPROTO_TCP: -#if __FreeBSD_version >= 1000000 - if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) -#else - if (mp->m_pkthdr.csum_flags & CSUM_TCP) -#endif - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; - break; - case IPPROTO_UDP: -#if __FreeBSD_version >= 1000000 - if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) -#else - if (mp->m_pkthdr.csum_flags & CSUM_UDP) -#endif - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; - break; - -#if __FreeBSD_version >= 800000 - case IPPROTO_SCTP: -#if __FreeBSD_version >= 1000000 - if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) -#else - if (mp->m_pkthdr.csum_flags & CSUM_SCTP) -#endif - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; - break; -#endif - default: - offload = FALSE; - break; - } - - if (offload) /* For the TX descriptor setup */ - *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; - - /* 82575 needs the queue index added */ - if (adapter->hw.mac.type == e1000_82575) - mss_l4len_idx = txr->me << 4; - - /* Now copy bits into descriptor */ - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - TXD->seqnum_seed = htole32(0); - TXD->mss_l4len_idx = htole32(mss_l4len_idx); - - /* We've consumed the first desc, adjust counters */ - if (++ctxd == txr->num_desc) - ctxd = 0; - txr->next_avail_desc = ctxd; - --txr->tx_avail; - - return (0); -} - -/********************************************************************** - * - * Examine each tx_buffer in the used queue. If the hardware is done - * processing the packet then free associated resources. The - * tx_buffer is put back on the free queue. - * - * TRUE return means there's work in the ring to clean, FALSE its empty. - **********************************************************************/ -static bool -igb_txeof(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; -#ifdef DEV_NETMAP - struct ifnet *ifp = adapter->ifp; -#endif /* DEV_NETMAP */ - u32 work, processed = 0; - int limit = adapter->tx_process_limit; - struct igb_tx_buf *buf; - union e1000_adv_tx_desc *txd; - - mtx_assert(&txr->tx_mtx, MA_OWNED); - -#ifdef DEV_NETMAP - if (netmap_tx_irq(ifp, txr->me)) - return (FALSE); -#endif /* DEV_NETMAP */ - - if (txr->tx_avail == txr->num_desc) { - txr->queue_status = IGB_QUEUE_IDLE; - return FALSE; - } - - /* Get work starting point */ - work = txr->next_to_clean; - buf = &txr->tx_buffers[work]; - txd = &txr->tx_base[work]; - work -= txr->num_desc; /* The distance to ring end */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - do { - union e1000_adv_tx_desc *eop = buf->eop; - if (eop == NULL) /* No work */ - break; - - if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) - break; /* I/O not complete */ - - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, - buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - buf->eop = NULL; - ++txr->tx_avail; - - /* We clean the range if multi segment */ - while (txd != eop) { - ++txd; - ++buf; - ++work; - /* wrap the ring? */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, - buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - ++txr->tx_avail; - buf->eop = NULL; - - } - ++txr->packets; - ++processed; - txr->watchdog_time = ticks; - - /* Try the next packet */ - ++txd; - ++buf; - ++work; - /* reset with a wrap */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - prefetch(txd); - } while (__predict_true(--limit)); - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - work += txr->num_desc; - txr->next_to_clean = work; - - /* - ** Watchdog calculation, we know there's - ** work outstanding or the first return - ** would have been taken, so none processed - ** for too long indicates a hang. - */ - if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) - txr->queue_status |= IGB_QUEUE_HUNG; - - if (txr->tx_avail >= IGB_QUEUE_THRESHOLD) - txr->queue_status &= ~IGB_QUEUE_DEPLETED; - - if (txr->tx_avail == txr->num_desc) { - txr->queue_status = IGB_QUEUE_IDLE; - return (FALSE); - } - - return (TRUE); -} - -/********************************************************************* - * - * Refresh mbuf buffers for RX descriptor rings - * - now keeps its own state so discards due to resource - * exhaustion are unnecessary, if an mbuf cannot be obtained - * it just returns, keeping its placeholder, thus it can simply - * be recalled to try again. - * - **********************************************************************/ -static void -igb_refresh_mbufs(struct rx_ring *rxr, int limit) -{ - struct adapter *adapter = rxr->adapter; - bus_dma_segment_t hseg[1]; - bus_dma_segment_t pseg[1]; - struct igb_rx_buf *rxbuf; - struct mbuf *mh, *mp; - int i, j, nsegs, error; - bool refreshed = FALSE; - - i = j = rxr->next_to_refresh; - /* - ** Get one descriptor beyond - ** our work mark to control - ** the loop. - */ - if (++j == adapter->num_rx_desc) - j = 0; - - while (j != limit) { - rxbuf = &rxr->rx_buffers[i]; - /* No hdr mbuf used with header split off */ - if (rxr->hdr_split == FALSE) - goto no_split; - if (rxbuf->m_head == NULL) { - mh = m_gethdr(M_NOWAIT, MT_DATA); - if (mh == NULL) - goto update; - } else - mh = rxbuf->m_head; - - mh->m_pkthdr.len = mh->m_len = MHLEN; - mh->m_len = MHLEN; - mh->m_flags |= M_PKTHDR; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->htag, - rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: hdr dmamap load" - " failure - %d\n", error); - m_free(mh); - rxbuf->m_head = NULL; - goto update; - } - rxbuf->m_head = mh; - bus_dmamap_sync(rxr->htag, rxbuf->hmap, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.hdr_addr = - htole64(hseg[0].ds_addr); -no_split: - if (rxbuf->m_pack == NULL) { - mp = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - if (mp == NULL) - goto update; - } else - mp = rxbuf->m_pack; - - mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: payload dmamap load" - " failure - %d\n", error); - m_free(mp); - rxbuf->m_pack = NULL; - goto update; - } - rxbuf->m_pack = mp; - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.pkt_addr = - htole64(pseg[0].ds_addr); - refreshed = TRUE; /* I feel wefreshed :) */ - - i = j; /* our next is precalculated */ - rxr->next_to_refresh = i; - if (++j == adapter->num_rx_desc) - j = 0; - } -update: - if (refreshed) /* update tail */ - E1000_WRITE_REG(&adapter->hw, - E1000_RDT(rxr->me), rxr->next_to_refresh); - return; -} - - -/********************************************************************* - * - * Allocate memory for rx_buffer structures. Since we use one - * rx_buffer per received packet, the maximum number of rx_buffer's - * that we'll need is equal to the number of receive descriptors - * that we've allocated. - * - **********************************************************************/ -static int -igb_allocate_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - device_t dev = adapter->dev; - struct igb_rx_buf *rxbuf; - int i, bsize, error; - - bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; - if (!(rxr->rx_buffers = - (struct igb_rx_buf *) malloc(bsize, - M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate rx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MSIZE, /* maxsize */ - 1, /* nsegments */ - MSIZE, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &rxr->htag))) { - device_printf(dev, "Unable to create RX DMA tag\n"); - goto fail; - } - - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MJUM9BYTES, /* maxsize */ - 1, /* nsegments */ - MJUM9BYTES, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &rxr->ptag))) { - device_printf(dev, "Unable to create RX payload DMA tag\n"); - goto fail; - } - - for (i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); - if (error) { - device_printf(dev, - "Unable to create RX head DMA maps\n"); - goto fail; - } - error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); - if (error) { - device_printf(dev, - "Unable to create RX packet DMA maps\n"); - goto fail; - } - } - - return (0); - -fail: - /* Frees all, but can handle partial completion */ - igb_free_receive_structures(adapter); - return (error); -} - - -static void -igb_free_receive_ring(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct igb_rx_buf *rxbuf; - - - for (int i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->htag, rxbuf->hmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->htag, rxbuf->hmap); - rxbuf->m_head->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_head); - } - if (rxbuf->m_pack != NULL) { - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->m_pack->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_pack); - } - rxbuf->m_head = NULL; - rxbuf->m_pack = NULL; - } -} - - -/********************************************************************* - * - * Initialize a receive ring and its buffers. - * - **********************************************************************/ -static int -igb_setup_receive_ring(struct rx_ring *rxr) -{ - struct adapter *adapter; - struct ifnet *ifp; - device_t dev; - struct igb_rx_buf *rxbuf; - bus_dma_segment_t pseg[1], hseg[1]; - struct lro_ctrl *lro = &rxr->lro; - int rsize, nsegs, error = 0; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(rxr->adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - adapter = rxr->adapter; - dev = adapter->dev; - ifp = adapter->ifp; - - /* Clear the ring contents */ - IGB_RX_LOCK(rxr); -#ifdef DEV_NETMAP - slot = netmap_reset(na, NR_RX, rxr->me, 0); -#endif /* DEV_NETMAP */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); - bzero((void *)rxr->rx_base, rsize); - - /* - ** Free current RX buffer structures and their mbufs - */ - igb_free_receive_ring(rxr); - - /* Configure for header split? */ - if (igb_header_split) - rxr->hdr_split = TRUE; - - /* Now replenish the ring mbufs */ - for (int j = 0; j < adapter->num_rx_desc; ++j) { - struct mbuf *mh, *mp; - - rxbuf = &rxr->rx_buffers[j]; -#ifdef DEV_NETMAP - if (slot) { - /* slot sj is mapped to the j-th NIC-ring entry */ - int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + sj, &paddr); - netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); - /* Update descriptor */ - rxr->rx_base[j].read.pkt_addr = htole64(paddr); - continue; - } -#endif /* DEV_NETMAP */ - if (rxr->hdr_split == FALSE) - goto skip_head; - - /* First the header */ - rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); - if (rxbuf->m_head == NULL) { - error = ENOBUFS; - goto fail; - } - m_adj(rxbuf->m_head, ETHER_ALIGN); - mh = rxbuf->m_head; - mh->m_len = mh->m_pkthdr.len = MHLEN; - mh->m_flags |= M_PKTHDR; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->htag, - rxbuf->hmap, rxbuf->m_head, hseg, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) /* Nothing elegant to do here */ - goto fail; - bus_dmamap_sync(rxr->htag, - rxbuf->hmap, BUS_DMASYNC_PREREAD); - /* Update descriptor */ - rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); - -skip_head: - /* Now the payload cluster */ - rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - if (rxbuf->m_pack == NULL) { - error = ENOBUFS; - goto fail; - } - mp = rxbuf->m_pack; - mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, pseg, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) - goto fail; - bus_dmamap_sync(rxr->ptag, - rxbuf->pmap, BUS_DMASYNC_PREREAD); - /* Update descriptor */ - rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); - } - - /* Setup our descriptor indices */ - rxr->next_to_check = 0; - rxr->next_to_refresh = adapter->num_rx_desc - 1; - rxr->lro_enabled = FALSE; - rxr->rx_split_packets = 0; - rxr->rx_bytes = 0; - - rxr->fmp = NULL; - rxr->lmp = NULL; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* - ** Now set up the LRO interface, we - ** also only do head split when LRO - ** is enabled, since so often they - ** are undesirable in similar setups. - */ - if (ifp->if_capenable & IFCAP_LRO) { - error = tcp_lro_init(lro); - if (error) { - device_printf(dev, "LRO Initialization failed!\n"); - goto fail; - } - INIT_DEBUGOUT("RX LRO Initialized\n"); - rxr->lro_enabled = TRUE; - lro->ifp = adapter->ifp; - } - - IGB_RX_UNLOCK(rxr); - return (0); - -fail: - igb_free_receive_ring(rxr); - IGB_RX_UNLOCK(rxr); - return (error); -} - - -/********************************************************************* - * - * Initialize all receive rings. - * - **********************************************************************/ -static int -igb_setup_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - int i; - - for (i = 0; i < adapter->num_queues; i++, rxr++) - if (igb_setup_receive_ring(rxr)) - goto fail; - - return (0); -fail: - /* - * Free RX buffers allocated so far, we will only handle - * the rings that completed, the failing case will have - * cleaned up for itself. 'i' is the endpoint. - */ - for (int j = 0; j < i; ++j) { - rxr = &adapter->rx_rings[j]; - IGB_RX_LOCK(rxr); - igb_free_receive_ring(rxr); - IGB_RX_UNLOCK(rxr); - } - - return (ENOBUFS); -} - -/* - * Initialise the RSS mapping for NICs that support multiple transmit/ - * receive rings. - */ -static void -igb_initialise_rss_mapping(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - int queue_id; - u32 reta; - u32 rss_key[10], mrqc, shift = 0; - - /* XXX? */ - if (adapter->hw.mac.type == e1000_82575) - shift = 6; - - /* - * The redirection table controls which destination - * queue each bucket redirects traffic to. - * Each DWORD represents four queues, with the LSB - * being the first queue in the DWORD. - * - * This just allocates buckets to queues using round-robin - * allocation. - * - * NOTE: It Just Happens to line up with the default - * RSS allocation method. - */ - - /* Warning FM follows */ - reta = 0; - for (i = 0; i < 128; i++) { -#ifdef RSS - queue_id = rss_get_indirection_to_bucket(i); - /* - * If we have more queues than buckets, we'll - * end up mapping buckets to a subset of the - * queues. - * - * If we have more buckets than queues, we'll - * end up instead assigning multiple buckets - * to queues. - * - * Both are suboptimal, but we need to handle - * the case so we don't go out of bounds - * indexing arrays and such. - */ - queue_id = queue_id % adapter->num_queues; -#else - queue_id = (i % adapter->num_queues); +#ifndef KLD_MODULE +#include "opt_iflib.h" #endif - /* Adjust if required */ - queue_id = queue_id << shift; - - /* - * The low 8 bits are for hash value (n+0); - * The next 8 bits are for hash value (n+1), etc. - */ - reta = reta >> 8; - reta = reta | ( ((uint32_t) queue_id) << 24); - if ((i & 3) == 3) { - E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); - reta = 0; - } - } - /* Now fill in hash table */ - - /* - * MRQC: Multiple Receive Queues Command - * Set queuing to RSS control, number depends on the device. - */ - mrqc = E1000_MRQC_ENABLE_RSS_8Q; - -#ifdef RSS - /* XXX ew typecasting */ - rss_getkey((uint8_t *) &rss_key); +#ifdef IFLIB +#include #else - arc4rand(&rss_key, sizeof(rss_key), 0); -#endif - for (i = 0; i < 10; i++) - E1000_WRITE_REG_ARRAY(hw, - E1000_RSSRK(0), i, rss_key[i]); - - /* - * Configure the RSS fields to hash upon. - */ - mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV4_TCP); - mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | - E1000_MRQC_RSS_FIELD_IPV6_TCP); - mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | - E1000_MRQC_RSS_FIELD_IPV6_UDP); - mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | - E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); - - E1000_WRITE_REG(hw, E1000_MRQC, mrqc); -} - -/********************************************************************* - * - * Enable receive unit. - * - **********************************************************************/ -static void -igb_initialize_receive_units(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - struct ifnet *ifp = adapter->ifp; - struct e1000_hw *hw = &adapter->hw; - u32 rctl, rxcsum, psize, srrctl = 0; - - INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); - - /* - * Make sure receives are disabled while setting - * up the descriptor ring - */ - rctl = E1000_READ_REG(hw, E1000_RCTL); - E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); - - /* - ** Set up for header split - */ - if (igb_header_split) { - /* Use a standard mbuf for the header */ - srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; - } else - srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; - - /* - ** Set up for jumbo frames - */ - if (ifp->if_mtu > ETHERMTU) { - rctl |= E1000_RCTL_LPE; - if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { - srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; - } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { - srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; - } - /* Set maximum packet len */ - psize = adapter->max_frame_size; - /* are we on a vlan? */ - if (adapter->ifp->if_vlantrunk != NULL) - psize += VLAN_TAG_SIZE; - E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); - } else { - rctl &= ~E1000_RCTL_LPE; - srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - rctl |= E1000_RCTL_SZ_2048; - } - - /* - * If TX flow control is disabled and there's >1 queue defined, - * enable DROP. - * - * This drops frames rather than hanging the RX MAC for all queues. - */ - if ((adapter->num_queues > 1) && - (adapter->fc == e1000_fc_none || - adapter->fc == e1000_fc_rx_pause)) { - srrctl |= E1000_SRRCTL_DROP_EN; - } - - /* Setup the Base and Length of the Rx Descriptor Rings */ - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - u64 bus_addr = rxr->rxdma.dma_paddr; - u32 rxdctl; - - E1000_WRITE_REG(hw, E1000_RDLEN(i), - adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); - E1000_WRITE_REG(hw, E1000_RDBAH(i), - (uint32_t)(bus_addr >> 32)); - E1000_WRITE_REG(hw, E1000_RDBAL(i), - (uint32_t)bus_addr); - E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); - /* Enable this Queue */ - rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); - rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; - rxdctl &= 0xFFF00000; - rxdctl |= IGB_RX_PTHRESH; - rxdctl |= IGB_RX_HTHRESH << 8; - rxdctl |= IGB_RX_WTHRESH << 16; - E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); - } - - /* - ** Setup for RX MultiQueue - */ - rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); - if (adapter->num_queues >1) { - - /* rss setup */ - igb_initialise_rss_mapping(adapter); - - /* - ** NOTE: Receive Full-Packet Checksum Offload - ** is mutually exclusive with Multiqueue. However - ** this is not the same as TCP/IP checksums which - ** still work. - */ - rxcsum |= E1000_RXCSUM_PCSD; -#if __FreeBSD_version >= 800000 - /* For SCTP Offload */ - if ((hw->mac.type != e1000_82575) && - (ifp->if_capenable & IFCAP_RXCSUM)) - rxcsum |= E1000_RXCSUM_CRCOFL; -#endif - } else { - /* Non RSS setup */ - if (ifp->if_capenable & IFCAP_RXCSUM) { - rxcsum |= E1000_RXCSUM_IPPCSE; -#if __FreeBSD_version >= 800000 - if (adapter->hw.mac.type != e1000_82575) - rxcsum |= E1000_RXCSUM_CRCOFL; -#endif - } else - rxcsum &= ~E1000_RXCSUM_TUOFL; - } - E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); - - /* Setup the Receive Control Register */ - rctl &= ~(3 << E1000_RCTL_MO_SHIFT); - rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | - E1000_RCTL_RDMTS_HALF | - (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - /* Strip CRC bytes. */ - rctl |= E1000_RCTL_SECRC; - /* Make sure VLAN Filters are off */ - rctl &= ~E1000_RCTL_VFE; - /* Don't store bad packets */ - rctl &= ~E1000_RCTL_SBP; - - /* Enable Receives */ - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - - /* - * Setup the HW Rx Head and Tail Descriptor Pointers - * - needs to be after enable - */ - for (int i = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; - E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); -#ifdef DEV_NETMAP - /* - * an init() while a netmap client is active must - * preserve the rx buffers passed to userspace. - * In this driver it means we adjust RDT to - * something different from next_to_refresh - * (which is not used in netmap mode). - */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; - int t = rxr->next_to_refresh - nm_kr_rxspace(kring); - - if (t >= adapter->num_rx_desc) - t -= adapter->num_rx_desc; - else if (t < 0) - t += adapter->num_rx_desc; - E1000_WRITE_REG(hw, E1000_RDT(i), t); - } else -#endif /* DEV_NETMAP */ - E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh); - } - return; -} - -/********************************************************************* - * - * Free receive rings. - * - **********************************************************************/ -static void -igb_free_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - struct lro_ctrl *lro = &rxr->lro; - igb_free_receive_buffers(rxr); - tcp_lro_free(lro); - igb_dma_free(adapter, &rxr->rxdma); - } - - free(adapter->rx_rings, M_DEVBUF); -} - -/********************************************************************* - * - * Free receive ring data structures. - * - **********************************************************************/ -static void -igb_free_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct igb_rx_buf *rxbuf; - int i; - - INIT_DEBUGOUT("free_receive_structures: begin"); - - /* Cleanup any existing buffers */ - if (rxr->rx_buffers != NULL) { - for (i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->htag, rxbuf->hmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->htag, rxbuf->hmap); - rxbuf->m_head->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_head); - } - if (rxbuf->m_pack != NULL) { - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->m_pack->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_pack); - } - rxbuf->m_head = NULL; - rxbuf->m_pack = NULL; - if (rxbuf->hmap != NULL) { - bus_dmamap_destroy(rxr->htag, rxbuf->hmap); - rxbuf->hmap = NULL; - } - if (rxbuf->pmap != NULL) { - bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); - rxbuf->pmap = NULL; - } - } - if (rxr->rx_buffers != NULL) { - free(rxr->rx_buffers, M_DEVBUF); - rxr->rx_buffers = NULL; - } - } - - if (rxr->htag != NULL) { - bus_dma_tag_destroy(rxr->htag); - rxr->htag = NULL; - } - if (rxr->ptag != NULL) { - bus_dma_tag_destroy(rxr->ptag); - rxr->ptag = NULL; - } -} - -static __inline void -igb_rx_discard(struct rx_ring *rxr, int i) -{ - struct igb_rx_buf *rbuf; - - rbuf = &rxr->rx_buffers[i]; - - /* Partially received? Free the chain */ - if (rxr->fmp != NULL) { - rxr->fmp->m_flags |= M_PKTHDR; - m_freem(rxr->fmp); - rxr->fmp = NULL; - rxr->lmp = NULL; - } - - /* - ** With advanced descriptors the writeback - ** clobbers the buffer addrs, so its easier - ** to just free the existing mbufs and take - ** the normal refresh path to get new buffers - ** and mapping. - */ - if (rbuf->m_head) { - m_free(rbuf->m_head); - rbuf->m_head = NULL; - bus_dmamap_unload(rxr->htag, rbuf->hmap); - } - - if (rbuf->m_pack) { - m_free(rbuf->m_pack); - rbuf->m_pack = NULL; - bus_dmamap_unload(rxr->ptag, rbuf->pmap); - } - - return; -} - -static __inline void -igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) -{ - - /* - * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet - * should be computed by hardware. Also it should not have VLAN tag in - * ethernet header. - */ - if (rxr->lro_enabled && - (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && - (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && - (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == - (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && - (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == - (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { - /* - * Send to the stack if: - ** - LRO not enabled, or - ** - no LRO resources, or - ** - lro enqueue fails - */ - if (rxr->lro.lro_cnt != 0) - if (tcp_lro_rx(&rxr->lro, m, 0) == 0) - return; - } - IGB_RX_UNLOCK(rxr); - (*ifp->if_input)(ifp, m); - IGB_RX_LOCK(rxr); -} - -/********************************************************************* - * - * This routine executes in interrupt context. It replenishes - * the mbufs in the descriptor and sends data which has been - * dma'ed into host memory to upper layer. - * - * We loop at most count times if count is > 0, or until done if - * count < 0. - * - * Return TRUE if more to clean, FALSE otherwise - *********************************************************************/ -static bool -igb_rxeof(struct igb_queue *que, int count, int *done) -{ - struct adapter *adapter = que->adapter; - struct rx_ring *rxr = que->rxr; - struct ifnet *ifp = adapter->ifp; - struct lro_ctrl *lro = &rxr->lro; - int i, processed = 0, rxdone = 0; - u32 ptype, staterr = 0; - union e1000_adv_rx_desc *cur; - - IGB_RX_LOCK(rxr); - /* Sync the ring. */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - -#ifdef DEV_NETMAP - if (netmap_rx_irq(ifp, rxr->me, &processed)) { - IGB_RX_UNLOCK(rxr); - return (FALSE); - } -#endif /* DEV_NETMAP */ - - /* Main clean loop */ - for (i = rxr->next_to_check; count != 0;) { - struct mbuf *sendmp, *mh, *mp; - struct igb_rx_buf *rxbuf; - u16 hlen, plen, hdr, vtag, pkt_info; - bool eop = FALSE; - - cur = &rxr->rx_base[i]; - staterr = le32toh(cur->wb.upper.status_error); - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - count--; - sendmp = mh = mp = NULL; - cur->wb.upper.status_error = 0; - rxbuf = &rxr->rx_buffers[i]; - plen = le16toh(cur->wb.upper.length); - ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; - if (((adapter->hw.mac.type == e1000_i350) || - (adapter->hw.mac.type == e1000_i354)) && - (staterr & E1000_RXDEXT_STATERR_LB)) - vtag = be16toh(cur->wb.upper.vlan); - else - vtag = le16toh(cur->wb.upper.vlan); - hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); - pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); - eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); - - /* - * Free the frame (all segments) if we're at EOP and - * it's an error. - * - * The datasheet states that EOP + status is only valid for - * the final segment in a multi-segment frame. - */ - if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { - adapter->dropped_pkts++; - ++rxr->rx_discarded; - igb_rx_discard(rxr, i); - goto next_desc; - } - - /* - ** The way the hardware is configured to - ** split, it will ONLY use the header buffer - ** when header split is enabled, otherwise we - ** get normal behavior, ie, both header and - ** payload are DMA'd into the payload buffer. - ** - ** The fmp test is to catch the case where a - ** packet spans multiple descriptors, in that - ** case only the first header is valid. - */ - if (rxr->hdr_split && rxr->fmp == NULL) { - bus_dmamap_unload(rxr->htag, rxbuf->hmap); - hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> - E1000_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IGB_HDR_BUF) - hlen = IGB_HDR_BUF; - mh = rxr->rx_buffers[i].m_head; - mh->m_len = hlen; - /* clear buf pointer for refresh */ - rxbuf->m_head = NULL; - /* - ** Get the payload length, this - ** could be zero if its a small - ** packet. - */ - if (plen > 0) { - mp = rxr->rx_buffers[i].m_pack; - mp->m_len = plen; - mh->m_next = mp; - /* clear buf pointer */ - rxbuf->m_pack = NULL; - rxr->rx_split_packets++; - } - } else { - /* - ** Either no header split, or a - ** secondary piece of a fragmented - ** split packet. - */ - mh = rxr->rx_buffers[i].m_pack; - mh->m_len = plen; - /* clear buf info for refresh */ - rxbuf->m_pack = NULL; - } - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - - ++processed; /* So we know when to refresh */ - - /* Initial frame - setup */ - if (rxr->fmp == NULL) { - mh->m_pkthdr.len = mh->m_len; - /* Save the head of the chain */ - rxr->fmp = mh; - rxr->lmp = mh; - if (mp != NULL) { - /* Add payload if split */ - mh->m_pkthdr.len += mp->m_len; - rxr->lmp = mh->m_next; - } - } else { - /* Chain mbuf's together */ - rxr->lmp->m_next = mh; - rxr->lmp = rxr->lmp->m_next; - rxr->fmp->m_pkthdr.len += mh->m_len; - } - - if (eop) { - rxr->fmp->m_pkthdr.rcvif = ifp; - rxr->rx_packets++; - /* capture data for AIM */ - rxr->packets++; - rxr->bytes += rxr->fmp->m_pkthdr.len; - rxr->rx_bytes += rxr->fmp->m_pkthdr.len; - - if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) - igb_rx_checksum(staterr, rxr->fmp, ptype); - - if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && - (staterr & E1000_RXD_STAT_VP) != 0) { - rxr->fmp->m_pkthdr.ether_vtag = vtag; - rxr->fmp->m_flags |= M_VLANTAG; - } - - /* - * In case of multiqueue, we have RXCSUM.PCSD bit set - * and never cleared. This means we have RSS hash - * available to be used. - */ - if (adapter->num_queues > 1) { - rxr->fmp->m_pkthdr.flowid = - le32toh(cur->wb.lower.hi_dword.rss); - switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { - case E1000_RXDADV_RSSTYPE_IPV4_TCP: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_TCP_IPV4); - break; - case E1000_RXDADV_RSSTYPE_IPV4: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_IPV4); - break; - case E1000_RXDADV_RSSTYPE_IPV6_TCP: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_TCP_IPV6); - break; - case E1000_RXDADV_RSSTYPE_IPV6_EX: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_IPV6_EX); - break; - case E1000_RXDADV_RSSTYPE_IPV6: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_IPV6); - break; - case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_TCP_IPV6_EX); - break; - default: - /* XXX fallthrough */ - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_OPAQUE_HASH); - } - } else { -#ifndef IGB_LEGACY_TX - rxr->fmp->m_pkthdr.flowid = que->msix; - M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); -#endif - } - sendmp = rxr->fmp; - /* Make sure to set M_PKTHDR. */ - sendmp->m_flags |= M_PKTHDR; - rxr->fmp = NULL; - rxr->lmp = NULL; - } - -next_desc: - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* Advance our pointers to the next descriptor. */ - if (++i == adapter->num_rx_desc) - i = 0; - /* - ** Send to the stack or LRO - */ - if (sendmp != NULL) { - rxr->next_to_check = i; - igb_rx_input(rxr, ifp, sendmp, ptype); - i = rxr->next_to_check; - rxdone++; - } - - /* Every 8 descriptors we go to refresh mbufs */ - if (processed == 8) { - igb_refresh_mbufs(rxr, i); - processed = 0; - } - } - - /* Catch any remainders */ - if (igb_rx_unrefreshed(rxr)) - igb_refresh_mbufs(rxr, i); - - rxr->next_to_check = i; - - /* - * Flush any outstanding LRO work - */ - tcp_lro_flush_all(lro); - - if (done != NULL) - *done += rxdone; - - IGB_RX_UNLOCK(rxr); - return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE); -} - -/********************************************************************* - * - * Verify that the hardware indicated that the checksum is valid. - * Inform the stack about the status of checksum so that stack - * doesn't spend time verifying the checksum. - * - *********************************************************************/ -static void -igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype) -{ - u16 status = (u16)staterr; - u8 errors = (u8) (staterr >> 24); - int sctp; - - /* Ignore Checksum bit is set */ - if (status & E1000_RXD_STAT_IXSM) { - mp->m_pkthdr.csum_flags = 0; - return; - } - - if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && - (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) - sctp = 1; - else - sctp = 0; - if (status & E1000_RXD_STAT_IPCS) { - /* Did it pass? */ - if (!(errors & E1000_RXD_ERR_IPE)) { - /* IP Checksum Good */ - mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; - mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; - } else - mp->m_pkthdr.csum_flags = 0; - } - - if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { - u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); -#if __FreeBSD_version >= 800000 - if (sctp) /* reassign */ - type = CSUM_SCTP_VALID; -#endif - /* Did it pass? */ - if (!(errors & E1000_RXD_ERR_TCPE)) { - mp->m_pkthdr.csum_flags |= type; - if (sctp == 0) - mp->m_pkthdr.csum_data = htons(0xffff); - } - } - return; -} - -/* - * This routine is run via an vlan - * config EVENT - */ -static void -igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u32 index, bit; - - if (ifp->if_softc != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IGB_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] |= (1 << bit); - ++adapter->num_vlans; - /* Change hw filter setting */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - igb_setup_vlan_hw_support(adapter); - IGB_CORE_UNLOCK(adapter); -} - -/* - * This routine is run via an vlan - * unconfig EVENT - */ -static void -igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u32 index, bit; - - if (ifp->if_softc != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IGB_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] &= ~(1 << bit); - --adapter->num_vlans; - /* Change hw filter setting */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - igb_setup_vlan_hw_support(adapter); - IGB_CORE_UNLOCK(adapter); -} - -static void -igb_setup_vlan_hw_support(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 reg; - - if (adapter->vf_ifp) { - e1000_rlpml_set_vf(hw, - adapter->max_frame_size + VLAN_TAG_SIZE); - return; - } - - reg = E1000_READ_REG(hw, E1000_CTRL); - reg |= E1000_CTRL_VME; - E1000_WRITE_REG(hw, E1000_CTRL, reg); - - /* Enable the Filter Table */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { - reg = E1000_READ_REG(hw, E1000_RCTL); - reg &= ~E1000_RCTL_CFIEN; - reg |= E1000_RCTL_VFE; - E1000_WRITE_REG(hw, E1000_RCTL, reg); - } - - /* Update the frame size */ - E1000_WRITE_REG(&adapter->hw, E1000_RLPML, - adapter->max_frame_size + VLAN_TAG_SIZE); - - /* Don't bother with table if no vlans */ - if ((adapter->num_vlans == 0) || - ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) - return; - /* - ** A soft reset zero's out the VFTA, so - ** we need to repopulate it now. - */ - for (int i = 0; i < IGB_VFTA_SIZE; i++) - if (adapter->shadow_vfta[i] != 0) { - if (adapter->vf_ifp) - e1000_vfta_set_vf(hw, - adapter->shadow_vfta[i], TRUE); - else - e1000_write_vfta(hw, - i, adapter->shadow_vfta[i]); - } -} - -static void -igb_enable_intr(struct adapter *adapter) -{ - /* With RSS set up what to auto clear */ - if (adapter->msix_mem) { - u32 mask = (adapter->que_mask | adapter->link_mask); - E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); - E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); - E1000_WRITE_REG(&adapter->hw, E1000_IMS, - E1000_IMS_LSC); - } else { - E1000_WRITE_REG(&adapter->hw, E1000_IMS, - IMS_ENABLE_MASK); - } - E1000_WRITE_FLUSH(&adapter->hw); - - return; -} - -static void -igb_disable_intr(struct adapter *adapter) -{ - if (adapter->msix_mem) { - E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); - E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); - } - E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); - E1000_WRITE_FLUSH(&adapter->hw); - return; -} - -/* - * Bit of a misnomer, what this really means is - * to enable OS management of the system... aka - * to disable special hardware management features - */ -static void -igb_init_manageability(struct adapter *adapter) -{ - if (adapter->has_manage) { - int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); - int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); - - /* disable hardware interception of ARP */ - manc &= ~(E1000_MANC_ARP_EN); - - /* enable receiving management packets to the host */ - manc |= E1000_MANC_EN_MNG2HOST; - manc2h |= 1 << 5; /* Mng Port 623 */ - manc2h |= 1 << 6; /* Mng Port 664 */ - E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); - E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); - } -} - -/* - * Give control back to hardware management - * controller if there is one. - */ -static void -igb_release_manageability(struct adapter *adapter) -{ - if (adapter->has_manage) { - int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); - - /* re-enable hardware interception of ARP */ - manc |= E1000_MANC_ARP_EN; - manc &= ~E1000_MANC_EN_MNG2HOST; - - E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); - } -} - -/* - * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that - * the driver is loaded. - * - */ -static void -igb_get_hw_control(struct adapter *adapter) -{ - u32 ctrl_ext; - - if (adapter->vf_ifp) - return; - - /* Let firmware know the driver has taken over */ - ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, - ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); -} - -/* - * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that the - * driver is no longer loaded. - * - */ -static void -igb_release_hw_control(struct adapter *adapter) -{ - u32 ctrl_ext; - - if (adapter->vf_ifp) - return; - - /* Let firmware taken over control of h/w */ - ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); -} - -static int -igb_is_valid_ether_addr(uint8_t *addr) -{ - char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; - - if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { - return (FALSE); - } - - return (TRUE); -} - - -/* - * Enable PCI Wake On Lan capability - */ -static void -igb_enable_wakeup(device_t dev) -{ - u16 cap, status; - u8 id; - - /* First find the capabilities pointer*/ - cap = pci_read_config(dev, PCIR_CAP_PTR, 2); - /* Read the PM Capabilities */ - id = pci_read_config(dev, cap, 1); - if (id != PCIY_PMG) /* Something wrong */ - return; - /* OK, we have the power capabilities, so - now get the status register */ - cap += PCIR_POWER_STATUS; - status = pci_read_config(dev, cap, 2); - status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; - pci_write_config(dev, cap, status, 2); - return; -} - -static void -igb_led_func(void *arg, int onoff) -{ - struct adapter *adapter = arg; - - IGB_CORE_LOCK(adapter); - if (onoff) { - e1000_setup_led(&adapter->hw); - e1000_led_on(&adapter->hw); - } else { - e1000_led_off(&adapter->hw); - e1000_cleanup_led(&adapter->hw); - } - IGB_CORE_UNLOCK(adapter); -} - -static uint64_t -igb_get_vf_counter(if_t ifp, ift_counter cnt) -{ - struct adapter *adapter; - struct e1000_vf_stats *stats; -#ifndef IGB_LEGACY_TX - struct tx_ring *txr; - uint64_t rv; -#endif - - adapter = if_getsoftc(ifp); - stats = (struct e1000_vf_stats *)adapter->stats; - - switch (cnt) { - case IFCOUNTER_IPACKETS: - return (stats->gprc); - case IFCOUNTER_OPACKETS: - return (stats->gptc); - case IFCOUNTER_IBYTES: - return (stats->gorc); - case IFCOUNTER_OBYTES: - return (stats->gotc); - case IFCOUNTER_IMCASTS: - return (stats->mprc); - case IFCOUNTER_IERRORS: - return (adapter->dropped_pkts); - case IFCOUNTER_OERRORS: - return (adapter->watchdog_events); -#ifndef IGB_LEGACY_TX - case IFCOUNTER_OQDROPS: - rv = 0; - txr = adapter->tx_rings; - for (int i = 0; i < adapter->num_queues; i++, txr++) - rv += txr->br->br_drops; - return (rv); +#include #endif - default: - return (if_get_counter_default(ifp, cnt)); - } -} - -static uint64_t -igb_get_counter(if_t ifp, ift_counter cnt) -{ - struct adapter *adapter; - struct e1000_hw_stats *stats; -#ifndef IGB_LEGACY_TX - struct tx_ring *txr; - uint64_t rv; -#endif - - adapter = if_getsoftc(ifp); - if (adapter->vf_ifp) - return (igb_get_vf_counter(ifp, cnt)); - - stats = (struct e1000_hw_stats *)adapter->stats; - - switch (cnt) { - case IFCOUNTER_IPACKETS: - return (stats->gprc); - case IFCOUNTER_OPACKETS: - return (stats->gptc); - case IFCOUNTER_IBYTES: - return (stats->gorc); - case IFCOUNTER_OBYTES: - return (stats->gotc); - case IFCOUNTER_IMCASTS: - return (stats->mprc); - case IFCOUNTER_OMCASTS: - return (stats->mptc); - case IFCOUNTER_IERRORS: - return (adapter->dropped_pkts + stats->rxerrc + - stats->crcerrs + stats->algnerrc + - stats->ruc + stats->roc + stats->cexterr); - case IFCOUNTER_OERRORS: - return (stats->ecol + stats->latecol + - adapter->watchdog_events); - case IFCOUNTER_COLLISIONS: - return (stats->colc); - case IFCOUNTER_IQDROPS: - return (stats->mpc); -#ifndef IGB_LEGACY_TX - case IFCOUNTER_OQDROPS: - rv = 0; - txr = adapter->tx_rings; - for (int i = 0; i < adapter->num_queues; i++, txr++) - rv += txr->br->br_drops; - return (rv); -#endif - default: - return (if_get_counter_default(ifp, cnt)); - } -} - -/********************************************************************** - * - * Update the board statistics counters. - * - **********************************************************************/ -static void -igb_update_stats_counters(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_hw_stats *stats; - - /* - ** The virtual function adapter has only a - ** small controlled set of stats, do only - ** those and return. - */ - if (adapter->vf_ifp) { - igb_update_vf_stats_counters(adapter); - return; - } - - stats = (struct e1000_hw_stats *)adapter->stats; - - if (adapter->hw.phy.media_type == e1000_media_type_copper || - (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { - stats->symerrs += - E1000_READ_REG(hw,E1000_SYMERRS); - stats->sec += E1000_READ_REG(hw, E1000_SEC); - } - - stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); - stats->mpc += E1000_READ_REG(hw, E1000_MPC); - stats->scc += E1000_READ_REG(hw, E1000_SCC); - stats->ecol += E1000_READ_REG(hw, E1000_ECOL); - - stats->mcc += E1000_READ_REG(hw, E1000_MCC); - stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); - stats->colc += E1000_READ_REG(hw, E1000_COLC); - stats->dc += E1000_READ_REG(hw, E1000_DC); - stats->rlec += E1000_READ_REG(hw, E1000_RLEC); - stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); - stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); - /* - ** For watchdog management we need to know if we have been - ** paused during the last interval, so capture that here. - */ - adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); - stats->xoffrxc += adapter->pause_frames; - stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); - stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); - stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); - stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); - stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); - stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); - stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); - stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); - stats->gprc += E1000_READ_REG(hw, E1000_GPRC); - stats->bprc += E1000_READ_REG(hw, E1000_BPRC); - stats->mprc += E1000_READ_REG(hw, E1000_MPRC); - stats->gptc += E1000_READ_REG(hw, E1000_GPTC); - - /* For the 64-bit byte counters the low dword must be read first. */ - /* Both registers clear on the read of the high dword */ - - stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + - ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); - stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + - ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); - - stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); - stats->ruc += E1000_READ_REG(hw, E1000_RUC); - stats->rfc += E1000_READ_REG(hw, E1000_RFC); - stats->roc += E1000_READ_REG(hw, E1000_ROC); - stats->rjc += E1000_READ_REG(hw, E1000_RJC); - - stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC); - stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC); - stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC); - - stats->tor += E1000_READ_REG(hw, E1000_TORL) + - ((u64)E1000_READ_REG(hw, E1000_TORH) << 32); - stats->tot += E1000_READ_REG(hw, E1000_TOTL) + - ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32); - - stats->tpr += E1000_READ_REG(hw, E1000_TPR); - stats->tpt += E1000_READ_REG(hw, E1000_TPT); - stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); - stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); - stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); - stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); - stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); - stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); - stats->mptc += E1000_READ_REG(hw, E1000_MPTC); - stats->bptc += E1000_READ_REG(hw, E1000_BPTC); - - /* Interrupt Counts */ - - stats->iac += E1000_READ_REG(hw, E1000_IAC); - stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); - stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); - stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); - stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); - stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); - stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); - stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); - stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); - - /* Host to Card Statistics */ - - stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); - stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); - stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); - stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); - stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); - stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); - stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); - stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + - ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); - stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + - ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); - stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); - stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); - stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); - - stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); - stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); - stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); - stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); - stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); - stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); - - /* Driver specific counters */ - adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); - adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); - adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); - adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); - adapter->packet_buf_alloc_tx = - ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); - adapter->packet_buf_alloc_rx = - (E1000_READ_REG(hw, E1000_PBA) & 0xffff); -} - - -/********************************************************************** - * - * Initialize the VF board statistics counters. - * - **********************************************************************/ -static void -igb_vf_init_stats(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_vf_stats *stats; - - stats = (struct e1000_vf_stats *)adapter->stats; - if (stats == NULL) - return; - stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); - stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); - stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); - stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); - stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); -} - -/********************************************************************** - * - * Update the VF board statistics counters. - * - **********************************************************************/ -static void -igb_update_vf_stats_counters(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_vf_stats *stats; - - if (adapter->link_speed == 0) - return; - - stats = (struct e1000_vf_stats *)adapter->stats; - - UPDATE_VF_REG(E1000_VFGPRC, - stats->last_gprc, stats->gprc); - UPDATE_VF_REG(E1000_VFGORC, - stats->last_gorc, stats->gorc); - UPDATE_VF_REG(E1000_VFGPTC, - stats->last_gptc, stats->gptc); - UPDATE_VF_REG(E1000_VFGOTC, - stats->last_gotc, stats->gotc); - UPDATE_VF_REG(E1000_VFMPRC, - stats->last_mprc, stats->mprc); -} - -/* Export a single 32-bit register via a read-only sysctl. */ -static int -igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter; - u_int val; - - adapter = oidp->oid_arg1; - val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); - return (sysctl_handle_int(oidp, &val, 0, req)); -} - -/* -** Tuneable interrupt rate handler -*/ -static int -igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) -{ - struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); - int error; - u32 reg, usec, rate; - - reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); - usec = ((reg & 0x7FFC) >> 2); - if (usec > 0) - rate = 1000000 / usec; - else - rate = 0; - error = sysctl_handle_int(oidp, &rate, 0, req); - if (error || !req->newptr) - return error; - return 0; -} - -/* - * Add sysctl variables, one per statistic, to the system. - */ -static void -igb_add_hw_stats(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - - struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); - struct sysctl_oid *tree = device_get_sysctl_tree(dev); - struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); - struct e1000_hw_stats *stats = adapter->stats; - - struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; - struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; - -#define QUEUE_NAME_LEN 32 - char namebuf[QUEUE_NAME_LEN]; - - /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", - CTLFLAG_RD, &adapter->dropped_pkts, - "Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", - CTLFLAG_RD, &adapter->link_irq, - "Link MSIX IRQ Handled"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", - CTLFLAG_RD, &adapter->mbuf_defrag_failed, - "Defragmenting mbuf chain failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", - CTLFLAG_RD, &adapter->no_tx_dma_setup, - "Driver tx dma failure in xmit"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", - CTLFLAG_RD, &adapter->rx_overruns, - "RX overruns"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", - CTLFLAG_RD, &adapter->watchdog_events, - "Watchdog timeouts"); - - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", - CTLFLAG_RD, &adapter->device_control, - "Device Control Register"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", - CTLFLAG_RD, &adapter->rx_control, - "Receiver Control Register"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", - CTLFLAG_RD, &adapter->int_mask, - "Interrupt Mask"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", - CTLFLAG_RD, &adapter->eint_mask, - "Extended Interrupt Mask"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", - CTLFLAG_RD, &adapter->packet_buf_alloc_tx, - "Transmit Buffer Packet Allocation"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", - CTLFLAG_RD, &adapter->packet_buf_alloc_rx, - "Receive Buffer Packet Allocation"); - SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", - CTLFLAG_RD, &adapter->hw.fc.high_water, 0, - "Flow Control High Watermark"); - SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", - CTLFLAG_RD, &adapter->hw.fc.low_water, 0, - "Flow Control Low Watermark"); - - for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { - struct lro_ctrl *lro = &rxr->lro; - - snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); - queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, - CTLFLAG_RD, NULL, "Queue Name"); - queue_list = SYSCTL_CHILDREN(queue_node); - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", - CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i], - sizeof(&adapter->queues[i]), - igb_sysctl_interrupt_rate_handler, - "IU", "Interrupt Rate"); - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), - igb_sysctl_reg_handler, "IU", - "Transmit Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), - igb_sysctl_reg_handler, "IU", - "Transmit Descriptor Tail"); - SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", - CTLFLAG_RD, &txr->no_desc_avail, - "Queue Descriptors Unavailable"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", - CTLFLAG_RD, &txr->total_packets, - "Queue Packets Transmitted"); - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), - igb_sysctl_reg_handler, "IU", - "Receive Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), - igb_sysctl_reg_handler, "IU", - "Receive Descriptor Tail"); - SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", - CTLFLAG_RD, &rxr->rx_packets, - "Queue Packets Received"); - SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", - CTLFLAG_RD, &rxr->rx_bytes, - "Queue Bytes Received"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", - CTLFLAG_RD, &lro->lro_queued, 0, - "LRO Queued"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", - CTLFLAG_RD, &lro->lro_flushed, 0, - "LRO Flushed"); - } - - /* MAC stats get their own sub node */ - - stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", - CTLFLAG_RD, NULL, "MAC Statistics"); - stat_list = SYSCTL_CHILDREN(stat_node); - - /* - ** VF adapter has a very limited set of stats - ** since its not managing the metal, so to speak. - */ - if (adapter->vf_ifp) { - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", - CTLFLAG_RD, &stats->gprc, - "Good Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", - CTLFLAG_RD, &stats->gptc, - "Good Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", - CTLFLAG_RD, &stats->gorc, - "Good Octets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &stats->gotc, - "Good Octets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", - CTLFLAG_RD, &stats->mprc, - "Multicast Packets Received"); - return; - } - - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", - CTLFLAG_RD, &stats->ecol, - "Excessive collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", - CTLFLAG_RD, &stats->scc, - "Single collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", - CTLFLAG_RD, &stats->mcc, - "Multiple collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", - CTLFLAG_RD, &stats->latecol, - "Late collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", - CTLFLAG_RD, &stats->colc, - "Collision Count"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", - CTLFLAG_RD, &stats->symerrs, - "Symbol Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", - CTLFLAG_RD, &stats->sec, - "Sequence Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", - CTLFLAG_RD, &stats->dc, - "Defer Count"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", - CTLFLAG_RD, &stats->mpc, - "Missed Packets"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors", - CTLFLAG_RD, &stats->rlec, - "Receive Length Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", - CTLFLAG_RD, &stats->rnbc, - "Receive No Buffers"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", - CTLFLAG_RD, &stats->ruc, - "Receive Undersize"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", - CTLFLAG_RD, &stats->rfc, - "Fragmented Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", - CTLFLAG_RD, &stats->roc, - "Oversized Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", - CTLFLAG_RD, &stats->rjc, - "Recevied Jabber"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", - CTLFLAG_RD, &stats->rxerrc, - "Receive Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", - CTLFLAG_RD, &stats->crcerrs, - "CRC errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", - CTLFLAG_RD, &stats->algnerrc, - "Alignment Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs", - CTLFLAG_RD, &stats->tncrs, - "Transmit with No CRS"); - /* On 82575 these are collision counts */ - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", - CTLFLAG_RD, &stats->cexterr, - "Collision/Carrier extension errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", - CTLFLAG_RD, &stats->xonrxc, - "XON Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", - CTLFLAG_RD, &stats->xontxc, - "XON Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", - CTLFLAG_RD, &stats->xoffrxc, - "XOFF Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", - CTLFLAG_RD, &stats->xofftxc, - "XOFF Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd", - CTLFLAG_RD, &stats->fcruc, - "Unsupported Flow Control Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd", - CTLFLAG_RD, &stats->mgprc, - "Management Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop", - CTLFLAG_RD, &stats->mgpdc, - "Management Packets Dropped"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd", - CTLFLAG_RD, &stats->mgptc, - "Management Packets Transmitted"); - /* Packet Reception Stats */ - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", - CTLFLAG_RD, &stats->tpr, - "Total Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", - CTLFLAG_RD, &stats->gprc, - "Good Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", - CTLFLAG_RD, &stats->bprc, - "Broadcast Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", - CTLFLAG_RD, &stats->mprc, - "Multicast Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", - CTLFLAG_RD, &stats->prc64, - "64 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", - CTLFLAG_RD, &stats->prc127, - "65-127 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", - CTLFLAG_RD, &stats->prc255, - "128-255 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", - CTLFLAG_RD, &stats->prc511, - "256-511 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", - CTLFLAG_RD, &stats->prc1023, - "512-1023 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", - CTLFLAG_RD, &stats->prc1522, - "1023-1522 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", - CTLFLAG_RD, &stats->gorc, - "Good Octets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", - CTLFLAG_RD, &stats->tor, - "Total Octets Received"); - - /* Packet Transmission Stats */ - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &stats->gotc, - "Good Octets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", - CTLFLAG_RD, &stats->tot, - "Total Octets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", - CTLFLAG_RD, &stats->tpt, - "Total Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", - CTLFLAG_RD, &stats->gptc, - "Good Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", - CTLFLAG_RD, &stats->bptc, - "Broadcast Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", - CTLFLAG_RD, &stats->mptc, - "Multicast Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", - CTLFLAG_RD, &stats->ptc64, - "64 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", - CTLFLAG_RD, &stats->ptc127, - "65-127 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", - CTLFLAG_RD, &stats->ptc255, - "128-255 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", - CTLFLAG_RD, &stats->ptc511, - "256-511 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", - CTLFLAG_RD, &stats->ptc1023, - "512-1023 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", - CTLFLAG_RD, &stats->ptc1522, - "1024-1522 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", - CTLFLAG_RD, &stats->tsctc, - "TSO Contexts Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", - CTLFLAG_RD, &stats->tsctfc, - "TSO Contexts Failed"); - - - /* Interrupt Stats */ - - int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", - CTLFLAG_RD, NULL, "Interrupt Statistics"); - int_list = SYSCTL_CHILDREN(int_node); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", - CTLFLAG_RD, &stats->iac, - "Interrupt Assertion Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", - CTLFLAG_RD, &stats->icrxptc, - "Interrupt Cause Rx Pkt Timer Expire Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", - CTLFLAG_RD, &stats->icrxatc, - "Interrupt Cause Rx Abs Timer Expire Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", - CTLFLAG_RD, &stats->ictxptc, - "Interrupt Cause Tx Pkt Timer Expire Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", - CTLFLAG_RD, &stats->ictxatc, - "Interrupt Cause Tx Abs Timer Expire Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", - CTLFLAG_RD, &stats->ictxqec, - "Interrupt Cause Tx Queue Empty Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", - CTLFLAG_RD, &stats->ictxqmtc, - "Interrupt Cause Tx Queue Min Thresh Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", - CTLFLAG_RD, &stats->icrxdmtc, - "Interrupt Cause Rx Desc Min Thresh Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", - CTLFLAG_RD, &stats->icrxoc, - "Interrupt Cause Receiver Overrun Count"); - - /* Host to Card Stats */ - - host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", - CTLFLAG_RD, NULL, - "Host to Card Statistics"); - - host_list = SYSCTL_CHILDREN(host_node); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", - CTLFLAG_RD, &stats->cbtmpc, - "Circuit Breaker Tx Packet Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", - CTLFLAG_RD, &stats->htdpmc, - "Host Transmit Discarded Packets"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", - CTLFLAG_RD, &stats->rpthc, - "Rx Packets To Host"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", - CTLFLAG_RD, &stats->cbrmpc, - "Circuit Breaker Rx Packet Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", - CTLFLAG_RD, &stats->cbrdpc, - "Circuit Breaker Rx Dropped Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", - CTLFLAG_RD, &stats->hgptc, - "Host Good Packets Tx Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", - CTLFLAG_RD, &stats->htcbdpc, - "Host Tx Circuit Breaker Dropped Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", - CTLFLAG_RD, &stats->hgorc, - "Host Good Octets Received Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", - CTLFLAG_RD, &stats->hgotc, - "Host Good Octets Transmit Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", - CTLFLAG_RD, &stats->lenerrs, - "Length Errors"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", - CTLFLAG_RD, &stats->scvpc, - "SerDes/SGMII Code Violation Pkt Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", - CTLFLAG_RD, &stats->hrmpc, - "Header Redirection Missed Packet Count"); -} - - -/********************************************************************** - * - * This routine provides a way to dump out the adapter eeprom, - * often a useful debug/service tool. This only dumps the first - * 32 words, stuff that matters is in that extent. - * - **********************************************************************/ -static int -igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter; - int error; - int result; - - result = -1; - error = sysctl_handle_int(oidp, &result, 0, req); - - if (error || !req->newptr) - return (error); - - /* - * This value will cause a hex dump of the - * first 32 16-bit words of the EEPROM to - * the screen. - */ - if (result == 1) { - adapter = (struct adapter *)arg1; - igb_print_nvm_info(adapter); - } - - return (error); -} - -static void -igb_print_nvm_info(struct adapter *adapter) -{ - u16 eeprom_data; - int i, j, row = 0; - - /* Its a bit crude, but it gets the job done */ - printf("\nInterface EEPROM Dump:\n"); - printf("Offset\n0x0000 "); - for (i = 0, j = 0; i < 32; i++, j++) { - if (j == 8) { /* Make the offset block */ - j = 0; ++row; - printf("\n0x00%x0 ",row); - } - e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); - printf("%04x ", eeprom_data); - } - printf("\n"); -} - -static void -igb_set_sysctl_value(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} - -/* -** Set flow control using sysctl: -** Flow control values: -** 0 - off -** 1 - rx pause -** 2 - tx pause -** 3 - full -*/ -static int -igb_set_flowcntl(SYSCTL_HANDLER_ARGS) -{ - int error; - static int input = 3; /* default is full */ - struct adapter *adapter = (struct adapter *) arg1; - - error = sysctl_handle_int(oidp, &input, 0, req); - - if ((error) || (req->newptr == NULL)) - return (error); - - switch (input) { - case e1000_fc_rx_pause: - case e1000_fc_tx_pause: - case e1000_fc_full: - case e1000_fc_none: - adapter->hw.fc.requested_mode = input; - adapter->fc = input; - break; - default: - /* Do nothing */ - return (error); - } - - adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; - e1000_force_mac_fc(&adapter->hw); - /* XXX TODO: update DROP_EN on each RX queue if appropriate */ - return (error); -} - -/* -** Manage DMA Coalesce: -** Control values: -** 0/1 - off/on -** Legal timer values are: -** 250,500,1000-10000 in thousands -*/ -static int -igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - int error; - - error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); - - if ((error) || (req->newptr == NULL)) - return (error); - - switch (adapter->dmac) { - case 0: - /* Disabling */ - break; - case 1: /* Just enable and use default */ - adapter->dmac = 1000; - break; - case 250: - case 500: - case 1000: - case 2000: - case 3000: - case 4000: - case 5000: - case 6000: - case 7000: - case 8000: - case 9000: - case 10000: - /* Legal values - allow */ - break; - default: - /* Do nothing, illegal value */ - adapter->dmac = 0; - return (EINVAL); - } - /* Reinit the interface */ - igb_init(adapter); - return (error); -} - -/* -** Manage Energy Efficient Ethernet: -** Control values: -** 0/1 - enabled/disabled -*/ -static int -igb_sysctl_eee(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - int error, value; - value = adapter->hw.dev_spec._82575.eee_disable; - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || req->newptr == NULL) - return (error); - IGB_CORE_LOCK(adapter); - adapter->hw.dev_spec._82575.eee_disable = (value != 0); - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); - return (0); -} Index: sys/dev/e1000/iflib_if_igb.h =================================================================== --- /dev/null +++ sys/dev/e1000/iflib_if_igb.h @@ -0,0 +1,583 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/*$FreeBSD$*/ + +#ifndef _IF_IGB_H_ +#define _IF_IGB_H_ + +#ifdef ALTQ +#define IGB_LEGACY_TX +#endif + +#include +#include +#ifndef IGB_LEGACY_TX +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef RSS +#include +#include +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "e1000_api.h" +#include "e1000_82575.h" + +/* Tunables */ +/* + * IGB_TXD: Maximum number of Transmit Descriptors + * + * This value is the number of transmit descriptors allocated by the driver. + * Increasing this value allows the driver to queue more transmits. Each + * descriptor is 16 bytes. + * Since TDLEN should be multiple of 128bytes, the number of transmit + * descriptors should meet the following condition. + * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 + */ +#define IGB_MIN_TXD 128 +#define IGB_DEFAULT_TXD 1024 +#define IGB_MAX_TXD 4096 + +/* + * IGB_RXD: Maximum number of Receive Descriptors + * + * This value is the number of receive descriptors allocated by the driver. + * Increasing this value allows the driver to buffer more incoming packets. + * Each descriptor is 16 bytes. A receive buffer is also allocated for each + * descriptor. The maximum MTU size is 16110. + * Since TDLEN should be multiple of 128bytes, the number of transmit + * descriptors should meet the following condition. + * (num_rx_desc * sizeof(struct e1000_rx_desc)) % 128 == 0 + */ +#define IGB_MIN_RXD 128 +#define IGB_DEFAULT_RXD 1024 +#define IGB_MAX_RXD 4096 + +/* + * IGB_TIDV - Transmit Interrupt Delay Value + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value delays the generation of transmit interrupts in units of + * 1.024 microseconds. Transmit interrupt reduction can improve CPU + * efficiency if properly tuned for specific network traffic. If the + * system is reporting dropped transmits, this value may be set too high + * causing the driver to run out of available transmit descriptors. + */ +#define IGB_TIDV 64 + +/* + * IGB_TADV - Transmit Absolute Interrupt Delay Value + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value, in units of 1.024 microseconds, limits the delay in which a + * transmit interrupt is generated. Useful only if IGB_TIDV is non-zero, + * this value ensures that an interrupt is generated after the initial + * packet is sent on the wire within the set amount of time. Proper tuning, + * along with IGB_TIDV, may improve traffic throughput in specific + * network conditions. + */ +#define IGB_TADV 64 + +/* + * IGB_RDTR - Receive Interrupt Delay Timer (Packet Timer) + * Valid Range: 0-65535 (0=off) + * Default Value: 0 + * This value delays the generation of receive interrupts in units of 1.024 + * microseconds. Receive interrupt reduction can improve CPU efficiency if + * properly tuned for specific network traffic. Increasing this value adds + * extra latency to frame reception and can end up decreasing the throughput + * of TCP traffic. If the system is reporting dropped receives, this value + * may be set too high, causing the driver to run out of available receive + * descriptors. + * + * CAUTION: When setting IGB_RDTR to a value other than 0, adapters + * may hang (stop transmitting) under certain network conditions. + * If this occurs a WATCHDOG message is logged in the system + * event log. In addition, the controller is automatically reset, + * restoring the network connection. To eliminate the potential + * for the hang ensure that IGB_RDTR is set to 0. + */ +#define IGB_RDTR 0 + +/* + * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value, in units of 1.024 microseconds, limits the delay in which a + * receive interrupt is generated. Useful only if IGB_RDTR is non-zero, + * this value ensures that an interrupt is generated after the initial + * packet is received within the set amount of time. Proper tuning, + * along with IGB_RDTR, may improve traffic throughput in specific network + * conditions. + */ +#define IGB_RADV 64 + +/* + * This parameter controls the duration of transmit watchdog timer. + */ +#define IGB_WATCHDOG (10 * hz) + +/* + * This parameter controls when the driver calls the routine to reclaim + * transmit descriptors. Cleaning earlier seems a win. + */ +#define IGB_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 2) + +/* + * This parameter controls whether or not autonegotation is enabled. + * 0 - Disable autonegotiation + * 1 - Enable autonegotiation + */ +#define DO_AUTO_NEG 1 + +/* + * This parameter control whether or not the driver will wait for + * autonegotiation to complete. + * 1 - Wait for autonegotiation to complete + * 0 - Don't wait for autonegotiation to complete + */ +#define WAIT_FOR_AUTO_NEG_DEFAULT 0 + +/* Tunables -- End */ + +#define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ + ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ + ADVERTISE_1000_FULL) + +#define AUTO_ALL_MODES 0 + +/* PHY master/slave setting */ +#define IGB_MASTER_SLAVE e1000_ms_hw_default + +/* Support AutoMediaDetect for Marvell M88 PHY in i354 */ +#define IGB_MEDIA_RESET (1 << 0) + +/* + * Micellaneous constants + */ +#define IGB_INTEL_VENDOR_ID 0x8086 + +#define IGB_JUMBO_PBA 0x00000028 +#define IGB_DEFAULT_PBA 0x00000030 +#define IGB_SMARTSPEED_DOWNSHIFT 3 +#define IGB_SMARTSPEED_MAX 15 +#define IGB_MAX_LOOP 10 + +#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ + ((hw->mac.type <= e1000_82576) ? 16 : 8)) +#define IGB_RX_HTHRESH 8 +#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ + (adapter->intr_type == IFLIB_INTR_MSIX)) ? 1 : 4) + +#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) +#define IGB_TX_HTHRESH 1 +#define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ + (adapter->msix_mem) ? 1 : 16) + +#define MAX_NUM_MULTICAST_ADDRESSES 128 +#define PCI_ANY_ID (~0U) +#define ETHER_ALIGN 2 +#define IGB_TX_BUFFER_SIZE ((uint32_t) 1514) +#define IGB_FC_PAUSE_TIME 0x0680 +#define IGB_EEPROM_APME 0x400; +/* Queue minimum free for use */ +#define IGB_QUEUE_THRESHOLD (adapter->num_tx_desc / 8) + +/* + * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be + * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will + * also optimize cache line size effect. H/W supports up to cache line size 128. + */ +#define IGB_DBA_ALIGN 128 + +#define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */ + +/* PCI Config defines */ +#define IGB_MSIX_BAR 3 + +/* Defines for printing debug information */ +#define DEBUG_INIT 0 +#define DEBUG_IOCTL 0 +#define DEBUG_HW 0 + +#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") +#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) +#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) +#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") +#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) +#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) +#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") +#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) +#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) + +#define IGB_MAX_SCATTER 40 +#define IGB_VFTA_SIZE 128 +#define IGB_BR_SIZE 4096 /* ring buf size */ +#define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) +#define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */ +#define IGB_TXPBSIZE 20408 +#define IGB_HDR_BUF 128 +#define IGB_PKTTYPE_MASK 0x0000FFF0 +#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ +#define ETH_ZLEN 60 +#define ETH_ADDR_LEN 6 + +/* Offload bits in mbuf flag */ +#if __FreeBSD_version >= 1000000 +#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP|CSUM_IP_SCTP) +#define CSUM_OFFLOAD_IPV6 (CSUM_IP6_TCP|CSUM_IP6_UDP|CSUM_IP6_SCTP) +#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6) +#elif __FreeBSD_version >= 800000 +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) +#else +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) +#endif + +/* Define the starting Interrupt rate per Queue */ +#define IGB_INTS_PER_SEC 8000 +#define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) + +#define IGB_LINK_ITR 2000 +#define I210_LINK_DELAY 1000 + +/* Precision Time Sync (IEEE 1588) defines */ +#define ETHERTYPE_IEEE1588 0x88F7 +#define PICOSECS_PER_TICK 20833 +#define TSYNC_PORT 319 /* UDP port for the protocol */ + +/* + * Bus dma allocation structure used by + * e1000_dma_malloc and e1000_dma_free. + */ +struct igb_dma_alloc { + bus_addr_t dma_paddr; + caddr_t dma_vaddr; + bus_dma_tag_t dma_tag; + bus_dmamap_t dma_map; + bus_dma_segment_t dma_seg; + int dma_nseg; +}; + +/* + * The transmit ring, one per queue + */ +struct tx_ring { + struct adapter *adapter; + struct igb_tx_queue *que; + u32 me; + u32 tail; + int watchdog_time; + union e1000_adv_tx_desc *tx_base; + struct igb_tx_buf *tx_buffers; + uint64_t tx_paddr; + volatile u16 tx_avail; + enum { + IGB_QUEUE_IDLE = 1, + IGB_QUEUE_WORKING = 2, + IGB_QUEUE_HUNG = 4, + IGB_QUEUE_DEPLETED = 8, + } queue_status; + u32 txd_cmd; + + u32 bytes; /* used for AIM */ + u32 packets; + /* Soft Stats */ + unsigned long tso_tx; + unsigned long no_tx_map_avail; + unsigned long no_tx_dma_setup; + u64 no_desc_avail; + u64 total_packets; +}; + +/* + * Receive ring: one per queue + */ +struct rx_ring { + struct adapter *adapter; + struct igb_rx_queue *que; + u32 me; + u32 tail; + union e1000_adv_rx_desc *rx_base; + uint64_t rx_paddr; + struct lro_ctrl lro; + bool lro_enabled; + bool hdr_split; + struct igb_rx_buf *rx_buffers; + + u32 bytes; + u32 packets; + int rdt; + int rdh; + + /* Soft stats */ + u64 rx_split_packets; + u64 rx_discarded; + u64 rx_packets; + u64 rx_bytes; +}; + +/* +** Driver queue struct: this is the interrupt container +** for the associated tx and rx ring. +*/ +struct igb_tx_queue { + struct adapter *adapter; + struct tx_ring txr; + u32 me; + u32 eims; + u32 msix; +}; + +struct igb_rx_queue { + struct adapter *adapter; + u32 msix; /* This queue's MSIX vector */ + u32 eims; /* This queue's EIMS bit */ + u32 eitr_setting; + u32 me; + struct resource *res; + void *tag; + struct rx_ring rxr; + u64 irqs; + + struct if_irq que_irq; +}; + +struct adapter { + if_softc_ctx_t shared; + if_ctx_t ctx; +#define tx_num_queues shared->isc_ntxqsets +#define rx_num_queues shared->isc_nrxqsets +#define max_frame_size shared->isc_max_frame_size +#define intr_type shared->isc_intr + struct ifnet *ifp; + struct e1000_hw hw; + + struct e1000_osdep osdep; + struct device *dev; + struct cdev *led_dev; + + struct if_irq irq; + struct resource *pci_mem; + int memrid; + + struct igb_tx_queue *tx_queues; + struct igb_rx_queue *rx_queues; + /* + * Interrupt resources: this set is + * either used for legacy, or for Link + * when doing MSIX + */ + void *tag; + struct resource *res; + + struct ifmedia *media; + int if_flags; + int pause_frames; + + + /* + ** Shadow VFTA table, this is needed because + ** the real vlan filter table gets cleared during + ** a soft reset and the driver needs to be able + ** to repopulate it. + */ + u32 shadow_vfta[IGB_VFTA_SIZE]; + + /* Info about the interface */ + u32 optics; + u32 fc; /* local flow ctrl setting */ + int advertise; /* link speeds */ + bool link_active; + u16 num_segs; + u16 link_speed; + bool link_up; + u32 linkvec; + u16 link_duplex; + u32 dmac; + int link_mask; + + /* Flags */ + u32 flags; + + /* Mbuf cluster size */ + u32 rx_mbuf_sz; + + /* Support for pluggable optics */ + bool sfp_probe; + + struct grouptask mod_task; /* SFP tasklet */ + struct grouptask msf_task; /* Multispeed Fiber */ + + u64 que_mask; + + /* Multicast array memory */ + u8 *mta; + + /* Misc stats maintained by the driver */ + unsigned long device_control; + unsigned long dropped_pkts; + unsigned long eint_mask; + unsigned long int_mask; + unsigned long packet_buf_alloc_rx; + unsigned long packet_buf_alloc_tx; + unsigned long rx_control; + unsigned long rx_overruns; + unsigned long watchdog_events; + + /* Used in pf and vf */ + void *stats; + + int enable_aim; + int has_manage; + int wol; + int rx_process_limit; + int tx_process_limit; + u16 vf_ifp; /* a VF interface */ + bool in_detach; /* Used only in igb_ioctl */ + +}; + +/* ****************************************************************************** + * vendor_info_array + * + * This array contains the list of Subvendor/Subdevice IDs on which the driver + * should load. + * + * ******************************************************************************/ +typedef struct _igb_vendor_info_t { + unsigned int vendor_id; + unsigned int device_id; + unsigned int subvendor_id; + unsigned int subdevice_id; + unsigned int index; +} igb_vendor_info_t; + +struct igb_tx_buf { + union e1000_adv_tx_desc *eop; +}; + +struct igb_rx_buf { + struct mbuf *m_head; + struct mbuf *m_pack; + bus_dmamap_t hmap; /* bus_dma map for header */ + bus_dmamap_t pmap; /* bus_dma map for packet */ +}; + + +#define IGB_CORE_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF) +#define IGB_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) +#define IGB_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) +#define IGB_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) +#define IGB_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) + +#define IGB_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) +#define IGB_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) +#define IGB_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) +#define IGB_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) +#define IGB_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) + +#define IGB_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) +#define IGB_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) +#define IGB_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) +#define IGB_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) + +#define UPDATE_VF_REG(reg, last, cur) \ +{ \ + u32 new = E1000_READ_REG(hw, reg); \ + if (new < last) \ + cur += 0x100000000LL; \ + last = new; \ + cur &= 0xFFFFFFFF00000000LL; \ + cur |= new; \ +} + +#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504 +static __inline int +drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + return (1); +#endif + return (!buf_ring_empty(br)); +} +#endif + +#endif /* _IF_IGB_H_ */ + + Index: sys/dev/e1000/iflib_if_igb.c =================================================================== --- /dev/null +++ sys/dev/e1000/iflib_if_igb.c @@ -0,0 +1,3683 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/*$FreeBSD$*/ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_rss.h" + +#ifdef HAVE_KERNEL_OPTION_HEADERS +#include "opt_device_polling.h" +#include "opt_altq.h" +#endif + +#include "iflib_if_igb.h" +#include "ifdi_if.h" + +#include +#include +#include +#include +#include + +/********************************************************************* + * Driver version: + *********************************************************************/ +char igb_driver_version[] = "2.5.3-k"; + +/********************************************************************* + * PCI Device ID Table + * + * Used by probe to select devices to load on + * Last field stores an index into e1000_strings + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } + *********************************************************************/ + +static pci_vendor_info_t igb_vendor_info_array[] = +{ + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + /* required last entry */ + PVID_END +}; + +/********************************************************************* + * Function prototypes + *********************************************************************/ +static void *igb_register(device_t dev); +static int igb_if_attach_pre(if_ctx_t); +static int igb_if_attach_post(if_ctx_t ctx); +static int igb_if_detach(if_ctx_t); +static int igb_if_shutdown(if_ctx_t); +static int igb_if_suspend(if_ctx_t); +static int igb_if_resume(if_ctx_t); + +static void igb_if_stop(if_ctx_t ctx); +static void igb_if_init(if_ctx_t ctx); +static void igb_if_enable_intr(if_ctx_t ctx); +static void igb_if_disable_intr(if_ctx_t ctx); +static int igb_if_media_change(if_ctx_t ctx); +static int igb_if_msix_intr_assign(if_ctx_t, int); +static void igb_if_update_admin_status(if_ctx_t ctx); +static void igb_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); +static void igb_if_multi_set(if_ctx_t ctx); +static int igb_if_mtu_set(if_ctx_t ctx, uint32_t mtu); + +static int igb_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int nrqsets); +static int igb_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); +static void igb_if_queues_free(if_ctx_t ctx); +static uint64_t igb_if_get_counter(if_ctx_t, ift_counter); +static void igb_identify_hardware(if_ctx_t ctx); +static int igb_allocate_pci_resources(if_ctx_t ctx); +static void igb_free_pci_resources(if_ctx_t ctx); +static void igb_reset(if_ctx_t ctx); +static int igb_setup_interface(if_ctx_t ctx); +static int igb_set_num_queues(if_ctx_t ctx); + +static void igb_configure_queues(struct adapter *); +static void igb_initialize_rss_mapping(struct adapter *adapter); +static void igb_initialize_transmit_units(if_ctx_t ctx); +static void igb_initialize_receive_units(if_ctx_t ctx); +static void igb_if_timer(if_ctx_t ctx, uint16_t qid); +static void igb_update_stats_counters(struct adapter *); + +static int igb_if_set_promisc(if_ctx_t ctx, int flags); +static void igb_disable_promisc(if_ctx_t ctx); + +static void igb_if_vlan_register(if_ctx_t, u16); +static void igb_if_vlan_unregister(if_ctx_t, u16); +static void igb_setup_vlan_hw_support(if_ctx_t ctx); + + int igb_intr(void *arg); +static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); +static void igb_print_nvm_info(struct adapter *); +static int igb_is_valid_ether_addr(u8 *); +static void igb_add_hw_stats(struct adapter *); +static void igb_if_led_func(if_ctx_t, int); + +static void igb_vf_init_stats(struct adapter *); +static void igb_update_vf_stats_counters(struct adapter *); +static void igb_init_dmac(if_ctx_t ctx, u32 pba); +/*static void igb_handle_link(void *context); Legacy mode*/ + +/* Management and WOL Support */ +static void igb_init_manageability(struct adapter *); +static void igb_release_manageability(struct adapter *); +static void igb_get_hw_control(struct adapter *); +static void igb_release_hw_control(struct adapter *); +static void igb_enable_wakeup(device_t); +static void igb_enable_queue(struct adapter *adapter, struct igb_rx_queue *que); +static int igb_if_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); + +static int igb_msix_que(void *); +static int igb_msix_link(void *); +static void igb_set_sysctl_value(struct adapter *, const char *, + const char *, int *, int); +static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); +static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); +static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); + +extern void igb_init_tx_ring(struct igb_tx_queue *que); +static int igb_get_regs(SYSCTL_HANDLER_ARGS); + +/********************************************************************* + * FreeBSD Device Interface Entry Points + *********************************************************************/ +static device_method_t igb_methods[] = { + /* Device interface */ + DEVMETHOD(device_register, igb_register), + DEVMETHOD(device_probe, iflib_device_probe), + DEVMETHOD(device_attach, iflib_device_attach), + DEVMETHOD(device_detach, iflib_device_detach), + DEVMETHOD(device_shutdown, iflib_device_shutdown), + DEVMETHOD(device_suspend, iflib_device_suspend), + DEVMETHOD(device_resume, iflib_device_resume), + DEVMETHOD_END +}; + +static driver_t igb_driver = { + "igb", igb_methods, sizeof(struct adapter), +}; + +static devclass_t igb_devclass; +DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); + +MODULE_DEPEND(igb, pci, 1, 1, 1); +MODULE_DEPEND(igb, ether, 1, 1, 1); +MODULE_DEPEND(igb, iflib, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(igb, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ + +static device_method_t igb_if_methods[] = { + DEVMETHOD(ifdi_attach_pre, igb_if_attach_pre), + DEVMETHOD(ifdi_attach_post, igb_if_attach_post), + DEVMETHOD(ifdi_detach, igb_if_detach), + DEVMETHOD(ifdi_shutdown, igb_if_shutdown), + DEVMETHOD(ifdi_suspend, igb_if_suspend), + DEVMETHOD(ifdi_resume, igb_if_resume), + DEVMETHOD(ifdi_init, igb_if_init), + DEVMETHOD(ifdi_stop, igb_if_stop), + DEVMETHOD(ifdi_msix_intr_assign, igb_if_msix_intr_assign), + DEVMETHOD(ifdi_intr_enable, igb_if_enable_intr), + DEVMETHOD(ifdi_intr_disable, igb_if_disable_intr), + DEVMETHOD(ifdi_tx_queues_alloc, igb_if_tx_queues_alloc), + DEVMETHOD(ifdi_rx_queues_alloc, igb_if_rx_queues_alloc), + DEVMETHOD(ifdi_queues_free, igb_if_queues_free), + DEVMETHOD(ifdi_update_admin_status, igb_if_update_admin_status), + DEVMETHOD(ifdi_led_func, igb_if_led_func), + DEVMETHOD(ifdi_multi_set, igb_if_multi_set), + DEVMETHOD(ifdi_media_status, igb_if_media_status), + DEVMETHOD(ifdi_media_change, igb_if_media_change), + DEVMETHOD(ifdi_mtu_set, igb_if_mtu_set), + DEVMETHOD(ifdi_promisc_set, igb_if_set_promisc), + DEVMETHOD(ifdi_timer, igb_if_timer), + DEVMETHOD(ifdi_vlan_register, igb_if_vlan_register), + DEVMETHOD(ifdi_vlan_unregister, igb_if_vlan_unregister), + DEVMETHOD(ifdi_get_counter, igb_if_get_counter), + DEVMETHOD(ifdi_queue_intr_enable, igb_if_queue_intr_enable), + DEVMETHOD_END +}; + +/* + * note that if (adapter->msix_mem) is replaced by: + * if (adapter->intr_type == IFLIB_INTR_MSIX) + */ +static driver_t igb_if_driver = { + "igb_if", igb_if_methods, sizeof(struct adapter) +}; + +/********************************************************************* + * Tunable default values. + *********************************************************************/ + +static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); + +/* +** AIM: Adaptive Interrupt Moderation +** which means that the interrupt rate +** is varied over time based on the +** traffic for that interrupt vector +*/ +static int igb_enable_aim = TRUE; +SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0, + "Enable adaptive interrupt moderation"); + +/* + * MSIX should be the default for best performance, + * but this allows it to be forced off for testing. + */ +static int igb_enable_msix = 1; +SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, + "Enable MSI-X interrupts"); + +/* +** Tuneable Interrupt rate +*/ +static int igb_max_interrupt_rate = 8000; +SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, + &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); + +#ifndef IGB_LEGACY_TX +/* +** Tuneable number of buffers in the buf-ring (drbr_xxx) +*/ +static int igb_buf_ring_size = IGB_BR_SIZE; +SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, + &igb_buf_ring_size, 0, "Size of the bufring"); +#endif + +/* +** Header split causes the packet header to +** be dma'd to a separate mbuf from the payload. +** this can have memory alignment benefits. But +** another plus is that small packets often fit +** into the header and thus use no cluster. Its +** a very workload dependent type feature. +*/ +static int igb_header_split = FALSE; +SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, + "Enable receive mbuf header split"); + +/* +** Global variable to store last used CPU when binding queues +** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a +** queue is bound to a cpu. +*/ +int igb_last_bind_cpu = -1; + +extern struct if_txrx igb_txrx; + +static struct if_shared_ctx igb_sctx_init = { + .isc_magic = IFLIB_MAGIC, + .isc_q_align = IGB_DBA_ALIGN,/* max(DBA_ALIGN, PAGE_SIZE) */ + .isc_tx_maxsize = IGB_TSO_SIZE, + .isc_tx_maxsegsize = PAGE_SIZE, + + .isc_rx_maxsize = MSIZE, + .isc_rx_nsegments = 1, + .isc_rx_maxsegsize = MSIZE, + .isc_nfl = 1, + .isc_ntxqs = 1, + .isc_nrxqs = 1, + + .isc_admin_intrcnt = 1, + .isc_vendor_info = igb_vendor_info_array, + .isc_driver_version = igb_driver_version, + .isc_txrx = &igb_txrx, + .isc_driver = &igb_if_driver, + + .isc_nrxd_min = {IGB_MIN_RXD}, + .isc_ntxd_min = {IGB_MIN_TXD}, + .isc_nrxd_max = {IGB_MAX_RXD}, + .isc_ntxd_max = {IGB_MAX_TXD}, + .isc_nrxd_default = {IGB_DEFAULT_RXD}, + .isc_ntxd_default = {IGB_DEFAULT_TXD}, +}; + +if_shared_ctx_t igb_sctx = &igb_sctx_init; + +#define IGB_READ_REG(a, reg) igb_read_reg(a, reg) + +inline u32 +igb_read_reg(struct e1000_hw *hw, u32 reg) +{ + return bus_space_read_4(((struct adapter *)hw->back)->osdep.mem_bus_space_tag, + ((struct adapter *)hw->back)->osdep.mem_bus_space_handle, + reg); +} + +/***************************************************************** + * + * Dump Registers + * + ****************************************************************/ +#define IGB_REGS_LEN 739 + +static int igb_get_regs(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *)arg1; + struct e1000_hw *hw = &adapter->hw; + if_softc_ctx_t scctx = adapter->shared; + struct sbuf *sb; + u32 *regs_buff = (u32 *)malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_NOWAIT); + + struct igb_tx_queue *tx_que = &adapter->tx_queues[0]; + struct igb_rx_queue *rx_que = &adapter->rx_queues[0]; + struct rx_ring *rxr = &rx_que->rxr; + struct tx_ring *txr = &tx_que->txr; + int rc, j; + int ntxd = scctx->isc_ntxd[0]; + int nrxd = scctx->isc_nrxd[0]; + + memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); + + rc = sysctl_wire_old_buffer(req, 0); + MPASS(rc == 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); + MPASS(sb != NULL); + if (sb == NULL) + return (ENOMEM); + + /* General Registers */ + regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); + regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); + regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); + regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); + regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); + regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); + regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); + regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); + regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); + regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); + regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); + regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); + regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); + regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); + regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); + regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); + regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); + regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); + regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); + regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); + regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); + regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); + + regs_buff[22] = E1000_READ_REG(hw, E1000_EICS); + regs_buff[23] = E1000_READ_REG(hw, E1000_EIAM); + regs_buff[24] = E1000_READ_REG(hw, E1000_EIMS); + regs_buff[25] = E1000_READ_REG(hw, E1000_IMS); + + sbuf_printf(sb, "General Registers\n"); + sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); + sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); + sbuf_printf(sb, "\tCTRL_EXIT\t %08x\n\n", regs_buff[2]); + + sbuf_printf(sb, "RX Registers\n"); + sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); + sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); + sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); + sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); + sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); + sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); + sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); + + sbuf_printf(sb, "TX Registers\n"); + sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); + sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); + sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); + sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); + sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); + sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); + sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); + sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); + sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); + sbuf_printf(sb, "\tTDFHS\t %08x\n", regs_buff[20]); + sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); + + sbuf_printf(sb, "Interrupt Registers\n"); + sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); + sbuf_printf(sb, "\tE1000_EICS\t %08x\n", regs_buff[22]); + sbuf_printf(sb, "\tE1000_EIAM\t %08x\n", regs_buff[23]); + sbuf_printf(sb, "\tE1000_EIMS\t %08x\n", regs_buff[24]); + sbuf_printf(sb, "\tE1000_IMS\t %08x\n", regs_buff[25]); + + for (j = 0; j < min(nrxd, 128); j++) { + u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); + u32 length = le32toh(rxr->rx_base[j].wb.upper.length); + sbuf_printf(sb, "\tRXD addr %d: %09lx Error:%04x DD: %d EOP: %d Length:%04d\n", j, + rxr->rx_base[j].read.pkt_addr, staterr, staterr & E1000_RXD_STAT_DD, !!(staterr & E1000_RXD_STAT_EOP), length); + } + + for (j = 0; j < min(ntxd, 128); j++) { + unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; + + sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x\n", + j, ptr[0], ptr[1], ptr[2], ptr[3]); + + } + + rc = sbuf_finish(sb); + sbuf_delete(sb); + return(rc); +} + +static void * +igb_register(device_t dev) +{ + return (igb_sctx); +} + +static int +igb_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + device_t dev = iflib_get_dev(ctx); + struct igb_tx_queue *que; + int error = E1000_SUCCESS; + int i; + + MPASS(adapter->tx_num_queues > 0); + MPASS(adapter->tx_num_queues == ntxqsets); + + /* First allocate the top level queue structs */ + if (!(adapter->tx_queues = + (struct igb_tx_queue *) malloc(sizeof(struct igb_tx_queue) * + adapter->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate queue memory\n"); + return(ENOMEM); + } + + for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { + /* Set up some basics */ + struct tx_ring *txr = &que->txr; + txr->adapter = que->adapter = adapter; + txr->que = que; + que->me = txr->me = i; + + /* Allocate transmit buffer memory */ + if (!(txr->tx_buffers = (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(iflib_get_dev(ctx), "failed to allocate tx_buffer memory\n"); + error = ENOMEM; + goto fail; + } + + /* get the virtual and physical address of the hardware queues */ + txr->tx_base = (union e1000_adv_tx_desc *)vaddrs[i*ntxqs]; + txr->tx_paddr = paddrs[i*ntxqs]; + } + + device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->tx_num_queues); + return (0); + +fail: + igb_if_queues_free(ctx); + return(error); +} + +static int +igb_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct igb_rx_queue *que; + int error = E1000_SUCCESS; + int i; + + MPASS(adapter->rx_num_queues > 0); + MPASS(adapter->rx_num_queues == nrxqsets); + + /* First allocate the top level queue structs */ + if (!(adapter->rx_queues = + (struct igb_rx_queue *) malloc(sizeof(struct igb_rx_queue) * + adapter->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate queue memory\n"); + error = ENOMEM; + goto fail; + } + + for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { + /* Set up some basics */ + struct rx_ring *rxr = &que->rxr; + rxr->adapter = que->adapter = adapter; + rxr->que = que; + que->me = rxr->me = i; + + /* get the virtual and physical address of the hardware queues */ + rxr->rx_base = (union e1000_adv_rx_desc *)vaddrs[nrxqs*i]; + rxr->rx_paddr = paddrs[nrxqs*i]; + } + + device_printf(iflib_get_dev(ctx), "allocated for %d receive queues\n", adapter->rx_num_queues); + return (0); + +fail: + igb_if_queues_free(ctx); + return(error); +} + +static void +igb_if_queues_free(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct igb_tx_queue *tx_que = adapter->tx_queues; + struct igb_rx_queue *rx_que = adapter->rx_queues; + + if (tx_que != NULL) { + for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + + if (txr->tx_buffers == NULL) + break; + + free(txr->tx_buffers, M_DEVBUF); + txr->tx_buffers = NULL; + } + + free(adapter->tx_queues, M_DEVBUF); + adapter->tx_queues = NULL; + } + + if (rx_que != NULL) { + free(adapter->rx_queues, M_DEVBUF); + adapter->rx_queues = NULL; + } +} + +/********************************************************************* + * Device initialization routine + * + * The attach entry point is called when the driver is being loaded. + * This routine identifies the type of hardware, allocates all resources + * and initializes the hardware. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +igb_if_attach_pre(if_ctx_t ctx) +{ + printf("attach pre called\n"); + device_t dev = iflib_get_dev(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx; + int error = 0; + struct e1000_hw *hw; + + INIT_DEBUGOUT("igb_if_attach: begin"); + adapter->hw.mac.get_link_status = 0; + + if (resource_disabled("igb", device_get_unit(dev))) { + device_printf(dev, "Disabled by device hint\n"); + return (ENXIO); + } + + adapter->ctx = ctx; + adapter->dev = adapter->osdep.dev = dev; + scctx = adapter->shared = iflib_get_softc_ctx(ctx); + adapter->media = iflib_get_media(ctx); + + scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN), + scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); + + adapter->tx_process_limit = scctx->isc_ntxd[0]; + + hw = &adapter->hw; + + /* SYSCTLs */ + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, + igb_sysctl_nvm_info, "I", "NVM Information"); + + igb_set_sysctl_value(adapter, "enable_aim", + "Interrupt Moderation", &adapter->enable_aim, + igb_enable_aim); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, + adapter, 0, igb_set_flowcntl, "I", "Flow Control"); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, + igb_get_regs, "A", "Dump Registers"); + + + /* Determine hardware and mac info & set isc_msix_bar */ + igb_identify_hardware(ctx); + + /* Setup PCI resources */ + if (igb_allocate_pci_resources(ctx)) { + device_printf(dev, "Allocation of PCI resources failed\n"); + error = ENXIO; + goto err_pci; + } + + /* Do Shared Code initialization */ + if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { + device_printf(dev, "Setup of Shared code failed\n"); + error = ENXIO; + goto err_pci; + } + + e1000_get_bus_info(&adapter->hw); + + /* Sysctls for limiting the amount of work done in the taskqueues */ + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_wait_to_complete = FALSE; + adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; + + /* Copper options */ + if (adapter->hw.phy.media_type == e1000_media_type_copper) { + adapter->hw.phy.mdix = AUTO_ALL_MODES; + adapter->hw.phy.disable_polarity_correction = FALSE; + adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; + } + + /*Set the frame limits assuming + * standard ethernet sized frames. + */ + adapter->shared->isc_max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; + + /* Allocate the appropriate stats memory */ + if (adapter->vf_ifp) { + adapter->stats = + (struct e1000_vf_stats *)malloc(sizeof \ + (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); + igb_vf_init_stats(adapter); + } else + adapter->stats = + (struct e1000_hw_stats *)malloc(sizeof \ + (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); + if (adapter->stats == NULL) { + device_printf(dev, "Can not allocate stats memory\n"); + error = ENOMEM; + goto err_late; + } + + /* Allocate multicast array memory. */ + adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * + MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); + if (adapter->mta == NULL) { + device_printf(dev, "Can not allocate multicast setup array\n"); + error = ENOMEM; + goto err_late; + } + + /* Some adapter-specific advanced features */ + if (adapter->hw.mac.type >= e1000_i350) { + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, + adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, + adapter, 0, igb_sysctl_eee, "I", + "Disable Energy Efficient Ethernet"); + if (adapter->hw.phy.media_type == e1000_media_type_copper) { + if (adapter->hw.mac.type == e1000_i354) + e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); + else + e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); + } + } + + /* + ** Start from a known state, this is + ** important in reading the nvm and + ** mac from that. + */ + e1000_reset_hw(&adapter->hw); + + /* Make sure we have a good EEPROM before we read from it */ + if (((adapter->hw.mac.type != e1000_i210) && + (adapter->hw.mac.type != e1000_i211)) && + (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { + /* + ** Some PCI-E parts fail the first check due to + ** the link being in sleep state, call it again, + ** if it fails a second time its a real issue. + */ + if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { + device_printf(dev, + "The EEPROM Checksum Is Not Valid\n"); + error = EIO; + goto err_late; + } + } + + /* + ** Copy the permanent MAC address out of the EEPROM + */ + if (e1000_read_mac_addr(&adapter->hw) < 0) { + device_printf(dev, "EEPROM read error while reading MAC" + " address\n"); + error = EIO; + goto err_late; + } + /* Check its sanity */ + if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { + device_printf(dev, "Invalid MAC address\n"); + error = EIO; + goto err_late; + } + + iflib_set_mac(ctx, hw->mac.addr); + + scctx->isc_msix_bar = PCIR_BAR(IGB_MSIX_BAR); + scctx->isc_tx_nsegments = IGB_MAX_SCATTER; + scctx->isc_tx_tso_segments_max = adapter->shared->isc_tx_nsegments; + scctx->isc_tx_tso_size_max = IGB_TSO_SIZE; + scctx->isc_tx_tso_segsize_max = IGB_TSO_SEG_SIZE; + scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = igb_set_num_queues(ctx); + + return(0); + +err_late: + free(adapter->mta, M_DEVBUF); + free(adapter->stats, M_DEVBUF); + igb_release_hw_control(adapter); +err_pci: + igb_free_pci_resources(ctx); + + return (error); +} + +static int +igb_if_attach_post(if_ctx_t ctx) +{ + device_t dev = iflib_get_dev(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + int error; + int eeprom_data = 0; + + /* Setup OS specific network interface */ + error = igb_setup_interface(ctx); + if (error) { + device_printf(dev, "Error in igb_setup_interface"); + goto err_late; + } + + /* Now get a good starting state */ + igb_reset(ctx); + + /* Initialize statistics */ + igb_update_stats_counters(adapter); + + adapter->hw.mac.get_link_status = 1; + igb_if_update_admin_status(ctx); + + /* Indicate SOL/IDER usage */ + if (e1000_check_reset_block(&adapter->hw)) + device_printf(dev, + "PHY reset is blocked due to SOL/IDER session.\n"); + + /* Determine if we have to control management hardware */ + adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); + + /* + * Setup Wake-on-Lan + */ + /* APME bit in EEPROM is mapped to WUC.APME */ + eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; + if (eeprom_data) + adapter->wol = E1000_WUFC_MAG; + + igb_add_hw_stats(adapter); + + return (0); + +err_late: + + free(adapter->mta, M_DEVBUF); + free(adapter->stats, M_DEVBUF); + igb_release_hw_control(adapter); + igb_free_pci_resources(ctx); + + return (error); + +} + +/********************************************************************* + * Device removal routine + * + * The detach entry point is called when the driver is being removed. + * This routine stops the adapter and deallocates all the resources + * that were allocated for driver operation. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +igb_if_detach(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + + INIT_DEBUGOUT("igb_detach: begin"); + + e1000_phy_hw_reset(&adapter->hw); + + /* Give control back to firmware */ + igb_release_manageability(adapter); + igb_release_hw_control(adapter); + + if (adapter->wol) { + E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); + igb_enable_wakeup(dev); + } + + igb_free_pci_resources(ctx); + + if (adapter->mta != NULL) + free(adapter->mta, M_DEVBUF); + + if (adapter->stats != NULL) + free(adapter->stats, M_DEVBUF); + + return (0); +} + +/********************************************************************* + * + * Shutdown entry point + * + **********************************************************************/ + +static int +igb_if_shutdown(if_ctx_t ctx) +{ + return igb_if_suspend(ctx); +} + +/* + * Suspend/resume device methods. + */ +static int +igb_if_suspend(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + + igb_if_stop(ctx); + igb_release_manageability(adapter); + igb_release_hw_control(adapter); + + if (adapter->wol) { + E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); + igb_enable_wakeup(dev); + } + + return 0; +} + +static int +igb_if_resume(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + + igb_if_init(ctx); + igb_init_manageability(adapter); + + return (0); +} + +/********************************************************************* + * Ioctl mtu entry point + * return 0 on success, EINVAL on failure + **********************************************************************/ +static int +igb_if_mtu_set(if_ctx_t ctx, uint32_t mtu) +{ + int maximum_frame_size = 9234; + int error = 0; + struct adapter *adapter = iflib_get_softc(ctx); + + IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); + + if (mtu > maximum_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { + error = EINVAL; + } else { + adapter->max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + } + + return error; +} + + +/********************************************************************* + * Init entry point + * + * This routine is used in two ways. It is used by the stack as + * init entry point in network interface structure. It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. + * + * return 0 on success, positive on failure + **********************************************************************/ +static void +igb_if_init(if_ctx_t ctx) +{ + struct ifnet *ifp = iflib_get_ifp(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + + INIT_DEBUGOUT("igb_init: begin"); + + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(ifp), adapter->hw.mac.addr, + ETHER_ADDR_LEN); + + /* Put the address into the Receive Address Array */ + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + + igb_reset(ctx); + igb_if_update_admin_status(ctx); + + E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); + + /* Set hardware offload abilities */ + ifp->if_hwassist = 0; + if (ifp->if_capenable & IFCAP_TXCSUM) { +#if __FreeBSD_version >= 1000000 + ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP); + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_IP_SCTP; +#else + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); +#if __FreeBSD_version >= 800000 + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_SCTP; +#endif +#endif + } + +#if __FreeBSD_version >= 1000000 + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { + ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP); + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_IP6_SCTP; + } +#endif + + if (ifp->if_capenable & IFCAP_TSO) + ifp->if_hwassist |= CSUM_TSO; + + /* Clear bad data from Rx FIFOs */ + e1000_rx_fifo_flush_82575(&adapter->hw); + + /* Configure for OS presence */ + igb_init_manageability(adapter); + + /* Prepare transmit descriptors and buffers */ + igb_initialize_transmit_units(ctx); + + /* Setup Multicast table */ + igb_if_multi_set(ctx); + + /* Prepare receive descriptors and buffers */ + igb_initialize_receive_units(ctx); + + /* Enable VLAN support */ + if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) + igb_setup_vlan_hw_support(ctx); + + /* Don't lose promiscuous settings */ + igb_if_set_promisc(ctx, if_getflags(ifp)); + + e1000_clear_hw_cntrs_base_generic(&adapter->hw); + + if (adapter->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ + igb_configure_queues(adapter); + + /* this clears any pending interrupts */ + E1000_READ_REG(&adapter->hw, E1000_ICR); + E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); + + /* Set Energy Efficient Ethernet */ + if (adapter->hw.phy.media_type == e1000_media_type_copper) { + if (adapter->hw.mac.type == e1000_i354) + e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); + else + e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); + } +} + +#if 0 +/* non-iflib link state handler */ +static void +igb_handle_link(void *context) +{ + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); + + adapter->hw.mac.get_link_status = 1; + iflib_admin_intr_deferred(ctx); + +} +#endif + +/********************************************************************* + * + * MSI/Legacy Deferred + * Interrupt Service routine + * + *********************************************************************/ +int +igb_intr(void *arg) +{ + struct igb_rx_queue *que = arg; + struct adapter *adapter = que->adapter; + u32 reg_icr; + + reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + + /* Hot eject? */ + if (reg_icr == 0xffffffff) + return FILTER_STRAY; + + /* Definitely not our interrupt. */ + if (reg_icr == 0x0) + return FILTER_STRAY; + + if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) + return FILTER_STRAY; + + /* + * Mask interrupts until the taskqueue is finished running. This is + * cheap, just assume that it is needed. This also works around the + * MSI message reordering errata on certain systems. + */ + + if (reg_icr & E1000_ICR_RXO) + adapter->rx_overruns++; + + return (FILTER_SCHEDULE_THREAD); +} + +static int +igb_if_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct igb_rx_queue *que = &adapter->rx_queues[rxqid]; + + igb_enable_queue(adapter, que); + return (0); +} + +static void +igb_enable_queue(struct adapter *adapter, struct igb_rx_queue *que) +{ + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); + device_printf(iflib_get_dev(adapter->ctx), "eims %x\n", que->eims); +} + + +/********************************************************************* + * + * MSIX Que Interrupt Service routine + * + **********************************************************************/ +static int +igb_msix_que(void *arg) +{ + struct igb_rx_queue *que = arg; + struct adapter *adapter = que->adapter; + + struct rx_ring *rxr = &que->rxr; + u32 newitr = 0; + + ++que->irqs; + if (adapter->enable_aim == FALSE) + return (FILTER_SCHEDULE_THREAD); + /* + ** Do Adaptive Interrupt Moderation: + ** - Write out last calculated setting + ** - Calculate based on average size over + ** the last interval. + */ + if (que->eitr_setting) + E1000_WRITE_REG(&adapter->hw, + E1000_EITR(que->msix), que->eitr_setting); + + que->eitr_setting = 0; + + + /* Used half Default if sub-gig */ + if (adapter->link_speed != 1000) + newitr = IGB_DEFAULT_ITR / 2; + else { + if ((rxr->bytes) && (rxr->packets)) + newitr = max(newitr, + (rxr->bytes / rxr->packets)); + newitr += 24; /* account for hardware frame, crc */ + /* set an upper boundary */ + newitr = min(newitr, 3000); + /* Be nice to the mid range */ + if ((newitr > 300) && (newitr < 1200)) + newitr = (newitr / 3); + else + newitr = (newitr / 2); + } + newitr &= 0x7FFC; /* Mask invalid bits */ + if (adapter->hw.mac.type == e1000_82575) + newitr |= newitr << 16; + else + newitr |= E1000_EITR_CNT_IGNR; + + /* save for next interrupt */ + que->eitr_setting = newitr; + + /* Reset state */ + rxr->bytes = 0; + rxr->rx_bytes = 0; + rxr->packets = 0; + return (FILTER_SCHEDULE_THREAD); +} + + +/********************************************************************* + * + * MSIX Link Interrupt Service routine + * + **********************************************************************/ + +static int +igb_msix_link(void *arg) +{ + struct adapter *adapter = arg; + struct igb_rx_queue *que = adapter->rx_queues; + struct e1000_hw *hw = &adapter->hw; + u32 icr; + + ++que->irqs; + MPASS(hw->back != NULL); + + icr = E1000_READ_REG(hw, E1000_ICR); + if (!(icr & E1000_ICR_LSC)) + goto spurious; + + adapter->hw.mac.get_link_status = 1; + iflib_admin_intr_deferred(adapter->ctx); + +spurious: + /* Rearm */ + E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC); + E1000_WRITE_REG(hw, E1000_EIMS, adapter->link_mask); + return (FILTER_HANDLED); +} + + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called whenever the user queries the status of + * the interface using ifconfig. + * + **********************************************************************/ +static void +igb_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) +{ + struct adapter *adapter = iflib_get_softc(ctx); + + INIT_DEBUGOUT("igb_media_status: begin"); + igb_if_update_admin_status(ctx); + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if (!adapter->link_active) { + return; + } + + ifmr->ifm_status |= IFM_ACTIVE; + + switch (adapter->link_speed) { + case 10: + ifmr->ifm_active |= IFM_10_T; + break; + case 100: + /* + ** Support for 100Mb SFP - these are Fiber + ** but the media type appears as serdes + */ + if (adapter->hw.phy.media_type == + e1000_media_type_internal_serdes) + ifmr->ifm_active |= IFM_100_FX; + else + ifmr->ifm_active |= IFM_100_TX; + break; + case 1000: + ifmr->ifm_active |= IFM_1000_T; + break; + case 2500: + ifmr->ifm_active |= IFM_2500_SX; + break; + } + + if (adapter->link_duplex == FULL_DUPLEX) + ifmr->ifm_active |= IFM_FDX; + else + ifmr->ifm_active |= IFM_HDX; +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called when the user changes speed/duplex using + * media/mediopt option with ifconfig. + * + **********************************************************************/ +static int +igb_if_media_change(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifmedia *ifm = iflib_get_media(ctx); + + INIT_DEBUGOUT("igb_media_change: begin"); + + if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) + return (EINVAL); + + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; + break; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_T: + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case IFM_100_TX: + adapter->hw.mac.autoneg = FALSE; + adapter->hw.phy.autoneg_advertised = 0; + if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) + adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; + else + adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; + break; + case IFM_10_T: + adapter->hw.mac.autoneg = FALSE; + adapter->hw.phy.autoneg_advertised = 0; + if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) + adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; + else + adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; + break; + default: + device_printf(adapter->dev, "Unsupported media type\n"); + } + + return (0); +} + +static void +igb_disable_promisc(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct e1000_hw *hw = &adapter->hw; + struct ifnet *ifp = iflib_get_ifp(ctx); + u32 reg; + int mcnt = 0; + + if (adapter->vf_ifp) { + e1000_promisc_set_vf(hw, e1000_promisc_disabled); + return; + } + reg = E1000_READ_REG(hw, E1000_RCTL); + reg &= (~E1000_RCTL_UPE); + if (ifp->if_flags & IFF_ALLMULTI) + mcnt = MAX_NUM_MULTICAST_ADDRESSES; + else { + struct ifmultiaddr *ifma; + /* XXX look at callback handler as used in ixl / ixgbe etc */ + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) + break; + mcnt++; + } + + } + /* Don't disable if in MAX groups */ + if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) + reg &= (~E1000_RCTL_MPE); + E1000_WRITE_REG(hw, E1000_RCTL, reg); +} + +static int +igb_if_set_promisc(if_ctx_t ctx, int flags) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + if (adapter->vf_ifp) { + e1000_promisc_set_vf(hw, e1000_promisc_enabled); + return (0); + } + igb_disable_promisc(ctx); + + reg = E1000_READ_REG(hw, E1000_RCTL); + if (ifp->if_flags & IFF_PROMISC) { + reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); + E1000_WRITE_REG(hw, E1000_RCTL, reg); + } else if (ifp->if_flags & IFF_ALLMULTI) { + reg |= E1000_RCTL_MPE; + reg &= ~E1000_RCTL_UPE; + E1000_WRITE_REG(hw, E1000_RCTL, reg); + } + return (0); +} + + + +/********************************************************************* + * Multicast Update + * + * This routine is called whenever multicast address list is updated. + * + **********************************************************************/ + +static void +igb_if_multi_set(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + struct ifmultiaddr *ifma; + u32 reg_rctl = 0; + u8 *mta; + + int mcnt = 0; + + IOCTL_DEBUGOUT("igb_set_multi: begin"); + + mta = adapter->mta; + bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * + MAX_NUM_MULTICAST_ADDRESSES); + + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + + if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) + break; + + bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), + &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); + mcnt++; + } + + if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { + reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + reg_rctl |= E1000_RCTL_MPE; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); + } else + e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); +} + + +/********************************************************************* + * Timer routine: + * This routine checks for link status, + * updates statistics, and does the watchdog. + * + **********************************************************************/ + +static void +igb_if_timer(if_ctx_t ctx, uint16_t qid) +{ + struct adapter *adapter = iflib_get_softc(ctx); + + if (qid != 0) + return; + igb_update_stats_counters(adapter); + iflib_admin_intr_deferred(ctx); +} + +static void +igb_if_update_admin_status(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct e1000_hw *hw = &adapter->hw; + struct e1000_fc_info *fc = &hw->fc; + struct ifnet *ifp = iflib_get_ifp(ctx); + device_t dev = iflib_get_dev(ctx); + u32 link_check, thstat, ctrl; + char *flowctl = NULL; + + link_check = thstat = ctrl = 0; + + /* Get the cached link value or read for real */ + switch (hw->phy.media_type) { + case e1000_media_type_copper: + if (hw->mac.get_link_status) { + /* Do the work to read phy */ + e1000_check_for_link(hw); + link_check = !hw->mac.get_link_status; + } else { + link_check = TRUE; + } + break; + case e1000_media_type_fiber: + e1000_check_for_link(hw); + link_check = (E1000_READ_REG(hw, E1000_STATUS) & + E1000_STATUS_LU); + break; + case e1000_media_type_internal_serdes: + e1000_check_for_link(hw); + link_check = adapter->hw.mac.serdes_has_link; + break; + /* VF device is type_unknown */ + case e1000_media_type_unknown: + e1000_check_for_link(hw); + link_check = !hw->mac.get_link_status; + /* Fall thru */ + default: + break; + } + + /* Check for thermal downshift or shutdown */ + if (hw->mac.type == e1000_i350) { + thstat = E1000_READ_REG(hw, E1000_THSTAT); + ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); + } + + /* Get the flow control for display */ + switch (fc->current_mode) { + case e1000_fc_rx_pause: + flowctl = "RX"; + break; + case e1000_fc_tx_pause: + flowctl = "TX"; + break; + case e1000_fc_full: + flowctl = "Full"; + break; + case e1000_fc_none: + default: + flowctl = "None"; + break; + } + + /* Now we check if a transition has happened */ + if (link_check && (adapter->link_active == 0)) { + e1000_get_speed_and_duplex(&adapter->hw, + &adapter->link_speed, &adapter->link_duplex); + if (bootverbose) + device_printf(dev, "Link is up %d Mbps %s," + " Flow Control: %s\n", + adapter->link_speed, + ((adapter->link_duplex == FULL_DUPLEX) ? + "Full Duplex" : "Half Duplex"), flowctl); + adapter->link_active = 1; + ifp->if_baudrate = adapter->link_speed * 1000000; + if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && + (thstat & E1000_THSTAT_LINK_THROTTLE)) + device_printf(dev, "Link: thermal downshift\n"); + /* Delay Link Up for Phy update */ + if (((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) && + (hw->phy.id == I210_I_PHY_ID)) + msec_delay(I210_LINK_DELAY); + /* Reset if the media type changed. */ + if (hw->dev_spec._82575.media_changed) { + hw->dev_spec._82575.media_changed = false; + adapter->flags |= IGB_MEDIA_RESET; + igb_reset(ctx); + } + /* This can sleep */ + iflib_link_state_change(ctx, LINK_STATE_UP, ifp->if_baudrate); + } else if (!link_check && (adapter->link_active == 1)) { + ifp->if_baudrate = adapter->link_speed = 0; + adapter->link_duplex = 0; + if (bootverbose) + device_printf(dev, "Link is Down\n"); + if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && + (thstat & E1000_THSTAT_PWR_DOWN)) + device_printf(dev, "Link: thermal shutdown\n"); + adapter->link_active = 0; + /* This can sleep */ + iflib_link_state_change(ctx, LINK_STATE_DOWN, ifp->if_baudrate); + } +} + +/********************************************************************* + * + * This routine disables all traffic on the adapter by issuing a + * global reset on the MAC and deallocates TX/RX buffers. + * + **********************************************************************/ + +static void +igb_if_stop(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + + INIT_DEBUGOUT("igb_stop: begin"); + + e1000_reset_hw(&adapter->hw); + E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); + + e1000_led_off(&adapter->hw); + e1000_cleanup_led(&adapter->hw); +} + + +/********************************************************************* + * + * Determine hardware revision. + * + **********************************************************************/ +static void +igb_identify_hardware(if_ctx_t ctx) +{ + device_t dev = iflib_get_dev(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + + /* Save off the information about this board */ + adapter->hw.vendor_id = pci_get_vendor(dev); + adapter->hw.device_id = pci_get_device(dev); + adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); + adapter->hw.subsystem_vendor_id = + pci_read_config(dev, PCIR_SUBVEND_0, 2); + adapter->hw.subsystem_device_id = + pci_read_config(dev, PCIR_SUBDEV_0, 2); + + /* Set MAC type early for PCI setup */ + e1000_set_mac_type(&adapter->hw); + + /* Are we a VF device? */ + if ((adapter->hw.mac.type == e1000_vfadapt) || + (adapter->hw.mac.type == e1000_vfadapt_i350)) + adapter->vf_ifp = 1; + else + adapter->vf_ifp = 0; +} + +static int +igb_allocate_pci_resources(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + int rid; + + rid = PCIR_BAR(0); + adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + if (adapter->pci_mem == NULL) { + device_printf(dev, "Unable to allocate bus resource: memory\n"); + return (ENXIO); + } + + adapter->osdep.mem_bus_space_tag = + rman_get_bustag(adapter->pci_mem); + adapter->osdep.mem_bus_space_handle = + rman_get_bushandle(adapter->pci_mem); + adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; + adapter->hw.back = &adapter->osdep; + + return (0); +} + +/********************************************************************* + * + * Setup the MSIX Queue Interrupt handlers: + * + **********************************************************************/ +static int +igb_if_msix_intr_assign(if_ctx_t ctx, int msix) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct igb_rx_queue *rx_que = adapter->rx_queues; + struct igb_tx_queue *tx_que = adapter->tx_queues; + int error, rid, vector = 0; + int i; + char buf[16]; + + /* Be sure to start with all interrupts disabled */ + E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(&adapter->hw); + + for (i = 0; i < adapter->rx_num_queues; i++, vector++, rx_que++) { + rid = vector +1; + printf("rx_que->me=%d\n", rx_que->me); + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, igb_msix_que, rx_que, rx_que->me, buf); + + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); + adapter->rx_num_queues = i + 1; + goto fail; + } + + rx_que->msix = vector; + if (adapter->hw.mac.type == e1000_82575) + rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; + else + rx_que->eims = 1 << vector; + } + + for (i = 0; i < adapter->tx_num_queues; i++, tx_que++) { + snprintf(buf, sizeof(buf), "txq%d", i); + + iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); + + if (adapter->hw.mac.type == e1000_82575) + tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->rx_num_queues); + else + tx_que->eims = 1 << (i % adapter->rx_num_queues); + + } + + rid = vector + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, igb_msix_link, adapter, 0, "aq"); + + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); + goto fail; + } + + adapter->linkvec = vector; + return (0); +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + return (error); +} + + +static void +igb_configure_queues(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct igb_rx_queue *rx_que; + struct igb_tx_queue *tx_que; + u32 tmp, ivar = 0, newitr = 0; + + /* First turn on RSS capability */ + if (adapter->hw.mac.type != e1000_82575) + E1000_WRITE_REG(hw, E1000_GPIE, + E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | + E1000_GPIE_PBA | E1000_GPIE_NSICR); + + /* Turn on MSIX */ + switch (adapter->hw.mac.type) { + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + case e1000_vfadapt: + case e1000_vfadapt_i350: + /* RX entries */ + for (int i = 0; i < adapter->rx_num_queues; i++) { + u32 index = i >> 1; + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + rx_que = &adapter->rx_queues[i]; + printf("rx_que->msix %d\n", rx_que->msix); + if (i & 1) { + ivar &= 0xFF00FFFF; + ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; + } else { + ivar &= 0xFFFFFF00; + ivar |= rx_que->msix | E1000_IVAR_VALID; + } + printf("RX ivar %x\n", ivar); + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + } + /* TX entries */ + for (int i = 0; i < adapter->tx_num_queues; i++) { + u32 index = i >> 1; + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + tx_que = &adapter->tx_queues[i]; + printf("tx_que->msix %d\n", tx_que->msix); + if (i & 1) { + ivar &= 0x00FFFFFF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; + } else { + ivar &= 0xFFFF00FF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; + } + printf("TX ivar %x\n", ivar); + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= tx_que->eims; + } + + /* And for the link interrupt */ + ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; + adapter->link_mask = 1 << adapter->linkvec; + E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); + break; + case e1000_82576: + /* RX entries */ + for (int i = 0; i < adapter->rx_num_queues; i++) { + u32 index = i & 0x7; /* Each IVAR has two entries */ + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + rx_que = &adapter->rx_queues[i]; + if (i < 8) { + ivar &= 0xFFFFFF00; + ivar |= rx_que->msix | E1000_IVAR_VALID; + } else { + ivar &= 0xFF00FFFF; + ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= rx_que->eims; + } + /* TX entries */ + for (int i = 0; i < adapter->tx_num_queues; i++) { + u32 index = i & 0x7; /* Each IVAR has two entries */ + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + tx_que = &adapter->tx_queues[i]; + if (i < 8) { + ivar &= 0xFFFF00FF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; + } else { + ivar &= 0x00FFFFFF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= tx_que->eims; + } + + /* And for the link interrupt */ + ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; + adapter->link_mask = 1 << adapter->linkvec; + E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); + break; + + case e1000_82575: + /* enable MSI-X support*/ + tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); + tmp |= E1000_CTRL_EXT_PBA_CLR; + /* Auto-Mask interrupts upon ICR read. */ + tmp |= E1000_CTRL_EXT_EIAME; + tmp |= E1000_CTRL_EXT_IRCA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); + + /* Queues */ + for (int i = 0; i < adapter->rx_num_queues; i++) { + rx_que = &adapter->rx_queues[i]; + tmp = E1000_EICR_RX_QUEUE0 << i; + tmp |= E1000_EICR_TX_QUEUE0 << i; + rx_que->eims = tmp; + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), + i, rx_que->eims); + adapter->que_mask |= rx_que->eims; + } + + /* Link */ + E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), + E1000_EIMS_OTHER); + adapter->link_mask |= E1000_EIMS_OTHER; + default: + break; + } + + /* Set the starting interrupt rate */ + if (igb_max_interrupt_rate > 0) + newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; + + if (hw->mac.type == e1000_82575) + newitr |= newitr << 16; + else + newitr |= E1000_EITR_CNT_IGNR; + + for (int i = 0; i < adapter->rx_num_queues; i++) { + rx_que = &adapter->rx_queues[i]; + E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); + } + + return; +} + + +static void +igb_free_pci_resources(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct igb_rx_queue *que = adapter->rx_queues; + device_t dev = iflib_get_dev(ctx); + + if (adapter->intr_type == IFLIB_INTR_MSIX) { + iflib_irq_free(ctx, &adapter->irq); + } + + /* First release all the interrupt resources */ + for (int i = 0; i < adapter->rx_num_queues; i++, que++) { + iflib_irq_free(ctx, &que->que_irq); + } + + /* Free link/admin interrupt */ + if (adapter->pci_mem != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->pci_mem); + } +} + +static int +igb_set_num_queues(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + int maxqueues = adapter->rx_num_queues; + + /* Sanity check based on HW */ + switch (adapter->hw.mac.type) { + case e1000_82575: + maxqueues = 4; + break; + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + maxqueues = 8; + break; + case e1000_i210: + maxqueues = 4; + break; + case e1000_i211: + maxqueues = 2; + break; + default: /* VF interfaces */ + maxqueues = 1; + break; + } + + return maxqueues; +} + +/********************************************************************* + * + * Initialize the DMA Coalescing feature + * + **********************************************************************/ +static void +igb_init_dmac(if_ctx_t ctx, u32 pba) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct e1000_hw *hw = &adapter->hw; + u32 dmac, reg = ~E1000_DMACR_DMAC_EN; + u16 hwm; + + if (hw->mac.type == e1000_i211) + return; + + if (hw->mac.type > e1000_82580) { + + if (adapter->dmac == 0) { /* Disabling it */ + E1000_WRITE_REG(hw, E1000_DMACR, reg); + return; + } else + device_printf(dev, "DMA Coalescing enabled\n"); + + /* Set starting threshold */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); + + hwm = 64 * pba - adapter->max_frame_size / 16; + if (hwm < 64 * (pba - 6)) + hwm = 64 * (pba - 6); + reg = E1000_READ_REG(hw, E1000_FCRTC); + reg &= ~E1000_FCRTC_RTH_COAL_MASK; + reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) + & E1000_FCRTC_RTH_COAL_MASK); + E1000_WRITE_REG(hw, E1000_FCRTC, reg); + + + dmac = pba - adapter->max_frame_size / 512; + if (dmac < pba - 10) + dmac = pba - 10; + reg = E1000_READ_REG(hw, E1000_DMACR); + reg &= ~E1000_DMACR_DMACTHR_MASK; + reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) + & E1000_DMACR_DMACTHR_MASK); + + /* transition to L0x or L1 if available..*/ + reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); + + /* Check if status is 2.5Gb backplane connection + * before configuration of watchdog timer, which is + * in msec values in 12.8usec intervals + * watchdog timer= msec values in 32usec intervals + * for non 2.5Gb connection + */ + if (hw->mac.type == e1000_i354) { + int status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= ((adapter->dmac * 5) >> 6); + else + reg |= (adapter->dmac >> 5); + } else { + reg |= (adapter->dmac >> 5); + } + + E1000_WRITE_REG(hw, E1000_DMACR, reg); + + E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); + + /* Set the interval before transition */ + reg = E1000_READ_REG(hw, E1000_DMCTLX); + if (hw->mac.type == e1000_i350) + reg |= IGB_DMCTLX_DCFLUSH_DIS; + /* + ** in 2.5Gb connection, TTLX unit is 0.4 usec + ** which is 0x4*2 = 0xA. But delay is still 4 usec + */ + if (hw->mac.type == e1000_i354) { + int status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= 0xA; + else + reg |= 0x4; + } else { + reg |= 0x4; + } + + E1000_WRITE_REG(hw, E1000_DMCTLX, reg); + + /* free space in tx packet buffer to wake from DMA coal */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - + (2 * adapter->max_frame_size)) >> 6); + + /* make low power state decision controlled by DMA coal */ + reg = E1000_READ_REG(hw, E1000_PCIEMISC); + reg &= ~E1000_PCIEMISC_LX_DECISION; + E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); + + } else if (hw->mac.type == e1000_82580) { + u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); + E1000_WRITE_REG(hw, E1000_PCIEMISC, + reg & ~E1000_PCIEMISC_LX_DECISION); + E1000_WRITE_REG(hw, E1000_DMACR, 0); + } +} + + +/********************************************************************* + * + * Set up an fresh starting state + * + **********************************************************************/ +static void +igb_reset(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct e1000_hw *hw = &adapter->hw; + struct e1000_fc_info *fc = &hw->fc; + struct ifnet *ifp = iflib_get_ifp(ctx); + u32 pba = 0; + u16 hwm; + + INIT_DEBUGOUT("igb_reset: begin"); + + /* Let the firmware know the OS is in control */ + igb_get_hw_control(adapter); + + /* + * Packet Buffer Allocation (PBA) + * Writing PBA sets the receive portion of the buffer + * the remainder is used for the transmit buffer. + */ + switch (hw->mac.type) { + case e1000_82575: + pba = E1000_PBA_32K; + break; + case e1000_82576: + case e1000_vfadapt: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba &= E1000_RXPBS_SIZE_MASK_82576; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_vfadapt_i350: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba = e1000_rxpbs_adjust_82580(pba); + break; + case e1000_i210: + case e1000_i211: + pba = E1000_PBA_34K; + default: + break; + } + + /* Special needs in case of Jumbo frames */ + if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { + u32 tx_space, min_tx, min_rx; + pba = E1000_READ_REG(hw, E1000_PBA); + tx_space = pba >> 16; + pba &= 0xffff; + min_tx = (adapter->max_frame_size + + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; + min_tx = roundup2(min_tx, 1024); + min_tx >>= 10; + min_rx = adapter->max_frame_size; + min_rx = roundup2(min_rx, 1024); + min_rx >>= 10; + if (tx_space < min_tx && + ((min_tx - tx_space) < pba)) { + pba = pba - (min_tx - tx_space); + /* + * if short on rx space, rx wins + * and must trump tx adjustment + */ + if (pba < min_rx) + pba = min_rx; + } + E1000_WRITE_REG(hw, E1000_PBA, pba); + } + + INIT_DEBUGOUT1("igb_init: pba=%dK",pba); + + /* + * These parameters control the automatic generation (Tx) and + * response (Rx) to Ethernet PAUSE frames. + * - High water mark should allow for at least two frames to be + * received after sending an XOFF. + * - Low water mark works best when it is very near the high water mark. + * This allows the receiver to restart by sending XON when it has + * drained a bit. + */ + hwm = min(((pba << 10) * 9 / 10), + ((pba << 10) - 2 * adapter->max_frame_size)); + + if (hw->mac.type < e1000_82576) { + fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ + fc->low_water = fc->high_water - 8; + } else { + fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; + } + + fc->pause_time = IGB_FC_PAUSE_TIME; + fc->send_xon = TRUE; + if (adapter->fc) + fc->requested_mode = adapter->fc; + else + fc->requested_mode = e1000_fc_default; + + /* Issue a global reset */ + e1000_reset_hw(hw); + E1000_WRITE_REG(hw, E1000_WUC, 0); + + /* Reset for AutoMediaDetect */ + if (adapter->flags & IGB_MEDIA_RESET) { + e1000_setup_init_funcs(hw, TRUE); + e1000_get_bus_info(hw); + adapter->flags &= ~IGB_MEDIA_RESET; + } + + if (e1000_init_hw(hw) < 0) + device_printf(dev, "Hardware Initialization Failed\n"); + + /* Setup DMA Coalescing */ + igb_init_dmac(ctx, pba); + + E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); + e1000_get_phy_info(hw); + e1000_check_for_link(hw); + return; +} + +/********************************************************************* + * + * Setup networking device structure and register an interface. + * + **********************************************************************/ +static int +igb_setup_interface(if_ctx_t ctx) +{ + struct ifnet *ifp = iflib_get_ifp(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + uint64_t cap = 0; + + INIT_DEBUGOUT("igb_setup_interface: begin"); + +#ifdef IGB_LEGACY_TX + if_softc_ctx_t scctx = adapter->shared; + ifp->if_start = igb_start; + IFQ_SET_MAXLEN(&ifp->if_snd, scctx->isc_ntxd[0] - 1); + ifp->if_snd.ifq_drv_maxlen = scctx->isc_ntxd[0] - 1; + IFQ_SET_READY(&ifp->if_snd); +#endif + + cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; + cap |= IFCAP_TSO; + cap |= IFCAP_JUMBO_MTU; + ifp->if_capenable = ifp->if_capabilities; + + /* Don't enable LRO by default */ + cap |= IFCAP_LRO; + + /* + * Tell the upper layer(s) we + * support full VLAN capability. + */ + cap |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; + cap |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; + + if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); + if_setcapabilitiesbit(ifp, cap, 0); + if_setcapenable(ifp, if_getcapabilities(ifp)); + /* + ** Don't turn this on by default, if vlans are + ** created on another pseudo device (eg. lagg) + ** then vlan events are not passed thru, breaking + ** operation, but with HW FILTER off it works. If + ** using vlans directly on the igb driver you can + ** enable this and get full hardware tag filtering. + */ + ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + + /* + * Specify the media types supported by this adapter and register + * callbacks to update media and link information + */ + if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || + (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, + 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); + } else { + ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, + 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, + 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, + 0, NULL); + if (adapter->hw.phy.type != e1000_phy_ife) { + ifmedia_add(adapter->media, + IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(adapter->media, + IFM_ETHER | IFM_1000_T, 0, NULL); + } + } + ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); + return (0); +} + + +/********************************************************************* + * + * Enable transmit unit. + * + **********************************************************************/ +static void +igb_initialize_transmit_units(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct igb_tx_queue *que; + struct e1000_hw *hw = &adapter->hw; + u32 txdctl, tctl; + int i; + + INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); + + /* Setup the Tx Descriptor Rings */ + for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { + struct tx_ring *txr = &que->txr; + igb_init_tx_ring(que); + + u64 bus_addr = txr->tx_paddr; + txdctl = tctl = 0; + + E1000_WRITE_REG(hw, E1000_TDLEN(i), + scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); + E1000_WRITE_REG(hw, E1000_TDBAH(i), + (uint32_t)(bus_addr >> 32)); + E1000_WRITE_REG(hw, E1000_TDBAL(i), + (uint32_t)bus_addr); + + /* Setup the HW Tx Head and Tail descriptor pointers */ + E1000_WRITE_REG(hw, E1000_TDT(i), 0); + E1000_WRITE_REG(hw, E1000_TDH(i), 0); + + HW_DEBUGOUT2("Base = %x, Length = %x\n", + E1000_READ_REG(hw, E1000_TDBAL(i)), + E1000_READ_REG(hw, E1000_TDLEN(i))); + + txdctl |= IGB_TX_PTHRESH; + txdctl |= IGB_TX_HTHRESH << 8; + /** NEED TO FIX in iflib.c */ + /* txdctl |= IGB_TX_WTHRESH << 16; */ + txdctl |= E1000_TXDCTL_QUEUE_ENABLE; + E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); + } + if (adapter->vf_ifp) { + return; + } + + e1000_config_collision_dist(hw); + + /* Program the Transmit Control Register */ + tctl = E1000_READ_REG(hw, E1000_TCTL); + tctl &= ~E1000_TCTL_CT; + tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | + (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); + + /* This write will effectively turn on the transmit unit. */ + E1000_WRITE_REG(hw, E1000_TCTL, tctl); +} + +/* + * Initialize the RSS mapping for NICs that support multiple transmit/ + * receive rings. + */ +static void +igb_initialize_rss_mapping(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + int queue_id; + u32 reta; + u32 rss_key[10], mrqc, shift = 0; + + /* XXX? */ + if (adapter->hw.mac.type == e1000_82575) + shift = 6; + + /* + * The redirection table controls which destination + * queue each bucket redirects traffic to. + * Each DWORD represents four queues, with the LSB + * being the first queue in the DWORD. + * + * This just allocates buckets to queues using round-robin + * allocation. + * + * NOTE: It Just Happens to line up with the default + * RSS allocation method. + */ + + /* Warning FM follows */ + reta = 0; + for (i = 0; i < 128; i++) { +#ifdef RSS + queue_id = rss_get_indirection_to_bucket(i); + /* + * If we have more queues than buckets, we'll + * end up mapping buckets to a subset of the + * queues. + * + * If we have more buckets than queues, we'll + * end up instead assigning multiple buckets + * to queues. + * + * Both are suboptimal, but we need to handle + * the case so we don't go out of bounds + * indexing arrays and such. + */ + queue_id = queue_id % adapter->rx_num_queues; +#else + queue_id = (i % adapter->rx_num_queues); +#endif + /* Adjust if required */ + queue_id = queue_id << shift; + + /* + * The low 8 bits are for hash value (n+0); + * The next 8 bits are for hash value (n+1), etc. + */ + reta = reta >> 8; + reta = reta | ( ((uint32_t) queue_id) << 24); + if ((i & 3) == 3) { + E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); + reta = 0; + } + } + + /* Now fill in hash table */ + + /* + * MRQC: Multiple Receive Queues Command + * Set queuing to RSS control, number depends on the device. + */ + mrqc = E1000_MRQC_ENABLE_RSS_8Q; + +#ifdef RSS + /* XXX ew typecasting */ + rss_getkey((uint8_t *) &rss_key); +#else + arc4rand(&rss_key, sizeof(rss_key), 0); +#endif + for (i = 0; i < 10; i++) + E1000_WRITE_REG_ARRAY(hw, + E1000_RSSRK(0), i, rss_key[i]); + + /* + * Configure the RSS fields to hash upon. + */ + mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP); + mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP); + mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | + E1000_MRQC_RSS_FIELD_IPV6_UDP); + mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); + + E1000_WRITE_REG(hw, E1000_MRQC, mrqc); +} + +/********************************************************************* + * + * Initialize receive registers and features + * + **********************************************************************/ +static void +igb_initialize_receive_units(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ifnet *ifp = iflib_get_ifp(ctx); + struct e1000_hw *hw = &adapter->hw; + struct igb_rx_queue *que; + u32 rctl, rxcsum, psize, srrctl = 0; + int i; + + INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); + + /* + * Make sure receives are disabled while setting + * up the descriptor ring + */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); + + /* + ** Set up for header split + */ + if (igb_header_split) { + /* Use a standard mbuf for the header */ + srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; + } else + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; + + /* + ** Set up for jumbo frames + */ + if (ifp->if_mtu > ETHERMTU) { + rctl |= E1000_RCTL_LPE; + if (scctx->isc_max_frame_size <= 4096) { + srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; + } else if (scctx->isc_max_frame_size > 4096) { + srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; + } + /* Set maximum packet len */ + psize = adapter->max_frame_size; + /* are we on a vlan? */ + if (ifp->if_vlantrunk != NULL) + psize += VLAN_TAG_SIZE; + E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); + } else { + rctl &= ~E1000_RCTL_LPE; + srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_2048; + } + + /* + * If TX flow control is disabled and there's >1 queue defined, + * enable DROP. + * + * This drops frames rather than hanging the RX MAC for all queues. + */ + if ((adapter->rx_num_queues > 1) && + (adapter->fc == e1000_fc_none || + adapter->fc == e1000_fc_rx_pause)) { + srrctl |= E1000_SRRCTL_DROP_EN; + } + + /* Setup the Base and Length of the Rx Descriptor Rings */ + for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { + struct rx_ring *rxr = &que->rxr; + u64 bus_addr = rxr->rx_paddr; + u32 rxdctl; + + /* Configure for header split? */ + rxr->hdr_split = igb_header_split; + + E1000_WRITE_REG(hw, E1000_RDLEN(i), + scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); + E1000_WRITE_REG(hw, E1000_RDBAH(i), + (uint32_t)(bus_addr >> 32)); + E1000_WRITE_REG(hw, E1000_RDBAL(i), + (uint32_t)bus_addr); + E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); + /* Enable this Queue */ + rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); + rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; + rxdctl &= 0xFFF00000; + rxdctl |= IGB_RX_PTHRESH; + rxdctl |= IGB_RX_HTHRESH << 8; + rxdctl |= IGB_RX_WTHRESH << 16; + E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); + } + + /* + ** Setup for RX MultiQueue + */ + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); + if (adapter->rx_num_queues >1) { + + /* rss setup */ + igb_initialize_rss_mapping(adapter); + + /* + ** NOTE: Receive Full-Packet Checksum Offload + ** is mutually exclusive with Multiqueue. However + ** this is not the same as TCP/IP checksums which + ** still work. + */ + rxcsum |= E1000_RXCSUM_PCSD; +#if __FreeBSD_version >= 800000 + /* For SCTP Offload */ + if ((hw->mac.type != e1000_82575) && + (ifp->if_capenable & IFCAP_RXCSUM)) + rxcsum |= E1000_RXCSUM_CRCOFL; +#endif + } else { + /* Non RSS setup */ + if (ifp->if_capenable & IFCAP_RXCSUM) { + rxcsum |= E1000_RXCSUM_IPPCSE; +#if __FreeBSD_version >= 800000 + if (adapter->hw.mac.type != e1000_82575) + rxcsum |= E1000_RXCSUM_CRCOFL; +#endif + } else + rxcsum &= ~E1000_RXCSUM_TUOFL; + } + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); + + /* Setup the Receive Control Register */ + rctl &= ~(3 << E1000_RCTL_MO_SHIFT); + rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | + E1000_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + /* Strip CRC bytes. */ + rctl |= E1000_RCTL_SECRC; + /* Make sure VLAN Filters are off */ + rctl &= ~E1000_RCTL_VFE; + /* Don't store bad packets */ + rctl &= ~E1000_RCTL_SBP; + + /* Enable Receives */ + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + + /* + * Setup the HW Rx Head and Tail Descriptor Pointers + * - needs to be after enable + */ + for (i = 0; i < adapter->rx_num_queues; i++) { + E1000_WRITE_REG(hw, E1000_RDH(i), 0); + E1000_WRITE_REG(hw, E1000_RDT(i), 0); + } +} + +static void +igb_if_vlan_register(if_ctx_t ctx, u16 vtag) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + u32 index, bit; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] |= (1 << bit); + /* Change hw filter setting */ + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + igb_setup_vlan_hw_support(ctx); +} + +/* + * This routine is run via an vlan + * unconfig EVENT + */ +static void +igb_if_vlan_unregister(if_ctx_t ctx, u16 vtag) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + u32 index, bit; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] &= ~(1 << bit); + /* Change hw filter setting */ + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + igb_setup_vlan_hw_support(ctx); +} + +static void +igb_setup_vlan_hw_support(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct e1000_hw *hw = &adapter->hw; + struct ifnet *ifp = iflib_get_ifp(ctx); + u32 reg; + int i; + + if (adapter->vf_ifp) { + e1000_rlpml_set_vf(hw, + adapter->max_frame_size + VLAN_TAG_SIZE); + return; + } + + reg = E1000_READ_REG(hw, E1000_CTRL); + reg |= E1000_CTRL_VME; + E1000_WRITE_REG(hw, E1000_CTRL, reg); + + /* Enable the Filter Table */ + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { + reg = E1000_READ_REG(hw, E1000_RCTL); + reg &= ~E1000_RCTL_CFIEN; + reg |= E1000_RCTL_VFE; + E1000_WRITE_REG(hw, E1000_RCTL, reg); + } + + /* Update the frame size */ + E1000_WRITE_REG(&adapter->hw, E1000_RLPML, + adapter->max_frame_size + VLAN_TAG_SIZE); + + /* + ** A soft reset zero's out the VFTA, so + ** we need to repopulate it now. + */ + for (i = 0; i < IGB_VFTA_SIZE; i++) + if (adapter->shadow_vfta[i] != 0) { + if (adapter->vf_ifp) + e1000_vfta_set_vf(hw, + adapter->shadow_vfta[i], TRUE); + else + e1000_write_vfta(hw, + i, adapter->shadow_vfta[i]); + } +} + +static void +igb_if_enable_intr(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + /* With RSS set up what to auto clear */ + if (adapter->intr_type == IFLIB_INTR_MSIX) { + u32 mask = (adapter->que_mask | adapter->link_mask); + + device_printf(iflib_get_dev(ctx), "enable_mask:%x\n", mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, + E1000_IMS_LSC); + } else { + E1000_WRITE_REG(&adapter->hw, E1000_IMS, + IMS_ENABLE_MASK); + } + E1000_WRITE_FLUSH(&adapter->hw); + + return; +} + +static void +igb_if_disable_intr(if_ctx_t ctx) +{ + + struct adapter *adapter = iflib_get_softc(ctx); + if (adapter->intr_type == IFLIB_INTR_MSIX) { + E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); + } + E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(&adapter->hw); + return; +} + +/* + * Bit of a misnomer, what this really means is + * to enable OS management of the system... aka + * to disable special hardware management features + */ +static void +igb_init_manageability(struct adapter *adapter) +{ + if (adapter->has_manage) { + int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); + int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); + + /* disable hardware interception of ARP */ + manc &= ~(E1000_MANC_ARP_EN); + + /* enable receiving management packets to the host */ + manc |= E1000_MANC_EN_MNG2HOST; + manc2h |= 1 << 5; /* Mng Port 623 */ + manc2h |= 1 << 6; /* Mng Port 664 */ + E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); + E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); + } +} + +/* + * Give control back to hardware management + * controller if there is one. + */ +static void +igb_release_manageability(struct adapter *adapter) +{ + if (adapter->has_manage) { + int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); + + /* re-enable hardware interception of ARP */ + manc |= E1000_MANC_ARP_EN; + manc &= ~E1000_MANC_EN_MNG2HOST; + + E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); + } +} + +/* + * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. + * + */ +static void +igb_get_hw_control(struct adapter *adapter) +{ + u32 ctrl_ext; + + if (adapter->vf_ifp) + return; + + /* Let firmware know the driver has taken over */ + ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, + ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); +} + +/* + * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. + * + */ +static void +igb_release_hw_control(struct adapter *adapter) +{ + u32 ctrl_ext; + + if (adapter->vf_ifp) + return; + + /* Let firmware taken over control of h/w */ + ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); +} + +static int +igb_is_valid_ether_addr(uint8_t *addr) +{ + char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; + + if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { + return (FALSE); + } + + return (TRUE); +} + + +/* + * Enable PCI Wake On Lan capability + */ +static void +igb_enable_wakeup(device_t dev) +{ + u16 cap, status; + u8 id; + + /* First find the capabilities pointer*/ + cap = pci_read_config(dev, PCIR_CAP_PTR, 2); + /* Read the PM Capabilities */ + id = pci_read_config(dev, cap, 1); + if (id != PCIY_PMG) /* Something wrong */ + return; + /* OK, we have the power capabilities, so + now get the status register */ + cap += PCIR_POWER_STATUS; + status = pci_read_config(dev, cap, 2); + status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; + pci_write_config(dev, cap, status, 2); + return; +} + +static void +igb_if_led_func(if_ctx_t ctx, int onoff) +{ + struct adapter *adapter = iflib_get_softc(ctx); + + if (onoff) { + e1000_setup_led(&adapter->hw); + e1000_led_on(&adapter->hw); + } else { + e1000_led_off(&adapter->hw); + e1000_cleanup_led(&adapter->hw); + } +} + +static uint64_t +igb_get_vf_counter(if_ctx_t ctx, ift_counter cnt) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + struct e1000_vf_stats *stats; + + stats = (struct e1000_vf_stats *)adapter->stats; + + switch (cnt) { + case IFCOUNTER_IPACKETS: + return (stats->gprc); + case IFCOUNTER_OPACKETS: + return (stats->gptc); + case IFCOUNTER_IBYTES: + return (stats->gorc); + case IFCOUNTER_OBYTES: + return (stats->gotc); + case IFCOUNTER_IMCASTS: + return (stats->mprc); + case IFCOUNTER_IERRORS: + return (adapter->dropped_pkts); + case IFCOUNTER_OERRORS: + return (adapter->watchdog_events); + default: + return (if_get_counter_default(ifp, cnt)); + } +} + +static uint64_t +igb_if_get_counter(if_ctx_t ctx, ift_counter cnt) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + struct e1000_hw_stats *stats; + + if (adapter->vf_ifp) + return (igb_get_vf_counter(ctx, cnt)); + + stats = (struct e1000_hw_stats *)adapter->stats; + + switch (cnt) { + case IFCOUNTER_IPACKETS: + return (stats->gprc); + case IFCOUNTER_OPACKETS: + return (stats->gptc); + case IFCOUNTER_IBYTES: + return (stats->gorc); + case IFCOUNTER_OBYTES: + return (stats->gotc); + case IFCOUNTER_IMCASTS: + return (stats->mprc); + case IFCOUNTER_OMCASTS: + return (stats->mptc); + case IFCOUNTER_IERRORS: + return (adapter->dropped_pkts + stats->rxerrc + + stats->crcerrs + stats->algnerrc + + stats->ruc + stats->roc + stats->cexterr); + case IFCOUNTER_OERRORS: + return (stats->ecol + stats->latecol + + adapter->watchdog_events); + case IFCOUNTER_COLLISIONS: + return (stats->colc); + case IFCOUNTER_IQDROPS: + return (stats->mpc); + default: + return (if_get_counter_default(ifp, cnt)); + } +} + +/********************************************************************** + * + * Update the board statistics counters. + * + **********************************************************************/ +static void +igb_update_stats_counters(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_hw_stats *stats; + + /* + ** The virtual function adapter has only a + ** small controlled set of stats, do only + ** those and return. + */ + if (adapter->vf_ifp) { + igb_update_vf_stats_counters(adapter); + return; + } + + stats = (struct e1000_hw_stats *)adapter->stats; + + if (adapter->hw.phy.media_type == e1000_media_type_copper || + (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { + stats->symerrs += + E1000_READ_REG(hw,E1000_SYMERRS); + stats->sec += E1000_READ_REG(hw, E1000_SEC); + } + + stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); + stats->mpc += E1000_READ_REG(hw, E1000_MPC); + stats->scc += E1000_READ_REG(hw, E1000_SCC); + stats->ecol += E1000_READ_REG(hw, E1000_ECOL); + + stats->mcc += E1000_READ_REG(hw, E1000_MCC); + stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); + stats->colc += E1000_READ_REG(hw, E1000_COLC); + stats->dc += E1000_READ_REG(hw, E1000_DC); + stats->rlec += E1000_READ_REG(hw, E1000_RLEC); + stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); + stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); + /* + ** For watchdog management we need to know if we have been + ** paused during the last interval, so capture that here. + */ + adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); + stats->xoffrxc += adapter->pause_frames; + stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); + stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); + stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); + stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); + stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); + stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); + stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); + stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); + stats->gprc += E1000_READ_REG(hw, E1000_GPRC); + stats->bprc += E1000_READ_REG(hw, E1000_BPRC); + stats->mprc += E1000_READ_REG(hw, E1000_MPRC); + stats->gptc += E1000_READ_REG(hw, E1000_GPTC); + + /* For the 64-bit byte counters the low dword must be read first. */ + /* Both registers clear on the read of the high dword */ + + stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + + ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); + stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + + ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); + + stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); + stats->ruc += E1000_READ_REG(hw, E1000_RUC); + stats->rfc += E1000_READ_REG(hw, E1000_RFC); + stats->roc += E1000_READ_REG(hw, E1000_ROC); + stats->rjc += E1000_READ_REG(hw, E1000_RJC); + + stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC); + stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC); + stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC); + + stats->tor += E1000_READ_REG(hw, E1000_TORL) + + ((u64)E1000_READ_REG(hw, E1000_TORH) << 32); + stats->tot += E1000_READ_REG(hw, E1000_TOTL) + + ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32); + + stats->tpr += E1000_READ_REG(hw, E1000_TPR); + stats->tpt += E1000_READ_REG(hw, E1000_TPT); + stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); + stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); + stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); + stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); + stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); + stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); + stats->mptc += E1000_READ_REG(hw, E1000_MPTC); + stats->bptc += E1000_READ_REG(hw, E1000_BPTC); + + /* Interrupt Counts */ + + stats->iac += E1000_READ_REG(hw, E1000_IAC); + stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); + stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); + stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); + stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); + stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); + stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); + stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); + stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); + + /* Host to Card Statistics */ + + stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); + stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); + stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); + stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); + stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); + stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); + stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); + stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + + ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); + stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + + ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); + stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); + stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); + stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); + + stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); + stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); + stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); + stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); + stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); + stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); + + /* Driver specific counters */ + adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); + adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); + adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); + adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); + adapter->packet_buf_alloc_tx = + ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); + adapter->packet_buf_alloc_rx = + (E1000_READ_REG(hw, E1000_PBA) & 0xffff); +} + + +/********************************************************************** + * + * Initialize the VF board statistics counters. + * + **********************************************************************/ +static void +igb_vf_init_stats(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_vf_stats *stats; + + stats = (struct e1000_vf_stats *)adapter->stats; + if (stats == NULL) + return; + stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); + stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); + stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); + stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); + stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); +} + +/********************************************************************** + * + * Update the VF board statistics counters. + * + **********************************************************************/ +static void +igb_update_vf_stats_counters(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_vf_stats *stats; + + if (adapter->link_speed == 0) + return; + + stats = (struct e1000_vf_stats *)adapter->stats; + + UPDATE_VF_REG(E1000_VFGPRC, + stats->last_gprc, stats->gprc); + UPDATE_VF_REG(E1000_VFGORC, + stats->last_gorc, stats->gorc); + UPDATE_VF_REG(E1000_VFGPTC, + stats->last_gptc, stats->gptc); + UPDATE_VF_REG(E1000_VFGOTC, + stats->last_gotc, stats->gotc); + UPDATE_VF_REG(E1000_VFMPRC, + stats->last_mprc, stats->mprc); +} + +/* Export a single 32-bit register via a read-only sysctl. */ +static int +igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter; + u_int val; + + adapter = oidp->oid_arg1; + val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); + return (sysctl_handle_int(oidp, &val, 0, req)); +} + +/* +** Tuneable interrupt rate handler +*/ +static int +igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) +{ + struct igb_rx_queue *que = ((struct igb_rx_queue *)oidp->oid_arg1); + int error; + u32 reg, usec, rate; + + reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); + usec = ((reg & 0x7FFC) >> 2); + if (usec > 0) + rate = 1000000 / usec; + else + rate = 0; + error = sysctl_handle_int(oidp, &rate, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/* + * Add sysctl variables, one per statistic, to the system. + */ +static void +igb_add_hw_stats(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct igb_rx_queue *rx_que; + struct igb_tx_queue *tx_que; + int i; + + struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); + struct sysctl_oid *tree = device_get_sysctl_tree(dev); + struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); + struct e1000_hw_stats *stats = adapter->stats; + + struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; + struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; + +#define QUEUE_NAME_LEN 32 + char namebuf[QUEUE_NAME_LEN]; + + /* Driver Statistics */ + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", + CTLFLAG_RD, &adapter->dropped_pkts, + "Driver dropped packets"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", + CTLFLAG_RD, &adapter->rx_overruns, + "RX overruns"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", + CTLFLAG_RD, &adapter->watchdog_events, + "Watchdog timeouts"); + + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", + CTLFLAG_RD, &adapter->device_control, + "Device Control Register"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", + CTLFLAG_RD, &adapter->rx_control, + "Receiver Control Register"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", + CTLFLAG_RD, &adapter->int_mask, + "Interrupt Mask"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", + CTLFLAG_RD, &adapter->eint_mask, + "Extended Interrupt Mask"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", + CTLFLAG_RD, &adapter->packet_buf_alloc_tx, + "Transmit Buffer Packet Allocation"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", + CTLFLAG_RD, &adapter->packet_buf_alloc_rx, + "Receive Buffer Packet Allocation"); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", + CTLFLAG_RD, &adapter->hw.fc.high_water, 0, + "Flow Control High Watermark"); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", + CTLFLAG_RD, &adapter->hw.fc.low_water, 0, + "Flow Control Low Watermark"); + + for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + + snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); + queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, + CTLFLAG_RD, NULL, "Queue Name"); + queue_list = SYSCTL_CHILDREN(queue_node); + + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", + CTLTYPE_UINT | CTLFLAG_RD, &adapter->tx_queues[i], + sizeof(&adapter->tx_queues[i]), + igb_sysctl_interrupt_rate_handler, + "IU", "Interrupt Rate"); + + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), + igb_sysctl_reg_handler, "IU", + "Transmit Descriptor Head"); + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), + igb_sysctl_reg_handler, "IU", + "Transmit Descriptor Tail"); + SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", + CTLFLAG_RD, &txr->no_desc_avail, + "Queue Descriptors Unavailable"); + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", + CTLFLAG_RD, &txr->total_packets, + "Queue Packets Transmitted"); + + } + + + for (i = 0, rx_que = adapter->rx_queues; i < adapter->rx_num_queues; i++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; + struct lro_ctrl *lro = &rxr->lro; + + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), + igb_sysctl_reg_handler, "IU", + "Receive Descriptor Head"); + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), + igb_sysctl_reg_handler, "IU", + "Receive Descriptor Tail"); + SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", + CTLFLAG_RD, &rxr->rx_packets, + "Queue Packets Received"); + SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", + CTLFLAG_RD, &rxr->rx_bytes, + "Queue Bytes Received"); + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", + CTLFLAG_RD, &lro->lro_queued, 0, + "LRO Queued"); + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", + CTLFLAG_RD, &lro->lro_flushed, 0, + "LRO Flushed"); + } + + /* MAC stats get their own sub node */ + + stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", + CTLFLAG_RD, NULL, "MAC Statistics"); + stat_list = SYSCTL_CHILDREN(stat_node); + + /* + ** VF adapter has a very limited set of stats + ** since its not managing the metal, so to speak. + */ + if (adapter->vf_ifp) { + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", + CTLFLAG_RD, &stats->gprc, + "Good Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", + CTLFLAG_RD, &stats->gptc, + "Good Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", + CTLFLAG_RD, &stats->gorc, + "Good Octets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &stats->gotc, + "Good Octets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", + CTLFLAG_RD, &stats->mprc, + "Multicast Packets Received"); + return; + } + + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", + CTLFLAG_RD, &stats->ecol, + "Excessive collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", + CTLFLAG_RD, &stats->scc, + "Single collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", + CTLFLAG_RD, &stats->mcc, + "Multiple collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", + CTLFLAG_RD, &stats->latecol, + "Late collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", + CTLFLAG_RD, &stats->colc, + "Collision Count"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", + CTLFLAG_RD, &stats->symerrs, + "Symbol Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", + CTLFLAG_RD, &stats->sec, + "Sequence Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", + CTLFLAG_RD, &stats->dc, + "Defer Count"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", + CTLFLAG_RD, &stats->mpc, + "Missed Packets"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors", + CTLFLAG_RD, &stats->rlec, + "Receive Length Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", + CTLFLAG_RD, &stats->rnbc, + "Receive No Buffers"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", + CTLFLAG_RD, &stats->ruc, + "Receive Undersize"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", + CTLFLAG_RD, &stats->rfc, + "Fragmented Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", + CTLFLAG_RD, &stats->roc, + "Oversized Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", + CTLFLAG_RD, &stats->rjc, + "Recevied Jabber"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", + CTLFLAG_RD, &stats->rxerrc, + "Receive Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", + CTLFLAG_RD, &stats->crcerrs, + "CRC errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", + CTLFLAG_RD, &stats->algnerrc, + "Alignment Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs", + CTLFLAG_RD, &stats->tncrs, + "Transmit with No CRS"); + /* On 82575 these are collision counts */ + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", + CTLFLAG_RD, &stats->cexterr, + "Collision/Carrier extension errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", + CTLFLAG_RD, &stats->xonrxc, + "XON Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", + CTLFLAG_RD, &stats->xontxc, + "XON Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", + CTLFLAG_RD, &stats->xoffrxc, + "XOFF Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", + CTLFLAG_RD, &stats->xofftxc, + "XOFF Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd", + CTLFLAG_RD, &stats->fcruc, + "Unsupported Flow Control Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd", + CTLFLAG_RD, &stats->mgprc, + "Management Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop", + CTLFLAG_RD, &stats->mgpdc, + "Management Packets Dropped"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd", + CTLFLAG_RD, &stats->mgptc, + "Management Packets Transmitted"); + /* Packet Reception Stats */ + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", + CTLFLAG_RD, &stats->tpr, + "Total Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", + CTLFLAG_RD, &stats->gprc, + "Good Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", + CTLFLAG_RD, &stats->bprc, + "Broadcast Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", + CTLFLAG_RD, &stats->mprc, + "Multicast Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", + CTLFLAG_RD, &stats->prc64, + "64 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", + CTLFLAG_RD, &stats->prc127, + "65-127 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", + CTLFLAG_RD, &stats->prc255, + "128-255 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", + CTLFLAG_RD, &stats->prc511, + "256-511 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", + CTLFLAG_RD, &stats->prc1023, + "512-1023 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", + CTLFLAG_RD, &stats->prc1522, + "1023-1522 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", + CTLFLAG_RD, &stats->gorc, + "Good Octets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", + CTLFLAG_RD, &stats->tor, + "Total Octets Received"); + + /* Packet Transmission Stats */ + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &stats->gotc, + "Good Octets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", + CTLFLAG_RD, &stats->tot, + "Total Octets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", + CTLFLAG_RD, &stats->tpt, + "Total Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", + CTLFLAG_RD, &stats->gptc, + "Good Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", + CTLFLAG_RD, &stats->bptc, + "Broadcast Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", + CTLFLAG_RD, &stats->mptc, + "Multicast Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", + CTLFLAG_RD, &stats->ptc64, + "64 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", + CTLFLAG_RD, &stats->ptc127, + "65-127 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", + CTLFLAG_RD, &stats->ptc255, + "128-255 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", + CTLFLAG_RD, &stats->ptc511, + "256-511 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", + CTLFLAG_RD, &stats->ptc1023, + "512-1023 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", + CTLFLAG_RD, &stats->ptc1522, + "1024-1522 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", + CTLFLAG_RD, &stats->tsctc, + "TSO Contexts Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", + CTLFLAG_RD, &stats->tsctfc, + "TSO Contexts Failed"); + + + /* Interrupt Stats */ + + int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", + CTLFLAG_RD, NULL, "Interrupt Statistics"); + int_list = SYSCTL_CHILDREN(int_node); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", + CTLFLAG_RD, &stats->iac, + "Interrupt Assertion Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", + CTLFLAG_RD, &stats->icrxptc, + "Interrupt Cause Rx Pkt Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", + CTLFLAG_RD, &stats->icrxatc, + "Interrupt Cause Rx Abs Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", + CTLFLAG_RD, &stats->ictxptc, + "Interrupt Cause Tx Pkt Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", + CTLFLAG_RD, &stats->ictxatc, + "Interrupt Cause Tx Abs Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", + CTLFLAG_RD, &stats->ictxqec, + "Interrupt Cause Tx Queue Empty Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", + CTLFLAG_RD, &stats->ictxqmtc, + "Interrupt Cause Tx Queue Min Thresh Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", + CTLFLAG_RD, &stats->icrxdmtc, + "Interrupt Cause Rx Desc Min Thresh Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", + CTLFLAG_RD, &stats->icrxoc, + "Interrupt Cause Receiver Overrun Count"); + + /* Host to Card Stats */ + + host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", + CTLFLAG_RD, NULL, + "Host to Card Statistics"); + + host_list = SYSCTL_CHILDREN(host_node); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", + CTLFLAG_RD, &stats->cbtmpc, + "Circuit Breaker Tx Packet Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", + CTLFLAG_RD, &stats->htdpmc, + "Host Transmit Discarded Packets"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", + CTLFLAG_RD, &stats->rpthc, + "Rx Packets To Host"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", + CTLFLAG_RD, &stats->cbrmpc, + "Circuit Breaker Rx Packet Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", + CTLFLAG_RD, &stats->cbrdpc, + "Circuit Breaker Rx Dropped Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", + CTLFLAG_RD, &stats->hgptc, + "Host Good Packets Tx Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", + CTLFLAG_RD, &stats->htcbdpc, + "Host Tx Circuit Breaker Dropped Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", + CTLFLAG_RD, &stats->hgorc, + "Host Good Octets Received Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", + CTLFLAG_RD, &stats->hgotc, + "Host Good Octets Transmit Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", + CTLFLAG_RD, &stats->lenerrs, + "Length Errors"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", + CTLFLAG_RD, &stats->scvpc, + "SerDes/SGMII Code Violation Pkt Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", + CTLFLAG_RD, &stats->hrmpc, + "Header Redirection Missed Packet Count"); +} + + +/********************************************************************** + * + * This routine provides a way to dump out the adapter eeprom, + * often a useful debug/service tool. This only dumps the first + * 32 words, stuff that matters is in that extent. + * + **********************************************************************/ +static int +igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter; + int error; + int result; + + result = -1; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr) + return (error); + + /* + * This value will cause a hex dump of the + * first 32 16-bit words of the EEPROM to + * the screen. + */ + if (result == 1) { + adapter = (struct adapter *)arg1; + igb_print_nvm_info(adapter); + } + + return (error); +} + +static void +igb_print_nvm_info(struct adapter *adapter) +{ + u16 eeprom_data; + int i, j, row = 0; + + /* Its a bit crude, but it gets the job done */ + printf("\nInterface EEPROM Dump:\n"); + printf("Offset\n0x0000 "); + for (i = 0, j = 0; i < 32; i++, j++) { + if (j == 8) { /* Make the offset block */ + j = 0; ++row; + printf("\n0x00%x0 ",row); + } + e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); + printf("%04x ", eeprom_data); + } + printf("\n"); +} + +static void +igb_set_sysctl_value(struct adapter *adapter, const char *name, + const char *description, int *limit, int value) +{ + *limit = value; + SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), + OID_AUTO, name, CTLFLAG_RW, limit, value, description); +} + +/* +** Set flow control using sysctl: +** Flow control values: +** 0 - off +** 1 - rx pause +** 2 - tx pause +** 3 - full +*/ +static int +igb_set_flowcntl(SYSCTL_HANDLER_ARGS) +{ + int error; + static int input = 3; /* default is full */ + struct adapter *adapter = (struct adapter *) arg1; + + error = sysctl_handle_int(oidp, &input, 0, req); + + if ((error) || (req->newptr == NULL)) + return (error); + + switch (input) { + case e1000_fc_rx_pause: + case e1000_fc_tx_pause: + case e1000_fc_full: + case e1000_fc_none: + adapter->hw.fc.requested_mode = input; + adapter->fc = input; + break; + default: + /* Do nothing */ + return (error); + } + + adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; + e1000_force_mac_fc(&adapter->hw); + /* XXX TODO: update DROP_EN on each RX queue if appropriate */ + return (error); +} + +/* +** Manage DMA Coalesce: +** Control values: +** 0/1 - off/on +** Legal timer values are: +** 250,500,1000-10000 in thousands +*/ +static int +igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + int error; + + error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); + + if ((error) || (req->newptr == NULL)) + return (error); + + switch (adapter->dmac) { + case 0: + /* Disabling */ + break; + case 1: /* Just enable and use default */ + adapter->dmac = 1000; + break; + case 250: + case 500: + case 1000: + case 2000: + case 3000: + case 4000: + case 5000: + case 6000: + case 7000: + case 8000: + case 9000: + case 10000: + /* Legal values - allow */ + break; + default: + /* Do nothing, illegal value */ + adapter->dmac = 0; + return (EINVAL); + } + /* Reinit the interface */ + igb_if_init(adapter->ctx); + return (error); +} + +/* +** Manage Energy Efficient Ethernet: +** Control values: +** 0/1 - enabled/disabled +*/ +static int +igb_sysctl_eee(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + int error, value; + + value = adapter->hw.dev_spec._82575.eee_disable; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || req->newptr == NULL) + return (error); + adapter->hw.dev_spec._82575.eee_disable = (value != 0); + igb_if_init(adapter->ctx); + return (0); +} Index: sys/dev/e1000/iflib_igb_txrx.c =================================================================== --- /dev/null +++ sys/dev/e1000/iflib_igb_txrx.c @@ -0,0 +1,592 @@ +#include "if_igb.h" + +#ifdef RSS +#include +#include +#endif + +/********************************************************************* + * Local Function prototypes + *********************************************************************/ +static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi); +static void igb_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx); +static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx, bool clear); + +static void igb_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, + uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buf_len __unused); +static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx); +static int igb_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget); +static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); + +static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); +static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); + +static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype); +static int igb_determine_rsstype(u16 pkt_info); +void igb_init_tx_ring(struct igb_tx_queue *que); + +extern void igb_if_enable_intr(if_ctx_t ctx); +extern int igb_intr(void *arg); + +struct if_txrx igb_txrx = { + igb_isc_txd_encap, + igb_isc_txd_flush, + igb_isc_txd_credits_update, + igb_isc_rxd_available, + igb_isc_rxd_pkt_get, + igb_isc_rxd_refill, + igb_isc_rxd_flush, + igb_intr +}; + +extern if_shared_ctx_t igb_sctx; + +void +igb_init_tx_ring(struct igb_tx_queue *que) +{ + struct adapter *adapter = que->adapter; + if_softc_ctx_t scctx = adapter->shared; + struct tx_ring *txr = &que->txr; + struct igb_tx_buf *buf; + + buf = txr->tx_buffers; + for (int i = 0; i < scctx->isc_ntxd[0]; i++, buf++) { + buf->eop = NULL; + } +} + +/********************************************************************** + * + * Setup work for hardware segmentation offload (TSO) on + * adapters using advanced tx descriptors + * + **********************************************************************/ +static int +igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) +{ + struct e1000_adv_tx_context_desc *TXD; + struct adapter *adapter = txr->adapter; + u32 type_tucmd_mlhl = 0, vlan_macip_lens = 0; + u32 mss_l4len_idx = 0; + u32 paylen; + + switch(pi->ipi_etype) { + case ETHERTYPE_IPV6: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; + break; + case ETHERTYPE_IP: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + /* Tell transmit desc to also do IPv4 checksum. */ + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; + break; + default: + panic("%s: CSUM_TSO but no supported IP version (0x%04x)", + __func__, ntohs(pi->ipi_etype)); + break; + } + + TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; + + /* This is used in the transmit desc in encap */ + paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; + + /* VLAN MACLEN IPLEN */ + if (pi->ipi_mflags & M_VLANTAG) { + vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); + } + + vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= pi->ipi_ip_hlen; + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + + /* ADV DTYPE TUCMD */ + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + + /* MSS L4LEN IDX */ + mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT); + mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); + /* 82575 needs the queue index added */ + if (adapter->hw.mac.type == e1000_82575) + mss_l4len_idx |= txr->me << 4; + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + TXD->seqnum_seed = htole32(0); + *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; + + ++txr->tso_tx; + + return 0; +} + +/********************************************************************* + * + * Advanced Context Descriptor setup for VLAN, CSUM or TSO + * + **********************************************************************/ +static int +igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) +{ + struct e1000_adv_tx_context_desc *TXD; + struct adapter *adapter = txr->adapter; + u32 vlan_macip_lens, type_tucmd_mlhl; + u32 mss_l4len_idx; + mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; + int offload = TRUE; + + /* First check if TSO is to be used */ + if (pi->ipi_csum_flags & CSUM_TSO) + return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status)); + + /* Indicate the whole packet as payload when not doing TSO */ + *olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT; + + /* Now ready a context descriptor */ + TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; + + /* + ** In advanced descriptors the vlan tag must + ** be placed into the context descriptor. Hence + ** we need to make one even if not doing offloads. + */ + if (pi->ipi_mflags & M_VLANTAG) { + vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); + } else if (pi->ipi_csum_flags & CSUM_OFFLOAD) { + return (0); + } + + /* Set the ether header length */ + vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + + switch(pi->ipi_etype) { + case ETHERTYPE_IP: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + break; + case ETHERTYPE_IPV6: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; + break; + default: + offload = FALSE; + break; + } + + vlan_macip_lens |= pi->ipi_ip_hlen; + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + switch (pi->ipi_ipproto) { + case IPPROTO_TCP: + #if __FreeBSD_version >= 1000000 + if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) +#else + if (pi->ipi_csum_flags & CSUM_TCP) +#endif + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + break; + case IPPROTO_UDP: +#if __FreeBSD_version >= 1000000 + if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) +#else + if (pi->ipi_csum_flags & CSUM_UDP) +#endif + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; + break; + +#if __FreeBSD_version >= 800000 + case IPPROTO_SCTP: +#if __FreeBSD_version >= 1000000 + if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) +#else + if (pi->ipi_csum_flags & CSUM_SCTP) +#endif + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; + break; +#endif + default: + offload = FALSE; + break; + } + + if (offload) /* For the TX descriptor setup */ + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + + /* 82575 needs the queue index added */ + if (adapter->hw.mac.type == e1000_82575) + mss_l4len_idx = txr->me << 4; + + /* Now copy bits into descriptor */ + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + TXD->seqnum_seed = htole32(0); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + return (0); +} + +static int +igb_isc_txd_encap(void *arg, if_pkt_info_t pi) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct igb_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; + struct tx_ring *txr = &que->txr; + int nsegs = pi->ipi_nsegs; + bus_dma_segment_t *segs = pi->ipi_segs; + struct igb_tx_buf *txbuf; + union e1000_adv_tx_desc *txd = NULL; + + int i, j, first; + u32 olinfo_status = 0, cmd_type_len; + + /* Basic descriptor defines */ + cmd_type_len = (E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); + + if (pi->ipi_mflags & M_VLANTAG) + cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + + first = i = pi->ipi_pidx; + + /* Consume the first descriptor */ + igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status); + if (++i == scctx->isc_ntxd[0]) { + i = 0; + } + + /* 82575 needs the queue index added */ + if (sc->hw.mac.type == e1000_82575) + olinfo_status |= txr->me << 4; + + for (j = 0; j < nsegs; j++) { + bus_size_t seglen; + bus_addr_t segaddr; + + txbuf = &txr->tx_buffers[i]; + txd = &txr->tx_base[i]; + seglen = segs[j].ds_len; + segaddr = htole64(segs[j].ds_addr); + + txd->read.buffer_addr = segaddr; + txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | + cmd_type_len | seglen); + txd->read.olinfo_status = htole32(olinfo_status); + + if (++i == scctx->isc_ntxd[0]) { + i = 0; + } + } + + txd->read.cmd_type_len |= + htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); + + /* Set the EOP descriptor that will be marked done */ + txbuf = &txr->tx_buffers[first]; + txbuf->eop = txd; + + pi->ipi_new_pidx = i; + ++txr->total_packets; + + return (0); +} + +static void +igb_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx) +{ + struct adapter *adapter = arg; + struct igb_tx_queue *que = &adapter->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx); +} + +static int +igb_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear) +{ + struct adapter *adapter = arg; + if_softc_ctx_t scctx = adapter->shared; + struct igb_tx_queue *que = &adapter->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + u32 cidx, ntxd, processed = 0; + + struct igb_tx_buf *buf; + union e1000_adv_tx_desc *txd; + int limit; + + cidx = cidx_init; + + buf = &txr->tx_buffers[cidx]; + txd = &txr->tx_base[cidx]; + ntxd = scctx->isc_ntxd[0]; + limit = adapter->tx_process_limit; + + do { + union e1000_adv_tx_desc *eop = buf->eop; + + if (eop == NULL) /* No work */ + break; + + if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) + break; /* I/O not complete */ + + if (clear) + buf->eop = NULL; /* clear indicate processed */ + + /* We clean the range if multi segment */ + while (txd != eop) { + ++txd; + ++buf; + /* wrap the ring? */ + if (++cidx == scctx->isc_ntxd[0]) { + cidx = 0; + buf = txr->tx_buffers; + txd = txr->tx_base; + } + + buf = &txr->tx_buffers[cidx]; + if (clear) + buf->eop = NULL; + processed++; + } + processed++; + if (clear) + ++txr->packets; + + /* Try the next packet */ + txd++; + buf++; + + /* reset with a wrap */ + if (++cidx == scctx->isc_ntxd[0]) { + cidx = 0; + buf = txr->tx_buffers; + txd = txr->tx_base; + } + prefetch(txd); + prefetch(txd+1); + } while (__predict_true(--limit) && cidx != cidx_init); + + return (processed); +} + +static void igb_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, + uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, + uint16_t count, uint16_t buf_len __unused) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct igb_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + int i; + uint32_t next_pidx; + + for (i = 0, next_pidx = pidx; i < count; i++) { + rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); + if (++next_pidx == scctx->isc_nrxd[0]) + next_pidx = 0; + } +} + +static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx) +{ + struct adapter *sc = arg; + struct igb_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + + E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); +} + +static int igb_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget) +{ + struct adapter *sc = arg; + struct igb_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + union e1000_adv_rx_desc *rxd; + u32 staterr = 0; + int cnt, i, nrxd; + + nrxd = sc->shared->isc_nrxd[0]; + for (cnt = 0, i = idx; cnt < nrxd-1 && cnt <= budget;) { + rxd = &rxr->rx_base[i]; + staterr = le32toh(rxd->wb.upper.status_error); + + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + if (++i == nrxd) + i = 0; + if (staterr & E1000_RXD_STAT_EOP) + cnt++; + } + + return (cnt); +} + +/**************************************************************** + * Routine sends data which has been dma'ed into host memory + * to upper layer. Initialize ri structure. + * + * Returns 0 upon success, errno on failure + ***************************************************************/ + +static int +igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) +{ + struct adapter *adapter = arg; + if_softc_ctx_t scctx = adapter->shared; + struct igb_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; + struct rx_ring *rxr = &que->rxr; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + union e1000_adv_rx_desc *rxd; + + u16 pkt_info, len; + u16 vtag = 0; + u32 ptype; + u32 staterr = 0; + bool eop; + int i = 0; + int cidx = ri->iri_cidx; + + do { + rxd = &rxr->rx_base[cidx]; + staterr = le32toh(rxd->wb.upper.status_error); + pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); + + MPASS ((staterr & E1000_RXD_STAT_DD) != 0); + + len = le16toh(rxd->wb.upper.length); + ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; + + ri->iri_len += len; + rxr->bytes += ri->iri_len; + rxr->rx_bytes += ri->iri_len; + + rxd->wb.upper.status_error = 0; + eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); + + if (((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) && + (staterr & E1000_RXDEXT_STATERR_LB)) + vtag = be16toh(rxd->wb.upper.vlan); + else + vtag = le16toh(rxd->wb.upper.vlan); + + /* Make sure bad packets are discarded */ + if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { + adapter->dropped_pkts++; + ++rxr->rx_discarded; + return (EBADMSG); + } + ri->iri_frags[i].irf_flid = 0; + ri->iri_frags[i].irf_idx = cidx; + ri->iri_frags[i].irf_len = len; + + if (++cidx == scctx->isc_nrxd[0]) + cidx = 0; + + if (rxr->hdr_split == TRUE) { + ri->iri_frags[i].irf_flid = 1; + ri->iri_frags[i].irf_idx = cidx; + if (++cidx == scctx->isc_nrxd[0]) + cidx = 0; + } + i++; + } while (!eop); + + rxr->packets++; + rxr->rx_packets++; + + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + igb_rx_checksum(staterr, ri, ptype); + + if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && + (staterr & E1000_RXD_STAT_VP) != 0) { + ri->iri_vtag = vtag; + ri->iri_flags |= M_VLANTAG; + } + ri->iri_flowid = + le32toh(rxd->wb.lower.hi_dword.rss); + ri->iri_rsstype = igb_determine_rsstype(pkt_info); + ri->iri_nfrags = i; + + return (0); +} + +/********************************************************************* + * + * Verify that the hardware indicated that the checksum is valid. + * Inform the stack about the status of checksum so that stack + * doesn't spend time verifying the checksum. + * + *********************************************************************/ +static void +igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype) +{ + u16 status = (u16)staterr; + u8 errors = (u8) (staterr >> 24); + bool sctp = FALSE; + + /* Ignore Checksum bit is set */ + if (status & E1000_RXD_STAT_IXSM) { + ri->iri_csum_flags = 0; + return; + } + + if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && + (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) + sctp = 1; + else + sctp = 0; + + if (status & E1000_RXD_STAT_IPCS) { + /* Did it pass? */ + if (!(errors & E1000_RXD_ERR_IPE)) { + /* IP Checksum Good */ + ri->iri_csum_flags = CSUM_IP_CHECKED; + ri->iri_csum_flags |= CSUM_IP_VALID; + } else + ri->iri_csum_flags = 0; + } + + if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { + u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); +#if __FreeBSD_version >= 800000 + if (sctp) /* reassign */ + type = CSUM_SCTP_VALID; +#endif + /* Did it pass? */ + if (!(errors & E1000_RXD_ERR_TCPE)) { + ri->iri_csum_flags |= type; + if (sctp == 0) + ri->iri_csum_data = htons(0xffff); + } + } + return; +} + +/******************************************************************** + * + * Parse the packet type to determine the appropriate hash + * + ******************************************************************/ +static int +igb_determine_rsstype(u16 pkt_info) +{ + switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { + case E1000_RXDADV_RSSTYPE_IPV4_TCP: + return M_HASHTYPE_RSS_TCP_IPV4; + case E1000_RXDADV_RSSTYPE_IPV4: + return M_HASHTYPE_RSS_IPV4; + case E1000_RXDADV_RSSTYPE_IPV6_TCP: + return M_HASHTYPE_RSS_TCP_IPV6; + case E1000_RXDADV_RSSTYPE_IPV6_EX: + return M_HASHTYPE_RSS_IPV6_EX; + case E1000_RXDADV_RSSTYPE_IPV6: + return M_HASHTYPE_RSS_IPV6; + case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: + return M_HASHTYPE_RSS_TCP_IPV6_EX; + default: + return M_HASHTYPE_OPAQUE; + } +} Index: sys/dev/e1000/legacy_if_igb.h =================================================================== --- /dev/null +++ sys/dev/e1000/legacy_if_igb.h @@ -0,0 +1,634 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/*$FreeBSD$*/ + +#ifndef _IF_IGB_H_ +#define _IF_IGB_H_ + +#ifdef ALTQ +#define IGB_LEGACY_TX +#endif + +#include +#include +#ifndef IGB_LEGACY_TX +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#ifdef RSS +#include +#include +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "e1000_api.h" +#include "e1000_82575.h" + +/* Tunables */ +/* + * IGB_TXD: Maximum number of Transmit Descriptors + * + * This value is the number of transmit descriptors allocated by the driver. + * Increasing this value allows the driver to queue more transmits. Each + * descriptor is 16 bytes. + * Since TDLEN should be multiple of 128bytes, the number of transmit + * desscriptors should meet the following condition. + * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 + */ +#define IGB_MIN_TXD 256 +#define IGB_DEFAULT_TXD 1024 +#define IGB_MAX_TXD 4096 + +/* + * IGB_RXD: Maximum number of Receive Descriptors + * + * This value is the number of receive descriptors allocated by the driver. + * Increasing this value allows the driver to buffer more incoming packets. + * Each descriptor is 16 bytes. A receive buffer is also allocated for each + * descriptor. The maximum MTU size is 16110. + * Since TDLEN should be multiple of 128bytes, the number of transmit + * desscriptors should meet the following condition. + * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 + */ +#define IGB_MIN_RXD 256 +#define IGB_DEFAULT_RXD 1024 +#define IGB_MAX_RXD 4096 + +/* + * IGB_TIDV - Transmit Interrupt Delay Value + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value delays the generation of transmit interrupts in units of + * 1.024 microseconds. Transmit interrupt reduction can improve CPU + * efficiency if properly tuned for specific network traffic. If the + * system is reporting dropped transmits, this value may be set too high + * causing the driver to run out of available transmit descriptors. + */ +#define IGB_TIDV 64 + +/* + * IGB_TADV - Transmit Absolute Interrupt Delay Value + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value, in units of 1.024 microseconds, limits the delay in which a + * transmit interrupt is generated. Useful only if IGB_TIDV is non-zero, + * this value ensures that an interrupt is generated after the initial + * packet is sent on the wire within the set amount of time. Proper tuning, + * along with IGB_TIDV, may improve traffic throughput in specific + * network conditions. + */ +#define IGB_TADV 64 + +/* + * IGB_RDTR - Receive Interrupt Delay Timer (Packet Timer) + * Valid Range: 0-65535 (0=off) + * Default Value: 0 + * This value delays the generation of receive interrupts in units of 1.024 + * microseconds. Receive interrupt reduction can improve CPU efficiency if + * properly tuned for specific network traffic. Increasing this value adds + * extra latency to frame reception and can end up decreasing the throughput + * of TCP traffic. If the system is reporting dropped receives, this value + * may be set too high, causing the driver to run out of available receive + * descriptors. + * + * CAUTION: When setting IGB_RDTR to a value other than 0, adapters + * may hang (stop transmitting) under certain network conditions. + * If this occurs a WATCHDOG message is logged in the system + * event log. In addition, the controller is automatically reset, + * restoring the network connection. To eliminate the potential + * for the hang ensure that IGB_RDTR is set to 0. + */ +#define IGB_RDTR 0 + +/* + * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value, in units of 1.024 microseconds, limits the delay in which a + * receive interrupt is generated. Useful only if IGB_RDTR is non-zero, + * this value ensures that an interrupt is generated after the initial + * packet is received within the set amount of time. Proper tuning, + * along with IGB_RDTR, may improve traffic throughput in specific network + * conditions. + */ +#define IGB_RADV 64 + +/* + * This parameter controls the duration of transmit watchdog timer. + */ +#define IGB_WATCHDOG (10 * hz) + +/* + * This parameter controls when the driver calls the routine to reclaim + * transmit descriptors. Cleaning earlier seems a win. + */ +#define IGB_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 2) + +/* + * This parameter controls whether or not autonegotation is enabled. + * 0 - Disable autonegotiation + * 1 - Enable autonegotiation + */ +#define DO_AUTO_NEG 1 + +/* + * This parameter control whether or not the driver will wait for + * autonegotiation to complete. + * 1 - Wait for autonegotiation to complete + * 0 - Don't wait for autonegotiation to complete + */ +#define WAIT_FOR_AUTO_NEG_DEFAULT 0 + +/* Tunables -- End */ + +#define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ + ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ + ADVERTISE_1000_FULL) + +#define AUTO_ALL_MODES 0 + +/* PHY master/slave setting */ +#define IGB_MASTER_SLAVE e1000_ms_hw_default + +/* Support AutoMediaDetect for Marvell M88 PHY in i354 */ +#define IGB_MEDIA_RESET (1 << 0) + +/* + * Micellaneous constants + */ +#define IGB_INTEL_VENDOR_ID 0x8086 + +#define IGB_JUMBO_PBA 0x00000028 +#define IGB_DEFAULT_PBA 0x00000030 +#define IGB_SMARTSPEED_DOWNSHIFT 3 +#define IGB_SMARTSPEED_MAX 15 +#define IGB_MAX_LOOP 10 + +#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ + ((hw->mac.type <= e1000_82576) ? 16 : 8)) +#define IGB_RX_HTHRESH 8 +#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ + adapter->msix_mem) ? 1 : 4) + +#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) +#define IGB_TX_HTHRESH 1 +#define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ + adapter->msix_mem) ? 1 : 16) + +#define MAX_NUM_MULTICAST_ADDRESSES 128 +#define PCI_ANY_ID (~0U) +#define ETHER_ALIGN 2 +#define IGB_TX_BUFFER_SIZE ((uint32_t) 1514) +#define IGB_FC_PAUSE_TIME 0x0680 +#define IGB_EEPROM_APME 0x400; +/* Queue minimum free for use */ +#define IGB_QUEUE_THRESHOLD (adapter->num_tx_desc / 8) + +/* + * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be + * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will + * also optimize cache line size effect. H/W supports up to cache line size 128. + */ +#define IGB_DBA_ALIGN 128 + +#define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */ + +/* PCI Config defines */ +#define IGB_MSIX_BAR 3 + +/* Defines for printing debug information */ +#define DEBUG_INIT 0 +#define DEBUG_IOCTL 0 +#define DEBUG_HW 0 + +#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") +#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) +#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) +#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") +#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) +#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) +#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") +#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) +#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) + +#define IGB_MAX_SCATTER 40 +#define IGB_VFTA_SIZE 128 +#define IGB_BR_SIZE 4096 /* ring buf size */ +#define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) +#define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */ +#define IGB_TXPBSIZE 20408 +#define IGB_HDR_BUF 128 +#define IGB_PKTTYPE_MASK 0x0000FFF0 +#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ +#define ETH_ZLEN 60 +#define ETH_ADDR_LEN 6 + +/* Offload bits in mbuf flag */ +#if __FreeBSD_version >= 1000000 +#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP|CSUM_IP_SCTP) +#define CSUM_OFFLOAD_IPV6 (CSUM_IP6_TCP|CSUM_IP6_UDP|CSUM_IP6_SCTP) +#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6) +#elif __FreeBSD_version >= 800000 +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) +#else +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) +#endif + +/* Define the starting Interrupt rate per Queue */ +#define IGB_INTS_PER_SEC 8000 +#define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) + +#define IGB_LINK_ITR 2000 +#define I210_LINK_DELAY 1000 + +/* Precision Time Sync (IEEE 1588) defines */ +#define ETHERTYPE_IEEE1588 0x88F7 +#define PICOSECS_PER_TICK 20833 +#define TSYNC_PORT 319 /* UDP port for the protocol */ + +/* + * Bus dma allocation structure used by + * e1000_dma_malloc and e1000_dma_free. + */ +struct igb_dma_alloc { + bus_addr_t dma_paddr; + caddr_t dma_vaddr; + bus_dma_tag_t dma_tag; + bus_dmamap_t dma_map; + bus_dma_segment_t dma_seg; + int dma_nseg; +}; + + +/* +** Driver queue struct: this is the interrupt container +** for the associated tx and rx ring. +*/ +struct igb_queue { + struct adapter *adapter; + u32 msix; /* This queue's MSIX vector */ + u32 eims; /* This queue's EIMS bit */ + u32 eitr_setting; + struct resource *res; + void *tag; + struct tx_ring *txr; + struct rx_ring *rxr; + struct task que_task; + struct taskqueue *tq; + u64 irqs; +}; + +/* + * The transmit ring, one per queue + */ +struct tx_ring { + struct adapter *adapter; + struct mtx tx_mtx; + u32 me; + int watchdog_time; + union e1000_adv_tx_desc *tx_base; + struct igb_tx_buf *tx_buffers; + struct igb_dma_alloc txdma; + volatile u16 tx_avail; + u16 next_avail_desc; + u16 next_to_clean; + u16 num_desc; + enum { + IGB_QUEUE_IDLE = 1, + IGB_QUEUE_WORKING = 2, + IGB_QUEUE_HUNG = 4, + IGB_QUEUE_DEPLETED = 8, + } queue_status; + u32 txd_cmd; + bus_dma_tag_t txtag; + char mtx_name[16]; +#ifndef IGB_LEGACY_TX + struct buf_ring *br; + struct task txq_task; +#endif + u32 bytes; /* used for AIM */ + u32 packets; + /* Soft Stats */ + unsigned long tso_tx; + unsigned long no_tx_map_avail; + unsigned long no_tx_dma_setup; + u64 no_desc_avail; + u64 total_packets; +}; + +/* + * Receive ring: one per queue + */ +struct rx_ring { + struct adapter *adapter; + u32 me; + struct igb_dma_alloc rxdma; + union e1000_adv_rx_desc *rx_base; + struct lro_ctrl lro; + bool lro_enabled; + bool hdr_split; + struct mtx rx_mtx; + char mtx_name[16]; + u32 next_to_refresh; + u32 next_to_check; + struct igb_rx_buf *rx_buffers; + bus_dma_tag_t htag; /* dma tag for rx head */ + bus_dma_tag_t ptag; /* dma tag for rx packet */ + /* + * First/last mbuf pointers, for + * collecting multisegment RX packets. + */ + struct mbuf *fmp; + struct mbuf *lmp; + + u32 bytes; + u32 packets; + int rdt; + int rdh; + + /* Soft stats */ + u64 rx_split_packets; + u64 rx_discarded; + u64 rx_packets; + u64 rx_bytes; +}; + +struct adapter { + struct ifnet *ifp; + struct e1000_hw hw; + + struct e1000_osdep osdep; + device_t dev; + struct cdev *led_dev; + + struct resource *pci_mem; + struct resource *msix_mem; + int memrid; + + /* + * Interrupt resources: this set is + * either used for legacy, or for Link + * when doing MSIX + */ + void *tag; + struct resource *res; + + struct ifmedia media; + struct callout timer; + int msix; + int if_flags; + int pause_frames; + + struct mtx core_mtx; + + eventhandler_tag vlan_attach; + eventhandler_tag vlan_detach; + + u16 num_vlans; + u16 num_queues; + + /* + ** Shadow VFTA table, this is needed because + ** the real vlan filter table gets cleared during + ** a soft reset and the driver needs to be able + ** to repopulate it. + */ + u32 shadow_vfta[IGB_VFTA_SIZE]; + + /* Info about the interface */ + u32 optics; + u32 fc; /* local flow ctrl setting */ + int advertise; /* link speeds */ + bool link_active; + u16 max_frame_size; + u16 num_segs; + u16 link_speed; + bool link_up; + u32 linkvec; + u16 link_duplex; + u32 dmac; + int link_mask; + + /* Flags */ + u32 flags; + + /* Mbuf cluster size */ + u32 rx_mbuf_sz; + + /* Support for pluggable optics */ + bool sfp_probe; + struct task link_task; /* Link tasklet */ + struct task mod_task; /* SFP tasklet */ + struct task msf_task; /* Multispeed Fiber */ + struct taskqueue *tq; + + /* + ** Queues: + ** This is the irq holder, it has + ** and RX/TX pair or rings associated + ** with it. + */ + struct igb_queue *queues; + + /* + * Transmit rings: + * Allocated at run time, an array of rings. + */ + struct tx_ring *tx_rings; + u32 num_tx_desc; + + /* + * Receive rings: + * Allocated at run time, an array of rings. + */ + struct rx_ring *rx_rings; + u64 que_mask; + u32 num_rx_desc; + + /* Multicast array memory */ + u8 *mta; + + /* Misc stats maintained by the driver */ + unsigned long device_control; + unsigned long dropped_pkts; + unsigned long eint_mask; + unsigned long int_mask; + unsigned long link_irq; + unsigned long mbuf_defrag_failed; + unsigned long no_tx_dma_setup; + unsigned long packet_buf_alloc_rx; + unsigned long packet_buf_alloc_tx; + unsigned long rx_control; + unsigned long rx_overruns; + unsigned long watchdog_events; + + /* Used in pf and vf */ + void *stats; + + int enable_aim; + int has_manage; + int wol; + int rx_process_limit; + int tx_process_limit; + u16 vf_ifp; /* a VF interface */ + bool in_detach; /* Used only in igb_ioctl */ + +}; + +/* ****************************************************************************** + * vendor_info_array + * + * This array contains the list of Subvendor/Subdevice IDs on which the driver + * should load. + * + * ******************************************************************************/ +typedef struct _igb_vendor_info_t { + unsigned int vendor_id; + unsigned int device_id; + unsigned int subvendor_id; + unsigned int subdevice_id; + unsigned int index; +} igb_vendor_info_t; + +struct igb_tx_buf { + union e1000_adv_tx_desc *eop; + struct mbuf *m_head; + bus_dmamap_t map; +}; + +struct igb_rx_buf { + struct mbuf *m_head; + struct mbuf *m_pack; + bus_dmamap_t hmap; /* bus_dma map for header */ + bus_dmamap_t pmap; /* bus_dma map for packet */ +}; + +/* +** Find the number of unrefreshed RX descriptors +*/ +static inline u16 +igb_rx_unrefreshed(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + + if (rxr->next_to_check > rxr->next_to_refresh) + return (rxr->next_to_check - rxr->next_to_refresh - 1); + else + return ((adapter->num_rx_desc + rxr->next_to_check) - + rxr->next_to_refresh - 1); +} + +#define IGB_CORE_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF) +#define IGB_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) +#define IGB_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) +#define IGB_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) +#define IGB_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) + +#define IGB_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) +#define IGB_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) +#define IGB_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) +#define IGB_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) +#define IGB_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) + +#define IGB_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) +#define IGB_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) +#define IGB_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) +#define IGB_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) + +#define UPDATE_VF_REG(reg, last, cur) \ +{ \ + u32 new = E1000_READ_REG(hw, reg); \ + if (new < last) \ + cur += 0x100000000LL; \ + last = new; \ + cur &= 0xFFFFFFFF00000000LL; \ + cur |= new; \ +} + +#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504 +static __inline int +drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + return (1); +#endif + return (!buf_ring_empty(br)); +} +#endif + +#endif /* _IF_IGB_H_ */ + + Index: sys/dev/e1000/legacy_if_igb.c =================================================================== --- /dev/null +++ sys/dev/e1000/legacy_if_igb.c @@ -0,0 +1,6441 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/*$FreeBSD$*/ + + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_rss.h" + +#ifdef HAVE_KERNEL_OPTION_HEADERS +#include "opt_device_polling.h" +#include "opt_altq.h" +#endif + +#include "legacy_if_igb.h" + +/********************************************************************* + * Driver version: + *********************************************************************/ +char igb_driver_version[] = "2.5.3-k"; + + +/********************************************************************* + * PCI Device ID Table + * + * Used by probe to select devices to load on + * Last field stores an index into e1000_strings + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } + *********************************************************************/ + +static igb_vendor_info_t igb_vendor_info_array[] = +{ + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0}, + {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0}, + /* required last entry */ + {0, 0, 0, 0, 0} +}; + +/********************************************************************* + * Table of branding strings for all supported NICs. + *********************************************************************/ + +static char *igb_strings[] = { + "Intel(R) PRO/1000 Network Connection" +}; + +/********************************************************************* + * Function prototypes + *********************************************************************/ +static int igb_probe(device_t); +static int igb_attach(device_t); +static int igb_detach(device_t); +static int igb_shutdown(device_t); +static int igb_suspend(device_t); +static int igb_resume(device_t); +#ifndef IGB_LEGACY_TX +static int igb_mq_start(struct ifnet *, struct mbuf *); +static int igb_mq_start_locked(struct ifnet *, struct tx_ring *); +static void igb_qflush(struct ifnet *); +static void igb_deferred_mq_start(void *, int); +#else +static void igb_start(struct ifnet *); +static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); +#endif +static int igb_ioctl(struct ifnet *, u_long, caddr_t); +static uint64_t igb_get_counter(if_t, ift_counter); +static void igb_init(void *); +static void igb_init_locked(struct adapter *); +static void igb_stop(void *); +static void igb_media_status(struct ifnet *, struct ifmediareq *); +static int igb_media_change(struct ifnet *); +static void igb_identify_hardware(struct adapter *); +static int igb_allocate_pci_resources(struct adapter *); +static int igb_allocate_msix(struct adapter *); +static int igb_allocate_legacy(struct adapter *); +static int igb_setup_msix(struct adapter *); +static void igb_free_pci_resources(struct adapter *); +static void igb_local_timer(void *); +static void igb_reset(struct adapter *); +static int igb_setup_interface(device_t, struct adapter *); +static int igb_allocate_queues(struct adapter *); +static void igb_configure_queues(struct adapter *); + +static int igb_allocate_transmit_buffers(struct tx_ring *); +static void igb_setup_transmit_structures(struct adapter *); +static void igb_setup_transmit_ring(struct tx_ring *); +static void igb_initialize_transmit_units(struct adapter *); +static void igb_free_transmit_structures(struct adapter *); +static void igb_free_transmit_buffers(struct tx_ring *); + +static int igb_allocate_receive_buffers(struct rx_ring *); +static int igb_setup_receive_structures(struct adapter *); +static int igb_setup_receive_ring(struct rx_ring *); +static void igb_initialize_receive_units(struct adapter *); +static void igb_free_receive_structures(struct adapter *); +static void igb_free_receive_buffers(struct rx_ring *); +static void igb_free_receive_ring(struct rx_ring *); + +static void igb_enable_intr(struct adapter *); +static void igb_disable_intr(struct adapter *); +static void igb_update_stats_counters(struct adapter *); +static bool igb_txeof(struct tx_ring *); + +static __inline void igb_rx_discard(struct rx_ring *, int); +static __inline void igb_rx_input(struct rx_ring *, + struct ifnet *, struct mbuf *, u32); + +static bool igb_rxeof(struct igb_queue *, int, int *); +static void igb_rx_checksum(u32, struct mbuf *, u32); +static int igb_tx_ctx_setup(struct tx_ring *, + struct mbuf *, u32 *, u32 *); +static int igb_tso_setup(struct tx_ring *, + struct mbuf *, u32 *, u32 *); +static void igb_set_promisc(struct adapter *); +static void igb_disable_promisc(struct adapter *); +static void igb_set_multi(struct adapter *); +static void igb_update_link_status(struct adapter *); +static void igb_refresh_mbufs(struct rx_ring *, int); + +static void igb_register_vlan(void *, struct ifnet *, u16); +static void igb_unregister_vlan(void *, struct ifnet *, u16); +static void igb_setup_vlan_hw_support(struct adapter *); + +static int igb_xmit(struct tx_ring *, struct mbuf **); +static int igb_dma_malloc(struct adapter *, bus_size_t, + struct igb_dma_alloc *, int); +static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); +static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); +static void igb_print_nvm_info(struct adapter *); +static int igb_is_valid_ether_addr(u8 *); +static void igb_add_hw_stats(struct adapter *); + +static void igb_vf_init_stats(struct adapter *); +static void igb_update_vf_stats_counters(struct adapter *); + +/* Management and WOL Support */ +static void igb_init_manageability(struct adapter *); +static void igb_release_manageability(struct adapter *); +static void igb_get_hw_control(struct adapter *); +static void igb_release_hw_control(struct adapter *); +static void igb_enable_wakeup(device_t); +static void igb_led_func(void *, int); + +static int igb_irq_fast(void *); +static void igb_msix_que(void *); +static void igb_msix_link(void *); +static void igb_handle_que(void *context, int pending); +static void igb_handle_link(void *context, int pending); +static void igb_handle_link_locked(struct adapter *); + +static void igb_set_sysctl_value(struct adapter *, const char *, + const char *, int *, int); +static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); +static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); +static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); + +#ifdef DEVICE_POLLING +static poll_handler_t igb_poll; +#endif /* POLLING */ + +/********************************************************************* + * FreeBSD Device Interface Entry Points + *********************************************************************/ + +static device_method_t igb_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, igb_probe), + DEVMETHOD(device_attach, igb_attach), + DEVMETHOD(device_detach, igb_detach), + DEVMETHOD(device_shutdown, igb_shutdown), + DEVMETHOD(device_suspend, igb_suspend), + DEVMETHOD(device_resume, igb_resume), + DEVMETHOD_END +}; + +static driver_t igb_driver = { + "igb", igb_methods, sizeof(struct adapter), +}; + +static devclass_t igb_devclass; +DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); +MODULE_DEPEND(igb, pci, 1, 1, 1); +MODULE_DEPEND(igb, ether, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(igb, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ + +/********************************************************************* + * Tunable default values. + *********************************************************************/ + +static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); + +/* Descriptor defaults */ +static int igb_rxd = IGB_DEFAULT_RXD; +static int igb_txd = IGB_DEFAULT_TXD; +SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0, + "Number of receive descriptors per queue"); +SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0, + "Number of transmit descriptors per queue"); + +/* +** AIM: Adaptive Interrupt Moderation +** which means that the interrupt rate +** is varied over time based on the +** traffic for that interrupt vector +*/ +static int igb_enable_aim = TRUE; +SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0, + "Enable adaptive interrupt moderation"); + +/* + * MSIX should be the default for best performance, + * but this allows it to be forced off for testing. + */ +static int igb_enable_msix = 1; +SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, + "Enable MSI-X interrupts"); + +/* +** Tuneable Interrupt rate +*/ +static int igb_max_interrupt_rate = 8000; +SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, + &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); + +#ifndef IGB_LEGACY_TX +/* +** Tuneable number of buffers in the buf-ring (drbr_xxx) +*/ +static int igb_buf_ring_size = IGB_BR_SIZE; +SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, + &igb_buf_ring_size, 0, "Size of the bufring"); +#endif + +/* +** Header split causes the packet header to +** be dma'd to a separate mbuf from the payload. +** this can have memory alignment benefits. But +** another plus is that small packets often fit +** into the header and thus use no cluster. Its +** a very workload dependent type feature. +*/ +static int igb_header_split = FALSE; +SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, + "Enable receive mbuf header split"); + +/* +** This will autoconfigure based on the +** number of CPUs and max supported +** MSIX messages if left at 0. +*/ +static int igb_num_queues = 0; +SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0, + "Number of queues to configure, 0 indicates autoconfigure"); + +/* +** Global variable to store last used CPU when binding queues +** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a +** queue is bound to a cpu. +*/ +static int igb_last_bind_cpu = -1; + +/* How many packets rxeof tries to clean at a time */ +static int igb_rx_process_limit = 100; +SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, + &igb_rx_process_limit, 0, + "Maximum number of received packets to process at a time, -1 means unlimited"); + +/* How many packets txeof tries to clean at a time */ +static int igb_tx_process_limit = -1; +SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, + &igb_tx_process_limit, 0, + "Maximum number of sent packets to process at a time, -1 means unlimited"); + +#ifdef DEV_NETMAP /* see ixgbe.c for details */ +#include +#endif /* DEV_NETMAP */ +/********************************************************************* + * Device identification routine + * + * igb_probe determines if the driver should be loaded on + * adapter based on PCI vendor/device id of the adapter. + * + * return BUS_PROBE_DEFAULT on success, positive on failure + *********************************************************************/ + +static int +igb_probe(device_t dev) +{ + char adapter_name[256]; + uint16_t pci_vendor_id = 0; + uint16_t pci_device_id = 0; + uint16_t pci_subvendor_id = 0; + uint16_t pci_subdevice_id = 0; + igb_vendor_info_t *ent; + + INIT_DEBUGOUT("igb_probe: begin"); + + pci_vendor_id = pci_get_vendor(dev); + if (pci_vendor_id != IGB_INTEL_VENDOR_ID) + return (ENXIO); + + pci_device_id = pci_get_device(dev); + pci_subvendor_id = pci_get_subvendor(dev); + pci_subdevice_id = pci_get_subdevice(dev); + + ent = igb_vendor_info_array; + while (ent->vendor_id != 0) { + if ((pci_vendor_id == ent->vendor_id) && + (pci_device_id == ent->device_id) && + + ((pci_subvendor_id == ent->subvendor_id) || + (ent->subvendor_id == 0)) && + + ((pci_subdevice_id == ent->subdevice_id) || + (ent->subdevice_id == 0))) { + sprintf(adapter_name, "%s, Version - %s", + igb_strings[ent->index], + igb_driver_version); + device_set_desc_copy(dev, adapter_name); + return (BUS_PROBE_DEFAULT); + } + ent++; + } + return (ENXIO); +} + +/********************************************************************* + * Device initialization routine + * + * The attach entry point is called when the driver is being loaded. + * This routine identifies the type of hardware, allocates all resources + * and initializes the hardware. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +igb_attach(device_t dev) +{ + struct adapter *adapter; + int error = 0; + u16 eeprom_data; + + INIT_DEBUGOUT("igb_attach: begin"); + + if (resource_disabled("igb", device_get_unit(dev))) { + device_printf(dev, "Disabled by device hint\n"); + return (ENXIO); + } + + adapter = device_get_softc(dev); + adapter->dev = adapter->osdep.dev = dev; + IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); + + /* SYSCTLs */ + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, + igb_sysctl_nvm_info, "I", "NVM Information"); + + igb_set_sysctl_value(adapter, "enable_aim", + "Interrupt Moderation", &adapter->enable_aim, + igb_enable_aim); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, + adapter, 0, igb_set_flowcntl, "I", "Flow Control"); + + callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); + + /* Determine hardware and mac info */ + igb_identify_hardware(adapter); + + /* Setup PCI resources */ + if (igb_allocate_pci_resources(adapter)) { + device_printf(dev, "Allocation of PCI resources failed\n"); + error = ENXIO; + goto err_pci; + } + + /* Do Shared Code initialization */ + if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { + device_printf(dev, "Setup of Shared code failed\n"); + error = ENXIO; + goto err_pci; + } + + e1000_get_bus_info(&adapter->hw); + + /* Sysctls for limiting the amount of work done in the taskqueues */ + igb_set_sysctl_value(adapter, "rx_processing_limit", + "max number of rx packets to process", + &adapter->rx_process_limit, igb_rx_process_limit); + + igb_set_sysctl_value(adapter, "tx_processing_limit", + "max number of tx packets to process", + &adapter->tx_process_limit, igb_tx_process_limit); + + /* + * Validate number of transmit and receive descriptors. It + * must not exceed hardware maximum, and must be multiple + * of E1000_DBA_ALIGN. + */ + if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || + (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { + device_printf(dev, "Using %d TX descriptors instead of %d!\n", + IGB_DEFAULT_TXD, igb_txd); + adapter->num_tx_desc = IGB_DEFAULT_TXD; + } else + adapter->num_tx_desc = igb_txd; + if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || + (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { + device_printf(dev, "Using %d RX descriptors instead of %d!\n", + IGB_DEFAULT_RXD, igb_rxd); + adapter->num_rx_desc = IGB_DEFAULT_RXD; + } else + adapter->num_rx_desc = igb_rxd; + + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_wait_to_complete = FALSE; + adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; + + /* Copper options */ + if (adapter->hw.phy.media_type == e1000_media_type_copper) { + adapter->hw.phy.mdix = AUTO_ALL_MODES; + adapter->hw.phy.disable_polarity_correction = FALSE; + adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; + } + + /* + * Set the frame limits assuming + * standard ethernet sized frames. + */ + adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; + + /* + ** Allocate and Setup Queues + */ + if (igb_allocate_queues(adapter)) { + error = ENOMEM; + goto err_pci; + } + + /* Allocate the appropriate stats memory */ + if (adapter->vf_ifp) { + adapter->stats = + (struct e1000_vf_stats *)malloc(sizeof \ + (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); + igb_vf_init_stats(adapter); + } else + adapter->stats = + (struct e1000_hw_stats *)malloc(sizeof \ + (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); + if (adapter->stats == NULL) { + device_printf(dev, "Can not allocate stats memory\n"); + error = ENOMEM; + goto err_late; + } + + /* Allocate multicast array memory. */ + adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * + MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); + if (adapter->mta == NULL) { + device_printf(dev, "Can not allocate multicast setup array\n"); + error = ENOMEM; + goto err_late; + } + + /* Some adapter-specific advanced features */ + if (adapter->hw.mac.type >= e1000_i350) { + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, + adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, + adapter, 0, igb_sysctl_eee, "I", + "Disable Energy Efficient Ethernet"); + if (adapter->hw.phy.media_type == e1000_media_type_copper) { + if (adapter->hw.mac.type == e1000_i354) + e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); + else + e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); + } + } + + /* + ** Start from a known state, this is + ** important in reading the nvm and + ** mac from that. + */ + e1000_reset_hw(&adapter->hw); + + /* Make sure we have a good EEPROM before we read from it */ + if (((adapter->hw.mac.type != e1000_i210) && + (adapter->hw.mac.type != e1000_i211)) && + (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { + /* + ** Some PCI-E parts fail the first check due to + ** the link being in sleep state, call it again, + ** if it fails a second time its a real issue. + */ + if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { + device_printf(dev, + "The EEPROM Checksum Is Not Valid\n"); + error = EIO; + goto err_late; + } + } + + /* + ** Copy the permanent MAC address out of the EEPROM + */ + if (e1000_read_mac_addr(&adapter->hw) < 0) { + device_printf(dev, "EEPROM read error while reading MAC" + " address\n"); + error = EIO; + goto err_late; + } + /* Check its sanity */ + if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { + device_printf(dev, "Invalid MAC address\n"); + error = EIO; + goto err_late; + } + + /* Setup OS specific network interface */ + if (igb_setup_interface(dev, adapter) != 0) + goto err_late; + + /* Now get a good starting state */ + igb_reset(adapter); + + /* Initialize statistics */ + igb_update_stats_counters(adapter); + + adapter->hw.mac.get_link_status = 1; + igb_update_link_status(adapter); + + /* Indicate SOL/IDER usage */ + if (e1000_check_reset_block(&adapter->hw)) + device_printf(dev, + "PHY reset is blocked due to SOL/IDER session.\n"); + + /* Determine if we have to control management hardware */ + adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); + + /* + * Setup Wake-on-Lan + */ + /* APME bit in EEPROM is mapped to WUC.APME */ + eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; + if (eeprom_data) + adapter->wol = E1000_WUFC_MAG; + + /* Register for VLAN events */ + adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); + adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); + + igb_add_hw_stats(adapter); + + /* Tell the stack that the interface is not active */ + adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE; + + adapter->led_dev = led_create(igb_led_func, adapter, + device_get_nameunit(dev)); + + /* + ** Configure Interrupts + */ + if ((adapter->msix > 1) && (igb_enable_msix)) + error = igb_allocate_msix(adapter); + else /* MSI or Legacy */ + error = igb_allocate_legacy(adapter); + if (error) + goto err_late; + +#ifdef DEV_NETMAP + igb_netmap_attach(adapter); +#endif /* DEV_NETMAP */ + INIT_DEBUGOUT("igb_attach: end"); + + return (0); + +err_late: + if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */ + return(error); + igb_free_transmit_structures(adapter); + igb_free_receive_structures(adapter); + igb_release_hw_control(adapter); +err_pci: + igb_free_pci_resources(adapter); + if (adapter->ifp != NULL) + if_free(adapter->ifp); + free(adapter->mta, M_DEVBUF); + IGB_CORE_LOCK_DESTROY(adapter); + + return (error); +} + +/********************************************************************* + * Device removal routine + * + * The detach entry point is called when the driver is being removed. + * This routine stops the adapter and deallocates all the resources + * that were allocated for driver operation. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +igb_detach(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct ifnet *ifp = adapter->ifp; + + INIT_DEBUGOUT("igb_detach: begin"); + + /* Make sure VLANS are not using driver */ + if (adapter->ifp->if_vlantrunk != NULL) { + device_printf(dev,"Vlan in use, detach first\n"); + return (EBUSY); + } + + ether_ifdetach(adapter->ifp); + + if (adapter->led_dev != NULL) + led_destroy(adapter->led_dev); + +#ifdef DEVICE_POLLING + if (ifp->if_capenable & IFCAP_POLLING) + ether_poll_deregister(ifp); +#endif + + IGB_CORE_LOCK(adapter); + adapter->in_detach = 1; + igb_stop(adapter); + IGB_CORE_UNLOCK(adapter); + + e1000_phy_hw_reset(&adapter->hw); + + /* Give control back to firmware */ + igb_release_manageability(adapter); + igb_release_hw_control(adapter); + + if (adapter->wol) { + E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); + igb_enable_wakeup(dev); + } + + /* Unregister VLAN events */ + if (adapter->vlan_attach != NULL) + EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); + if (adapter->vlan_detach != NULL) + EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); + + callout_drain(&adapter->timer); + +#ifdef DEV_NETMAP + netmap_detach(adapter->ifp); +#endif /* DEV_NETMAP */ + igb_free_pci_resources(adapter); + bus_generic_detach(dev); + if_free(ifp); + + igb_free_transmit_structures(adapter); + igb_free_receive_structures(adapter); + if (adapter->mta != NULL) + free(adapter->mta, M_DEVBUF); + + IGB_CORE_LOCK_DESTROY(adapter); + + return (0); +} + +/********************************************************************* + * + * Shutdown entry point + * + **********************************************************************/ + +static int +igb_shutdown(device_t dev) +{ + return igb_suspend(dev); +} + +/* + * Suspend/resume device methods. + */ +static int +igb_suspend(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + + IGB_CORE_LOCK(adapter); + + igb_stop(adapter); + + igb_release_manageability(adapter); + igb_release_hw_control(adapter); + + if (adapter->wol) { + E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); + igb_enable_wakeup(dev); + } + + IGB_CORE_UNLOCK(adapter); + + return bus_generic_suspend(dev); +} + +static int +igb_resume(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct tx_ring *txr = adapter->tx_rings; + struct ifnet *ifp = adapter->ifp; + + IGB_CORE_LOCK(adapter); + igb_init_locked(adapter); + igb_init_manageability(adapter); + + if ((ifp->if_flags & IFF_UP) && + (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IGB_TX_LOCK(txr); +#ifndef IGB_LEGACY_TX + /* Process the stack queue only if not depleted */ + if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && + !drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + igb_start_locked(txr, ifp); +#endif + IGB_TX_UNLOCK(txr); + } + } + IGB_CORE_UNLOCK(adapter); + + return bus_generic_resume(dev); +} + + +#ifdef IGB_LEGACY_TX + +/********************************************************************* + * Transmit entry point + * + * igb_start is called by the stack to initiate a transmit. + * The driver will remain in this routine as long as there are + * packets to transmit and transmit resources are available. + * In case resources are not available stack is notified and + * the packet is requeued. + **********************************************************************/ + +static void +igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct mbuf *m_head; + + IGB_TX_LOCK_ASSERT(txr); + + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return; + if (!adapter->link_active) + return; + + /* Call cleanup if number of TX descriptors low */ + if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) + igb_txeof(txr); + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + if (txr->tx_avail <= IGB_MAX_SCATTER) { + txr->queue_status |= IGB_QUEUE_DEPLETED; + break; + } + IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + /* + * Encapsulation can modify our pointer, and or make it + * NULL on failure. In that event, we can't requeue. + */ + if (igb_xmit(txr, &m_head)) { + if (m_head != NULL) + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + if (txr->tx_avail <= IGB_MAX_SCATTER) + txr->queue_status |= IGB_QUEUE_DEPLETED; + break; + } + + /* Send a copy of the frame to the BPF listener */ + ETHER_BPF_MTAP(ifp, m_head); + + /* Set watchdog on */ + txr->watchdog_time = ticks; + txr->queue_status |= IGB_QUEUE_WORKING; + } +} + +/* + * Legacy TX driver routine, called from the + * stack, always uses tx[0], and spins for it. + * Should not be used with multiqueue tx + */ +static void +igb_start(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + IGB_TX_LOCK(txr); + igb_start_locked(txr, ifp); + IGB_TX_UNLOCK(txr); + } + return; +} + +#else /* ~IGB_LEGACY_TX */ + +/* +** Multiqueue Transmit Entry: +** quick turnaround to the stack +** +*/ +static int +igb_mq_start(struct ifnet *ifp, struct mbuf *m) +{ + struct adapter *adapter = ifp->if_softc; + struct igb_queue *que; + struct tx_ring *txr; + int i, err = 0; +#ifdef RSS + uint32_t bucket_id; +#endif + + /* Which queue to use */ + /* + * When doing RSS, map it to the same outbound queue + * as the incoming flow would be mapped to. + * + * If everything is setup correctly, it should be the + * same bucket that the current CPU we're on is. + */ + if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { +#ifdef RSS + if (rss_hash2bucket(m->m_pkthdr.flowid, + M_HASHTYPE_GET(m), &bucket_id) == 0) { + /* XXX TODO: spit out something if bucket_id > num_queues? */ + i = bucket_id % adapter->num_queues; + } else { +#endif + i = m->m_pkthdr.flowid % adapter->num_queues; +#ifdef RSS + } +#endif + } else { + i = curcpu % adapter->num_queues; + } + txr = &adapter->tx_rings[i]; + que = &adapter->queues[i]; + + err = drbr_enqueue(ifp, txr->br, m); + if (err) + return (err); + if (IGB_TX_TRYLOCK(txr)) { + igb_mq_start_locked(ifp, txr); + IGB_TX_UNLOCK(txr); + } else + taskqueue_enqueue(que->tq, &txr->txq_task); + + return (0); +} + +static int +igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct mbuf *next; + int err = 0, enq = 0; + + IGB_TX_LOCK_ASSERT(txr); + + if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || + adapter->link_active == 0) + return (ENETDOWN); + + /* Process the queue */ + while ((next = drbr_peek(ifp, txr->br)) != NULL) { + if ((err = igb_xmit(txr, &next)) != 0) { + if (next == NULL) { + /* It was freed, move forward */ + drbr_advance(ifp, txr->br); + } else { + /* + * Still have one left, it may not be + * the same since the transmit function + * may have changed it. + */ + drbr_putback(ifp, txr->br, next); + } + break; + } + drbr_advance(ifp, txr->br); + enq++; + if (next->m_flags & M_MCAST && adapter->vf_ifp) + if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); + ETHER_BPF_MTAP(ifp, next); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; + } + if (enq > 0) { + /* Set the watchdog */ + txr->queue_status |= IGB_QUEUE_WORKING; + txr->watchdog_time = ticks; + } + if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) + igb_txeof(txr); + if (txr->tx_avail <= IGB_MAX_SCATTER) + txr->queue_status |= IGB_QUEUE_DEPLETED; + return (err); +} + +/* + * Called from a taskqueue to drain queued transmit packets. + */ +static void +igb_deferred_mq_start(void *arg, int pending) +{ + struct tx_ring *txr = arg; + struct adapter *adapter = txr->adapter; + struct ifnet *ifp = adapter->ifp; + + IGB_TX_LOCK(txr); + if (!drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr); + IGB_TX_UNLOCK(txr); +} + +/* +** Flush all ring buffers +*/ +static void +igb_qflush(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + struct mbuf *m; + + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IGB_TX_LOCK(txr); + while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) + m_freem(m); + IGB_TX_UNLOCK(txr); + } + if_qflush(ifp); +} +#endif /* ~IGB_LEGACY_TX */ + +/********************************************************************* + * Ioctl entry point + * + * igb_ioctl is called when the user wants to configure the + * interface. + * + * return 0 on success, positive on failure + **********************************************************************/ + +static int +igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) +{ + struct adapter *adapter = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; +#if defined(INET) || defined(INET6) + struct ifaddr *ifa = (struct ifaddr *)data; +#endif + bool avoid_reset = FALSE; + int error = 0; + + if (adapter->in_detach) + return (error); + + switch (command) { + case SIOCSIFADDR: +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) + avoid_reset = TRUE; +#endif +#ifdef INET6 + if (ifa->ifa_addr->sa_family == AF_INET6) + avoid_reset = TRUE; +#endif + /* + ** Calling init results in link renegotiation, + ** so we avoid doing it when possible. + */ + if (avoid_reset) { + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + igb_init(adapter); +#ifdef INET + if (!(ifp->if_flags & IFF_NOARP)) + arp_ifinit(ifp, ifa); +#endif + } else + error = ether_ioctl(ifp, command, data); + break; + case SIOCSIFMTU: + { + int max_frame_size; + + IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); + + IGB_CORE_LOCK(adapter); + max_frame_size = 9234; + if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - + ETHER_CRC_LEN) { + IGB_CORE_UNLOCK(adapter); + error = EINVAL; + break; + } + + ifp->if_mtu = ifr->ifr_mtu; + adapter->max_frame_size = + ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + igb_init_locked(adapter); + IGB_CORE_UNLOCK(adapter); + break; + } + case SIOCSIFFLAGS: + IOCTL_DEBUGOUT("ioctl rcv'd:\ + SIOCSIFFLAGS (Set Interface Flags)"); + IGB_CORE_LOCK(adapter); + if (ifp->if_flags & IFF_UP) { + if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if ((ifp->if_flags ^ adapter->if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + igb_disable_promisc(adapter); + igb_set_promisc(adapter); + } + } else + igb_init_locked(adapter); + } else + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + igb_stop(adapter); + adapter->if_flags = ifp->if_flags; + IGB_CORE_UNLOCK(adapter); + break; + case SIOCADDMULTI: + case SIOCDELMULTI: + IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + IGB_CORE_LOCK(adapter); + igb_disable_intr(adapter); + igb_set_multi(adapter); +#ifdef DEVICE_POLLING + if (!(ifp->if_capenable & IFCAP_POLLING)) +#endif + igb_enable_intr(adapter); + IGB_CORE_UNLOCK(adapter); + } + break; + case SIOCSIFMEDIA: + /* Check SOL/IDER usage */ + IGB_CORE_LOCK(adapter); + if (e1000_check_reset_block(&adapter->hw)) { + IGB_CORE_UNLOCK(adapter); + device_printf(adapter->dev, "Media change is" + " blocked due to SOL/IDER session.\n"); + break; + } + IGB_CORE_UNLOCK(adapter); + case SIOCGIFMEDIA: + IOCTL_DEBUGOUT("ioctl rcv'd: \ + SIOCxIFMEDIA (Get/Set Interface Media)"); + error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); + break; + case SIOCSIFCAP: + { + int mask, reinit; + + IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); + reinit = 0; + mask = ifr->ifr_reqcap ^ ifp->if_capenable; +#ifdef DEVICE_POLLING + if (mask & IFCAP_POLLING) { + if (ifr->ifr_reqcap & IFCAP_POLLING) { + error = ether_poll_register(igb_poll, ifp); + if (error) + return (error); + IGB_CORE_LOCK(adapter); + igb_disable_intr(adapter); + ifp->if_capenable |= IFCAP_POLLING; + IGB_CORE_UNLOCK(adapter); + } else { + error = ether_poll_deregister(ifp); + /* Enable interrupt even in error case */ + IGB_CORE_LOCK(adapter); + igb_enable_intr(adapter); + ifp->if_capenable &= ~IFCAP_POLLING; + IGB_CORE_UNLOCK(adapter); + } + } +#endif +#if __FreeBSD_version >= 1000000 + /* HW cannot turn these on/off separately */ + if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { + ifp->if_capenable ^= IFCAP_RXCSUM; + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + reinit = 1; + } + if (mask & IFCAP_TXCSUM) { + ifp->if_capenable ^= IFCAP_TXCSUM; + reinit = 1; + } + if (mask & IFCAP_TXCSUM_IPV6) { + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; + reinit = 1; + } +#else + if (mask & IFCAP_HWCSUM) { + ifp->if_capenable ^= IFCAP_HWCSUM; + reinit = 1; + } +#endif + if (mask & IFCAP_TSO4) { + ifp->if_capenable ^= IFCAP_TSO4; + reinit = 1; + } + if (mask & IFCAP_TSO6) { + ifp->if_capenable ^= IFCAP_TSO6; + reinit = 1; + } + if (mask & IFCAP_VLAN_HWTAGGING) { + ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; + reinit = 1; + } + if (mask & IFCAP_VLAN_HWFILTER) { + ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; + reinit = 1; + } + if (mask & IFCAP_VLAN_HWTSO) { + ifp->if_capenable ^= IFCAP_VLAN_HWTSO; + reinit = 1; + } + if (mask & IFCAP_LRO) { + ifp->if_capenable ^= IFCAP_LRO; + reinit = 1; + } + if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) + igb_init(adapter); + VLAN_CAPABILITIES(ifp); + break; + } + + default: + error = ether_ioctl(ifp, command, data); + break; + } + + return (error); +} + + +/********************************************************************* + * Init entry point + * + * This routine is used in two ways. It is used by the stack as + * init entry point in network interface structure. It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. + * + * return 0 on success, positive on failure + **********************************************************************/ + +static void +igb_init_locked(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + + INIT_DEBUGOUT("igb_init: begin"); + + IGB_CORE_LOCK_ASSERT(adapter); + + igb_disable_intr(adapter); + callout_stop(&adapter->timer); + + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, + ETHER_ADDR_LEN); + + /* Put the address into the Receive Address Array */ + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + + igb_reset(adapter); + igb_update_link_status(adapter); + + E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); + + /* Set hardware offload abilities */ + ifp->if_hwassist = 0; + if (ifp->if_capenable & IFCAP_TXCSUM) { +#if __FreeBSD_version >= 1000000 + ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP); + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_IP_SCTP; +#else + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); +#if __FreeBSD_version >= 800000 + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_SCTP; +#endif +#endif + } + +#if __FreeBSD_version >= 1000000 + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { + ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP); + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_IP6_SCTP; + } +#endif + if (ifp->if_capenable & IFCAP_TSO) + ifp->if_hwassist |= CSUM_TSO; + + /* Clear bad data from Rx FIFOs */ + e1000_rx_fifo_flush_82575(&adapter->hw); + + /* Configure for OS presence */ + igb_init_manageability(adapter); + + /* Prepare transmit descriptors and buffers */ + igb_setup_transmit_structures(adapter); + igb_initialize_transmit_units(adapter); + + /* Setup Multicast table */ + igb_set_multi(adapter); + + /* + ** Figure out the desired mbuf pool + ** for doing jumbo/packetsplit + */ + if (adapter->max_frame_size <= 2048) + adapter->rx_mbuf_sz = MCLBYTES; + else if (adapter->max_frame_size <= 4096) + adapter->rx_mbuf_sz = MJUMPAGESIZE; + else + adapter->rx_mbuf_sz = MJUM9BYTES; + + /* Prepare receive descriptors and buffers */ + if (igb_setup_receive_structures(adapter)) { + device_printf(dev, "Could not setup receive structures\n"); + return; + } + igb_initialize_receive_units(adapter); + + /* Enable VLAN support */ + if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) + igb_setup_vlan_hw_support(adapter); + + /* Don't lose promiscuous settings */ + igb_set_promisc(adapter); + + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + + callout_reset(&adapter->timer, hz, igb_local_timer, adapter); + e1000_clear_hw_cntrs_base_generic(&adapter->hw); + + if (adapter->msix > 1) /* Set up queue routing */ + igb_configure_queues(adapter); + + /* this clears any pending interrupts */ + E1000_READ_REG(&adapter->hw, E1000_ICR); +#ifdef DEVICE_POLLING + /* + * Only enable interrupts if we are not polling, make sure + * they are off otherwise. + */ + if (ifp->if_capenable & IFCAP_POLLING) + igb_disable_intr(adapter); + else +#endif /* DEVICE_POLLING */ + { + igb_enable_intr(adapter); + E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); + } + + /* Set Energy Efficient Ethernet */ + if (adapter->hw.phy.media_type == e1000_media_type_copper) { + if (adapter->hw.mac.type == e1000_i354) + e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); + else + e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); + } +} + +static void +igb_init(void *arg) +{ + struct adapter *adapter = arg; + + IGB_CORE_LOCK(adapter); + igb_init_locked(adapter); + IGB_CORE_UNLOCK(adapter); +} + + +static void +igb_handle_que(void *context, int pending) +{ + struct igb_queue *que = context; + struct adapter *adapter = que->adapter; + struct tx_ring *txr = que->txr; + struct ifnet *ifp = adapter->ifp; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + bool more; + + more = igb_rxeof(que, adapter->rx_process_limit, NULL); + + IGB_TX_LOCK(txr); + igb_txeof(txr); +#ifndef IGB_LEGACY_TX + /* Process the stack queue only if not depleted */ + if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && + !drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + igb_start_locked(txr, ifp); +#endif + IGB_TX_UNLOCK(txr); + /* Do we need another? */ + if (more) { + taskqueue_enqueue(que->tq, &que->que_task); + return; + } + } + +#ifdef DEVICE_POLLING + if (ifp->if_capenable & IFCAP_POLLING) + return; +#endif + /* Re-enable this interrupt */ + if (que->eims) + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); + else + igb_enable_intr(adapter); +} + +/* Deal with link in a sleepable context */ +static void +igb_handle_link(void *context, int pending) +{ + struct adapter *adapter = context; + + IGB_CORE_LOCK(adapter); + igb_handle_link_locked(adapter); + IGB_CORE_UNLOCK(adapter); +} + +static void +igb_handle_link_locked(struct adapter *adapter) +{ + struct tx_ring *txr = adapter->tx_rings; + struct ifnet *ifp = adapter->ifp; + + IGB_CORE_LOCK_ASSERT(adapter); + adapter->hw.mac.get_link_status = 1; + igb_update_link_status(adapter); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IGB_TX_LOCK(txr); +#ifndef IGB_LEGACY_TX + /* Process the stack queue only if not depleted */ + if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && + !drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + igb_start_locked(txr, ifp); +#endif + IGB_TX_UNLOCK(txr); + } + } +} + +/********************************************************************* + * + * MSI/Legacy Deferred + * Interrupt Service routine + * + *********************************************************************/ +static int +igb_irq_fast(void *arg) +{ + struct adapter *adapter = arg; + struct igb_queue *que = adapter->queues; + u32 reg_icr; + + + reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + + /* Hot eject? */ + if (reg_icr == 0xffffffff) + return FILTER_STRAY; + + /* Definitely not our interrupt. */ + if (reg_icr == 0x0) + return FILTER_STRAY; + + if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) + return FILTER_STRAY; + + /* + * Mask interrupts until the taskqueue is finished running. This is + * cheap, just assume that it is needed. This also works around the + * MSI message reordering errata on certain systems. + */ + igb_disable_intr(adapter); + taskqueue_enqueue(que->tq, &que->que_task); + + /* Link status change */ + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) + taskqueue_enqueue(que->tq, &adapter->link_task); + + if (reg_icr & E1000_ICR_RXO) + adapter->rx_overruns++; + return FILTER_HANDLED; +} + +#ifdef DEVICE_POLLING +#if __FreeBSD_version >= 800000 +#define POLL_RETURN_COUNT(a) (a) +static int +#else +#define POLL_RETURN_COUNT(a) +static void +#endif +igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) +{ + struct adapter *adapter = ifp->if_softc; + struct igb_queue *que; + struct tx_ring *txr; + u32 reg_icr, rx_done = 0; + u32 loop = IGB_MAX_LOOP; + bool more; + + IGB_CORE_LOCK(adapter); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + IGB_CORE_UNLOCK(adapter); + return POLL_RETURN_COUNT(rx_done); + } + + if (cmd == POLL_AND_CHECK_STATUS) { + reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + /* Link status change */ + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) + igb_handle_link_locked(adapter); + + if (reg_icr & E1000_ICR_RXO) + adapter->rx_overruns++; + } + IGB_CORE_UNLOCK(adapter); + + for (int i = 0; i < adapter->num_queues; i++) { + que = &adapter->queues[i]; + txr = que->txr; + + igb_rxeof(que, count, &rx_done); + + IGB_TX_LOCK(txr); + do { + more = igb_txeof(txr); + } while (loop-- && more); +#ifndef IGB_LEGACY_TX + if (!drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + igb_start_locked(txr, ifp); +#endif + IGB_TX_UNLOCK(txr); + } + + return POLL_RETURN_COUNT(rx_done); +} +#endif /* DEVICE_POLLING */ + +/********************************************************************* + * + * MSIX Que Interrupt Service routine + * + **********************************************************************/ +static void +igb_msix_que(void *arg) +{ + struct igb_queue *que = arg; + struct adapter *adapter = que->adapter; + struct ifnet *ifp = adapter->ifp; + struct tx_ring *txr = que->txr; + struct rx_ring *rxr = que->rxr; + u32 newitr = 0; + bool more_rx; + + /* Ignore spurious interrupts */ + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + return; + + E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); + ++que->irqs; + + IGB_TX_LOCK(txr); + igb_txeof(txr); +#ifndef IGB_LEGACY_TX + /* Process the stack queue only if not depleted */ + if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && + !drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + igb_start_locked(txr, ifp); +#endif + IGB_TX_UNLOCK(txr); + + more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); + + if (adapter->enable_aim == FALSE) + goto no_calc; + /* + ** Do Adaptive Interrupt Moderation: + ** - Write out last calculated setting + ** - Calculate based on average size over + ** the last interval. + */ + if (que->eitr_setting) + E1000_WRITE_REG(&adapter->hw, + E1000_EITR(que->msix), que->eitr_setting); + + que->eitr_setting = 0; + + /* Idle, do nothing */ + if ((txr->bytes == 0) && (rxr->bytes == 0)) + goto no_calc; + + /* Used half Default if sub-gig */ + if (adapter->link_speed != 1000) + newitr = IGB_DEFAULT_ITR / 2; + else { + if ((txr->bytes) && (txr->packets)) + newitr = txr->bytes/txr->packets; + if ((rxr->bytes) && (rxr->packets)) + newitr = max(newitr, + (rxr->bytes / rxr->packets)); + newitr += 24; /* account for hardware frame, crc */ + /* set an upper boundary */ + newitr = min(newitr, 3000); + /* Be nice to the mid range */ + if ((newitr > 300) && (newitr < 1200)) + newitr = (newitr / 3); + else + newitr = (newitr / 2); + } + newitr &= 0x7FFC; /* Mask invalid bits */ + if (adapter->hw.mac.type == e1000_82575) + newitr |= newitr << 16; + else + newitr |= E1000_EITR_CNT_IGNR; + + /* save for next interrupt */ + que->eitr_setting = newitr; + + /* Reset state */ + txr->bytes = 0; + txr->packets = 0; + rxr->bytes = 0; + rxr->packets = 0; + +no_calc: + /* Schedule a clean task if needed*/ + if (more_rx) + taskqueue_enqueue(que->tq, &que->que_task); + else + /* Re-enable this interrupt */ + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); + return; +} + + +/********************************************************************* + * + * MSIX Link Interrupt Service routine + * + **********************************************************************/ + +static void +igb_msix_link(void *arg) +{ + struct adapter *adapter = arg; + u32 icr; + + ++adapter->link_irq; + icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + if (!(icr & E1000_ICR_LSC)) + goto spurious; + igb_handle_link(adapter, 0); + +spurious: + /* Rearm */ + E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); + return; +} + + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called whenever the user queries the status of + * the interface using ifconfig. + * + **********************************************************************/ +static void +igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) +{ + struct adapter *adapter = ifp->if_softc; + + INIT_DEBUGOUT("igb_media_status: begin"); + + IGB_CORE_LOCK(adapter); + igb_update_link_status(adapter); + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if (!adapter->link_active) { + IGB_CORE_UNLOCK(adapter); + return; + } + + ifmr->ifm_status |= IFM_ACTIVE; + + switch (adapter->link_speed) { + case 10: + ifmr->ifm_active |= IFM_10_T; + break; + case 100: + /* + ** Support for 100Mb SFP - these are Fiber + ** but the media type appears as serdes + */ + if (adapter->hw.phy.media_type == + e1000_media_type_internal_serdes) + ifmr->ifm_active |= IFM_100_FX; + else + ifmr->ifm_active |= IFM_100_TX; + break; + case 1000: + ifmr->ifm_active |= IFM_1000_T; + break; + case 2500: + ifmr->ifm_active |= IFM_2500_SX; + break; + } + + if (adapter->link_duplex == FULL_DUPLEX) + ifmr->ifm_active |= IFM_FDX; + else + ifmr->ifm_active |= IFM_HDX; + + IGB_CORE_UNLOCK(adapter); +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called when the user changes speed/duplex using + * media/mediopt option with ifconfig. + * + **********************************************************************/ +static int +igb_media_change(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct ifmedia *ifm = &adapter->media; + + INIT_DEBUGOUT("igb_media_change: begin"); + + if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) + return (EINVAL); + + IGB_CORE_LOCK(adapter); + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; + break; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_T: + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case IFM_100_TX: + adapter->hw.mac.autoneg = FALSE; + adapter->hw.phy.autoneg_advertised = 0; + if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) + adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; + else + adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; + break; + case IFM_10_T: + adapter->hw.mac.autoneg = FALSE; + adapter->hw.phy.autoneg_advertised = 0; + if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) + adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; + else + adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; + break; + default: + device_printf(adapter->dev, "Unsupported media type\n"); + } + + igb_init_locked(adapter); + IGB_CORE_UNLOCK(adapter); + + return (0); +} + + +/********************************************************************* + * + * This routine maps the mbufs to Advanced TX descriptors. + * + **********************************************************************/ +static int +igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) +{ + struct adapter *adapter = txr->adapter; + u32 olinfo_status = 0, cmd_type_len; + int i, j, error, nsegs; + int first; + bool remap = TRUE; + struct mbuf *m_head; + bus_dma_segment_t segs[IGB_MAX_SCATTER]; + bus_dmamap_t map; + struct igb_tx_buf *txbuf; + union e1000_adv_tx_desc *txd = NULL; + + m_head = *m_headp; + + /* Basic descriptor defines */ + cmd_type_len = (E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); + + if (m_head->m_flags & M_VLANTAG) + cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + + /* + * Important to capture the first descriptor + * used because it will contain the index of + * the one we tell the hardware to report back + */ + first = txr->next_avail_desc; + txbuf = &txr->tx_buffers[first]; + map = txbuf->map; + + /* + * Map the packet for DMA. + */ +retry: + error = bus_dmamap_load_mbuf_sg(txr->txtag, map, + *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); + + if (__predict_false(error)) { + struct mbuf *m; + + switch (error) { + case EFBIG: + /* Try it again? - one try */ + if (remap == TRUE) { + remap = FALSE; + m = m_collapse(*m_headp, M_NOWAIT, + IGB_MAX_SCATTER); + if (m == NULL) { + adapter->mbuf_defrag_failed++; + m_freem(*m_headp); + *m_headp = NULL; + return (ENOBUFS); + } + *m_headp = m; + goto retry; + } else + return (error); + default: + txr->no_tx_dma_setup++; + m_freem(*m_headp); + *m_headp = NULL; + return (error); + } + } + + /* Make certain there are enough descriptors */ + if (txr->tx_avail < (nsegs + 2)) { + txr->no_desc_avail++; + bus_dmamap_unload(txr->txtag, map); + return (ENOBUFS); + } + m_head = *m_headp; + + /* + ** Set up the appropriate offload context + ** this will consume the first descriptor + */ + error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); + if (__predict_false(error)) { + m_freem(*m_headp); + *m_headp = NULL; + return (error); + } + + /* 82575 needs the queue index added */ + if (adapter->hw.mac.type == e1000_82575) + olinfo_status |= txr->me << 4; + + i = txr->next_avail_desc; + for (j = 0; j < nsegs; j++) { + bus_size_t seglen; + bus_addr_t segaddr; + + txbuf = &txr->tx_buffers[i]; + txd = &txr->tx_base[i]; + seglen = segs[j].ds_len; + segaddr = htole64(segs[j].ds_addr); + + txd->read.buffer_addr = segaddr; + txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | + cmd_type_len | seglen); + txd->read.olinfo_status = htole32(olinfo_status); + + if (++i == txr->num_desc) + i = 0; + } + + txd->read.cmd_type_len |= + htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); + txr->tx_avail -= nsegs; + txr->next_avail_desc = i; + + txbuf->m_head = m_head; + /* + ** Here we swap the map so the last descriptor, + ** which gets the completion interrupt has the + ** real map, and the first descriptor gets the + ** unused map from this descriptor. + */ + txr->tx_buffers[first].map = txbuf->map; + txbuf->map = map; + bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); + + /* Set the EOP descriptor that will be marked done */ + txbuf = &txr->tx_buffers[first]; + txbuf->eop = txd; + + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + /* + * Advance the Transmit Descriptor Tail (Tdt), this tells the + * hardware that this frame is available to transmit. + */ + ++txr->total_packets; + E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); + + return (0); +} +static void +igb_set_promisc(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + if (adapter->vf_ifp) { + e1000_promisc_set_vf(hw, e1000_promisc_enabled); + return; + } + + reg = E1000_READ_REG(hw, E1000_RCTL); + if (ifp->if_flags & IFF_PROMISC) { + reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); + E1000_WRITE_REG(hw, E1000_RCTL, reg); + } else if (ifp->if_flags & IFF_ALLMULTI) { + reg |= E1000_RCTL_MPE; + reg &= ~E1000_RCTL_UPE; + E1000_WRITE_REG(hw, E1000_RCTL, reg); + } +} + +static void +igb_disable_promisc(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct ifnet *ifp = adapter->ifp; + u32 reg; + int mcnt = 0; + + if (adapter->vf_ifp) { + e1000_promisc_set_vf(hw, e1000_promisc_disabled); + return; + } + reg = E1000_READ_REG(hw, E1000_RCTL); + reg &= (~E1000_RCTL_UPE); + if (ifp->if_flags & IFF_ALLMULTI) + mcnt = MAX_NUM_MULTICAST_ADDRESSES; + else { + struct ifmultiaddr *ifma; +#if __FreeBSD_version < 800000 + IF_ADDR_LOCK(ifp); +#else + if_maddr_rlock(ifp); +#endif + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) + break; + mcnt++; + } +#if __FreeBSD_version < 800000 + IF_ADDR_UNLOCK(ifp); +#else + if_maddr_runlock(ifp); +#endif + } + /* Don't disable if in MAX groups */ + if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) + reg &= (~E1000_RCTL_MPE); + E1000_WRITE_REG(hw, E1000_RCTL, reg); +} + + +/********************************************************************* + * Multicast Update + * + * This routine is called whenever multicast address list is updated. + * + **********************************************************************/ + +static void +igb_set_multi(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + struct ifmultiaddr *ifma; + u32 reg_rctl = 0; + u8 *mta; + + int mcnt = 0; + + IOCTL_DEBUGOUT("igb_set_multi: begin"); + + mta = adapter->mta; + bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * + MAX_NUM_MULTICAST_ADDRESSES); + +#if __FreeBSD_version < 800000 + IF_ADDR_LOCK(ifp); +#else + if_maddr_rlock(ifp); +#endif + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + + if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) + break; + + bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), + &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); + mcnt++; + } +#if __FreeBSD_version < 800000 + IF_ADDR_UNLOCK(ifp); +#else + if_maddr_runlock(ifp); +#endif + + if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { + reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + reg_rctl |= E1000_RCTL_MPE; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); + } else + e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); +} + + +/********************************************************************* + * Timer routine: + * This routine checks for link status, + * updates statistics, and does the watchdog. + * + **********************************************************************/ + +static void +igb_local_timer(void *arg) +{ + struct adapter *adapter = arg; + device_t dev = adapter->dev; + struct ifnet *ifp = adapter->ifp; + struct tx_ring *txr = adapter->tx_rings; + struct igb_queue *que = adapter->queues; + int hung = 0, busy = 0; + + + IGB_CORE_LOCK_ASSERT(adapter); + + igb_update_link_status(adapter); + igb_update_stats_counters(adapter); + + /* + ** Check the TX queues status + ** - central locked handling of OACTIVE + ** - watchdog only if all queues show hung + */ + for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { + if ((txr->queue_status & IGB_QUEUE_HUNG) && + (adapter->pause_frames == 0)) + ++hung; + if (txr->queue_status & IGB_QUEUE_DEPLETED) + ++busy; + if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) + taskqueue_enqueue(que->tq, &que->que_task); + } + if (hung == adapter->num_queues) + goto timeout; + if (busy == adapter->num_queues) + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && + (busy < adapter->num_queues)) + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + + adapter->pause_frames = 0; + callout_reset(&adapter->timer, hz, igb_local_timer, adapter); +#ifndef DEVICE_POLLING + /* Schedule all queue interrupts - deadlock protection */ + E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); +#endif + return; + +timeout: + device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); + device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, + E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), + E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); + device_printf(dev,"TX(%d) desc avail = %d," + "Next TX to Clean = %d\n", + txr->me, txr->tx_avail, txr->next_to_clean); + adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + adapter->watchdog_events++; + igb_init_locked(adapter); +} + +static void +igb_update_link_status(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_fc_info *fc = &hw->fc; + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + struct tx_ring *txr = adapter->tx_rings; + u32 link_check, thstat, ctrl; + char *flowctl = NULL; + + link_check = thstat = ctrl = 0; + + /* Get the cached link value or read for real */ + switch (hw->phy.media_type) { + case e1000_media_type_copper: + if (hw->mac.get_link_status) { + /* Do the work to read phy */ + e1000_check_for_link(hw); + link_check = !hw->mac.get_link_status; + } else + link_check = TRUE; + break; + case e1000_media_type_fiber: + e1000_check_for_link(hw); + link_check = (E1000_READ_REG(hw, E1000_STATUS) & + E1000_STATUS_LU); + break; + case e1000_media_type_internal_serdes: + e1000_check_for_link(hw); + link_check = adapter->hw.mac.serdes_has_link; + break; + /* VF device is type_unknown */ + case e1000_media_type_unknown: + e1000_check_for_link(hw); + link_check = !hw->mac.get_link_status; + /* Fall thru */ + default: + break; + } + + /* Check for thermal downshift or shutdown */ + if (hw->mac.type == e1000_i350) { + thstat = E1000_READ_REG(hw, E1000_THSTAT); + ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); + } + + /* Get the flow control for display */ + switch (fc->current_mode) { + case e1000_fc_rx_pause: + flowctl = "RX"; + break; + case e1000_fc_tx_pause: + flowctl = "TX"; + break; + case e1000_fc_full: + flowctl = "Full"; + break; + case e1000_fc_none: + default: + flowctl = "None"; + break; + } + + /* Now we check if a transition has happened */ + if (link_check && (adapter->link_active == 0)) { + e1000_get_speed_and_duplex(&adapter->hw, + &adapter->link_speed, &adapter->link_duplex); + if (bootverbose) + device_printf(dev, "Link is up %d Mbps %s," + " Flow Control: %s\n", + adapter->link_speed, + ((adapter->link_duplex == FULL_DUPLEX) ? + "Full Duplex" : "Half Duplex"), flowctl); + adapter->link_active = 1; + ifp->if_baudrate = adapter->link_speed * 1000000; + if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && + (thstat & E1000_THSTAT_LINK_THROTTLE)) + device_printf(dev, "Link: thermal downshift\n"); + /* Delay Link Up for Phy update */ + if (((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) && + (hw->phy.id == I210_I_PHY_ID)) + msec_delay(I210_LINK_DELAY); + /* Reset if the media type changed. */ + if (hw->dev_spec._82575.media_changed) { + hw->dev_spec._82575.media_changed = false; + adapter->flags |= IGB_MEDIA_RESET; + igb_reset(adapter); + } + /* This can sleep */ + if_link_state_change(ifp, LINK_STATE_UP); + } else if (!link_check && (adapter->link_active == 1)) { + ifp->if_baudrate = adapter->link_speed = 0; + adapter->link_duplex = 0; + if (bootverbose) + device_printf(dev, "Link is Down\n"); + if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && + (thstat & E1000_THSTAT_PWR_DOWN)) + device_printf(dev, "Link: thermal shutdown\n"); + adapter->link_active = 0; + /* This can sleep */ + if_link_state_change(ifp, LINK_STATE_DOWN); + /* Reset queue state */ + for (int i = 0; i < adapter->num_queues; i++, txr++) + txr->queue_status = IGB_QUEUE_IDLE; + } +} + +/********************************************************************* + * + * This routine disables all traffic on the adapter by issuing a + * global reset on the MAC and deallocates TX/RX buffers. + * + **********************************************************************/ + +static void +igb_stop(void *arg) +{ + struct adapter *adapter = arg; + struct ifnet *ifp = adapter->ifp; + struct tx_ring *txr = adapter->tx_rings; + + IGB_CORE_LOCK_ASSERT(adapter); + + INIT_DEBUGOUT("igb_stop: begin"); + + igb_disable_intr(adapter); + + callout_stop(&adapter->timer); + + /* Tell the stack that the interface is no longer active */ + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + + /* Disarm watchdog timer. */ + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IGB_TX_LOCK(txr); + txr->queue_status = IGB_QUEUE_IDLE; + IGB_TX_UNLOCK(txr); + } + + e1000_reset_hw(&adapter->hw); + E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); + + e1000_led_off(&adapter->hw); + e1000_cleanup_led(&adapter->hw); +} + + +/********************************************************************* + * + * Determine hardware revision. + * + **********************************************************************/ +static void +igb_identify_hardware(struct adapter *adapter) +{ + device_t dev = adapter->dev; + + /* Make sure our PCI config space has the necessary stuff set */ + pci_enable_busmaster(dev); + adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); + + /* Save off the information about this board */ + adapter->hw.vendor_id = pci_get_vendor(dev); + adapter->hw.device_id = pci_get_device(dev); + adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); + adapter->hw.subsystem_vendor_id = + pci_read_config(dev, PCIR_SUBVEND_0, 2); + adapter->hw.subsystem_device_id = + pci_read_config(dev, PCIR_SUBDEV_0, 2); + + /* Set MAC type early for PCI setup */ + e1000_set_mac_type(&adapter->hw); + + /* Are we a VF device? */ + if ((adapter->hw.mac.type == e1000_vfadapt) || + (adapter->hw.mac.type == e1000_vfadapt_i350)) + adapter->vf_ifp = 1; + else + adapter->vf_ifp = 0; +} + +static int +igb_allocate_pci_resources(struct adapter *adapter) +{ + device_t dev = adapter->dev; + int rid; + + rid = PCIR_BAR(0); + adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + if (adapter->pci_mem == NULL) { + device_printf(dev, "Unable to allocate bus resource: memory\n"); + return (ENXIO); + } + adapter->osdep.mem_bus_space_tag = + rman_get_bustag(adapter->pci_mem); + adapter->osdep.mem_bus_space_handle = + rman_get_bushandle(adapter->pci_mem); + adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; + + adapter->num_queues = 1; /* Defaults for Legacy or MSI */ + + /* This will setup either MSI/X or MSI */ + adapter->msix = igb_setup_msix(adapter); + adapter->hw.back = &adapter->osdep; + + return (0); +} + +/********************************************************************* + * + * Setup the Legacy or MSI Interrupt handler + * + **********************************************************************/ +static int +igb_allocate_legacy(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct igb_queue *que = adapter->queues; +#ifndef IGB_LEGACY_TX + struct tx_ring *txr = adapter->tx_rings; +#endif + int error, rid = 0; + + /* Turn off all interrupts */ + E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); + + /* MSI RID is 1 */ + if (adapter->msix == 1) + rid = 1; + + /* We allocate a single interrupt resource */ + adapter->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (adapter->res == NULL) { + device_printf(dev, "Unable to allocate bus resource: " + "interrupt\n"); + return (ENXIO); + } + +#ifndef IGB_LEGACY_TX + TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); +#endif + + /* + * Try allocating a fast interrupt and the associated deferred + * processing contexts. + */ + TASK_INIT(&que->que_task, 0, igb_handle_que, que); + /* Make tasklet for deferred link handling */ + TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); + que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", + device_get_nameunit(adapter->dev)); + if ((error = bus_setup_intr(dev, adapter->res, + INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, + adapter, &adapter->tag)) != 0) { + device_printf(dev, "Failed to register fast interrupt " + "handler: %d\n", error); + taskqueue_free(que->tq); + que->tq = NULL; + return (error); + } + + return (0); +} + + +/********************************************************************* + * + * Setup the MSIX Queue Interrupt handlers: + * + **********************************************************************/ +static int +igb_allocate_msix(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct igb_queue *que = adapter->queues; + int error, rid, vector = 0; + int cpu_id = 0; +#ifdef RSS + cpuset_t cpu_mask; +#endif + + /* Be sure to start with all interrupts disabled */ + E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(&adapter->hw); + +#ifdef RSS + /* + * If we're doing RSS, the number of queues needs to + * match the number of RSS buckets that are configured. + * + * + If there's more queues than RSS buckets, we'll end + * up with queues that get no traffic. + * + * + If there's more RSS buckets than queues, we'll end + * up having multiple RSS buckets map to the same queue, + * so there'll be some contention. + */ + if (adapter->num_queues != rss_getnumbuckets()) { + device_printf(dev, + "%s: number of queues (%d) != number of RSS buckets (%d)" + "; performance will be impacted.\n", + __func__, + adapter->num_queues, + rss_getnumbuckets()); + } +#endif + + for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { + rid = vector +1; + que->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (que->res == NULL) { + device_printf(dev, + "Unable to allocate bus resource: " + "MSIX Queue Interrupt\n"); + return (ENXIO); + } + error = bus_setup_intr(dev, que->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, + igb_msix_que, que, &que->tag); + if (error) { + que->res = NULL; + device_printf(dev, "Failed to register Queue handler"); + return (error); + } +#if __FreeBSD_version >= 800504 + bus_describe_intr(dev, que->res, que->tag, "que %d", i); +#endif + que->msix = vector; + if (adapter->hw.mac.type == e1000_82575) + que->eims = E1000_EICR_TX_QUEUE0 << i; + else + que->eims = 1 << vector; + +#ifdef RSS + /* + * The queue ID is used as the RSS layer bucket ID. + * We look up the queue ID -> RSS CPU ID and select + * that. + */ + cpu_id = rss_getcpu(i % rss_getnumbuckets()); +#else + /* + * Bind the msix vector, and thus the + * rings to the corresponding cpu. + * + * This just happens to match the default RSS round-robin + * bucket -> queue -> CPU allocation. + */ + if (adapter->num_queues > 1) { + if (igb_last_bind_cpu < 0) + igb_last_bind_cpu = CPU_FIRST(); + cpu_id = igb_last_bind_cpu; + } +#endif + + if (adapter->num_queues > 1) { + bus_bind_intr(dev, que->res, cpu_id); +#ifdef RSS + device_printf(dev, + "Bound queue %d to RSS bucket %d\n", + i, cpu_id); +#else + device_printf(dev, + "Bound queue %d to cpu %d\n", + i, cpu_id); +#endif + } + +#ifndef IGB_LEGACY_TX + TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, + que->txr); +#endif + /* Make tasklet for deferred handling */ + TASK_INIT(&que->que_task, 0, igb_handle_que, que); + que->tq = taskqueue_create("igb_que", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + if (adapter->num_queues > 1) { + /* + * Only pin the taskqueue thread to a CPU if + * RSS is in use. + * + * This again just happens to match the default RSS + * round-robin bucket -> queue -> CPU allocation. + */ +#ifdef RSS + CPU_SETOF(cpu_id, &cpu_mask); + taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, + &cpu_mask, + "%s que (bucket %d)", + device_get_nameunit(adapter->dev), + cpu_id); +#else + taskqueue_start_threads(&que->tq, 1, PI_NET, + "%s que (qid %d)", + device_get_nameunit(adapter->dev), + cpu_id); +#endif + } else { + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", + device_get_nameunit(adapter->dev)); + } + + /* Finally update the last bound CPU id */ + if (adapter->num_queues > 1) + igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); + } + + /* And Link */ + rid = vector + 1; + adapter->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (adapter->res == NULL) { + device_printf(dev, + "Unable to allocate bus resource: " + "MSIX Link Interrupt\n"); + return (ENXIO); + } + if ((error = bus_setup_intr(dev, adapter->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, + igb_msix_link, adapter, &adapter->tag)) != 0) { + device_printf(dev, "Failed to register Link handler"); + return (error); + } +#if __FreeBSD_version >= 800504 + bus_describe_intr(dev, adapter->res, adapter->tag, "link"); +#endif + adapter->linkvec = vector; + + return (0); +} + + +static void +igb_configure_queues(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct igb_queue *que; + u32 tmp, ivar = 0, newitr = 0; + + /* First turn on RSS capability */ + if (adapter->hw.mac.type != e1000_82575) + E1000_WRITE_REG(hw, E1000_GPIE, + E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | + E1000_GPIE_PBA | E1000_GPIE_NSICR); + + /* Turn on MSIX */ + switch (adapter->hw.mac.type) { + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + case e1000_vfadapt: + case e1000_vfadapt_i350: + /* RX entries */ + for (int i = 0; i < adapter->num_queues; i++) { + u32 index = i >> 1; + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + que = &adapter->queues[i]; + if (i & 1) { + ivar &= 0xFF00FFFF; + ivar |= (que->msix | E1000_IVAR_VALID) << 16; + } else { + ivar &= 0xFFFFFF00; + ivar |= que->msix | E1000_IVAR_VALID; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + } + /* TX entries */ + for (int i = 0; i < adapter->num_queues; i++) { + u32 index = i >> 1; + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + que = &adapter->queues[i]; + if (i & 1) { + ivar &= 0x00FFFFFF; + ivar |= (que->msix | E1000_IVAR_VALID) << 24; + } else { + ivar &= 0xFFFF00FF; + ivar |= (que->msix | E1000_IVAR_VALID) << 8; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= que->eims; + } + + /* And for the link interrupt */ + ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; + adapter->link_mask = 1 << adapter->linkvec; + E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); + break; + case e1000_82576: + /* RX entries */ + for (int i = 0; i < adapter->num_queues; i++) { + u32 index = i & 0x7; /* Each IVAR has two entries */ + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + que = &adapter->queues[i]; + if (i < 8) { + ivar &= 0xFFFFFF00; + ivar |= que->msix | E1000_IVAR_VALID; + } else { + ivar &= 0xFF00FFFF; + ivar |= (que->msix | E1000_IVAR_VALID) << 16; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= que->eims; + } + /* TX entries */ + for (int i = 0; i < adapter->num_queues; i++) { + u32 index = i & 0x7; /* Each IVAR has two entries */ + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + que = &adapter->queues[i]; + if (i < 8) { + ivar &= 0xFFFF00FF; + ivar |= (que->msix | E1000_IVAR_VALID) << 8; + } else { + ivar &= 0x00FFFFFF; + ivar |= (que->msix | E1000_IVAR_VALID) << 24; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= que->eims; + } + + /* And for the link interrupt */ + ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; + adapter->link_mask = 1 << adapter->linkvec; + E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); + break; + + case e1000_82575: + /* enable MSI-X support*/ + tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); + tmp |= E1000_CTRL_EXT_PBA_CLR; + /* Auto-Mask interrupts upon ICR read. */ + tmp |= E1000_CTRL_EXT_EIAME; + tmp |= E1000_CTRL_EXT_IRCA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); + + /* Queues */ + for (int i = 0; i < adapter->num_queues; i++) { + que = &adapter->queues[i]; + tmp = E1000_EICR_RX_QUEUE0 << i; + tmp |= E1000_EICR_TX_QUEUE0 << i; + que->eims = tmp; + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), + i, que->eims); + adapter->que_mask |= que->eims; + } + + /* Link */ + E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), + E1000_EIMS_OTHER); + adapter->link_mask |= E1000_EIMS_OTHER; + default: + break; + } + + /* Set the starting interrupt rate */ + if (igb_max_interrupt_rate > 0) + newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; + + if (hw->mac.type == e1000_82575) + newitr |= newitr << 16; + else + newitr |= E1000_EITR_CNT_IGNR; + + for (int i = 0; i < adapter->num_queues; i++) { + que = &adapter->queues[i]; + E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); + } + + return; +} + + +static void +igb_free_pci_resources(struct adapter *adapter) +{ + struct igb_queue *que = adapter->queues; + device_t dev = adapter->dev; + int rid; + + /* + ** There is a slight possibility of a failure mode + ** in attach that will result in entering this function + ** before interrupt resources have been initialized, and + ** in that case we do not want to execute the loops below + ** We can detect this reliably by the state of the adapter + ** res pointer. + */ + if (adapter->res == NULL) + goto mem; + + /* + * First release all the interrupt resources: + */ + for (int i = 0; i < adapter->num_queues; i++, que++) { + rid = que->msix + 1; + if (que->tag != NULL) { + bus_teardown_intr(dev, que->res, que->tag); + que->tag = NULL; + } + if (que->res != NULL) + bus_release_resource(dev, + SYS_RES_IRQ, rid, que->res); + } + + /* Clean the Legacy or Link interrupt last */ + if (adapter->linkvec) /* we are doing MSIX */ + rid = adapter->linkvec + 1; + else + (adapter->msix != 0) ? (rid = 1):(rid = 0); + + que = adapter->queues; + if (adapter->tag != NULL) { + taskqueue_drain(que->tq, &adapter->link_task); + bus_teardown_intr(dev, adapter->res, adapter->tag); + adapter->tag = NULL; + } + if (adapter->res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); + + for (int i = 0; i < adapter->num_queues; i++, que++) { + if (que->tq != NULL) { +#ifndef IGB_LEGACY_TX + taskqueue_drain(que->tq, &que->txr->txq_task); +#endif + taskqueue_drain(que->tq, &que->que_task); + taskqueue_free(que->tq); + } + } +mem: + if (adapter->msix) + pci_release_msi(dev); + + if (adapter->msix_mem != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + adapter->memrid, adapter->msix_mem); + + if (adapter->pci_mem != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->pci_mem); + +} + +/* + * Setup Either MSI/X or MSI + */ +static int +igb_setup_msix(struct adapter *adapter) +{ + device_t dev = adapter->dev; + int bar, want, queues, msgs, maxqueues; + + /* tuneable override */ + if (igb_enable_msix == 0) + goto msi; + + /* First try MSI/X */ + msgs = pci_msix_count(dev); + if (msgs == 0) + goto msi; + /* + ** Some new devices, as with ixgbe, now may + ** use a different BAR, so we need to keep + ** track of which is used. + */ + adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); + bar = pci_read_config(dev, adapter->memrid, 4); + if (bar == 0) /* use next bar */ + adapter->memrid += 4; + adapter->msix_mem = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); + if (adapter->msix_mem == NULL) { + /* May not be enabled */ + device_printf(adapter->dev, + "Unable to map MSIX table \n"); + goto msi; + } + + queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; + + /* Override via tuneable */ + if (igb_num_queues != 0) + queues = igb_num_queues; + +#ifdef RSS + /* If we're doing RSS, clamp at the number of RSS buckets */ + if (queues > rss_getnumbuckets()) + queues = rss_getnumbuckets(); +#endif + + + /* Sanity check based on HW */ + switch (adapter->hw.mac.type) { + case e1000_82575: + maxqueues = 4; + break; + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + maxqueues = 8; + break; + case e1000_i210: + maxqueues = 4; + break; + case e1000_i211: + maxqueues = 2; + break; + default: /* VF interfaces */ + maxqueues = 1; + break; + } + + /* Final clamp on the actual hardware capability */ + if (queues > maxqueues) + queues = maxqueues; + + /* + ** One vector (RX/TX pair) per queue + ** plus an additional for Link interrupt + */ + want = queues + 1; + if (msgs >= want) + msgs = want; + else { + device_printf(adapter->dev, + "MSIX Configuration Problem, " + "%d vectors configured, but %d queues wanted!\n", + msgs, want); + goto msi; + } + if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { + device_printf(adapter->dev, + "Using MSIX interrupts with %d vectors\n", msgs); + adapter->num_queues = queues; + return (msgs); + } + /* + ** If MSIX alloc failed or provided us with + ** less than needed, free and fall through to MSI + */ + pci_release_msi(dev); + +msi: + if (adapter->msix_mem != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); + adapter->msix_mem = NULL; + } + msgs = 1; + if (pci_alloc_msi(dev, &msgs) == 0) { + device_printf(adapter->dev," Using an MSI interrupt\n"); + return (msgs); + } + device_printf(adapter->dev," Using a Legacy interrupt\n"); + return (0); +} + +/********************************************************************* + * + * Initialize the DMA Coalescing feature + * + **********************************************************************/ +static void +igb_init_dmac(struct adapter *adapter, u32 pba) +{ + device_t dev = adapter->dev; + struct e1000_hw *hw = &adapter->hw; + u32 dmac, reg = ~E1000_DMACR_DMAC_EN; + u16 hwm; + + if (hw->mac.type == e1000_i211) + return; + + if (hw->mac.type > e1000_82580) { + + if (adapter->dmac == 0) { /* Disabling it */ + E1000_WRITE_REG(hw, E1000_DMACR, reg); + return; + } else + device_printf(dev, "DMA Coalescing enabled\n"); + + /* Set starting threshold */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); + + hwm = 64 * pba - adapter->max_frame_size / 16; + if (hwm < 64 * (pba - 6)) + hwm = 64 * (pba - 6); + reg = E1000_READ_REG(hw, E1000_FCRTC); + reg &= ~E1000_FCRTC_RTH_COAL_MASK; + reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) + & E1000_FCRTC_RTH_COAL_MASK); + E1000_WRITE_REG(hw, E1000_FCRTC, reg); + + + dmac = pba - adapter->max_frame_size / 512; + if (dmac < pba - 10) + dmac = pba - 10; + reg = E1000_READ_REG(hw, E1000_DMACR); + reg &= ~E1000_DMACR_DMACTHR_MASK; + reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) + & E1000_DMACR_DMACTHR_MASK); + + /* transition to L0x or L1 if available..*/ + reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); + + /* Check if status is 2.5Gb backplane connection + * before configuration of watchdog timer, which is + * in msec values in 12.8usec intervals + * watchdog timer= msec values in 32usec intervals + * for non 2.5Gb connection + */ + if (hw->mac.type == e1000_i354) { + int status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= ((adapter->dmac * 5) >> 6); + else + reg |= (adapter->dmac >> 5); + } else { + reg |= (adapter->dmac >> 5); + } + + E1000_WRITE_REG(hw, E1000_DMACR, reg); + + E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); + + /* Set the interval before transition */ + reg = E1000_READ_REG(hw, E1000_DMCTLX); + if (hw->mac.type == e1000_i350) + reg |= IGB_DMCTLX_DCFLUSH_DIS; + /* + ** in 2.5Gb connection, TTLX unit is 0.4 usec + ** which is 0x4*2 = 0xA. But delay is still 4 usec + */ + if (hw->mac.type == e1000_i354) { + int status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= 0xA; + else + reg |= 0x4; + } else { + reg |= 0x4; + } + + E1000_WRITE_REG(hw, E1000_DMCTLX, reg); + + /* free space in tx packet buffer to wake from DMA coal */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - + (2 * adapter->max_frame_size)) >> 6); + + /* make low power state decision controlled by DMA coal */ + reg = E1000_READ_REG(hw, E1000_PCIEMISC); + reg &= ~E1000_PCIEMISC_LX_DECISION; + E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); + + } else if (hw->mac.type == e1000_82580) { + u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); + E1000_WRITE_REG(hw, E1000_PCIEMISC, + reg & ~E1000_PCIEMISC_LX_DECISION); + E1000_WRITE_REG(hw, E1000_DMACR, 0); + } +} + + +/********************************************************************* + * + * Set up an fresh starting state + * + **********************************************************************/ +static void +igb_reset(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct e1000_hw *hw = &adapter->hw; + struct e1000_fc_info *fc = &hw->fc; + struct ifnet *ifp = adapter->ifp; + u32 pba = 0; + u16 hwm; + + INIT_DEBUGOUT("igb_reset: begin"); + + /* Let the firmware know the OS is in control */ + igb_get_hw_control(adapter); + + /* + * Packet Buffer Allocation (PBA) + * Writing PBA sets the receive portion of the buffer + * the remainder is used for the transmit buffer. + */ + switch (hw->mac.type) { + case e1000_82575: + pba = E1000_PBA_32K; + break; + case e1000_82576: + case e1000_vfadapt: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba &= E1000_RXPBS_SIZE_MASK_82576; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_vfadapt_i350: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba = e1000_rxpbs_adjust_82580(pba); + break; + case e1000_i210: + case e1000_i211: + pba = E1000_PBA_34K; + default: + break; + } + + /* Special needs in case of Jumbo frames */ + if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { + u32 tx_space, min_tx, min_rx; + pba = E1000_READ_REG(hw, E1000_PBA); + tx_space = pba >> 16; + pba &= 0xffff; + min_tx = (adapter->max_frame_size + + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; + min_tx = roundup2(min_tx, 1024); + min_tx >>= 10; + min_rx = adapter->max_frame_size; + min_rx = roundup2(min_rx, 1024); + min_rx >>= 10; + if (tx_space < min_tx && + ((min_tx - tx_space) < pba)) { + pba = pba - (min_tx - tx_space); + /* + * if short on rx space, rx wins + * and must trump tx adjustment + */ + if (pba < min_rx) + pba = min_rx; + } + E1000_WRITE_REG(hw, E1000_PBA, pba); + } + + INIT_DEBUGOUT1("igb_init: pba=%dK",pba); + + /* + * These parameters control the automatic generation (Tx) and + * response (Rx) to Ethernet PAUSE frames. + * - High water mark should allow for at least two frames to be + * received after sending an XOFF. + * - Low water mark works best when it is very near the high water mark. + * This allows the receiver to restart by sending XON when it has + * drained a bit. + */ + hwm = min(((pba << 10) * 9 / 10), + ((pba << 10) - 2 * adapter->max_frame_size)); + + if (hw->mac.type < e1000_82576) { + fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ + fc->low_water = fc->high_water - 8; + } else { + fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; + } + + fc->pause_time = IGB_FC_PAUSE_TIME; + fc->send_xon = TRUE; + if (adapter->fc) + fc->requested_mode = adapter->fc; + else + fc->requested_mode = e1000_fc_default; + + /* Issue a global reset */ + e1000_reset_hw(hw); + E1000_WRITE_REG(hw, E1000_WUC, 0); + + /* Reset for AutoMediaDetect */ + if (adapter->flags & IGB_MEDIA_RESET) { + e1000_setup_init_funcs(hw, TRUE); + e1000_get_bus_info(hw); + adapter->flags &= ~IGB_MEDIA_RESET; + } + + if (e1000_init_hw(hw) < 0) + device_printf(dev, "Hardware Initialization Failed\n"); + + /* Setup DMA Coalescing */ + igb_init_dmac(adapter, pba); + + E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); + e1000_get_phy_info(hw); + e1000_check_for_link(hw); + return; +} + +/********************************************************************* + * + * Setup networking device structure and register an interface. + * + **********************************************************************/ +static int +igb_setup_interface(device_t dev, struct adapter *adapter) +{ + struct ifnet *ifp; + + INIT_DEBUGOUT("igb_setup_interface: begin"); + + ifp = adapter->ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) { + device_printf(dev, "can not allocate ifnet structure\n"); + return (-1); + } + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + ifp->if_init = igb_init; + ifp->if_softc = adapter; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_ioctl = igb_ioctl; + ifp->if_get_counter = igb_get_counter; + + /* TSO parameters */ + ifp->if_hw_tsomax = IP_MAXPACKET; + ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER; + ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE; + +#ifndef IGB_LEGACY_TX + ifp->if_transmit = igb_mq_start; + ifp->if_qflush = igb_qflush; +#else + ifp->if_start = igb_start; + IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); + ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; + IFQ_SET_READY(&ifp->if_snd); +#endif + + ether_ifattach(ifp, adapter->hw.mac.addr); + + ifp->if_capabilities = ifp->if_capenable = 0; + + ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; +#if __FreeBSD_version >= 1000000 + ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; +#endif + ifp->if_capabilities |= IFCAP_TSO; + ifp->if_capabilities |= IFCAP_JUMBO_MTU; + ifp->if_capenable = ifp->if_capabilities; + + /* Don't enable LRO by default */ + ifp->if_capabilities |= IFCAP_LRO; + +#ifdef DEVICE_POLLING + ifp->if_capabilities |= IFCAP_POLLING; +#endif + + /* + * Tell the upper layer(s) we + * support full VLAN capability. + */ + ifp->if_hdrlen = sizeof(struct ether_vlan_header); + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; + ifp->if_capenable |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; + + /* + ** Don't turn this on by default, if vlans are + ** created on another pseudo device (eg. lagg) + ** then vlan events are not passed thru, breaking + ** operation, but with HW FILTER off it works. If + ** using vlans directly on the igb driver you can + ** enable this and get full hardware tag filtering. + */ + ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + + /* + * Specify the media types supported by this adapter and register + * callbacks to update media and link information + */ + ifmedia_init(&adapter->media, IFM_IMASK, + igb_media_change, igb_media_status); + if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || + (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, + 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); + } else { + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, + 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, + 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, + 0, NULL); + if (adapter->hw.phy.type != e1000_phy_ife) { + ifmedia_add(&adapter->media, + IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(&adapter->media, + IFM_ETHER | IFM_1000_T, 0, NULL); + } + } + ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); + return (0); +} + + +/* + * Manage DMA'able memory. + */ +static void +igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) +{ + if (error) + return; + *(bus_addr_t *) arg = segs[0].ds_addr; +} + +static int +igb_dma_malloc(struct adapter *adapter, bus_size_t size, + struct igb_dma_alloc *dma, int mapflags) +{ + int error; + + error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ + IGB_DBA_ALIGN, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + size, /* maxsize */ + 1, /* nsegments */ + size, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockarg */ + &dma->dma_tag); + if (error) { + device_printf(adapter->dev, + "%s: bus_dma_tag_create failed: %d\n", + __func__, error); + goto fail_0; + } + + error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, + BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); + if (error) { + device_printf(adapter->dev, + "%s: bus_dmamem_alloc(%ju) failed: %d\n", + __func__, (uintmax_t)size, error); + goto fail_2; + } + + dma->dma_paddr = 0; + error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, + size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); + if (error || dma->dma_paddr == 0) { + device_printf(adapter->dev, + "%s: bus_dmamap_load failed: %d\n", + __func__, error); + goto fail_3; + } + + return (0); + +fail_3: + bus_dmamap_unload(dma->dma_tag, dma->dma_map); +fail_2: + bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); + bus_dma_tag_destroy(dma->dma_tag); +fail_0: + dma->dma_tag = NULL; + + return (error); +} + +static void +igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) +{ + if (dma->dma_tag == NULL) + return; + if (dma->dma_paddr != 0) { + bus_dmamap_sync(dma->dma_tag, dma->dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(dma->dma_tag, dma->dma_map); + dma->dma_paddr = 0; + } + if (dma->dma_vaddr != NULL) { + bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); + dma->dma_vaddr = NULL; + } + bus_dma_tag_destroy(dma->dma_tag); + dma->dma_tag = NULL; +} + + +/********************************************************************* + * + * Allocate memory for the transmit and receive rings, and then + * the descriptors associated with each, called only once at attach. + * + **********************************************************************/ +static int +igb_allocate_queues(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct igb_queue *que = NULL; + struct tx_ring *txr = NULL; + struct rx_ring *rxr = NULL; + int rsize, tsize, error = E1000_SUCCESS; + int txconf = 0, rxconf = 0; + + /* First allocate the top level queue structs */ + if (!(adapter->queues = + (struct igb_queue *) malloc(sizeof(struct igb_queue) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate queue memory\n"); + error = ENOMEM; + goto fail; + } + + /* Next allocate the TX ring struct memory */ + if (!(adapter->tx_rings = + (struct tx_ring *) malloc(sizeof(struct tx_ring) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate TX ring memory\n"); + error = ENOMEM; + goto tx_fail; + } + + /* Now allocate the RX */ + if (!(adapter->rx_rings = + (struct rx_ring *) malloc(sizeof(struct rx_ring) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate RX ring memory\n"); + error = ENOMEM; + goto rx_fail; + } + + tsize = roundup2(adapter->num_tx_desc * + sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); + /* + * Now set up the TX queues, txconf is needed to handle the + * possibility that things fail midcourse and we need to + * undo memory gracefully + */ + for (int i = 0; i < adapter->num_queues; i++, txconf++) { + /* Set up some basics */ + txr = &adapter->tx_rings[i]; + txr->adapter = adapter; + txr->me = i; + txr->num_desc = adapter->num_tx_desc; + + /* Initialize the TX lock */ + snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", + device_get_nameunit(dev), txr->me); + mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); + + if (igb_dma_malloc(adapter, tsize, + &txr->txdma, BUS_DMA_NOWAIT)) { + device_printf(dev, + "Unable to allocate TX Descriptor memory\n"); + error = ENOMEM; + goto err_tx_desc; + } + txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr; + bzero((void *)txr->tx_base, tsize); + + /* Now allocate transmit buffers for the ring */ + if (igb_allocate_transmit_buffers(txr)) { + device_printf(dev, + "Critical Failure setting up transmit buffers\n"); + error = ENOMEM; + goto err_tx_desc; + } +#ifndef IGB_LEGACY_TX + /* Allocate a buf ring */ + txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, + M_WAITOK, &txr->tx_mtx); +#endif + } + + /* + * Next the RX queues... + */ + rsize = roundup2(adapter->num_rx_desc * + sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); + for (int i = 0; i < adapter->num_queues; i++, rxconf++) { + rxr = &adapter->rx_rings[i]; + rxr->adapter = adapter; + rxr->me = i; + + /* Initialize the RX lock */ + snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", + device_get_nameunit(dev), txr->me); + mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); + + if (igb_dma_malloc(adapter, rsize, + &rxr->rxdma, BUS_DMA_NOWAIT)) { + device_printf(dev, + "Unable to allocate RxDescriptor memory\n"); + error = ENOMEM; + goto err_rx_desc; + } + rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr; + bzero((void *)rxr->rx_base, rsize); + + /* Allocate receive buffers for the ring*/ + if (igb_allocate_receive_buffers(rxr)) { + device_printf(dev, + "Critical Failure setting up receive buffers\n"); + error = ENOMEM; + goto err_rx_desc; + } + } + + /* + ** Finally set up the queue holding structs + */ + for (int i = 0; i < adapter->num_queues; i++) { + que = &adapter->queues[i]; + que->adapter = adapter; + que->txr = &adapter->tx_rings[i]; + que->rxr = &adapter->rx_rings[i]; + } + + return (0); + +err_rx_desc: + for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) + igb_dma_free(adapter, &rxr->rxdma); +err_tx_desc: + for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) + igb_dma_free(adapter, &txr->txdma); + free(adapter->rx_rings, M_DEVBUF); +rx_fail: +#ifndef IGB_LEGACY_TX + buf_ring_free(txr->br, M_DEVBUF); +#endif + free(adapter->tx_rings, M_DEVBUF); +tx_fail: + free(adapter->queues, M_DEVBUF); +fail: + return (error); +} + +/********************************************************************* + * + * Allocate memory for tx_buffer structures. The tx_buffer stores all + * the information needed to transmit a packet on the wire. This is + * called only once at attach, setup is done every reset. + * + **********************************************************************/ +static int +igb_allocate_transmit_buffers(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + device_t dev = adapter->dev; + struct igb_tx_buf *txbuf; + int error, i; + + /* + * Setup DMA descriptor areas. + */ + if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + IGB_TSO_SIZE, /* maxsize */ + IGB_MAX_SCATTER, /* nsegments */ + PAGE_SIZE, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &txr->txtag))) { + device_printf(dev,"Unable to allocate TX DMA tag\n"); + goto fail; + } + + if (!(txr->tx_buffers = + (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * + adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate tx_buffer memory\n"); + error = ENOMEM; + goto fail; + } + + /* Create the descriptor buffer dma maps */ + txbuf = txr->tx_buffers; + for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { + error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); + if (error != 0) { + device_printf(dev, "Unable to create TX DMA map\n"); + goto fail; + } + } + + return 0; +fail: + /* We free all, it handles case where we are in the middle */ + igb_free_transmit_structures(adapter); + return (error); +} + +/********************************************************************* + * + * Initialize a transmit ring. + * + **********************************************************************/ +static void +igb_setup_transmit_ring(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct igb_tx_buf *txbuf; + int i; +#ifdef DEV_NETMAP + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_slot *slot; +#endif /* DEV_NETMAP */ + + /* Clear the old descriptor contents */ + IGB_TX_LOCK(txr); +#ifdef DEV_NETMAP + slot = netmap_reset(na, NR_TX, txr->me, 0); +#endif /* DEV_NETMAP */ + bzero((void *)txr->tx_base, + (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); + /* Reset indices */ + txr->next_avail_desc = 0; + txr->next_to_clean = 0; + + /* Free any existing tx buffers. */ + txbuf = txr->tx_buffers; + for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { + if (txbuf->m_head != NULL) { + bus_dmamap_sync(txr->txtag, txbuf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, txbuf->map); + m_freem(txbuf->m_head); + txbuf->m_head = NULL; + } +#ifdef DEV_NETMAP + if (slot) { + int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); + /* no need to set the address */ + netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); + } +#endif /* DEV_NETMAP */ + /* clear the watch index */ + txbuf->eop = NULL; + } + + /* Set number of descriptors available */ + txr->tx_avail = adapter->num_tx_desc; + + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + IGB_TX_UNLOCK(txr); +} + +/********************************************************************* + * + * Initialize all transmit rings. + * + **********************************************************************/ +static void +igb_setup_transmit_structures(struct adapter *adapter) +{ + struct tx_ring *txr = adapter->tx_rings; + + for (int i = 0; i < adapter->num_queues; i++, txr++) + igb_setup_transmit_ring(txr); + + return; +} + +/********************************************************************* + * + * Enable transmit unit. + * + **********************************************************************/ +static void +igb_initialize_transmit_units(struct adapter *adapter) +{ + struct tx_ring *txr = adapter->tx_rings; + struct e1000_hw *hw = &adapter->hw; + u32 tctl, txdctl; + + INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); + tctl = txdctl = 0; + + /* Setup the Tx Descriptor Rings */ + for (int i = 0; i < adapter->num_queues; i++, txr++) { + u64 bus_addr = txr->txdma.dma_paddr; + + E1000_WRITE_REG(hw, E1000_TDLEN(i), + adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); + E1000_WRITE_REG(hw, E1000_TDBAH(i), + (uint32_t)(bus_addr >> 32)); + E1000_WRITE_REG(hw, E1000_TDBAL(i), + (uint32_t)bus_addr); + + /* Setup the HW Tx Head and Tail descriptor pointers */ + E1000_WRITE_REG(hw, E1000_TDT(i), 0); + E1000_WRITE_REG(hw, E1000_TDH(i), 0); + + HW_DEBUGOUT2("Base = %x, Length = %x\n", + E1000_READ_REG(hw, E1000_TDBAL(i)), + E1000_READ_REG(hw, E1000_TDLEN(i))); + + txr->queue_status = IGB_QUEUE_IDLE; + + txdctl |= IGB_TX_PTHRESH; + txdctl |= IGB_TX_HTHRESH << 8; + txdctl |= IGB_TX_WTHRESH << 16; + txdctl |= E1000_TXDCTL_QUEUE_ENABLE; + E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); + } + + if (adapter->vf_ifp) + return; + + e1000_config_collision_dist(hw); + + /* Program the Transmit Control Register */ + tctl = E1000_READ_REG(hw, E1000_TCTL); + tctl &= ~E1000_TCTL_CT; + tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | + (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); + + /* This write will effectively turn on the transmit unit. */ + E1000_WRITE_REG(hw, E1000_TCTL, tctl); +} + +/********************************************************************* + * + * Free all transmit rings. + * + **********************************************************************/ +static void +igb_free_transmit_structures(struct adapter *adapter) +{ + struct tx_ring *txr = adapter->tx_rings; + + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IGB_TX_LOCK(txr); + igb_free_transmit_buffers(txr); + igb_dma_free(adapter, &txr->txdma); + IGB_TX_UNLOCK(txr); + IGB_TX_LOCK_DESTROY(txr); + } + free(adapter->tx_rings, M_DEVBUF); +} + +/********************************************************************* + * + * Free transmit ring related data structures. + * + **********************************************************************/ +static void +igb_free_transmit_buffers(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct igb_tx_buf *tx_buffer; + int i; + + INIT_DEBUGOUT("free_transmit_ring: begin"); + + if (txr->tx_buffers == NULL) + return; + + tx_buffer = txr->tx_buffers; + for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { + if (tx_buffer->m_head != NULL) { + bus_dmamap_sync(txr->txtag, tx_buffer->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, + tx_buffer->map); + m_freem(tx_buffer->m_head); + tx_buffer->m_head = NULL; + if (tx_buffer->map != NULL) { + bus_dmamap_destroy(txr->txtag, + tx_buffer->map); + tx_buffer->map = NULL; + } + } else if (tx_buffer->map != NULL) { + bus_dmamap_unload(txr->txtag, + tx_buffer->map); + bus_dmamap_destroy(txr->txtag, + tx_buffer->map); + tx_buffer->map = NULL; + } + } +#ifndef IGB_LEGACY_TX + if (txr->br != NULL) + buf_ring_free(txr->br, M_DEVBUF); +#endif + if (txr->tx_buffers != NULL) { + free(txr->tx_buffers, M_DEVBUF); + txr->tx_buffers = NULL; + } + if (txr->txtag != NULL) { + bus_dma_tag_destroy(txr->txtag); + txr->txtag = NULL; + } + return; +} + +/********************************************************************** + * + * Setup work for hardware segmentation offload (TSO) on + * adapters using advanced tx descriptors + * + **********************************************************************/ +static int +igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, + u32 *cmd_type_len, u32 *olinfo_status) +{ + struct adapter *adapter = txr->adapter; + struct e1000_adv_tx_context_desc *TXD; + u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; + u32 mss_l4len_idx = 0, paylen; + u16 vtag = 0, eh_type; + int ctxd, ehdrlen, ip_hlen, tcp_hlen; + struct ether_vlan_header *eh; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif +#ifdef INET + struct ip *ip; +#endif + struct tcphdr *th; + + + /* + * Determine where frame payload starts. + * Jump over vlan headers if already present + */ + eh = mtod(mp, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + eh_type = eh->evl_proto; + } else { + ehdrlen = ETHER_HDR_LEN; + eh_type = eh->evl_encap_proto; + } + + switch (ntohs(eh_type)) { +#ifdef INET6 + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + /* XXX-BZ For now we do not pretend to support ext. hdrs. */ + if (ip6->ip6_nxt != IPPROTO_TCP) + return (ENXIO); + ip_hlen = sizeof(struct ip6_hdr); + ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); + th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + ip = (struct ip *)(mp->m_data + ehdrlen); + if (ip->ip_p != IPPROTO_TCP) + return (ENXIO); + ip->ip_sum = 0; + ip_hlen = ip->ip_hl << 2; + th = (struct tcphdr *)((caddr_t)ip + ip_hlen); + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(IPPROTO_TCP)); + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + /* Tell transmit desc to also do IPv4 checksum. */ + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; + break; +#endif + default: + panic("%s: CSUM_TSO but no supported IP version (0x%04x)", + __func__, ntohs(eh_type)); + break; + } + + ctxd = txr->next_avail_desc; + TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; + + tcp_hlen = th->th_off << 2; + + /* This is used in the transmit desc in encap */ + paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; + + /* VLAN MACLEN IPLEN */ + if (mp->m_flags & M_VLANTAG) { + vtag = htole16(mp->m_pkthdr.ether_vtag); + vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); + } + + vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= ip_hlen; + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + + /* ADV DTYPE TUCMD */ + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + + /* MSS L4LEN IDX */ + mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); + mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); + /* 82575 needs the queue index added */ + if (adapter->hw.mac.type == e1000_82575) + mss_l4len_idx |= txr->me << 4; + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + TXD->seqnum_seed = htole32(0); + + if (++ctxd == txr->num_desc) + ctxd = 0; + + txr->tx_avail--; + txr->next_avail_desc = ctxd; + *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; + ++txr->tso_tx; + return (0); +} + +/********************************************************************* + * + * Advanced Context Descriptor setup for VLAN, CSUM or TSO + * + **********************************************************************/ + +static int +igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, + u32 *cmd_type_len, u32 *olinfo_status) +{ + struct e1000_adv_tx_context_desc *TXD; + struct adapter *adapter = txr->adapter; + struct ether_vlan_header *eh; + struct ip *ip; + struct ip6_hdr *ip6; + u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; + int ehdrlen, ip_hlen = 0; + u16 etype; + u8 ipproto = 0; + int offload = TRUE; + int ctxd = txr->next_avail_desc; + u16 vtag = 0; + + /* First check if TSO is to be used */ + if (mp->m_pkthdr.csum_flags & CSUM_TSO) + return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); + + if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) + offload = FALSE; + + /* Indicate the whole packet as payload when not doing TSO */ + *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; + + /* Now ready a context descriptor */ + TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; + + /* + ** In advanced descriptors the vlan tag must + ** be placed into the context descriptor. Hence + ** we need to make one even if not doing offloads. + */ + if (mp->m_flags & M_VLANTAG) { + vtag = htole16(mp->m_pkthdr.ether_vtag); + vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); + } else if (offload == FALSE) /* ... no offload to do */ + return (0); + + /* + * Determine where frame payload starts. + * Jump over vlan headers if already present, + * helpful for QinQ too. + */ + eh = mtod(mp, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + etype = ntohs(eh->evl_proto); + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + etype = ntohs(eh->evl_encap_proto); + ehdrlen = ETHER_HDR_LEN; + } + + /* Set the ether header length */ + vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + + switch (etype) { + case ETHERTYPE_IP: + ip = (struct ip *)(mp->m_data + ehdrlen); + ip_hlen = ip->ip_hl << 2; + ipproto = ip->ip_p; + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + break; + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + ip_hlen = sizeof(struct ip6_hdr); + /* XXX-BZ this will go badly in case of ext hdrs. */ + ipproto = ip6->ip6_nxt; + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; + break; + default: + offload = FALSE; + break; + } + + vlan_macip_lens |= ip_hlen; + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + switch (ipproto) { + case IPPROTO_TCP: +#if __FreeBSD_version >= 1000000 + if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) +#else + if (mp->m_pkthdr.csum_flags & CSUM_TCP) +#endif + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + break; + case IPPROTO_UDP: +#if __FreeBSD_version >= 1000000 + if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) +#else + if (mp->m_pkthdr.csum_flags & CSUM_UDP) +#endif + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; + break; + +#if __FreeBSD_version >= 800000 + case IPPROTO_SCTP: +#if __FreeBSD_version >= 1000000 + if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) +#else + if (mp->m_pkthdr.csum_flags & CSUM_SCTP) +#endif + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; + break; +#endif + default: + offload = FALSE; + break; + } + + if (offload) /* For the TX descriptor setup */ + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + + /* 82575 needs the queue index added */ + if (adapter->hw.mac.type == e1000_82575) + mss_l4len_idx = txr->me << 4; + + /* Now copy bits into descriptor */ + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + TXD->seqnum_seed = htole32(0); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + /* We've consumed the first desc, adjust counters */ + if (++ctxd == txr->num_desc) + ctxd = 0; + txr->next_avail_desc = ctxd; + --txr->tx_avail; + + return (0); +} + +/********************************************************************** + * + * Examine each tx_buffer in the used queue. If the hardware is done + * processing the packet then free associated resources. The + * tx_buffer is put back on the free queue. + * + * TRUE return means there's work in the ring to clean, FALSE its empty. + **********************************************************************/ +static bool +igb_txeof(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; +#ifdef DEV_NETMAP + struct ifnet *ifp = adapter->ifp; +#endif /* DEV_NETMAP */ + u32 work, processed = 0; + int limit = adapter->tx_process_limit; + struct igb_tx_buf *buf; + union e1000_adv_tx_desc *txd; + + mtx_assert(&txr->tx_mtx, MA_OWNED); + +#ifdef DEV_NETMAP + if (netmap_tx_irq(ifp, txr->me)) + return (FALSE); +#endif /* DEV_NETMAP */ + + if (txr->tx_avail == txr->num_desc) { + txr->queue_status = IGB_QUEUE_IDLE; + return FALSE; + } + + /* Get work starting point */ + work = txr->next_to_clean; + buf = &txr->tx_buffers[work]; + txd = &txr->tx_base[work]; + work -= txr->num_desc; /* The distance to ring end */ + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + do { + union e1000_adv_tx_desc *eop = buf->eop; + if (eop == NULL) /* No work */ + break; + + if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) + break; /* I/O not complete */ + + if (buf->m_head) { + txr->bytes += + buf->m_head->m_pkthdr.len; + bus_dmamap_sync(txr->txtag, + buf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, + buf->map); + m_freem(buf->m_head); + buf->m_head = NULL; + } + buf->eop = NULL; + ++txr->tx_avail; + + /* We clean the range if multi segment */ + while (txd != eop) { + ++txd; + ++buf; + ++work; + /* wrap the ring? */ + if (__predict_false(!work)) { + work -= txr->num_desc; + buf = txr->tx_buffers; + txd = txr->tx_base; + } + if (buf->m_head) { + txr->bytes += + buf->m_head->m_pkthdr.len; + bus_dmamap_sync(txr->txtag, + buf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, + buf->map); + m_freem(buf->m_head); + buf->m_head = NULL; + } + ++txr->tx_avail; + buf->eop = NULL; + + } + ++txr->packets; + ++processed; + txr->watchdog_time = ticks; + + /* Try the next packet */ + ++txd; + ++buf; + ++work; + /* reset with a wrap */ + if (__predict_false(!work)) { + work -= txr->num_desc; + buf = txr->tx_buffers; + txd = txr->tx_base; + } + prefetch(txd); + } while (__predict_true(--limit)); + + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + work += txr->num_desc; + txr->next_to_clean = work; + + /* + ** Watchdog calculation, we know there's + ** work outstanding or the first return + ** would have been taken, so none processed + ** for too long indicates a hang. + */ + if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) + txr->queue_status |= IGB_QUEUE_HUNG; + + if (txr->tx_avail >= IGB_QUEUE_THRESHOLD) + txr->queue_status &= ~IGB_QUEUE_DEPLETED; + + if (txr->tx_avail == txr->num_desc) { + txr->queue_status = IGB_QUEUE_IDLE; + return (FALSE); + } + + return (TRUE); +} + +/********************************************************************* + * + * Refresh mbuf buffers for RX descriptor rings + * - now keeps its own state so discards due to resource + * exhaustion are unnecessary, if an mbuf cannot be obtained + * it just returns, keeping its placeholder, thus it can simply + * be recalled to try again. + * + **********************************************************************/ +static void +igb_refresh_mbufs(struct rx_ring *rxr, int limit) +{ + struct adapter *adapter = rxr->adapter; + bus_dma_segment_t hseg[1]; + bus_dma_segment_t pseg[1]; + struct igb_rx_buf *rxbuf; + struct mbuf *mh, *mp; + int i, j, nsegs, error; + bool refreshed = FALSE; + + i = j = rxr->next_to_refresh; + /* + ** Get one descriptor beyond + ** our work mark to control + ** the loop. + */ + if (++j == adapter->num_rx_desc) + j = 0; + + while (j != limit) { + rxbuf = &rxr->rx_buffers[i]; + /* No hdr mbuf used with header split off */ + if (rxr->hdr_split == FALSE) + goto no_split; + if (rxbuf->m_head == NULL) { + mh = m_gethdr(M_NOWAIT, MT_DATA); + if (mh == NULL) + goto update; + } else + mh = rxbuf->m_head; + + mh->m_pkthdr.len = mh->m_len = MHLEN; + mh->m_len = MHLEN; + mh->m_flags |= M_PKTHDR; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->htag, + rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("Refresh mbufs: hdr dmamap load" + " failure - %d\n", error); + m_free(mh); + rxbuf->m_head = NULL; + goto update; + } + rxbuf->m_head = mh; + bus_dmamap_sync(rxr->htag, rxbuf->hmap, + BUS_DMASYNC_PREREAD); + rxr->rx_base[i].read.hdr_addr = + htole64(hseg[0].ds_addr); +no_split: + if (rxbuf->m_pack == NULL) { + mp = m_getjcl(M_NOWAIT, MT_DATA, + M_PKTHDR, adapter->rx_mbuf_sz); + if (mp == NULL) + goto update; + } else + mp = rxbuf->m_pack; + + mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("Refresh mbufs: payload dmamap load" + " failure - %d\n", error); + m_free(mp); + rxbuf->m_pack = NULL; + goto update; + } + rxbuf->m_pack = mp; + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_PREREAD); + rxr->rx_base[i].read.pkt_addr = + htole64(pseg[0].ds_addr); + refreshed = TRUE; /* I feel wefreshed :) */ + + i = j; /* our next is precalculated */ + rxr->next_to_refresh = i; + if (++j == adapter->num_rx_desc) + j = 0; + } +update: + if (refreshed) /* update tail */ + E1000_WRITE_REG(&adapter->hw, + E1000_RDT(rxr->me), rxr->next_to_refresh); + return; +} + + +/********************************************************************* + * + * Allocate memory for rx_buffer structures. Since we use one + * rx_buffer per received packet, the maximum number of rx_buffer's + * that we'll need is equal to the number of receive descriptors + * that we've allocated. + * + **********************************************************************/ +static int +igb_allocate_receive_buffers(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + device_t dev = adapter->dev; + struct igb_rx_buf *rxbuf; + int i, bsize, error; + + bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; + if (!(rxr->rx_buffers = + (struct igb_rx_buf *) malloc(bsize, + M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate rx_buffer memory\n"); + error = ENOMEM; + goto fail; + } + + if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MSIZE, /* maxsize */ + 1, /* nsegments */ + MSIZE, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &rxr->htag))) { + device_printf(dev, "Unable to create RX DMA tag\n"); + goto fail; + } + + if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MJUM9BYTES, /* maxsize */ + 1, /* nsegments */ + MJUM9BYTES, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &rxr->ptag))) { + device_printf(dev, "Unable to create RX payload DMA tag\n"); + goto fail; + } + + for (i = 0; i < adapter->num_rx_desc; i++) { + rxbuf = &rxr->rx_buffers[i]; + error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); + if (error) { + device_printf(dev, + "Unable to create RX head DMA maps\n"); + goto fail; + } + error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); + if (error) { + device_printf(dev, + "Unable to create RX packet DMA maps\n"); + goto fail; + } + } + + return (0); + +fail: + /* Frees all, but can handle partial completion */ + igb_free_receive_structures(adapter); + return (error); +} + + +static void +igb_free_receive_ring(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + struct igb_rx_buf *rxbuf; + + + for (int i = 0; i < adapter->num_rx_desc; i++) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->m_head != NULL) { + bus_dmamap_sync(rxr->htag, rxbuf->hmap, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->htag, rxbuf->hmap); + rxbuf->m_head->m_flags |= M_PKTHDR; + m_freem(rxbuf->m_head); + } + if (rxbuf->m_pack != NULL) { + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); + rxbuf->m_pack->m_flags |= M_PKTHDR; + m_freem(rxbuf->m_pack); + } + rxbuf->m_head = NULL; + rxbuf->m_pack = NULL; + } +} + + +/********************************************************************* + * + * Initialize a receive ring and its buffers. + * + **********************************************************************/ +static int +igb_setup_receive_ring(struct rx_ring *rxr) +{ + struct adapter *adapter; + struct ifnet *ifp; + device_t dev; + struct igb_rx_buf *rxbuf; + bus_dma_segment_t pseg[1], hseg[1]; + struct lro_ctrl *lro = &rxr->lro; + int rsize, nsegs, error = 0; +#ifdef DEV_NETMAP + struct netmap_adapter *na = NA(rxr->adapter->ifp); + struct netmap_slot *slot; +#endif /* DEV_NETMAP */ + + adapter = rxr->adapter; + dev = adapter->dev; + ifp = adapter->ifp; + + /* Clear the ring contents */ + IGB_RX_LOCK(rxr); +#ifdef DEV_NETMAP + slot = netmap_reset(na, NR_RX, rxr->me, 0); +#endif /* DEV_NETMAP */ + rsize = roundup2(adapter->num_rx_desc * + sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); + bzero((void *)rxr->rx_base, rsize); + + /* + ** Free current RX buffer structures and their mbufs + */ + igb_free_receive_ring(rxr); + + /* Configure for header split? */ + if (igb_header_split) + rxr->hdr_split = TRUE; + + /* Now replenish the ring mbufs */ + for (int j = 0; j < adapter->num_rx_desc; ++j) { + struct mbuf *mh, *mp; + + rxbuf = &rxr->rx_buffers[j]; +#ifdef DEV_NETMAP + if (slot) { + /* slot sj is mapped to the j-th NIC-ring entry */ + int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); + uint64_t paddr; + void *addr; + + addr = PNMB(na, slot + sj, &paddr); + netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); + /* Update descriptor */ + rxr->rx_base[j].read.pkt_addr = htole64(paddr); + continue; + } +#endif /* DEV_NETMAP */ + if (rxr->hdr_split == FALSE) + goto skip_head; + + /* First the header */ + rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); + if (rxbuf->m_head == NULL) { + error = ENOBUFS; + goto fail; + } + m_adj(rxbuf->m_head, ETHER_ALIGN); + mh = rxbuf->m_head; + mh->m_len = mh->m_pkthdr.len = MHLEN; + mh->m_flags |= M_PKTHDR; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->htag, + rxbuf->hmap, rxbuf->m_head, hseg, + &nsegs, BUS_DMA_NOWAIT); + if (error != 0) /* Nothing elegant to do here */ + goto fail; + bus_dmamap_sync(rxr->htag, + rxbuf->hmap, BUS_DMASYNC_PREREAD); + /* Update descriptor */ + rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); + +skip_head: + /* Now the payload cluster */ + rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, + M_PKTHDR, adapter->rx_mbuf_sz); + if (rxbuf->m_pack == NULL) { + error = ENOBUFS; + goto fail; + } + mp = rxbuf->m_pack; + mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, pseg, + &nsegs, BUS_DMA_NOWAIT); + if (error != 0) + goto fail; + bus_dmamap_sync(rxr->ptag, + rxbuf->pmap, BUS_DMASYNC_PREREAD); + /* Update descriptor */ + rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); + } + + /* Setup our descriptor indices */ + rxr->next_to_check = 0; + rxr->next_to_refresh = adapter->num_rx_desc - 1; + rxr->lro_enabled = FALSE; + rxr->rx_split_packets = 0; + rxr->rx_bytes = 0; + + rxr->fmp = NULL; + rxr->lmp = NULL; + + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + /* + ** Now set up the LRO interface, we + ** also only do head split when LRO + ** is enabled, since so often they + ** are undesirable in similar setups. + */ + if (ifp->if_capenable & IFCAP_LRO) { + error = tcp_lro_init(lro); + if (error) { + device_printf(dev, "LRO Initialization failed!\n"); + goto fail; + } + INIT_DEBUGOUT("RX LRO Initialized\n"); + rxr->lro_enabled = TRUE; + lro->ifp = adapter->ifp; + } + + IGB_RX_UNLOCK(rxr); + return (0); + +fail: + igb_free_receive_ring(rxr); + IGB_RX_UNLOCK(rxr); + return (error); +} + + +/********************************************************************* + * + * Initialize all receive rings. + * + **********************************************************************/ +static int +igb_setup_receive_structures(struct adapter *adapter) +{ + struct rx_ring *rxr = adapter->rx_rings; + int i; + + for (i = 0; i < adapter->num_queues; i++, rxr++) + if (igb_setup_receive_ring(rxr)) + goto fail; + + return (0); +fail: + /* + * Free RX buffers allocated so far, we will only handle + * the rings that completed, the failing case will have + * cleaned up for itself. 'i' is the endpoint. + */ + for (int j = 0; j < i; ++j) { + rxr = &adapter->rx_rings[j]; + IGB_RX_LOCK(rxr); + igb_free_receive_ring(rxr); + IGB_RX_UNLOCK(rxr); + } + + return (ENOBUFS); +} + +/* + * Initialise the RSS mapping for NICs that support multiple transmit/ + * receive rings. + */ +static void +igb_initialise_rss_mapping(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + int queue_id; + u32 reta; + u32 rss_key[10], mrqc, shift = 0; + + /* XXX? */ + if (adapter->hw.mac.type == e1000_82575) + shift = 6; + + /* + * The redirection table controls which destination + * queue each bucket redirects traffic to. + * Each DWORD represents four queues, with the LSB + * being the first queue in the DWORD. + * + * This just allocates buckets to queues using round-robin + * allocation. + * + * NOTE: It Just Happens to line up with the default + * RSS allocation method. + */ + + /* Warning FM follows */ + reta = 0; + for (i = 0; i < 128; i++) { +#ifdef RSS + queue_id = rss_get_indirection_to_bucket(i); + /* + * If we have more queues than buckets, we'll + * end up mapping buckets to a subset of the + * queues. + * + * If we have more buckets than queues, we'll + * end up instead assigning multiple buckets + * to queues. + * + * Both are suboptimal, but we need to handle + * the case so we don't go out of bounds + * indexing arrays and such. + */ + queue_id = queue_id % adapter->num_queues; +#else + queue_id = (i % adapter->num_queues); +#endif + /* Adjust if required */ + queue_id = queue_id << shift; + + /* + * The low 8 bits are for hash value (n+0); + * The next 8 bits are for hash value (n+1), etc. + */ + reta = reta >> 8; + reta = reta | ( ((uint32_t) queue_id) << 24); + if ((i & 3) == 3) { + E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); + reta = 0; + } + } + + /* Now fill in hash table */ + + /* + * MRQC: Multiple Receive Queues Command + * Set queuing to RSS control, number depends on the device. + */ + mrqc = E1000_MRQC_ENABLE_RSS_8Q; + +#ifdef RSS + /* XXX ew typecasting */ + rss_getkey((uint8_t *) &rss_key); +#else + arc4rand(&rss_key, sizeof(rss_key), 0); +#endif + for (i = 0; i < 10; i++) + E1000_WRITE_REG_ARRAY(hw, + E1000_RSSRK(0), i, rss_key[i]); + + /* + * Configure the RSS fields to hash upon. + */ + mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP); + mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP); + mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | + E1000_MRQC_RSS_FIELD_IPV6_UDP); + mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); + + E1000_WRITE_REG(hw, E1000_MRQC, mrqc); +} + +/********************************************************************* + * + * Enable receive unit. + * + **********************************************************************/ +static void +igb_initialize_receive_units(struct adapter *adapter) +{ + struct rx_ring *rxr = adapter->rx_rings; + struct ifnet *ifp = adapter->ifp; + struct e1000_hw *hw = &adapter->hw; + u32 rctl, rxcsum, psize, srrctl = 0; + + INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); + + /* + * Make sure receives are disabled while setting + * up the descriptor ring + */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); + + /* + ** Set up for header split + */ + if (igb_header_split) { + /* Use a standard mbuf for the header */ + srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; + } else + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; + + /* + ** Set up for jumbo frames + */ + if (ifp->if_mtu > ETHERMTU) { + rctl |= E1000_RCTL_LPE; + if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { + srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; + } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { + srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; + } + /* Set maximum packet len */ + psize = adapter->max_frame_size; + /* are we on a vlan? */ + if (adapter->ifp->if_vlantrunk != NULL) + psize += VLAN_TAG_SIZE; + E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); + } else { + rctl &= ~E1000_RCTL_LPE; + srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_2048; + } + + /* + * If TX flow control is disabled and there's >1 queue defined, + * enable DROP. + * + * This drops frames rather than hanging the RX MAC for all queues. + */ + if ((adapter->num_queues > 1) && + (adapter->fc == e1000_fc_none || + adapter->fc == e1000_fc_rx_pause)) { + srrctl |= E1000_SRRCTL_DROP_EN; + } + + /* Setup the Base and Length of the Rx Descriptor Rings */ + for (int i = 0; i < adapter->num_queues; i++, rxr++) { + u64 bus_addr = rxr->rxdma.dma_paddr; + u32 rxdctl; + + E1000_WRITE_REG(hw, E1000_RDLEN(i), + adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); + E1000_WRITE_REG(hw, E1000_RDBAH(i), + (uint32_t)(bus_addr >> 32)); + E1000_WRITE_REG(hw, E1000_RDBAL(i), + (uint32_t)bus_addr); + E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); + /* Enable this Queue */ + rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); + rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; + rxdctl &= 0xFFF00000; + rxdctl |= IGB_RX_PTHRESH; + rxdctl |= IGB_RX_HTHRESH << 8; + rxdctl |= IGB_RX_WTHRESH << 16; + E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); + } + + /* + ** Setup for RX MultiQueue + */ + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); + if (adapter->num_queues >1) { + + /* rss setup */ + igb_initialise_rss_mapping(adapter); + + /* + ** NOTE: Receive Full-Packet Checksum Offload + ** is mutually exclusive with Multiqueue. However + ** this is not the same as TCP/IP checksums which + ** still work. + */ + rxcsum |= E1000_RXCSUM_PCSD; +#if __FreeBSD_version >= 800000 + /* For SCTP Offload */ + if ((hw->mac.type != e1000_82575) && + (ifp->if_capenable & IFCAP_RXCSUM)) + rxcsum |= E1000_RXCSUM_CRCOFL; +#endif + } else { + /* Non RSS setup */ + if (ifp->if_capenable & IFCAP_RXCSUM) { + rxcsum |= E1000_RXCSUM_IPPCSE; +#if __FreeBSD_version >= 800000 + if (adapter->hw.mac.type != e1000_82575) + rxcsum |= E1000_RXCSUM_CRCOFL; +#endif + } else + rxcsum &= ~E1000_RXCSUM_TUOFL; + } + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); + + /* Setup the Receive Control Register */ + rctl &= ~(3 << E1000_RCTL_MO_SHIFT); + rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | + E1000_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + /* Strip CRC bytes. */ + rctl |= E1000_RCTL_SECRC; + /* Make sure VLAN Filters are off */ + rctl &= ~E1000_RCTL_VFE; + /* Don't store bad packets */ + rctl &= ~E1000_RCTL_SBP; + + /* Enable Receives */ + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + + /* + * Setup the HW Rx Head and Tail Descriptor Pointers + * - needs to be after enable + */ + for (int i = 0; i < adapter->num_queues; i++) { + rxr = &adapter->rx_rings[i]; + E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); +#ifdef DEV_NETMAP + /* + * an init() while a netmap client is active must + * preserve the rx buffers passed to userspace. + * In this driver it means we adjust RDT to + * something different from next_to_refresh + * (which is not used in netmap mode). + */ + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_kring *kring = &na->rx_rings[i]; + int t = rxr->next_to_refresh - nm_kr_rxspace(kring); + + if (t >= adapter->num_rx_desc) + t -= adapter->num_rx_desc; + else if (t < 0) + t += adapter->num_rx_desc; + E1000_WRITE_REG(hw, E1000_RDT(i), t); + } else +#endif /* DEV_NETMAP */ + E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh); + } + return; +} + +/********************************************************************* + * + * Free receive rings. + * + **********************************************************************/ +static void +igb_free_receive_structures(struct adapter *adapter) +{ + struct rx_ring *rxr = adapter->rx_rings; + + for (int i = 0; i < adapter->num_queues; i++, rxr++) { + struct lro_ctrl *lro = &rxr->lro; + igb_free_receive_buffers(rxr); + tcp_lro_free(lro); + igb_dma_free(adapter, &rxr->rxdma); + } + + free(adapter->rx_rings, M_DEVBUF); +} + +/********************************************************************* + * + * Free receive ring data structures. + * + **********************************************************************/ +static void +igb_free_receive_buffers(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + struct igb_rx_buf *rxbuf; + int i; + + INIT_DEBUGOUT("free_receive_structures: begin"); + + /* Cleanup any existing buffers */ + if (rxr->rx_buffers != NULL) { + for (i = 0; i < adapter->num_rx_desc; i++) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->m_head != NULL) { + bus_dmamap_sync(rxr->htag, rxbuf->hmap, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->htag, rxbuf->hmap); + rxbuf->m_head->m_flags |= M_PKTHDR; + m_freem(rxbuf->m_head); + } + if (rxbuf->m_pack != NULL) { + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); + rxbuf->m_pack->m_flags |= M_PKTHDR; + m_freem(rxbuf->m_pack); + } + rxbuf->m_head = NULL; + rxbuf->m_pack = NULL; + if (rxbuf->hmap != NULL) { + bus_dmamap_destroy(rxr->htag, rxbuf->hmap); + rxbuf->hmap = NULL; + } + if (rxbuf->pmap != NULL) { + bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); + rxbuf->pmap = NULL; + } + } + if (rxr->rx_buffers != NULL) { + free(rxr->rx_buffers, M_DEVBUF); + rxr->rx_buffers = NULL; + } + } + + if (rxr->htag != NULL) { + bus_dma_tag_destroy(rxr->htag); + rxr->htag = NULL; + } + if (rxr->ptag != NULL) { + bus_dma_tag_destroy(rxr->ptag); + rxr->ptag = NULL; + } +} + +static __inline void +igb_rx_discard(struct rx_ring *rxr, int i) +{ + struct igb_rx_buf *rbuf; + + rbuf = &rxr->rx_buffers[i]; + + /* Partially received? Free the chain */ + if (rxr->fmp != NULL) { + rxr->fmp->m_flags |= M_PKTHDR; + m_freem(rxr->fmp); + rxr->fmp = NULL; + rxr->lmp = NULL; + } + + /* + ** With advanced descriptors the writeback + ** clobbers the buffer addrs, so its easier + ** to just free the existing mbufs and take + ** the normal refresh path to get new buffers + ** and mapping. + */ + if (rbuf->m_head) { + m_free(rbuf->m_head); + rbuf->m_head = NULL; + bus_dmamap_unload(rxr->htag, rbuf->hmap); + } + + if (rbuf->m_pack) { + m_free(rbuf->m_pack); + rbuf->m_pack = NULL; + bus_dmamap_unload(rxr->ptag, rbuf->pmap); + } + + return; +} + +static __inline void +igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) +{ + + /* + * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet + * should be computed by hardware. Also it should not have VLAN tag in + * ethernet header. + */ + if (rxr->lro_enabled && + (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && + (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && + (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == + (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && + (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { + /* + * Send to the stack if: + ** - LRO not enabled, or + ** - no LRO resources, or + ** - lro enqueue fails + */ + if (rxr->lro.lro_cnt != 0) + if (tcp_lro_rx(&rxr->lro, m, 0) == 0) + return; + } + IGB_RX_UNLOCK(rxr); + (*ifp->if_input)(ifp, m); + IGB_RX_LOCK(rxr); +} + +/********************************************************************* + * + * This routine executes in interrupt context. It replenishes + * the mbufs in the descriptor and sends data which has been + * dma'ed into host memory to upper layer. + * + * We loop at most count times if count is > 0, or until done if + * count < 0. + * + * Return TRUE if more to clean, FALSE otherwise + *********************************************************************/ +static bool +igb_rxeof(struct igb_queue *que, int count, int *done) +{ + struct adapter *adapter = que->adapter; + struct rx_ring *rxr = que->rxr; + struct ifnet *ifp = adapter->ifp; + struct lro_ctrl *lro = &rxr->lro; + int i, processed = 0, rxdone = 0; + u32 ptype, staterr = 0; + union e1000_adv_rx_desc *cur; + + IGB_RX_LOCK(rxr); + /* Sync the ring. */ + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + +#ifdef DEV_NETMAP + if (netmap_rx_irq(ifp, rxr->me, &processed)) { + IGB_RX_UNLOCK(rxr); + return (FALSE); + } +#endif /* DEV_NETMAP */ + + /* Main clean loop */ + for (i = rxr->next_to_check; count != 0;) { + struct mbuf *sendmp, *mh, *mp; + struct igb_rx_buf *rxbuf; + u16 hlen, plen, hdr, vtag, pkt_info; + bool eop = FALSE; + + cur = &rxr->rx_base[i]; + staterr = le32toh(cur->wb.upper.status_error); + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; + count--; + sendmp = mh = mp = NULL; + cur->wb.upper.status_error = 0; + rxbuf = &rxr->rx_buffers[i]; + plen = le16toh(cur->wb.upper.length); + ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; + if (((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) && + (staterr & E1000_RXDEXT_STATERR_LB)) + vtag = be16toh(cur->wb.upper.vlan); + else + vtag = le16toh(cur->wb.upper.vlan); + hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); + pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); + eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); + + /* + * Free the frame (all segments) if we're at EOP and + * it's an error. + * + * The datasheet states that EOP + status is only valid for + * the final segment in a multi-segment frame. + */ + if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { + adapter->dropped_pkts++; + ++rxr->rx_discarded; + igb_rx_discard(rxr, i); + goto next_desc; + } + + /* + ** The way the hardware is configured to + ** split, it will ONLY use the header buffer + ** when header split is enabled, otherwise we + ** get normal behavior, ie, both header and + ** payload are DMA'd into the payload buffer. + ** + ** The fmp test is to catch the case where a + ** packet spans multiple descriptors, in that + ** case only the first header is valid. + */ + if (rxr->hdr_split && rxr->fmp == NULL) { + bus_dmamap_unload(rxr->htag, rxbuf->hmap); + hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> + E1000_RXDADV_HDRBUFLEN_SHIFT; + if (hlen > IGB_HDR_BUF) + hlen = IGB_HDR_BUF; + mh = rxr->rx_buffers[i].m_head; + mh->m_len = hlen; + /* clear buf pointer for refresh */ + rxbuf->m_head = NULL; + /* + ** Get the payload length, this + ** could be zero if its a small + ** packet. + */ + if (plen > 0) { + mp = rxr->rx_buffers[i].m_pack; + mp->m_len = plen; + mh->m_next = mp; + /* clear buf pointer */ + rxbuf->m_pack = NULL; + rxr->rx_split_packets++; + } + } else { + /* + ** Either no header split, or a + ** secondary piece of a fragmented + ** split packet. + */ + mh = rxr->rx_buffers[i].m_pack; + mh->m_len = plen; + /* clear buf info for refresh */ + rxbuf->m_pack = NULL; + } + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); + + ++processed; /* So we know when to refresh */ + + /* Initial frame - setup */ + if (rxr->fmp == NULL) { + mh->m_pkthdr.len = mh->m_len; + /* Save the head of the chain */ + rxr->fmp = mh; + rxr->lmp = mh; + if (mp != NULL) { + /* Add payload if split */ + mh->m_pkthdr.len += mp->m_len; + rxr->lmp = mh->m_next; + } + } else { + /* Chain mbuf's together */ + rxr->lmp->m_next = mh; + rxr->lmp = rxr->lmp->m_next; + rxr->fmp->m_pkthdr.len += mh->m_len; + } + + if (eop) { + rxr->fmp->m_pkthdr.rcvif = ifp; + rxr->rx_packets++; + /* capture data for AIM */ + rxr->packets++; + rxr->bytes += rxr->fmp->m_pkthdr.len; + rxr->rx_bytes += rxr->fmp->m_pkthdr.len; + + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + igb_rx_checksum(staterr, rxr->fmp, ptype); + + if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && + (staterr & E1000_RXD_STAT_VP) != 0) { + rxr->fmp->m_pkthdr.ether_vtag = vtag; + rxr->fmp->m_flags |= M_VLANTAG; + } + + /* + * In case of multiqueue, we have RXCSUM.PCSD bit set + * and never cleared. This means we have RSS hash + * available to be used. + */ + if (adapter->num_queues > 1) { + rxr->fmp->m_pkthdr.flowid = + le32toh(cur->wb.lower.hi_dword.rss); + switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { + case E1000_RXDADV_RSSTYPE_IPV4_TCP: + M_HASHTYPE_SET(rxr->fmp, + M_HASHTYPE_RSS_TCP_IPV4); + break; + case E1000_RXDADV_RSSTYPE_IPV4: + M_HASHTYPE_SET(rxr->fmp, + M_HASHTYPE_RSS_IPV4); + break; + case E1000_RXDADV_RSSTYPE_IPV6_TCP: + M_HASHTYPE_SET(rxr->fmp, + M_HASHTYPE_RSS_TCP_IPV6); + break; + case E1000_RXDADV_RSSTYPE_IPV6_EX: + M_HASHTYPE_SET(rxr->fmp, + M_HASHTYPE_RSS_IPV6_EX); + break; + case E1000_RXDADV_RSSTYPE_IPV6: + M_HASHTYPE_SET(rxr->fmp, + M_HASHTYPE_RSS_IPV6); + break; + case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: + M_HASHTYPE_SET(rxr->fmp, + M_HASHTYPE_RSS_TCP_IPV6_EX); + break; + default: + /* XXX fallthrough */ + M_HASHTYPE_SET(rxr->fmp, + M_HASHTYPE_OPAQUE_HASH); + } + } else { +#ifndef IGB_LEGACY_TX + rxr->fmp->m_pkthdr.flowid = que->msix; + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); +#endif + } + sendmp = rxr->fmp; + /* Make sure to set M_PKTHDR. */ + sendmp->m_flags |= M_PKTHDR; + rxr->fmp = NULL; + rxr->lmp = NULL; + } + +next_desc: + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + /* Advance our pointers to the next descriptor. */ + if (++i == adapter->num_rx_desc) + i = 0; + /* + ** Send to the stack or LRO + */ + if (sendmp != NULL) { + rxr->next_to_check = i; + igb_rx_input(rxr, ifp, sendmp, ptype); + i = rxr->next_to_check; + rxdone++; + } + + /* Every 8 descriptors we go to refresh mbufs */ + if (processed == 8) { + igb_refresh_mbufs(rxr, i); + processed = 0; + } + } + + /* Catch any remainders */ + if (igb_rx_unrefreshed(rxr)) + igb_refresh_mbufs(rxr, i); + + rxr->next_to_check = i; + + /* + * Flush any outstanding LRO work + */ + tcp_lro_flush_all(lro); + + if (done != NULL) + *done += rxdone; + + IGB_RX_UNLOCK(rxr); + return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE); +} + +/********************************************************************* + * + * Verify that the hardware indicated that the checksum is valid. + * Inform the stack about the status of checksum so that stack + * doesn't spend time verifying the checksum. + * + *********************************************************************/ +static void +igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype) +{ + u16 status = (u16)staterr; + u8 errors = (u8) (staterr >> 24); + int sctp; + + /* Ignore Checksum bit is set */ + if (status & E1000_RXD_STAT_IXSM) { + mp->m_pkthdr.csum_flags = 0; + return; + } + + if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && + (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) + sctp = 1; + else + sctp = 0; + if (status & E1000_RXD_STAT_IPCS) { + /* Did it pass? */ + if (!(errors & E1000_RXD_ERR_IPE)) { + /* IP Checksum Good */ + mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; + mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; + } else + mp->m_pkthdr.csum_flags = 0; + } + + if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { + u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); +#if __FreeBSD_version >= 800000 + if (sctp) /* reassign */ + type = CSUM_SCTP_VALID; +#endif + /* Did it pass? */ + if (!(errors & E1000_RXD_ERR_TCPE)) { + mp->m_pkthdr.csum_flags |= type; + if (sctp == 0) + mp->m_pkthdr.csum_data = htons(0xffff); + } + } + return; +} + +/* + * This routine is run via an vlan + * config EVENT + */ +static void +igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) +{ + struct adapter *adapter = ifp->if_softc; + u32 index, bit; + + if (ifp->if_softc != arg) /* Not our event */ + return; + + if ((vtag == 0) || (vtag > 4095)) /* Invalid */ + return; + + IGB_CORE_LOCK(adapter); + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] |= (1 << bit); + ++adapter->num_vlans; + /* Change hw filter setting */ + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + igb_setup_vlan_hw_support(adapter); + IGB_CORE_UNLOCK(adapter); +} + +/* + * This routine is run via an vlan + * unconfig EVENT + */ +static void +igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) +{ + struct adapter *adapter = ifp->if_softc; + u32 index, bit; + + if (ifp->if_softc != arg) + return; + + if ((vtag == 0) || (vtag > 4095)) /* Invalid */ + return; + + IGB_CORE_LOCK(adapter); + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] &= ~(1 << bit); + --adapter->num_vlans; + /* Change hw filter setting */ + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + igb_setup_vlan_hw_support(adapter); + IGB_CORE_UNLOCK(adapter); +} + +static void +igb_setup_vlan_hw_support(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct ifnet *ifp = adapter->ifp; + u32 reg; + + if (adapter->vf_ifp) { + e1000_rlpml_set_vf(hw, + adapter->max_frame_size + VLAN_TAG_SIZE); + return; + } + + reg = E1000_READ_REG(hw, E1000_CTRL); + reg |= E1000_CTRL_VME; + E1000_WRITE_REG(hw, E1000_CTRL, reg); + + /* Enable the Filter Table */ + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { + reg = E1000_READ_REG(hw, E1000_RCTL); + reg &= ~E1000_RCTL_CFIEN; + reg |= E1000_RCTL_VFE; + E1000_WRITE_REG(hw, E1000_RCTL, reg); + } + + /* Update the frame size */ + E1000_WRITE_REG(&adapter->hw, E1000_RLPML, + adapter->max_frame_size + VLAN_TAG_SIZE); + + /* Don't bother with table if no vlans */ + if ((adapter->num_vlans == 0) || + ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) + return; + /* + ** A soft reset zero's out the VFTA, so + ** we need to repopulate it now. + */ + for (int i = 0; i < IGB_VFTA_SIZE; i++) + if (adapter->shadow_vfta[i] != 0) { + if (adapter->vf_ifp) + e1000_vfta_set_vf(hw, + adapter->shadow_vfta[i], TRUE); + else + e1000_write_vfta(hw, + i, adapter->shadow_vfta[i]); + } +} + +static void +igb_enable_intr(struct adapter *adapter) +{ + /* With RSS set up what to auto clear */ + if (adapter->msix_mem) { + u32 mask = (adapter->que_mask | adapter->link_mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, + E1000_IMS_LSC); + } else { + E1000_WRITE_REG(&adapter->hw, E1000_IMS, + IMS_ENABLE_MASK); + } + E1000_WRITE_FLUSH(&adapter->hw); + + return; +} + +static void +igb_disable_intr(struct adapter *adapter) +{ + if (adapter->msix_mem) { + E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); + } + E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(&adapter->hw); + return; +} + +/* + * Bit of a misnomer, what this really means is + * to enable OS management of the system... aka + * to disable special hardware management features + */ +static void +igb_init_manageability(struct adapter *adapter) +{ + if (adapter->has_manage) { + int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); + int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); + + /* disable hardware interception of ARP */ + manc &= ~(E1000_MANC_ARP_EN); + + /* enable receiving management packets to the host */ + manc |= E1000_MANC_EN_MNG2HOST; + manc2h |= 1 << 5; /* Mng Port 623 */ + manc2h |= 1 << 6; /* Mng Port 664 */ + E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); + E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); + } +} + +/* + * Give control back to hardware management + * controller if there is one. + */ +static void +igb_release_manageability(struct adapter *adapter) +{ + if (adapter->has_manage) { + int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); + + /* re-enable hardware interception of ARP */ + manc |= E1000_MANC_ARP_EN; + manc &= ~E1000_MANC_EN_MNG2HOST; + + E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); + } +} + +/* + * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. + * + */ +static void +igb_get_hw_control(struct adapter *adapter) +{ + u32 ctrl_ext; + + if (adapter->vf_ifp) + return; + + /* Let firmware know the driver has taken over */ + ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, + ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); +} + +/* + * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. + * + */ +static void +igb_release_hw_control(struct adapter *adapter) +{ + u32 ctrl_ext; + + if (adapter->vf_ifp) + return; + + /* Let firmware taken over control of h/w */ + ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); +} + +static int +igb_is_valid_ether_addr(uint8_t *addr) +{ + char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; + + if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { + return (FALSE); + } + + return (TRUE); +} + + +/* + * Enable PCI Wake On Lan capability + */ +static void +igb_enable_wakeup(device_t dev) +{ + u16 cap, status; + u8 id; + + /* First find the capabilities pointer*/ + cap = pci_read_config(dev, PCIR_CAP_PTR, 2); + /* Read the PM Capabilities */ + id = pci_read_config(dev, cap, 1); + if (id != PCIY_PMG) /* Something wrong */ + return; + /* OK, we have the power capabilities, so + now get the status register */ + cap += PCIR_POWER_STATUS; + status = pci_read_config(dev, cap, 2); + status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; + pci_write_config(dev, cap, status, 2); + return; +} + +static void +igb_led_func(void *arg, int onoff) +{ + struct adapter *adapter = arg; + + IGB_CORE_LOCK(adapter); + if (onoff) { + e1000_setup_led(&adapter->hw); + e1000_led_on(&adapter->hw); + } else { + e1000_led_off(&adapter->hw); + e1000_cleanup_led(&adapter->hw); + } + IGB_CORE_UNLOCK(adapter); +} + +static uint64_t +igb_get_vf_counter(if_t ifp, ift_counter cnt) +{ + struct adapter *adapter; + struct e1000_vf_stats *stats; +#ifndef IGB_LEGACY_TX + struct tx_ring *txr; + uint64_t rv; +#endif + + adapter = if_getsoftc(ifp); + stats = (struct e1000_vf_stats *)adapter->stats; + + switch (cnt) { + case IFCOUNTER_IPACKETS: + return (stats->gprc); + case IFCOUNTER_OPACKETS: + return (stats->gptc); + case IFCOUNTER_IBYTES: + return (stats->gorc); + case IFCOUNTER_OBYTES: + return (stats->gotc); + case IFCOUNTER_IMCASTS: + return (stats->mprc); + case IFCOUNTER_IERRORS: + return (adapter->dropped_pkts); + case IFCOUNTER_OERRORS: + return (adapter->watchdog_events); +#ifndef IGB_LEGACY_TX + case IFCOUNTER_OQDROPS: + rv = 0; + txr = adapter->tx_rings; + for (int i = 0; i < adapter->num_queues; i++, txr++) + rv += txr->br->br_drops; + return (rv); +#endif + default: + return (if_get_counter_default(ifp, cnt)); + } +} + +static uint64_t +igb_get_counter(if_t ifp, ift_counter cnt) +{ + struct adapter *adapter; + struct e1000_hw_stats *stats; +#ifndef IGB_LEGACY_TX + struct tx_ring *txr; + uint64_t rv; +#endif + + adapter = if_getsoftc(ifp); + if (adapter->vf_ifp) + return (igb_get_vf_counter(ifp, cnt)); + + stats = (struct e1000_hw_stats *)adapter->stats; + + switch (cnt) { + case IFCOUNTER_IPACKETS: + return (stats->gprc); + case IFCOUNTER_OPACKETS: + return (stats->gptc); + case IFCOUNTER_IBYTES: + return (stats->gorc); + case IFCOUNTER_OBYTES: + return (stats->gotc); + case IFCOUNTER_IMCASTS: + return (stats->mprc); + case IFCOUNTER_OMCASTS: + return (stats->mptc); + case IFCOUNTER_IERRORS: + return (adapter->dropped_pkts + stats->rxerrc + + stats->crcerrs + stats->algnerrc + + stats->ruc + stats->roc + stats->cexterr); + case IFCOUNTER_OERRORS: + return (stats->ecol + stats->latecol + + adapter->watchdog_events); + case IFCOUNTER_COLLISIONS: + return (stats->colc); + case IFCOUNTER_IQDROPS: + return (stats->mpc); +#ifndef IGB_LEGACY_TX + case IFCOUNTER_OQDROPS: + rv = 0; + txr = adapter->tx_rings; + for (int i = 0; i < adapter->num_queues; i++, txr++) + rv += txr->br->br_drops; + return (rv); +#endif + default: + return (if_get_counter_default(ifp, cnt)); + } +} + +/********************************************************************** + * + * Update the board statistics counters. + * + **********************************************************************/ +static void +igb_update_stats_counters(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_hw_stats *stats; + + /* + ** The virtual function adapter has only a + ** small controlled set of stats, do only + ** those and return. + */ + if (adapter->vf_ifp) { + igb_update_vf_stats_counters(adapter); + return; + } + + stats = (struct e1000_hw_stats *)adapter->stats; + + if (adapter->hw.phy.media_type == e1000_media_type_copper || + (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { + stats->symerrs += + E1000_READ_REG(hw,E1000_SYMERRS); + stats->sec += E1000_READ_REG(hw, E1000_SEC); + } + + stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); + stats->mpc += E1000_READ_REG(hw, E1000_MPC); + stats->scc += E1000_READ_REG(hw, E1000_SCC); + stats->ecol += E1000_READ_REG(hw, E1000_ECOL); + + stats->mcc += E1000_READ_REG(hw, E1000_MCC); + stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); + stats->colc += E1000_READ_REG(hw, E1000_COLC); + stats->dc += E1000_READ_REG(hw, E1000_DC); + stats->rlec += E1000_READ_REG(hw, E1000_RLEC); + stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); + stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); + /* + ** For watchdog management we need to know if we have been + ** paused during the last interval, so capture that here. + */ + adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); + stats->xoffrxc += adapter->pause_frames; + stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); + stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); + stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); + stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); + stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); + stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); + stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); + stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); + stats->gprc += E1000_READ_REG(hw, E1000_GPRC); + stats->bprc += E1000_READ_REG(hw, E1000_BPRC); + stats->mprc += E1000_READ_REG(hw, E1000_MPRC); + stats->gptc += E1000_READ_REG(hw, E1000_GPTC); + + /* For the 64-bit byte counters the low dword must be read first. */ + /* Both registers clear on the read of the high dword */ + + stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + + ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); + stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + + ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); + + stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); + stats->ruc += E1000_READ_REG(hw, E1000_RUC); + stats->rfc += E1000_READ_REG(hw, E1000_RFC); + stats->roc += E1000_READ_REG(hw, E1000_ROC); + stats->rjc += E1000_READ_REG(hw, E1000_RJC); + + stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC); + stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC); + stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC); + + stats->tor += E1000_READ_REG(hw, E1000_TORL) + + ((u64)E1000_READ_REG(hw, E1000_TORH) << 32); + stats->tot += E1000_READ_REG(hw, E1000_TOTL) + + ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32); + + stats->tpr += E1000_READ_REG(hw, E1000_TPR); + stats->tpt += E1000_READ_REG(hw, E1000_TPT); + stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); + stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); + stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); + stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); + stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); + stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); + stats->mptc += E1000_READ_REG(hw, E1000_MPTC); + stats->bptc += E1000_READ_REG(hw, E1000_BPTC); + + /* Interrupt Counts */ + + stats->iac += E1000_READ_REG(hw, E1000_IAC); + stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); + stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); + stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); + stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); + stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); + stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); + stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); + stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); + + /* Host to Card Statistics */ + + stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); + stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); + stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); + stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); + stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); + stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); + stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); + stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + + ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); + stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + + ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); + stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); + stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); + stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); + + stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); + stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); + stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); + stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); + stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); + stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); + + /* Driver specific counters */ + adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); + adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); + adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); + adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); + adapter->packet_buf_alloc_tx = + ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); + adapter->packet_buf_alloc_rx = + (E1000_READ_REG(hw, E1000_PBA) & 0xffff); +} + + +/********************************************************************** + * + * Initialize the VF board statistics counters. + * + **********************************************************************/ +static void +igb_vf_init_stats(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_vf_stats *stats; + + stats = (struct e1000_vf_stats *)adapter->stats; + if (stats == NULL) + return; + stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); + stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); + stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); + stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); + stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); +} + +/********************************************************************** + * + * Update the VF board statistics counters. + * + **********************************************************************/ +static void +igb_update_vf_stats_counters(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_vf_stats *stats; + + if (adapter->link_speed == 0) + return; + + stats = (struct e1000_vf_stats *)adapter->stats; + + UPDATE_VF_REG(E1000_VFGPRC, + stats->last_gprc, stats->gprc); + UPDATE_VF_REG(E1000_VFGORC, + stats->last_gorc, stats->gorc); + UPDATE_VF_REG(E1000_VFGPTC, + stats->last_gptc, stats->gptc); + UPDATE_VF_REG(E1000_VFGOTC, + stats->last_gotc, stats->gotc); + UPDATE_VF_REG(E1000_VFMPRC, + stats->last_mprc, stats->mprc); +} + +/* Export a single 32-bit register via a read-only sysctl. */ +static int +igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter; + u_int val; + + adapter = oidp->oid_arg1; + val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); + return (sysctl_handle_int(oidp, &val, 0, req)); +} + +/* +** Tuneable interrupt rate handler +*/ +static int +igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) +{ + struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); + int error; + u32 reg, usec, rate; + + reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); + usec = ((reg & 0x7FFC) >> 2); + if (usec > 0) + rate = 1000000 / usec; + else + rate = 0; + error = sysctl_handle_int(oidp, &rate, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/* + * Add sysctl variables, one per statistic, to the system. + */ +static void +igb_add_hw_stats(struct adapter *adapter) +{ + device_t dev = adapter->dev; + + struct tx_ring *txr = adapter->tx_rings; + struct rx_ring *rxr = adapter->rx_rings; + + struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); + struct sysctl_oid *tree = device_get_sysctl_tree(dev); + struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); + struct e1000_hw_stats *stats = adapter->stats; + + struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; + struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; + +#define QUEUE_NAME_LEN 32 + char namebuf[QUEUE_NAME_LEN]; + + /* Driver Statistics */ + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", + CTLFLAG_RD, &adapter->dropped_pkts, + "Driver dropped packets"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", + CTLFLAG_RD, &adapter->link_irq, + "Link MSIX IRQ Handled"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", + CTLFLAG_RD, &adapter->mbuf_defrag_failed, + "Defragmenting mbuf chain failed"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", + CTLFLAG_RD, &adapter->no_tx_dma_setup, + "Driver tx dma failure in xmit"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", + CTLFLAG_RD, &adapter->rx_overruns, + "RX overruns"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", + CTLFLAG_RD, &adapter->watchdog_events, + "Watchdog timeouts"); + + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", + CTLFLAG_RD, &adapter->device_control, + "Device Control Register"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", + CTLFLAG_RD, &adapter->rx_control, + "Receiver Control Register"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", + CTLFLAG_RD, &adapter->int_mask, + "Interrupt Mask"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", + CTLFLAG_RD, &adapter->eint_mask, + "Extended Interrupt Mask"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", + CTLFLAG_RD, &adapter->packet_buf_alloc_tx, + "Transmit Buffer Packet Allocation"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", + CTLFLAG_RD, &adapter->packet_buf_alloc_rx, + "Receive Buffer Packet Allocation"); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", + CTLFLAG_RD, &adapter->hw.fc.high_water, 0, + "Flow Control High Watermark"); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", + CTLFLAG_RD, &adapter->hw.fc.low_water, 0, + "Flow Control Low Watermark"); + + for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { + struct lro_ctrl *lro = &rxr->lro; + + snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); + queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, + CTLFLAG_RD, NULL, "Queue Name"); + queue_list = SYSCTL_CHILDREN(queue_node); + + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", + CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i], + sizeof(&adapter->queues[i]), + igb_sysctl_interrupt_rate_handler, + "IU", "Interrupt Rate"); + + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), + igb_sysctl_reg_handler, "IU", + "Transmit Descriptor Head"); + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), + igb_sysctl_reg_handler, "IU", + "Transmit Descriptor Tail"); + SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", + CTLFLAG_RD, &txr->no_desc_avail, + "Queue Descriptors Unavailable"); + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", + CTLFLAG_RD, &txr->total_packets, + "Queue Packets Transmitted"); + + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), + igb_sysctl_reg_handler, "IU", + "Receive Descriptor Head"); + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), + igb_sysctl_reg_handler, "IU", + "Receive Descriptor Tail"); + SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", + CTLFLAG_RD, &rxr->rx_packets, + "Queue Packets Received"); + SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", + CTLFLAG_RD, &rxr->rx_bytes, + "Queue Bytes Received"); + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", + CTLFLAG_RD, &lro->lro_queued, 0, + "LRO Queued"); + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", + CTLFLAG_RD, &lro->lro_flushed, 0, + "LRO Flushed"); + } + + /* MAC stats get their own sub node */ + + stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", + CTLFLAG_RD, NULL, "MAC Statistics"); + stat_list = SYSCTL_CHILDREN(stat_node); + + /* + ** VF adapter has a very limited set of stats + ** since its not managing the metal, so to speak. + */ + if (adapter->vf_ifp) { + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", + CTLFLAG_RD, &stats->gprc, + "Good Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", + CTLFLAG_RD, &stats->gptc, + "Good Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", + CTLFLAG_RD, &stats->gorc, + "Good Octets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &stats->gotc, + "Good Octets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", + CTLFLAG_RD, &stats->mprc, + "Multicast Packets Received"); + return; + } + + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", + CTLFLAG_RD, &stats->ecol, + "Excessive collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", + CTLFLAG_RD, &stats->scc, + "Single collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", + CTLFLAG_RD, &stats->mcc, + "Multiple collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", + CTLFLAG_RD, &stats->latecol, + "Late collisions"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", + CTLFLAG_RD, &stats->colc, + "Collision Count"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", + CTLFLAG_RD, &stats->symerrs, + "Symbol Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", + CTLFLAG_RD, &stats->sec, + "Sequence Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", + CTLFLAG_RD, &stats->dc, + "Defer Count"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", + CTLFLAG_RD, &stats->mpc, + "Missed Packets"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors", + CTLFLAG_RD, &stats->rlec, + "Receive Length Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", + CTLFLAG_RD, &stats->rnbc, + "Receive No Buffers"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", + CTLFLAG_RD, &stats->ruc, + "Receive Undersize"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", + CTLFLAG_RD, &stats->rfc, + "Fragmented Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", + CTLFLAG_RD, &stats->roc, + "Oversized Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", + CTLFLAG_RD, &stats->rjc, + "Recevied Jabber"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", + CTLFLAG_RD, &stats->rxerrc, + "Receive Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", + CTLFLAG_RD, &stats->crcerrs, + "CRC errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", + CTLFLAG_RD, &stats->algnerrc, + "Alignment Errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs", + CTLFLAG_RD, &stats->tncrs, + "Transmit with No CRS"); + /* On 82575 these are collision counts */ + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", + CTLFLAG_RD, &stats->cexterr, + "Collision/Carrier extension errors"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", + CTLFLAG_RD, &stats->xonrxc, + "XON Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", + CTLFLAG_RD, &stats->xontxc, + "XON Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", + CTLFLAG_RD, &stats->xoffrxc, + "XOFF Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", + CTLFLAG_RD, &stats->xofftxc, + "XOFF Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd", + CTLFLAG_RD, &stats->fcruc, + "Unsupported Flow Control Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd", + CTLFLAG_RD, &stats->mgprc, + "Management Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop", + CTLFLAG_RD, &stats->mgpdc, + "Management Packets Dropped"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd", + CTLFLAG_RD, &stats->mgptc, + "Management Packets Transmitted"); + /* Packet Reception Stats */ + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", + CTLFLAG_RD, &stats->tpr, + "Total Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", + CTLFLAG_RD, &stats->gprc, + "Good Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", + CTLFLAG_RD, &stats->bprc, + "Broadcast Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", + CTLFLAG_RD, &stats->mprc, + "Multicast Packets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", + CTLFLAG_RD, &stats->prc64, + "64 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", + CTLFLAG_RD, &stats->prc127, + "65-127 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", + CTLFLAG_RD, &stats->prc255, + "128-255 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", + CTLFLAG_RD, &stats->prc511, + "256-511 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", + CTLFLAG_RD, &stats->prc1023, + "512-1023 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", + CTLFLAG_RD, &stats->prc1522, + "1023-1522 byte frames received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", + CTLFLAG_RD, &stats->gorc, + "Good Octets Received"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", + CTLFLAG_RD, &stats->tor, + "Total Octets Received"); + + /* Packet Transmission Stats */ + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &stats->gotc, + "Good Octets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", + CTLFLAG_RD, &stats->tot, + "Total Octets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", + CTLFLAG_RD, &stats->tpt, + "Total Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", + CTLFLAG_RD, &stats->gptc, + "Good Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", + CTLFLAG_RD, &stats->bptc, + "Broadcast Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", + CTLFLAG_RD, &stats->mptc, + "Multicast Packets Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", + CTLFLAG_RD, &stats->ptc64, + "64 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", + CTLFLAG_RD, &stats->ptc127, + "65-127 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", + CTLFLAG_RD, &stats->ptc255, + "128-255 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", + CTLFLAG_RD, &stats->ptc511, + "256-511 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", + CTLFLAG_RD, &stats->ptc1023, + "512-1023 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", + CTLFLAG_RD, &stats->ptc1522, + "1024-1522 byte frames transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", + CTLFLAG_RD, &stats->tsctc, + "TSO Contexts Transmitted"); + SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", + CTLFLAG_RD, &stats->tsctfc, + "TSO Contexts Failed"); + + + /* Interrupt Stats */ + + int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", + CTLFLAG_RD, NULL, "Interrupt Statistics"); + int_list = SYSCTL_CHILDREN(int_node); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", + CTLFLAG_RD, &stats->iac, + "Interrupt Assertion Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", + CTLFLAG_RD, &stats->icrxptc, + "Interrupt Cause Rx Pkt Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", + CTLFLAG_RD, &stats->icrxatc, + "Interrupt Cause Rx Abs Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", + CTLFLAG_RD, &stats->ictxptc, + "Interrupt Cause Tx Pkt Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", + CTLFLAG_RD, &stats->ictxatc, + "Interrupt Cause Tx Abs Timer Expire Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", + CTLFLAG_RD, &stats->ictxqec, + "Interrupt Cause Tx Queue Empty Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", + CTLFLAG_RD, &stats->ictxqmtc, + "Interrupt Cause Tx Queue Min Thresh Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", + CTLFLAG_RD, &stats->icrxdmtc, + "Interrupt Cause Rx Desc Min Thresh Count"); + + SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", + CTLFLAG_RD, &stats->icrxoc, + "Interrupt Cause Receiver Overrun Count"); + + /* Host to Card Stats */ + + host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", + CTLFLAG_RD, NULL, + "Host to Card Statistics"); + + host_list = SYSCTL_CHILDREN(host_node); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", + CTLFLAG_RD, &stats->cbtmpc, + "Circuit Breaker Tx Packet Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", + CTLFLAG_RD, &stats->htdpmc, + "Host Transmit Discarded Packets"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", + CTLFLAG_RD, &stats->rpthc, + "Rx Packets To Host"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", + CTLFLAG_RD, &stats->cbrmpc, + "Circuit Breaker Rx Packet Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", + CTLFLAG_RD, &stats->cbrdpc, + "Circuit Breaker Rx Dropped Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", + CTLFLAG_RD, &stats->hgptc, + "Host Good Packets Tx Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", + CTLFLAG_RD, &stats->htcbdpc, + "Host Tx Circuit Breaker Dropped Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", + CTLFLAG_RD, &stats->hgorc, + "Host Good Octets Received Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", + CTLFLAG_RD, &stats->hgotc, + "Host Good Octets Transmit Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", + CTLFLAG_RD, &stats->lenerrs, + "Length Errors"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", + CTLFLAG_RD, &stats->scvpc, + "SerDes/SGMII Code Violation Pkt Count"); + + SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", + CTLFLAG_RD, &stats->hrmpc, + "Header Redirection Missed Packet Count"); +} + + +/********************************************************************** + * + * This routine provides a way to dump out the adapter eeprom, + * often a useful debug/service tool. This only dumps the first + * 32 words, stuff that matters is in that extent. + * + **********************************************************************/ +static int +igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter; + int error; + int result; + + result = -1; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr) + return (error); + + /* + * This value will cause a hex dump of the + * first 32 16-bit words of the EEPROM to + * the screen. + */ + if (result == 1) { + adapter = (struct adapter *)arg1; + igb_print_nvm_info(adapter); + } + + return (error); +} + +static void +igb_print_nvm_info(struct adapter *adapter) +{ + u16 eeprom_data; + int i, j, row = 0; + + /* Its a bit crude, but it gets the job done */ + printf("\nInterface EEPROM Dump:\n"); + printf("Offset\n0x0000 "); + for (i = 0, j = 0; i < 32; i++, j++) { + if (j == 8) { /* Make the offset block */ + j = 0; ++row; + printf("\n0x00%x0 ",row); + } + e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); + printf("%04x ", eeprom_data); + } + printf("\n"); +} + +static void +igb_set_sysctl_value(struct adapter *adapter, const char *name, + const char *description, int *limit, int value) +{ + *limit = value; + SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), + OID_AUTO, name, CTLFLAG_RW, limit, value, description); +} + +/* +** Set flow control using sysctl: +** Flow control values: +** 0 - off +** 1 - rx pause +** 2 - tx pause +** 3 - full +*/ +static int +igb_set_flowcntl(SYSCTL_HANDLER_ARGS) +{ + int error; + static int input = 3; /* default is full */ + struct adapter *adapter = (struct adapter *) arg1; + + error = sysctl_handle_int(oidp, &input, 0, req); + + if ((error) || (req->newptr == NULL)) + return (error); + + switch (input) { + case e1000_fc_rx_pause: + case e1000_fc_tx_pause: + case e1000_fc_full: + case e1000_fc_none: + adapter->hw.fc.requested_mode = input; + adapter->fc = input; + break; + default: + /* Do nothing */ + return (error); + } + + adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; + e1000_force_mac_fc(&adapter->hw); + /* XXX TODO: update DROP_EN on each RX queue if appropriate */ + return (error); +} + +/* +** Manage DMA Coalesce: +** Control values: +** 0/1 - off/on +** Legal timer values are: +** 250,500,1000-10000 in thousands +*/ +static int +igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + int error; + + error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); + + if ((error) || (req->newptr == NULL)) + return (error); + + switch (adapter->dmac) { + case 0: + /* Disabling */ + break; + case 1: /* Just enable and use default */ + adapter->dmac = 1000; + break; + case 250: + case 500: + case 1000: + case 2000: + case 3000: + case 4000: + case 5000: + case 6000: + case 7000: + case 8000: + case 9000: + case 10000: + /* Legal values - allow */ + break; + default: + /* Do nothing, illegal value */ + adapter->dmac = 0; + return (EINVAL); + } + /* Reinit the interface */ + igb_init(adapter); + return (error); +} + +/* +** Manage Energy Efficient Ethernet: +** Control values: +** 0/1 - enabled/disabled +*/ +static int +igb_sysctl_eee(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + int error, value; + + value = adapter->hw.dev_spec._82575.eee_disable; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || req->newptr == NULL) + return (error); + IGB_CORE_LOCK(adapter); + adapter->hw.dev_spec._82575.eee_disable = (value != 0); + igb_init_locked(adapter); + IGB_CORE_UNLOCK(adapter); + return (0); +} Index: sys/modules/igb/Makefile =================================================================== --- sys/modules/igb/Makefile +++ sys/modules/igb/Makefile @@ -1,24 +1,12 @@ -#$FreeBSD$ +# $FreeBSD$ -.PATH: ${.CURDIR}/../../dev/e1000 -KMOD = if_igb -SRCS = device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h opt_rss.h -SRCS += if_igb.c $(SHARED_SRCS) -SHARED_SRCS = e1000_api.c e1000_phy.c e1000_nvm.c e1000_mac.c e1000_manage.c -SHARED_SRCS += e1000_80003es2lan.c e1000_82542.c e1000_82541.c e1000_82543.c -SHARED_SRCS += e1000_82540.c e1000_ich8lan.c e1000_82571.c e1000_osdep.c -SHARED_SRCS += e1000_82575.c e1000_vf.c e1000_mbx.c e1000_i210.c +SYSDIR?=${.CURDIR}/../.. +.include "${SYSDIR}/conf/kern.opts.mk" -CFLAGS += -I${.CURDIR}/../../dev/e1000 -DSMP -# DEVICE_POLLING gives you non-interrupt handling -# not advisable since MSIX gives better results -#CFLAGS += -DDEVICE_POLLING -# IGB_LEGACY_TX will override the stack if_transmit path and -# instead use the older if_start non-multiqueue capable interface. -# This might be desirable for testing, or to enable the use of -# ALTQ. -#CFLAGS += -DIGB_LEGACY_TX +SUBDIR = \ + igb_iflib \ + igb_legacy -.include +.include Index: sys/modules/igb/igb_iflib/Makefile =================================================================== --- /dev/null +++ sys/modules/igb/igb_iflib/Makefile @@ -0,0 +1,25 @@ +#$FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/e1000 +KMOD = if_igb_iflib +SRCS = device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h opt_rss.h ifdi_if.h +SRCS += if_igb.c iflib_igb_txrx.c $(SHARED_SRCS) +SHARED_SRCS = e1000_api.c e1000_phy.c e1000_nvm.c e1000_mac.c e1000_manage.c +SHARED_SRCS += e1000_80003es2lan.c e1000_82542.c e1000_82541.c e1000_82543.c +SHARED_SRCS += e1000_82540.c e1000_ich8lan.c e1000_82571.c e1000_osdep.c +SHARED_SRCS += e1000_82575.c e1000_vf.c e1000_mbx.c e1000_i210.c + +CFLAGS += -I${.CURDIR}/../../../dev/e1000 -DSMP +CFLAGS += -DIFLIB + +# DEVICE_POLLING gives you non-interrupt handling +# not advisable since MSIX gives better results +#CFLAGS += -DDEVICE_POLLING + +# IGB_LEGACY_TX will override the stack if_transmit path and +# instead use the older if_start non-multiqueue capable interface. +# This might be desirable for testing, or to enable the use of +# ALTQ. +#CFLAGS += -DIGB_LEGACY_TX + +.include Index: sys/modules/igb/igb_legacy/Makefile =================================================================== --- /dev/null +++ sys/modules/igb/igb_legacy/Makefile @@ -0,0 +1,24 @@ +#$FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/e1000 +KMOD = if_igb +SRCS = device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h opt_rss.h ifdi_if.h +SRCS += if_igb.c $(SHARED_SRCS) +SHARED_SRCS = e1000_api.c e1000_phy.c e1000_nvm.c e1000_mac.c e1000_manage.c +SHARED_SRCS += e1000_80003es2lan.c e1000_82542.c e1000_82541.c e1000_82543.c +SHARED_SRCS += e1000_82540.c e1000_ich8lan.c e1000_82571.c e1000_osdep.c +SHARED_SRCS += e1000_82575.c e1000_vf.c e1000_mbx.c e1000_i210.c + +CFLAGS += -I${.CURDIR}/../../../dev/e1000 -DSMP + +# DEVICE_POLLING gives you non-interrupt handling +# not advisable since MSIX gives better results +#CFLAGS += -DDEVICE_POLLING + +# IGB_LEGACY_TX will override the stack if_transmit path and +# instead use the older if_start non-multiqueue capable interface. +# This might be desirable for testing, or to enable the use of +# ALTQ. +#CFLAGS += -DIGB_LEGACY_TX + +.include