Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC
+++ sys/amd64/conf/GENERIC
@@ -369,3 +369,5 @@
 # The crypto framework is required by IPSEC
 device		crypto		# Required by IPSEC
+
+options 	EM_MULTIQUEUE
Index: sys/dev/e1000/e1000_defines.h
===================================================================
--- sys/dev/e1000/e1000_defines.h
+++ sys/dev/e1000/e1000_defines.h
@@ -89,6 +89,8 @@
 #define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES	0x00C00000
 #define E1000_CTRL_EXT_LINK_MODE_SGMII	0x00800000
 #define E1000_CTRL_EXT_EIAME		0x01000000
+#define E1000_CTRL_EXT_LS		0x00800000
+#define E1000_CTRL_EXT_LS_FLOW		0x00400000
 #define E1000_CTRL_EXT_IRCA		0x00000001
 #define E1000_CTRL_EXT_DRV_LOAD		0x10000000 /* Drv loaded bit for FW */
 #define E1000_CTRL_EXT_IAME		0x08000000 /* Int ACK Auto-mask */
Index: sys/dev/e1000/if_em.h
===================================================================
--- sys/dev/e1000/if_em.h
+++ sys/dev/e1000/if_em.h
@@ -286,14 +286,6 @@
  * solve it just using this define.
  */
 #define EM_EIAC 0x000DC
-/*
- * 82574 only reports 3 MSI-X vectors by default;
- * defines assisting with making it report 5 are
- * located here.
- */
-#define EM_NVM_PCIE_CTRL	0x1B
-#define EM_NVM_MSIX_N_MASK	(0x7 << EM_NVM_MSIX_N_SHIFT)
-#define EM_NVM_MSIX_N_SHIFT	7
 
 /*
  * Bus dma allocation structure used by
@@ -317,6 +309,25 @@
 };
 
 /*
+** Driver queue struct: this is the interrupt container
+** for the associated tx and rx ring.
+*/
+struct em_queue {
+	struct adapter	*adapter;
+	u32		msix;		/* This queue's MSIX vector */
+	u32		ims;		/* This queue's EIMS bit */
+	u32		eitr_setting;
+	struct resource	*res;
+	void		*tag;
+	struct tx_ring	*txr;
+	struct rx_ring	*rxr;
+	struct task	que_task;
+	struct task	txq_task;
+	struct taskqueue *tq;
+	u64		irqs;
+};
+
+/*
  * The transmit ring, one per tx queue
  */
 struct tx_ring {
@@ -324,16 +335,12 @@
 	struct mtx	tx_mtx;
 	char		mtx_name[16];
 	u32		me;
-	u32		msix;
-	u32		ims;
-	int		busy;
+	int		busy;
 	struct em_dma_alloc	txdma;
 	struct e1000_tx_desc	*tx_base;
-	struct task	tx_task;
-	struct taskqueue *tq;
 	u32		next_avail_desc;
 	u32		next_to_clean;
-	struct em_txbuffer *tx_buffers;
+	struct em_buffer *tx_buffers;
 	volatile u16	tx_avail;
 	u32		tx_tso;		/* last tx was tso */
 	u16		last_hw_offload;
@@ -346,10 +353,8 @@
 #endif
 	/* Interrupt resources */
 	bus_dma_tag_t	txtag;
-	void		*tag;
-	struct resource	*res;
-	unsigned long	tx_irq;
 	unsigned long	no_desc_avail;
+	u64		total_packets;
 };
 
 /*
@@ -358,32 +363,29 @@
 struct rx_ring {
 	struct adapter	*adapter;
 	u32		me;
-	u32		msix;
-	u32		ims;
 	struct mtx	rx_mtx;
 	char		mtx_name[16];
-	u32		payload;
-	struct task	rx_task;
-	struct taskqueue *tq;
 	union e1000_rx_desc_extended	*rx_base;
 	struct em_dma_alloc	rxdma;
 	u32		next_to_refresh;
 	u32		next_to_check;
-	struct em_rxbuffer	*rx_buffers;
+	struct em_buffer	*rx_buffers;
 	struct mbuf	*fmp;
 	struct mbuf	*lmp;
 	/* Interrupt resources */
-	void		*tag;
-	struct resource	*res;
 	bus_dma_tag_t	rxtag;
 	bool		discard;
 	/* Soft stats */
-	unsigned long	rx_irq;
 	unsigned long	rx_discarded;
-	unsigned long	rx_packets;
-	unsigned long	rx_bytes;
+	u64		rx_packets;
+	u64		rx_hash_ipv4_tcp;
+	u64		rx_hash_ipv4;
+	u64		rx_hash_ipv6_tcp;
+	u64		rx_hash_ipv6_ex;
+	u64		rx_hash_ipv6;
+	u64		rx_hash_none;
 };
 
@@ -407,14 +409,14 @@
 	u32		ivars;
 
 	struct ifmedia	media;
-	struct callout	timer;
+	struct callout	watchdog;
+	struct callout	stats_collector;
 	int		msix;
 	int		if_flags;
 	int		max_frame_size;
 	int		min_frame_size;
-	struct mtx	core_mtx;
+	struct mtx	watchdog_mtx;
 	int		em_insert_vlan_header;
-	u32		ims;
 	bool		in_detach;
 
 	/* Task for FAST handling */
@@ -428,6 +430,8 @@
 	u16		num_vlans;
 	u8		num_queues;
 
+	/* Interface queues */
+	struct em_queue	*queues;
 	/*
 	 * Transmit rings:
 	 *	Allocated at run time, an array of rings.
@@ -445,6 +449,8 @@
 	u32		rx_process_limit;
 	u32		rx_mbuf_sz;
 
+	u64		que_mask;
+
 	/* Management and WOL features */
 	u32		wol;
 	bool		has_manage;
@@ -501,13 +507,7 @@
 	unsigned int index;
 } em_vendor_info_t;
 
-struct em_txbuffer {
-	int		next_eop;	/* Index of the desc to watch */
-	struct mbuf	*m_head;
-	bus_dmamap_t	map;		/* bus_dma map for packet */
-};
-
-struct em_rxbuffer {
+struct em_buffer {
 	int		next_eop;	/* Index of the desc to watch */
 	struct mbuf	*m_head;
 	bus_dmamap_t	map;		/* bus_dma map for packet */
@@ -531,22 +531,23 @@
 }
 
 #define EM_CORE_LOCK_INIT(_sc, _name) \
-	mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF)
+	mtx_init(&(_sc)->watchdog_mtx, _name, "EM Core Lock", MTX_DEF)
 #define EM_TX_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->tx_mtx, _name, "EM TX Lock", MTX_DEF)
 #define EM_RX_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->rx_mtx, _name, "EM RX Lock", MTX_DEF)
-#define EM_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->core_mtx)
+#define EM_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->watchdog_mtx)
 #define EM_TX_LOCK_DESTROY(_sc)		mtx_destroy(&(_sc)->tx_mtx)
 #define EM_RX_LOCK_DESTROY(_sc)		mtx_destroy(&(_sc)->rx_mtx)
-#define EM_CORE_LOCK(_sc)		mtx_lock(&(_sc)->core_mtx)
+#define EM_CORE_LOCK(_sc)		mtx_lock(&(_sc)->watchdog_mtx)
#define EM_TX_LOCK(_sc)			mtx_lock(&(_sc)->tx_mtx)
 #define EM_TX_TRYLOCK(_sc)		mtx_trylock(&(_sc)->tx_mtx)
 #define EM_RX_LOCK(_sc)			mtx_lock(&(_sc)->rx_mtx)
-#define EM_CORE_UNLOCK(_sc)		mtx_unlock(&(_sc)->core_mtx)
+#define EM_RX_TRYLOCK(_sc)		mtx_trylock(&(_sc)->rx_mtx)
+#define EM_CORE_UNLOCK(_sc)		mtx_unlock(&(_sc)->watchdog_mtx)
 #define EM_TX_UNLOCK(_sc)		mtx_unlock(&(_sc)->tx_mtx)
 #define EM_RX_UNLOCK(_sc)		mtx_unlock(&(_sc)->rx_mtx)
-#define EM_CORE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->core_mtx, MA_OWNED)
+#define EM_CORE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->watchdog_mtx, MA_OWNED)
 #define EM_TX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->tx_mtx, MA_OWNED)
 #define EM_RX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->rx_mtx, MA_OWNED)
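
The EM_TX_TRYLOCK()/EM_RX_TRYLOCK() macros added above exist so the combined
queue handler can take a ring opportunistically: if another context already
owns it, the handler skips the ring and lets the owner finish the work. A
minimal user-space model of that trylock-or-skip pattern, using pthreads in
place of the kernel's mtx(9) API (illustrative sketch, not driver code):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t tx_mtx = PTHREAD_MUTEX_INITIALIZER;

    static void
    deferred_start(void)
    {
        /* Trylock-or-skip: whoever holds the lock drains the ring,
         * including any packets we just enqueued. */
        if (pthread_mutex_trylock(&tx_mtx) == 0) {
            printf("draining tx ring\n");
            pthread_mutex_unlock(&tx_mtx);
        }
    }

    int
    main(void)
    {
        deferred_start();
        return (0);
    }
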
Index: sys/dev/e1000/if_em.c
===================================================================
--- sys/dev/e1000/if_em.c
+++ sys/dev/e1000/if_em.c
@@ -225,6 +225,7 @@
 static int	em_mq_start_locked(if_t, struct tx_ring *);
 static void	em_qflush(if_t);
+static void	em_deferred_mq_start(void *, int);
 #else
 static void	em_start(if_t);
 static void	em_start_locked(if_t, struct tx_ring *);
@@ -243,7 +244,7 @@
 static int	em_allocate_queues(struct adapter *);
 static int	em_setup_msix(struct adapter *);
 static void	em_free_pci_resources(struct adapter *);
-static void	em_local_timer(void *);
+static void	em_watchdog(void *);
 static void	em_reset(struct adapter *);
 static int	em_setup_interface(device_t, struct adapter *);
 static void	em_flush_desc_rings(struct adapter *);
@@ -262,15 +263,15 @@
 static void	em_enable_intr(struct adapter *);
 static void	em_disable_intr(struct adapter *);
-static void	em_update_stats_counters(struct adapter *);
+static void	em_update_stats_counters(void *);
 static void	em_add_hw_stats(struct adapter *adapter);
 static void	em_txeof(struct tx_ring *);
-static bool	em_rxeof(struct rx_ring *, int, int *);
+static bool	em_rxeof(struct em_queue *, int, int *);
 #ifndef __NO_STRICT_ALIGNMENT
 static int	em_fixup_rx(struct rx_ring *);
 #endif
 static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
-		    const struct em_rxbuffer *rxbuf);
+		    const struct em_buffer *rxbuf);
 static void	em_receive_checksum(uint32_t status, struct mbuf *);
 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
@@ -284,7 +285,7 @@
 static void	em_register_vlan(void *, if_t, u16);
 static void	em_unregister_vlan(void *, if_t, u16);
 static void	em_setup_vlan_hw_support(struct adapter *);
-static int	em_xmit(struct tx_ring *, struct mbuf **);
+int		em_xmit(struct tx_ring *, struct mbuf **);
 static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
@@ -310,17 +311,11 @@
 static int	em_irq_fast(void *);
 
 /* MSIX handlers */
-static void	em_msix_tx(void *);
-static void	em_msix_rx(void *);
+static void	em_msix_que(void *);
 static void	em_msix_link(void *);
-static void	em_handle_tx(void *context, int pending);
-static void	em_handle_rx(void *context, int pending);
+static void	em_handle_que(void *context, int pending);
 static void	em_handle_link(void *context, int pending);
 
-#ifdef EM_MULTIQUEUE
-static void	em_enable_vectors_82574(struct adapter *);
-#endif
-
 static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
@@ -366,7 +361,11 @@
 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
+#ifdef EM_MULTIQUEUE
+#define MAX_INTS_PER_SEC	4000
+#else
 #define MAX_INTS_PER_SEC	8000
+#endif
 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
 
 #define TSO_WORKAROUND	4
@@ -523,6 +522,7 @@
 	adapter = device_get_softc(dev);
 	adapter->dev = adapter->osdep.dev = dev;
 	hw = &adapter->hw;
+
 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
 
 	/* SYSCTL stuff */
@@ -541,7 +541,8 @@
 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
 	    adapter, 0, em_set_flowcntl, "I", "Flow Control");
 
-	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
+	callout_init_mtx(&adapter->watchdog, &adapter->watchdog_mtx, 0);
+	callout_init(&adapter->stats_collector, 0);
 
 	/* Determine hardware and mac info */
 	em_identify_hardware(adapter);
@@ -869,7 +870,8 @@
 	EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
 
 	ether_ifdetach(adapter->ifp);
-	callout_drain(&adapter->timer);
+	callout_drain(&adapter->watchdog);
+	callout_drain(&adapter->stats_collector);
 
 #ifdef DEV_NETMAP
 	netmap_detach(ifp);
@@ -969,6 +971,8 @@
 		return;
 
 	while (!if_sendq_empty(ifp)) {
+		int xmit_err = 0;
+
 		/* Call cleanup if number of TX descriptors low */
 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
 			em_txeof(txr);
@@ -983,7 +987,8 @@
 		 * Encapsulation can modify our pointer, and or make it
 		 * NULL on failure.  In that event, we can't requeue.
 		 */
-		if (em_xmit(txr, &m_head)) {
+		xmit_err = em_xmit(txr, &m_head);
+		if (xmit_err) {
 			if (m_head == NULL)
 				break;
 			if_sendq_prepend(ifp, m_head);
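
For reference, the MAX_INTS_PER_SEC/DEFAULT_ITR change above halves the
per-vector interrupt target when EM_MULTIQUEUE is set, since two queue
vectors now fire independently. The ITR register counts in units of 256 ns,
which is where the divide-by-256 comes from; a compilable check of the
arithmetic:

    #include <stdio.h>

    int
    main(void)
    {
        /* DEFAULT_ITR = 1000000000 / (MAX_INTS_PER_SEC * 256) */
        for (int rate = 4000; rate <= 8000; rate += 4000) {
            int itr = 1000000000 / (rate * 256);
            printf("%d ints/s -> ITR %d (x 256 ns)\n", rate, itr);
        }
        return (0);
    }
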
@@ -1020,9 +1025,7 @@
  *  Multiqueue Transmit routines
  *
  *  em_mq_start is called by the stack to initiate a transmit.
- *  however, if busy the driver can queue the request rather
- *  than do an immediate send. It is this that is an advantage
- *  in this driver, rather than also having multiple tx queues.
+ *  This is done asynchronously by a taskqueue thread.
 **********************************************************************/
 /*
 ** Multiqueue capable stack interface
 */
@@ -1031,25 +1034,42 @@
 em_mq_start(if_t ifp, struct mbuf *m)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
-	struct tx_ring	*txr = adapter->tx_rings;
+	struct tx_ring	*txr;
+	struct em_queue	*que;
 	unsigned int	i, error;
+#ifdef RSS
+	uint32_t	bucket_id;
+#endif
 
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
-		i = m->m_pkthdr.flowid % adapter->num_queues;
+#ifdef RSS
+		if (rss_hash2bucket(m->m_pkthdr.flowid,
+		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
+			i = bucket_id % adapter->num_queues;
+		} else {
+#endif
+			i = m->m_pkthdr.flowid % adapter->num_queues;
+#ifdef RSS
+		}
+#endif
 	else
 		i = curcpu % adapter->num_queues;
 
 	txr = &adapter->tx_rings[i];
+	que = &adapter->queues[i];
 
 	error = drbr_enqueue(ifp, txr->br, m);
 	if (error)
 		return (error);
 
-	if (EM_TX_TRYLOCK(txr)) {
-		em_mq_start_locked(ifp, txr);
-		EM_TX_UNLOCK(txr);
-	} else
-		taskqueue_enqueue(txr->tq, &txr->tx_task);
+	/*
+	 * Never process packets directly from if_transmit();
+	 * leave it to the running taskqueue so that packets are
+	 * processed in order, and don't bother enqueuing a
+	 * txq_task if one is already pending.
+	 */
+	if (!que->txq_task.ta_pending)
+		taskqueue_enqueue(que->tq, &que->txq_task);
 
 	return (0);
 }
@@ -1092,20 +1112,47 @@
 		ETHER_BPF_MTAP(ifp, next);
 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
 			break;
+		/*
+		 * Just check to see if we can clean a bit while
+		 * we are here.
+		 */
+		if (txr->tx_avail < EM_TX_CLEANUP_THRESHOLD)
+			em_txeof(txr);
 	}
 
 	/* Mark the queue as having work */
 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
 		txr->busy = EM_TX_BUSY;
 
-	if (txr->tx_avail < EM_MAX_SCATTER)
+	if (txr->tx_avail < EM_TX_CLEANUP_THRESHOLD) {
 		em_txeof(txr);
-	if (txr->tx_avail < EM_MAX_SCATTER) {
-		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
+		if (txr->tx_avail < EM_MAX_SCATTER)
+			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
 	}
 	return (err);
 }
 
+static void
+em_deferred_mq_start(void *arg, int pending)
+{
+	struct em_queue	*que = arg;
+	struct tx_ring	*txr = que->txr;
+	struct adapter	*adapter = que->adapter;
+	if_t		ifp = adapter->ifp;
+
+	/*
+	 * If we can acquire this tx queue lock, then we
+	 * can become the em_xmit() thread and start doing
+	 * our business.  Otherwise, the data that we were
+	 * going to xmit will be handled by an already executing
+	 * instance.
+	 */
+	if (EM_TX_TRYLOCK(txr)) {
+		if (!drbr_empty(ifp, txr->br))
+			em_mq_start_locked(ifp, txr);
+		EM_TX_UNLOCK(txr);
+	}
+}
+
 /*
 ** Flush all ring buffers
 */
@@ -1359,7 +1406,8 @@
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	em_disable_intr(adapter);
-	callout_stop(&adapter->timer);
+	callout_stop(&adapter->watchdog);
+	callout_stop(&adapter->stats_collector);
 
 	/* Get the latest mac address, User can use a LAA */
 	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
@@ -1443,7 +1491,8 @@
 	/* Set the interface as ACTIVE */
 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
 
-	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
+	callout_reset(&adapter->watchdog, hz, em_watchdog, adapter);
+	callout_reset(&adapter->stats_collector, hz,
+	    em_update_stats_counters, adapter);
 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
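
The if_transmit() path above never transmits inline: em_mq_start() only
enqueues to the buf_ring and schedules em_deferred_mq_start() when no
txq_task is pending, which preserves ordering and coalesces wakeups. A
user-space sketch of that coalescing test, with a C11 atomic standing in
for the task's ta_pending field (hypothetical stand-in, not the kernel
taskqueue API):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int txq_pending;

    static void
    enqueue_txq_task(void)
    {
        int zero = 0;

        /* Schedule only if no task is already queued. */
        if (atomic_compare_exchange_strong(&txq_pending, &zero, 1))
            printf("txq task scheduled\n");
        /* else: the already-queued task will drain our packets too */
    }

    int
    main(void)
    {
        enqueue_txq_task();
        enqueue_txq_task();     /* coalesced with the first call */
        return (0);
    }
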
 	/* MSI/X configuration for 82574 */
@@ -1451,6 +1500,19 @@
 		int tmp;
 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 		tmp |= E1000_CTRL_EXT_PBA_CLR;
+		/*
+		 * Table 10.2.2.5 of the 82574 manual states that LS_FLOW
+		 * should be set on and LS should be set to off when
+		 * TSO is enabled.
+		 */
+		if (if_getcapenable(ifp) & IFCAP_TSO4) {
+			tmp |= E1000_CTRL_EXT_LS_FLOW;
+			tmp &= ~E1000_CTRL_EXT_LS;
+		} else {
+			tmp &= ~E1000_CTRL_EXT_LS_FLOW;
+			tmp |= E1000_CTRL_EXT_LS;
+		}
+
 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
 		/* Set the IVAR - interrupt vector routing. */
 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
@@ -1507,16 +1569,18 @@
 	if (cmd == POLL_AND_CHECK_STATUS) {
 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
-			callout_stop(&adapter->timer);
+			callout_stop(&adapter->watchdog);
 			adapter->hw.mac.get_link_status = 1;
 			em_update_link_status(adapter);
-			callout_reset(&adapter->timer, hz,
-			    em_local_timer, adapter);
+			callout_reset(&adapter->watchdog, hz,
+			    em_watchdog, adapter);
 		}
 	}
 	EM_CORE_UNLOCK(adapter);
 
-	em_rxeof(rxr, count, &rx_done);
+	EM_RX_LOCK(rxr);
+	em_rxeof(adapter->queues, count, &rx_done);
+	EM_RX_UNLOCK(rxr);
 
 	EM_TX_LOCK(txr);
 	em_txeof(txr);
@@ -1542,7 +1606,8 @@
 static int
 em_irq_fast(void *arg)
 {
-	struct adapter	*adapter = arg;
+	struct em_queue	*que = arg;
+	struct adapter	*adapter = que->adapter;
 	if_t ifp;
 	u32		reg_icr;
 
@@ -1567,7 +1632,7 @@
 		return FILTER_STRAY;
 
 	em_disable_intr(adapter);
-	taskqueue_enqueue(adapter->tq, &adapter->que_task);
+	taskqueue_enqueue(que->tq, &que->que_task);
 
 	/* Link status change */
 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
@@ -1584,87 +1649,65 @@
 static void
 em_handle_que(void *context, int pending)
 {
-	struct adapter	*adapter = context;
+	struct em_queue	*que = context;
+	struct adapter	*adapter = que->adapter;
+	struct tx_ring	*txr = que->txr;
+	struct rx_ring	*rxr = que->rxr;
 	if_t ifp = adapter->ifp;
-	struct tx_ring	*txr = adapter->tx_rings;
-	struct rx_ring	*rxr = adapter->rx_rings;
 
 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
-		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
+		bool more = TRUE;
+		int interrupt = 0;
 
-		EM_TX_LOCK(txr);
-		em_txeof(txr);
+		if (EM_TX_TRYLOCK(txr)) {
+			em_txeof(txr);
 #ifdef EM_MULTIQUEUE
-		if (!drbr_empty(ifp, txr->br))
-			em_mq_start_locked(ifp, txr);
+			if (!drbr_empty(ifp, txr->br))
+				em_mq_start_locked(ifp, txr);
 #else
-		if (!if_sendq_empty(ifp))
-			em_start_locked(ifp, txr);
+			if (!if_sendq_empty(ifp))
+				em_start_locked(ifp, txr);
 #endif
-		EM_TX_UNLOCK(txr);
-		if (more) {
-			taskqueue_enqueue(adapter->tq, &adapter->que_task);
-			return;
+			/* reset tx interrupt for this queue */
+			interrupt |= (1 << (22 + txr->me));
+			EM_TX_UNLOCK(txr);
 		}
-	}
 
-	em_enable_intr(adapter);
-	return;
-}
+		if (EM_RX_TRYLOCK(rxr)) {
+			while (more)
+				more = em_rxeof(que,
+				    adapter->rx_process_limit, NULL);
+			/* reset rx interrupt for this queue */
+			interrupt |= (1 << (20 + rxr->me));
+			EM_RX_UNLOCK(rxr);
+		}
 
-/*********************************************************************
- *
- *  MSIX Interrupt Service Routines
- *
- **********************************************************************/
-static void
-em_msix_tx(void *arg)
-{
-	struct tx_ring *txr = arg;
-	struct adapter *adapter = txr->adapter;
-	if_t ifp = adapter->ifp;
+		/* Re-enable this interrupt */
+		if (interrupt) {
+			if (adapter->num_queues > 1)
+				E1000_WRITE_REG(&adapter->hw, E1000_IMS,
+				    interrupt);
+			else
+				em_enable_intr(adapter);
+		}
+	}
 
-	++txr->tx_irq;
-	EM_TX_LOCK(txr);
-	em_txeof(txr);
-#ifdef EM_MULTIQUEUE
-	if (!drbr_empty(ifp, txr->br))
-		em_mq_start_locked(ifp, txr);
-#else
-	if (!if_sendq_empty(ifp))
-		em_start_locked(ifp, txr);
-#endif
+	callout_reset(&adapter->watchdog, hz, em_watchdog, adapter);
 
-	/* Reenable this interrupt */
-	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
-	EM_TX_UNLOCK(txr);
 	return;
 }
 
+
 /*********************************************************************
  *
- *  MSIX RX Interrupt Service routine
+ *  MSIX Interrupt Service Routines
  *
  **********************************************************************/
-
 static void
-em_msix_rx(void *arg)
+em_msix_que(void *arg)
 {
-	struct rx_ring	*rxr = arg;
-	struct adapter	*adapter = rxr->adapter;
-	bool		more;
+	struct em_queue	*que = arg;
 
-	++rxr->rx_irq;
-	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
-		return;
-	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
-	if (more)
-		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
-	else {
-		/* Reenable this interrupt */
-		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
-	}
+	++que->irqs;
+	em_handle_que(arg, 0);
 	return;
 }
@@ -1699,48 +1742,12 @@
 	 */
 	if (reg_icr) {
 		E1000_WRITE_REG(&adapter->hw,
-		    E1000_ICS, adapter->ims);
+		    E1000_ICS, adapter->que_mask);
 	}
 	return;
 }
 
 static void
-em_handle_rx(void *context, int pending)
-{
-	struct rx_ring	*rxr = context;
-	struct adapter	*adapter = rxr->adapter;
-	bool		more;
-
-	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
-	if (more)
-		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
-	else {
-		/* Reenable this interrupt */
-		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
-	}
-}
-
-static void
-em_handle_tx(void *context, int pending)
-{
-	struct tx_ring	*txr = context;
-	struct adapter	*adapter = txr->adapter;
-	if_t ifp = adapter->ifp;
-
-	EM_TX_LOCK(txr);
-	em_txeof(txr);
-#ifdef EM_MULTIQUEUE
-	if (!drbr_empty(ifp, txr->br))
-		em_mq_start_locked(ifp, txr);
-#else
-	if (!if_sendq_empty(ifp))
-		em_start_locked(ifp, txr);
-#endif
-	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
-	EM_TX_UNLOCK(txr);
-}
-
-static void
 em_handle_link(void *context, int pending)
 {
 	struct adapter	*adapter = context;
@@ -1751,9 +1758,9 @@
 		return;
 
 	EM_CORE_LOCK(adapter);
-	callout_stop(&adapter->timer);
+	callout_stop(&adapter->watchdog);
 	em_update_link_status(adapter);
-	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
+	callout_reset(&adapter->watchdog, hz, em_watchdog, adapter);
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
 	    EM_MSIX_LINK | E1000_IMS_LSC);
 	if (adapter->link_active) {
@@ -1889,13 +1896,13 @@
  *  return 0 on success, positive on failure
  **********************************************************************/
 
-static int
+int
 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
 {
 	struct adapter		*adapter = txr->adapter;
 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
 	bus_dmamap_t		map;
-	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
+	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
 	struct e1000_tx_desc	*ctxd = NULL;
 	struct mbuf		*m_head;
 	struct ether_header	*eh;
@@ -2196,6 +2203,7 @@
 	 */
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+	++txr->total_packets;
 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
 
 	return (0);
@@ -2302,18 +2310,17 @@
 **********************************************************************/
 
 static void
-em_local_timer(void *arg)
+em_watchdog(void *arg)
 {
 	struct adapter	*adapter = arg;
 	if_t ifp = adapter->ifp;
 	struct tx_ring	*txr = adapter->tx_rings;
-	struct rx_ring	*rxr = adapter->rx_rings;
-	u32		trigger = 0;
+	struct em_queue	*que = adapter->queues;
+	u32		trigger;
 
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	em_update_link_status(adapter);
-	em_update_stats_counters(adapter);
 
 	/* Reset LAA into RAR[0] on 82571 */
 	if ((adapter->hw.mac.type == e1000_82571) &&
@@ -2321,11 +2328,9 @@
 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
 
 	/* Mask to use in the irq trigger */
-	if (adapter->msix_mem) {
-		for (int i = 0; i < adapter->num_queues; i++, rxr++)
-			trigger |= rxr->ims;
-		rxr = adapter->rx_rings;
-	} else
+	if (adapter->msix_mem)
+		trigger = adapter->que_mask;
+	else
 		trigger = E1000_ICS_RXDMT0;
 
 	/*
@@ -2333,17 +2338,17 @@
 	** can be done without the lock because its RO
 	** and the HUNG state will be static if set.
 	*/
-	for (int i = 0; i < adapter->num_queues; i++, txr++) {
+	for (int i = 0; i < adapter->num_queues; i++, txr++, que++) {
 		if (txr->busy == EM_TX_HUNG)
 			goto hung;
 		if (txr->busy >= EM_TX_MAXTRIES)
 			txr->busy = EM_TX_HUNG;
 		/* Schedule a TX tasklet if needed */
 		if (txr->tx_avail <= EM_MAX_SCATTER)
-			taskqueue_enqueue(txr->tq, &txr->tx_task);
+			taskqueue_enqueue(que->tq, &que->que_task);
 	}
 
-	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
+	callout_reset(&adapter->watchdog, hz, em_watchdog, adapter);
 #ifndef DEVICE_POLLING
 	/* Trigger an RX interrupt to guarantee mbuf refresh */
 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
@@ -2410,7 +2415,18 @@
 			if_sethwassistbits(ifp, 0, CSUM_TSO);
 			if_setcapenablebit(ifp, 0, IFCAP_TSO4);
 			if_setcapabilitiesbit(ifp, 0, IFCAP_TSO4);
-
+			if (adapter->hw.mac.type == e1000_82574) {
+				int tmp;
+				tmp = E1000_READ_REG(&adapter->hw,
+				    E1000_CTRL_EXT);
+				/*
+				 * Table 10.2.2.5 of the 82574 manual: LS_FLOW
+				 * on and LS off only while TSO is enabled.
+				 * TSO is being disabled here, so do the
+				 * inverse.
+				 */
+				tmp &= ~E1000_CTRL_EXT_LS_FLOW;
+				tmp |= E1000_CTRL_EXT_LS;
+				E1000_WRITE_REG(&adapter->hw,
+				    E1000_CTRL_EXT, tmp);
+			}
 		}
@@ -2429,6 +2445,7 @@
 		    "Full Duplex" : "Half Duplex"));
 		adapter->link_active = 1;
 		adapter->smartspeed = 0;
+		if_setbaudrate(ifp, adapter->link_speed * 1000000);
 		if_link_state_change(ifp, LINK_STATE_UP);
 	} else if (!link_check && (adapter->link_active == 1)) {
@@ -2466,7 +2483,8 @@
 	INIT_DEBUGOUT("em_stop: begin");
 
 	em_disable_intr(adapter);
-	callout_stop(&adapter->timer);
+	callout_stop(&adapter->watchdog);
+	callout_stop(&adapter->stats_collector);
 
 	/* Tell the stack that the interface is no longer active */
 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
@@ -2553,7 +2571,7 @@
 em_allocate_legacy(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
-	struct tx_ring	*txr = adapter->tx_rings;
+	struct em_queue	*que = adapter->queues;
 	int error, rid = 0;
 
 	/* Manually turn off all interrupts */
@@ -2562,9 +2580,9 @@
 	if (adapter->msix == 1) /* using MSI */
 		rid = 1;
 	/* We allocate a single interrupt resource */
-	adapter->res = bus_alloc_resource_any(dev,
+	que->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
-	if (adapter->res == NULL) {
+	if (que->res == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: "
 		    "interrupt\n");
 		return (ENXIO);
 	}
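
The em_watchdog() scan earlier in this hunk only declares a ring hung after
several consecutive ticks without TX progress. A user-space model of that
state machine; the constants below are assumed stand-ins for the driver's
EM_TX_IDLE/EM_TX_BUSY/EM_TX_MAXTRIES/EM_TX_HUNG values, and the tick logic
is my reading of the code, not the driver itself:

    #include <stdio.h>

    #define TX_IDLE     0u
    #define TX_BUSY     1u
    #define TX_MAXTRIES 10u
    #define TX_HUNG     0x80000000u

    static unsigned int busy = TX_BUSY;

    /* One watchdog tick: cleaning progress resets the count, otherwise
     * a busy ring creeps toward the hung threshold. */
    static int
    watchdog_tick(int made_progress)
    {
        if (made_progress)
            busy = TX_BUSY;
        else if (busy != TX_IDLE && ++busy >= TX_MAXTRIES)
            busy = TX_HUNG;
        return (busy == TX_HUNG);
    }

    int
    main(void)
    {
        for (int tick = 1; tick <= 12; tick++) {
            if (watchdog_tick(0)) {
                printf("ring hung after %d ticks\n", tick);
                break;
            }
        }
        return (0);
    }
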
@@ -2574,24 +2592,20 @@
 	 * Allocate a fast interrupt and the associated
 	 * deferred processing contexts.
 	 */
-	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
-	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
-	    taskqueue_thread_enqueue, &adapter->tq);
-	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
-	    device_get_nameunit(adapter->dev));
-	/* Use a TX only tasklet for local timer */
-	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
-	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
-	    taskqueue_thread_enqueue, &txr->tq);
-	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
+	TASK_INIT(&que->que_task, 0, em_handle_que, que);
+	que->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
+	    taskqueue_thread_enqueue, &que->tq);
+	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
 	    device_get_nameunit(adapter->dev));
+
 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
-	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
-	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
+
+	if ((error = bus_setup_intr(dev, que->res, INTR_TYPE_NET,
+	    em_irq_fast, NULL, que, &que->tag)) != 0) {
 		device_printf(dev, "Failed to register fast interrupt "
 		    "handler: %d\n", error);
-		taskqueue_free(adapter->tq);
-		adapter->tq = NULL;
+		taskqueue_free(que->tq);
+		que->tq = NULL;
 		return (error);
 	}
 
@@ -2610,105 +2624,90 @@
 em_allocate_msix(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
-	struct		tx_ring *txr = adapter->tx_rings;
-	struct		rx_ring *rxr = adapter->rx_rings;
+	struct		em_queue *que = adapter->queues;
 	int		error, rid, vector = 0;
 	int		cpu_id = 0;
+#ifdef RSS
+	cpuset_t	cpu_mask;
+#endif
 
 	/* Make sure all interrupts are disabled */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
+#ifdef RSS
+	if (adapter->num_queues != rss_getnumbuckets()) {
+		device_printf(dev,
+		    "%s: number of queues (%d) != number of RSS buckets (%d);"
+		    " performance will be impacted.\n",
+		    __func__, adapter->num_queues, rss_getnumbuckets());
+	}
+#endif
+
 	/* First set up ring resources */
-	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
+	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
 
 		/* RX ring */
 		rid = vector + 1;
-		rxr->res = bus_alloc_resource_any(dev,
-		    SYS_RES_IRQ, &rid, RF_ACTIVE);
-		if (rxr->res == NULL) {
+		que->res = bus_alloc_resource_any(dev,
+		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
+		if (que->res == NULL) {
 			device_printf(dev,
 			    "Unable to allocate bus resource: "
-			    "RX MSIX Interrupt %d\n", i);
+			    "MSIX QUEUE Interrupt %d\n", i);
 			return (ENXIO);
 		}
-		if ((error = bus_setup_intr(dev, rxr->res,
-		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
-		    rxr, &rxr->tag)) != 0) {
-			device_printf(dev, "Failed to register RX handler");
+		if ((error = bus_setup_intr(dev, que->res,
+		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_que,
+		    que, &que->tag)) != 0) {
+			device_printf(dev, "Failed to register QUEUE handler");
			return (error);
		}
 #if __FreeBSD_version >= 800504
-		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
+		bus_describe_intr(dev, que->res, que->tag, "que%d", i);
 #endif
-		rxr->msix = vector;
+		que->msix = vector;
 
+#ifdef RSS
+		cpu_id = rss_getcpu(i % rss_getnumbuckets());
+#else
 		if (em_last_bind_cpu < 0)
 			em_last_bind_cpu = CPU_FIRST();
 		cpu_id = em_last_bind_cpu;
-		bus_bind_intr(dev, rxr->res, cpu_id);
+#endif /* RSS */
+		bus_bind_intr(dev, que->res, cpu_id);
 
-		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
-		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
-		    taskqueue_thread_enqueue, &rxr->tq);
-		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
-		    device_get_nameunit(adapter->dev), cpu_id);
+		TASK_INIT(&que->que_task, 0, em_handle_que, que);
+#ifdef EM_MULTIQUEUE
+		TASK_INIT(&que->txq_task, 0, em_deferred_mq_start, que);
+		que->tq = taskqueue_create_fast("em_queue", M_NOWAIT,
+		    taskqueue_thread_enqueue, &que->tq);
+#endif
 
 		/*
 		** Set the bit to enable interrupt
 		** in E1000_IMS -- bits 20 and 21
-		** are for RX0 and RX1, note this has
+		** are for RX0 and RX1, bits 22 and 23
+		** are for TX0 and TX1.  Note this has
 		** NOTHING to do with the MSIX vector
 		*/
-		rxr->ims = 1 << (20 + i);
-		adapter->ims |= rxr->ims;
-		adapter->ivars |= (8 | rxr->msix) << (i * 4);
+		que->ims = (1 << (20 + i)) | (1 << (22 + i));
+		adapter->ivars |= (8 | que->msix) << (i * 4);
+		adapter->ivars |= (8 | que->msix) << (8 + (i * 4));
+		adapter->que_mask |= que->ims;
 
-		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
-	}
-
-	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
-		/* TX ring */
-		rid = vector + 1;
-		txr->res = bus_alloc_resource_any(dev,
-		    SYS_RES_IRQ, &rid, RF_ACTIVE);
-		if (txr->res == NULL) {
-			device_printf(dev,
-			    "Unable to allocate bus resource: "
-			    "TX MSIX Interrupt %d\n", i);
-			return (ENXIO);
-		}
-		if ((error = bus_setup_intr(dev, txr->res,
-		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
-		    txr, &txr->tag)) != 0) {
-			device_printf(dev, "Failed to register TX handler");
-			return (error);
-		}
-#if __FreeBSD_version >= 800504
-		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
-#endif
-		txr->msix = vector;
-
-		if (em_last_bind_cpu < 0)
-			em_last_bind_cpu = CPU_FIRST();
-		cpu_id = em_last_bind_cpu;
-		bus_bind_intr(dev, txr->res, cpu_id);
-
-		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
-		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
-		    taskqueue_thread_enqueue, &txr->tq);
-		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
+#ifdef EM_MULTIQUEUE
+#ifdef RSS
+		CPU_SETOF(cpu_id, &cpu_mask);
+		taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
+		    &cpu_mask, "%s que (bucket %d)",
 		    device_get_nameunit(adapter->dev), cpu_id);
-		/*
-		** Set the bit to enable interrupt
-		** in E1000_IMS -- bits 22 and 23
-		** are for TX0 and TX1, note this has
-		** NOTHING to do with the MSIX vector
-		*/
-		txr->ims = 1 << (22 + i);
-		adapter->ims |= txr->ims;
-		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
-
+#else
+		taskqueue_start_threads(&que->tq, 1, PI_NET,
+		    "%s que (qid %d)",
+		    device_get_nameunit(adapter->dev), cpu_id);
+#endif
+#endif
 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
 	}
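
em_allocate_msix() above now routes both causes of a queue to a single
vector: ICR bits 20+i (RX) and 22+i (TX) are collected in que->ims, and the
vector number (with the 0x8 enable bit) lands in both the RX and TX nibbles
of IVAR. A standalone sketch of that bit math, with a hypothetical helper
name (not driver code):

    #include <stdint.h>
    #include <stdio.h>

    static void
    route_queue(int i, uint32_t msix_vec, uint32_t *ims, uint32_t *ivars)
    {
        *ims |= (1u << (20 + i)) | (1u << (22 + i)); /* RXQi | TXQi */
        *ivars |= (8u | msix_vec) << (i * 4);        /* RX nibble */
        *ivars |= (8u | msix_vec) << (8 + (i * 4));  /* TX nibble */
    }

    int
    main(void)
    {
        uint32_t que_mask = 0, ivars = 0;

        route_queue(0, 0, &que_mask, &ivars);  /* queue 0 -> vector 0 */
        route_queue(1, 1, &que_mask, &ivars);  /* queue 1 -> vector 1 */
        /* Prints que_mask 0x00f00000 ivars 0x00009898. */
        printf("que_mask 0x%08x ivars 0x%08x\n", que_mask, ivars);
        return (0);
    }
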
@@ -2744,41 +2743,23 @@
 static void
 em_free_pci_resources(struct adapter *adapter)
 {
+	struct em_queue	*que = adapter->queues;
 	device_t	dev = adapter->dev;
-	struct tx_ring	*txr;
-	struct rx_ring	*rxr;
 	int		rid;
 
 	/*
 	** Release all the queue interrupt resources:
 	*/
-	for (int i = 0; i < adapter->num_queues; i++) {
-		txr = &adapter->tx_rings[i];
-		/* an early abort? */
-		if (txr == NULL)
-			break;
-		rid = txr->msix + 1;
-		if (txr->tag != NULL) {
-			bus_teardown_intr(dev, txr->res, txr->tag);
-			txr->tag = NULL;
+	for (int i = 0; i < adapter->num_queues; i++, que++) {
+		rid = que->msix + 1;
+		if (que->tag != NULL) {
+			bus_teardown_intr(dev, que->res, que->tag);
+			que->tag = NULL;
 		}
-		if (txr->res != NULL)
+		if (que->res != NULL)
 			bus_release_resource(dev, SYS_RES_IRQ,
-			    rid, txr->res);
-
-		rxr = &adapter->rx_rings[i];
-		/* an early abort? */
-		if (rxr == NULL)
-			break;
-		rid = rxr->msix + 1;
-		if (rxr->tag != NULL) {
-			bus_teardown_intr(dev, rxr->res, rxr->tag);
-			rxr->tag = NULL;
-		}
-		if (rxr->res != NULL)
-			bus_release_resource(dev, SYS_RES_IRQ,
-			    rid, rxr->res);
+			    rid, que->res);
 	}
 
 	if (adapter->linkvec) /* we are doing MSIX */
@@ -2826,12 +2807,13 @@
 	/*
 	** Try using MSI-X for Hartwell adapters
 	*/
-	if ((adapter->hw.mac.type == e1000_82574) &&
-	    (em_enable_msix == TRUE)) {
+	if (em_enable_msix == TRUE) {
+#ifdef RSS
+		if (adapter->num_queues > rss_getnumbuckets())
+			adapter->num_queues = rss_getnumbuckets();
+#endif
 #ifdef EM_MULTIQUEUE
 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
-		if (adapter->num_queues > 1)
-			em_enable_vectors_82574(adapter);
 #endif
 		/* Map the MSIX BAR */
 		int rid = PCIR_BAR(EM_MSIX_BAR);
@@ -2846,10 +2828,10 @@
 		val = pci_msix_count(dev);
 
 #ifdef EM_MULTIQUEUE
-		/* We need 5 vectors in the multiqueue case */
-		if (adapter->num_queues > 1 ) {
-			if (val >= 5)
-				val = 5;
+		/* We need 3 vectors in the multiqueue case */
+		if (adapter->num_queues > 1) {
+			if (val >= 3)
+				val = 3;
 			else {
 				adapter->num_queues = 1;
 				device_printf(adapter->dev,
@@ -3386,19 +3368,29 @@
 em_allocate_queues(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
+	struct em_queue	*que = NULL;
 	struct tx_ring	*txr = NULL;
 	struct rx_ring	*rxr = NULL;
 	int rsize, tsize, error = E1000_SUCCESS;
 	int txconf = 0, rxconf = 0;
 
+	/* First allocate the top level queue structs */
+	if (!(adapter->queues =
+	    (struct em_queue *) malloc(sizeof(struct em_queue) *
+	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
+		device_printf(dev, "Unable to allocate queue memory\n");
+		error = ENOMEM;
+		goto fail;
+	}
+
 	/* Allocate the TX ring struct memory */
 	if (!(adapter->tx_rings =
 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate TX ring memory\n");
 		error = ENOMEM;
-		goto fail;
+		goto tx_fail;
 	}
 
 	/* Now allocate the RX */
@@ -3485,6 +3477,16 @@
 		}
 	}
 
+	/*
+	** Finally set up the queue holding structs
+	*/
+	for (int i = 0; i < adapter->num_queues; i++) {
+		que = &adapter->queues[i];
+		que->adapter = adapter;
+		que->txr = &adapter->tx_rings[i];
+		que->rxr = &adapter->rx_rings[i];
+	}
+
 	return (0);
 
 err_rx_desc:
@@ -3499,6 +3501,8 @@
 		buf_ring_free(txr->br, M_DEVBUF);
 #endif
 	free(adapter->tx_rings, M_DEVBUF);
+tx_fail:
+	free(adapter->queues, M_DEVBUF);
 fail:
 	return (error);
 }
@@ -3516,7 +3520,7 @@
 {
 	struct adapter *adapter = txr->adapter;
 	device_t dev = adapter->dev;
-	struct em_txbuffer *txbuf;
+	struct em_buffer *txbuf;
 	int error, i;
 
 	/*
@@ -3539,7 +3543,7 @@
 	}
 
 	if (!(txr->tx_buffers =
-	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
+	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		error = ENOMEM;
@@ -3572,7 +3576,7 @@
 em_setup_transmit_ring(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
-	struct em_txbuffer *txbuf;
+	struct em_buffer *txbuf;
 	int i;
 #ifdef DEV_NETMAP
 	struct netmap_slot *slot;
@@ -3800,7 +3804,7 @@
 em_free_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter		*adapter = txr->adapter;
-	struct em_txbuffer	*txbuf;
+	struct em_buffer	*txbuf;
 
 	INIT_DEBUGOUT("free_transmit_ring: begin");
 
@@ -3867,7 +3871,7 @@
 {
 	struct adapter *adapter = txr->adapter;
 	struct e1000_context_desc *TXD = NULL;
-	struct em_txbuffer *tx_buffer;
+	struct em_buffer *tx_buffer;
 	int cur, hdr_len;
 	u32 cmd = 0;
 	u16 offload = 0;
@@ -4022,7 +4026,7 @@
 {
 	struct adapter *adapter = txr->adapter;
 	struct e1000_context_desc *TXD;
-	struct em_txbuffer *tx_buffer;
+	struct em_buffer *tx_buffer;
 	int cur, hdr_len;
 
 	/*
@@ -4100,7 +4104,7 @@
 {
 	struct adapter	*adapter = txr->adapter;
 	int first, last, done, processed;
-	struct em_txbuffer *tx_buffer;
+	struct em_buffer *tx_buffer;
 	struct e1000_tx_desc   *tx_desc, *eop_desc;
 	if_t ifp = adapter->ifp;
 
@@ -4217,7 +4221,7 @@
 	struct adapter		*adapter = rxr->adapter;
 	struct mbuf		*m;
 	bus_dma_segment_t	segs;
-	struct em_rxbuffer	*rxbuf;
+	struct em_buffer	*rxbuf;
 	int			i, j, error, nsegs;
 	bool			cleaned = FALSE;
 
@@ -4299,10 +4303,10 @@
 {
 	struct adapter		*adapter = rxr->adapter;
 	device_t		dev = adapter->dev;
-	struct em_rxbuffer	*rxbuf;
+	struct em_buffer	*rxbuf;
 	int			error;
 
-	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
+	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (rxr->rx_buffers == NULL) {
 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
@@ -4355,7 +4359,7 @@
 em_setup_receive_ring(struct rx_ring *rxr)
 {
 	struct adapter		*adapter = rxr->adapter;
-	struct em_rxbuffer	*rxbuf;
+	struct em_buffer	*rxbuf;
 	bus_dma_segment_t	seg[1];
 	int			rsize, nsegs, error = 0;
 #ifdef DEV_NETMAP
@@ -4463,7 +4467,7 @@
 	for (int i = 0; i < q; ++i) {
 		rxr = &adapter->rx_rings[i];
 		for (int n = 0; n < adapter->num_rx_desc; n++) {
-			struct em_rxbuffer *rxbuf;
+			struct em_buffer *rxbuf;
 			rxbuf = &rxr->rx_buffers[n];
 			if (rxbuf->m_head != NULL) {
 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
@@ -4510,7 +4514,7 @@
 em_free_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter		*adapter = rxr->adapter;
-	struct em_rxbuffer	*rxbuf = NULL;
+	struct em_buffer	*rxbuf = NULL;
 
 	INIT_DEBUGOUT("free_receive_buffers: begin");
 
@@ -4552,7 +4556,7 @@
 static void
 em_initialize_receive_unit(struct adapter *adapter)
 {
-	struct rx_ring	*rxr = adapter->rx_rings;
+	struct rx_ring *rxr = adapter->rx_rings;
 	if_t ifp = adapter->ifp;
 	struct e1000_hw	*hw = &adapter->hw;
 	u32	rctl, rxcsum, rfctl;
@@ -4634,11 +4638,16 @@
 	uint8_t  rss_key[4 * RSSKEYLEN];
 	uint32_t reta = 0;
 	int i;
+	u32 mrqc;
 
 	/*
 	* Configure RSS key
 	*/
+#ifdef RSS
+	rss_getkey((uint8_t *)&rss_key);
+#else
 	arc4rand(rss_key, sizeof(rss_key), 0);
+#endif
 	for (i = 0; i < RSSKEYLEN; ++i) {
 		uint32_t rssrk = 0;
@@ -4653,7 +4662,12 @@
 	for (i = 0; i < sizeof(reta); ++i) {
 		uint32_t q;
 
+#ifdef RSS
+		q = rss_get_indirection_to_bucket(i);
+		q = (q % adapter->num_queues) << 7;
+#else
 		q = (i % adapter->num_queues) << 7;
+#endif
 		reta |= q << (8 * i);
 	}
 
@@ -4661,12 +4675,15 @@
 		E1000_WRITE_REG(hw, E1000_RETA(i), reta);
 
-	E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
-	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
-	    E1000_MRQC_RSS_FIELD_IPV4 |
-	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
-	    E1000_MRQC_RSS_FIELD_IPV6_EX |
-	    E1000_MRQC_RSS_FIELD_IPV6);
+
+	mrqc = E1000_READ_REG(hw, E1000_MRQC);
+	mrqc |= (E1000_MRQC_RSS_ENABLE_2Q |
+	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
+	    E1000_MRQC_RSS_FIELD_IPV4 |
+	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
+	    E1000_MRQC_RSS_FIELD_IPV6_EX |
+	    E1000_MRQC_RSS_FIELD_IPV6);
+	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
 }
 #endif
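
The indirection table setup in em_initialize_rss_mapping() above packs four
one-byte entries into each 32-bit RETA register, with the queue index
carried in bit 7 of every byte. A compilable sketch of the packing for the
two-queue case (illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t reta = 0;
        int num_queues = 2;

        for (unsigned int i = 0; i < sizeof(reta); ++i) {
            uint32_t q = (i % num_queues) << 7;
            reta |= q << (8 * i);
        }
        /* Alternating queue 0/1 entries: prints 0x80008000. */
        printf("RETA word: 0x%08x\n", reta);
        return (0);
    }
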
@@ -4766,9 +4783,10 @@
  *  For polling we also now return the number of cleaned packets
 *********************************************************************/
 static bool
-em_rxeof(struct rx_ring *rxr, int count, int *done)
+em_rxeof(struct em_queue *que, int count, int *done)
 {
-	struct adapter		*adapter = rxr->adapter;
+	struct adapter		*adapter = que->adapter;
+	struct rx_ring		*rxr = que->rxr;
 	if_t ifp = adapter->ifp;
 	struct mbuf		*mp, *sendmp;
 	u32			status = 0;
@@ -4777,7 +4795,7 @@
 	bool			eop;
 	union e1000_rx_desc_extended	*cur;
 
-	EM_RX_LOCK(rxr);
+	EM_RX_LOCK_ASSERT(rxr);
 
 	/* Sync the ring */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
@@ -4786,7 +4804,6 @@
 
 #ifdef DEV_NETMAP
 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
-		EM_RX_UNLOCK(rxr);
 		return (FALSE);
 	}
 #endif /* DEV_NETMAP */
@@ -4839,6 +4856,7 @@
 
 		if (eop) {
 			--count;
+			rxr->rx_packets++;
 			sendmp = rxr->fmp;
 			if_setrcvif(sendmp, ifp);
 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
@@ -4846,18 +4864,75 @@
 #ifndef __NO_STRICT_ALIGNMENT
 			if (adapter->hw.mac.max_frame_size >
 			    (MCLBYTES - ETHER_ALIGN) &&
-			    em_fixup_rx(rxr) != 0)
+			    em_fixup_rx(rxr) != 0) {
 				goto skip;
+			}
 #endif
 			if (status & E1000_RXD_STAT_VP) {
 				if_setvtag(sendmp, le16toh(cur->wb.upper.vlan));
 				sendmp->m_flags |= M_VLANTAG;
 			}
+			/*
+			 * In case of multiqueue, we have RXCSUM.PCSD bit set
+			 * and never cleared.  This means we have an RSS hash
+			 * available to be used.
+			 */
+			if (adapter->num_queues > 1) {
+#if __FreeBSD_version < 1100000
+				sendmp->m_pkthdr.flowid =
+				    le32toh(cur->wb.lower.hi_dword.rss);
+				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
+#else
+				u32 pkt_info;
+
+				sendmp->m_pkthdr.flowid =
+				    le32toh(cur->wb.lower.hi_dword.rss);
+				pkt_info =
+				    le32toh(cur->wb.lower.mrq);
+				switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
+				case E1000_RXDADV_RSSTYPE_IPV4_TCP:
+					M_HASHTYPE_SET(sendmp,
+					    M_HASHTYPE_RSS_TCP_IPV4);
+					rxr->rx_hash_ipv4_tcp++;
+					break;
+				case E1000_RXDADV_RSSTYPE_IPV4:
+					M_HASHTYPE_SET(sendmp,
+					    M_HASHTYPE_RSS_IPV4);
+					rxr->rx_hash_ipv4++;
+					break;
+				case E1000_RXDADV_RSSTYPE_IPV6_TCP:
+					M_HASHTYPE_SET(sendmp,
+					    M_HASHTYPE_RSS_TCP_IPV6);
+					rxr->rx_hash_ipv6_tcp++;
+					break;
+				case E1000_RXDADV_RSSTYPE_IPV6_EX:
+					M_HASHTYPE_SET(sendmp,
+					    M_HASHTYPE_RSS_IPV6_EX);
+					rxr->rx_hash_ipv6_ex++;
+					break;
+				case E1000_RXDADV_RSSTYPE_IPV6:
+					M_HASHTYPE_SET(sendmp,
+					    M_HASHTYPE_RSS_IPV6);
+					rxr->rx_hash_ipv6++;
+					break;
+				default:
+					/* No RSS hash type reported */
+					M_HASHTYPE_SET(sendmp,
+					    M_HASHTYPE_OPAQUE);
+					rxr->rx_hash_none++;
+				}
+			} else {
+				sendmp->m_pkthdr.flowid = que->msix;
+				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
+				rxr->rx_hash_none++;
+#endif
+			}
 #ifndef __NO_STRICT_ALIGNMENT
 skip:
 #endif
 			rxr->fmp = rxr->lmp = NULL;
+			/* Make sure M_PKTHDR is set. */
+			sendmp->m_flags |= M_PKTHDR;
 		}
 next_desc:
@@ -4877,7 +4952,7 @@
 		if (sendmp != NULL) {
 			rxr->next_to_check = i;
 			EM_RX_UNLOCK(rxr);
-			if_input(ifp, sendmp);
+			(*ifp->if_input)(ifp, sendmp);
 			EM_RX_LOCK(rxr);
 			i = rxr->next_to_check;
 		}
@@ -4896,7 +4971,6 @@
 	rxr->next_to_check = i;
 	if (done != NULL)
 		*done = rxdone;
-	EM_RX_UNLOCK(rxr);
 
 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
 }
 
@@ -4904,7 +4978,7 @@
 static __inline void
 em_rx_discard(struct rx_ring *rxr, int i)
 {
-	struct em_rxbuffer	*rbuf;
+	struct em_buffer	*rbuf;
 
 	rbuf = &rxr->rx_buffers[i];
 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
@@ -4977,7 +5051,7 @@
 #endif
 
 static void
-em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
+em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_buffer *rxbuf)
 {
 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
 	/* DD bits must be cleared */
@@ -5114,8 +5188,8 @@
 	u32 ims_mask = IMS_ENABLE_MASK;
 
 	if (hw->mac.type == e1000_82574) {
-		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
-		ims_mask |= adapter->ims;
+		E1000_WRITE_REG(hw, EM_EIAC, adapter->que_mask);
+		ims_mask |= adapter->que_mask;
 	}
 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
 }
@@ -5512,8 +5586,9 @@
  *
 **********************************************************************/
 static void
-em_update_stats_counters(struct adapter *adapter)
+em_update_stats_counters(void *arg)
 {
+	struct adapter *adapter = arg;
 
 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
@@ -5595,23 +5670,36 @@
 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
 		adapter->stats.cexterr +=
 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
-		adapter->stats.tsctc += 
+		adapter->stats.tsctc +=
 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
 		adapter->stats.tsctfc +=
 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
 	}
+
+	callout_reset(&adapter->stats_collector, hz,
+	    em_update_stats_counters, adapter);
 }
 
 static uint64_t
 em_get_counter(if_t ifp, ift_counter cnt)
 {
 	struct adapter *adapter;
+	struct tx_ring *txr;
+	uint64_t rv;
 
 	adapter = if_getsoftc(ifp);
 
 	switch (cnt) {
-	case IFCOUNTER_COLLISIONS:
-		return (adapter->stats.colc);
+	case IFCOUNTER_IPACKETS:
+		return (adapter->stats.gprc);
+	case IFCOUNTER_OPACKETS:
+		return (adapter->stats.gptc);
+	case IFCOUNTER_IBYTES:
+		return (adapter->stats.gorc);
+	case IFCOUNTER_OBYTES:
+		return (adapter->stats.gotc);
+	case IFCOUNTER_IMCASTS:
+		return (adapter->stats.mprc);
+	case IFCOUNTER_OMCASTS:
+		return (adapter->stats.mptc);
 	case IFCOUNTER_IERRORS:
 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
@@ -5620,6 +5708,16 @@
 	case IFCOUNTER_OERRORS:
 		return (adapter->stats.ecol + adapter->stats.latecol +
 		    adapter->watchdog_events);
+	case IFCOUNTER_COLLISIONS:
+		return (adapter->stats.colc);
+	case IFCOUNTER_IQDROPS:
+		return (adapter->stats.mpc);
+	case IFCOUNTER_OQDROPS:
+		rv = 0;
+		txr = adapter->tx_rings;
+		for (int i = 0; i < adapter->num_queues; i++, txr++)
+			rv += txr->br->br_drops;
+		return (rv);
 	default:
 		return (if_get_counter_default(ifp, cnt));
 	}
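
In em_enable_intr() above, the 82574 path ORs the per-queue cause bits into
the IMS enable mask and writes the same bits to EIAC so MSI-X delivery
auto-clears them. A stubbed sketch of the two writes; the IMS_ENABLE_MASK
value and write_reg() helper are placeholders, not the driver's:

    #include <stdint.h>
    #include <stdio.h>

    #define IMS_ENABLE_MASK 0x00000014u    /* placeholder value */

    static void
    write_reg(const char *reg, uint32_t val)
    {
        printf("%s <- 0x%08x\n", reg, val);  /* stands in for E1000_WRITE_REG */
    }

    int
    main(void)
    {
        /* que_mask as built in em_allocate_msix(): RX/TX bits per queue */
        uint32_t que_mask = (1u << 20) | (1u << 22) |   /* queue 0 */
            (1u << 21) | (1u << 23);                    /* queue 1 */

        write_reg("EIAC", que_mask);                    /* auto-clear these */
        write_reg("IMS", IMS_ENABLE_MASK | que_mask);   /* enable them */
        return (0);
    }
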
@@ -5647,6 +5745,7 @@
 
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
+	struct em_queue *que = adapter->queues;
 
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
@@ -5694,7 +5793,7 @@
 	    CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
 	    "Flow Control Low Watermark");
 
-	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++, que++) {
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 		    CTLFLAG_RD, NULL, "TX Queue Name");
@@ -5710,12 +5809,15 @@
 		    E1000_TDT(txr->me),
 		    em_sysctl_reg_handler, "IU",
 		    "Transmit Descriptor Tail");
-		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
-		    CTLFLAG_RD, &txr->tx_irq,
-		    "Queue MSI-X Transmit Interrupts");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "queue_irq",
+		    CTLFLAG_RD, &que->irqs,
+		    "Queue MSI-X Interrupts");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
 		    CTLFLAG_RD, &txr->no_desc_avail,
 		    "Queue No Descriptor Available");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
+		    CTLFLAG_RD, &txr->total_packets,
+		    "Queue Packets Transmitted");
 
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
@@ -5732,9 +5834,32 @@
 		    E1000_RDT(rxr->me),
 		    em_sysctl_reg_handler, "IU",
 		    "Receive Descriptor Tail");
-		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
-		    CTLFLAG_RD, &rxr->rx_irq,
-		    "Queue MSI-X Receive Interrupts");
+		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
+		    CTLFLAG_RD, &rxr->rx_packets,
+		    "Queue Packets Received");
+#if __FreeBSD_version >= 1100000
+		/*
+		 * Count packets that are hashed, based on card capabilities.
+		 */
+		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_hash_ipv4_tcp",
+		    CTLFLAG_RD, &rxr->rx_hash_ipv4_tcp,
+		    "Packets Hashed On IPV4 TCP");
+		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_hash_ipv4",
+		    CTLFLAG_RD, &rxr->rx_hash_ipv4,
+		    "Packets Hashed On IPV4");
+		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_hash_ipv6_tcp",
+		    CTLFLAG_RD, &rxr->rx_hash_ipv6_tcp,
+		    "Packets Hashed On IPV6 TCP");
+		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_hash_ipv6_ex",
+		    CTLFLAG_RD, &rxr->rx_hash_ipv6_ex,
+		    "Packets Hashed On IPV6 Extended Header");
+		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_hash_ipv6",
+		    CTLFLAG_RD, &rxr->rx_hash_ipv6,
+		    "Packets Hashed On IPV6");
+		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_hash_none",
+		    CTLFLAG_RD, &rxr->rx_hash_none,
+		    "Packets Not Hashed");
+#endif
 	}
 
@@ -6155,7 +6280,7 @@
 	else
 		printf("and ACTIVE\n");
 
-	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
 		device_printf(dev, "TX Queue %d ------\n", i);
 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
 		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
@@ -6176,33 +6301,6 @@
 	}
 }
 
-#ifdef EM_MULTIQUEUE
-/*
- * 82574 only:
- * Write a new value to the EEPROM increasing the number of MSIX
- * vectors from 3 to 5, for proper multiqueue support.
- */
-static void
-em_enable_vectors_82574(struct adapter *adapter)
-{
-	struct e1000_hw *hw = &adapter->hw;
-	device_t dev = adapter->dev;
-	u16 edata;
-
-	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
-	printf("Current cap: %#06x\n", edata);
-	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
-		device_printf(dev, "Writing to eeprom: increasing "
-		    "reported MSIX vectors from 3 to 5...\n");
-		edata &= ~(EM_NVM_MSIX_N_MASK);
-		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
-		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
-		e1000_update_nvm_checksum(hw);
-		device_printf(dev, "Writing to eeprom: done\n");
-	}
-}
-#endif
-
 #ifdef DDB
 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
 {
Index: sys/dev/netmap/if_em_netmap.h
===================================================================
--- sys/dev/netmap/if_em_netmap.h
+++ sys/dev/netmap/if_em_netmap.h
@@ -45,14 +45,11 @@
 {
 	if (adapter->msix > 1) { /* MSIX */
 		int i;
-		struct tx_ring *txr = adapter->tx_rings;
-		struct rx_ring *rxr = adapter->rx_rings;
+		struct em_queue *que = adapter->queues;
 
-		for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
-			taskqueue_block(txr->tq);
-			taskqueue_drain(txr->tq, &txr->tx_task);
-			taskqueue_block(rxr->tq);
-			taskqueue_drain(rxr->tq, &rxr->rx_task);
+		for (i = 0; i < adapter->num_queues; i++, que++) {
+			taskqueue_block(que->tq);
+			taskqueue_drain(que->tq, &que->que_task);
 		}
 	} else {	/* legacy */
 		taskqueue_block(adapter->tq);
@@ -66,13 +63,11 @@
 em_netmap_unblock_tasks(struct adapter *adapter)
 {
 	if (adapter->msix > 1) {
-		struct tx_ring *txr = adapter->tx_rings;
-		struct rx_ring *rxr = adapter->rx_rings;
+		struct em_queue *que = adapter->queues;
 		int i;
 
-		for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
-			taskqueue_unblock(txr->tq);
-			taskqueue_unblock(rxr->tq);
+		for (i = 0; i < adapter->num_queues; i++, que++) {
+			taskqueue_unblock(que->tq);
 		}
 	} else { /* legacy */
 		taskqueue_unblock(adapter->tq);
@@ -148,7 +143,7 @@
 
 			/* device-specific */
 			struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
-			struct em_txbuffer *txbuf = &txr->tx_buffers[nic_i];
+			struct em_buffer *txbuf = &txr->tx_buffers[nic_i];
 			int flags = (slot->flags & NS_REPORT ||
 				nic_i == 0 || nic_i == report_frequency) ?
 				E1000_TXD_CMD_RS : 0;
@@ -272,7 +267,7 @@
 			void *addr = PNMB(na, slot, &paddr);
 			union e1000_rx_desc_extended *curr = &rxr->rx_base[nic_i];
-			struct em_rxbuffer *rxbuf = &rxr->rx_buffers[nic_i];
+			struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];
 
 			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
 				goto ring_reset;
@@ -322,7 +317,8 @@
 	na.nm_txsync = em_netmap_txsync;
 	na.nm_rxsync = em_netmap_rxsync;
 	na.nm_register = em_netmap_reg;
-	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+	na.num_tx_rings = adapter->num_queues;
+	na.num_rx_rings = adapter->num_queues;
 	netmap_attach(&na);
 }
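
Worth spelling out: the EEPROM rewrite removed above
(em_enable_vectors_82574()) existed only because the old split TX/RX scheme
needed five MSI-X vectors while the 82574 reports three by default. With the
combined queue handler the budget is one vector per TX/RX pair plus one for
link, so the reported three suffice and no NVM write is required. A small
sketch of the new arithmetic (illustrative only):

    #include <stdio.h>

    static int
    vectors_needed(int num_queues)
    {
        return (num_queues + 1);    /* one per queue pair + link */
    }

    int
    main(void)
    {
        int reported = 3;    /* 82574 default MSI-X count */

        for (int q = 1; q <= 2; q++)
            printf("%d queue(s): need %d, have %d -> %s\n",
                q, vectors_needed(q), reported,
                vectors_needed(q) <= reported ? "ok" : "fall back to 1");
        return (0);
    }
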