Index: sys/dev/e1000/em_txrx.c
===================================================================
--- sys/dev/e1000/em_txrx.c
+++ sys/dev/e1000/em_txrx.c
@@ -42,10 +42,10 @@
 /*********************************************************************
  *  Local Function prototypes
  *********************************************************************/
-static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper,
-    u32 *txd_lower);
-static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi,
-    u32 *txd_upper, u32 *txd_lower);
+static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi,
+    uint32_t *txd_upper, uint32_t *txd_lower);
+static int em_transmit_checksum_setup(struct adapter *adapter,
+    if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower);
 static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
 static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
 static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
@@ -62,9 +62,10 @@
     qidx_t budget);
 static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);
 
-static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri);
+static void lem_receive_checksum(uint32_t status, uint32_t errors,
+    if_rxd_info_t ri);
 static void em_receive_checksum(uint32_t status, if_rxd_info_t ri);
-static int em_determine_rsstype(u32 pkt_info);
+static int em_determine_rsstype(uint32_t pkt_info);
 extern int em_intr(void *arg);
 
 struct if_txrx em_txrx = {
@@ -133,13 +134,16 @@
  *
  **********************************************************************/
 static int
-em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
+em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, uint32_t *txd_upper,
+    uint32_t *txd_lower)
 {
 	if_softc_ctx_t scctx = adapter->shared;
 	struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx];
 	struct tx_ring *txr = &que->txr;
+	struct e1000_hw *hw = &adapter->hw;
 	struct e1000_context_desc *TXD;
 	int cur, hdr_len;
+	uint32_t cmd_type_len;
 
 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
@@ -179,44 +183,35 @@
 	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
 
-	TXD->cmd_and_length = htole32(adapter->txd_cmd |
-				E1000_TXD_CMD_DEXT |	/* Extended descr */
-				E1000_TXD_CMD_TSE |	/* TSE context */
-				E1000_TXD_CMD_IP |	/* Do IP csum */
-				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
-				      (pi->ipi_len - hdr_len));	/* Total len */
+	/* 8254x SDM4.0 page 45, and PCIe GbE SDM2.5 page 63
+	 * - Set up basic TUCMDs
+	 * - Enable IP bit on 82544.
+	 * - For others IP bit on indicates IPv4, while IP bit off indicates IPv6
+	 */
+	cmd_type_len = adapter->txd_cmd |
+	    E1000_TXD_CMD_DEXT |	/* Extended descr */
+	    E1000_TXD_CMD_TSE |		/* TSE context */
+	    E1000_TXD_CMD_TCP;		/* Do TCP checksum */
+	if (hw->mac.type == e1000_82544)
+		cmd_type_len |= E1000_TXD_CMD_IP;
+	else if (pi->ipi_etype == ETHERTYPE_IP)
+		cmd_type_len |= E1000_TXD_CMD_IP;
+
+	TXD->cmd_and_length = htole32(cmd_type_len |
+	    (pi->ipi_len - hdr_len));	/* Total len */
 	txr->tx_tso = TRUE;
 
 	if (++cur == scctx->isc_ntxd[0]) {
 		cur = 0;
 	}
-	DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur);
+	DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__,
+	    pi->ipi_pidx, cur);
 	return (cur);
 }
 
-#define TSO_WORKAROUND 4
-#define DONT_FORCE_CTX 1
-
-
-/*********************************************************************
- *  The offload context is protocol specific (TCP/UDP) and thus
- *  only needs to be set when the protocol changes. The occasion
- *  of a context change can be a performance detriment, and
- *  might be better just disabled. The reason arises in the way
- *  in which the controller supports pipelined requests from the
- *  Tx data DMA. Up to four requests can be pipelined, and they may
- *  belong to the same packet or to multiple packets. However all
- *  requests for one packet are issued before a request is issued
- *  for a subsequent packet and if a request for the next packet
- *  requires a context change, that request will be stalled
- *  until the previous request completes. This means setting up
- *  a new context effectively disables pipelined Tx data DMA which
- *  in turn greatly slow down performance to send small sized
- *  frames.
- **********************************************************************/
-
 static int
-em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower)
+em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi,
+    uint32_t *txd_upper, uint32_t *txd_lower)
 {
 	struct e1000_context_desc *TXD = NULL;
 	if_softc_ctx_t scctx = adapter->shared;
@@ -224,19 +219,35 @@
 	struct tx_ring *txr = &que->txr;
 	int csum_flags = pi->ipi_csum_flags;
 	int cur, hdr_len;
-	u32 cmd;
+	uint32_t cmd;
 
 	cur = pi->ipi_pidx;
 	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
 	cmd = adapter->txd_cmd;
 
-	/*
-	 * The 82574L can only remember the *last* context used
-	 * regardless of queue that it was use for.  We cannot reuse
-	 * contexts on this hardware platform and must generate a new
-	 * context every time.  82574L hardware spec, section 7.2.6,
-	 * second note.
-	 */
+	/*********************************************************************
+	 *  The offload context is protocol specific (TCP/UDP) and thus
+	 *  only needs to be set when the protocol changes. The occasion
+	 *  of a context change can be a performance detriment, and
+	 *  might be better just disabled. The reason arises in the way
+	 *  in which the controller supports pipelined requests from the
+	 *  Tx data DMA. Up to four requests can be pipelined, and they may
+	 *  belong to the same packet or to multiple packets. However all
+	 *  requests for one packet are issued before a request is issued
+	 *  for a subsequent packet and if a request for the next packet
+	 *  requires a context change, that request will be stalled
+	 *  until the previous request completes. This means setting up
+	 *  a new context effectively disables pipelined Tx data DMA which
+	 *  in turn greatly slows down performance when sending small-sized
+	 *  frames.
+	 *
+	 *  The 82574L can only remember the *last* context used
+	 *  regardless of queue that it was used for.  We cannot reuse
+	 *  contexts on this hardware platform and must generate a new
+	 *  context every time.  82574L hardware spec, section 7.2.6,
+	 *  second note.
+	 **********************************************************************/
+#define DONT_FORCE_CTX 1
 	if (DONT_FORCE_CTX &&
 	    adapter->tx_num_queues == 1 &&
 	    txr->csum_lhlen == pi->ipi_ehdrlen &&
@@ -261,7 +272,8 @@
 		 */
 		TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
-		TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);
+		TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen +
+		    offsetof(struct ip, ip_sum);
 		cmd |= E1000_TXD_CMD_IP;
 	}
 
@@ -310,7 +322,7 @@
 	int nsegs = pi->ipi_nsegs;
 	int csum_flags = pi->ipi_csum_flags;
 	int i, j, first, pidx_last;
-	u32 txd_flags, txd_upper = 0, txd_lower = 0;
+	uint32_t txd_flags, txd_upper = 0, txd_lower = 0;
 
 	struct e1000_tx_desc *ctxd = NULL;
 	bool do_tso, tso_desc;
@@ -341,7 +353,7 @@
 		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
 	}
 
-	if (pi->ipi_mflags & M_VLANTAG) {
+	if (pi->ipi_mflags & M_VLANTAG && !do_tso) {
 		/* Set the vlan id. */
 		txd_upper |= htole16(pi->ipi_vtag) << 16;
 		/* Tell hardware to add tag */
@@ -367,6 +379,7 @@
 		 * If this is the last descriptor, we want to
 		 * split it so we have a small final sentinel
 		 */
+#define TSO_WORKAROUND 4
 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
 			seg_len -= TSO_WORKAROUND;
 			ctxd->buffer_addr = htole64(seg_addr);
@@ -556,7 +569,7 @@
 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
 	struct rx_ring *rxr = &que->rxr;
 	struct e1000_rx_desc *rxd;
-	u32 staterr = 0;
+	uint32_t staterr = 0;
 	int cnt, i;
 
 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
@@ -581,7 +594,7 @@
 	struct em_rx_queue *que = &sc->rx_queues[rxqid];
 	struct rx_ring *rxr = &que->rxr;
 	union e1000_rx_desc_extended *rxd;
-	u32 staterr = 0;
+	uint32_t staterr = 0;
 	int cnt, i;
 
 	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
@@ -602,16 +615,17 @@
 lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 {
 	struct adapter *adapter = arg;
+	struct ifnet *ifp = iflib_get_ifp(adapter->ctx);
 	if_softc_ctx_t scctx = adapter->shared;
 	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
 	struct rx_ring *rxr = &que->rxr;
 	struct e1000_rx_desc *rxd;
-	u16 len;
-	u32 status, errors;
+	uint16_t len;
+	uint32_t status, errors;
 	bool eop;
 	int i, cidx;
 
 	status = errors = i = 0;
 	cidx = ri->iri_cidx;
 
 	do {
@@ -645,12 +659,13 @@
 		i++;
 	} while (!eop);
 
-	/* XXX add a faster way to look this up */
-	if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM))
+	ri->iri_csum_flags = 0;
+	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 		lem_receive_checksum(status, errors, ri);
 
-	if (status & E1000_RXD_STAT_VP) {
-		ri->iri_vtag = le16toh(rxd->special);
+	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
+	    (status & E1000_RXD_STAT_VP) != 0) {
+		ri->iri_vtag = le16toh(rxd->special) & E1000_RXD_SPC_VLAN_MASK;
 		ri->iri_flags |= M_VLANTAG;
 	}
 
@@ -663,18 +678,19 @@
 em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 {
 	struct adapter *adapter = arg;
+	struct ifnet *ifp = iflib_get_ifp(adapter->ctx);
 	if_softc_ctx_t scctx = adapter->shared;
 	struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx];
 	struct rx_ring *rxr = &que->rxr;
 	union e1000_rx_desc_extended *rxd;
 
-	u16 len;
-	u32 pkt_info;
-	u32 staterr = 0;
+	uint16_t len;
+	uint32_t pkt_info;
+	uint32_t staterr;
 	bool eop;
-	int i, cidx, vtag;
+	int i, cidx;
 
-	i = vtag = 0;
+	staterr = i = 0;
 	cidx = ri->iri_cidx;
 
 	do {
@@ -707,17 +723,15 @@
 		i++;
 	} while (!eop);
 
-	/* XXX add a faster way to look this up */
-	if (adapter->hw.mac.type >= e1000_82543)
+	ri->iri_csum_flags = 0;
+	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 		em_receive_checksum(staterr, ri);
 
-	if (staterr & E1000_RXD_STAT_VP) {
-		vtag = le16toh(rxd->wb.upper.vlan);
-	}
-
-	ri->iri_vtag = vtag;
-	if (vtag)
+	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
+	    (staterr & E1000_RXD_STAT_VP) != 0) {
+		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
 		ri->iri_flags |= M_VLANTAG;
+	}
 
 	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
 	ri->iri_rsstype = em_determine_rsstype(pkt_info);
@@ -727,78 +741,89 @@
 }
 
 /*********************************************************************
- *
+ * lem_receive_checksum
  *  Verify that the hardware indicated that the checksum is valid.
  *  Inform the stack about the status of checksum so that stack
  *  doesn't spend time verifying the checksum.
  *
  *********************************************************************/
 static void
-lem_receive_checksum(int status, int errors, if_rxd_info_t ri)
+lem_receive_checksum(uint32_t status, uint32_t errors, if_rxd_info_t ri)
 {
-	/* Did it pass? */
-	if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE))
-		ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
-
-	if (status & E1000_RXD_STAT_TCPCS) {
-		/* Did it pass? */
-		if (!(errors & E1000_RXD_ERR_TCPE)) {
-			ri->iri_csum_flags |=
-			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
-			ri->iri_csum_data = htons(0xffff);
-		}
-	}
-}
+	/* Ignore Checksum bit is set */
+	if (__predict_false(status & E1000_RXD_STAT_IXSM))
+		return;
 
-/********************************************************************
- *
- *  Parse the packet type to determine the appropriate hash
- *
- ******************************************************************/
-static int
-em_determine_rsstype(u32 pkt_info)
-{
-	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
-	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
-		return M_HASHTYPE_RSS_TCP_IPV4;
-	case E1000_RXDADV_RSSTYPE_IPV4:
-		return M_HASHTYPE_RSS_IPV4;
-	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
-		return M_HASHTYPE_RSS_TCP_IPV6;
-	case E1000_RXDADV_RSSTYPE_IPV6_EX:
-		return M_HASHTYPE_RSS_IPV6_EX;
-	case E1000_RXDADV_RSSTYPE_IPV6:
-		return M_HASHTYPE_RSS_IPV6;
-	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
-		return M_HASHTYPE_RSS_TCP_IPV6_EX;
-	default:
-		return M_HASHTYPE_OPAQUE;
+	/* If there is a layer 3 or 4 error we are done */
+	if (errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE))
+		return;
+
+	/* IP Checksum Good */
+	if (__predict_true(status & E1000_RXD_STAT_IPCS))
+		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
+
+	/* Valid TCP or UDP checksum */
+	if (__predict_true(status & E1000_RXD_STAT_TCPCS)) {
+		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+		ri->iri_csum_data = htons(0xffff);
 	}
 }
 
+/*********************************************************************
+ * em_receive_checksum
+ *  Verify that the hardware indicated that the checksum is valid.
+ *  Inform the stack about the status of checksum so that stack
+ *  doesn't spend time verifying the checksum.
+ *
+ *********************************************************************/
 static void
-em_receive_checksum(uint32_t status, if_rxd_info_t ri)
+em_receive_checksum(uint32_t staterr, if_rxd_info_t ri)
 {
-	ri->iri_csum_flags = 0;
+	uint16_t status = (uint16_t)staterr;
+	uint8_t errors = (uint8_t)(staterr >> 24);
 
 	/* Ignore Checksum bit is set */
-	if (status & E1000_RXD_STAT_IXSM)
+	if (__predict_false(status & E1000_RXD_STAT_IXSM))
+		return;
+
+	/* If there is a layer 3 or 4 error we are done */
+	if (errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE))
 		return;
 
-	/* If the IP checksum exists and there is no IP Checksum error */
-	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
-	    E1000_RXD_STAT_IPCS) {
+	/* IP Checksum Good */
+	if (__predict_true(status & E1000_RXD_STAT_IPCS))
 		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
-	}
 
-	/* TCP or UDP checksum */
-	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
-	    E1000_RXD_STAT_TCPCS) {
+	/* Valid TCP or UDP checksum */
+	if (__predict_true(status &
+	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
 		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 		ri->iri_csum_data = htons(0xffff);
 	}
-	if (status & E1000_RXD_STAT_UDPCS) {
-		ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
-		ri->iri_csum_data = htons(0xffff);
+}
+
+/********************************************************************
+ *
+ *  Parse the packet type to determine the appropriate hash
+ *
+ ******************************************************************/
+static int
+em_determine_rsstype(uint32_t pkt_info)
+{
+	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
+	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
+		return M_HASHTYPE_RSS_TCP_IPV4;
+	case E1000_RXDADV_RSSTYPE_IPV4:
+		return M_HASHTYPE_RSS_IPV4;
+	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
+		return M_HASHTYPE_RSS_TCP_IPV6;
+	case E1000_RXDADV_RSSTYPE_IPV6_EX:
+		return M_HASHTYPE_RSS_IPV6_EX;
+	case E1000_RXDADV_RSSTYPE_IPV6:
+		return M_HASHTYPE_RSS_IPV6;
+	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
+		return M_HASHTYPE_RSS_TCP_IPV6_EX;
+	default:
+		return M_HASHTYPE_OPAQUE;
 	}
 }
Index: sys/dev/e1000/if_em.c
===================================================================
--- sys/dev/e1000/if_em.c
+++ sys/dev/e1000/if_em.c
@@ -762,7 +762,7 @@
 #define EM_CAPS \
     IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \
     IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \
-    IFCAP_LRO | IFCAP_VLAN_HWTSO
+    IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM_IPV6 | IFCAP_TSO6
 
 #define IGB_CAPS \
     IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \
@@ -884,7 +884,8 @@
 		 * with Gigabit - in which case users may enable TSO manually.
 		 */
 		scctx->isc_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO);
-		scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO;
+		scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO |
+		    CSUM_IP6_TCP | CSUM_IP6_UDP;
 		/*
 		 * We support MSI-X with 82574 only, but indicate to iflib(4)
 		 * that it shall give MSI at least a try with other devices.
@@ -905,6 +906,10 @@
 		scctx->isc_capabilities = scctx->isc_capenable = LEM_CAPS;
 		if (hw->mac.type < e1000_82543)
 			scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM);
+		/* 8254x SDM4.0 page 33 - FDX requirement on these chips */
+		if (hw->mac.type == e1000_82547 || hw->mac.type == e1000_82547_rev_2)
+			scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM);
+
 		/* INTx only */
 		scctx->isc_msix_bar = 0;
 	}