Index: head/sys/dev/e1000/em_txrx.c =================================================================== --- head/sys/dev/e1000/em_txrx.c (revision 311930) +++ head/sys/dev/e1000/em_txrx.c (revision 311931) @@ -1,746 +1,746 @@ /*- * Copyright (c) 2016 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower); static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower); static int em_isc_txd_encap(void *arg, if_pkt_info_t pi); static void em_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx); static int em_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear); static void em_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused); static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx); static int em_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget); static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void lem_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused); static int lem_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget); static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri); static void em_receive_checksum(uint32_t status, if_rxd_info_t ri); extern int em_intr(void *arg); struct if_txrx em_txrx = { em_isc_txd_encap, em_isc_txd_flush, em_isc_txd_credits_update, em_isc_rxd_available, em_isc_rxd_pkt_get, em_isc_rxd_refill, em_isc_rxd_flush, em_intr }; struct if_txrx lem_txrx = { em_isc_txd_encap, em_isc_txd_flush, em_isc_txd_credits_update, lem_isc_rxd_available, lem_isc_rxd_pkt_get, lem_isc_rxd_refill, em_isc_rxd_flush, em_intr }; extern if_shared_ctx_t em_sctx; /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) { if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; struct e1000_context_desc *TXD; struct em_txbuffer *tx_buffer; int cur, hdr_len; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = pi->ipi_pidx; TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; tx_buffer = &txr->tx_buffers[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen; TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. */ TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (pi->ipi_len - hdr_len)); /* Total len */ tx_buffer->eop = -1; txr->tx_tso = TRUE; if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur); return (cur); } #define TSO_WORKAROUND 4 #define DONT_FORCE_CTX 1 /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. **********************************************************************/ static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) { struct e1000_context_desc *TXD = NULL; if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; struct em_txbuffer *tx_buffer; int csum_flags = pi->ipi_csum_flags; int cur, hdr_len; u32 cmd; cur = pi->ipi_pidx; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen; cmd = adapter->txd_cmd; /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (DONT_FORCE_CTX && adapter->tx_num_queues == 1 && txr->csum_lhlen == pi->ipi_ehdrlen && txr->csum_iphlen == pi->ipi_ip_hlen && txr->csum_flags == csum_flags) { /* * Same csum offload context as the previous packets; * just return. */ *txd_upper = txr->csum_txd_upper; *txd_lower = txr->csum_txd_lower; return (cur); } TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; if (csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); cmd |= E1000_TXD_CMD_IP; } if (csum_flags & (CSUM_TCP|CSUM_UDP)) { uint8_t tucso; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; if (csum_flags & CSUM_TCP) { tucso = hdr_len + offsetof(struct tcphdr, th_sum); cmd |= E1000_TXD_CMD_TCP; } else tucso = hdr_len + offsetof(struct udphdr, uh_sum); TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } txr->csum_lhlen = pi->ipi_ehdrlen; txr->csum_iphlen = pi->ipi_ip_hlen; txr->csum_flags = csum_flags; txr->csum_txd_upper = *txd_upper; txr->csum_txd_lower = *txd_lower; TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd); tx_buffer = &txr->tx_buffers[cur]; tx_buffer->eop = -1; if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(adapter->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n", csum_flags, *txd_upper, *txd_lower, hdr_len, cmd); return (cur); } static int em_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; bus_dma_segment_t *segs = pi->ipi_segs; int nsegs = pi->ipi_nsegs; int csum_flags = pi->ipi_csum_flags; int i, j, first, pidx_last; u32 txd_upper = 0, txd_lower = 0; struct em_txbuffer *tx_buffer; struct e1000_tx_desc *ctxd = NULL; bool do_tso, tso_desc; i = first = pi->ipi_pidx; do_tso = (csum_flags & CSUM_TSO); tso_desc = FALSE; /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ if ((!do_tso) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } /* Do hardware assists */ if (do_tso) { i = em_tso_setup(sc, pi, &txd_upper, &txd_lower); tso_desc = TRUE; - } else if (csum_flags & CSUM_OFFLOAD) { + } else if (csum_flags & EM_CSUM_OFFLOAD) { i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower); } if (pi->ipi_mflags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(pi->ipi_vtag) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i); /* XXX adapter->pcix_82544 -- lem_fill_descriptors */ /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; uint32_t cmd; ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd; /* ** TSO Workaround: ** If this is the last descriptor, we want to ** split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == scctx->isc_ntxd[0]) i = 0; /* Now make the sentinel */ ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } tx_buffer->eop = -1; } /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); tx_buffer = &txr->tx_buffers[first]; tx_buffer->eop = pidx_last; DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i); pi->ipi_new_pidx = i; return (0); } static void em_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx) { struct adapter *adapter = arg; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx); } static int em_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; u32 cidx, processed = 0; int last, done; struct em_txbuffer *buf; struct e1000_tx_desc *tx_desc, *eop_desc; cidx = cidx_init; buf = &txr->tx_buffers[cidx]; tx_desc = &txr->tx_base[cidx]; last = buf->eop; eop_desc = &txr->tx_base[last]; DPRINTF(iflib_get_dev(adapter->ctx), "credits_update: cidx_init=%d clear=%d last=%d\n", cidx_init, clear, last); /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == scctx->isc_ntxd[0]) last = 0; done = last; while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (cidx != done) { if (clear) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; buf->eop = -1; } tx_desc++; buf++; processed++; /* wrap the ring ? */ if (++cidx == scctx->isc_ntxd[0]) { cidx = 0; } buf = &txr->tx_buffers[cidx]; tx_desc = &txr->tx_base[cidx]; } /* See if we can continue to the next packet */ last = buf->eop; if (last == -1) break; eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == scctx->isc_ntxd[0]) last = 0; done = last; } DPRINTF(iflib_get_dev(adapter->ctx), "Processed %d credits update\n", processed); return(processed); } static void lem_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; int i; uint32_t next_pidx; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx]; rxd->buffer_addr = htole64(paddrs[i]); /* status bits must be cleared */ rxd->status = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buflen __unused) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; int i; uint32_t next_pidx; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = &rxr->rx_base[next_pidx]; rxd->read.buffer_addr = htole64(paddrs[i]); /* DD bits must be cleared */ rxd->wb.upper.status_error = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx) { struct adapter *sc = arg; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); } static int lem_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; u32 staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } static int em_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; u32 staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) { i = 0; } if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; u16 len; u32 status, errors; bool eop; int i, cidx; status = errors = i = 0; cidx = ri->iri_cidx; do { rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx]; status = rxd->status; errors = rxd->errors; /* Error Checking then decrement count */ MPASS ((status & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->length); ri->iri_len += len; eop = (status & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) { adapter->dropped_pkts++; /* XXX fixup if common */ return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->status = 0; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); /* XXX add a faster way to look this up */ if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM)) lem_receive_checksum(status, errors, ri); if (status & E1000_RXD_STAT_VP) { ri->iri_vtag = le16toh(rxd->special); ri->iri_flags |= M_VLANTAG; } ri->iri_nfrags = i; return (0); } static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; u16 len; u32 staterr = 0; bool eop; int i, cidx, vtag; i = vtag = 0; cidx = ri->iri_cidx; do { rxd = &rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); /* Error Checking then decrement count */ MPASS ((staterr & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ri->iri_len += len; eop = (staterr & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { adapter->dropped_pkts++; return EBADMSG; } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->wb.upper.status_error &= htole32(~0xFF); if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); /* XXX add a faster way to look this up */ if (adapter->hw.mac.type >= e1000_82543) em_receive_checksum(staterr, ri); if (staterr & E1000_RXD_STAT_VP) { vtag = le16toh(rxd->wb.upper.vlan); } ri->iri_vtag = vtag; ri->iri_nfrags = i; if (vtag) ri->iri_flags |= M_VLANTAG; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri) { /* Did it pass? */ if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE)) ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); if (status & E1000_RXD_STAT_TCPCS) { /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); ri->iri_csum_data = htons(0xffff); } } } static void em_receive_checksum(uint32_t status, if_rxd_info_t ri) { ri->iri_csum_flags = 0; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) return; /* If the IP checksum exists and there is no IP Checksum error */ if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == E1000_RXD_STAT_IPCS) { ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); } /* TCP or UDP checksum */ if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == E1000_RXD_STAT_TCPCS) { ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); ri->iri_csum_data = htons(0xffff); } if (status & E1000_RXD_STAT_UDPCS) { ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); ri->iri_csum_data = htons(0xffff); } } Index: head/sys/dev/e1000/if_em.h =================================================================== --- head/sys/dev/e1000/if_em.h (revision 311930) +++ head/sys/dev/e1000/if_em.h (revision 311931) @@ -1,573 +1,574 @@ /*- * Copyright (c) 2016 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*$FreeBSD$*/ #include "opt_em.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "ifdi_if.h" #ifndef _EM_H_DEFINED_ #define _EM_H_DEFINED_ /* Tunables */ /* * EM_TXD: Maximum number of Transmit Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 256 * This value is the number of transmit descriptors allocated by the driver. * Increasing this value allows the driver to queue more transmits. Each * descriptor is 16 bytes. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_TXD 128 #define EM_MAX_TXD 4096 #define EM_DEFAULT_TXD 1024 #define EM_DEFAULT_MULTI_TXD 4096 /* * EM_RXD - Maximum number of receive Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 256 * This value is the number of receive descriptors allocated by the driver. * Increasing this value allows the driver to buffer more incoming packets. * Each descriptor is 16 bytes. A receive buffer is also allocated for each * descriptor. The maximum MTU size is 16110. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_RXD 128 #define EM_MAX_RXD 4096 #define EM_DEFAULT_RXD 1024 #define EM_DEFAULT_MULTI_RXD 4096 /* * EM_TIDV - Transmit Interrupt Delay Value * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value delays the generation of transmit interrupts in units of * 1.024 microseconds. Transmit interrupt reduction can improve CPU * efficiency if properly tuned for specific network traffic. If the * system is reporting dropped transmits, this value may be set too high * causing the driver to run out of available transmit descriptors. */ #define EM_TIDV 64 /* * EM_TADV - Transmit Absolute Interrupt Delay Value * (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * transmit interrupt is generated. Useful only if EM_TIDV is non-zero, * this value ensures that an interrupt is generated after the initial * packet is sent on the wire within the set amount of time. Proper tuning, * along with EM_TIDV, may improve traffic throughput in specific * network conditions. */ #define EM_TADV 64 /* * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) * Valid Range: 0-65535 (0=off) * Default Value: 0 * This value delays the generation of receive interrupts in units of 1.024 * microseconds. Receive interrupt reduction can improve CPU efficiency if * properly tuned for specific network traffic. Increasing this value adds * extra latency to frame reception and can end up decreasing the throughput * of TCP traffic. If the system is reporting dropped receives, this value * may be set too high, causing the driver to run out of available receive * descriptors. * * CAUTION: When setting EM_RDTR to a value other than 0, adapters * may hang (stop transmitting) under certain network conditions. * If this occurs a WATCHDOG message is logged in the system * event log. In addition, the controller is automatically reset, * restoring the network connection. To eliminate the potential * for the hang ensure that EM_RDTR is set to 0. */ #ifdef EM_MULTIQUEUE #define EM_RDTR 64 #else #define EM_RDTR 0 #endif /* * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * receive interrupt is generated. Useful only if EM_RDTR is non-zero, * this value ensures that an interrupt is generated after the initial * packet is received within the set amount of time. Proper tuning, * along with EM_RDTR, may improve traffic throughput in specific network * conditions. */ #ifdef EM_MULTIQUEUE #define EM_RADV 128 #else #define EM_RADV 64 #endif /* * This parameter controls whether or not autonegotation is enabled. * 0 - Disable autonegotiation * 1 - Enable autonegotiation */ #define DO_AUTO_NEG 1 /* * This parameter control whether or not the driver will wait for * autonegotiation to complete. * 1 - Wait for autonegotiation to complete * 0 - Don't wait for autonegotiation to complete */ #define WAIT_FOR_AUTO_NEG_DEFAULT 0 /* Tunables -- End */ #define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ADVERTISE_1000_FULL) #define AUTO_ALL_MODES 0 /* PHY master/slave setting */ #define EM_MASTER_SLAVE e1000_ms_hw_default /* * Micellaneous constants */ #define EM_VENDOR_ID 0x8086 #define EM_FLASH 0x0014 #define EM_JUMBO_PBA 0x00000028 #define EM_DEFAULT_PBA 0x00000030 #define EM_SMARTSPEED_DOWNSHIFT 3 #define EM_SMARTSPEED_MAX 15 #define EM_MAX_LOOP 10 #define MAX_NUM_MULTICAST_ADDRESSES 128 #define PCI_ANY_ID (~0U) #define ETHER_ALIGN 2 #define EM_FC_PAUSE_TIME 0x0680 #define EM_EEPROM_APME 0x400; #define EM_82544_APME 0x0004; /* * Driver state logic for the detection of a hung state * in hardware. Set TX_HUNG whenever a TX packet is used * (data is sent) and clear it when txeof() is invoked if * any descriptors from the ring are cleaned/reclaimed. * Increment internal counter if no descriptors are cleaned * and compare to TX_MAXTRIES. When counter > TX_MAXTRIES, * reset adapter. */ #define EM_TX_IDLE 0x00000000 #define EM_TX_BUSY 0x00000001 #define EM_TX_HUNG 0x80000000 #define EM_TX_MAXTRIES 10 #define PCICFG_DESC_RING_STATUS 0xe4 #define FLUSH_DESC_REQUIRED 0x100 #define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ ((hw->mac.type <= e1000_82576) ? 16 : 8)) #define IGB_RX_HTHRESH 8 #define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ (adapter->intr_type == IFLIB_INTR_MSIX)) ? 1 : 4) #define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) #define IGB_TX_HTHRESH 1 #define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ (adapter->intr_type == IFLIB_INTR_MSIX) ? 1 : 16) /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will * also optimize cache line size effect. H/W supports up to cache line size 128. */ #define EM_DBA_ALIGN 128 /* * See Intel 82574 Driver Programming Interface Manual, Section 10.2.6.9 */ #define TARC_COMPENSATION_MODE (1 << 7) /* Compensation Mode */ #define TARC_SPEED_MODE_BIT (1 << 21) /* On PCI-E MACs only */ #define TARC_MQ_FIX (1 << 23) | \ (1 << 24) | \ (1 << 25) /* Handle errata in MQ mode */ #define TARC_ERRATA_BIT (1 << 26) /* Note from errata on 82574 */ /* PCI Config defines */ #define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) #define EM_BAR_TYPE_MASK 0x00000001 #define EM_BAR_TYPE_MMEM 0x00000000 #define EM_BAR_TYPE_IO 0x00000001 #define EM_BAR_TYPE_FLASH 0x0014 #define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) #define EM_BAR_MEM_TYPE_MASK 0x00000006 #define EM_BAR_MEM_TYPE_32BIT 0x00000000 #define EM_BAR_MEM_TYPE_64BIT 0x00000004 #define EM_MSIX_BAR 3 /* On 82575 */ /* More backward compatibility */ #if __FreeBSD_version < 900000 #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #endif /* Defines for printing debug information */ #define DEBUG_INIT 0 #define DEBUG_IOCTL 0 #define DEBUG_HW 0 #define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") #define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) #define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) #define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") #define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) #define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) #define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) #define EM_MAX_SCATTER 40 #define EM_VFTA_SIZE 128 #define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ #define EM_MSIX_LINK 0x01000000 /* For 82574 use */ #define ETH_ZLEN 60 #define ETH_ADDR_LEN 6 -#define CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ +#define EM_CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ +#define IGB_CSUM_OFFLOAD 0x0E0F /* Offload bits in mbuf flag */ #define IGB_PKTTYPE_MASK 0x0000FFF0 #define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ /* * 82574 has a nonstandard address for EIAC * and since its only used in MSIX, and in * the em driver only 82574 uses MSIX we can * solve it just using this define. */ #define EM_EIAC 0x000DC /* * 82574 only reports 3 MSI-X vectors by default; * defines assisting with making it report 5 are * located here. */ #define EM_NVM_PCIE_CTRL 0x1B #define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT) #define EM_NVM_MSIX_N_SHIFT 7 struct adapter; struct em_int_delay_info { struct adapter *adapter; /* Back-pointer to the adapter struct */ int offset; /* Register offset to read/write */ int value; /* Current value in usecs */ }; /* * The transmit ring, one per tx queue */ struct tx_ring { struct adapter *adapter; struct em_tx_queue *que; u32 me; int busy; struct e1000_tx_desc *tx_base; uint64_t tx_paddr; struct em_txbuffer *tx_buffers; u32 tx_tso; /* last tx was tso */ /* Interrupt resources */ void *tag; struct resource *res; unsigned long tx_irq; unsigned long no_desc_avail; /* Saved csum offloading context information */ int csum_flags; int csum_lhlen; int csum_iphlen; int csum_thlen; int csum_mss; int csum_pktlen; uint32_t csum_txd_upper; uint32_t csum_txd_lower; /* last field */ }; /* * The Receive ring, one per rx queue */ struct rx_ring { struct adapter *adapter; struct em_rx_queue *que; u32 me; u32 payload; union e1000_rx_desc_extended *rx_base; uint64_t rx_paddr; /* Interrupt resources */ void *tag; struct resource *res; bool discard; /* Soft stats */ unsigned long rx_irq; unsigned long rx_discarded; unsigned long rx_packets; unsigned long rx_bytes; }; struct em_tx_queue { struct adapter *adapter; u32 msix; u32 eims; /* This queue's EIMS bit */ u32 me; struct tx_ring txr; }; struct em_rx_queue { struct adapter *adapter; u32 me; u32 msix; u32 eims; struct rx_ring rxr; u64 irqs; struct if_irq que_irq; }; /* Our adapter structure */ struct adapter { struct ifnet *ifp; struct e1000_hw hw; if_softc_ctx_t shared; if_ctx_t ctx; #define tx_num_queues shared->isc_ntxqsets #define rx_num_queues shared->isc_nrxqsets #define intr_type shared->isc_intr /* FreeBSD operating-system-specific structures. */ struct e1000_osdep osdep; struct device *dev; struct cdev *led_dev; struct em_tx_queue *tx_queues; struct em_rx_queue *rx_queues; struct if_irq irq; struct resource *memory; struct resource *flash; struct resource *ioport; int io_rid; struct resource *res; void *tag; u32 linkvec; u32 ivars; struct ifmedia *media; int msix; int if_flags; int min_frame_size; int em_insert_vlan_header; u32 ims; bool in_detach; /* Task for FAST handling */ struct grouptask link_task; u16 num_vlans; u32 txd_cmd; u32 tx_process_limit; u32 rx_process_limit; u32 rx_mbuf_sz; /* Management and WOL features */ u32 wol; bool has_manage; bool has_amt; /* Multicast array memory */ u8 *mta; /* ** Shadow VFTA table, this is needed because ** the real vlan filter table gets cleared during ** a soft reset and the driver needs to be able ** to repopulate it. */ u32 shadow_vfta[EM_VFTA_SIZE]; /* Info about the interface */ u16 link_active; u16 fc; u16 link_speed; u16 link_duplex; u32 smartspeed; u32 dmac; int link_mask; u64 que_mask; struct em_int_delay_info tx_int_delay; struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; struct em_int_delay_info tx_itr; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; unsigned long link_irq; unsigned long mbuf_defrag_failed; unsigned long no_tx_dma_setup; unsigned long no_tx_map_avail; unsigned long rx_overruns; unsigned long watchdog_events; struct e1000_hw_stats stats; }; /******************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * ********************************************************************************/ typedef struct _em_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int subvendor_id; unsigned int subdevice_id; unsigned int index; } em_vendor_info_t; struct em_txbuffer { int eop; }; #define EM_CORE_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF) #define EM_TX_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->tx_mtx, _name, "EM TX Lock", MTX_DEF) #define EM_RX_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->rx_mtx, _name, "EM RX Lock", MTX_DEF) #define EM_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) #define EM_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) #define EM_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) #define EM_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) #define EM_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) #define EM_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) #define EM_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) #define EM_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) #define EM_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) #define EM_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) #define EM_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) #define EM_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) #define EM_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) #define EM_RSSRK_SIZE 4 #define EM_RSSRK_VAL(key, i) (key[(i) * EM_RSSRK_SIZE] | \ key[(i) * EM_RSSRK_SIZE + 1] << 8 | \ key[(i) * EM_RSSRK_SIZE + 2] << 16 | \ key[(i) * EM_RSSRK_SIZE + 3] << 24) #endif /* _EM_H_DEFINED_ */ Index: head/sys/dev/e1000/igb_txrx.c =================================================================== --- head/sys/dev/e1000/igb_txrx.c (revision 311930) +++ head/sys/dev/e1000/igb_txrx.c (revision 311931) @@ -1,620 +1,620 @@ /*- * Copyright (c) 2016 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi); static void igb_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx); static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx, bool clear); static void igb_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buf_len __unused); static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx); static int igb_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget); static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype); static int igb_determine_rsstype(u16 pkt_info); extern void igb_if_enable_intr(if_ctx_t ctx); extern int em_intr(void *arg); struct if_txrx igb_txrx = { igb_isc_txd_encap, igb_isc_txd_flush, igb_isc_txd_credits_update, igb_isc_rxd_available, igb_isc_rxd_pkt_get, igb_isc_rxd_refill, igb_isc_rxd_flush, em_intr }; extern if_shared_ctx_t em_sctx; /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; u32 type_tucmd_mlhl = 0, vlan_macip_lens = 0; u32 mss_l4len_idx = 0; u32 paylen; switch(pi->ipi_etype) { case ETHERTYPE_IPV6: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; case ETHERTYPE_IP: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; break; default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(pi->ipi_etype)); break; } TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* This is used in the transmit desc in encap */ paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; /* VLAN MACLEN IPLEN */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= pi->ipi_ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; return (1); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; u32 vlan_macip_lens, type_tucmd_mlhl; u32 mss_l4len_idx; mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; int offload = TRUE; /* First check if TSO is to be used */ if (pi->ipi_csum_flags & CSUM_TSO) return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status)); /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); - } else if ((pi->ipi_csum_flags & CSUM_OFFLOAD) == 0) { + } else if ((pi->ipi_csum_flags & IGB_CSUM_OFFLOAD) == 0) { return (0); } /* Set the ether header length */ vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; switch(pi->ipi_etype) { case ETHERTYPE_IP: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= pi->ipi_ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (pi->ipi_ipproto) { case IPPROTO_TCP: #if __FreeBSD_version >= 1000000 if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) #else if (pi->ipi_csum_flags & CSUM_TCP) #endif type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: #if __FreeBSD_version >= 1000000 if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) #else if (pi->ipi_csum_flags & CSUM_UDP) #endif type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: #if __FreeBSD_version >= 1000000 if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) #else if (pi->ipi_csum_flags & CSUM_SCTP) #endif type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } if (offload) /* For the TX descriptor setup */ *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); return (1); } static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; struct em_txbuffer *txbuf; union e1000_adv_tx_desc *txd = NULL; int i, j, first, pidx_last; u32 olinfo_status, cmd_type_len; pidx_last = olinfo_status = 0; /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (pi->ipi_mflags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; first = i = pi->ipi_pidx; /* Consume the first descriptor */ i += igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status); if (i == scctx->isc_ntxd[0]) i = 0; /* 82575 needs the queue index added */ if (sc->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txbuf = &txr->tx_buffers[i]; txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); pidx_last = i; if (++i == scctx->isc_ntxd[0]) { i = 0; } } txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* Set the EOP descriptor that will be marked done */ txbuf = &txr->tx_buffers[first]; txbuf->eop = pidx_last; pi->ipi_new_pidx = i; return (0); } static void igb_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx) { struct adapter *adapter = arg; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx); } static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; u32 cidx, ntxd, processed = 0; struct em_txbuffer *buf; union e1000_adv_tx_desc *txd, *eop; int limit; cidx = cidx_init; buf = &txr->tx_buffers[cidx]; txd = (union e1000_adv_tx_desc *)&txr->tx_base[cidx]; ntxd = scctx->isc_ntxd[0]; limit = adapter->tx_process_limit; do { if (buf->eop == -1) /* No work */ break; eop = (union e1000_adv_tx_desc *)&txr->tx_base[buf->eop]; if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) break; /* I/O not complete */ if (clear) buf->eop = -1; /* clear indicate processed */ /* We clean the range if multi segment */ while (txd != eop) { ++txd; ++buf; /* wrap the ring? */ if (++cidx == scctx->isc_ntxd[0]) { cidx = 0; buf = txr->tx_buffers; txd = (union e1000_adv_tx_desc *)txr->tx_base; } buf = &txr->tx_buffers[cidx]; if (clear) buf->eop = -1; processed++; } processed++; /* Try the next packet */ txd++; buf++; /* reset with a wrap */ if (++cidx == scctx->isc_ntxd[0]) { cidx = 0; buf = txr->tx_buffers; txd = (union e1000_adv_tx_desc *)txr->tx_base; } prefetch(txd); prefetch(txd+1); } while (__predict_true(--limit) && cidx != cidx_init); return (processed); } static void igb_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buf_len __unused) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; union e1000_adv_rx_desc *rxd; struct rx_ring *rxr = &que->rxr; int i; uint32_t next_pidx; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[next_pidx]; rxd->read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx) { struct adapter *sc = arg; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); } static int igb_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_adv_rx_desc *rxd; u32 staterr = 0; int cnt, i, iter; for (iter = cnt = 0, i = idx; iter < scctx->isc_nrxd[0] && iter <= budget;) { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) { i = 0; } if (staterr & E1000_RXD_STAT_EOP) cnt++; iter++; } { struct e1000_hw *hw = &sc->hw; int rdt, rdh; rdt = E1000_READ_REG(hw, E1000_RDT(rxr->me)); rdh = E1000_READ_REG(hw, E1000_RDH(rxr->me)); DPRINTF(iflib_get_dev(sc->ctx), "sidx:%d eidx:%d iter=%d pktcnt=%d RDT=%d RDH=%d\n", idx, i, iter, cnt, rdt, rdh); } return (cnt); } /**************************************************************** * Routine sends data which has been dma'ed into host memory * to upper layer. Initialize ri structure. * * Returns 0 upon success, errno on failure ***************************************************************/ static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; struct ifnet *ifp = iflib_get_ifp(adapter->ctx); union e1000_adv_rx_desc *rxd; u16 pkt_info, len; u16 vtag = 0; u32 ptype; u32 staterr = 0; bool eop; int i = 0; int cidx = ri->iri_cidx; do { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); MPASS ((staterr & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; ri->iri_len += len; rxr->rx_bytes += ri->iri_len; rxd->wb.upper.status_error = 0; eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); if (((adapter->hw.mac.type == e1000_i350) || (adapter->hw.mac.type == e1000_i354)) && (staterr & E1000_RXDEXT_STATERR_LB)) vtag = be16toh(rxd->wb.upper.vlan); else vtag = le16toh(rxd->wb.upper.vlan); /* Make sure bad packets are discarded */ if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { adapter->dropped_pkts++; ++rxr->rx_discarded; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; #ifdef notyet if (rxr->hdr_split == TRUE) { ri->iri_frags[i].irf_flid = 1; ri->iri_frags[i].irf_idx = cidx; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; } #endif i++; } while (!eop); rxr->rx_packets++; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, ri, ptype); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & E1000_RXD_STAT_VP) != 0) { ri->iri_vtag = vtag; ri->iri_flags |= M_VLANTAG; } ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = igb_determine_rsstype(pkt_info); ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype) { u16 status = (u16)staterr; u8 errors = (u8) (staterr >> 24); bool sctp = FALSE; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) { ri->iri_csum_flags = 0; return; } if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) sctp = 1; else sctp = 0; if (status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ ri->iri_csum_flags = CSUM_IP_CHECKED; ri->iri_csum_flags |= CSUM_IP_VALID; } else ri->iri_csum_flags = 0; } if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) /* reassign */ type = CSUM_SCTP_VALID; #endif /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { ri->iri_csum_flags |= type; if (sctp == 0) ri->iri_csum_data = htons(0xffff); } } return; } /******************************************************************** * * Parse the packet type to determine the appropriate hash * ******************************************************************/ static int igb_determine_rsstype(u16 pkt_info) { switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case E1000_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case E1000_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case E1000_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } }