diff --git a/sys/dev/e1000/em_txrx.c b/sys/dev/e1000/em_txrx.c index 069a1c00a4b2..1192286d1c9c 100644 --- a/sys/dev/e1000/em_txrx.c +++ b/sys/dev/e1000/em_txrx.c @@ -1,808 +1,808 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Nicole Graziano * Copyright (c) 2017 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower); static int em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower); static int em_isc_txd_encap(void *arg, if_pkt_info_t pi); static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru); static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t); static int em_determine_rsstype(uint32_t pkt_info); extern int em_intr(void *arg); struct if_txrx em_txrx = { .ift_txd_encap = em_isc_txd_encap, .ift_txd_flush = em_isc_txd_flush, .ift_txd_credits_update = em_isc_txd_credits_update, .ift_rxd_available = em_isc_rxd_available, .ift_rxd_pkt_get = em_isc_rxd_pkt_get, .ift_rxd_refill = em_isc_rxd_refill, .ift_rxd_flush = em_isc_rxd_flush, .ift_legacy_intr = em_intr }; struct if_txrx lem_txrx = { .ift_txd_encap = em_isc_txd_encap, .ift_txd_flush = em_isc_txd_flush, .ift_txd_credits_update = em_isc_txd_credits_update, .ift_rxd_available = lem_isc_rxd_available, .ift_rxd_pkt_get = lem_isc_rxd_pkt_get, .ift_rxd_refill = lem_isc_rxd_refill, .ift_rxd_flush = em_isc_rxd_flush, .ift_legacy_intr = em_intr }; extern if_shared_ctx_t em_sctx; void em_dump_rs(struct e1000_softc *sc) { if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que; struct tx_ring *txr; qidx_t i, ntxd, qid, cur; int16_t rs_cidx; uint8_t status; printf("\n"); ntxd = scctx->isc_ntxd[0]; for (qid = 0; qid < sc->tx_num_queues; qid++) { que = &sc->tx_queues[qid]; txr = &que->txr; rs_cidx = txr->tx_rs_cidx; if (rs_cidx != txr->tx_rs_pidx) { cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].upper.fields.status; if (!(status & E1000_TXD_STAT_DD)) printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur); } else { rs_cidx = (rs_cidx-1)&(ntxd-1); cur = txr->tx_rsq[rs_cidx]; printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d ", qid, rs_cidx, cur); } printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed, txr->tx_rs_pidx); for (i = 0; i < ntxd; i++) { if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD) printf("%d set ", i); } printf("\n"); } } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower) { if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; struct e1000_hw *hw = &sc->hw; struct e1000_context_desc *TXD; int cur, hdr_len; uint32_t cmd_type_len; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = pi->ipi_pidx; TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen; TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. */ TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; /* * 8254x SDM4.0 page 45, and PCIe GbE SDM2.5 page 63 * - Set up basic TUCMDs * - Enable IP bit on 82544 * - For others IP bit on indicates IPv4, while off indicates IPv6 */ cmd_type_len = sc->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_TCP; /* Do TCP checksum */ if (hw->mac.type == e1000_82544) cmd_type_len |= E1000_TXD_CMD_IP; else if (pi->ipi_etype == ETHERTYPE_IP) cmd_type_len |= E1000_TXD_CMD_IP; TXD->cmd_and_length = htole32(cmd_type_len | (pi->ipi_len - hdr_len)); /* Total len */ txr->tx_tso = true; if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur); return (cur); } #define TSO_WORKAROUND 4 #define DONT_FORCE_CTX 1 /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. **********************************************************************/ static int em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower) { struct e1000_context_desc *TXD = NULL; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int csum_flags = pi->ipi_csum_flags; int cur, hdr_len; uint32_t cmd; cur = pi->ipi_pidx; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen; cmd = sc->txd_cmd; /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (DONT_FORCE_CTX && sc->tx_num_queues == 1 && txr->csum_lhlen == pi->ipi_ehdrlen && txr->csum_iphlen == pi->ipi_ip_hlen && txr->csum_flags == csum_flags) { /* * Same csum offload context as the previous packets; * just return. */ *txd_upper = txr->csum_txd_upper; *txd_lower = txr->csum_txd_lower; return (cur); } TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; if (csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); cmd |= E1000_TXD_CMD_IP; } - if (csum_flags & (CSUM_TCP|CSUM_UDP)) { + if (csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP)) { uint8_t tucso; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; - if (csum_flags & CSUM_TCP) { + if (csum_flags & CSUM_TCP | CSUM_IP6_TCP) { tucso = hdr_len + offsetof(struct tcphdr, th_sum); cmd |= E1000_TXD_CMD_TCP; } else tucso = hdr_len + offsetof(struct udphdr, uh_sum); TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } txr->csum_lhlen = pi->ipi_ehdrlen; txr->csum_iphlen = pi->ipi_ip_hlen; txr->csum_flags = csum_flags; txr->csum_txd_upper = *txd_upper; txr->csum_txd_lower = *txd_lower; TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd); if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(sc->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n", csum_flags, *txd_upper, *txd_lower, hdr_len, cmd); return (cur); } static int em_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; bus_dma_segment_t *segs = pi->ipi_segs; int nsegs = pi->ipi_nsegs; int csum_flags = pi->ipi_csum_flags; int i, j, first, pidx_last; uint32_t txd_flags, txd_upper = 0, txd_lower = 0; struct e1000_tx_desc *ctxd = NULL; bool do_tso, tso_desc; qidx_t ntxd; txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0; i = first = pi->ipi_pidx; do_tso = (csum_flags & CSUM_TSO); tso_desc = false; ntxd = scctx->isc_ntxd[0]; /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ if ((!do_tso) && (txr->tx_tso == true)) { if (nsegs == 1) tso_desc = true; txr->tx_tso = false; } /* Do hardware assists */ if (do_tso) { i = em_tso_setup(sc, pi, &txd_upper, &txd_lower); tso_desc = true; } else if (csum_flags & EM_CSUM_OFFLOAD) { i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower); } if (pi->ipi_mflags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(pi->ipi_vtag) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i); /* XXX sc->pcix_82544 -- lem_fill_descriptors */ /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; uint32_t cmd; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd; /* * TSO Workaround: * If this is the last descriptor, we want to * split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == scctx->isc_ntxd[0]) i = 0; /* Now make the sentinel */ ctxd = &txr->tx_base[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } } /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ if (txd_flags && nsegs) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; DPRINTF(iflib_get_dev(sc->ctx), "setting to RS on %d rs_pidx %d first: %d\n", pidx_last, txr->tx_rs_pidx, first); txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags); DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i); pi->ipi_new_pidx = i; return (0); } static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct e1000_softc *sc = arg; struct em_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx); } static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); status = txr->tx_base[cur].upper.fields.status; updated = !!(status & E1000_TXD_STAT_DD); if (!updated) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); DPRINTF(iflib_get_dev(sc->ctx), "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n", __FUNCTION__, prev, cur, clear, delta); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); status = txr->tx_base[cur].upper.fields.status; } while ((status & E1000_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return(processed); } static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx]; rxd->buffer_addr = htole64(paddrs[i]); /* status bits must be cleared */ rxd->status = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; uint16_t rxqid = iru->iru_qsidx; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = &rxr->rx_base[next_pidx]; rxd->read.buffer_addr = htole64(paddrs[i]); /* DD bits must be cleared */ rxd->wb.upper.status_error = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct e1000_softc *sc = arg; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); } static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; uint32_t staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; uint32_t staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; uint16_t len; uint32_t status, errors; bool eop; int i, cidx; status = errors = i = 0; cidx = ri->iri_cidx; do { rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx]; status = rxd->status; errors = rxd->errors; /* Error Checking then decrement count */ MPASS ((status & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->length); ri->iri_len += len; eop = (status & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) { sc->dropped_pkts++; /* XXX fixup if common */ return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->status = 0; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); if (scctx->isc_capenable & IFCAP_RXCSUM) em_receive_checksum(status, errors, ri); if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING && status & E1000_RXD_STAT_VP) { ri->iri_vtag = le16toh(rxd->special & E1000_RXD_SPC_VLAN_MASK); ri->iri_flags |= M_VLANTAG; } ri->iri_nfrags = i; return (0); } static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; uint16_t len; uint32_t pkt_info; uint32_t staterr; bool eop; int i, cidx; staterr = i = 0; cidx = ri->iri_cidx; do { rxd = &rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le32toh(rxd->wb.lower.mrq); /* Error Checking then decrement count */ MPASS ((staterr & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ri->iri_len += len; eop = (staterr & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { sc->dropped_pkts++; return EBADMSG; } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->wb.upper.status_error &= htole32(~0xFF); if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); if (scctx->isc_capenable & IFCAP_RXCSUM) em_receive_checksum(staterr, staterr >> 24, ri); if (scctx->isc_capenable & IFCAP_VLAN_HWTAGGING && staterr & E1000_RXD_STAT_VP) { ri->iri_vtag = le16toh(rxd->wb.upper.vlan); ri->iri_flags |= M_VLANTAG; } ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = em_determine_rsstype(pkt_info); ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri) { if (__predict_false(status & E1000_RXD_STAT_IXSM)) return; /* If there is a layer 3 or 4 error we are done */ if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE))) return; /* IP Checksum Good */ if (status & E1000_RXD_STAT_IPCS) ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); /* Valid L4E checksum */ if (__predict_true(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) { ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; ri->iri_csum_data = htons(0xffff); } } /******************************************************************** * * Parse the packet type to determine the appropriate hash * ******************************************************************/ static int em_determine_rsstype(uint32_t pkt_info) { switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case E1000_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case E1000_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case E1000_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index a46ce2fdb6ae..8c5abf5b48cb 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -1,553 +1,556 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Nicole Graziano * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*$FreeBSD$*/ #ifndef _EM_H_DEFINED_ #define _EM_H_DEFINED_ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "ifdi_if.h" /* Tunables */ /* * EM_MAX_TXD: Maximum number of Transmit Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 1024 * This value is the number of transmit descriptors allocated by the driver. * Increasing this value allows the driver to queue more transmits. Each * descriptor is 16 bytes. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_TXD 128 #define EM_MAX_TXD 4096 #define EM_DEFAULT_TXD 1024 #define EM_DEFAULT_MULTI_TXD 4096 #define IGB_MAX_TXD 4096 /* * EM_MAX_RXD - Maximum number of receive Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 1024 * This value is the number of receive descriptors allocated by the driver. * Increasing this value allows the driver to buffer more incoming packets. * Each descriptor is 16 bytes. A receive buffer is also allocated for each * descriptor. The maximum MTU size is 16110. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_RXD 128 #define EM_MAX_RXD 4096 #define EM_DEFAULT_RXD 1024 #define EM_DEFAULT_MULTI_RXD 4096 #define IGB_MAX_RXD 4096 /* * EM_TIDV - Transmit Interrupt Delay Value * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value delays the generation of transmit interrupts in units of * 1.024 microseconds. Transmit interrupt reduction can improve CPU * efficiency if properly tuned for specific network traffic. If the * system is reporting dropped transmits, this value may be set too high * causing the driver to run out of available transmit descriptors. */ #define EM_TIDV 64 /* * EM_TADV - Transmit Absolute Interrupt Delay Value * (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * transmit interrupt is generated. Useful only if EM_TIDV is non-zero, * this value ensures that an interrupt is generated after the initial * packet is sent on the wire within the set amount of time. Proper tuning, * along with EM_TIDV, may improve traffic throughput in specific * network conditions. */ #define EM_TADV 64 /* * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) * Valid Range: 0-65535 (0=off) * Default Value: 0 * This value delays the generation of receive interrupts in units of 1.024 * microseconds. Receive interrupt reduction can improve CPU efficiency if * properly tuned for specific network traffic. Increasing this value adds * extra latency to frame reception and can end up decreasing the throughput * of TCP traffic. If the system is reporting dropped receives, this value * may be set too high, causing the driver to run out of available receive * descriptors. * * CAUTION: When setting EM_RDTR to a value other than 0, adapters * may hang (stop transmitting) under certain network conditions. * If this occurs a WATCHDOG message is logged in the system * event log. In addition, the controller is automatically reset, * restoring the network connection. To eliminate the potential * for the hang ensure that EM_RDTR is set to 0. */ #define EM_RDTR 0 /* * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * receive interrupt is generated. Useful only if EM_RDTR is non-zero, * this value ensures that an interrupt is generated after the initial * packet is received within the set amount of time. Proper tuning, * along with EM_RDTR, may improve traffic throughput in specific network * conditions. */ #define EM_RADV 64 /* * This parameter controls whether or not autonegotiation is enabled. * 0 - Disable autonegotiation * 1 - Enable autonegotiation */ #define DO_AUTO_NEG 1 /* * This parameter control whether or not the driver will wait for * autonegotiation to complete. * 1 - Wait for autonegotiation to complete * 0 - Don't wait for autonegotiation to complete */ #define WAIT_FOR_AUTO_NEG_DEFAULT 0 /* Tunables -- End */ #define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ADVERTISE_1000_FULL) #define AUTO_ALL_MODES 0 /* PHY master/slave setting */ #define EM_MASTER_SLAVE e1000_ms_hw_default /* * Miscellaneous constants */ #define EM_VENDOR_ID 0x8086 #define EM_FLASH 0x0014 #define EM_JUMBO_PBA 0x00000028 #define EM_DEFAULT_PBA 0x00000030 #define EM_SMARTSPEED_DOWNSHIFT 3 #define EM_SMARTSPEED_MAX 15 #define EM_MAX_LOOP 10 #define MAX_NUM_MULTICAST_ADDRESSES 128 #define PCI_ANY_ID (~0U) #define ETHER_ALIGN 2 #define EM_FC_PAUSE_TIME 0x0680 #define EM_EEPROM_APME 0x400; #define EM_82544_APME 0x0004; /* Support AutoMediaDetect for Marvell M88 PHY in i354 */ #define IGB_MEDIA_RESET (1 << 0) /* Define the starting Interrupt rate per Queue */ #define IGB_INTS_PER_SEC 8000 #define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) #define IGB_LINK_ITR 2000 #define I210_LINK_DELAY 1000 #define IGB_TXPBSIZE 20408 #define IGB_HDR_BUF 128 #define IGB_PKTTYPE_MASK 0x0000FFF0 #define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ /* * Driver state logic for the detection of a hung state * in hardware. Set TX_HUNG whenever a TX packet is used * (data is sent) and clear it when txeof() is invoked if * any descriptors from the ring are cleaned/reclaimed. * Increment internal counter if no descriptors are cleaned * and compare to TX_MAXTRIES. When counter > TX_MAXTRIES, * reset adapter. */ #define EM_TX_IDLE 0x00000000 #define EM_TX_BUSY 0x00000001 #define EM_TX_HUNG 0x80000000 #define EM_TX_MAXTRIES 10 #define PCICFG_DESC_RING_STATUS 0xe4 #define FLUSH_DESC_REQUIRED 0x100 #define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ ((hw->mac.type <= e1000_82576) ? 16 : 8)) #define IGB_RX_HTHRESH 8 #define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ (sc->intr_type == IFLIB_INTR_MSIX)) ? 1 : 4) #define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) #define IGB_TX_HTHRESH 1 #define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ sc->intr_type == IFLIB_INTR_MSIX) ? 1 : 16) /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will * also optimize cache line size effect. H/W supports up to cache line size 128. */ #define EM_DBA_ALIGN 128 /* * See Intel 82574 Driver Programming Interface Manual, Section 10.2.6.9 */ #define TARC_COMPENSATION_MODE (1 << 7) /* Compensation Mode */ #define TARC_SPEED_MODE_BIT (1 << 21) /* On PCI-E MACs only */ #define TARC_MQ_FIX (1 << 23) | \ (1 << 24) | \ (1 << 25) /* Handle errata in MQ mode */ #define TARC_ERRATA_BIT (1 << 26) /* Note from errata on 82574 */ /* PCI Config defines */ #define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) #define EM_BAR_TYPE_MASK 0x00000001 #define EM_BAR_TYPE_MMEM 0x00000000 #define EM_BAR_TYPE_IO 0x00000001 #define EM_BAR_TYPE_FLASH 0x0014 #define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) #define EM_BAR_MEM_TYPE_MASK 0x00000006 #define EM_BAR_MEM_TYPE_32BIT 0x00000000 #define EM_BAR_MEM_TYPE_64BIT 0x00000004 /* Defines for printing debug information */ #define DEBUG_INIT 0 #define DEBUG_IOCTL 0 #define DEBUG_HW 0 #define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") #define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) #define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) #define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") #define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) #define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) #define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) #define EM_MAX_SCATTER 40 #define EM_VFTA_SIZE 128 #define EM_TSO_SIZE 65535 #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define ETH_ZLEN 60 -#define EM_CSUM_OFFLOAD (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP) /* Offload bits in mbuf flag */ + +/* Offload bits in mbuf flag */ +#define EM_CSUM_OFFLOAD (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | \ + CSUM_IP6_UDP | CSUM_IP6_TCP) #define IGB_CSUM_OFFLOAD (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | \ CSUM_IP_SCTP | CSUM_IP6_UDP | CSUM_IP6_TCP | \ - CSUM_IP6_SCTP) /* Offload bits in mbuf flag */ + CSUM_IP6_SCTP) #define IGB_PKTTYPE_MASK 0x0000FFF0 #define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ /* * 82574 has a nonstandard address for EIAC * and since its only used in MSI-X, and in * the em driver only 82574 uses MSI-X we can * solve it just using this define. */ #define EM_EIAC 0x000DC /* * 82574 only reports 3 MSI-X vectors by default; * defines assisting with making it report 5 are * located here. */ #define EM_NVM_PCIE_CTRL 0x1B #define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT) #define EM_NVM_MSIX_N_SHIFT 7 struct e1000_softc; struct em_int_delay_info { struct e1000_softc *sc; /* Back-pointer to the sc struct */ int offset; /* Register offset to read/write */ int value; /* Current value in usecs */ }; /* * The transmit ring, one per tx queue */ struct tx_ring { struct e1000_softc *sc; struct e1000_tx_desc *tx_base; uint64_t tx_paddr; qidx_t *tx_rsq; bool tx_tso; /* last tx was tso */ uint8_t me; qidx_t tx_rs_cidx; qidx_t tx_rs_pidx; qidx_t tx_cidx_processed; /* Interrupt resources */ void *tag; struct resource *res; unsigned long tx_irq; /* Saved csum offloading context information */ int csum_flags; int csum_lhlen; int csum_iphlen; int csum_thlen; int csum_mss; int csum_pktlen; uint32_t csum_txd_upper; uint32_t csum_txd_lower; /* last field */ }; /* * The Receive ring, one per rx queue */ struct rx_ring { struct e1000_softc *sc; struct em_rx_queue *que; u32 me; u32 payload; union e1000_rx_desc_extended *rx_base; uint64_t rx_paddr; /* Interrupt resources */ void *tag; struct resource *res; bool discard; /* Soft stats */ unsigned long rx_irq; unsigned long rx_discarded; unsigned long rx_packets; unsigned long rx_bytes; }; struct em_tx_queue { struct e1000_softc *sc; u32 msix; u32 eims; /* This queue's EIMS bit */ u32 me; struct tx_ring txr; }; struct em_rx_queue { struct e1000_softc *sc; u32 me; u32 msix; u32 eims; struct rx_ring rxr; u64 irqs; struct if_irq que_irq; }; /* Our softc structure */ struct e1000_softc { struct e1000_hw hw; if_softc_ctx_t shared; if_ctx_t ctx; #define tx_num_queues shared->isc_ntxqsets #define rx_num_queues shared->isc_nrxqsets #define intr_type shared->isc_intr /* FreeBSD operating-system-specific structures. */ struct e1000_osdep osdep; device_t dev; struct cdev *led_dev; struct em_tx_queue *tx_queues; struct em_rx_queue *rx_queues; struct if_irq irq; struct resource *memory; struct resource *flash; struct resource *ioport; struct resource *res; void *tag; u32 linkvec; u32 ivars; struct ifmedia *media; int msix; int if_flags; int em_insert_vlan_header; u32 ims; bool in_detach; u32 flags; /* Task for FAST handling */ struct grouptask link_task; u16 num_vlans; u32 txd_cmd; u32 tx_process_limit; u32 rx_process_limit; u32 rx_mbuf_sz; /* Management and WOL features */ u32 wol; bool has_manage; bool has_amt; /* Multicast array memory */ u8 *mta; /* ** Shadow VFTA table, this is needed because ** the real vlan filter table gets cleared during ** a soft reset and the driver needs to be able ** to repopulate it. */ u32 shadow_vfta[EM_VFTA_SIZE]; /* Info about the interface */ u16 link_active; u16 fc; u16 link_speed; u16 link_duplex; u32 smartspeed; u32 dmac; int link_mask; u64 que_mask; /* We need to store this at attach due to e1000 hw/sw locking model */ struct e1000_fw_version fw_ver; struct em_int_delay_info tx_int_delay; struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; struct em_int_delay_info tx_itr; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; unsigned long link_irq; unsigned long rx_overruns; unsigned long watchdog_events; struct e1000_hw_stats stats; u16 vf_ifp; }; /******************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * ********************************************************************************/ typedef struct _em_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int subvendor_id; unsigned int subdevice_id; unsigned int index; } em_vendor_info_t; void em_dump_rs(struct e1000_softc *); #define EM_RSSRK_SIZE 4 #define EM_RSSRK_VAL(key, i) (key[(i) * EM_RSSRK_SIZE] | \ key[(i) * EM_RSSRK_SIZE + 1] << 8 | \ key[(i) * EM_RSSRK_SIZE + 2] << 16 | \ key[(i) * EM_RSSRK_SIZE + 3] << 24) #endif /* _EM_H_DEFINED_ */