diff --git a/sys/dev/e1000/em_txrx.c b/sys/dev/e1000/em_txrx.c index 58f9345ea19d..aebafca73cca 100644 --- a/sys/dev/e1000/em_txrx.c +++ b/sys/dev/e1000/em_txrx.c @@ -1,780 +1,777 @@ /*- * Copyright (c) 2016 Nicole Graziano * Copyright (c) 2017 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower); static int em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower); static int em_isc_txd_encap(void *arg, if_pkt_info_t pi); static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru); static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t); static int em_determine_rsstype(u32 pkt_info); extern int em_intr(void *arg); struct if_txrx em_txrx = { .ift_txd_encap = em_isc_txd_encap, .ift_txd_flush = em_isc_txd_flush, .ift_txd_credits_update = em_isc_txd_credits_update, .ift_rxd_available = em_isc_rxd_available, .ift_rxd_pkt_get = em_isc_rxd_pkt_get, .ift_rxd_refill = em_isc_rxd_refill, .ift_rxd_flush = em_isc_rxd_flush, .ift_legacy_intr = em_intr }; struct if_txrx lem_txrx = { .ift_txd_encap = em_isc_txd_encap, .ift_txd_flush = em_isc_txd_flush, .ift_txd_credits_update = em_isc_txd_credits_update, .ift_rxd_available = lem_isc_rxd_available, .ift_rxd_pkt_get = lem_isc_rxd_pkt_get, .ift_rxd_refill = lem_isc_rxd_refill, .ift_rxd_flush = em_isc_rxd_flush, .ift_legacy_intr = em_intr }; extern if_shared_ctx_t em_sctx; void em_dump_rs(struct e1000_softc *sc) { if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que; struct tx_ring *txr; qidx_t i, ntxd, qid, cur; int16_t rs_cidx; uint8_t status; printf("\n"); ntxd = scctx->isc_ntxd[0]; for (qid = 0; qid < sc->tx_num_queues; qid++) { que = &sc->tx_queues[qid]; txr = &que->txr; rs_cidx = txr->tx_rs_cidx; if (rs_cidx != txr->tx_rs_pidx) { cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].upper.fields.status; if (!(status & E1000_TXD_STAT_DD)) printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur); } else { rs_cidx = (rs_cidx-1)&(ntxd-1); cur = txr->tx_rsq[rs_cidx]; printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d ", qid, rs_cidx, cur); } printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed, txr->tx_rs_pidx); for (i = 0; i < ntxd; i++) { if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD) printf("%d set ", i); } printf("\n"); } } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) { if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; struct e1000_context_desc *TXD; int cur, hdr_len; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = pi->ipi_pidx; TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen; TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. */ TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(sc->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (pi->ipi_len - hdr_len)); /* Total len */ txr->tx_tso = true; if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur); return (cur); } #define TSO_WORKAROUND 4 #define DONT_FORCE_CTX 1 /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. **********************************************************************/ static int em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) { struct e1000_context_desc *TXD = NULL; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int csum_flags = pi->ipi_csum_flags; int cur, hdr_len; u32 cmd; cur = pi->ipi_pidx; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen; cmd = sc->txd_cmd; /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (DONT_FORCE_CTX && sc->tx_num_queues == 1 && txr->csum_lhlen == pi->ipi_ehdrlen && txr->csum_iphlen == pi->ipi_ip_hlen && txr->csum_flags == csum_flags) { /* * Same csum offload context as the previous packets; * just return. */ *txd_upper = txr->csum_txd_upper; *txd_lower = txr->csum_txd_lower; return (cur); } TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; if (csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); cmd |= E1000_TXD_CMD_IP; } if (csum_flags & (CSUM_TCP|CSUM_UDP)) { uint8_t tucso; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; if (csum_flags & CSUM_TCP) { tucso = hdr_len + offsetof(struct tcphdr, th_sum); cmd |= E1000_TXD_CMD_TCP; } else tucso = hdr_len + offsetof(struct udphdr, uh_sum); TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } txr->csum_lhlen = pi->ipi_ehdrlen; txr->csum_iphlen = pi->ipi_ip_hlen; txr->csum_flags = csum_flags; txr->csum_txd_upper = *txd_upper; txr->csum_txd_lower = *txd_lower; TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd); if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(sc->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n", csum_flags, *txd_upper, *txd_lower, hdr_len, cmd); return (cur); } static int em_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; bus_dma_segment_t *segs = pi->ipi_segs; int nsegs = pi->ipi_nsegs; int csum_flags = pi->ipi_csum_flags; int i, j, first, pidx_last; u32 txd_flags, txd_upper = 0, txd_lower = 0; struct e1000_tx_desc *ctxd = NULL; bool do_tso, tso_desc; qidx_t ntxd; txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0; i = first = pi->ipi_pidx; do_tso = (csum_flags & CSUM_TSO); tso_desc = false; ntxd = scctx->isc_ntxd[0]; /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ if ((!do_tso) && (txr->tx_tso == true)) { if (nsegs == 1) tso_desc = true; txr->tx_tso = false; } /* Do hardware assists */ if (do_tso) { i = em_tso_setup(sc, pi, &txd_upper, &txd_lower); tso_desc = true; } else if (csum_flags & EM_CSUM_OFFLOAD) { i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower); } if (pi->ipi_mflags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(pi->ipi_vtag) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i); /* XXX sc->pcix_82544 -- lem_fill_descriptors */ /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; uint32_t cmd; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd; /* * TSO Workaround: * If this is the last descriptor, we want to * split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == scctx->isc_ntxd[0]) i = 0; /* Now make the sentinel */ ctxd = &txr->tx_base[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } } /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ if (txd_flags && nsegs) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; DPRINTF(iflib_get_dev(sc->ctx), "setting to RS on %d rs_pidx %d first: %d\n", pidx_last, txr->tx_rs_pidx, first); txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags); DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i); pi->ipi_new_pidx = i; return (0); } static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct e1000_softc *sc = arg; struct em_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx); } static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); status = txr->tx_base[cur].upper.fields.status; updated = !!(status & E1000_TXD_STAT_DD); if (!updated) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); DPRINTF(iflib_get_dev(sc->ctx), "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n", __FUNCTION__, prev, cur, clear, delta); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); status = txr->tx_base[cur].upper.fields.status; } while ((status & E1000_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return(processed); } static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx]; rxd->buffer_addr = htole64(paddrs[i]); /* status bits must be cleared */ rxd->status = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; uint16_t rxqid = iru->iru_qsidx; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = &rxr->rx_base[next_pidx]; rxd->read.buffer_addr = htole64(paddrs[i]); /* DD bits must be cleared */ rxd->wb.upper.status_error = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct e1000_softc *sc = arg; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); } static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; u32 staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; u32 staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; u16 len; u32 status, errors; bool eop; int i, cidx; status = errors = i = 0; cidx = ri->iri_cidx; do { rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx]; status = rxd->status; errors = rxd->errors; /* Error Checking then decrement count */ MPASS ((status & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->length); ri->iri_len += len; eop = (status & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) { sc->dropped_pkts++; /* XXX fixup if common */ return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->status = 0; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); /* XXX add a faster way to look this up */ if (sc->hw.mac.type >= e1000_82543) em_receive_checksum(status, errors, ri); if (status & E1000_RXD_STAT_VP) { ri->iri_vtag = le16toh(rxd->special); ri->iri_flags |= M_VLANTAG; } ri->iri_nfrags = i; return (0); } static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; u16 len; u32 pkt_info; u32 staterr = 0; bool eop; - int i, cidx, vtag; + int i, cidx; - i = vtag = 0; + i = 0; cidx = ri->iri_cidx; do { rxd = &rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le32toh(rxd->wb.lower.mrq); /* Error Checking then decrement count */ MPASS ((staterr & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ri->iri_len += len; eop = (staterr & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { sc->dropped_pkts++; return EBADMSG; } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->wb.upper.status_error &= htole32(~0xFF); if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); if (scctx->isc_capenable & IFCAP_RXCSUM) em_receive_checksum(staterr, staterr >> 24, ri); if (staterr & E1000_RXD_STAT_VP) { - vtag = le16toh(rxd->wb.upper.vlan); - } - - ri->iri_vtag = vtag; - if (vtag) + ri->iri_vtag = le16toh(rxd->wb.upper.vlan); ri->iri_flags |= M_VLANTAG; + } ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = em_determine_rsstype(pkt_info); ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri) { if (__predict_false(status & E1000_RXD_STAT_IXSM)) return; /* If there is a layer 3 or 4 error we are done */ if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE))) return; /* IP Checksum Good */ if (status & E1000_RXD_STAT_IPCS) ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); /* Valid L4E checksum */ if (__predict_true(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) { ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; ri->iri_csum_data = htons(0xffff); } } /******************************************************************** * * Parse the packet type to determine the appropriate hash * ******************************************************************/ static int em_determine_rsstype(u32 pkt_info) { switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case E1000_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case E1000_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case E1000_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } diff --git a/sys/dev/e1000/igb_txrx.c b/sys/dev/e1000/igb_txrx.c index 548ceee8ab46..1c1500e78d19 100644 --- a/sys/dev/e1000/igb_txrx.c +++ b/sys/dev/e1000/igb_txrx.c @@ -1,574 +1,571 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi); static void igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); static void igb_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status); static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status); static void igb_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype); static int igb_determine_rsstype(uint16_t pkt_info); extern void igb_if_enable_intr(if_ctx_t ctx); extern int em_intr(void *arg); struct if_txrx igb_txrx = { .ift_txd_encap = igb_isc_txd_encap, .ift_txd_flush = igb_isc_txd_flush, .ift_txd_credits_update = igb_isc_txd_credits_update, .ift_rxd_available = igb_isc_rxd_available, .ift_rxd_pkt_get = igb_isc_rxd_pkt_get, .ift_rxd_refill = igb_isc_rxd_refill, .ift_rxd_flush = igb_isc_rxd_flush, .ift_legacy_intr = em_intr }; /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct e1000_softc *sc = txr->sc; uint32_t type_tucmd_mlhl = 0, vlan_macip_lens = 0; uint32_t mss_l4len_idx = 0; uint32_t paylen; switch(pi->ipi_etype) { case ETHERTYPE_IPV6: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; case ETHERTYPE_IP: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; break; default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(pi->ipi_etype)); break; } TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* This is used in the transmit desc in encap */ paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; /* VLAN MACLEN IPLEN */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= pi->ipi_ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (sc->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->u.seqnum_seed = htole32(0); *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; return (1); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct e1000_softc *sc = txr->sc; uint32_t vlan_macip_lens, type_tucmd_mlhl; uint32_t mss_l4len_idx; mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; /* First check if TSO is to be used */ if (pi->ipi_csum_flags & CSUM_TSO) return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status)); /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); } else if ((pi->ipi_csum_flags & IGB_CSUM_OFFLOAD) == 0) { return (0); } /* Set the ether header length */ vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; switch(pi->ipi_etype) { case ETHERTYPE_IP: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: break; } vlan_macip_lens |= pi->ipi_ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (pi->ipi_ipproto) { case IPPROTO_TCP: if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } break; case IPPROTO_UDP: if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } break; case IPPROTO_SCTP: if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } break; default: break; } /* 82575 needs the queue index added */ if (sc->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->u.seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); return (1); } static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; union e1000_adv_tx_desc *txd = NULL; int i, j, pidx_last; uint32_t olinfo_status, cmd_type_len, txd_flags; qidx_t ntxd; pidx_last = olinfo_status = 0; /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (pi->ipi_mflags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; i = pi->ipi_pidx; ntxd = scctx->isc_ntxd[0]; txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_ADVTXD_DCMD_RS : 0; /* Consume the first descriptor */ i += igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status); if (i == scctx->isc_ntxd[0]) i = 0; /* 82575 needs the queue index added */ if (sc->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); pidx_last = i; if (++i == scctx->isc_ntxd[0]) { i = 0; } } if (txd_flags) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | txd_flags); pi->ipi_new_pidx = i; return (0); } static void igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct e1000_softc *sc = arg; struct em_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx); } static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status; updated = !!(status & E1000_TXD_STAT_DD); if (!updated) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status; } while ((status & E1000_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return (processed); } static void igb_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; uint16_t rxqid = iru->iru_qsidx; struct em_rx_queue *que = &sc->rx_queues[rxqid]; union e1000_adv_rx_desc *rxd; struct rx_ring *rxr = &que->rxr; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[next_pidx]; rxd->read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct e1000_softc *sc = arg; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); } static int igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_adv_rx_desc *rxd; uint32_t staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } /**************************************************************** * Routine sends data which has been dma'ed into host memory * to upper layer. Initialize ri structure. * * Returns 0 upon success, errno on failure ***************************************************************/ static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct e1000_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union e1000_adv_rx_desc *rxd; - uint16_t pkt_info, len, vtag; + uint16_t pkt_info, len; uint32_t ptype, staterr; int i, cidx; bool eop; - staterr = i = vtag = 0; + staterr = i = 0; cidx = ri->iri_cidx; do { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); MPASS ((staterr & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; ri->iri_len += len; rxr->rx_bytes += ri->iri_len; rxd->wb.upper.status_error = 0; eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); - if (((sc->hw.mac.type == e1000_i350) || - (sc->hw.mac.type == e1000_i354)) && - (staterr & E1000_RXDEXT_STATERR_LB)) - vtag = be16toh(rxd->wb.upper.vlan); - else - vtag = le16toh(rxd->wb.upper.vlan); - /* Make sure bad packets are discarded */ if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { sc->dropped_pkts++; ++rxr->rx_discarded; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; #ifdef notyet if (rxr->hdr_split == true) { ri->iri_frags[i].irf_flid = 1; ri->iri_frags[i].irf_idx = cidx; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; } #endif i++; } while (!eop); rxr->rx_packets++; if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, ri, ptype); - if ((scctx->isc_capenable & IFCAP_VLAN_HWTAGGING) != 0 && - (staterr & E1000_RXD_STAT_VP) != 0) { - ri->iri_vtag = vtag; + if (staterr & E1000_RXD_STAT_VP) { + if (((sc->hw.mac.type == e1000_i350) || + (sc->hw.mac.type == e1000_i354)) && + (staterr & E1000_RXDEXT_STATERR_LB)) + ri->iri_vtag = be16toh(rxd->wb.upper.vlan); + else + ri->iri_vtag = le16toh(rxd->wb.upper.vlan); ri->iri_flags |= M_VLANTAG; } ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = igb_determine_rsstype(pkt_info); ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void igb_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype) { uint16_t status = (uint16_t)staterr; uint8_t errors = (uint8_t)(staterr >> 24); if (__predict_false(status & E1000_RXD_STAT_IXSM)) return; /* If there is a layer 3 or 4 error we are done */ if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE))) return; /* IP Checksum Good */ if (status & E1000_RXD_STAT_IPCS) ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); /* Valid L4E checksum */ if (__predict_true(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) { /* SCTP header present */ if (__predict_false((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)) { ri->iri_csum_flags |= CSUM_SCTP_VALID; } else { ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; ri->iri_csum_data = htons(0xffff); } } } /******************************************************************** * * Parse the packet type to determine the appropriate hash * ******************************************************************/ static int igb_determine_rsstype(uint16_t pkt_info) { switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case E1000_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case E1000_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case E1000_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } diff --git a/sys/dev/iavf/iavf_txrx_iflib.c b/sys/dev/iavf/iavf_txrx_iflib.c index f536f7f23ff5..5f24b9c18452 100644 --- a/sys/dev/iavf/iavf_txrx_iflib.c +++ b/sys/dev/iavf/iavf_txrx_iflib.c @@ -1,790 +1,787 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2021, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ /** * @file iavf_txrx_iflib.c * @brief Tx/Rx hotpath implementation for the iflib driver * * Contains functions used to implement the Tx and Rx hotpaths of the iflib * driver implementation. */ #include "iavf_iflib.h" #include "iavf_txrx_common.h" #ifdef RSS #include #endif /* Local Prototypes */ static void iavf_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype); static int iavf_isc_txd_encap(void *arg, if_pkt_info_t pi); static void iavf_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int iavf_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear); static int iavf_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear); static void iavf_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void iavf_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int iavf_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int iavf_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); /** * @var iavf_txrx_hwb * @brief iflib Tx/Rx operations for head write back * * iflib ops structure for when operating the device in head write back mode. */ struct if_txrx iavf_txrx_hwb = { iavf_isc_txd_encap, iavf_isc_txd_flush, iavf_isc_txd_credits_update_hwb, iavf_isc_rxd_available, iavf_isc_rxd_pkt_get, iavf_isc_rxd_refill, iavf_isc_rxd_flush, NULL }; /** * @var iavf_txrx_dwb * @brief iflib Tx/Rx operations for descriptor write back * * iflib ops structure for when operating the device in descriptor write back * mode. */ struct if_txrx iavf_txrx_dwb = { iavf_isc_txd_encap, iavf_isc_txd_flush, iavf_isc_txd_credits_update_dwb, iavf_isc_rxd_available, iavf_isc_rxd_pkt_get, iavf_isc_rxd_refill, iavf_isc_rxd_flush, NULL }; /** * iavf_is_tx_desc_done - Check if a Tx descriptor is ready * @txr: the Tx ring to check in * @idx: ring index to check * * @returns true if the descriptor has been written back by hardware, and * false otherwise. */ static bool iavf_is_tx_desc_done(struct tx_ring *txr, int idx) { return (((txr->tx_base[idx].cmd_type_offset_bsz >> IAVF_TXD_QW1_DTYPE_SHIFT) & IAVF_TXD_QW1_DTYPE_MASK) == IAVF_TX_DESC_DTYPE_DESC_DONE); } /** * iavf_tso_detect_sparse - detect TSO packets with too many segments * @segs: packet segments array * @nsegs: number of packet segments * @pi: packet information * * Hardware only transmits packets with a maximum of 8 descriptors. For TSO * packets, hardware needs to be able to build the split packets using 8 or * fewer descriptors. Additionally, the header must be contained within at * most 3 descriptors. * * To verify this, we walk the headers to find out how many descriptors the * headers require (usually 1). Then we ensure that, for each TSO segment, its * data plus the headers are contained within 8 or fewer descriptors. * * @returns zero if the packet is valid, one otherwise. */ static int iavf_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi) { int count, curseg, i, hlen, segsz, seglen, tsolen; if (nsegs <= IAVF_MAX_TX_SEGS-2) return (0); segsz = pi->ipi_tso_segsz; curseg = count = 0; hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; tsolen = pi->ipi_len - hlen; i = 0; curseg = segs[0].ds_len; while (hlen > 0) { count++; if (count > IAVF_MAX_TX_SEGS - 2) return (1); if (curseg == 0) { i++; if (__predict_false(i == nsegs)) return (1); curseg = segs[i].ds_len; } seglen = min(curseg, hlen); curseg -= seglen; hlen -= seglen; } while (tsolen > 0) { segsz = pi->ipi_tso_segsz; while (segsz > 0 && tsolen != 0) { count++; if (count > IAVF_MAX_TX_SEGS - 2) { return (1); } if (curseg == 0) { i++; if (__predict_false(i == nsegs)) { return (1); } curseg = segs[i].ds_len; } seglen = min(curseg, segsz); segsz -= seglen; curseg -= seglen; tsolen -= seglen; } count = 0; } return (0); } /** * iavf_tx_setup_offload - Setup Tx offload parameters * @que: pointer to the Tx queue * @pi: Tx packet info * @cmd: pointer to command descriptor value * @off: pointer to offset descriptor value * * Based on packet type and Tx offloads requested, sets up the command and * offset values for a Tx descriptor to enable the requested offloads. */ static void iavf_tx_setup_offload(struct iavf_tx_queue *que __unused, if_pkt_info_t pi, u32 *cmd, u32 *off) { switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: if (pi->ipi_csum_flags & IAVF_CSUM_IPV4) *cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM; else *cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: *cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6; break; #endif default: break; } *off |= (pi->ipi_ehdrlen >> 1) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT; *off |= (pi->ipi_ip_hlen >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT; switch (pi->ipi_ipproto) { case IPPROTO_TCP: if (pi->ipi_csum_flags & IAVF_CSUM_TCP) { *cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP; *off |= (pi->ipi_tcp_hlen >> 2) << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; /* Check for NO_HEAD MDD event */ MPASS(pi->ipi_tcp_hlen != 0); } break; case IPPROTO_UDP: if (pi->ipi_csum_flags & IAVF_CSUM_UDP) { *cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP; *off |= (sizeof(struct udphdr) >> 2) << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } break; case IPPROTO_SCTP: if (pi->ipi_csum_flags & IAVF_CSUM_SCTP) { *cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP; *off |= (sizeof(struct sctphdr) >> 2) << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } /* Fall Thru */ default: break; } } /** * iavf_tso_setup - Setup TSO context descriptor * @txr: the Tx ring to process * @pi: packet info structure * * Enable hardware segmentation offload (TSO) for a given packet by creating * a context descriptor with the necessary details for offloading. * * @returns the new ring index to use for the data descriptor. */ static int iavf_tso_setup(struct tx_ring *txr, if_pkt_info_t pi) { if_softc_ctx_t scctx; struct iavf_tx_context_desc *TXD; u32 cmd, mss, type, tsolen; int idx, total_hdr_len; u64 type_cmd_tso_mss; idx = pi->ipi_pidx; TXD = (struct iavf_tx_context_desc *) &txr->tx_base[idx]; total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; tsolen = pi->ipi_len - total_hdr_len; scctx = txr->que->vsi->shared; type = IAVF_TX_DESC_DTYPE_CONTEXT; cmd = IAVF_TX_CTX_DESC_TSO; /* * TSO MSS must not be less than 64; this prevents a * BAD_LSO_MSS MDD event when the MSS is too small. */ if (pi->ipi_tso_segsz < IAVF_MIN_TSO_MSS) { txr->mss_too_small++; pi->ipi_tso_segsz = IAVF_MIN_TSO_MSS; } mss = pi->ipi_tso_segsz; /* Check for BAD_LS0_MSS MDD event (mss too large) */ MPASS(mss <= IAVF_MAX_TSO_MSS); /* Check for NO_HEAD MDD event (header lengths are 0) */ MPASS(pi->ipi_ehdrlen != 0); MPASS(pi->ipi_ip_hlen != 0); /* Partial check for BAD_LSO_LEN MDD event */ MPASS(tsolen != 0); /* Partial check for WRONG_SIZE MDD event (during TSO) */ MPASS(total_hdr_len + mss <= IAVF_MAX_FRAME); type_cmd_tso_mss = ((u64)type << IAVF_TXD_CTX_QW1_DTYPE_SHIFT) | ((u64)cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) | ((u64)tsolen << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) | ((u64)mss << IAVF_TXD_CTX_QW1_MSS_SHIFT); TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss); TXD->tunneling_params = htole32(0); txr->que->tso++; return ((idx + 1) & (scctx->isc_ntxd[0]-1)); } #define IAVF_TXD_CMD (IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_RS) /** * iavf_isc_txd_encap - Encapsulate a Tx packet into descriptors * @arg: void pointer to the VSI structure * @pi: packet info to encapsulate * * This routine maps the mbufs to tx descriptors, allowing the * TX engine to transmit the packets. * * @returns 0 on success, positive on failure */ static int iavf_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct iavf_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct iavf_tx_queue *que = &vsi->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; struct iavf_tx_desc *txd = NULL; int i, j, mask, pidx_last; u32 cmd, off, tx_intr; if (__predict_false(pi->ipi_len < IAVF_MIN_FRAME)) { que->pkt_too_small++; return (EINVAL); } cmd = off = 0; i = pi->ipi_pidx; tx_intr = (pi->ipi_flags & IPI_TX_INTR); /* Set up the TSO/CSUM offload */ if (pi->ipi_csum_flags & CSUM_OFFLOAD) { /* Set up the TSO context descriptor if required */ if (pi->ipi_csum_flags & CSUM_TSO) { /* Prevent MAX_BUFF MDD event (for TSO) */ if (iavf_tso_detect_sparse(segs, nsegs, pi)) return (EFBIG); i = iavf_tso_setup(txr, pi); } iavf_tx_setup_offload(que, pi, &cmd, &off); } if (pi->ipi_mflags & M_VLANTAG) cmd |= IAVF_TX_DESC_CMD_IL2TAG1; cmd |= IAVF_TX_DESC_CMD_ICRC; mask = scctx->isc_ntxd[0] - 1; /* Check for WRONG_SIZE MDD event */ MPASS(pi->ipi_len >= IAVF_MIN_FRAME); #ifdef INVARIANTS if (!(pi->ipi_csum_flags & CSUM_TSO)) MPASS(pi->ipi_len <= IAVF_MAX_FRAME); #endif for (j = 0; j < nsegs; j++) { bus_size_t seglen; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; /* Check for ZERO_BSIZE MDD event */ MPASS(seglen != 0); txd->buffer_addr = htole64(segs[j].ds_addr); txd->cmd_type_offset_bsz = htole64(IAVF_TX_DESC_DTYPE_DATA | ((u64)cmd << IAVF_TXD_QW1_CMD_SHIFT) | ((u64)off << IAVF_TXD_QW1_OFFSET_SHIFT) | ((u64)seglen << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) | ((u64)htole16(pi->ipi_vtag) << IAVF_TXD_QW1_L2TAG1_SHIFT)); txr->tx_bytes += seglen; pidx_last = i; i = (i+1) & mask; } /* Set the last descriptor for report */ txd->cmd_type_offset_bsz |= htole64(((u64)IAVF_TXD_CMD << IAVF_TXD_QW1_CMD_SHIFT)); /* Add to report status array (if using TX interrupts) */ if (!vsi->enable_head_writeback && tx_intr) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask; MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } pi->ipi_new_pidx = i; ++txr->tx_packets; return (0); } /** * iavf_isc_txd_flush - Flush Tx ring * @arg: void pointer to the VSI * @txqid: the Tx queue to flush * @pidx: the ring index to flush to * * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ static void iavf_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct iavf_vsi *vsi = arg; struct tx_ring *txr = &vsi->tx_queues[txqid].txr; /* Check for ENDLESS_TX MDD event */ MPASS(pidx < vsi->shared->isc_ntxd[0]); wr32(vsi->hw, txr->tail, pidx); } /** * iavf_init_tx_ring - Initialize queue Tx ring * @vsi: pointer to the VSI * @que: pointer to queue to initialize * * (Re)Initialize a queue transmit ring by clearing its memory. */ void iavf_init_tx_ring(struct iavf_vsi *vsi, struct iavf_tx_queue *que) { struct tx_ring *txr = &que->txr; /* Clear the old ring contents */ bzero((void *)txr->tx_base, (sizeof(struct iavf_tx_desc)) * (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0))); wr32(vsi->hw, txr->tail, 0); } /** * iavf_get_tx_head - Get the index of the head of a ring * @que: queue to read * * Retrieve the value from the location the HW records its HEAD index * * @returns the index of the HW head of the Tx queue */ static inline u32 iavf_get_tx_head(struct iavf_tx_queue *que) { if_softc_ctx_t scctx = que->vsi->shared; struct tx_ring *txr = &que->txr; void *head = &txr->tx_base[scctx->isc_ntxd[0]]; return LE32_TO_CPU(*(volatile __le32 *)head); } /** * iavf_isc_txd_credits_update_hwb - Update Tx ring credits * @arg: void pointer to the VSI * @qid: the queue id to update * @clear: whether to update or only report current status * * Checks the number of packets in the queue that could be cleaned up. * * if clear is true, the iflib stack has cleaned the packets and is * notifying the driver to update its processed ring pointer. * * @returns the number of packets in the ring that can be cleaned. * * @remark this function is intended for the head write back mode. */ static int iavf_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear) { struct iavf_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct iavf_tx_queue *que = &vsi->tx_queues[qid]; struct tx_ring *txr = &que->txr; int head, credits; /* Get the Head WB value */ head = iavf_get_tx_head(que); credits = head - txr->tx_cidx_processed; if (credits < 0) credits += scctx->isc_ntxd[0]; if (clear) txr->tx_cidx_processed = head; return (credits); } /** * iavf_isc_txd_credits_update_dwb - Update Tx ring credits * @arg: void pointer to the VSI * @txqid: the queue id to update * @clear: whether to update or only report current status * * Checks the number of packets in the queue that could be cleaned up. * * if clear is true, the iflib stack has cleaned the packets and is * notifying the driver to update its processed ring pointer. * * @returns the number of packets in the ring that can be cleaned. * * @remark this function is intended for the descriptor write back mode. */ static int iavf_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear) { struct iavf_vsi *vsi = arg; struct iavf_tx_queue *tx_que = &vsi->tx_queues[txqid]; if_softc_ctx_t scctx = vsi->shared; struct tx_ring *txr = &tx_que->txr; qidx_t processed = 0; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; bool is_done; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); is_done = iavf_is_tx_desc_done(txr, cur); if (!is_done) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); is_done = iavf_is_tx_desc_done(txr, cur); } while (is_done); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return (processed); } /** * iavf_isc_rxd_refill - Prepare descriptors for re-use * @arg: void pointer to the VSI * @iru: the Rx descriptor update structure * * Update Rx descriptors for a given queue so that they can be re-used by * hardware for future packets. */ static void iavf_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct iavf_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr); uint64_t *paddrs; uint16_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } /** * iavf_isc_rxd_flush - Notify hardware of new Rx descriptors * @arg: void pointer to the VSI * @rxqid: Rx queue to update * @flid: unused parameter * @pidx: ring index to update to * * Updates the tail pointer of the Rx ring, notifying hardware of new * descriptors available for receiving packets. */ static void iavf_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct iavf_vsi *vsi = arg; struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr; wr32(vsi->hw, rxr->tail, pidx); } /** * iavf_isc_rxd_available - Calculate number of available Rx descriptors * @arg: void pointer to the VSI * @rxqid: Rx queue to check * @idx: starting index to check from * @budget: maximum Rx budget * * Determines how many packets are ready to be processed in the Rx queue, up * to the specified budget. * * @returns the number of packets ready to be processed, up to the budget. */ static int iavf_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct iavf_vsi *vsi = arg; struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr; union iavf_rx_desc *rxd; u64 qword; uint32_t status; int cnt, i, nrxd; nrxd = vsi->shared->isc_nrxd[0]; for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) { rxd = &rxr->rx_base[i]; qword = le64toh(rxd->wb.qword1.status_error_len); status = (qword & IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT; if ((status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) == 0) break; if (++i == nrxd) i = 0; if (status & (1 << IAVF_RX_DESC_STATUS_EOF_SHIFT)) cnt++; } return (cnt); } /** * iavf_isc_rxd_pkt_get - Decapsulate packet from Rx descriptors * @arg: void pointer to the VSI * @ri: packet info structure * * Read packet data from the Rx ring descriptors and fill in the packet info * structure so that the iflib stack can process the packet. * * @remark this routine executes in ithread context. * * @returns zero success, or EBADMSG if the packet is corrupted. */ static int iavf_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct iavf_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct iavf_rx_queue *que = &vsi->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union iavf_rx_desc *cur; u32 status, error; - u16 plen, vtag; + u16 plen; u64 qword; u8 ptype; bool eop; int i, cidx; cidx = ri->iri_cidx; i = 0; do { /* 5 descriptor receive limit */ MPASS(i < IAVF_MAX_RX_SEGS); cur = &rxr->rx_base[cidx]; qword = le64toh(cur->wb.qword1.status_error_len); status = (qword & IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT; error = (qword & IAVF_RXD_QW1_ERROR_MASK) >> IAVF_RXD_QW1_ERROR_SHIFT; plen = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT; /* we should never be called without a valid descriptor */ MPASS((status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) != 0); ri->iri_len += plen; rxr->rx_bytes += plen; cur->wb.qword1.status_error_len = 0; eop = (status & (1 << IAVF_RX_DESC_STATUS_EOF_SHIFT)); - if (status & (1 << IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) - vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); - else - vtag = 0; /* ** Make sure bad packets are discarded, ** note that only EOP descriptor has valid ** error results. */ if (eop && (error & (1 << IAVF_RX_DESC_ERROR_RXE_SHIFT))) { rxr->desc_errs++; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = plen; if (++cidx == vsi->shared->isc_nrxd[0]) cidx = 0; i++; } while (!eop); /* capture data for dynamic ITR adjustment */ rxr->packets++; rxr->rx_packets++; if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0) iavf_rx_checksum(ri, status, error, ptype); ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss); ri->iri_rsstype = iavf_ptype_to_hash(ptype); - ri->iri_vtag = vtag; - ri->iri_nfrags = i; - if (vtag) + if (status & (1 << IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) { + ri->iri_vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); ri->iri_flags |= M_VLANTAG; + } + ri->iri_nfrags = i; return (0); } /** * iavf_rx_checksum - Handle Rx hardware checksum indication * @ri: Rx packet info structure * @status: status from Rx descriptor * @error: error from Rx descriptor * @ptype: packet type * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. */ static void iavf_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype) { struct iavf_rx_ptype_decoded decoded; ri->iri_csum_flags = 0; /* No L3 or L4 checksum was calculated */ if (!(status & (1 << IAVF_RX_DESC_STATUS_L3L4P_SHIFT))) return; decoded = decode_rx_desc_ptype(ptype); /* IPv6 with extension headers likely have bad csum */ if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6) { if (status & (1 << IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { ri->iri_csum_flags = 0; return; } } ri->iri_csum_flags |= CSUM_L3_CALC; /* IPv4 checksum error */ if (error & (1 << IAVF_RX_DESC_ERROR_IPE_SHIFT)) return; ri->iri_csum_flags |= CSUM_L3_VALID; ri->iri_csum_flags |= CSUM_L4_CALC; /* L4 checksum error */ if (error & (1 << IAVF_RX_DESC_ERROR_L4E_SHIFT)) return; ri->iri_csum_flags |= CSUM_L4_VALID; ri->iri_csum_data |= htons(0xffff); } diff --git a/sys/dev/ice/ice_iflib_txrx.c b/sys/dev/ice/ice_iflib_txrx.c index 7d89c51ddec0..0ce59d96c0ed 100644 --- a/sys/dev/ice/ice_iflib_txrx.c +++ b/sys/dev/ice/ice_iflib_txrx.c @@ -1,402 +1,399 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2021, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ /** * @file ice_iflib_txrx.c * @brief iflib Tx/Rx hotpath * * Main location for the iflib Tx/Rx hotpath implementation. * * Contains the implementation for the iflib function callbacks and the * if_txrx ops structure. */ #include "ice_iflib.h" /* Tx/Rx hotpath utility functions */ #include "ice_common_txrx.h" /* * iflib txrx method declarations */ static int ice_ift_txd_encap(void *arg, if_pkt_info_t pi); static int ice_ift_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void ice_ift_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int ice_ift_txd_credits_update(void *arg, uint16_t txqid, bool clear); static int ice_ift_rxd_available(void *arg, uint16_t rxqid, qidx_t pidx, qidx_t budget); static void ice_ift_rxd_flush(void *arg, uint16_t rxqid, uint8_t flidx, qidx_t pidx); static void ice_ift_rxd_refill(void *arg, if_rxd_update_t iru); /* Macro to help extract the NIC mode flexible Rx descriptor fields from the * advanced 32byte Rx descriptors. */ #define RX_FLEX_NIC(desc, field) \ (((struct ice_32b_rx_flex_desc_nic *)desc)->field) /** * @var ice_txrx * @brief Tx/Rx operations for the iflib stack * * Structure defining the Tx and Rx related operations that iflib can request * the driver to perform. These are the main entry points for the hot path of * the transmit and receive paths in the iflib driver. */ struct if_txrx ice_txrx = { .ift_txd_encap = ice_ift_txd_encap, .ift_txd_flush = ice_ift_txd_flush, .ift_txd_credits_update = ice_ift_txd_credits_update, .ift_rxd_available = ice_ift_rxd_available, .ift_rxd_pkt_get = ice_ift_rxd_pkt_get, .ift_rxd_refill = ice_ift_rxd_refill, .ift_rxd_flush = ice_ift_rxd_flush, }; /** * ice_ift_txd_encap - prepare Tx descriptors for a packet * @arg: the iflib softc structure pointer * @pi: packet info * * Prepares and encapsulates the given packet into into Tx descriptors, in * preparation for sending to the transmit engine. Sets the necessary context * descriptors for TSO and other offloads, and prepares the last descriptor * for the writeback status. * * Return 0 on success, non-zero error code on failure. */ static int ice_ift_txd_encap(void *arg, if_pkt_info_t pi) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_tx_queue *txq = &sc->pf_vsi.tx_queues[pi->ipi_qsidx]; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; struct ice_tx_desc *txd = NULL; int i, j, mask, pidx_last; u32 cmd, off; cmd = off = 0; i = pi->ipi_pidx; /* Set up the TSO/CSUM offload */ if (pi->ipi_csum_flags & ICE_CSUM_OFFLOAD) { /* Set up the TSO context descriptor if required */ if (pi->ipi_csum_flags & CSUM_TSO) { if (ice_tso_detect_sparse(pi)) return (EFBIG); i = ice_tso_setup(txq, pi); } ice_tx_setup_offload(txq, pi, &cmd, &off); } if (pi->ipi_mflags & M_VLANTAG) cmd |= ICE_TX_DESC_CMD_IL2TAG1; mask = txq->desc_count - 1; for (j = 0; j < nsegs; j++) { bus_size_t seglen; txd = &txq->tx_base[i]; seglen = segs[j].ds_len; txd->buf_addr = htole64(segs[j].ds_addr); txd->cmd_type_offset_bsz = htole64(ICE_TX_DESC_DTYPE_DATA | ((u64)cmd << ICE_TXD_QW1_CMD_S) | ((u64)off << ICE_TXD_QW1_OFFSET_S) | ((u64)seglen << ICE_TXD_QW1_TX_BUF_SZ_S) | ((u64)htole16(pi->ipi_vtag) << ICE_TXD_QW1_L2TAG1_S)); txq->stats.tx_bytes += seglen; pidx_last = i; i = (i+1) & mask; } /* Set the last descriptor for report */ #define ICE_TXD_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS) txd->cmd_type_offset_bsz |= htole64(((u64)ICE_TXD_CMD << ICE_TXD_QW1_CMD_S)); /* Add to report status array */ txq->tx_rsq[txq->tx_rs_pidx] = pidx_last; txq->tx_rs_pidx = (txq->tx_rs_pidx+1) & mask; MPASS(txq->tx_rs_pidx != txq->tx_rs_cidx); pi->ipi_new_pidx = i; ++txq->stats.tx_packets; return (0); } /** * ice_ift_txd_flush - Flush Tx descriptors to hardware * @arg: device specific softc pointer * @txqid: the Tx queue to flush * @pidx: descriptor index to advance tail to * * Advance the Transmit Descriptor Tail (TDT). This indicates to hardware that * frames are available for transmit. */ static void ice_ift_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_tx_queue *txq = &sc->pf_vsi.tx_queues[txqid]; struct ice_hw *hw = &sc->hw; wr32(hw, txq->tail, pidx); } /** * ice_ift_txd_credits_update - cleanup Tx descriptors * @arg: device private softc * @txqid: the Tx queue to update * @clear: if false, only report, do not actually clean * * If clear is false, iflib is asking if we *could* clean up any Tx * descriptors. * * If clear is true, iflib is requesting to cleanup and reclaim used Tx * descriptors. */ static int ice_ift_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_tx_queue *txq = &sc->pf_vsi.tx_queues[txqid]; qidx_t processed = 0; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; bool is_done; rs_cidx = txq->tx_rs_cidx; if (rs_cidx == txq->tx_rs_pidx) return (0); cur = txq->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); is_done = ice_is_tx_desc_done(&txq->tx_base[cur]); if (!is_done) return (0); else if (clear == false) return (1); prev = txq->tx_cidx_processed; ntxd = txq->desc_count; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txq->tx_rs_pidx) break; cur = txq->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); is_done = ice_is_tx_desc_done(&txq->tx_base[cur]); } while (is_done); txq->tx_rs_cidx = rs_cidx; txq->tx_cidx_processed = prev; return (processed); } /** * ice_ift_rxd_available - Return number of available Rx packets * @arg: device private softc * @rxqid: the Rx queue id * @pidx: descriptor start point * @budget: maximum Rx budget * * Determines how many Rx packets are available on the queue, up to a maximum * of the given budget. */ static int ice_ift_rxd_available(void *arg, uint16_t rxqid, qidx_t pidx, qidx_t budget) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_rx_queue *rxq = &sc->pf_vsi.rx_queues[rxqid]; union ice_32b_rx_flex_desc *rxd; uint16_t status0; int cnt, i, nrxd; nrxd = rxq->desc_count; for (cnt = 0, i = pidx; cnt < nrxd - 1 && cnt < budget;) { rxd = &rxq->rx_base[i]; status0 = le16toh(rxd->wb.status_error0); if ((status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) == 0) break; if (++i == nrxd) i = 0; if (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)) cnt++; } return (cnt); } /** * ice_ift_rxd_pkt_get - Called by iflib to send data to upper layer * @arg: device specific softc * @ri: receive packet info * * This function is called by iflib, and executes in ithread context. It is * called by iflib to obtain data which has been DMA'ed into host memory. * Returns zero on success, and an error code on failure. */ static int ice_ift_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct ice_softc *sc = (struct ice_softc *)arg; if_softc_ctx_t scctx = sc->scctx; struct ice_rx_queue *rxq = &sc->pf_vsi.rx_queues[ri->iri_qsidx]; union ice_32b_rx_flex_desc *cur; - u16 status0, plen, vtag, ptype; + u16 status0, plen, ptype; bool eop; size_t cidx; int i; cidx = ri->iri_cidx; i = 0; do { /* 5 descriptor receive limit */ MPASS(i < ICE_MAX_RX_SEGS); cur = &rxq->rx_base[cidx]; status0 = le16toh(cur->wb.status_error0); plen = le16toh(cur->wb.pkt_len) & ICE_RX_FLX_DESC_PKT_LEN_M; ptype = le16toh(cur->wb.ptype_flex_flags0) & ICE_RX_FLEX_DESC_PTYPE_M; /* we should never be called without a valid descriptor */ MPASS((status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) != 0); ri->iri_len += plen; cur->wb.status_error0 = 0; eop = (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)); - if (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) - vtag = le16toh(cur->wb.l2tag1); - else - vtag = 0; /* * Make sure packets with bad L2 values are discarded. * NOTE: Only the EOP descriptor has valid error results. */ if (eop && (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S))) { rxq->stats.desc_errs++; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = plen; if (++cidx == rxq->desc_count) cidx = 0; i++; } while (!eop); /* capture soft statistics for this Rx queue */ rxq->stats.rx_packets++; rxq->stats.rx_bytes += ri->iri_len; if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0) ice_rx_checksum(rxq, &ri->iri_csum_flags, &ri->iri_csum_data, status0, ptype); ri->iri_flowid = le32toh(RX_FLEX_NIC(&cur->wb, rss_hash)); ri->iri_rsstype = ice_ptype_to_hash(ptype); - ri->iri_vtag = vtag; - ri->iri_nfrags = i; - if (vtag) + if (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) { + ri->iri_vtag = le16toh(cur->wb.l2tag1); ri->iri_flags |= M_VLANTAG; + } + ri->iri_nfrags = i; return (0); } /** * ice_ift_rxd_refill - Prepare Rx descriptors for re-use by hardware * @arg: device specific softc structure * @iru: the Rx descriptor update structure * * Update the Rx descriptor indices for a given queue, assigning new physical * addresses to the descriptors, preparing them for re-use by the hardware. */ static void ice_ift_rxd_refill(void *arg, if_rxd_update_t iru) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_rx_queue *rxq; uint32_t next_pidx; int i; uint64_t *paddrs; uint32_t pidx; uint16_t qsidx, count; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; qsidx = iru->iru_qsidx; count = iru->iru_count; rxq = &(sc->pf_vsi.rx_queues[qsidx]); for (i = 0, next_pidx = pidx; i < count; i++) { rxq->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == (uint32_t)rxq->desc_count) next_pidx = 0; } } /** * ice_ift_rxd_flush - Flush Rx descriptors to hardware * @arg: device specific softc pointer * @rxqid: the Rx queue to flush * @flidx: unused parameter * @pidx: descriptor index to advance tail to * * Advance the Receive Descriptor Tail (RDT). This indicates to hardware that * software is done with the descriptor and it can be recycled. */ static void ice_ift_rxd_flush(void *arg, uint16_t rxqid, uint8_t flidx __unused, qidx_t pidx) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_rx_queue *rxq = &sc->pf_vsi.rx_queues[rxqid]; struct ice_hw *hw = &sc->hw; wr32(hw, rxq->tail, pidx); } diff --git a/sys/dev/igc/igc_txrx.c b/sys/dev/igc/igc_txrx.c index ee4d5a5c0c6b..4f8234c855f6 100644 --- a/sys/dev/igc/igc_txrx.c +++ b/sys/dev/igc/igc_txrx.c @@ -1,594 +1,592 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Matthew Macy * All rights reserved. * Copyright (c) 2021 Rubicon Communications, LLC (Netgate) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "if_igc.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int igc_isc_txd_encap(void *arg, if_pkt_info_t pi); static void igc_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int igc_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); static void igc_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void igc_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int igc_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int igc_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static int igc_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status); static int igc_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status); static void igc_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype); static int igc_determine_rsstype(uint16_t pkt_info); extern void igc_if_enable_intr(if_ctx_t ctx); extern int igc_intr(void *arg); struct if_txrx igc_txrx = { .ift_txd_encap = igc_isc_txd_encap, .ift_txd_flush = igc_isc_txd_flush, .ift_txd_credits_update = igc_isc_txd_credits_update, .ift_rxd_available = igc_isc_rxd_available, .ift_rxd_pkt_get = igc_isc_rxd_pkt_get, .ift_rxd_refill = igc_isc_rxd_refill, .ift_rxd_flush = igc_isc_rxd_flush, .ift_legacy_intr = igc_intr }; void igc_dump_rs(struct igc_adapter *adapter) { if_softc_ctx_t scctx = adapter->shared; struct igc_tx_queue *que; struct tx_ring *txr; qidx_t i, ntxd, qid, cur; int16_t rs_cidx; uint8_t status; printf("\n"); ntxd = scctx->isc_ntxd[0]; for (qid = 0; qid < adapter->tx_num_queues; qid++) { que = &adapter->tx_queues[qid]; txr = &que->txr; rs_cidx = txr->tx_rs_cidx; if (rs_cidx != txr->tx_rs_pidx) { cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].upper.fields.status; if (!(status & IGC_TXD_STAT_DD)) printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur); } else { rs_cidx = (rs_cidx-1)&(ntxd-1); cur = txr->tx_rsq[rs_cidx]; printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d ", qid, rs_cidx, cur); } printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed, txr->tx_rs_pidx); for (i = 0; i < ntxd; i++) { if (txr->tx_base[i].upper.fields.status & IGC_TXD_STAT_DD) printf("%d set ", i); } printf("\n"); } } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igc_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status) { struct igc_adv_tx_context_desc *TXD; uint32_t type_tucmd_mlhl = 0, vlan_macip_lens = 0; uint32_t mss_l4len_idx = 0; uint32_t paylen; switch(pi->ipi_etype) { case ETHERTYPE_IPV6: type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6; break; case ETHERTYPE_IP: type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ *olinfo_status |= IGC_TXD_POPTS_IXSM << 8; break; default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(pi->ipi_etype)); break; } TXD = (struct igc_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* This is used in the transmit desc in encap */ paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; /* VLAN MACLEN IPLEN */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << IGC_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= pi->ipi_ehdrlen << IGC_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= pi->ipi_ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (pi->ipi_tso_segsz << IGC_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (pi->ipi_tcp_hlen << IGC_ADVTXD_L4LEN_SHIFT); TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); *cmd_type_len |= IGC_ADVTXD_DCMD_TSE; *olinfo_status |= IGC_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << IGC_ADVTXD_PAYLEN_SHIFT; return (1); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igc_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len, uint32_t *olinfo_status) { struct igc_adv_tx_context_desc *TXD; uint32_t vlan_macip_lens, type_tucmd_mlhl; uint32_t mss_l4len_idx; mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; /* First check if TSO is to be used */ if (pi->ipi_csum_flags & CSUM_TSO) return (igc_tso_setup(txr, pi, cmd_type_len, olinfo_status)); /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= pi->ipi_len << IGC_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct igc_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << IGC_ADVTXD_VLAN_SHIFT); } else if ((pi->ipi_csum_flags & IGC_CSUM_OFFLOAD) == 0) { return (0); } /* Set the ether header length */ vlan_macip_lens |= pi->ipi_ehdrlen << IGC_ADVTXD_MACLEN_SHIFT; switch(pi->ipi_etype) { case ETHERTYPE_IP: type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6; break; default: break; } vlan_macip_lens |= pi->ipi_ip_hlen; type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT; switch (pi->ipi_ipproto) { case IPPROTO_TCP: if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) { type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP; *olinfo_status |= IGC_TXD_POPTS_TXSM << 8; } break; case IPPROTO_UDP: if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) { type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP; *olinfo_status |= IGC_TXD_POPTS_TXSM << 8; } break; case IPPROTO_SCTP: if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) { type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP; *olinfo_status |= IGC_TXD_POPTS_TXSM << 8; } break; default: break; } /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); return (1); } static int igc_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct igc_adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct igc_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; union igc_adv_tx_desc *txd = NULL; int i, j, pidx_last; uint32_t olinfo_status, cmd_type_len, txd_flags; qidx_t ntxd; pidx_last = olinfo_status = 0; /* Basic descriptor defines */ cmd_type_len = (IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT); if (pi->ipi_mflags & M_VLANTAG) cmd_type_len |= IGC_ADVTXD_DCMD_VLE; i = pi->ipi_pidx; ntxd = scctx->isc_ntxd[0]; txd_flags = pi->ipi_flags & IPI_TX_INTR ? IGC_ADVTXD_DCMD_RS : 0; /* Consume the first descriptor */ i += igc_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status); if (i == scctx->isc_ntxd[0]) i = 0; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txd = (union igc_adv_tx_desc *)&txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(IGC_ADVTXD_DCMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); pidx_last = i; if (++i == scctx->isc_ntxd[0]) { i = 0; } } if (txd_flags) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } txd->read.cmd_type_len |= htole32(IGC_ADVTXD_DCMD_EOP | txd_flags); pi->ipi_new_pidx = i; return (0); } static void igc_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct igc_adapter *adapter = arg; struct igc_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; IGC_WRITE_REG(&adapter->hw, IGC_TDT(txr->me), pidx); } static int igc_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct igc_adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct igc_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; status = ((union igc_adv_tx_desc *)&txr->tx_base[cur])->wb.status; updated = !!(status & IGC_TXD_STAT_DD); if (!updated) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; status = ((union igc_adv_tx_desc *)&txr->tx_base[cur])->wb.status; } while ((status & IGC_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return (processed); } static void igc_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct igc_adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; uint16_t rxqid = iru->iru_qsidx; struct igc_rx_queue *que = &sc->rx_queues[rxqid]; union igc_adv_rx_desc *rxd; struct rx_ring *rxr = &que->rxr; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (union igc_adv_rx_desc *)&rxr->rx_base[next_pidx]; rxd->read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void igc_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct igc_adapter *sc = arg; struct igc_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me), pidx); } static int igc_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct igc_adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct igc_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union igc_adv_rx_desc *rxd; uint32_t staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (union igc_adv_rx_desc *)&rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & IGC_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & IGC_RXD_STAT_EOP) cnt++; } return (cnt); } /**************************************************************** * Routine sends data which has been dma'ed into host memory * to upper layer. Initialize ri structure. * * Returns 0 upon success, errno on failure ***************************************************************/ static int igc_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct igc_adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct igc_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union igc_adv_rx_desc *rxd; - uint16_t pkt_info, len, vtag; + uint16_t pkt_info, len; uint32_t ptype, staterr; int i, cidx; bool eop; - staterr = i = vtag = 0; + staterr = i = 0; cidx = ri->iri_cidx; do { rxd = (union igc_adv_rx_desc *)&rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); MPASS ((staterr & IGC_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGC_PKTTYPE_MASK; ri->iri_len += len; rxr->rx_bytes += ri->iri_len; rxd->wb.upper.status_error = 0; eop = ((staterr & IGC_RXD_STAT_EOP) == IGC_RXD_STAT_EOP); - vtag = le16toh(rxd->wb.upper.vlan); - /* Make sure bad packets are discarded */ if (eop && ((staterr & IGC_RXDEXT_STATERR_RXE) != 0)) { adapter->dropped_pkts++; ++rxr->rx_discarded; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; #ifdef notyet if (rxr->hdr_split == true) { ri->iri_frags[i].irf_flid = 1; ri->iri_frags[i].irf_idx = cidx; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; } #endif i++; } while (!eop); rxr->rx_packets++; if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0) igc_rx_checksum(staterr, ri, ptype); if ((scctx->isc_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & IGC_RXD_STAT_VP) != 0) { - ri->iri_vtag = vtag; + ri->iri_vtag = le16toh(rxd->wb.upper.vlan); ri->iri_flags |= M_VLANTAG; } ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = igc_determine_rsstype(pkt_info); ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void igc_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype) { uint16_t status = (uint16_t)staterr; uint8_t errors = (uint8_t)(staterr >> 24); if (__predict_false(status & IGC_RXD_STAT_IXSM)) return; /* If there is a layer 3 or 4 error we are done */ if (__predict_false(errors & (IGC_RXD_ERR_IPE | IGC_RXD_ERR_TCPE))) return; /* IP Checksum Good */ if (status & IGC_RXD_STAT_IPCS) ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); /* Valid L4E checksum */ if (__predict_true(status & (IGC_RXD_STAT_TCPCS | IGC_RXD_STAT_UDPCS))) { /* SCTP header present */ if (__predict_false((ptype & IGC_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & IGC_RXDADV_PKTTYPE_SCTP) != 0)) { ri->iri_csum_flags |= CSUM_SCTP_VALID; } else { ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; ri->iri_csum_data = htons(0xffff); } } } /******************************************************************** * * Parse the packet type to determine the appropriate hash * ******************************************************************/ static int igc_determine_rsstype(uint16_t pkt_info) { switch (pkt_info & IGC_RXDADV_RSSTYPE_MASK) { case IGC_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case IGC_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case IGC_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case IGC_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case IGC_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case IGC_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c index 7c87b0ec10fc..589f4c72c9ce 100644 --- a/sys/dev/ixgbe/ix_txrx.c +++ b/sys/dev/ixgbe/ix_txrx.c @@ -1,537 +1,532 @@ /****************************************************************************** Copyright (c) 2001-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixgbe.h" /************************************************************************ * Local Function prototypes ************************************************************************/ static int ixgbe_isc_txd_encap(void *, if_pkt_info_t); static void ixgbe_isc_txd_flush(void *, uint16_t, qidx_t); static int ixgbe_isc_txd_credits_update(void *, uint16_t, bool); static void ixgbe_isc_rxd_refill(void *, if_rxd_update_t); static void ixgbe_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); static int ixgbe_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); static int ixgbe_isc_rxd_pkt_get(void *, if_rxd_info_t); static void ixgbe_rx_checksum(uint32_t, if_rxd_info_t, uint32_t); static int ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *, if_pkt_info_t); extern void ixgbe_if_enable_intr(if_ctx_t ctx); static int ixgbe_determine_rsstype(uint16_t pkt_info); struct if_txrx ixgbe_txrx = { .ift_txd_encap = ixgbe_isc_txd_encap, .ift_txd_flush = ixgbe_isc_txd_flush, .ift_txd_credits_update = ixgbe_isc_txd_credits_update, .ift_rxd_available = ixgbe_isc_rxd_available, .ift_rxd_pkt_get = ixgbe_isc_rxd_pkt_get, .ift_rxd_refill = ixgbe_isc_rxd_refill, .ift_rxd_flush = ixgbe_isc_rxd_flush, .ift_legacy_intr = NULL }; /************************************************************************ * ixgbe_tx_ctx_setup * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * ************************************************************************/ static int ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi) { uint32_t vlan_macip_lens, type_tucmd_mlhl; uint32_t olinfo_status, mss_l4len_idx, pktlen, offload; u8 ehdrlen; offload = true; olinfo_status = mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; /* VLAN MACLEN IPLEN */ vlan_macip_lens |= (htole16(pi->ipi_vtag) << IXGBE_ADVTXD_VLAN_SHIFT); /* * Some of our VF devices need a context descriptor for every * packet. That means the ehdrlen needs to be non-zero in order * for the host driver not to flag a malicious event. The stack * will most likely populate this for all other reasons of why * this function was called. */ if (pi->ipi_ehdrlen == 0) { ehdrlen = ETHER_HDR_LEN; ehdrlen += (pi->ipi_vtag != 0) ? ETHER_VLAN_ENCAP_LEN : 0; } else ehdrlen = pi->ipi_ehdrlen; vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; pktlen = pi->ipi_len; /* First check if TSO is to be used */ if (pi->ipi_csum_flags & CSUM_TSO) { /* This is used in the transmit desc in encap */ pktlen = pi->ipi_len - ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; mss_l4len_idx |= (pi->ipi_tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (pi->ipi_tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); } olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT; if (pi->ipi_flags & IPI_TX_IPV4) { type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ if (pi->ipi_csum_flags & (CSUM_IP|CSUM_TSO)) olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; } else if (pi->ipi_flags & IPI_TX_IPV6) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; else offload = false; vlan_macip_lens |= pi->ipi_ip_hlen; switch (pi->ipi_ipproto) { case IPPROTO_TCP: if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP | CSUM_TSO)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; else offload = false; break; case IPPROTO_UDP: if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; else offload = false; break; case IPPROTO_SCTP: if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; else offload = false; break; default: offload = false; break; } /* Insert L4 checksum into data descriptors */ if (offload) olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); return (olinfo_status); } /* ixgbe_tx_ctx_setup */ /************************************************************************ * ixgbe_isc_txd_encap ************************************************************************/ static int ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct ixgbe_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct ix_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; union ixgbe_adv_tx_desc *txd = NULL; struct ixgbe_adv_tx_context_desc *TXD; int i, j, first, pidx_last; uint32_t olinfo_status, cmd, flags; qidx_t ntxd; cmd = (IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); if (pi->ipi_mflags & M_VLANTAG) cmd |= IXGBE_ADVTXD_DCMD_VLE; i = first = pi->ipi_pidx; flags = (pi->ipi_flags & IPI_TX_INTR) ? IXGBE_TXD_CMD_RS : 0; ntxd = scctx->isc_ntxd[0]; TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[first]; if ((pi->ipi_csum_flags & CSUM_OFFLOAD) || (sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) || pi->ipi_vtag) { /********************************************* * Set up the appropriate offload context * this will consume the first descriptor *********************************************/ olinfo_status = ixgbe_tx_ctx_setup(TXD, pi); if (pi->ipi_csum_flags & CSUM_TSO) { cmd |= IXGBE_ADVTXD_DCMD_TSE; ++txr->tso_tx; } if (++i == scctx->isc_ntxd[0]) i = 0; } else { /* Indicate the whole packet as payload when not doing TSO */ olinfo_status = pi->ipi_len << IXGBE_ADVTXD_PAYLEN_SHIFT; } olinfo_status |= IXGBE_ADVTXD_CC; pidx_last = 0; for (j = 0; j < nsegs; j++) { bus_size_t seglen; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; txd->read.buffer_addr = htole64(segs[j].ds_addr); txd->read.cmd_type_len = htole32(cmd | seglen); txd->read.olinfo_status = htole32(olinfo_status); pidx_last = i; if (++i == scctx->isc_ntxd[0]) { i = 0; } } if (flags) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1); } txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | flags); txr->bytes += pi->ipi_len; pi->ipi_new_pidx = i; ++txr->total_packets; return (0); } /* ixgbe_isc_txd_encap */ /************************************************************************ * ixgbe_isc_txd_flush ************************************************************************/ static void ixgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct ixgbe_softc *sc = arg; struct ix_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; IXGBE_WRITE_REG(&sc->hw, txr->tail, pidx); } /* ixgbe_isc_txd_flush */ /************************************************************************ * ixgbe_isc_txd_credits_update ************************************************************************/ static int ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct ixgbe_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct ix_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].wb.status; updated = !!(status & IXGBE_TXD_STAT_DD); if (!updated) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd - 1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].wb.status; } while ((status & IXGBE_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return (processed); } /* ixgbe_isc_txd_credits_update */ /************************************************************************ * ixgbe_isc_rxd_refill ************************************************************************/ static void ixgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct ixgbe_softc *sc = arg; struct ix_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; struct rx_ring *rxr = &que->rxr; uint64_t *paddrs; int i; uint32_t next_pidx, pidx; uint16_t count; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == sc->shared->isc_nrxd[0]) next_pidx = 0; } } /* ixgbe_isc_rxd_refill */ /************************************************************************ * ixgbe_isc_rxd_flush ************************************************************************/ static void ixgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx __unused, qidx_t pidx) { struct ixgbe_softc *sc = arg; struct ix_rx_queue *que = &sc->rx_queues[qsidx]; struct rx_ring *rxr = &que->rxr; IXGBE_WRITE_REG(&sc->hw, rxr->tail, pidx); } /* ixgbe_isc_rxd_flush */ /************************************************************************ * ixgbe_isc_rxd_available ************************************************************************/ static int ixgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t pidx, qidx_t budget) { struct ixgbe_softc *sc = arg; struct ix_rx_queue *que = &sc->rx_queues[qsidx]; struct rx_ring *rxr = &que->rxr; union ixgbe_adv_rx_desc *rxd; uint32_t staterr; int cnt, i, nrxd; nrxd = sc->shared->isc_nrxd[0]; for (cnt = 0, i = pidx; cnt < nrxd && cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; if (++i == nrxd) i = 0; if (staterr & IXGBE_RXD_STAT_EOP) cnt++; } return (cnt); } /* ixgbe_isc_rxd_available */ /************************************************************************ * ixgbe_isc_rxd_pkt_get * * Routine sends data which has been dma'ed into host memory * to upper layer. Initialize ri structure. * * Returns 0 upon success, errno on failure ************************************************************************/ static int ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct ixgbe_softc *sc = arg; if_softc_ctx_t scctx = sc->shared; struct ix_rx_queue *que = &sc->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union ixgbe_adv_rx_desc *rxd; uint16_t pkt_info, len, cidx, i; - uint16_t vtag = 0; uint32_t ptype; uint32_t staterr = 0; bool eop; i = 0; cidx = ri->iri_cidx; do { rxd = &rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); /* Error Checking then decrement count */ MPASS ((staterr & IXGBE_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ptype = le32toh(rxd->wb.lower.lo_dword.data) & IXGBE_RXDADV_PKTTYPE_MASK; ri->iri_len += len; rxr->bytes += len; rxd->wb.upper.status_error = 0; eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); - if ( (rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP) ) { - vtag = le16toh(rxd->wb.upper.vlan); - } else { - vtag = 0; - } - /* Make sure bad packets are discarded */ if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { if (sc->feat_en & IXGBE_FEATURE_VF) if_inc_counter(ri->iri_ifp, IFCOUNTER_IERRORS, 1); rxr->rx_discarded++; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; if (++cidx == sc->shared->isc_nrxd[0]) cidx = 0; i++; /* even a 16K packet shouldn't consume more than 8 clusters */ MPASS(i < 9); } while (!eop); rxr->rx_packets++; rxr->packets++; rxr->rx_bytes += ri->iri_len; if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0) ixgbe_rx_checksum(staterr, ri, ptype); ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = ixgbe_determine_rsstype(pkt_info); if ((sc->feat_en & IXGBE_FEATURE_RSS) == 0) { if (ri->iri_rsstype == M_HASHTYPE_OPAQUE) ri->iri_rsstype = M_HASHTYPE_NONE; else ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; } - ri->iri_vtag = vtag; - ri->iri_nfrags = i; - if (vtag) + if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP)) { + ri->iri_vtag = le16toh(rxd->wb.upper.vlan); ri->iri_flags |= M_VLANTAG; + } + + ri->iri_nfrags = i; return (0); } /* ixgbe_isc_rxd_pkt_get */ /************************************************************************ * ixgbe_rx_checksum * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. ************************************************************************/ static void ixgbe_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype) { uint16_t status = (uint16_t)staterr; uint8_t errors = (uint8_t)(staterr >> 24); /* If there is a layer 3 or 4 error we are done */ if (__predict_false(errors & (IXGBE_RXD_ERR_IPE | IXGBE_RXD_ERR_TCPE))) return; /* IP Checksum Good */ if (status & IXGBE_RXD_STAT_IPCS) ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); /* Valid L4E checksum */ if (__predict_true(status & IXGBE_RXD_STAT_L4CS)) { /* SCTP header present. */ if (__predict_false((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)) { ri->iri_csum_flags |= CSUM_SCTP_VALID; } else { ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; ri->iri_csum_data = htons(0xffff); } } } /* ixgbe_rx_checksum */ /************************************************************************ * ixgbe_determine_rsstype * * Parse the packet type to determine the appropriate hash ************************************************************************/ static int ixgbe_determine_rsstype(uint16_t pkt_info) { switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case IXGBE_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case IXGBE_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case IXGBE_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: return M_HASHTYPE_RSS_UDP_IPV4; case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: return M_HASHTYPE_RSS_UDP_IPV6; case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: return M_HASHTYPE_RSS_UDP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } /* ixgbe_determine_rsstype */ diff --git a/sys/dev/ixl/ixl_txrx.c b/sys/dev/ixl/ixl_txrx.c index 58ae751e5e10..20050e73c09b 100644 --- a/sys/dev/ixl/ixl_txrx.c +++ b/sys/dev/ixl/ixl_txrx.c @@ -1,967 +1,964 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* ** IXL driver TX/RX Routines: ** This was seperated to allow usage by ** both the PF and VF drivers. */ #ifndef IXL_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixl.h" #ifdef RSS #include #endif /* Local Prototypes */ static u8 ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype); static int ixl_isc_txd_encap(void *arg, if_pkt_info_t pi); static void ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int ixl_isc_txd_credits_update_hwb(void *arg, uint16_t txqid, bool clear); static int ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear); static void ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void ixl_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); struct if_txrx ixl_txrx_hwb = { ixl_isc_txd_encap, ixl_isc_txd_flush, ixl_isc_txd_credits_update_hwb, ixl_isc_rxd_available, ixl_isc_rxd_pkt_get, ixl_isc_rxd_refill, ixl_isc_rxd_flush, NULL }; struct if_txrx ixl_txrx_dwb = { ixl_isc_txd_encap, ixl_isc_txd_flush, ixl_isc_txd_credits_update_dwb, ixl_isc_rxd_available, ixl_isc_rxd_pkt_get, ixl_isc_rxd_refill, ixl_isc_rxd_flush, NULL }; /* * @key key is saved into this parameter */ void ixl_get_default_rss_key(u32 *key) { MPASS(key != NULL); u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 0x0, 0x0, 0x0}; bcopy(rss_seed, key, IXL_RSS_KEY_SIZE); } /** * i40e_vc_stat_str - convert virtchnl status err code to a string * @hw: pointer to the HW structure * @stat_err: the status error code to convert **/ const char * i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err) { switch (stat_err) { case VIRTCHNL_STATUS_SUCCESS: return "OK"; case VIRTCHNL_ERR_PARAM: return "VIRTCHNL_ERR_PARAM"; case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH: return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH"; case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR: return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR"; case VIRTCHNL_STATUS_ERR_INVALID_VF_ID: return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID"; case VIRTCHNL_STATUS_NOT_SUPPORTED: return "VIRTCHNL_STATUS_NOT_SUPPORTED"; } snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err); return hw->err_str; } void ixl_debug_core(device_t dev, u32 enabled_mask, u32 mask, char *fmt, ...) { va_list args; if (!(mask & enabled_mask)) return; /* Re-implement device_printf() */ device_print_prettyname(dev); va_start(args, fmt); vprintf(fmt, args); va_end(args); } static bool ixl_is_tx_desc_done(struct tx_ring *txr, int idx) { return (((txr->tx_base[idx].cmd_type_offset_bsz >> I40E_TXD_QW1_DTYPE_SHIFT) & I40E_TXD_QW1_DTYPE_MASK) == I40E_TX_DESC_DTYPE_DESC_DONE); } static int ixl_tso_detect_sparse(bus_dma_segment_t *segs, int nsegs, if_pkt_info_t pi) { int count, curseg, i, hlen, segsz, seglen, tsolen; if (nsegs <= IXL_MAX_TX_SEGS-2) return (0); segsz = pi->ipi_tso_segsz; curseg = count = 0; hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; tsolen = pi->ipi_len - hlen; i = 0; curseg = segs[0].ds_len; while (hlen > 0) { count++; if (count > IXL_MAX_TX_SEGS - 2) return (1); if (curseg == 0) { i++; if (__predict_false(i == nsegs)) return (1); curseg = segs[i].ds_len; } seglen = min(curseg, hlen); curseg -= seglen; hlen -= seglen; // printf("H:seglen = %d, count=%d\n", seglen, count); } while (tsolen > 0) { segsz = pi->ipi_tso_segsz; while (segsz > 0 && tsolen != 0) { count++; if (count > IXL_MAX_TX_SEGS - 2) { // printf("bad: count = %d\n", count); return (1); } if (curseg == 0) { i++; if (__predict_false(i == nsegs)) { // printf("bad: tsolen = %d", tsolen); return (1); } curseg = segs[i].ds_len; } seglen = min(curseg, segsz); segsz -= seglen; curseg -= seglen; tsolen -= seglen; // printf("D:seglen = %d, count=%d\n", seglen, count); } count = 0; } return (0); } /********************************************************************* * * Setup descriptor for hw offloads * **********************************************************************/ static void ixl_tx_setup_offload(struct ixl_tx_queue *que, if_pkt_info_t pi, u32 *cmd, u32 *off) { switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: if (pi->ipi_csum_flags & IXL_CSUM_IPV4) *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; else *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; break; #endif default: break; } *off |= (pi->ipi_ehdrlen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; *off |= (pi->ipi_ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; switch (pi->ipi_ipproto) { case IPPROTO_TCP: if (pi->ipi_csum_flags & IXL_CSUM_TCP) { *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; *off |= (pi->ipi_tcp_hlen >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; /* Check for NO_HEAD MDD event */ MPASS(pi->ipi_tcp_hlen != 0); } break; case IPPROTO_UDP: if (pi->ipi_csum_flags & IXL_CSUM_UDP) { *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; *off |= (sizeof(struct udphdr) >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } break; case IPPROTO_SCTP: if (pi->ipi_csum_flags & IXL_CSUM_SCTP) { *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; *off |= (sizeof(struct sctphdr) >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } /* Fall Thru */ default: break; } } /********************************************************************** * * Setup context for hardware segmentation offload (TSO) * **********************************************************************/ static int ixl_tso_setup(struct tx_ring *txr, if_pkt_info_t pi) { if_softc_ctx_t scctx; struct i40e_tx_context_desc *TXD; u32 cmd, mss, type, tsolen; int idx, total_hdr_len; u64 type_cmd_tso_mss; idx = pi->ipi_pidx; TXD = (struct i40e_tx_context_desc *) &txr->tx_base[idx]; total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; tsolen = pi->ipi_len - total_hdr_len; scctx = txr->que->vsi->shared; type = I40E_TX_DESC_DTYPE_CONTEXT; cmd = I40E_TX_CTX_DESC_TSO; /* * TSO MSS must not be less than 64; this prevents a * BAD_LSO_MSS MDD event when the MSS is too small. */ if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) { txr->mss_too_small++; pi->ipi_tso_segsz = IXL_MIN_TSO_MSS; } mss = pi->ipi_tso_segsz; /* Check for BAD_LS0_MSS MDD event (mss too large) */ MPASS(mss <= IXL_MAX_TSO_MSS); /* Check for NO_HEAD MDD event (header lengths are 0) */ MPASS(pi->ipi_ehdrlen != 0); MPASS(pi->ipi_ip_hlen != 0); /* Partial check for BAD_LSO_LEN MDD event */ MPASS(tsolen != 0); /* Partial check for WRONG_SIZE MDD event (during TSO) */ MPASS(total_hdr_len + mss <= IXL_MAX_FRAME); type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) | ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT); TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss); TXD->tunneling_params = htole32(0); txr->que->tso++; return ((idx + 1) & (scctx->isc_ntxd[0]-1)); } /********************************************************************* * * This routine maps the mbufs to tx descriptors, allowing the * TX engine to transmit the packets. * - return 0 on success, positive on failure * **********************************************************************/ #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) static int ixl_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct ixl_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que = &vsi->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; struct i40e_tx_desc *txd = NULL; int i, j, mask, pidx_last; u32 cmd, off, tx_intr; cmd = off = 0; i = pi->ipi_pidx; tx_intr = (pi->ipi_flags & IPI_TX_INTR); /* Set up the TSO/CSUM offload */ if (pi->ipi_csum_flags & CSUM_OFFLOAD) { /* Set up the TSO context descriptor if required */ if (pi->ipi_csum_flags & CSUM_TSO) { /* Prevent MAX_BUFF MDD event (for TSO) */ if (ixl_tso_detect_sparse(segs, nsegs, pi)) return (EFBIG); i = ixl_tso_setup(txr, pi); } ixl_tx_setup_offload(que, pi, &cmd, &off); } if (pi->ipi_mflags & M_VLANTAG) cmd |= I40E_TX_DESC_CMD_IL2TAG1; cmd |= I40E_TX_DESC_CMD_ICRC; mask = scctx->isc_ntxd[0] - 1; /* Check for WRONG_SIZE MDD event */ MPASS(pi->ipi_len >= IXL_MIN_FRAME); #ifdef INVARIANTS if (!(pi->ipi_csum_flags & CSUM_TSO)) MPASS(pi->ipi_len <= IXL_MAX_FRAME); #endif for (j = 0; j < nsegs; j++) { bus_size_t seglen; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; /* Check for ZERO_BSIZE MDD event */ MPASS(seglen != 0); txd->buffer_addr = htole64(segs[j].ds_addr); txd->cmd_type_offset_bsz = htole64(I40E_TX_DESC_DTYPE_DATA | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT) | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT) | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | ((u64)htole16(pi->ipi_vtag) << I40E_TXD_QW1_L2TAG1_SHIFT)); txr->tx_bytes += seglen; pidx_last = i; i = (i+1) & mask; } /* Set the last descriptor for report */ txd->cmd_type_offset_bsz |= htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT)); /* Add to report status array (if using TX interrupts) */ if (!vsi->enable_head_writeback && tx_intr) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & mask; MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } pi->ipi_new_pidx = i; ++txr->tx_packets; return (0); } static void ixl_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct ixl_vsi *vsi = arg; struct tx_ring *txr = &vsi->tx_queues[txqid].txr; /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ /* Check for ENDLESS_TX MDD event */ MPASS(pidx < vsi->shared->isc_ntxd[0]); wr32(vsi->hw, txr->tail, pidx); } /********************************************************************* * * (Re)Initialize a queue transmit ring by clearing its memory. * **********************************************************************/ void ixl_init_tx_ring(struct ixl_vsi *vsi, struct ixl_tx_queue *que) { struct tx_ring *txr = &que->txr; /* Clear the old ring contents */ bzero((void *)txr->tx_base, (sizeof(struct i40e_tx_desc)) * (vsi->shared->isc_ntxd[0] + (vsi->enable_head_writeback ? 1 : 0))); wr32(vsi->hw, txr->tail, 0); } /* * ixl_get_tx_head - Retrieve the value from the * location the HW records its HEAD index */ static inline u32 ixl_get_tx_head(struct ixl_tx_queue *que) { if_softc_ctx_t scctx = que->vsi->shared; struct tx_ring *txr = &que->txr; void *head = &txr->tx_base[scctx->isc_ntxd[0]]; return LE32_TO_CPU(*(volatile __le32 *)head); } static int ixl_isc_txd_credits_update_hwb(void *arg, uint16_t qid, bool clear) { struct ixl_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que = &vsi->tx_queues[qid]; struct tx_ring *txr = &que->txr; int head, credits; /* Get the Head WB value */ head = ixl_get_tx_head(que); credits = head - txr->tx_cidx_processed; if (credits < 0) credits += scctx->isc_ntxd[0]; if (clear) txr->tx_cidx_processed = head; return (credits); } static int ixl_isc_txd_credits_update_dwb(void *arg, uint16_t txqid, bool clear) { struct ixl_vsi *vsi = arg; struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid]; if_softc_ctx_t scctx = vsi->shared; struct tx_ring *txr = &tx_que->txr; qidx_t processed = 0; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; bool is_done; rs_cidx = txr->tx_rs_cidx; #if 0 device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) rs_cidx %d, txr->tx_rs_pidx %d\n", __func__, txr->me, rs_cidx, txr->tx_rs_pidx); #endif if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); is_done = ixl_is_tx_desc_done(txr, cur); if (!is_done) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); #if 0 device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) cidx_processed=%u cur=%u clear=%d delta=%d\n", __func__, txr->me, prev, cur, clear, delta); #endif processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); is_done = ixl_is_tx_desc_done(txr, cur); } while (is_done); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; #if 0 device_printf(iflib_get_dev(vsi->ctx), "%s: (q%d) processed %d\n", __func__, txr->me, processed); #endif return (processed); } static void ixl_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct ixl_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct rx_ring *rxr = &((vsi->rx_queues[iru->iru_qsidx]).rxr); uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void ixl_isc_rxd_flush(void * arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct ixl_vsi *vsi = arg; struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr; wr32(vsi->hw, rxr->tail, pidx); } static int ixl_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct ixl_vsi *vsi = arg; struct rx_ring *rxr = &vsi->rx_queues[rxqid].rxr; union i40e_rx_desc *rxd; u64 qword; uint32_t status; int cnt, i, nrxd; nrxd = vsi->shared->isc_nrxd[0]; for (cnt = 0, i = idx; cnt < nrxd - 1 && cnt <= budget;) { rxd = &rxr->rx_base[i]; qword = le64toh(rxd->wb.qword1.status_error_len); status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) break; if (++i == nrxd) i = 0; if (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)) cnt++; } return (cnt); } /* ** i40e_ptype_to_hash: parse the packet type ** to determine the appropriate hash. */ static inline int ixl_ptype_to_hash(u8 ptype) { struct i40e_rx_ptype_decoded decoded; decoded = decode_rx_desc_ptype(ptype); if (!decoded.known) return M_HASHTYPE_OPAQUE; if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) return M_HASHTYPE_OPAQUE; /* Note: anything that gets to this point is IP */ if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { switch (decoded.inner_prot) { case I40E_RX_PTYPE_INNER_PROT_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case I40E_RX_PTYPE_INNER_PROT_UDP: return M_HASHTYPE_RSS_UDP_IPV6; default: return M_HASHTYPE_RSS_IPV6; } } if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { switch (decoded.inner_prot) { case I40E_RX_PTYPE_INNER_PROT_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case I40E_RX_PTYPE_INNER_PROT_UDP: return M_HASHTYPE_RSS_UDP_IPV4; default: return M_HASHTYPE_RSS_IPV4; } } /* We should never get here!! */ return M_HASHTYPE_OPAQUE; } /********************************************************************* * * This routine executes in ithread context. It sends data which has been * dma'ed into host memory to upper layer. * * Returns 0 upon success, errno on failure * *********************************************************************/ static int ixl_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct ixl_vsi *vsi = arg; if_softc_ctx_t scctx = vsi->shared; struct ixl_rx_queue *que = &vsi->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union i40e_rx_desc *cur; u32 status, error; - u16 plen, vtag; + u16 plen; u64 qword; u8 ptype; bool eop; int i, cidx; cidx = ri->iri_cidx; i = 0; do { /* 5 descriptor receive limit */ MPASS(i < IXL_MAX_RX_SEGS); cur = &rxr->rx_base[cidx]; qword = le64toh(cur->wb.qword1.status_error_len); status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; error = (qword & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT; plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; /* we should never be called without a valid descriptor */ MPASS((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) != 0); ri->iri_len += plen; rxr->rx_bytes += plen; cur->wb.qword1.status_error_len = 0; eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)); - if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) - vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); - else - vtag = 0; /* ** Make sure bad packets are discarded, ** note that only EOP descriptor has valid ** error results. */ if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { rxr->desc_errs++; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = plen; if (++cidx == vsi->shared->isc_nrxd[0]) cidx = 0; i++; } while (!eop); /* capture data for dynamic ITR adjustment */ rxr->packets++; rxr->rx_packets++; if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0) rxr->csum_errs += ixl_rx_checksum(ri, status, error, ptype); ri->iri_flowid = le32toh(cur->wb.qword0.hi_dword.rss); ri->iri_rsstype = ixl_ptype_to_hash(ptype); - ri->iri_vtag = vtag; - ri->iri_nfrags = i; - if (vtag) + if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) { + ri->iri_vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); ri->iri_flags |= M_VLANTAG; + } + ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static u8 ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype) { struct i40e_rx_ptype_decoded decoded; ri->iri_csum_flags = 0; /* No L3 or L4 checksum was calculated */ if (!(status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) return (0); decoded = decode_rx_desc_ptype(ptype); /* IPv6 with extension headers likely have bad csum */ if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { if (status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { ri->iri_csum_flags = 0; return (1); } } ri->iri_csum_flags |= CSUM_L3_CALC; /* IPv4 checksum error */ if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)) return (1); ri->iri_csum_flags |= CSUM_L3_VALID; ri->iri_csum_flags |= CSUM_L4_CALC; /* L4 checksum error */ if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) return (1); ri->iri_csum_flags |= CSUM_L4_VALID; ri->iri_csum_data |= htons(0xffff); return (0); } /* Set Report Status queue fields to 0 */ void ixl_init_tx_rsqs(struct ixl_vsi *vsi) { if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *tx_que; int i, j; for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_rs_cidx = txr->tx_rs_pidx; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error when calculating how many descriptors are * done in the credits_update function. */ txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; } } void ixl_init_tx_cidx(struct ixl_vsi *vsi) { if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *tx_que; int i; for (i = 0, tx_que = vsi->tx_queues; i < vsi->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; } } /* * Input: bitmap of enum virtchnl_link_speed */ u64 ixl_max_vc_speed_to_value(u8 link_speeds) { if (link_speeds & VIRTCHNL_LINK_SPEED_40GB) return IF_Gbps(40); if (link_speeds & VIRTCHNL_LINK_SPEED_25GB) return IF_Gbps(25); if (link_speeds & VIRTCHNL_LINK_SPEED_20GB) return IF_Gbps(20); if (link_speeds & VIRTCHNL_LINK_SPEED_10GB) return IF_Gbps(10); if (link_speeds & VIRTCHNL_LINK_SPEED_1GB) return IF_Gbps(1); if (link_speeds & VIRTCHNL_LINK_SPEED_100MB) return IF_Mbps(100); else /* Minimum supported link speed */ return IF_Mbps(100); } void ixl_add_vsi_sysctls(device_t dev, struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx, const char *sysctl_name) { struct sysctl_oid *tree; struct sysctl_oid_list *child; struct sysctl_oid_list *vsi_list; tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "VSI Number"); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats); } void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_eth_stats *eth_stats) { struct ixl_sysctl_info ctls[] = { {ð_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"}, {ð_stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received"}, {ð_stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received"}, {ð_stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received"}, {ð_stats->rx_discards, "rx_discards", "Discarded RX packets"}, {ð_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"}, {ð_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"}, {ð_stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted"}, {ð_stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted"}, // end {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_vsi_add_queues_stats(struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx) { struct sysctl_oid_list *vsi_list, *queue_list; struct sysctl_oid *queue_node; char queue_namebuf[IXL_QUEUE_NAME_LEN]; struct ixl_rx_queue *rx_que; struct ixl_tx_queue *tx_que; struct tx_ring *txr; struct rx_ring *rxr; vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); /* Queue statistics */ for (int q = 0; q < vsi->num_rx_queues; q++) { bzero(queue_namebuf, sizeof(queue_namebuf)); snprintf(queue_namebuf, sizeof(queue_namebuf), "rxq%02d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX Queue #"); queue_list = SYSCTL_CHILDREN(queue_node); rx_que = &(vsi->rx_queues[q]); rxr = &(rx_que->rxr); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(rx_que->irqs), "irqs on this queue (both Tx and Rx)"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets", CTLFLAG_RD, &(rxr->rx_packets), "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes", CTLFLAG_RD, &(rxr->rx_bytes), "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "desc_err", CTLFLAG_RD, &(rxr->desc_errs), "Queue Rx Descriptor Errors"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr", CTLFLAG_RD, &(rxr->itr), 0, "Queue Rx ITR Interval"); } for (int q = 0; q < vsi->num_tx_queues; q++) { bzero(queue_namebuf, sizeof(queue_namebuf)); snprintf(queue_namebuf, sizeof(queue_namebuf), "txq%02d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX Queue #"); queue_list = SYSCTL_CHILDREN(queue_node); tx_que = &(vsi->tx_queues[q]); txr = &(tx_que->txr); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso", CTLFLAG_RD, &(tx_que->tso), "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small", CTLFLAG_RD, &(txr->mss_too_small), "TSO sends with an MSS less than 64"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "packets", CTLFLAG_RD, &(txr->tx_packets), "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "bytes", CTLFLAG_RD, &(txr->tx_bytes), "Queue Bytes Transmitted"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "itr", CTLFLAG_RD, &(txr->itr), 0, "Queue Tx ITR Interval"); } }