Index: sys/dev/bnxt/bnxt.h =================================================================== --- sys/dev/bnxt/bnxt.h +++ sys/dev/bnxt/bnxt.h @@ -438,6 +438,7 @@ uint32_t ring_size; /* Must be a power of two */ uint16_t id; /* Logical ID */ uint16_t phys_id; + struct bnxt_full_tpa_start *tpa_start; }; struct bnxt_cp_ring { @@ -564,7 +565,6 @@ struct sysctl_ctx_list hw_stats; struct sysctl_oid *hw_stats_oid; - struct bnxt_full_tpa_start *tpa_start; struct bnxt_ver_info *ver_info; struct bnxt_nvram_info *nvm_info; bool wol; Index: sys/dev/bnxt/bnxt_hwrm.c =================================================================== --- sys/dev/bnxt/bnxt_hwrm.c +++ sys/dev/bnxt/bnxt_hwrm.c @@ -935,7 +935,7 @@ /* TODO: Calculate this based on ring size? */ req.max_agg_segs = htole16(3); /* Base this in the allocated TPA start size... */ - req.max_aggs = htole16(2); + req.max_aggs = htole16(7); /* * TODO: max_agg_timer? * req.mag_agg_timer = htole32(XXX); Index: sys/dev/bnxt/bnxt_txrx.c =================================================================== --- sys/dev/bnxt/bnxt_txrx.c +++ sys/dev/bnxt/bnxt_txrx.c @@ -264,6 +264,7 @@ uint8_t flid; uint64_t *paddrs; caddr_t *vaddrs; + qidx_t *frag_idxs; rxqid = iru->iru_qsidx; count = iru->iru_count; @@ -272,6 +273,7 @@ flid = iru->iru_flidx; vaddrs = iru->iru_vaddrs; paddrs = iru->iru_paddrs; + frag_idxs = iru->iru_idxs; if (flid == 0) { rx_ring = &softc->rx_rings[rxqid]; @@ -287,8 +289,8 @@ rxbd[pidx].flags_type = htole16(type); rxbd[pidx].len = htole16(len); /* No need to byte-swap the opaque value */ - rxbd[pidx].opaque = ((rxqid & 0xff) << 24) | (flid << 16) - | pidx; + rxbd[pidx].opaque = (((rxqid & 0xff) << 24) | (flid << 16) + | (frag_idxs[i])); rxbd[pidx].addr = htole64(paddrs[i]); if (++pidx == rx_ring->ring_size) pidx = 0; @@ -329,7 +331,6 @@ struct bnxt_softc *softc = (struct bnxt_softc *)sc; struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[rxqid]; struct rx_pkt_cmpl *rcp; - struct rx_tpa_start_cmpl *rtpa; struct rx_tpa_end_cmpl *rtpae; struct cmpl_base *cmp = (struct cmpl_base *)cpr->ring.vaddr; int avail = 0; @@ -338,7 +339,6 @@ uint8_t ags; int i; uint16_t type; - uint8_t agg_id; for (;;) { NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); @@ -388,18 +388,11 @@ avail++; break; case CMPL_BASE_TYPE_RX_TPA_START: - rtpa = (void *)&cmp[cons]; - agg_id = (rtpa->agg_id & - RX_TPA_START_CMPL_AGG_ID_MASK) >> - RX_TPA_START_CMPL_AGG_ID_SFT; - softc->tpa_start[agg_id].low = *rtpa; NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); CMPL_PREFETCH_NEXT(cpr, cons); if (!CMP_VALID(&cmp[cons], v_bit)) goto cmpl_invalid; - softc->tpa_start[agg_id].high = - ((struct rx_tpa_start_cmpl_hi *)cmp)[cons]; break; case CMPL_BASE_TYPE_RX_AGG: break; @@ -549,7 +542,7 @@ /* Get the agg_id */ agg_id = (agend->agg_id & RX_TPA_END_CMPL_AGG_ID_MASK) >> RX_TPA_END_CMPL_AGG_ID_SFT; - tpas = &softc->tpa_start[agg_id]; + tpas = &(softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id]); /* Extract from the first 16-byte BD */ if (le16toh(tpas->low.flags_type) & RX_TPA_START_CMPL_FLAGS_RSS_VALID) { @@ -563,8 +556,8 @@ RX_TPA_END_CMPL_AGG_BUFS_SFT; ri->iri_nfrags = ags + 1; /* No need to byte-swap the opaque value */ - ri->iri_frags[0].irf_flid = (tpas->low.opaque >> 16) & 0xff; - ri->iri_frags[0].irf_idx = tpas->low.opaque & 0xffff; + ri->iri_frags[0].irf_flid = ((tpas->low.opaque >> 16) & 0xff); + ri->iri_frags[0].irf_idx = (tpas->low.opaque & 0xffff); ri->iri_frags[0].irf_len = le16toh(tpas->low.len); ri->iri_len = le16toh(tpas->low.len); @@ -600,8 +593,8 @@ acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons]; /* No need to byte-swap the opaque value */ - ri->iri_frags[i].irf_flid = (acp->opaque >> 16) & 0xff; - ri->iri_frags[i].irf_idx = acp->opaque & 0xffff; + ri->iri_frags[i].irf_flid = ((acp->opaque >> 16) & 0xff); + ri->iri_frags[i].irf_idx = (acp->opaque & 0xffff); ri->iri_frags[i].irf_len = le16toh(acp->len); ri->iri_len += le16toh(acp->len); } @@ -609,8 +602,8 @@ /* And finally, the empty BD at the end... */ ri->iri_nfrags++; /* No need to byte-swap the opaque value */ - ri->iri_frags[i].irf_flid = (agend->opaque >> 16) % 0xff; - ri->iri_frags[i].irf_idx = agend->opaque & 0xffff; + ri->iri_frags[i].irf_flid = ((agend->opaque >> 16) & 0xff); + ri->iri_frags[i].irf_idx = (agend->opaque & 0xffff); ri->iri_frags[i].irf_len = le16toh(agend->len); ri->iri_len += le16toh(agend->len); @@ -623,9 +616,12 @@ { struct bnxt_softc *softc = (struct bnxt_softc *)sc; struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[ri->iri_qsidx]; + struct cmpl_base *cmp_q = (struct cmpl_base *)cpr->ring.vaddr; struct cmpl_base *cmp; + struct rx_tpa_start_cmpl *rtpa; uint16_t flags_type; uint16_t type; + uint8_t agg_id; for (;;) { NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); @@ -642,9 +638,18 @@ case CMPL_BASE_TYPE_RX_TPA_END: return bnxt_pkt_get_tpa(softc, ri, cpr, flags_type); case CMPL_BASE_TYPE_RX_TPA_START: + rtpa = (void *)&cmp_q[cpr->cons]; + agg_id = (rtpa->agg_id & + RX_TPA_START_CMPL_AGG_ID_MASK) >> + RX_TPA_START_CMPL_AGG_ID_SFT; + softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].low = *rtpa; + NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit); ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx); CMPL_PREFETCH_NEXT(cpr, cpr->cons); + + softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].high = + ((struct rx_tpa_start_cmpl_hi *)cmp_q)[cpr->cons]; break; default: device_printf(softc->dev, Index: sys/dev/bnxt/if_bnxt.c =================================================================== --- sys/dev/bnxt/if_bnxt.c +++ sys/dev/bnxt/if_bnxt.c @@ -506,6 +506,17 @@ softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1]; softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1]; + /* Allocate the TPA start buffer */ + softc->rx_rings[i].tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) * + (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (softc->rx_rings[i].tpa_start == NULL) { + rc = -ENOMEM; + device_printf(softc->dev, + "Unable to allocate space for TPA\n"); + goto tpa_alloc_fail; + } + /* Allocate the AG ring */ softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].softc = softc; @@ -571,7 +582,10 @@ iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl); rss_hash_alloc_fail: iflib_dma_free(&softc->vnic_info.mc_list); +tpa_alloc_fail: mc_list_alloc_fail: + for (i = i - 1; i >= 0; i--) + free(softc->rx_rings[i].tpa_start, M_DEVBUF); iflib_dma_free(&softc->rx_stats); hw_stats_alloc_fail: free(softc->grp_info, M_DEVBUF); @@ -635,16 +649,6 @@ if (rc) goto dma_fail; - /* Allocate the TPA start buffer */ - softc->tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) * - (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (softc->tpa_start == NULL) { - rc = ENOMEM; - device_printf(softc->dev, - "Unable to allocate space for TPA\n"); - goto tpa_failed; - } /* Get firmware version and compare with driver */ softc->ver_info = malloc(sizeof(struct bnxt_ver_info), @@ -814,8 +818,6 @@ ver_fail: free(softc->ver_info, M_DEVBUF); ver_alloc_fail: - free(softc->tpa_start, M_DEVBUF); -tpa_failed: bnxt_free_hwrm_dma_mem(softc); dma_fail: BNXT_HWRM_LOCK_DESTROY(softc); @@ -877,7 +879,8 @@ SLIST_FOREACH_SAFE(tag, &softc->vnic_info.vlan_tags, next, tmp) free(tag, M_DEVBUF); iflib_dma_free(&softc->def_cp_ring_mem); - free(softc->tpa_start, M_DEVBUF); + for (i = 0; i < softc->nrxqsets; i++) + free(softc->rx_rings[i].tpa_start, M_DEVBUF); free(softc->ver_info, M_DEVBUF); free(softc->nvm_info, M_DEVBUF); @@ -1009,14 +1012,17 @@ if (rc) goto fail; -#ifdef notyet - /* Enable LRO/TPA/GRO */ + /* + * Enable LRO/TPA/GRO + * TBD: + * Enable / Disable HW_LRO based on + * ifconfig lro / ifconfig -lro setting + */ rc = bnxt_hwrm_vnic_tpa_cfg(softc, &softc->vnic_info, (if_getcapenable(iflib_get_ifp(ctx)) & IFCAP_LRO) ? HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA : 0); if (rc) goto fail; -#endif for (i = 0; i < softc->ntxqsets; i++) { /* Allocate the statistics context */ Index: sys/net/iflib.c =================================================================== --- sys/net/iflib.c +++ sys/net/iflib.c @@ -93,6 +93,7 @@ #include #endif +#include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references @@ -381,6 +382,8 @@ #endif /* implicit pad */ + bitstr_t *ifl_rx_bitmap;; + qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; @@ -1797,7 +1800,7 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; - int idx, pidx = fl->ifl_pidx; + int idx, frag_idx = fl->ifl_fragidx, pidx = fl->ifl_pidx; caddr_t cl, *sd_cl; struct mbuf **sd_m; uint8_t *sd_flags; @@ -1840,8 +1843,11 @@ * * If the cluster is still set then we know a minimum sized packet was received */ - if ((cl = sd_cl[idx]) == NULL) { - if ((cl = sd_cl[idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) + bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); + if ((frag_idx < 0) || (frag_idx >= fl->ifl_size)) + bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); + if ((cl = sd_cl[frag_idx]) == NULL) { + if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; @@ -1867,10 +1873,11 @@ cb_arg.error = 0; q = fl->ifl_rxq; MPASS(sd_map != NULL); - MPASS(sd_map[idx] != NULL); - err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[idx], + MPASS(sd_map[frag_idx] != NULL); + err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); - bus_dmamap_sync(fl->ifl_desc_tag, sd_map[idx], BUS_DMASYNC_PREREAD); + bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx], + BUS_DMASYNC_PREREAD); if (err != 0 || cb_arg.error) { /* @@ -1884,12 +1891,13 @@ } bus_addr = cb_arg.seg.ds_addr; } - sd_flags[idx] |= RX_SW_DESC_INUSE; + bit_set(fl->ifl_rx_bitmap, frag_idx); + sd_flags[frag_idx] |= RX_SW_DESC_INUSE; - MPASS(sd_m[idx] == NULL); - sd_cl[idx] = cl; - sd_m[idx] = m; - fl->ifl_rxd_idxs[i] = idx; + MPASS(sd_m[frag_idx] == NULL); + sd_cl[frag_idx] = cl; + sd_m[frag_idx] = m; + fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; fl->ifl_credits++; @@ -1905,8 +1913,8 @@ ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; + fl->ifl_pidx = idx; } - fl->ifl_pidx = idx; } done: @@ -1920,6 +1928,7 @@ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); + fl->ifl_fragidx = frag_idx; } static __inline void @@ -1983,7 +1992,7 @@ /* * Reset free list values */ - fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = 0;; + fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } @@ -1999,6 +2008,7 @@ if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; + bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size); /* ** Free current RX buffer structs and their mbufs */ @@ -2348,6 +2358,7 @@ if (map != NULL) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + bit_clear(fl->ifl_rx_bitmap, cidx); } static struct mbuf * @@ -4243,8 +4254,9 @@ iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; - int i; + int i, j; struct taskqgroup *tqg; + iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { @@ -4279,6 +4291,10 @@ for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); + + for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) + free(fl->ifl_rx_bitmap, M_IFLIB); + } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) @@ -4672,6 +4688,9 @@ err = ENOMEM; goto err_rx_desc; } + + for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) + fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO); } /* TXQs */