D15142.id41688.diff

Index: sys/dev/bnxt/bnxt.h
===================================================================
--- sys/dev/bnxt/bnxt.h
+++ sys/dev/bnxt/bnxt.h
@@ -203,6 +203,8 @@
/* Chip info */
#define BNXT_TSO_SIZE UINT16_MAX
+#define BNXT_MIN_FRAME_SIZE 52 /* Frames must be padded to this size for some A0 chips */
+
/* NVRAM access */
enum bnxt_nvm_directory_type {
BNX_DIR_TYPE_UNUSED = 0,
@@ -427,6 +429,7 @@
uint32_t ring_size; /* Must be a power of two */
uint16_t id; /* Logical ID */
uint16_t phys_id;
+ struct bnxt_full_tpa_start *tpa_start;
};
struct bnxt_cp_ring {
@@ -552,7 +555,6 @@
struct sysctl_ctx_list hw_stats;
struct sysctl_oid *hw_stats_oid;
- struct bnxt_full_tpa_start *tpa_start;
struct bnxt_ver_info *ver_info;
struct bnxt_nvram_info *nvm_info;
};
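
Some early (A0) chips require frames to be padded out to 52 bytes, which is why BNXT_MIN_FRAME_SIZE is defined here and IFLIB_NEED_ETHER_PAD plus isc_min_frame_size are set further down in if_bnxt.c. Below is a minimal userspace sketch of the length rule only; the real padding is done by iflib on the mbuf chain before DMA, and pad_runt_frame is an invented helper name.

/*
 * Minimal sketch of the runt-frame padding that IFLIB_NEED_ETHER_PAD
 * requests from iflib: anything shorter than the chip minimum is
 * zero-filled up to that length before it reaches the TX ring.
 */
#include <stdio.h>
#include <string.h>

#define BNXT_MIN_FRAME_SIZE 52

/* Pad 'len' bytes in 'buf' (capacity 'cap') up to the minimum; returns new length. */
static size_t
pad_runt_frame(unsigned char *buf, size_t len, size_t cap)
{
	if (len >= BNXT_MIN_FRAME_SIZE || cap < BNXT_MIN_FRAME_SIZE)
		return (len);
	memset(buf + len, 0, BNXT_MIN_FRAME_SIZE - len);
	return (BNXT_MIN_FRAME_SIZE);
}

int
main(void)
{
	unsigned char frame[64] = { 0xff };	/* pretend 42-byte ARP request */
	size_t newlen = pad_runt_frame(frame, 42, sizeof(frame));

	printf("padded 42 -> %zu bytes\n", newlen);
	return (0);
}
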
Index: sys/dev/bnxt/bnxt_hwrm.c
===================================================================
--- sys/dev/bnxt/bnxt_hwrm.c
+++ sys/dev/bnxt/bnxt_hwrm.c
@@ -931,7 +931,7 @@
/* TODO: Calculate this based on ring size? */
req.max_agg_segs = htole16(3);
/* Base this in the allocated TPA start size... */
- req.max_aggs = htole16(2);
+ req.max_aggs = htole16(7);
/*
* TODO: max_agg_timer?
* req.mag_agg_timer = htole32(XXX);
Index: sys/dev/bnxt/bnxt_txrx.c
===================================================================
--- sys/dev/bnxt/bnxt_txrx.c
+++ sys/dev/bnxt/bnxt_txrx.c
@@ -48,17 +48,19 @@
*/
static int bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi);
-static void bnxt_isc_txd_flush(void *sc, uint16_t txqid, uint32_t pidx);
-static int bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, uint32_t cidx,
- bool clear);
+static void bnxt_isc_txd_flush(void *sc, uint16_t txqid, qidx_t pidx);
+static int bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear);
-static void bnxt_isc_rxd_refill(void *sc, uint16_t rxqid, uint8_t flid,
+static void bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru);
+
+/* uint16_t rxqid, uint8_t flid,
uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs, uint16_t count,
uint16_t buf_size);
+*/
static void bnxt_isc_rxd_flush(void *sc, uint16_t rxqid, uint8_t flid,
- uint32_t pidx);
-static int bnxt_isc_rxd_available(void *sc, uint16_t rxqid, uint32_t idx,
- int budget);
+ qidx_t pidx);
+static int bnxt_isc_rxd_available(void *sc, uint16_t rxqid, qidx_t idx,
+ qidx_t budget);
static int bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri);
static int bnxt_intr(void *sc);
@@ -172,7 +174,7 @@
}
static void
-bnxt_isc_txd_flush(void *sc, uint16_t txqid, uint32_t pidx)
+bnxt_isc_txd_flush(void *sc, uint16_t txqid, qidx_t pidx)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_ring *tx_ring = &softc->tx_rings[txqid];
@@ -185,7 +187,7 @@
}
static int
-bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, uint32_t idx, bool clear)
+bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->tx_cp_rings[txqid];
@@ -249,16 +251,30 @@
}
static void
-bnxt_isc_rxd_refill(void *sc, uint16_t rxqid, uint8_t flid,
- uint32_t pidx, uint64_t *paddrs,
- caddr_t *vaddrs, uint16_t count, uint16_t len)
+bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_ring *rx_ring;
struct rx_prod_pkt_bd *rxbd;
uint16_t type;
uint16_t i;
+ uint16_t rxqid;
+ uint16_t count, len;
+ uint32_t pidx;
+ uint8_t flid;
+ uint64_t *paddrs;
+ caddr_t *vaddrs;
+ qidx_t *frag_idxs;
+ rxqid = iru->iru_qsidx;
+ count = iru->iru_count;
+ len = iru->iru_buf_size;
+ pidx = iru->iru_pidx;
+ flid = iru->iru_flidx;
+ vaddrs = iru->iru_vaddrs;
+ paddrs = iru->iru_paddrs;
+ frag_idxs = iru->iru_idxs;
+
if (flid == 0) {
rx_ring = &softc->rx_rings[rxqid];
type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT;
@@ -273,8 +289,8 @@
rxbd[pidx].flags_type = htole16(type);
rxbd[pidx].len = htole16(len);
/* No need to byte-swap the opaque value */
- rxbd[pidx].opaque = ((rxqid & 0xff) << 24) | (flid << 16)
- | pidx;
+ rxbd[pidx].opaque = (((rxqid & 0xff) << 24) | (flid << 16)
+ | (frag_idxs[i]));
rxbd[pidx].addr = htole64(paddrs[i]);
if (++pidx == rx_ring->ring_size)
pidx = 0;
@@ -284,7 +300,7 @@
static void
bnxt_isc_rxd_flush(void *sc, uint16_t rxqid, uint8_t flid,
- uint32_t pidx)
+ qidx_t pidx)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_ring *rx_ring;
@@ -310,12 +326,11 @@
}
static int
-bnxt_isc_rxd_available(void *sc, uint16_t rxqid, uint32_t idx, int budget)
+bnxt_isc_rxd_available(void *sc, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[rxqid];
struct rx_pkt_cmpl *rcp;
- struct rx_tpa_start_cmpl *rtpa;
struct rx_tpa_end_cmpl *rtpae;
struct cmpl_base *cmp = (struct cmpl_base *)cpr->ring.vaddr;
int avail = 0;
@@ -324,7 +339,6 @@
uint8_t ags;
int i;
uint16_t type;
- uint8_t agg_id;
for (;;) {
NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
@@ -374,18 +388,11 @@
avail++;
break;
case CMPL_BASE_TYPE_RX_TPA_START:
- rtpa = (void *)&cmp[cons];
- agg_id = (rtpa->agg_id &
- RX_TPA_START_CMPL_AGG_ID_MASK) >>
- RX_TPA_START_CMPL_AGG_ID_SFT;
- softc->tpa_start[agg_id].low = *rtpa;
NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
CMPL_PREFETCH_NEXT(cpr, cons);
if (!CMP_VALID(&cmp[cons], v_bit))
goto cmpl_invalid;
- softc->tpa_start[agg_id].high =
- ((struct rx_tpa_start_cmpl_hi *)cmp)[cons];
break;
case CMPL_BASE_TYPE_RX_AGG:
break;
@@ -510,7 +517,7 @@
/* Get the agg_id */
agg_id = (agend->agg_id & RX_TPA_END_CMPL_AGG_ID_MASK) >>
RX_TPA_END_CMPL_AGG_ID_SFT;
- tpas = &softc->tpa_start[agg_id];
+ tpas = &(softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id]);
/* Extract from the first 16-byte BD */
if (le16toh(tpas->low.flags_type) & RX_TPA_START_CMPL_FLAGS_RSS_VALID) {
@@ -530,8 +537,8 @@
RX_TPA_END_CMPL_AGG_BUFS_SFT;
ri->iri_nfrags = ags + 1;
/* No need to byte-swap the opaque value */
- ri->iri_frags[0].irf_flid = (tpas->low.opaque >> 16) & 0xff;
- ri->iri_frags[0].irf_idx = tpas->low.opaque & 0xffff;
+ ri->iri_frags[0].irf_flid = ((tpas->low.opaque >> 16) & 0xff);
+ ri->iri_frags[0].irf_idx = (tpas->low.opaque & 0xffff);
ri->iri_frags[0].irf_len = le16toh(tpas->low.len);
ri->iri_len = le16toh(tpas->low.len);
@@ -567,8 +574,8 @@
acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons];
/* No need to byte-swap the opaque value */
- ri->iri_frags[i].irf_flid = (acp->opaque >> 16) & 0xff;
- ri->iri_frags[i].irf_idx = acp->opaque & 0xffff;
+ ri->iri_frags[i].irf_flid = ((acp->opaque >> 16) & 0xff);
+ ri->iri_frags[i].irf_idx = (acp->opaque & 0xffff);
ri->iri_frags[i].irf_len = le16toh(acp->len);
ri->iri_len += le16toh(acp->len);
}
@@ -576,8 +583,8 @@
/* And finally, the empty BD at the end... */
ri->iri_nfrags++;
/* No need to byte-swap the opaque value */
- ri->iri_frags[i].irf_flid = (agend->opaque >> 16) % 0xff;
- ri->iri_frags[i].irf_idx = agend->opaque & 0xffff;
+ ri->iri_frags[i].irf_flid = ((agend->opaque >> 16) & 0xff);
+ ri->iri_frags[i].irf_idx = (agend->opaque & 0xffff);
ri->iri_frags[i].irf_len = le16toh(agend->len);
ri->iri_len += le16toh(agend->len);
@@ -590,9 +597,12 @@
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[ri->iri_qsidx];
+ struct cmpl_base *cmp_q = (struct cmpl_base *)cpr->ring.vaddr;
struct cmpl_base *cmp;
+ struct rx_tpa_start_cmpl *rtpa;
uint16_t flags_type;
uint16_t type;
+ uint8_t agg_id;
for (;;) {
NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
@@ -609,9 +619,18 @@
case CMPL_BASE_TYPE_RX_TPA_END:
return bnxt_pkt_get_tpa(softc, ri, cpr, flags_type);
case CMPL_BASE_TYPE_RX_TPA_START:
+ rtpa = (void *)&cmp_q[cpr->cons];
+ agg_id = (rtpa->agg_id &
+ RX_TPA_START_CMPL_AGG_ID_MASK) >>
+ RX_TPA_START_CMPL_AGG_ID_SFT;
+ softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].low = *rtpa;
+
NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
CMPL_PREFETCH_NEXT(cpr, cpr->cons);
+
+ softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].high =
+ ((struct rx_tpa_start_cmpl_hi *)cmp_q)[cpr->cons];
break;
default:
device_printf(softc->dev,
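
The refill and pkt_get changes above pack the RX queue id, free-list id, and fragment index into the descriptor's 32-bit opaque word and recover them on completion; the old '% 0xff' in the TPA-end path was a typo for '& 0xff'. A standalone round-trip sketch of that bit layout follows; the helper names are invented, only the layout comes from the driver code.

/*
 * Opaque word layout used by the RX paths above: queue id in bits
 * 31-24, free-list id in bits 23-16, fragment index in bits 15-0.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
opaque_encode(uint16_t rxqid, uint8_t flid, uint16_t frag_idx)
{
	return (((uint32_t)(rxqid & 0xff) << 24) |
	    ((uint32_t)flid << 16) | frag_idx);
}

static void
opaque_decode(uint32_t opaque, uint8_t *flid, uint16_t *frag_idx)
{
	*flid = (opaque >> 16) & 0xff;	/* '& 0xff', not '% 0xff' */
	*frag_idx = opaque & 0xffff;
}

int
main(void)
{
	uint8_t flid;
	uint16_t idx;
	uint32_t op = opaque_encode(3, 1, 0x1234);

	opaque_decode(op, &flid, &idx);
	assert(flid == 1 && idx == 0x1234);
	printf("opaque=0x%08x flid=%u idx=0x%04x\n", op, (unsigned)flid,
	    (unsigned)idx);
	return (0);
}
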
Index: sys/dev/bnxt/if_bnxt.c
===================================================================
--- sys/dev/bnxt/if_bnxt.c
+++ sys/dev/bnxt/if_bnxt.c
@@ -235,6 +235,8 @@
MODULE_DEPEND(bnxt, ether, 1, 1, 1);
MODULE_DEPEND(bnxt, iflib, 1, 1, 1);
+IFLIB_PNP_INFO(pci, bnxt, bnxt_vendor_info_array);
+
static device_method_t bnxt_iflib_methods[] = {
DEVMETHOD(ifdi_tx_queues_alloc, bnxt_tx_queues_alloc),
DEVMETHOD(ifdi_rx_queues_alloc, bnxt_rx_queues_alloc),
@@ -255,7 +257,8 @@
DEVMETHOD(ifdi_update_admin_status, bnxt_update_admin_status),
DEVMETHOD(ifdi_intr_enable, bnxt_intr_enable),
- DEVMETHOD(ifdi_queue_intr_enable, bnxt_queue_intr_enable),
+ DEVMETHOD(ifdi_tx_queue_intr_enable, bnxt_queue_intr_enable),
+ DEVMETHOD(ifdi_rx_queue_intr_enable, bnxt_queue_intr_enable),
DEVMETHOD(ifdi_intr_disable, bnxt_disable_intr),
DEVMETHOD(ifdi_msix_intr_assign, bnxt_msix_intr_assign),
@@ -279,10 +282,9 @@
extern struct if_txrx bnxt_txrx;
static struct if_shared_ctx bnxt_sctx_init = {
.isc_magic = IFLIB_MAGIC,
- .isc_txrx = &bnxt_txrx,
.isc_driver = &bnxt_iflib_driver,
.isc_nfl = 2, // Number of Free Lists
- .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ,
+ .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_NEED_ETHER_PAD,
.isc_q_align = PAGE_SIZE,
.isc_tx_maxsize = BNXT_TSO_SIZE,
.isc_tx_maxsegsize = BNXT_TSO_SIZE,
@@ -494,6 +496,17 @@
softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1];
softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1];
+ /* Allocate the TPA start buffer */
+ softc->rx_rings[i].tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) *
+ (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (softc->rx_rings[i].tpa_start == NULL) {
+ rc = -ENOMEM;
+ device_printf(softc->dev,
+ "Unable to allocate space for TPA\n");
+ goto tpa_alloc_fail;
+ }
+
/* Allocate the AG ring */
softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE;
softc->ag_rings[i].softc = softc;
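
The per-ring tpa_start array above is sized by how many aggregation IDs the completion format can express, i.e. the AGG_ID field mask shifted down. A standalone sketch of that sizing follows; the mask/shift values and the struct are illustrative placeholders, not the real hsi_struct_def.h definitions.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative placeholders for the real completion-record definitions. */
#define RX_TPA_START_CMPL_AGG_ID_MASK	0xfe00
#define RX_TPA_START_CMPL_AGG_ID_SFT	9

struct full_tpa_start { char pad[64]; };	/* stand-in for bnxt_full_tpa_start */

int
main(void)
{
	size_t nentries = RX_TPA_START_CMPL_AGG_ID_MASK >>
	    RX_TPA_START_CMPL_AGG_ID_SFT;
	struct full_tpa_start *tpa = calloc(nentries, sizeof(*tpa));

	if (tpa == NULL)
		return (1);
	printf("%zu TPA start slots per RX ring\n", nentries);
	free(tpa);
	return (0);
}
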
@@ -559,7 +572,10 @@
iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl);
rss_hash_alloc_fail:
iflib_dma_free(&softc->vnic_info.mc_list);
+tpa_alloc_fail:
mc_list_alloc_fail:
+ for (i = i - 1; i >= 0; i--)
+ free(softc->rx_rings[i].tpa_start, M_DEVBUF);
iflib_dma_free(&softc->rx_stats);
hw_stats_alloc_fail:
free(softc->grp_info, M_DEVBUF);
@@ -623,16 +639,6 @@
if (rc)
goto dma_fail;
- /* Allocate the TPA start buffer */
- softc->tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) *
- (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- if (softc->tpa_start == NULL) {
- rc = ENOMEM;
- device_printf(softc->dev,
- "Unable to allocate space for TPA\n");
- goto tpa_failed;
- }
/* Get firmware version and compare with driver */
softc->ver_info = malloc(sizeof(struct bnxt_ver_info),
@@ -681,6 +687,20 @@
goto failed;
iflib_set_mac(ctx, softc->func.mac_addr);
+ scctx->isc_txrx = &bnxt_txrx;
+ scctx->isc_tx_csum_flags = (CSUM_IP | CSUM_TCP | CSUM_UDP |
+ CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO);
+ scctx->isc_capenable =
+ /* These are translated to hwassist bits */
+ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 |
+ /* These are checked by iflib */
+ IFCAP_LRO | IFCAP_VLAN_HWFILTER |
+ /* These are part of the iflib mask */
+ IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU |
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
+ /* These likely get lost... */
+ IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU;
+
/* Get the queue config */
rc = bnxt_hwrm_queue_qportcfg(softc);
if (rc) {
@@ -700,6 +720,9 @@
scctx->isc_tx_tso_size_max = BNXT_TSO_SIZE;
scctx->isc_tx_tso_segsize_max = BNXT_TSO_SIZE;
scctx->isc_vectors = softc->func.max_cp_rings;
+ scctx->isc_min_frame_size = BNXT_MIN_FRAME_SIZE;
+ scctx->isc_txrx = &bnxt_txrx;
+
if (scctx->isc_nrxd[0] <
((scctx->isc_nrxd[1] * 4) + scctx->isc_nrxd[2]))
device_printf(softc->dev,
@@ -717,12 +740,12 @@
scctx->isc_nrxd[1];
scctx->isc_rxqsizes[2] = sizeof(struct rx_prod_pkt_bd) *
scctx->isc_nrxd[2];
- scctx->isc_max_rxqsets = min(pci_msix_count(softc->dev)-1,
+ scctx->isc_nrxqsets_max = min(pci_msix_count(softc->dev)-1,
softc->func.max_cp_rings - 1);
- scctx->isc_max_rxqsets = min(scctx->isc_max_rxqsets,
+ scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max,
softc->func.max_rx_rings);
- scctx->isc_max_txqsets = min(softc->func.max_rx_rings,
- softc->func.max_cp_rings - scctx->isc_max_rxqsets - 1);
+ scctx->isc_ntxqsets_max = min(softc->func.max_rx_rings,
+ softc->func.max_cp_rings - scctx->isc_nrxqsets_max - 1);
scctx->isc_rss_table_size = HW_HASH_INDEX_SIZE;
scctx->isc_rss_table_mask = scctx->isc_rss_table_size - 1;
@@ -780,8 +803,6 @@
ver_fail:
free(softc->ver_info, M_DEVBUF);
ver_alloc_fail:
- free(softc->tpa_start, M_DEVBUF);
-tpa_failed:
bnxt_free_hwrm_dma_mem(softc);
dma_fail:
BNXT_HWRM_LOCK_DESTROY(softc);
@@ -795,7 +816,6 @@
{
struct bnxt_softc *softc = iflib_get_softc(ctx);
if_t ifp = iflib_get_ifp(ctx);
- int capabilities, enabling;
int rc;
bnxt_create_config_sysctls_post(softc);
@@ -810,26 +830,6 @@
bnxt_add_media_types(softc);
ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO);
- if_sethwassist(ifp, (CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 |
- CSUM_UDP_IPV6 | CSUM_TSO));
-
- capabilities =
- /* These are translated to hwassit bits */
- IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 |
- /* These are checked by iflib */
- IFCAP_LRO | IFCAP_VLAN_HWFILTER |
- /* These are part of the iflib mask */
- IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU |
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
- /* These likely get lost... */
- IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU;
-
- if_setcapabilities(ifp, capabilities);
-
- enabling = capabilities;
-
- if_setcapenable(ifp, enabling);
-
softc->scctx->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
ETHER_CRC_LEN;
@@ -863,7 +863,8 @@
SLIST_FOREACH_SAFE(tag, &softc->vnic_info.vlan_tags, next, tmp)
free(tag, M_DEVBUF);
iflib_dma_free(&softc->def_cp_ring_mem);
- free(softc->tpa_start, M_DEVBUF);
+ for (i = 0; i < softc->nrxqsets; i++)
+ free(softc->rx_rings[i].tpa_start, M_DEVBUF);
free(softc->ver_info, M_DEVBUF);
free(softc->nvm_info, M_DEVBUF);
@@ -995,14 +996,17 @@
if (rc)
goto fail;
-#ifdef notyet
- /* Enable LRO/TPA/GRO */
+ /*
+ * Enable LRO/TPA/GRO
+ * TBD:
+ * Enable / Disable HW_LRO based on
+ * ifconfig lro / ifconfig -lro setting
+ */
rc = bnxt_hwrm_vnic_tpa_cfg(softc, &softc->vnic_info,
(if_getcapenable(iflib_get_ifp(ctx)) & IFCAP_LRO) ?
HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA : 0);
if (rc)
goto fail;
-#endif
for (i = 0; i < softc->ntxqsets; i++) {
/* Allocate the statistics context */
@@ -1489,7 +1493,7 @@
for (i=0; i<softc->scctx->isc_nrxqsets; i++) {
rc = iflib_irq_alloc_generic(ctx, &softc->rx_cp_rings[i].irq,
- softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RX,
+ softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RXTX,
bnxt_handle_rx_cp, &softc->rx_cp_rings[i], i, "rx_cp");
if (rc) {
device_printf(iflib_get_dev(ctx),
@@ -1500,8 +1504,7 @@
}
for (i=0; i<softc->scctx->isc_ntxqsets; i++)
- iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, NULL, i,
- "tx_cp");
+ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, "tx_cp");
return rc;
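
The isc_nrxqsets_max/isc_ntxqsets_max clamping above is plain min() arithmetic over the MSI-X vector count (one vector reserved for the admin/link interrupt) and the function's ring budgets. The same computation as a standalone sketch, with made-up resource numbers:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int
main(void)
{
	int msix = 16, max_cp_rings = 12, max_rx_rings = 8;
	int nrxqsets_max, ntxqsets_max;

	nrxqsets_max = MIN(msix - 1, max_cp_rings - 1);
	nrxqsets_max = MIN(nrxqsets_max, max_rx_rings);
	ntxqsets_max = MIN(max_rx_rings, max_cp_rings - nrxqsets_max - 1);

	printf("rx qsets %d, tx qsets %d\n", nrxqsets_max, ntxqsets_max);
	return (0);
}
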
Index: sys/kern/kern_cpuset.c
===================================================================
--- sys/kern/kern_cpuset.c
+++ sys/kern/kern_cpuset.c
@@ -1127,6 +1127,8 @@
case CPU_WHICH_JAIL:
break;
case CPU_WHICH_IRQ:
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
case CPU_WHICH_DOMAIN:
error = EINVAL;
goto out;
@@ -1157,7 +1159,9 @@
CPU_COPY(&set->cs_mask, mask);
break;
case CPU_WHICH_IRQ:
- error = intr_getaffinity(id, mask);
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
+ error = intr_getaffinity(id, which, mask);
break;
case CPU_WHICH_DOMAIN:
if (id < 0 || id >= MAXMEMDOM)
@@ -1260,6 +1264,8 @@
case CPU_WHICH_JAIL:
break;
case CPU_WHICH_IRQ:
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
case CPU_WHICH_DOMAIN:
error = EINVAL;
goto out;
@@ -1289,7 +1295,9 @@
}
break;
case CPU_WHICH_IRQ:
- error = intr_setaffinity(id, mask);
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
+ error = intr_setaffinity(id, which, mask);
break;
default:
error = EINVAL;
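
The kernel plumbing in kern_cpuset.c and kern_intr.c lets userland choose whether an affinity request applies to the interrupt handler, its ithread, or both. A sketch of how a userland tool might use it, assuming the CPU_WHICH_ITHREAD constant this series adds to sys/cpuset.h; it falls back to the pre-existing CPU_WHICH_IRQ behaviour when built on an older tree.

/* Pin an IRQ's ithread (or, pre-patch, the whole IRQ) to one CPU. */
#include <sys/param.h>
#include <sys/cpuset.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	cpuset_t mask;
	int irq, cpu;

	if (argc != 3)
		errx(1, "usage: %s irq cpu", argv[0]);
	irq = atoi(argv[1]);
	cpu = atoi(argv[2]);

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
#ifdef CPU_WHICH_ITHREAD
	if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_ITHREAD, irq,
	    sizeof(mask), &mask) != 0)
		err(1, "cpuset_setaffinity(ITHREAD)");
#else
	if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_IRQ, irq,
	    sizeof(mask), &mask) != 0)
		err(1, "cpuset_setaffinity(IRQ)");
#endif
	printf("bound irq %d to cpu %d\n", irq, cpu);
	return (0);
}
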
Index: sys/kern/kern_intr.c
===================================================================
--- sys/kern/kern_intr.c
+++ sys/kern/kern_intr.c
@@ -287,13 +287,11 @@
/*
* Bind an interrupt event to the specified CPU. Note that not all
* platforms support binding an interrupt to a CPU. For those
- * platforms this request will fail. For supported platforms, any
- * associated ithreads as well as the primary interrupt context will
- * be bound to the specificed CPU. Using a cpu id of NOCPU unbinds
+ * platforms this request will fail. Using a cpu id of NOCPU unbinds
* the interrupt event.
*/
-int
-intr_event_bind(struct intr_event *ie, int cpu)
+static int
+_intr_event_bind(struct intr_event *ie, int cpu, bool bindirq, bool bindithread)
{
lwpid_t id;
int error;
@@ -313,35 +311,75 @@
* If we have any ithreads try to set their mask first to verify
* permissions, etc.
*/
- mtx_lock(&ie->ie_lock);
- if (ie->ie_thread != NULL) {
- id = ie->ie_thread->it_thread->td_tid;
- mtx_unlock(&ie->ie_lock);
- error = cpuset_setithread(id, cpu);
- if (error)
- return (error);
- } else
- mtx_unlock(&ie->ie_lock);
- error = ie->ie_assign_cpu(ie->ie_source, cpu);
- if (error) {
+ if (bindithread) {
mtx_lock(&ie->ie_lock);
if (ie->ie_thread != NULL) {
- cpu = ie->ie_cpu;
id = ie->ie_thread->it_thread->td_tid;
mtx_unlock(&ie->ie_lock);
- (void)cpuset_setithread(id, cpu);
+ error = cpuset_setithread(id, cpu);
+ if (error)
+ return (error);
} else
mtx_unlock(&ie->ie_lock);
+ }
+ if (bindirq)
+ error = ie->ie_assign_cpu(ie->ie_source, cpu);
+ if (error) {
+ if (bindithread) {
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_thread != NULL) {
+ cpu = ie->ie_cpu;
+ id = ie->ie_thread->it_thread->td_tid;
+ mtx_unlock(&ie->ie_lock);
+ (void)cpuset_setithread(id, cpu);
+ } else
+ mtx_unlock(&ie->ie_lock);
+ }
return (error);
}
- mtx_lock(&ie->ie_lock);
- ie->ie_cpu = cpu;
- mtx_unlock(&ie->ie_lock);
+ if (bindirq) {
+ mtx_lock(&ie->ie_lock);
+ ie->ie_cpu = cpu;
+ mtx_unlock(&ie->ie_lock);
+ }
return (error);
}
+/*
+ * Bind an interrupt event to the specified CPU. For supported platforms, any
+ * associated ithreads as well as the primary interrupt context will be bound
+ * to the specified CPU.
+ */
+int
+intr_event_bind(struct intr_event *ie, int cpu)
+{
+
+ return (_intr_event_bind(ie, cpu, true, true));
+}
+
+/*
+ * Bind an interrupt event to the specified CPU, but do not bind associated
+ * ithreads.
+ */
+int
+intr_event_bind_irqonly(struct intr_event *ie, int cpu)
+{
+
+ return (_intr_event_bind(ie, cpu, true, false));
+}
+
+/*
+ * Bind an interrupt event's ithread to the specified CPU.
+ */
+int
+intr_event_bind_ithread(struct intr_event *ie, int cpu)
+{
+
+ return (_intr_event_bind(ie, cpu, false, true));
+}
+
static struct intr_event *
intr_lookup(int irq)
{
@@ -358,7 +396,7 @@
}
int
-intr_setaffinity(int irq, void *m)
+intr_setaffinity(int irq, int mode, void *m)
{
struct intr_event *ie;
cpuset_t *mask;
@@ -382,26 +420,62 @@
ie = intr_lookup(irq);
if (ie == NULL)
return (ESRCH);
- return (intr_event_bind(ie, cpu));
+ switch (mode) {
+ case CPU_WHICH_IRQ:
+ return (intr_event_bind(ie, cpu));
+ case CPU_WHICH_INTRHANDLER:
+ return (intr_event_bind_irqonly(ie, cpu));
+ case CPU_WHICH_ITHREAD:
+ return (intr_event_bind_ithread(ie, cpu));
+ default:
+ return (EINVAL);
+ }
}
int
-intr_getaffinity(int irq, void *m)
+intr_getaffinity(int irq, int mode, void *m)
{
struct intr_event *ie;
+ struct thread *td;
+ struct proc *p;
cpuset_t *mask;
+ lwpid_t id;
+ int error;
mask = m;
ie = intr_lookup(irq);
if (ie == NULL)
return (ESRCH);
+
+ error = 0;
CPU_ZERO(mask);
- mtx_lock(&ie->ie_lock);
- if (ie->ie_cpu == NOCPU)
- CPU_COPY(cpuset_root, mask);
- else
- CPU_SET(ie->ie_cpu, mask);
- mtx_unlock(&ie->ie_lock);
+ switch (mode) {
+ case CPU_WHICH_IRQ:
+ case CPU_WHICH_INTRHANDLER:
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_cpu == NOCPU)
+ CPU_COPY(cpuset_root, mask);
+ else
+ CPU_SET(ie->ie_cpu, mask);
+ mtx_unlock(&ie->ie_lock);
+ break;
+ case CPU_WHICH_ITHREAD:
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_thread == NULL) {
+ mtx_unlock(&ie->ie_lock);
+ CPU_COPY(cpuset_root, mask);
+ } else {
+ id = ie->ie_thread->it_thread->td_tid;
+ mtx_unlock(&ie->ie_lock);
+ error = cpuset_which(CPU_WHICH_TID, id, &p, &td, NULL);
+ if (error != 0)
+ return (error);
+ CPU_COPY(&td->td_cpuset->cs_mask, mask);
+ PROC_UNLOCK(p);
+ }
+ default:
+ return (EINVAL);
+ }
return (0);
}
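
The refactor above funnels intr_event_bind(), intr_event_bind_irqonly() and intr_event_bind_ithread() through one worker parameterized by (bindirq, bindithread) flags. A standalone sketch of that shape, with printfs standing in for the real cpuset/assign_cpu work:

#include <stdbool.h>
#include <stdio.h>

static int
_event_bind(int cpu, bool bindirq, bool bindithread)
{
	if (bindithread)
		printf("bind ithread to cpu %d\n", cpu);
	if (bindirq)
		printf("bind irq handler to cpu %d\n", cpu);
	return (0);
}

static int event_bind(int cpu)          { return (_event_bind(cpu, true, true)); }
static int event_bind_irqonly(int cpu)  { return (_event_bind(cpu, true, false)); }
static int event_bind_ithread(int cpu)  { return (_event_bind(cpu, false, true)); }

int
main(void)
{
	event_bind(1);
	event_bind_irqonly(2);
	event_bind_ithread(3);
	return (0);
}
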
Index: sys/kern/subr_gtaskqueue.c
===================================================================
--- sys/kern/subr_gtaskqueue.c
+++ sys/kern/subr_gtaskqueue.c
@@ -48,7 +48,7 @@
#include <sys/unistd.h>
#include <machine/stdarg.h>
-static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
+static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
static void gtaskqueue_thread_enqueue(void *);
static void gtaskqueue_thread_loop(void *arg);
@@ -134,8 +134,10 @@
snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
- if (!queue)
+ if (!queue) {
+ free(tq_name, M_GTASKQUEUE);
return (NULL);
+ }
STAILQ_INIT(&queue->tq_queue);
TAILQ_INIT(&queue->tq_active);
@@ -663,10 +665,10 @@
void *uniq, int irq, char *name)
{
cpuset_t mask;
- int qid;
+ int qid, error;
gtask->gt_uniq = uniq;
- gtask->gt_name = name;
+ snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
gtask->gt_irq = irq;
gtask->gt_cpu = -1;
mtx_lock(&qgroup->tqg_lock);
@@ -679,7 +681,9 @@
CPU_ZERO(&mask);
CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
mtx_unlock(&qgroup->tqg_lock);
- intr_setaffinity(irq, &mask);
+ error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (error)
+ printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error);
} else
mtx_unlock(&qgroup->tqg_lock);
}
@@ -688,7 +692,7 @@
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
cpuset_t mask;
- int qid, cpu;
+ int qid, cpu, error;
mtx_lock(&qgroup->tqg_lock);
qid = taskqgroup_find(qgroup, gtask->gt_uniq);
@@ -698,9 +702,11 @@
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
- intr_setaffinity(gtask->gt_irq, &mask);
-
+ error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
mtx_lock(&qgroup->tqg_lock);
+ if (error)
+ printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error);
+
}
qgroup->tqg_queue[qid].tgc_cnt++;
@@ -716,11 +722,11 @@
void *uniq, int cpu, int irq, char *name)
{
cpuset_t mask;
- int i, qid;
+ int i, qid, error;
qid = -1;
gtask->gt_uniq = uniq;
- gtask->gt_name = name;
+ snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
gtask->gt_irq = irq;
gtask->gt_cpu = cpu;
mtx_lock(&qgroup->tqg_lock);
@@ -732,6 +738,7 @@
}
if (qid == -1) {
mtx_unlock(&qgroup->tqg_lock);
+ printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
return (EINVAL);
}
} else
@@ -744,8 +751,11 @@
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
- if (irq != -1 && tqg_smp_started)
- intr_setaffinity(irq, &mask);
+ if (irq != -1 && tqg_smp_started) {
+ error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (error)
+ printf("%s: setaffinity failed: %d\n", __func__, error);
+ }
return (0);
}
@@ -753,7 +763,7 @@
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
cpuset_t mask;
- int i, qid, irq, cpu;
+ int i, qid, irq, cpu, error;
qid = -1;
irq = gtask->gt_irq;
@@ -767,6 +777,7 @@
}
if (qid == -1) {
mtx_unlock(&qgroup->tqg_lock);
+ printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
return (EINVAL);
}
qgroup->tqg_queue[qid].tgc_cnt++;
@@ -778,8 +789,11 @@
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
- if (irq != -1)
- intr_setaffinity(irq, &mask);
+ if (irq != -1) {
+ error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (error)
+ printf("%s: setaffinity failed: %d\n", __func__, error);
+ }
return (0);
}
@@ -793,7 +807,7 @@
if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
break;
if (i == qgroup->tqg_cnt)
- panic("taskqgroup_detach: task not in group\n");
+ panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name);
qgroup->tqg_queue[i].tgc_cnt--;
LIST_REMOVE(gtask, gt_list);
mtx_unlock(&qgroup->tqg_lock);
@@ -815,7 +829,7 @@
thread_unlock(curthread);
if (error)
- printf("taskqgroup_binder: setaffinity failed: %d\n",
+ printf("%s: setaffinity failed: %d\n", __func__,
error);
free(gtask, M_DEVBUF);
}
@@ -858,7 +872,7 @@
return (EINVAL);
}
if (qgroup->tqg_adjusting) {
- printf("taskqgroup_adjust failed: adjusting\n");
+ printf("%s failed: adjusting\n", __func__);
return (EBUSY);
}
qgroup->tqg_adjusting = 1;
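
gt_name changes above from a borrowed pointer to a fixed-size buffer so the group task keeps a stable copy even if the caller's string is transient. A tiny sketch of the copy-with-fallback idiom; GROUPTASK_NAMELEN is assumed to be 32 here for illustration.

#include <stdio.h>

#define GROUPTASK_NAMELEN 32

struct grouptask_name { char gt_name[GROUPTASK_NAMELEN]; };

static void
gtask_set_name(struct grouptask_name *gt, const char *name)
{
	/* Copy into the task-owned buffer, defaulting when no name is given. */
	snprintf(gt->gt_name, GROUPTASK_NAMELEN, "%s",
	    name ? name : "grouptask");
}

int
main(void)
{
	struct grouptask_name gt;

	gtask_set_name(&gt, NULL);
	printf("default name: %s\n", gt.gt_name);
	gtask_set_name(&gt, "rx_cp");
	printf("named: %s\n", gt.gt_name);
	return (0);
}
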
Index: sys/net/ifdi_if.m
===================================================================
--- sys/net/ifdi_if.m
+++ sys/net/ifdi_if.m
@@ -195,11 +195,16 @@
if_ctx_t _ctx;
};
-METHOD int queue_intr_enable {
+METHOD int rx_queue_intr_enable {
if_ctx_t _ctx;
uint16_t _qid;
} DEFAULT null_queue_intr_enable;
+METHOD int tx_queue_intr_enable {
+ if_ctx_t _ctx;
+ uint16_t _qid;
+} DEFAULT null_queue_intr_enable;
+
METHOD void link_intr_enable {
if_ctx_t _ctx;
} DEFAULT null_void_op;
@@ -229,6 +234,7 @@
METHOD void crcstrip_set {
if_ctx_t _ctx;
int _onoff;
+ int _strip;
};
#
@@ -332,4 +338,6 @@
if_int_delay_info_t _iidi;
} DEFAULT null_sysctl_int_delay;
-
+METHOD void debug {
+ if_ctx_t _ctx;
+} DEFAULT null_void_op;
Index: sys/net/iflib.h
===================================================================
--- sys/net/iflib.h
+++ sys/net/iflib.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014-2015, Matthew Macy (mmacy@nextbsd.org)
+ * Copyright (c) 2014-2017, Matthew Macy (mmacy@nextbsd.org)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,14 @@
#include <sys/nv.h>
#include <sys/gtaskqueue.h>
-
/*
+ * The value type for indexing; it limits max descriptors
+ * to 65535 and can be conditionally redefined to uint32_t
+ * in the future if the need arises.
+ */
+typedef uint16_t qidx_t;
+#define QIDX_INVALID 0xFFFF
+/*
* Most cards can handle much larger TSO requests
* but the FreeBSD TCP stack will break on larger
* values
@@ -63,7 +69,7 @@
typedef struct if_rxd_frag {
uint8_t irf_flid;
- uint16_t irf_idx;
+ qidx_t irf_idx;
uint16_t irf_len;
} *if_rxd_frag_t;
@@ -73,47 +79,61 @@
uint16_t iri_vtag; /* vlan tag - if flag set */
/* XXX redundant with the new irf_len field */
uint16_t iri_len; /* packet length */
- uint16_t iri_cidx; /* consumer index of cq */
+ qidx_t iri_cidx; /* consumer index of cq */
struct ifnet *iri_ifp; /* some drivers >1 interface per softc */
/* updated by driver */
- uint16_t iri_flags; /* mbuf flags for packet */
+ if_rxd_frag_t iri_frags;
uint32_t iri_flowid; /* RSS hash for packet */
uint32_t iri_csum_flags; /* m_pkthdr csum flags */
+
uint32_t iri_csum_data; /* m_pkthdr csum data */
+ uint8_t iri_flags; /* mbuf flags for packet */
uint8_t iri_nfrags; /* number of fragments in packet */
uint8_t iri_rsstype; /* RSS hash type */
uint8_t iri_pad; /* any padding in the received data */
- if_rxd_frag_t iri_frags;
} *if_rxd_info_t;
+typedef struct if_rxd_update {
+ uint64_t *iru_paddrs;
+ caddr_t *iru_vaddrs;
+ qidx_t *iru_idxs;
+ qidx_t iru_pidx;
+ uint16_t iru_qsidx;
+ uint16_t iru_count;
+ uint16_t iru_buf_size;
+ uint8_t iru_flidx;
+} *if_rxd_update_t;
+
#define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */
#define IPI_TX_IPV4 0x2 /* ethertype IPv4 */
#define IPI_TX_IPV6 0x4 /* ethertype IPv6 */
typedef struct if_pkt_info {
- uint32_t ipi_len; /* packet length */
- bus_dma_segment_t *ipi_segs; /* physical addresses */
- uint16_t ipi_qsidx; /* queue set index */
- uint16_t ipi_nsegs; /* number of segments */
- uint16_t ipi_ndescs; /* number of descriptors used by encap */
- uint16_t ipi_flags; /* iflib per-packet flags */
- uint32_t ipi_pidx; /* start pidx for encap */
- uint32_t ipi_new_pidx; /* next available pidx post-encap */
+ bus_dma_segment_t *ipi_segs; /* physical addresses */
+ uint32_t ipi_len; /* packet length */
+ uint16_t ipi_qsidx; /* queue set index */
+ qidx_t ipi_nsegs; /* number of segments */
+
+ qidx_t ipi_ndescs; /* number of descriptors used by encap */
+ uint16_t ipi_flags; /* iflib per-packet flags */
+ qidx_t ipi_pidx; /* start pidx for encap */
+ qidx_t ipi_new_pidx; /* next available pidx post-encap */
/* offload handling */
- uint64_t ipi_csum_flags; /* packet checksum flags */
- uint16_t ipi_tso_segsz; /* tso segment size */
- uint16_t ipi_mflags; /* packet mbuf flags */
- uint16_t ipi_vtag; /* VLAN tag */
- uint16_t ipi_etype; /* ether header type */
- uint8_t ipi_ehdrlen; /* ether header length */
- uint8_t ipi_ip_hlen; /* ip header length */
- uint8_t ipi_tcp_hlen; /* tcp header length */
- uint8_t ipi_tcp_hflags; /* tcp header flags */
- uint8_t ipi_ipproto; /* ip protocol */
- /* implied padding */
- uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t ipi_tcp_sum; /* tcp csum */
+ uint8_t ipi_ehdrlen; /* ether header length */
+ uint8_t ipi_ip_hlen; /* ip header length */
+ uint8_t ipi_tcp_hlen; /* tcp header length */
+ uint8_t ipi_ipproto; /* ip protocol */
+
+ uint32_t ipi_csum_flags; /* packet checksum flags */
+ uint16_t ipi_tso_segsz; /* tso segment size */
+ uint16_t ipi_vtag; /* VLAN tag */
+ uint16_t ipi_etype; /* ether header type */
+ uint8_t ipi_tcp_hflags; /* tcp header flags */
+ uint8_t ipi_mflags; /* packet mbuf flags */
+
+ uint32_t ipi_tcp_seq; /* tcp seqno */
+ uint32_t ipi_tcp_sum; /* tcp csum */
} *if_pkt_info_t;
typedef struct if_irq {
@@ -154,17 +174,20 @@
#define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name}
#define PVID_END {0, 0, 0, 0, 0, 0, NULL}
+#define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:subvendor;U32:subdevice;" \
+ "U32:revision;U32:class;D:human"
+#define IFLIB_PNP_INFO(b, u, t) \
+ MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, sizeof(t[0]), nitems(t) - 1)
+
typedef struct if_txrx {
int (*ift_txd_encap) (void *, if_pkt_info_t);
- void (*ift_txd_flush) (void *, uint16_t, uint32_t);
- int (*ift_txd_credits_update) (void *, uint16_t, uint32_t, bool);
+ void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx);
+ int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear);
- int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx,
- int budget);
+ int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget);
int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri);
- void (*ift_rxd_refill) (void * , uint16_t qsidx, uint8_t flidx, uint32_t pidx,
- uint64_t *paddrs, caddr_t *vaddrs, uint16_t count, uint16_t buf_size);
- void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, uint32_t pidx);
+ void (*ift_rxd_refill) (void * , if_rxd_update_t iru);
+ void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx);
int (*ift_legacy_intr) (void *);
} *if_txrx_t;
@@ -179,11 +202,15 @@
uint32_t isc_txqsizes[8];
uint32_t isc_rxqsizes[8];
- int isc_max_txqsets;
- int isc_max_rxqsets;
+ /* is there such thing as a descriptor that is more than 248 bytes ? */
+ uint8_t isc_txd_size[8];
+ uint8_t isc_rxd_size[8];
+
int isc_tx_tso_segments_max;
int isc_tx_tso_size_max;
int isc_tx_tso_segsize_max;
+ int isc_tx_csum_flags;
+ int isc_capenable;
int isc_rss_table_size;
int isc_rss_table_mask;
int isc_nrxqsets_max;
@@ -191,32 +218,28 @@
iflib_intr_mode_t isc_intr;
uint16_t isc_max_frame_size; /* set at init time by driver */
+ uint16_t isc_min_frame_size; /* set at init time by driver, only used if
+ IFLIB_NEED_ETHER_PAD is set. */
+ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */
pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */
+ int isc_disable_msix;
+ if_txrx_t isc_txrx;
} *if_softc_ctx_t;
/*
* Initialization values for device
*/
struct if_shared_ctx {
- int isc_magic;
- if_txrx_t isc_txrx;
+ unsigned isc_magic;
driver_t *isc_driver;
- int isc_nfl;
- int isc_flags;
bus_size_t isc_q_align;
bus_size_t isc_tx_maxsize;
bus_size_t isc_tx_maxsegsize;
bus_size_t isc_rx_maxsize;
bus_size_t isc_rx_maxsegsize;
int isc_rx_nsegments;
- int isc_rx_process_limit;
- int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
- int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
int isc_admin_intrcnt; /* # of admin/link interrupts */
-
- int isc_tx_reclaim_thresh;
-
/* fields necessary for probe */
pci_vendor_info_t *isc_vendor_info;
char *isc_driver_version;
@@ -229,6 +252,14 @@
int isc_ntxd_min[8];
int isc_ntxd_default[8];
int isc_ntxd_max[8];
+
+ /* actively used during operation */
+ int isc_nfl __aligned(CACHE_LINE_SIZE);
+ int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
+ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
+ int isc_rx_process_limit;
+ int isc_tx_reclaim_thresh;
+ int isc_flags;
};
typedef struct iflib_dma_info {
@@ -242,8 +273,9 @@
#define IFLIB_MAGIC 0xCAFEF00D
typedef enum {
- IFLIB_INTR_TX,
IFLIB_INTR_RX,
+ IFLIB_INTR_TX,
+ IFLIB_INTR_RXTX,
IFLIB_INTR_ADMIN,
IFLIB_INTR_IOV,
} iflib_intr_type_t;
@@ -256,22 +288,42 @@
/*
* Interface has a separate command queue for RX
*/
-#define IFLIB_HAS_RXCQ 0x1
+#define IFLIB_HAS_RXCQ 0x01
/*
* Driver has already allocated vectors
*/
-#define IFLIB_SKIP_MSIX 0x2
-
+#define IFLIB_SKIP_MSIX 0x02
/*
* Interface is a virtual function
*/
-#define IFLIB_IS_VF 0x4
+#define IFLIB_IS_VF 0x04
/*
* Interface has a separate command queue for TX
*/
-#define IFLIB_HAS_TXCQ 0x8
+#define IFLIB_HAS_TXCQ 0x08
+/*
+ * Interface does checksum in place
+ */
+#define IFLIB_NEED_SCRATCH 0x10
+/*
+ * Interface doesn't expect in_pseudo for th_sum
+ */
+#define IFLIB_TSO_INIT_IP 0x20
+/*
+ * Interface doesn't align IP header
+ */
+#define IFLIB_DO_RX_FIXUP 0x40
+/*
+ * Driver needs csum zeroed for offloading
+ */
+#define IFLIB_NEED_ZERO_CSUM 0x80
+/*
+ * Driver needs frames padded to some minimum length
+ */
+#define IFLIB_NEED_ETHER_PAD 0x100
+
/*
* field accessors
*/
@@ -288,9 +340,6 @@
void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]);
-
-
-
/*
* If the driver can plug cleanly in to newbus use these
*/
@@ -319,7 +368,7 @@
int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
void *filter_arg, int qid, char *name);
-void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name);
+void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name);
void iflib_irq_free(if_ctx_t ctx, if_irq_t irq);
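
The new ift_rxd_refill contract hands the driver one if_rxd_update describing a whole batch of buffers (parallel paddr/fragment-index arrays plus a starting pidx) instead of a long argument list. A simplified userspace model of that contract follows; the struct is a trimmed copy of the one added to iflib.h (iru_vaddrs omitted) and the ring is a plain array standing in for hardware descriptors.

#include <stdint.h>
#include <stdio.h>

typedef uint16_t qidx_t;

struct if_rxd_update {
	uint64_t	*iru_paddrs;
	qidx_t		*iru_idxs;
	qidx_t		 iru_pidx;
	uint16_t	 iru_qsidx;
	uint16_t	 iru_count;
	uint16_t	 iru_buf_size;
	uint8_t		 iru_flidx;
};

struct rx_desc { uint64_t addr; uint16_t len; qidx_t frag_idx; };

#define RING_SIZE 8
static struct rx_desc ring[RING_SIZE];

/* Driver-side consumer, shaped like bnxt_isc_rxd_refill() above. */
static void
demo_rxd_refill(struct if_rxd_update *iru)
{
	qidx_t pidx = iru->iru_pidx;

	for (uint16_t i = 0; i < iru->iru_count; i++) {
		ring[pidx].addr = iru->iru_paddrs[i];
		ring[pidx].len = iru->iru_buf_size;
		ring[pidx].frag_idx = iru->iru_idxs[i];
		if (++pidx == RING_SIZE)	/* wrap like the hardware ring */
			pidx = 0;
	}
}

int
main(void)
{
	uint64_t paddrs[4] = { 0x1000, 0x2000, 0x3000, 0x4000 };
	qidx_t idxs[4] = { 0, 1, 2, 3 };
	struct if_rxd_update iru = {
		.iru_paddrs = paddrs, .iru_idxs = idxs, .iru_pidx = 6,
		.iru_qsidx = 0, .iru_count = 4, .iru_buf_size = 2048,
		.iru_flidx = 0,
	};

	demo_rxd_refill(&iru);
	for (int i = 0; i < RING_SIZE; i++)
		printf("desc %d: addr=0x%jx frag=%u\n", i,
		    (uintmax_t)ring[i].addr, (unsigned)ring[i].frag_idx);
	return (0);
}
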
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014-2016, Matthew Macy <mmacy@nextbsd.org>
+ * Copyright (c) 2014-2017, Matthew Macy <mmacy@nextbsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_acpi.h"
+#include "opt_sched.h"
#include <sys/param.h>
#include <sys/types.h>
@@ -59,6 +60,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/mp_ring.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
@@ -68,6 +70,8 @@
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
#include <machine/bus.h>
#include <machine/in_cksum.h>
@@ -93,9 +97,10 @@
#include <x86/iommu/busdma_dmar.h>
#endif
-
+#include <sys/bitstring.h>
/*
- * enable accounting of every mbuf as it comes in to and goes out of iflib's software descriptor references
+ * enable accounting of every mbuf as it comes in to and goes out of
+ * iflib's software descriptor references
*/
#define MEMORY_LOGGING 0
/*
@@ -134,10 +139,13 @@
struct iflib_fl;
typedef struct iflib_fl *iflib_fl_t;
+static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid);
+
typedef struct iflib_filter_info {
driver_filter_t *ifi_filter;
void *ifi_filter_arg;
struct grouptask *ifi_task;
+ void *ifi_ctx;
} *iflib_filter_info_t;
struct iflib_ctx {
@@ -156,7 +164,6 @@
struct mtx ifc_mtx;
uint16_t ifc_nhwtxqs;
- uint16_t ifc_nhwrxqs;
iflib_txq_t ifc_txqs;
iflib_rxq_t ifc_rxqs;
@@ -167,7 +174,6 @@
int ifc_link_state;
int ifc_link_irq;
- int ifc_pause_frames;
int ifc_watchdog_events;
struct cdev *ifc_led_dev;
struct resource *ifc_msix_mem;
@@ -182,9 +188,10 @@
uint16_t ifc_sysctl_ntxqs;
uint16_t ifc_sysctl_nrxqs;
uint16_t ifc_sysctl_qs_eq_override;
+ uint16_t ifc_sysctl_rx_budget;
- uint16_t ifc_sysctl_ntxds[8];
- uint16_t ifc_sysctl_nrxds[8];
+ qidx_t ifc_sysctl_ntxds[8];
+ qidx_t ifc_sysctl_nrxds[8];
struct if_txrx ifc_txrx;
#define isc_txd_encap ifc_txrx.ift_txd_encap
#define isc_txd_flush ifc_txrx.ift_txd_flush
@@ -252,7 +259,9 @@
return (ctx->ifc_sctx);
}
+#define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2)
#define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*))
+#define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1)))
#define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP)
#define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF)
@@ -262,62 +271,70 @@
#define RX_SW_DESC_INUSE (1 << 3)
#define TX_SW_DESC_MAPPED (1 << 4)
-typedef struct iflib_sw_rx_desc {
- bus_dmamap_t ifsd_map; /* bus_dma map for packet */
- struct mbuf *ifsd_m; /* rx: uninitialized mbuf */
- caddr_t ifsd_cl; /* direct cluster pointer for rx */
- uint16_t ifsd_flags;
-} *iflib_rxsd_t;
+#define M_TOOBIG M_PROTO1
-typedef struct iflib_sw_tx_desc_val {
- bus_dmamap_t ifsd_map; /* bus_dma map for packet */
- struct mbuf *ifsd_m; /* pkthdr mbuf */
- uint8_t ifsd_flags;
-} *iflib_txsd_val_t;
+typedef struct iflib_sw_rx_desc_array {
+ bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */
+ struct mbuf **ifsd_m; /* pkthdr mbufs */
+ caddr_t *ifsd_cl; /* direct cluster pointer for rx */
+ uint8_t *ifsd_flags;
+} iflib_rxsd_array_t;
typedef struct iflib_sw_tx_desc_array {
bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */
struct mbuf **ifsd_m; /* pkthdr mbufs */
uint8_t *ifsd_flags;
-} iflib_txsd_array_t;
+} if_txsd_vec_t;
/* magic number that should be high enough for any hardware */
#define IFLIB_MAX_TX_SEGS 128
-#define IFLIB_MAX_RX_SEGS 32
+/* bnxt supports 64 with hardware LRO enabled */
+#define IFLIB_MAX_RX_SEGS 64
#define IFLIB_RX_COPY_THRESH 128
#define IFLIB_MAX_RX_REFRESH 32
+/* The minimum descriptors per second before we start coalescing */
+#define IFLIB_MIN_DESC_SEC 16384
+#define IFLIB_DEFAULT_TX_UPDATE_FREQ 16
#define IFLIB_QUEUE_IDLE 0
#define IFLIB_QUEUE_HUNG 1
#define IFLIB_QUEUE_WORKING 2
+/* maximum number of txqs that can share an rx interrupt */
+#define IFLIB_MAX_TX_SHARED_INTR 4
-/* this should really scale with ring size - 32 is a fairly arbitrary value for this */
-#define TX_BATCH_SIZE 16
+/* this should really scale with ring size - this is a fairly arbitrary value */
+#define TX_BATCH_SIZE 32
#define IFLIB_RESTART_BUDGET 8
-#define IFC_LEGACY 0x01
-#define IFC_QFLUSH 0x02
-#define IFC_MULTISEG 0x04
-#define IFC_DMAR 0x08
-#define IFC_SC_ALLOCATED 0x10
+#define IFC_LEGACY 0x001
+#define IFC_QFLUSH 0x002
+#define IFC_MULTISEG 0x004
+#define IFC_DMAR 0x008
+#define IFC_SC_ALLOCATED 0x010
+#define IFC_INIT_DONE 0x020
+#define IFC_PREFETCH 0x040
+#define IFC_DO_RESET 0x080
+#define IFC_CHECK_HUNG 0x100
#define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)
struct iflib_txq {
- uint16_t ift_in_use;
- uint16_t ift_cidx;
- uint16_t ift_cidx_processed;
- uint16_t ift_pidx;
+ qidx_t ift_in_use;
+ qidx_t ift_cidx;
+ qidx_t ift_cidx_processed;
+ qidx_t ift_pidx;
uint8_t ift_gen;
- uint8_t ift_db_pending;
- uint8_t ift_db_pending_queued;
- uint8_t ift_npending;
uint8_t ift_br_offset;
+ uint16_t ift_npending;
+ uint16_t ift_db_pending;
+ uint16_t ift_rs_pending;
/* implicit pad */
+ uint8_t ift_txd_size[8];
uint64_t ift_processed;
uint64_t ift_cleaned;
+ uint64_t ift_cleaned_prev;
#if MEMORY_LOGGING
uint64_t ift_enqueued;
uint64_t ift_dequeued;
@@ -335,19 +352,16 @@
/* constant values */
if_ctx_t ift_ctx;
- struct ifmp_ring **ift_br;
+ struct ifmp_ring *ift_br;
struct grouptask ift_task;
- uint16_t ift_size;
+ qidx_t ift_size;
uint16_t ift_id;
struct callout ift_timer;
- struct callout ift_db_check;
- iflib_txsd_array_t ift_sds;
- uint8_t ift_nbr;
- uint8_t ift_qstatus;
- uint8_t ift_active;
- uint8_t ift_closed;
- int ift_watchdog_time;
+ if_txsd_vec_t ift_sds;
+ uint8_t ift_qstatus;
+ uint8_t ift_closed;
+ uint8_t ift_update_freq;
struct iflib_filter_info ift_filter_info;
bus_dma_tag_t ift_desc_tag;
bus_dma_tag_t ift_tso_desc_tag;
@@ -356,13 +370,17 @@
char ift_mtx_name[MTX_NAME_LEN];
char ift_db_mtx_name[MTX_NAME_LEN];
bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE);
+#ifdef IFLIB_DIAGNOSTICS
+ uint64_t ift_cpu_exec_count[256];
+#endif
} __aligned(CACHE_LINE_SIZE);
struct iflib_fl {
- uint16_t ifl_cidx;
- uint16_t ifl_pidx;
- uint16_t ifl_credits;
+ qidx_t ifl_cidx;
+ qidx_t ifl_pidx;
+ qidx_t ifl_credits;
uint8_t ifl_gen;
+ uint8_t ifl_rxd_size;
#if MEMORY_LOGGING
uint64_t ifl_m_enqueued;
uint64_t ifl_m_dequeued;
@@ -371,24 +389,27 @@
#endif
/* implicit pad */
+ bitstr_t *ifl_rx_bitmap;
+ qidx_t ifl_fragidx;
/* constant */
- uint16_t ifl_size;
+ qidx_t ifl_size;
uint16_t ifl_buf_size;
uint16_t ifl_cltype;
uma_zone_t ifl_zone;
- iflib_rxsd_t ifl_sds;
+ iflib_rxsd_array_t ifl_sds;
iflib_rxq_t ifl_rxq;
uint8_t ifl_id;
bus_dma_tag_t ifl_desc_tag;
iflib_dma_info_t ifl_ifdi;
uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE);
caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH];
+ qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH];
} __aligned(CACHE_LINE_SIZE);
-static inline int
-get_inuse(int size, int cidx, int pidx, int gen)
+static inline qidx_t
+get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen)
{
- int used;
+ qidx_t used;
if (pidx > cidx)
used = pidx - cidx;
@@ -414,9 +435,9 @@
* these are the cq cidx and pidx. Otherwise
* these are unused.
*/
- uint16_t ifr_size;
- uint16_t ifr_cq_cidx;
- uint16_t ifr_cq_pidx;
+ qidx_t ifr_size;
+ qidx_t ifr_cq_cidx;
+ qidx_t ifr_cq_pidx;
uint8_t ifr_cq_gen;
uint8_t ifr_fl_offset;
@@ -426,26 +447,89 @@
uint16_t ifr_id;
uint8_t ifr_lro_enabled;
uint8_t ifr_nfl;
+ uint8_t ifr_ntxqirq;
+ uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR];
struct lro_ctrl ifr_lc;
struct grouptask ifr_task;
struct iflib_filter_info ifr_filter_info;
iflib_dma_info_t ifr_ifdi;
+
/* dynamically allocate if any drivers need a value substantially larger than this */
struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE);
+#ifdef IFLIB_DIAGNOSTICS
+ uint64_t ifr_cpu_exec_count[256];
+#endif
} __aligned(CACHE_LINE_SIZE);
+typedef struct if_rxsd {
+ caddr_t *ifsd_cl;
+ struct mbuf **ifsd_m;
+ iflib_fl_t ifsd_fl;
+ qidx_t ifsd_cidx;
+} *if_rxsd_t;
+
+/* multiple of word size */
+#ifdef __LP64__
+#define PKT_INFO_SIZE 6
+#define RXD_INFO_SIZE 5
+#define PKT_TYPE uint64_t
+#else
+#define PKT_INFO_SIZE 11
+#define RXD_INFO_SIZE 8
+#define PKT_TYPE uint32_t
+#endif
+#define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3)
+#define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4)
+
+typedef struct if_pkt_info_pad {
+ PKT_TYPE pkt_val[PKT_INFO_SIZE];
+} *if_pkt_info_pad_t;
+typedef struct if_rxd_info_pad {
+ PKT_TYPE rxd_val[RXD_INFO_SIZE];
+} *if_rxd_info_pad_t;
+
+CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info));
+CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info));
+
+
+static inline void
+pkt_info_zero(if_pkt_info_t pi)
+{
+ if_pkt_info_pad_t pi_pad;
+
+ pi_pad = (if_pkt_info_pad_t)pi;
+ pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0;
+ pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0;
+#ifndef __LP64__
+ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0;
+ pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0;
+#endif
+}
+
+static inline void
+rxd_info_zero(if_rxd_info_t ri)
+{
+ if_rxd_info_pad_t ri_pad;
+ int i;
+
+ ri_pad = (if_rxd_info_pad_t)ri;
+ for (i = 0; i < RXD_LOOP_BOUND; i += 4) {
+ ri_pad->rxd_val[i] = 0;
+ ri_pad->rxd_val[i+1] = 0;
+ ri_pad->rxd_val[i+2] = 0;
+ ri_pad->rxd_val[i+3] = 0;
+ }
+#ifdef __LP64__
+ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0;
+#endif
+}
+
/*
* Only allow a single packet to take up most 1/nth of the tx ring
*/
#define MAX_SINGLE_PACKET_FRACTION 12
#define IF_BAD_DMA (bus_addr_t)-1
-static int enable_msix = 1;
-
-#define mtx_held(m) (((m)->mtx_lock & ~MTX_FLAGMASK) != (uintptr_t)0)
-
-
-
#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING))
#define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF)
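
The pkt_info_zero()/rxd_info_zero() helpers added above zero the hot-path structs a machine word at a time through an overlay type, with CTASSERTs pinning the overlay to the real struct size. The same pattern as a standalone sketch using C11 _Static_assert; the overlay cast mirrors the kernel code, and a union would be the strictly conforming alternative.

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct pkt_info {
	uint64_t	len;
	uint32_t	flags;
	uint32_t	csum;
	uint16_t	vtag;
	uint16_t	etype;
	uint32_t	seq;
};

#define PKT_INFO_WORDS	(sizeof(struct pkt_info) / sizeof(uint64_t))

struct pkt_info_pad { uint64_t val[PKT_INFO_WORDS]; };

_Static_assert(sizeof(struct pkt_info_pad) == sizeof(struct pkt_info),
    "pad overlay must match struct size");

static void
demo_pkt_info_zero(struct pkt_info *pi)
{
	struct pkt_info_pad *pad = (struct pkt_info_pad *)pi;

	/* Zero word-by-word instead of calling bzero on every packet. */
	for (size_t i = 0; i < PKT_INFO_WORDS; i++)
		pad->val[i] = 0;
}

int
main(void)
{
	struct pkt_info pi;

	memset(&pi, 0xff, sizeof(pi));
	demo_pkt_info_zero(&pi);
	assert(pi.len == 0 && pi.vtag == 0 && pi.seq == 0);
	return (0);
}
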
@@ -455,12 +539,6 @@
#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx)
-#define TXDB_LOCK_INIT(txq) mtx_init(&(txq)->ift_db_mtx, (txq)->ift_db_mtx_name, NULL, MTX_DEF)
-#define TXDB_TRYLOCK(txq) mtx_trylock(&(txq)->ift_db_mtx)
-#define TXDB_LOCK(txq) mtx_lock(&(txq)->ift_db_mtx)
-#define TXDB_UNLOCK(txq) mtx_unlock(&(txq)->ift_db_mtx)
-#define TXDB_LOCK_DESTROY(txq) mtx_destroy(&(txq)->ift_db_mtx)
-
#define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx)
#define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx)
@@ -480,6 +558,7 @@
MODULE_DEPEND(iflib, pci, 1, 1, 1);
MODULE_DEPEND(iflib, ether, 1, 1, 1);
+TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1);
TASKQGROUP_DEFINE(if_config_tqg, 1, 1);
#ifndef IFLIB_DEBUG_COUNTERS
@@ -497,9 +576,11 @@
* XXX need to ensure that this can't accidentally cause the head to be moved backwards
*/
static int iflib_min_tx_latency = 0;
-
SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW,
- &iflib_min_tx_latency, 0, "minimize transmit latency at the possibel expense of throughput");
+ &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput");
+static int iflib_no_tx_batch = 0;
+SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW,
+ &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput");
#if IFLIB_DEBUG_COUNTERS
@@ -544,11 +625,14 @@
static int iflib_encap_load_mbuf_fail;
+static int iflib_encap_pad_mbuf_fail;
static int iflib_encap_txq_avail_fail;
static int iflib_encap_txd_encap_fail;
SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD,
&iflib_encap_load_mbuf_fail, 0, "# busdma load failures");
+SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD,
+ &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD,
&iflib_encap_txq_avail_fail, 0, "# txq avail failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD,
@@ -594,10 +678,24 @@
&iflib_verbose_debug, 0, "enable verbose debugging");
#define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1)
+static void
+iflib_debug_reset(void)
+{
+ iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs =
+ iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees =
+ iflib_txq_drain_flushing = iflib_txq_drain_oactive =
+ iflib_txq_drain_notready = iflib_txq_drain_encapfail =
+ iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail =
+ iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail =
+ iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs =
+ iflib_intr_link = iflib_intr_msix = iflib_rx_unavail =
+ iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input =
+ iflib_rx_mbuf_null = iflib_rxd_flush = 0;
+}
#else
#define DBG_COUNTER_INC(name)
-
+static void iflib_debug_reset(void) {}
#endif
@@ -608,7 +706,7 @@
static void iflib_rx_structures_free(if_ctx_t ctx);
static int iflib_queues_alloc(if_ctx_t ctx);
static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
-static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget);
+static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget);
static int iflib_qset_structures_setup(if_ctx_t ctx);
static int iflib_msix_init(if_ctx_t ctx);
static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str);
@@ -618,8 +716,14 @@
static void iflib_init_locked(if_ctx_t ctx);
static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
static void iflib_add_device_sysctl_post(if_ctx_t ctx);
+static void iflib_ifmp_purge(iflib_txq_t txq);
+static void _iflib_pre_assert(if_softc_ctx_t scctx);
+static void iflib_stop(if_ctx_t ctx);
+static void iflib_if_init_locked(if_ctx_t ctx);
+#ifndef __NO_STRICT_ALIGNMENT
+static struct mbuf * iflib_fixup_rx(struct mbuf *m);
+#endif
-
#ifdef DEV_NETMAP
#include <sys/selinfo.h>
#include <net/netmap.h>
@@ -627,6 +731,8 @@
MODULE_DEPEND(iflib, netmap, 1, 1, 1);
+static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init);
+
/*
* device-specific sysctl variables:
*
@@ -662,6 +768,7 @@
{
struct ifnet *ifp = na->ifp;
if_ctx_t ctx = ifp->if_softc;
+ int status;
CTX_LOCK(ctx);
IFDI_INTR_DISABLE(ctx);
@@ -670,7 +777,7 @@
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
if (!CTX_IS_VF(ctx))
- IFDI_CRCSTRIP_SET(ctx, onoff);
+ IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
@@ -678,12 +785,90 @@
} else {
nm_clear_native_flags(na);
}
- IFDI_INIT(ctx);
- IFDI_CRCSTRIP_SET(ctx, onoff); // XXX why twice ?
+ iflib_stop(ctx);
+ iflib_init_locked(ctx);
+ IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ?
+ status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1;
+ if (status)
+ nm_clear_native_flags(na);
CTX_UNLOCK(ctx);
- return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
+ return (status);
}
+static int
+netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init)
+{
+ struct netmap_adapter *na = kring->na;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int head = kring->rhead;
+ struct netmap_ring *ring = kring->ring;
+ bus_dmamap_t *map;
+ struct if_rxd_update iru;
+ if_ctx_t ctx = rxq->ifr_ctx;
+ iflib_fl_t fl = &rxq->ifr_fl[0];
+ uint32_t refill_pidx, nic_i;
+
+ if (nm_i == head && __predict_true(!init))
+ return 0;
+ iru_init(&iru, rxq, 0 /* flid */);
+ map = fl->ifl_sds.ifsd_map;
+ refill_pidx = netmap_idx_k2n(kring, nm_i);
+ /*
+ * IMPORTANT: we must leave one free slot in the ring,
+ * so move head back by one unit
+ */
+ head = nm_prev(head, lim);
+ while (nm_i != head) {
+ for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]);
+ uint32_t nic_i_dma = refill_pidx;
+ nic_i = netmap_idx_k2n(kring, nm_i);
+
+ MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH);
+
+ if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
+ return netmap_ring_reinit(kring);
+
+ fl->ifl_vm_addrs[tmp_pidx] = addr;
+ if (__predict_false(init) && map) {
+ netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
+ } else if (map && (slot->flags & NS_BUF_CHANGED)) {
+ /* buffer has changed, reload map */
+ netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
+ }
+ slot->flags &= ~NS_BUF_CHANGED;
+
+ nm_i = nm_next(nm_i, lim);
+ fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim);
+ if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1)
+ continue;
+
+ iru.iru_pidx = refill_pidx;
+ iru.iru_count = tmp_pidx+1;
+ ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
+
+ refill_pidx = nic_i;
+ if (map == NULL)
+ continue;
+
+ for (int n = 0; n < iru.iru_count; n++) {
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma],
+ BUS_DMASYNC_PREREAD);
+ /* XXX - change this to not use the netmap func*/
+ nic_i_dma = nm_next(nic_i_dma, lim);
+ }
+ }
+ }
+ kring->nr_hwcur = head;
+
+ if (map)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
+ return (0);
+}
+
/*
* Reconcile kernel and user view of the transmit ring.
*
@@ -720,14 +905,11 @@
if_ctx_t ctx = ifp->if_softc;
iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id];
- pi.ipi_segs = txq->ift_segs;
- pi.ipi_qsidx = kring->ring_id;
- pi.ipi_ndescs = 0;
+ if (txq->ift_sds.ifsd_map)
+ bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
/*
* First part: process new packets to send.
* nm_i is the current index in the netmap ring,
@@ -750,13 +932,17 @@
* to prefetch the next slot and txr entry.
*/
- nm_i = kring->nr_hwcur;
+ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);
+ pkt_info_zero(&pi);
+ pi.ipi_segs = txq->ift_segs;
+ pi.ipi_qsidx = kring->ring_id;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
__builtin_prefetch(&ring->slot[nm_i]);
__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
- __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);
+ if (txq->ift_sds.ifsd_map)
+ __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
@@ -768,6 +954,11 @@
IPI_TX_INTR : 0;
/* device-specific */
+ pi.ipi_len = len;
+ pi.ipi_segs[0].ds_addr = paddr;
+ pi.ipi_segs[0].ds_len = len;
+ pi.ipi_nsegs = 1;
+ pi.ipi_ndescs = 0;
pi.ipi_pidx = nic_i;
pi.ipi_flags = flags;
@@ -777,27 +968,28 @@
/* prefetch for next round */
__builtin_prefetch(&ring->slot[nm_i + 1]);
__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
- __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);
+ if (txq->ift_sds.ifsd_map) {
+ __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);
- NM_CHECK_ADDR_LEN(na, addr, len);
+ NM_CHECK_ADDR_LEN(na, addr, len);
- if (slot->flags & NS_BUF_CHANGED) {
- /* buffer has changed, reload map */
- netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr);
+ if (slot->flags & NS_BUF_CHANGED) {
+ /* buffer has changed, reload map */
+ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr);
+ }
+ /* make sure changes to the buffer are synced */
+ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i],
+ BUS_DMASYNC_PREWRITE);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
-
- /* make sure changes to the buffer are synced */
- bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i],
- BUS_DMASYNC_PREWRITE);
-
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
/* synchronize the NIC ring */
- bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
+ if (txq->ift_sds.ifsd_map)
+ bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
@@ -832,30 +1024,29 @@
iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
- struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
- u_int nm_i; /* index into the netmap ring */
- u_int nic_i; /* index into the NIC ring */
+ uint32_t nm_i; /* index into the netmap ring */
+ uint32_t nic_i; /* index into the NIC ring */
u_int i, n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = kring->rhead;
+ u_int const head = netmap_idx_n2k(kring, kring->rhead);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
struct if_rxd_info ri;
- /* device-specific */
+
+ struct ifnet *ifp = na->ifp;
if_ctx_t ctx = ifp->if_softc;
iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id];
iflib_fl_t fl = rxq->ifr_fl;
if (head > lim)
return netmap_ring_reinit(kring);
- bzero(&ri, sizeof(ri));
- ri.iri_qsidx = kring->ring_id;
- ri.iri_ifp = ctx->ifc_ifp;
/* XXX check sync modes */
- for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++)
+ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) {
+ if (fl->ifl_sds.ifsd_map == NULL)
+ continue;
bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
+ }
/*
* First part: import newly received packets.
*
@@ -876,19 +1067,24 @@
int error, avail;
uint16_t slot_flags = kring->nkr_slot_flags;
- for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) {
+ for (i = 0; i < rxq->ifr_nfl; i++) {
+ fl = &rxq->ifr_fl[i];
nic_i = fl->ifl_cidx;
nm_i = netmap_idx_n2k(kring, nic_i);
- avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX);
+ avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX);
for (n = 0; avail > 0; n++, avail--) {
+ rxd_info_zero(&ri);
+ ri.iri_frags = rxq->ifr_frags;
+ ri.iri_qsidx = kring->ring_id;
+ ri.iri_ifp = ctx->ifc_ifp;
+ ri.iri_cidx = nic_i;
+
error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
- if (error)
- ring->slot[nm_i].len = 0;
- else
- ring->slot[nm_i].len = ri.iri_len - crclen;
+ ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen;
ring->slot[nm_i].flags = slot_flags;
- bus_dmamap_sync(fl->ifl_ifdi->idi_tag,
- fl->ifl_sds[nic_i].ifsd_map, BUS_DMASYNC_POSTREAD);
+ if (fl->ifl_sds.ifsd_map)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag,
+ fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
@@ -899,7 +1095,7 @@
iflib_rx_miss_bufs += n;
}
fl->ifl_cidx = nic_i;
- kring->nr_hwtail = nm_i;
+ kring->nr_hwtail = netmap_idx_k2n(kring, nm_i);
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
@@ -913,51 +1109,27 @@
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*/
/* XXX not sure how this will work with multiple free lists */
- nm_i = kring->nr_hwcur;
- if (nm_i != head) {
- nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != head; n++) {
- struct netmap_slot *slot = &ring->slot[nm_i];
- uint64_t paddr;
- caddr_t vaddr;
- void *addr = PNMB(na, slot, &paddr);
+ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);
- if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
- goto ring_reset;
+ return (netmap_fl_refill(rxq, kring, nm_i, false));
+}
- vaddr = addr;
- if (slot->flags & NS_BUF_CHANGED) {
- /* buffer has changed, reload map */
- netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, addr);
- slot->flags &= ~NS_BUF_CHANGED;
- }
- /*
- * XXX we should be batching this operation - TODO
- */
- ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size);
- bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map,
- BUS_DMASYNC_PREREAD);
- nm_i = nm_next(nm_i, lim);
- nic_i = nm_next(nic_i, lim);
- }
- kring->nr_hwcur = head;
+static void
+iflib_netmap_intr(struct netmap_adapter *na, int onoff)
+{
+ struct ifnet *ifp = na->ifp;
+ if_ctx_t ctx = ifp->if_softc;
- bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- /*
- * IMPORTANT: we must leave one free slot in the ring,
- * so move nic_i back by one unit
- */
- nic_i = nm_prev(nic_i, lim);
- ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
+ CTX_LOCK(ctx);
+ if (onoff) {
+ IFDI_INTR_ENABLE(ctx);
+ } else {
+ IFDI_INTR_DISABLE(ctx);
}
-
- return 0;
-
-ring_reset:
- return netmap_ring_reinit(kring);
+ CTX_UNLOCK(ctx);
}
+
static int
iflib_netmap_attach(if_ctx_t ctx)
{
@@ -976,6 +1148,7 @@
na.nm_txsync = iflib_netmap_txsync;
na.nm_rxsync = iflib_netmap_rxsync;
na.nm_register = iflib_netmap_register;
+ na.nm_intr = iflib_netmap_intr;
na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets;
na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets;
return (netmap_attach(&na));
@@ -990,6 +1163,8 @@
slot = netmap_reset(na, NR_TX, txq->ift_id, 0);
if (slot == NULL)
return;
+ if (txq->ift_sds.ifsd_map == NULL)
+ return;
for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {
@@ -1004,37 +1179,20 @@
netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si));
}
}
+
static void
iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
{
struct netmap_adapter *na = NA(ctx->ifc_ifp);
+ struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id];
struct netmap_slot *slot;
- iflib_rxsd_t sd;
- int nrxd;
+ uint32_t nm_i;
slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0);
if (slot == NULL)
return;
- sd = rxq->ifr_fl[0].ifl_sds;
- nrxd = ctx->ifc_softc_ctx.isc_nrxd[0];
- for (int i = 0; i < nrxd; i++, sd++) {
- int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i);
- uint64_t paddr;
- void *addr;
- caddr_t vaddr;
-
- vaddr = addr = PNMB(na, slot + sj, &paddr);
- netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr);
- /* Update descriptor and the cached value */
- ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size);
- }
- /* preserve queue */
- if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id];
- int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
- ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t);
- } else
- ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1);
+ nm_i = netmap_idx_n2k(kring, 0);
+ netmap_fl_refill(rxq, kring, nm_i, true);
}
#define iflib_netmap_detach(ifp) netmap_detach(ifp)
@@ -1046,6 +1204,7 @@
#define iflib_netmap_attach(ctx) (0)
#define netmap_rx_irq(ifp, qid, budget) (0)
+#define netmap_tx_irq(ifp, qid) do {} while (0)
#endif
@@ -1055,11 +1214,34 @@
{
__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
}
+static __inline void
+prefetch2cachelines(void *x)
+{
+ __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
+#if (CACHE_LINE_SIZE < 128)
+ __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long)))));
+#endif
+}
#else
#define prefetch(x)
+#define prefetch2cachelines(x)
#endif
static void
+iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid)
+{
+ iflib_fl_t fl;
+
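+	/* Hand the driver direct pointers to this free list's refill staging arrays */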
+ fl = &rxq->ifr_fl[flid];
+ iru->iru_paddrs = fl->ifl_bus_addrs;
+ iru->iru_vaddrs = &fl->ifl_vm_addrs[0];
+ iru->iru_idxs = fl->ifl_rxd_idxs;
+ iru->iru_qsidx = rxq->ifr_id;
+ iru->iru_buf_size = fl->ifl_buf_size;
+ iru->iru_flidx = fl->ifl_id;
+}
+
+static void
_iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
if (err)
@@ -1177,6 +1359,8 @@
{
iflib_filter_info_t info = arg;
struct grouptask *gtask = info->ifi_task;
+ if (!iflib_started)
+ return (FILTER_HANDLED);
DBG_COUNTER_INC(fast_intrs);
if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
@@ -1187,19 +1371,77 @@
}
static int
+iflib_fast_intr_rxtx(void *arg)
+{
+ iflib_filter_info_t info = arg;
+ struct grouptask *gtask = info->ifi_task;
+ iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx;
+ if_ctx_t ctx;
+ int i, cidx;
+
+ if (!iflib_started)
+ return (FILTER_HANDLED);
+
+ DBG_COUNTER_INC(fast_intrs);
+ if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
+ return (FILTER_HANDLED);
+
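+	/*
+	 * For each tx queue sharing this rx vector, either re-arm its
+	 * interrupt when it has no completions pending or schedule its
+	 * drain task to reclaim descriptors.
+	 */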
+ for (i = 0; i < rxq->ifr_ntxqirq; i++) {
+ qidx_t txqid = rxq->ifr_txqid[i];
+
+ ctx = rxq->ifr_ctx;
+
+ if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) {
+ IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid);
+ continue;
+ }
+ GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task);
+ }
+ if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ)
+ cidx = rxq->ifr_cq_cidx;
+ else
+ cidx = rxq->ifr_fl[0].ifl_cidx;
+ if (iflib_rxd_avail(ctx, rxq, cidx, 1))
+ GROUPTASK_ENQUEUE(gtask);
+ else
+ IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+ return (FILTER_HANDLED);
+}
+
+
+static int
+iflib_fast_intr_ctx(void *arg)
+{
+ iflib_filter_info_t info = arg;
+ struct grouptask *gtask = info->ifi_task;
+
+ if (!iflib_started)
+ return (FILTER_HANDLED);
+
+ DBG_COUNTER_INC(fast_intrs);
+ if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
+ return (FILTER_HANDLED);
+
+ GROUPTASK_ENQUEUE(gtask);
+ return (FILTER_HANDLED);
+}
+
+static int
_iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
driver_filter_t filter, driver_intr_t handler, void *arg,
char *name)
{
- int rc;
+ int rc, flags;
struct resource *res;
- void *tag;
+ void *tag = NULL;
device_t dev = ctx->ifc_dev;
+ flags = RF_ACTIVE;
+ if (ctx->ifc_flags & IFC_LEGACY)
+ flags |= RF_SHAREABLE;
MPASS(rid < 512);
irq->ii_rid = rid;
- res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid,
- RF_SHAREABLE | RF_ACTIVE);
+ res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags);
if (res == NULL) {
device_printf(dev,
"failed to allocate IRQ for rid %d, name %s.\n", rid, name);
@@ -1265,11 +1507,6 @@
(uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize);
goto fail;
}
-#ifdef IFLIB_DIAGNOSTICS
- device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n",
- sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
-
-#endif
if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
@@ -1286,11 +1523,6 @@
goto fail;
}
-#ifdef IFLIB_DIAGNOSTICS
- device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
- scctx->isc_tx_tso_size_max, ntsosegments,
- scctx->isc_tx_tso_segsize_max);
-#endif
if (!(txq->ift_sds.ifsd_flags =
(uint8_t *) malloc(sizeof(uint8_t) *
scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
@@ -1307,7 +1539,7 @@
}
/* Create the descriptor buffer dma maps */
-#if defined(ACPI_DMAR) || (!(defined(__i386__) && !defined(__amd64__)))
+#if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__)))
if ((ctx->ifc_flags & IFC_DMAR) == 0)
return (0);
@@ -1406,11 +1638,14 @@
iflib_dma_info_t di;
int i;
- /* Set number of descriptors available */
+ /* Set number of descriptors available */
txq->ift_qstatus = IFLIB_QUEUE_IDLE;
+ /* XXX make configurable */
+ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ;
/* Reset indices */
- txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
+ txq->ift_cidx_processed = 0;
+ txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
@@ -1439,7 +1674,6 @@
if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
device_t dev = ctx->ifc_dev;
iflib_fl_t fl;
- iflib_rxsd_t rxsd;
int err;
MPASS(scctx->isc_nrxd[0] > 0);
@@ -1447,13 +1681,6 @@
fl = rxq->ifr_fl;
for (int i = 0; i < rxq->ifr_nfl; i++, fl++) {
- fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) *
- scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB,
- M_WAITOK | M_ZERO);
- if (fl->ifl_sds == NULL) {
- device_printf(dev, "Unable to allocate rx sw desc memory\n");
- return (ENOMEM);
- }
fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */
err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1, 0, /* alignment, bounds */
@@ -1472,16 +1699,48 @@
__func__, err);
goto fail;
}
+ if (!(fl->ifl_sds.ifsd_flags =
+ (uint8_t *) malloc(sizeof(uint8_t) *
+ scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate rx sw descriptor flag memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
+ if (!(fl->ifl_sds.ifsd_m =
+ (struct mbuf **) malloc(sizeof(struct mbuf *) *
+ scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate rx mbuf pointer memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
+ if (!(fl->ifl_sds.ifsd_cl =
+ (caddr_t *) malloc(sizeof(caddr_t) *
+ scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate rx cluster pointer memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
- rxsd = fl->ifl_sds;
- for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) {
- err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map);
- if (err) {
- device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
- __func__, err);
+ /* Create the descriptor buffer dma maps */
+#if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__)))
+ if ((ctx->ifc_flags & IFC_DMAR) == 0)
+ continue;
+
+ if (!(fl->ifl_sds.ifsd_map =
+ (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate rx buffer map memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
+
+ for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) {
+ err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]);
+ if (err != 0) {
+ device_printf(dev, "Unable to create RX buffer DMA map\n");
goto fail;
}
}
+#endif
}
return (0);
@@ -1531,20 +1790,32 @@
_iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
{
struct mbuf *m;
- int pidx = fl->ifl_pidx;
- iflib_rxsd_t rxsd = &fl->ifl_sds[pidx];
- caddr_t cl;
+ int idx, frag_idx = fl->ifl_fragidx;
+ int pidx = fl->ifl_pidx;
+ caddr_t cl, *sd_cl;
+ struct mbuf **sd_m;
+ uint8_t *sd_flags;
+ struct if_rxd_update iru;
+ bus_dmamap_t *sd_map;
int n, i = 0;
uint64_t bus_addr;
int err;
+ qidx_t credits;
+ sd_m = fl->ifl_sds.ifsd_m;
+ sd_map = fl->ifl_sds.ifsd_map;
+ sd_cl = fl->ifl_sds.ifsd_cl;
+ sd_flags = fl->ifl_sds.ifsd_flags;
+ idx = pidx;
+ credits = fl->ifl_credits;
+
n = count;
MPASS(n > 0);
- MPASS(fl->ifl_credits + n <= fl->ifl_size);
+ MPASS(credits + n <= fl->ifl_size);
if (pidx < fl->ifl_cidx)
MPASS(pidx + n <= fl->ifl_cidx);
- if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size))
+ if (pidx == fl->ifl_cidx && (credits < fl->ifl_size))
MPASS(fl->ifl_gen == 0);
if (pidx > fl->ifl_cidx)
MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx);
@@ -1552,7 +1823,7 @@
DBG_COUNTER_INC(fl_refills);
if (n > 8)
DBG_COUNTER_INC(fl_refills_large);
-
+ iru_init(&iru, fl->ifl_rxq, fl->ifl_id);
while (n--) {
/*
* We allocate an uninitialized mbuf + cluster, mbuf is
@@ -1560,8 +1831,11 @@
*
* If the cluster is still set then we know a minimum sized packet was received
*/
- if ((cl = rxsd->ifsd_cl) == NULL) {
- if ((cl = rxsd->ifsd_cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
+ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx);
+ if ((frag_idx < 0) || (frag_idx >= fl->ifl_size))
+ bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx);
+ if ((cl = sd_cl[frag_idx]) == NULL) {
+ if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
break;
#if MEMORY_LOGGING
fl->ifl_cl_enqueued++;
@@ -1575,19 +1849,6 @@
#endif
DBG_COUNTER_INC(rx_allocs);
-#ifdef notyet
- if ((rxsd->ifsd_flags & RX_SW_DESC_MAP_CREATED) == 0) {
- int err;
-
- if ((err = bus_dmamap_create(fl->ifl_ifdi->idi_tag, 0, &rxsd->ifsd_map))) {
- log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
- uma_zfree(fl->ifl_zone, cl);
- n = 0;
- goto done;
- }
- rxsd->ifsd_flags |= RX_SW_DESC_MAP_CREATED;
- }
-#endif
#if defined(__i386__) || defined(__amd64__)
if (!IS_DMAR(ctx)) {
bus_addr = pmap_kextract((vm_offset_t)cl);
@@ -1599,8 +1860,12 @@
cb_arg.error = 0;
q = fl->ifl_rxq;
- err = bus_dmamap_load(fl->ifl_desc_tag, rxsd->ifsd_map,
+ MPASS(sd_map != NULL);
+ MPASS(sd_map[frag_idx] != NULL);
+ err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx],
cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0);
+ bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx],
+ BUS_DMASYNC_PREREAD);
if (err != 0 || cb_arg.error) {
/*
@@ -1614,36 +1879,52 @@
}
bus_addr = cb_arg.seg.ds_addr;
}
- rxsd->ifsd_flags |= RX_SW_DESC_INUSE;
+ bit_set(fl->ifl_rx_bitmap, frag_idx);
+ sd_flags[frag_idx] |= RX_SW_DESC_INUSE;
- MPASS(rxsd->ifsd_m == NULL);
- rxsd->ifsd_cl = cl;
- rxsd->ifsd_m = m;
+ MPASS(sd_m[frag_idx] == NULL);
+ sd_cl[frag_idx] = cl;
+ sd_m[frag_idx] = m;
+ fl->ifl_rxd_idxs[i] = frag_idx;
fl->ifl_bus_addrs[i] = bus_addr;
fl->ifl_vm_addrs[i] = cl;
- rxsd++;
- fl->ifl_credits++;
+ credits++;
i++;
- MPASS(fl->ifl_credits <= fl->ifl_size);
- if (++fl->ifl_pidx == fl->ifl_size) {
- fl->ifl_pidx = 0;
+ MPASS(credits <= fl->ifl_size);
+ if (++idx == fl->ifl_size) {
fl->ifl_gen = 1;
- rxsd = fl->ifl_sds;
+ idx = 0;
}
if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
- ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx,
- fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size);
+ iru.iru_pidx = pidx;
+ iru.iru_count = i;
+ ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
i = 0;
- pidx = fl->ifl_pidx;
+ pidx = idx;
+ fl->ifl_pidx = idx;
+ fl->ifl_credits = credits;
}
+
}
done:
+ if (i) {
+ iru.iru_pidx = pidx;
+ iru.iru_count = i;
+ ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
+ fl->ifl_pidx = idx;
+ fl->ifl_credits = credits;
+ }
DBG_COUNTER_INC(rxd_flush);
if (fl->ifl_pidx == 0)
pidx = fl->ifl_size - 1;
else
pidx = fl->ifl_pidx - 1;
+
+ if (sd_map)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx);
+ fl->ifl_fragidx = frag_idx;
}
static __inline void
@@ -1669,33 +1950,46 @@
uint32_t i;
for (i = 0; i < fl->ifl_size; i++) {
- iflib_rxsd_t d = &fl->ifl_sds[i];
+ struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i];
+ uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i];
+ caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i];
- if (d->ifsd_flags & RX_SW_DESC_INUSE) {
- bus_dmamap_unload(fl->ifl_desc_tag, d->ifsd_map);
- bus_dmamap_destroy(fl->ifl_desc_tag, d->ifsd_map);
- if (d->ifsd_m != NULL) {
- m_init(d->ifsd_m, M_NOWAIT, MT_DATA, 0);
- uma_zfree(zone_mbuf, d->ifsd_m);
+ if (*sd_flags & RX_SW_DESC_INUSE) {
+ if (fl->ifl_sds.ifsd_map != NULL) {
+ bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i];
+ bus_dmamap_unload(fl->ifl_desc_tag, sd_map);
+ if (fl->ifl_rxq->ifr_ctx->ifc_in_detach)
+ bus_dmamap_destroy(fl->ifl_desc_tag, sd_map);
}
- if (d->ifsd_cl != NULL)
- uma_zfree(fl->ifl_zone, d->ifsd_cl);
- d->ifsd_flags = 0;
+ if (*sd_m != NULL) {
+ m_init(*sd_m, M_NOWAIT, MT_DATA, 0);
+ uma_zfree(zone_mbuf, *sd_m);
+ }
+ if (*sd_cl != NULL)
+ uma_zfree(fl->ifl_zone, *sd_cl);
+ *sd_flags = 0;
} else {
- MPASS(d->ifsd_cl == NULL);
- MPASS(d->ifsd_m == NULL);
+ MPASS(*sd_cl == NULL);
+ MPASS(*sd_m == NULL);
}
#if MEMORY_LOGGING
fl->ifl_m_dequeued++;
fl->ifl_cl_dequeued++;
#endif
- d->ifsd_cl = NULL;
- d->ifsd_m = NULL;
+ *sd_cl = NULL;
+ *sd_m = NULL;
}
+#ifdef INVARIANTS
+ for (i = 0; i < fl->ifl_size; i++) {
+ MPASS(fl->ifl_sds.ifsd_flags[i] == 0);
+ MPASS(fl->ifl_sds.ifsd_cl[i] == NULL);
+ MPASS(fl->ifl_sds.ifsd_m[i] == NULL);
+ }
+#endif
/*
* Reset free list values
*/
- fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = 0;;
+ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0;
bzero(idi->idi_vaddr, idi->idi_size);
}
@@ -1711,6 +2005,7 @@
if_ctx_t ctx = rxq->ifr_ctx;
if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
+ bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1);
/*
** Free current RX buffer structs and their mbufs
*/
@@ -1723,12 +2018,17 @@
*/
if (sctx->isc_max_frame_size <= 2048)
fl->ifl_buf_size = MCLBYTES;
+#ifndef CONTIGMALLOC_WORKS
+ else
+ fl->ifl_buf_size = MJUMPAGESIZE;
+#else
else if (sctx->isc_max_frame_size <= 4096)
fl->ifl_buf_size = MJUMPAGESIZE;
else if (sctx->isc_max_frame_size <= 9216)
fl->ifl_buf_size = MJUM9BYTES;
else
fl->ifl_buf_size = MJUM16BYTES;
+#endif
if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
@@ -1770,10 +2070,14 @@
bus_dma_tag_destroy(fl->ifl_desc_tag);
fl->ifl_desc_tag = NULL;
}
+ free(fl->ifl_sds.ifsd_m, M_IFLIB);
+ free(fl->ifl_sds.ifsd_cl, M_IFLIB);
+ /* XXX destroy maps first */
+ free(fl->ifl_sds.ifsd_map, M_IFLIB);
+ fl->ifl_sds.ifsd_m = NULL;
+ fl->ifl_sds.ifsd_cl = NULL;
+ fl->ifl_sds.ifsd_map = NULL;
}
- if (rxq->ifr_fl->ifl_sds != NULL)
- free(rxq->ifr_fl->ifl_sds, M_IFLIB);
-
free(rxq->ifr_fl, M_IFLIB);
rxq->ifr_fl = NULL;
rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
@@ -1789,7 +2093,7 @@
{
iflib_txq_t txq = arg;
if_ctx_t ctx = txq->ift_ctx;
- if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
+ if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
return;
@@ -1800,28 +2104,32 @@
*/
IFDI_TIMER(ctx, txq->ift_id);
if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) &&
- (ctx->ifc_pause_frames == 0))
+ ((txq->ift_cleaned_prev == txq->ift_cleaned) ||
+ (sctx->isc_pause_frames == 0)))
goto hung;
- if (TXQ_AVAIL(txq) <= 2*scctx->isc_tx_nsegments ||
- ifmp_ring_is_stalled(txq->ift_br[0]))
+ if (ifmp_ring_is_stalled(txq->ift_br))
+ txq->ift_qstatus = IFLIB_QUEUE_HUNG;
+ txq->ift_cleaned_prev = txq->ift_cleaned;
+ /* handle any laggards */
+ if (txq->ift_db_pending)
GROUPTASK_ENQUEUE(&txq->ift_task);
- ctx->ifc_pause_frames = 0;
+ sctx->isc_pause_frames = 0;
if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)
callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
return;
hung:
CTX_LOCK(ctx);
- if_setdrvflagbits(ctx->ifc_ifp, 0, IFF_DRV_RUNNING);
+ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n",
txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx);
IFDI_WATCHDOG_RESET(ctx);
ctx->ifc_watchdog_events++;
- ctx->ifc_pause_frames = 0;
- iflib_init_locked(ctx);
+ ctx->ifc_flags |= IFC_DO_RESET;
+ iflib_admin_intr_deferred(ctx);
CTX_UNLOCK(ctx);
}
@@ -1829,22 +2137,25 @@
iflib_init_locked(if_ctx_t ctx)
{
if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
if_t ifp = ctx->ifc_ifp;
iflib_fl_t fl;
iflib_txq_t txq;
iflib_rxq_t rxq;
- int i, j;
+ int i, j, tx_ip_csum_flags, tx_ip6_csum_flags;
if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
IFDI_INTR_DISABLE(ctx);
+ tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP);
+ tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP);
/* Set hardware offload abilities */
if_clearhwassist(ifp);
if (if_getcapenable(ifp) & IFCAP_TXCSUM)
- if_sethwassistbits(ifp, CSUM_IP | CSUM_TCP | CSUM_UDP, 0);
+ if_sethwassistbits(ifp, tx_ip_csum_flags, 0);
if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
- if_sethwassistbits(ifp, (CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0);
+ if_sethwassistbits(ifp, tx_ip6_csum_flags, 0);
if (if_getcapenable(ifp) & IFCAP_TSO4)
if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
if (if_getcapenable(ifp) & IFCAP_TSO6)
@@ -1853,19 +2164,21 @@
for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) {
CALLOUT_LOCK(txq);
callout_stop(&txq->ift_timer);
- callout_stop(&txq->ift_db_check);
CALLOUT_UNLOCK(txq);
iflib_netmap_txq_init(ctx, txq);
}
- for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
- iflib_netmap_rxq_init(ctx, rxq);
- }
#ifdef INVARIANTS
i = if_getdrvflags(ifp);
#endif
IFDI_INIT(ctx);
MPASS(if_getdrvflags(ifp) == i);
for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
+ /* XXX this should really be done on a per-queue basis */
+ if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+ MPASS(rxq->ifr_id == i);
+ iflib_netmap_rxq_init(ctx, rxq);
+ continue;
+ }
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
if (iflib_fl_setup(fl)) {
device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n");
@@ -1920,102 +2233,159 @@
if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
IFDI_INTR_DISABLE(ctx);
- msleep(ctx, &ctx->ifc_mtx, PUSER, "iflib_init", hz);
+ DELAY(1000);
+ IFDI_STOP(ctx);
+ DELAY(1000);
+ iflib_debug_reset();
/* Wait for current tx queue users to exit to disarm watchdog timer. */
for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) {
/* make sure all transmitters have completed before proceeding XXX */
+ CALLOUT_LOCK(txq);
+ callout_stop(&txq->ift_timer);
+ CALLOUT_UNLOCK(txq);
+
/* clean any enqueued buffers */
- iflib_txq_check_drain(txq, 0);
+ iflib_ifmp_purge(txq);
/* Free any existing tx buffers. */
for (j = 0; j < txq->ift_size; j++) {
iflib_txsd_free(ctx, txq, j);
}
txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
- txq->ift_in_use = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
+ txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0;
txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0;
txq->ift_pullups = 0;
- ifmp_ring_reset_stats(txq->ift_br[0]);
+ ifmp_ring_reset_stats(txq->ift_br);
for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++)
bzero((void *)di->idi_vaddr, di->idi_size);
}
for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
/* make sure all transmitters have completed before proceeding XXX */
- for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++)
+ for (j = 0, di = rxq->ifr_ifdi; j < rxq->ifr_nfl; j++, di++)
bzero((void *)di->idi_vaddr, di->idi_size);
/* also resets the free lists pidx/cidx */
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
iflib_fl_bufs_free(fl);
}
- IFDI_STOP(ctx);
}
-static iflib_rxsd_t
-rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int *cltype, int unload)
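+/*
+ * Address of the rx descriptor beginning the cache line after cidx's
+ * descriptor, wrapping at the end of the ring; used only for prefetching.
+ */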
+static inline caddr_t
+calc_next_rxd(iflib_fl_t fl, int cidx)
{
+ qidx_t size;
+ int nrxd;
+ caddr_t start, end, cur, next;
+
+ nrxd = fl->ifl_size;
+ size = fl->ifl_rxd_size;
+ start = fl->ifl_ifdi->idi_vaddr;
+
+ if (__predict_false(size == 0))
+ return (start);
+ cur = start + size*cidx;
+ end = start + size*nrxd;
+ next = CACHE_PTR_NEXT(cur);
+ return (next < end ? next : start);
+}
+
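+/*
+ * Warm the caches with the upcoming mbuf and cluster pointers and the
+ * next descriptor cache line before the packet at cidx is processed.
+ */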
+static inline void
+prefetch_pkts(iflib_fl_t fl, int cidx)
+{
+ int nextptr;
+ int nrxd = fl->ifl_size;
+ caddr_t next_rxd;
+
+
+ nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1);
+ prefetch(&fl->ifl_sds.ifsd_m[nextptr]);
+ prefetch(&fl->ifl_sds.ifsd_cl[nextptr]);
+ next_rxd = calc_next_rxd(fl, cidx);
+ prefetch(next_rxd);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]);
+}
+
+static void
+rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
+{
int flid, cidx;
- iflib_rxsd_t sd;
+ bus_dmamap_t map;
iflib_fl_t fl;
iflib_dma_info_t di;
+ int next;
+ map = NULL;
flid = irf->irf_flid;
cidx = irf->irf_idx;
fl = &rxq->ifr_fl[flid];
+ sd->ifsd_fl = fl;
+ sd->ifsd_cidx = cidx;
+ sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx];
+ sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx];
fl->ifl_credits--;
#if MEMORY_LOGGING
fl->ifl_m_dequeued++;
- if (cltype)
- fl->ifl_cl_dequeued++;
#endif
- sd = &fl->ifl_sds[cidx];
- di = fl->ifl_ifdi;
- bus_dmamap_sync(di->idi_tag, di->idi_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH)
+ prefetch_pkts(fl, cidx);
+ if (fl->ifl_sds.ifsd_map != NULL) {
+ next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1);
+ prefetch(&fl->ifl_sds.ifsd_map[next]);
+ map = fl->ifl_sds.ifsd_map[cidx];
+ di = fl->ifl_ifdi;
+ next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1);
+ prefetch(&fl->ifl_sds.ifsd_flags[next]);
+ bus_dmamap_sync(di->idi_tag, di->idi_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/* not valid assert if bxe really does SGE from non-contiguous elements */
- MPASS(fl->ifl_cidx == cidx);
- if (unload)
- bus_dmamap_unload(fl->ifl_desc_tag, sd->ifsd_map);
-
- if (__predict_false(++fl->ifl_cidx == fl->ifl_size)) {
- fl->ifl_cidx = 0;
- fl->ifl_gen = 0;
+ MPASS(fl->ifl_cidx == cidx);
+ if (unload)
+ bus_dmamap_unload(fl->ifl_desc_tag, map);
}
- /* YES ick */
- if (cltype)
- *cltype = fl->ifl_cltype;
- return (sd);
+ fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1);
+ if (__predict_false(fl->ifl_cidx == 0))
+ fl->ifl_gen = 0;
+ if (map != NULL)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
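+	/* Mark the slot free in the rx bitmap so the refill path can reuse it */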
+ bit_clear(fl->ifl_rx_bitmap, cidx);
}
static struct mbuf *
-assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri)
+assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd)
{
- int i, padlen , flags, cltype;
+ int i, padlen , flags;
struct mbuf *m, *mh, *mt;
- iflib_rxsd_t sd;
caddr_t cl;
i = 0;
mh = NULL;
do {
- sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE);
+ rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd);
- MPASS(sd->ifsd_cl != NULL);
- MPASS(sd->ifsd_m != NULL);
+ MPASS(*sd->ifsd_cl != NULL);
+ MPASS(*sd->ifsd_m != NULL);
/* Don't include zero-length frags */
if (ri->iri_frags[i].irf_len == 0) {
/* XXX we can save the cluster here, but not the mbuf */
- m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
- m_free(sd->ifsd_m);
- sd->ifsd_m = NULL;
+ m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
+ m_free(*sd->ifsd_m);
+ *sd->ifsd_m = NULL;
continue;
}
-
- m = sd->ifsd_m;
+ m = *sd->ifsd_m;
+ *sd->ifsd_m = NULL;
if (mh == NULL) {
flags = M_PKTHDR|M_EXT;
mh = mt = m;
@@ -2027,13 +2397,12 @@
/* assuming padding is only on the first fragment */
padlen = 0;
}
- sd->ifsd_m = NULL;
- cl = sd->ifsd_cl;
- sd->ifsd_cl = NULL;
+ cl = *sd->ifsd_cl;
+ *sd->ifsd_cl = NULL;
/* Can these two be made one ? */
m_init(m, M_NOWAIT, MT_DATA, flags);
- m_cljset(m, cl, cltype);
+ m_cljset(m, cl, sd->ifsd_fl->ifl_cltype);
/*
* These must follow m_init and m_cljset
*/
@@ -2051,20 +2420,24 @@
static struct mbuf *
iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
{
+ struct if_rxsd sd;
struct mbuf *m;
- iflib_rxsd_t sd;
/* should I merge this back in now that the two paths are basically duplicated? */
if (ri->iri_nfrags == 1 &&
ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) {
- sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE);
- m = sd->ifsd_m;
- sd->ifsd_m = NULL;
+ rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd);
+ m = *sd.ifsd_m;
+ *sd.ifsd_m = NULL;
m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
- memcpy(m->m_data, sd->ifsd_cl, ri->iri_len);
+#ifndef __NO_STRICT_ALIGNMENT
+ if (!IP_ALIGNED(m))
+ m->m_data += 2;
+#endif
+ memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len);
m->m_len = ri->iri_frags[0].irf_len;
} else {
- m = assemble_segments(rxq, ri);
+ m = assemble_segments(rxq, ri, &sd);
}
m->m_pkthdr.len = ri->iri_len;
m->m_pkthdr.rcvif = ri->iri_ifp;
@@ -2077,29 +2450,76 @@
return (m);
}
+#if defined(INET6) || defined(INET)
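+/*
+ * Snapshot the per-VNET forwarding state; LRO is only attempted when the
+ * stack is not forwarding packets of that address family.
+ */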
+static void
+iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6)
+{
+ CURVNET_SET(lc->ifp->if_vnet);
+#if defined(INET6)
+ *v6 = VNET(ip6_forwarding);
+#endif
+#if defined(INET)
+ *v4 = VNET(ipforwarding);
+#endif
+ CURVNET_RESTORE();
+}
+
+/*
+ * Returns true if it's possible this packet could be LROed.
+ * If it returns false, it is guaranteed that tcp_lro_rx()
+ * would not return zero.
+ */
static bool
-iflib_rxeof(iflib_rxq_t rxq, int budget)
+iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding)
{
+ struct ether_header *eh;
+ uint16_t eh_type;
+
+ eh = mtod(m, struct ether_header *);
+ eh_type = ntohs(eh->ether_type);
+ switch (eh_type) {
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ return !v6_forwarding;
+#endif
+#if defined (INET)
+ case ETHERTYPE_IP:
+ return !v4_forwarding;
+#endif
+ }
+
+ return false;
+}
+#else
+static void
+iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused)
+{
+}
+#endif
+
+static bool
+iflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
+{
if_ctx_t ctx = rxq->ifr_ctx;
if_shared_ctx_t sctx = ctx->ifc_sctx;
if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
int avail, i;
- uint16_t *cidxp;
+ qidx_t *cidxp;
struct if_rxd_info ri;
int err, budget_left, rx_bytes, rx_pkts;
iflib_fl_t fl;
struct ifnet *ifp;
int lro_enabled;
+ bool lro_possible = false;
+ bool v4_forwarding, v6_forwarding;
+
/*
* XXX early demux data packets so that if_input processing only handles
* acks in interrupt context
*/
- struct mbuf *m, *mh, *mt;
+ struct mbuf *m, *mh, *mt, *mf;
- if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &budget)) {
- return (FALSE);
- }
-
+ ifp = ctx->ifc_ifp;
mh = mt = NULL;
MPASS(budget > 0);
rx_pkts = rx_bytes = 0;
@@ -2122,18 +2542,19 @@
/*
* Reset client set fields to their default values
*/
- bzero(&ri, sizeof(ri));
+ rxd_info_zero(&ri);
ri.iri_qsidx = rxq->ifr_id;
ri.iri_cidx = *cidxp;
- ri.iri_ifp = ctx->ifc_ifp;
+ ri.iri_ifp = ifp;
ri.iri_frags = rxq->ifr_frags;
err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
- /* in lieu of handling correctly - make sure it isn't being unhandled */
- MPASS(err == 0);
+ if (err)
+ goto err;
if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
*cidxp = ri.iri_cidx;
/* Update our consumer index */
+ /* XXX NB: shurd - check if this is still safe */
while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) {
rxq->ifr_cq_cidx -= scctx->isc_nrxd[0];
rxq->ifr_cq_gen = 0;
@@ -2166,20 +2587,52 @@
for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
__iflib_fl_refill_lt(ctx, fl, budget + 8);
- ifp = ctx->ifc_ifp;
lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
+ if (lro_enabled)
+ iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding);
+ mt = mf = NULL;
while (mh != NULL) {
m = mh;
mh = mh->m_nextpkt;
m->m_nextpkt = NULL;
+#ifndef __NO_STRICT_ALIGNMENT
+ if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL)
+ continue;
+#endif
rx_bytes += m->m_pkthdr.len;
rx_pkts++;
#if defined(INET6) || defined(INET)
- if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0)
- continue;
+ if (lro_enabled) {
+ if (!lro_possible) {
+ lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding);
+ if (lro_possible && mf != NULL) {
+ ifp->if_input(ifp, mf);
+ DBG_COUNTER_INC(rx_if_input);
+ mt = mf = NULL;
+ }
+ }
+ if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) ==
+ (CSUM_L4_CALC|CSUM_L4_VALID)) {
+ if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0)
+ continue;
+ }
+ }
#endif
+ if (lro_possible) {
+ ifp->if_input(ifp, m);
+ DBG_COUNTER_INC(rx_if_input);
+ continue;
+ }
+
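+			/* Defer non-LRO packets onto an m_nextpkt chain handed to if_input() after the loop */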
+ if (mf == NULL)
+ mf = m;
+ if (mt != NULL)
+ mt->m_nextpkt = m;
+ mt = m;
+ }
+ if (mf != NULL) {
+ ifp->if_input(ifp, mf);
DBG_COUNTER_INC(rx_if_input);
- ifp->if_input(ifp, m);
}
if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes);
@@ -2194,45 +2647,78 @@
if (avail)
return true;
return (iflib_rxd_avail(ctx, rxq, *cidxp, 1));
+err:
+ CTX_LOCK(ctx);
+ ctx->ifc_flags |= IFC_DO_RESET;
+ iflib_admin_intr_deferred(ctx);
+ CTX_UNLOCK(ctx);
+ return (false);
}
+#define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1)
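+/*
+ * Scale the number of doorbell writes that may be deferred by how full the
+ * ring is: the busier the queue, the longer hardware notification can wait.
+ */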
+static inline qidx_t
+txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use)
+{
+ qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
+ qidx_t minthresh = txq->ift_size / 8;
+ if (in_use > 4*minthresh)
+ return (notify_count);
+ if (in_use > 2*minthresh)
+ return (notify_count >> 1);
+ if (in_use > minthresh)
+ return (notify_count >> 3);
+ return (0);
+}
+
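+/*
+ * Same idea for deferring report-status requests: defer more as the ring
+ * fills, allowing only a small deferral when the queue is mostly idle.
+ */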
+static inline qidx_t
+txq_max_rs_deferred(iflib_txq_t txq)
+{
+ qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
+ qidx_t minthresh = txq->ift_size / 8;
+ if (txq->ift_in_use > 4*minthresh)
+ return (notify_count);
+ if (txq->ift_in_use > 2*minthresh)
+ return (notify_count >> 1);
+ if (txq->ift_in_use > minthresh)
+ return (notify_count >> 2);
+ return (2);
+}
+
#define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags)
#define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG)
-#define TXQ_MAX_DB_DEFERRED(size) (size >> 5)
+
+#define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use))
+#define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq)
#define TXQ_MAX_DB_CONSUMED(size) (size >> 4)
-static __inline void
-iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring)
-{
- uint32_t dbval;
+/* forward compatibility for cxgb */
+#define FIRST_QSET(ctx) 0
+#define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
+#define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
+#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
+#define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
- if (ring || txq->ift_db_pending >=
- TXQ_MAX_DB_DEFERRED(txq->ift_size)) {
+/* XXX we should be setting this to something other than zero */
+#define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
+#define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
- /* the lock will only ever be contended in the !min_latency case */
- if (!TXDB_TRYLOCK(txq))
- return;
+static inline bool
+iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use)
+{
+ qidx_t dbval, max;
+ bool rang;
+
+ rang = false;
+ max = TXQ_MAX_DB_DEFERRED(txq, in_use);
+ if (ring || txq->ift_db_pending >= max) {
dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx;
ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval);
txq->ift_db_pending = txq->ift_npending = 0;
- TXDB_UNLOCK(txq);
+ rang = true;
}
+ return (rang);
}
-static void
-iflib_txd_deferred_db_check(void * arg)
-{
- iflib_txq_t txq = arg;
-
- /* simple non-zero boolean so use bitwise OR */
- if ((txq->ift_db_pending | txq->ift_npending) &&
- txq->ift_db_pending >= txq->ift_db_pending_queued)
- iflib_txd_db_check(txq->ift_ctx, txq, TRUE);
- txq->ift_db_pending_queued = 0;
- if (ifmp_ring_is_stalled(txq->ift_br[0]))
- iflib_txq_check_drain(txq, 4);
-}
-
#ifdef PKT_DEBUG
static void
print_pkt(if_pkt_info_t pi)
@@ -2252,10 +2738,21 @@
static int
iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
{
+ if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
struct ether_vlan_header *eh;
struct mbuf *m, *n;
n = m = *mp;
+ if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
+ M_WRITABLE(m) == 0) {
+ if ((m = m_dup(m, M_NOWAIT)) == NULL) {
+ return (ENOMEM);
+ } else {
+ m_freem(*mp);
+ n = *mp = m;
+ }
+ }
+
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present,
@@ -2319,26 +2816,30 @@
pi->ipi_ipproto = ip->ip_p;
pi->ipi_flags |= IPI_TX_IPV4;
- if (pi->ipi_csum_flags & CSUM_IP)
+ if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP))
ip->ip_sum = 0;
- if (pi->ipi_ipproto == IPPROTO_TCP) {
- if (__predict_false(th == NULL)) {
- txq->ift_pullups++;
- if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
- return (ENOMEM);
- th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
- }
- pi->ipi_tcp_hflags = th->th_flags;
- pi->ipi_tcp_hlen = th->th_off << 2;
- pi->ipi_tcp_seq = th->th_seq;
- }
if (IS_TSO4(pi)) {
+ if (pi->ipi_ipproto == IPPROTO_TCP) {
+ if (__predict_false(th == NULL)) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
+ return (ENOMEM);
+ th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ pi->ipi_tcp_seq = th->th_seq;
+ }
if (__predict_false(ip->ip_p != IPPROTO_TCP))
return (ENXIO);
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(IPPROTO_TCP));
pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
+ if (sctx->isc_flags & IFLIB_TSO_INIT_IP) {
+ ip->ip_sum = 0;
+ ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz);
+ }
}
break;
}
@@ -2360,15 +2861,15 @@
pi->ipi_ipproto = ip6->ip6_nxt;
pi->ipi_flags |= IPI_TX_IPV6;
- if (pi->ipi_ipproto == IPPROTO_TCP) {
- if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
- if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
- return (ENOMEM);
- }
- pi->ipi_tcp_hflags = th->th_flags;
- pi->ipi_tcp_hlen = th->th_off << 2;
- }
if (IS_TSO6(pi)) {
+ if (pi->ipi_ipproto == IPPROTO_TCP) {
+ if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
+ if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
+ return (ENOMEM);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ }
if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP))
return (ENXIO);
@@ -2390,10 +2891,10 @@
break;
}
*mp = m;
+
return (0);
}
-
static __noinline struct mbuf *
collapse_pkthdr(struct mbuf *m0)
{
@@ -2460,8 +2961,8 @@
if_ctx_t ctx;
if_shared_ctx_t sctx;
if_softc_ctx_t scctx;
- int i, next, pidx, mask, err, maxsegsz, ntxd, count;
- struct mbuf *m, *tmp, **ifsd_m, **mp;
+ int i, next, pidx, err, ntxd, count;
+ struct mbuf *m, *tmp, **ifsd_m;
m = *m0;
@@ -2485,28 +2986,46 @@
if (err)
return (err);
ifsd_flags[pidx] |= TX_SW_DESC_MAPPED;
- i = 0;
- next = pidx;
- mask = (txq->ift_size-1);
+ count = 0;
m = *m0;
do {
- mp = &ifsd_m[next];
- *mp = m;
+ if (__predict_false(m->m_len <= 0)) {
+ tmp = m;
+ m = m->m_next;
+ tmp->m_next = NULL;
+ m_free(tmp);
+ continue;
+ }
m = m->m_next;
- if (__predict_false((*mp)->m_len == 0)) {
- m_free(*mp);
- *mp = NULL;
- } else
- next = (pidx + i) & (ntxd-1);
+ count++;
} while (m != NULL);
+ if (count > *nsegs) {
+ ifsd_m[pidx] = *m0;
+ ifsd_m[pidx]->m_flags |= M_TOOBIG;
+ return (0);
+ }
+ m = *m0;
+ count = 0;
+ do {
+ next = (pidx + count) & (ntxd-1);
+ MPASS(ifsd_m[next] == NULL);
+ ifsd_m[next] = m;
+ count++;
+ tmp = m;
+ m = m->m_next;
+ } while (m != NULL);
} else {
- int buflen, sgsize, max_sgsize;
+ int buflen, sgsize, maxsegsz, max_sgsize;
vm_offset_t vaddr;
vm_paddr_t curaddr;
count = i = 0;
- maxsegsz = sctx->isc_tx_maxsize;
m = *m0;
+ if (m->m_pkthdr.csum_flags & CSUM_TSO)
+ maxsegsz = scctx->isc_tx_tso_segsize_max;
+ else
+ maxsegsz = sctx->isc_tx_maxsegsize;
+
do {
if (__predict_false(m->m_len <= 0)) {
tmp = m;
@@ -2528,6 +3047,8 @@
#endif
ifsd_m[next] = m;
while (buflen > 0) {
+ if (i >= max_segs)
+ goto err;
max_sgsize = MIN(buflen, maxsegsz);
curaddr = pmap_kextract(vaddr);
sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
@@ -2537,8 +3058,6 @@
vaddr += sgsize;
buflen -= sgsize;
i++;
- if (i >= max_segs)
- goto err;
}
count++;
tmp = m;
@@ -2552,6 +3071,67 @@
return (EFBIG);
}
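+/*
+ * Tx counterpart of calc_next_rxd(): address of the descriptor beginning
+ * the next cache line after cidx, wrapping at the ring end, for prefetch.
+ */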
+static inline caddr_t
+calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid)
+{
+ qidx_t size;
+ int ntxd;
+ caddr_t start, end, cur, next;
+
+ ntxd = txq->ift_size;
+ size = txq->ift_txd_size[qid];
+ start = txq->ift_ifdi[qid].idi_vaddr;
+
+ if (__predict_false(size == 0))
+ return (start);
+ cur = start + size*cidx;
+ end = start + size*ntxd;
+ next = CACHE_PTR_NEXT(cur);
+ return (next < end ? next : start);
+}
+
+/*
+ * Pad an mbuf to ensure a minimum ethernet frame size.
+ * min_frame_size is the frame size (less CRC) to pad the mbuf to
+ */
+static __noinline int
+iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size)
+{
+ /*
+ * 18 is enough bytes to pad an ARP packet to 46 bytes, and
+ * an ARP message is the smallest common payload I can think of
+ */
+ static char pad[18]; /* just zeros */
+ int n;
+ struct mbuf *new_head;
+
+ if (!M_WRITABLE(*m_head)) {
+ new_head = m_dup(*m_head, M_NOWAIT);
+ if (new_head == NULL) {
+ m_freem(*m_head);
+ device_printf(dev, "cannot pad short frame, m_dup() failed\n");
+ DBG_COUNTER_INC(encap_pad_mbuf_fail);
+ return ENOMEM;
+ }
+ m_freem(*m_head);
+ *m_head = new_head;
+ }
+
+ for (n = min_frame_size - (*m_head)->m_pkthdr.len;
+ n > 0; n -= sizeof(pad))
+ if (!m_append(*m_head, min(n, sizeof(pad)), pad))
+ break;
+
+ if (n > 0) {
+ m_freem(*m_head);
+ device_printf(dev, "cannot pad short frame\n");
+ DBG_COUNTER_INC(encap_pad_mbuf_fail);
+ return (ENOBUFS);
+ }
+
+ return 0;
+}
+
static int
iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
{
@@ -2560,6 +3140,7 @@
if_softc_ctx_t scctx;
bus_dma_segment_t *segs;
struct mbuf *m_head;
+ void *next_txd;
bus_dmamap_t map;
struct if_pkt_info pi;
int remap = 0;
@@ -2580,18 +3161,23 @@
*/
cidx = txq->ift_cidx;
pidx = txq->ift_pidx;
- next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
+ if (ctx->ifc_flags & IFC_PREFETCH) {
+ next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
+ if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) {
+ next_txd = calc_next_txd(txq, cidx, 0);
+ prefetch(next_txd);
+ }
- /* prefetch the next cache line of mbuf pointers and flags */
- prefetch(&txq->ift_sds.ifsd_m[next]);
- if (txq->ift_sds.ifsd_map != NULL) {
- prefetch(&txq->ift_sds.ifsd_map[next]);
+ /* prefetch the next cache line of mbuf pointers and flags */
+ prefetch(&txq->ift_sds.ifsd_m[next]);
+ if (txq->ift_sds.ifsd_map != NULL) {
+ prefetch(&txq->ift_sds.ifsd_map[next]);
+ next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
+ prefetch(&txq->ift_sds.ifsd_flags[next]);
+ }
+ } else if (txq->ift_sds.ifsd_map != NULL)
map = txq->ift_sds.ifsd_map[pidx];
- next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
- prefetch(&txq->ift_sds.ifsd_flags[next]);
- }
-
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
desc_tag = txq->ift_tso_desc_tag;
max_segs = scctx->isc_tx_tso_segments_max;
@@ -2599,14 +3185,21 @@
desc_tag = txq->ift_desc_tag;
max_segs = scctx->isc_tx_nsegments;
}
+ if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) &&
+ __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) {
+ err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size);
+ if (err)
+ return err;
+ }
m_head = *m_headp;
- bzero(&pi, sizeof(pi));
- pi.ipi_len = m_head->m_pkthdr.len;
+
+ pkt_info_zero(&pi);
pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST));
- pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
- pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0;
pi.ipi_pidx = pidx;
pi.ipi_qsidx = txq->ift_id;
+ pi.ipi_len = m_head->m_pkthdr.len;
+ pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
+ pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0;
/* deliberate bitwise OR to make one condition */
if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) {
@@ -2662,6 +3255,19 @@
GROUPTASK_ENQUEUE(&txq->ift_task);
return (ENOBUFS);
}
+ /*
+ * On Intel cards we can greatly reduce the number of TX interrupts
+ * we see by only setting report status on every Nth descriptor.
+ * However, this also means that the driver will need to keep track
+ * of the descriptors that RS was set on to check them for the DD bit.
+ */
+ txq->ift_rs_pending += nsegs + 1;
+ if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) ||
+ iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) {
+ pi.ipi_flags |= IPI_TX_INTR;
+ txq->ift_rs_pending = 0;
+ }
+
pi.ipi_segs = segs;
pi.ipi_nsegs = nsegs;
@@ -2669,22 +3275,29 @@
#ifdef PKT_DEBUG
print_pkt(&pi);
#endif
+ if (map != NULL)
+ bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE);
if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) {
- bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-
+ if (map != NULL)
+ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
DBG_COUNTER_INC(tx_encap);
- MPASS(pi.ipi_new_pidx >= 0 &&
- pi.ipi_new_pidx < txq->ift_size);
+ MPASS(pi.ipi_new_pidx < txq->ift_size);
ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
if (pi.ipi_new_pidx < pi.ipi_pidx) {
ndesc += txq->ift_size;
txq->ift_gen = 1;
}
+ /*
+ * drivers can need as many as
+ * two sentinels
+ */
+ MPASS(ndesc <= pi.ipi_nsegs + 2);
MPASS(pi.ipi_new_pidx != pidx);
MPASS(ndesc > 0);
txq->ift_in_use += ndesc;
+
/*
* We update the last software descriptor again here because there may
* be a sentinel and/or there may be more mbufs than segments
@@ -2709,36 +3322,6 @@
return (ENOMEM);
}
-/* forward compatibility for cxgb */
-#define FIRST_QSET(ctx) 0
-
-#define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
-#define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
-#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
-#define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
-#define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
-#define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
-
-
-
-/* if there are more than TXQ_MIN_OCCUPANCY packets pending we consider deferring
- * doorbell writes
- *
- * ORing with 2 assures that min occupancy is never less than 2 without any conditional logic
- */
-#define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2)
-
-static inline int
-iflib_txq_min_occupancy(iflib_txq_t txq)
-{
- if_ctx_t ctx;
-
- ctx = txq->ift_ctx;
- return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx,
- txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) +
- MAX_TX_DESC(ctx));
-}
-
static void
iflib_tx_desc_free(iflib_txq_t txq, int n)
{
@@ -2747,6 +3330,7 @@
struct mbuf *m, **ifsd_m;
uint8_t *ifsd_flags;
bus_dmamap_t *ifsd_map;
+ bool do_prefetch;
cidx = txq->ift_cidx;
gen = txq->ift_gen;
@@ -2756,11 +3340,13 @@
ifsd_flags = txq->ift_sds.ifsd_flags;
ifsd_m = txq->ift_sds.ifsd_m;
ifsd_map = txq->ift_sds.ifsd_map;
+ do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH);
while (n--) {
- prefetch(ifsd_m[(cidx + 3) & mask]);
- prefetch(ifsd_m[(cidx + 4) & mask]);
-
+ if (do_prefetch) {
+ prefetch(ifsd_m[(cidx + 3) & mask]);
+ prefetch(ifsd_m[(cidx + 4) & mask]);
+ }
if (ifsd_m[cidx] != NULL) {
prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]);
prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]);
@@ -2775,8 +3361,15 @@
if ((m = ifsd_m[cidx]) != NULL) {
/* XXX we don't support any drivers that batch packets yet */
MPASS(m->m_nextpkt == NULL);
-
- m_free(m);
+ /* if the number of clusters exceeds the number of segments
+ * there won't be space on the ring to save a pointer to each
+ * cluster so we simply free the list here
+ */
+ if (m->m_flags & M_TOOBIG) {
+ m_freem(m);
+ } else {
+ m_free(m);
+ }
ifsd_m[cidx] = NULL;
#if MEMORY_LOGGING
txq->ift_dequeued++;
@@ -2823,24 +3416,34 @@
txq->ift_cleaned += reclaim;
txq->ift_in_use -= reclaim;
- if (txq->ift_active == FALSE)
- txq->ift_active = TRUE;
-
return (reclaim);
}
static struct mbuf **
-_ring_peek_one(struct ifmp_ring *r, int cidx, int offset)
+_ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining)
{
+ int next, size;
+ struct mbuf **items;
- return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (r->size-1)]));
+ size = r->size;
+ next = (cidx + CACHE_PTR_INCREMENT) & (size-1);
+ items = __DEVOLATILE(struct mbuf **, &r->items[0]);
+
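+	/* Prefetch this and the next few enqueued mbuf pointers */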
+ prefetch(items[(cidx + offset) & (size-1)]);
+ if (remaining > 1) {
+ prefetch2cachelines(&items[next]);
+ prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]);
+ prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]);
+ prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]);
+ }
+ return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)]));
}
static void
iflib_txq_check_drain(iflib_txq_t txq, int budget)
{
- ifmp_ring_check_drainage(txq->ift_br[0], budget);
+ ifmp_ring_check_drainage(txq->ift_br, budget);
}
static uint32_t
@@ -2849,8 +3452,8 @@
iflib_txq_t txq = r->cookie;
if_ctx_t ctx = txq->ift_ctx;
- return ((TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx)) ||
- ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, false));
+ return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) ||
+ ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false));
}
static uint32_t
@@ -2858,16 +3461,19 @@
{
iflib_txq_t txq = r->cookie;
if_ctx_t ctx = txq->ift_ctx;
- if_t ifp = ctx->ifc_ifp;
+ struct ifnet *ifp = ctx->ifc_ifp;
struct mbuf **mp, *m;
- int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail, err, in_use_prev, desc_used;
+ int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail;
+ int reclaimed, err, in_use_prev, desc_used;
+ bool do_prefetch, ring, rang;
if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) ||
!LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(txq_drain_notready);
return (0);
}
-
+ reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
+ rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use);
avail = IDXDIFF(pidx, cidx, r->size);
if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) {
DBG_COUNTER_INC(txq_drain_flushing);
@@ -2877,77 +3483,153 @@
}
return (avail);
}
- iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
+
if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) {
txq->ift_qstatus = IFLIB_QUEUE_IDLE;
CALLOUT_LOCK(txq);
callout_stop(&txq->ift_timer);
- callout_stop(&txq->ift_db_check);
CALLOUT_UNLOCK(txq);
DBG_COUNTER_INC(txq_drain_oactive);
return (0);
}
+ if (reclaimed)
+ txq->ift_qstatus = IFLIB_QUEUE_IDLE;
consumed = mcast_sent = bytes_sent = pkt_sent = 0;
count = MIN(avail, TX_BATCH_SIZE);
+#ifdef INVARIANTS
+ if (iflib_verbose_debug)
+ printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__,
+ avail, ctx->ifc_flags, TXQ_AVAIL(txq));
+#endif
+ do_prefetch = (ctx->ifc_flags & IFC_PREFETCH);
+ avail = TXQ_AVAIL(txq);
+ for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) {
+ int pidx_prev, rem = do_prefetch ? count - i : 0;
- for (desc_used = i = 0; i < count && TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2; i++) {
- mp = _ring_peek_one(r, cidx, i);
+ mp = _ring_peek_one(r, cidx, i, rem);
+ MPASS(mp != NULL && *mp != NULL);
+ if (__predict_false(*mp == (struct mbuf *)txq)) {
+ consumed++;
+ reclaimed++;
+ continue;
+ }
in_use_prev = txq->ift_in_use;
+ pidx_prev = txq->ift_pidx;
err = iflib_encap(txq, mp);
- /*
- * What other errors should we bail out for?
- */
- if (err == ENOBUFS) {
+ if (__predict_false(err)) {
DBG_COUNTER_INC(txq_drain_encapfail);
- break;
+ /* no room - bail out */
+ if (err == ENOBUFS)
+ break;
+ consumed++;
+ /* we can't send this packet - skip it */
+ continue;
}
consumed++;
- if (err)
- continue;
-
pkt_sent++;
m = *mp;
DBG_COUNTER_INC(tx_sent);
bytes_sent += m->m_pkthdr.len;
- if (m->m_flags & M_MCAST)
- mcast_sent++;
+ mcast_sent += !!(m->m_flags & M_MCAST);
+ avail = TXQ_AVAIL(txq);
txq->ift_db_pending += (txq->ift_in_use - in_use_prev);
desc_used += (txq->ift_in_use - in_use_prev);
- iflib_txd_db_check(ctx, txq, FALSE);
ETHER_BPF_MTAP(ifp, m);
- if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
+ if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING)))
break;
-
- if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size))
- break;
+ rang = iflib_txd_db_check(ctx, txq, false, in_use_prev);
}
- if ((iflib_min_tx_latency || iflib_txq_min_occupancy(txq)) && txq->ift_db_pending)
- iflib_txd_db_check(ctx, txq, TRUE);
- else if ((txq->ift_db_pending || TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)) &&
- (callout_pending(&txq->ift_db_check) == 0)) {
- txq->ift_db_pending_queued = txq->ift_db_pending;
- callout_reset_on(&txq->ift_db_check, 1, iflib_txd_deferred_db_check,
- txq, txq->ift_db_check.c_cpu);
- }
+ /* deliberate use of bitwise or to avoid gratuitous short-circuit */
+ ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx));
+ iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use);
if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent);
if (mcast_sent)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent);
-
+#ifdef INVARIANTS
+ if (iflib_verbose_debug)
+ printf("consumed=%d\n", consumed);
+#endif
return (consumed);
}
+static uint32_t
+iflib_txq_drain_always(struct ifmp_ring *r)
+{
+ return (1);
+}
+
+static uint32_t
+iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
+{
+ int i, avail;
+ struct mbuf **mp;
+ iflib_txq_t txq;
+
+ txq = r->cookie;
+
+ txq->ift_qstatus = IFLIB_QUEUE_IDLE;
+ CALLOUT_LOCK(txq);
+ callout_stop(&txq->ift_timer);
+ CALLOUT_UNLOCK(txq);
+
+ avail = IDXDIFF(pidx, cidx, r->size);
+ for (i = 0; i < avail; i++) {
+ mp = _ring_peek_one(r, cidx, i, avail - i);
+ if (__predict_false(*mp == (struct mbuf *)txq))
+ continue;
+ m_freem(*mp);
+ }
+ MPASS(ifmp_ring_is_stalled(r) == 0);
+ return (avail);
+}
+
static void
+iflib_ifmp_purge(iflib_txq_t txq)
+{
+ struct ifmp_ring *r;
+
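+	/*
+	 * Temporarily install drain handlers that free every enqueued mbuf,
+	 * flush the ring, then restore the normal transmit handlers.
+	 */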
+ r = txq->ift_br;
+ r->drain = iflib_txq_drain_free;
+ r->can_drain = iflib_txq_drain_always;
+
+ ifmp_ring_check_drainage(r, r->size);
+
+ r->drain = iflib_txq_drain;
+ r->can_drain = iflib_txq_can_drain;
+}
+
+static void
_task_fn_tx(void *context)
{
iflib_txq_t txq = context;
if_ctx_t ctx = txq->ift_ctx;
+ struct ifnet *ifp = ctx->ifc_ifp;
+ int rc;
+#ifdef IFLIB_DIAGNOSTICS
+ txq->ift_cpu_exec_count[curcpu]++;
+#endif
if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
return;
- ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
+ if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+ if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false))
+ netmap_tx_irq(ifp, txq->ift_id);
+ IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
+ return;
+ }
+ if (txq->ift_db_pending)
+ ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE);
+ ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
+ if (ctx->ifc_flags & IFC_LEGACY)
+ IFDI_INTR_ENABLE(ctx);
+ else {
+ rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
+ KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
+ }
}
static void
@@ -2957,17 +3639,32 @@
if_ctx_t ctx = rxq->ifr_ctx;
bool more;
int rc;
+ uint16_t budget;
+#ifdef IFLIB_DIAGNOSTICS
+ rxq->ifr_cpu_exec_count[curcpu]++;
+#endif
DBG_COUNTER_INC(task_fn_rxs);
if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
return;
-
- if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) {
+ more = true;
+#ifdef DEV_NETMAP
+ if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) {
+ u_int work = 0;
+ if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) {
+ more = false;
+ }
+ }
+#endif
+ budget = ctx->ifc_sysctl_rx_budget;
+ if (budget == 0)
+ budget = 16; /* XXX */
+ if (more == false || (more = iflib_rxeof(rxq, budget)) == false) {
if (ctx->ifc_flags & IFC_LEGACY)
IFDI_INTR_ENABLE(ctx);
else {
DBG_COUNTER_INC(rx_intr_enables);
- rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+ rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
}
}
@@ -2985,8 +3682,11 @@
iflib_txq_t txq;
int i;
- if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
- return;
+ if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) {
+ if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) {
+ return;
+ }
+ }
CTX_LOCK(ctx);
for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) {
@@ -2998,6 +3698,10 @@
for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++)
callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
IFDI_LINK_INTR_ENABLE(ctx);
+ if (ctx->ifc_flags & IFC_DO_RESET) {
+ ctx->ifc_flags &= ~IFC_DO_RESET;
+ iflib_if_init_locked(ctx);
+ }
CTX_UNLOCK(ctx);
if (LINK_ACTIVE(ctx) == 0)
@@ -3072,7 +3776,7 @@
if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(tx_frees);
m_freem(m);
- return (0);
+ return (ENOBUFS);
}
MPASS(m->m_nextpkt == NULL);
@@ -3119,18 +3823,16 @@
}
#endif
DBG_COUNTER_INC(tx_seen);
- err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE);
+ err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE);
+ GROUPTASK_ENQUEUE(&txq->ift_task);
if (err) {
- GROUPTASK_ENQUEUE(&txq->ift_task);
/* support forthcoming later */
#ifdef DRIVER_BACKPRESSURE
txq->ift_closed = TRUE;
#endif
- ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
+ ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
m_freem(m);
- } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) {
- GROUPTASK_ENQUEUE(&txq->ift_task);
}
return (err);
@@ -3147,7 +3849,7 @@
ctx->ifc_flags |= IFC_QFLUSH;
CTX_UNLOCK(ctx);
for (i = 0; i < NTXQSETS(ctx); i++, txq++)
- while (!(ifmp_ring_is_idle(txq->ift_br[0]) || ifmp_ring_is_stalled(txq->ift_br[0])))
+ while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br)))
iflib_txq_check_drain(txq, 0);
CTX_LOCK(ctx);
ctx->ifc_flags &= ~IFC_QFLUSH;
@@ -3158,11 +3860,9 @@
#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
- IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | \
+ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
-#define IFCAP_REINIT IFCAP_FLAGS
-
static int
iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
{
@@ -3236,8 +3936,6 @@
ctx->ifc_if_flags = if_getflags(ifp);
CTX_UNLOCK(ctx);
break;
-
- break;
case SIOCADDMULTI:
case SIOCDELMULTI:
if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
@@ -3254,6 +3952,7 @@
CTX_UNLOCK(ctx);
/* falls thru */
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command);
break;
case SIOCGI2C:
@@ -3288,6 +3987,8 @@
#endif
setmask |= (mask & IFCAP_FLAGS);
+ if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
+ setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6);
if ((mask & IFCAP_WOL) &&
(if_getcapabilities(ifp) & IFCAP_WOL) != 0)
setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC));
@@ -3298,10 +3999,10 @@
if (setmask) {
CTX_LOCK(ctx);
bits = if_getdrvflags(ifp);
- if (setmask & IFCAP_REINIT)
+ if (bits & IFF_DRV_RUNNING)
iflib_stop(ctx);
if_togglecapenable(ifp, setmask);
- if (setmask & IFCAP_REINIT)
+ if (bits & IFF_DRV_RUNNING)
iflib_init_locked(ctx);
if_setdrvflags(ifp, bits);
CTX_UNLOCK(ctx);
@@ -3353,7 +4054,7 @@
IFDI_VLAN_REGISTER(ctx, vtag);
/* Re-init to load the changes */
if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
- iflib_init_locked(ctx);
+ iflib_if_init_locked(ctx);
CTX_UNLOCK(ctx);
}
@@ -3372,7 +4073,7 @@
IFDI_VLAN_UNREGISTER(ctx, vtag);
/* Re-init to load the changes */
if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
- iflib_init_locked(ctx);
+ iflib_if_init_locked(ctx);
CTX_UNLOCK(ctx);
}
@@ -3462,7 +4163,6 @@
ctx->ifc_sctx = sctx;
ctx->ifc_dev = dev;
- ctx->ifc_txrx = *sctx->isc_txrx;
ctx->ifc_softc = sc;
if ((err = iflib_register(ctx)) != 0) {
@@ -3472,6 +4172,9 @@
iflib_add_device_sysctl_pre(ctx);
scctx = &ctx->ifc_softc_ctx;
+ ifp = ctx->ifc_ifp;
+ ctx->ifc_nhwtxqs = sctx->isc_ntxqs;
+
/*
* XXX sanity check that ntxd & nrxd are a power of 2
*/
@@ -3524,30 +4227,35 @@
device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
return (err);
}
- if (scctx->isc_ntxqsets_max)
- scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max);
- if (scctx->isc_nrxqsets_max)
- scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max);
+ _iflib_pre_assert(scctx);
+ ctx->ifc_txrx = *scctx->isc_txrx;
+#ifdef INVARIANTS
+ MPASS(scctx->isc_capenable);
+ if (scctx->isc_capenable & IFCAP_TXCSUM)
+ MPASS(scctx->isc_tx_csum_flags);
+#endif
+
+ if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS);
+ if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS);
+
+ if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets))
+ scctx->isc_ntxqsets = scctx->isc_ntxqsets_max;
+ if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets))
+ scctx->isc_nrxqsets = scctx->isc_nrxqsets_max;
+
#ifdef ACPI_DMAR
if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL)
ctx->ifc_flags |= IFC_DMAR;
+#elif !(defined(__i386__) || defined(__amd64__))
+ /* set unconditionally for !x86 */
+ ctx->ifc_flags |= IFC_DMAR;
#endif
msix_bar = scctx->isc_msix_bar;
+ main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0;
+ main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0;
- ifp = ctx->ifc_ifp;
-
- if(sctx->isc_flags & IFLIB_HAS_TXCQ)
- main_txq = 1;
- else
- main_txq = 0;
-
- if(sctx->isc_flags & IFLIB_HAS_RXCQ)
- main_rxq = 1;
- else
- main_rxq = 0;
-
/* XXX change for per-queue sizes */
device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
@@ -3590,6 +4298,18 @@
if (scctx->isc_rss_table_size == 0)
scctx->isc_rss_table_size = 64;
scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
+
+ GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
+ /* XXX format name */
+ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin");
+
+ /* Set up cpu set. If it fails, use the set of all CPUs. */
+ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) {
+ device_printf(dev, "Unable to fetch CPU list\n");
+ CPU_COPY(&all_cpus, &ctx->ifc_cpus);
+ }
+ MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0);
+
/*
** Now setup MSI or MSI/X, should
** return us the number of supported
@@ -3598,6 +4318,10 @@
if (sctx->isc_flags & IFLIB_SKIP_MSIX) {
msix = scctx->isc_vectors;
} else if (scctx->isc_msix_bar != 0)
+ /*
+ * The simple fact that isc_msix_bar is not 0 does not mean we
+ * we have a good value there that is known to work.
+	 * have a good value there that is known to work.
msix = iflib_msix_init(ctx);
else {
scctx->isc_vectors = 1;
@@ -3617,6 +4341,7 @@
goto fail_queues;
}
+ IFDI_INTR_DISABLE(ctx);
if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) {
device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err);
goto fail_intr_free;
@@ -3679,8 +4404,9 @@
iflib_txq_t txq;
iflib_rxq_t rxq;
device_t dev = ctx->ifc_dev;
- int i;
+ int i, j;
struct taskqgroup *tqg;
+ iflib_fl_t fl;
/* Make sure VLANS are not using driver */
if (if_vlantrunkinuse(ifp)) {
@@ -3706,16 +4432,19 @@
if (ctx->ifc_led_dev != NULL)
led_destroy(ctx->ifc_led_dev);
/* XXX drain any dependent tasks */
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
callout_drain(&txq->ift_timer);
- callout_drain(&txq->ift_db_check);
if (txq->ift_task.gt_uniq != NULL)
taskqgroup_detach(tqg, &txq->ift_task);
}
for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
if (rxq->ifr_task.gt_uniq != NULL)
taskqgroup_detach(tqg, &rxq->ifr_task);
+
+ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
+ free(fl->ifl_rx_bitmap, M_IFLIB);
+
}
tqg = qgroup_if_config_tqg;
if (ctx->ifc_admin_task.gt_uniq != NULL)
@@ -3886,15 +4615,6 @@
MPASS(sctx->isc_rx_nsegments);
MPASS(sctx->isc_rx_maxsegsize);
-
- MPASS(sctx->isc_txrx->ift_txd_encap);
- MPASS(sctx->isc_txrx->ift_txd_flush);
- MPASS(sctx->isc_txrx->ift_txd_credits_update);
- MPASS(sctx->isc_txrx->ift_rxd_available);
- MPASS(sctx->isc_txrx->ift_rxd_pkt_get);
- MPASS(sctx->isc_txrx->ift_rxd_refill);
- MPASS(sctx->isc_txrx->ift_rxd_flush);
-
MPASS(sctx->isc_nrxd_min[0]);
MPASS(sctx->isc_nrxd_max[0]);
MPASS(sctx->isc_nrxd_default[0]);
@@ -3903,6 +4623,19 @@
MPASS(sctx->isc_ntxd_default[0]);
}
+static void
+_iflib_pre_assert(if_softc_ctx_t scctx)
+{
+
+ MPASS(scctx->isc_txrx->ift_txd_encap);
+ MPASS(scctx->isc_txrx->ift_txd_flush);
+ MPASS(scctx->isc_txrx->ift_txd_credits_update);
+ MPASS(scctx->isc_txrx->ift_rxd_available);
+ MPASS(scctx->isc_txrx->ift_rxd_pkt_get);
+ MPASS(scctx->isc_txrx->ift_rxd_refill);
+ MPASS(scctx->isc_txrx->ift_rxd_flush);
+}
+
static int
iflib_register(if_ctx_t ctx)
{
@@ -3937,9 +4670,6 @@
if_setqflushfn(ifp, iflib_if_qflush);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
- if_setcapabilities(ifp, 0);
- if_setcapenable(ifp, 0);
-
ctx->ifc_vlan_attach_event =
EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
EVENTHANDLER_PRI_FIRST);
@@ -3975,7 +4705,6 @@
caddr_t *vaddrs;
uint64_t *paddrs;
struct ifmp_ring **brscp;
- int nbuf_rings = 1; /* XXX determine dynamically */
KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1"));
KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1"));
@@ -4001,11 +4730,6 @@
err = ENOMEM;
goto rx_fail;
}
- if (!(brscp = malloc(sizeof(void *) * nbuf_rings * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
- device_printf(dev, "Unable to buf_ring_sc * memory\n");
- err = ENOMEM;
- goto rx_fail;
- }
ctx->ifc_txqs = txq;
ctx->ifc_rxqs = rxq;
@@ -4028,6 +4752,7 @@
err = ENOMEM;
goto err_tx_desc;
}
+ txq->ift_txd_size[j] = scctx->isc_txd_size[j];
bzero((void *)ifdip->idi_vaddr, txqsizes[j]);
}
txq->ift_ctx = ctx;
@@ -4039,8 +4764,6 @@
}
/* XXX fix this */
txq->ift_timer.c_cpu = cpu;
- txq->ift_db_check.c_cpu = cpu;
- txq->ift_nbr = nbuf_rings;
if (iflib_txsd_alloc(txq)) {
device_printf(dev, "Critical Failure setting up TX buffers\n");
@@ -4053,21 +4776,16 @@
device_get_nameunit(dev), txq->ift_id);
mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF);
callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0);
- callout_init_mtx(&txq->ift_db_check, &txq->ift_mtx, 0);
snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db",
device_get_nameunit(dev), txq->ift_id);
- TXDB_LOCK_INIT(txq);
- txq->ift_br = brscp + i*nbuf_rings;
- for (j = 0; j < nbuf_rings; j++) {
- err = ifmp_ring_alloc(&txq->ift_br[j], 2048, txq, iflib_txq_drain,
- iflib_txq_can_drain, M_IFLIB, M_WAITOK);
- if (err) {
- /* XXX free any allocated rings */
- device_printf(dev, "Unable to allocate buf_ring\n");
- goto err_tx_desc;
- }
+ err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain,
+ iflib_txq_can_drain, M_IFLIB, M_WAITOK);
+ if (err) {
+ /* XXX free any allocated rings */
+ device_printf(dev, "Unable to allocate buf_ring\n");
+ goto err_tx_desc;
}
}
@@ -4081,6 +4799,9 @@
}
rxq->ifr_ifdi = ifdip;
+ /* XXX this needs to be changed if #rx queues != #tx queues */
+ rxq->ifr_ntxqirq = 1;
+ rxq->ifr_txqid[0] = i;
for (j = 0; j < nrxqs; j++, ifdip++) {
if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) {
device_printf(dev, "Unable to allocate Descriptor memory\n");
@@ -4105,10 +4826,10 @@
}
rxq->ifr_fl = fl;
for (j = 0; j < nfree_lists; j++) {
- rxq->ifr_fl[j].ifl_rxq = rxq;
- rxq->ifr_fl[j].ifl_id = j;
- rxq->ifr_fl[j].ifl_ifdi =
- &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
+ fl[j].ifl_rxq = rxq;
+ fl[j].ifl_id = j;
+ fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
+ fl[j].ifl_rxd_size = scctx->isc_rxd_size[j];
}
/* Allocate receive buffers for the ring*/
if (iflib_rxsd_alloc(rxq)) {
@@ -4117,6 +4838,9 @@
err = ENOMEM;
goto err_rx_desc;
}
+
+ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
+ fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO);
}
/* TXQs */
@@ -4294,19 +5018,152 @@
return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
}
-static void
-find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid)
+#ifdef SMP
+static int
+find_nth(if_ctx_t ctx, int qid)
{
- int i, cpuid;
+ cpuset_t cpus;
+ int i, cpuid, eqid, count;
- CPU_COPY(&ctx->ifc_cpus, cpus);
+ CPU_COPY(&ctx->ifc_cpus, &cpus);
+ count = CPU_COUNT(&cpus);
+ eqid = qid % count;
/* clear up to the qid'th bit */
- for (i = 0; i < qid; i++) {
- cpuid = CPU_FFS(cpus);
- CPU_CLR(cpuid, cpus);
+ for (i = 0; i < eqid; i++) {
+ cpuid = CPU_FFS(&cpus);
+ MPASS(cpuid != 0);
+ CPU_CLR(cpuid-1, &cpus);
}
+ cpuid = CPU_FFS(&cpus);
+ MPASS(cpuid != 0);
+ return (cpuid-1);
}
+#ifdef SCHED_ULE
+extern struct cpu_group *cpu_top; /* CPU topology */
+
+static int
+find_child_with_core(int cpu, struct cpu_group *grp)
+{
+ int i;
+
+ if (grp->cg_children == 0)
+ return -1;
+
+ MPASS(grp->cg_child);
+ for (i = 0; i < grp->cg_children; i++) {
+ if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
+ return i;
+ }
+
+ return -1;
+}
+
+/*
+ * Find the nth "close" core to the specified core.
+ * "close" is defined as the deepest level that shares
+ * at least an L2 cache. With threads, this will be
+ * threads on the same core. If the shared cache is L3
+ * or higher, simply returns the same core.
+ */
+static int
+find_close_core(int cpu, int core_offset)
+{
+ struct cpu_group *grp;
+ int i;
+ int fcpu;
+ cpuset_t cs;
+
+ grp = cpu_top;
+ if (grp == NULL)
+ return cpu;
+ i = 0;
+ while ((i = find_child_with_core(cpu, grp)) != -1) {
+ /* If the child only has one cpu, don't descend */
+ if (grp->cg_child[i].cg_count <= 1)
+ break;
+ grp = &grp->cg_child[i];
+ }
+
+ /* If they don't share at least an L2 cache, use the same CPU */
+ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
+ return cpu;
+
+ /* Now pick one */
+ CPU_COPY(&grp->cg_mask, &cs);
+
+ /* Add the selected CPU offset to core offset. */
+ for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) {
+ if (fcpu - 1 == cpu)
+ break;
+ CPU_CLR(fcpu - 1, &cs);
+ }
+ MPASS(fcpu);
+
+ core_offset += i;
+
+ CPU_COPY(&grp->cg_mask, &cs);
+ for (i = core_offset % grp->cg_count; i > 0; i--) {
+ MPASS(CPU_FFS(&cs));
+ CPU_CLR(CPU_FFS(&cs) - 1, &cs);
+ }
+ MPASS(CPU_FFS(&cs));
+ return CPU_FFS(&cs) - 1;
+}
+#else
+static int
+find_close_core(int cpu, int core_offset __unused)
+{
+ return cpu;
+}
+#endif
+
+static int
+get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid)
+{
+ switch (type) {
+ case IFLIB_INTR_TX:
+ /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */
+		/* XXX handle multiple RX threads per core and more than two cores per L2 group */
+ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1;
+ case IFLIB_INTR_RX:
+ case IFLIB_INTR_RXTX:
+ /* RX queues get the specified core */
+ return qid / CPU_COUNT(&ctx->ifc_cpus);
+ default:
+ return -1;
+ }
+}
+#else
+#define get_core_offset(ctx, type, qid) CPU_FIRST()
+#define find_close_core(cpuid, tid) CPU_FIRST()
+#define find_nth(ctx, gid) CPU_FIRST()
+#endif
+
+/* Just to avoid copy/paste */
+static inline int
+iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid,
+ struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name)
+{
+ int cpuid;
+ int err, tid;
+
+ cpuid = find_nth(ctx, qid);
+ tid = get_core_offset(ctx, type, qid);
+ MPASS(tid >= 0);
+ cpuid = find_close_core(cpuid, tid);
+ err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name);
+ if (err) {
+ device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err);
+ return (err);
+ }
+#ifdef notyet
+ if (cpuid > ctx->ifc_cpuid_highest)
+ ctx->ifc_cpuid_highest = cpuid;
+#endif
+ return 0;
+}
+
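
iflib_irq_set_affinity() ties the three helpers together: find_nth() picks the queue's base CPU from ifc_cpus, get_core_offset() decides whether the task should run on that CPU or a nearby one, and find_close_core() resolves "nearby" through the SCHED_ULE topology. A hedged worked example, assuming four cores with two SMT threads each and consecutively numbered siblings; none of these numbers are guaranteed by the patch.

	/*
	 * Illustrative only: ifc_cpus = {0..7}, SMT siblings paired (0,1), (2,3), ...
	 *
	 * RX queue 2: find_nth(ctx, 2)            -> CPU 2
	 *             get_core_offset(ctx, RX, 2) -> 0      (2 / 8)
	 *             find_close_core(2, 0)       -> CPU 2  (the queue's own core)
	 *
	 * TX queue 2: find_nth(ctx, 2)            -> CPU 2
	 *             get_core_offset(ctx, TX, 2) -> 1      (2 / 8 + 1)
	 *             find_close_core(2, 1)       -> CPU 3  (the L2-sharing SMT sibling)
	 */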
int
iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
@@ -4315,12 +5172,13 @@
struct grouptask *gtask;
struct taskqgroup *tqg;
iflib_filter_info_t info;
- cpuset_t cpus;
gtask_fn_t *fn;
int tqrid, err;
+ driver_filter_t *intr_fast;
void *q;
info = &ctx->ifc_filter_info;
+ tqrid = rid;
switch (type) {
/* XXX merge tx/rx for netmap? */
@@ -4328,90 +5186,111 @@
q = &ctx->ifc_txqs[qid];
info = &ctx->ifc_txqs[qid].ift_filter_info;
gtask = &ctx->ifc_txqs[qid].ift_task;
- tqg = qgroup_softirq;
- tqrid = irq->ii_rid;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_tx;
+ intr_fast = iflib_fast_intr;
+ GROUPTASK_INIT(gtask, 0, fn, q);
break;
case IFLIB_INTR_RX:
q = &ctx->ifc_rxqs[qid];
info = &ctx->ifc_rxqs[qid].ifr_filter_info;
gtask = &ctx->ifc_rxqs[qid].ifr_task;
- tqg = qgroup_softirq;
- tqrid = irq->ii_rid;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_rx;
+ intr_fast = iflib_fast_intr;
+ GROUPTASK_INIT(gtask, 0, fn, q);
break;
+ case IFLIB_INTR_RXTX:
+ q = &ctx->ifc_rxqs[qid];
+ info = &ctx->ifc_rxqs[qid].ifr_filter_info;
+ gtask = &ctx->ifc_rxqs[qid].ifr_task;
+ tqg = qgroup_if_io_tqg;
+ fn = _task_fn_rx;
+ intr_fast = iflib_fast_intr_rxtx;
+ GROUPTASK_INIT(gtask, 0, fn, q);
+ break;
case IFLIB_INTR_ADMIN:
q = ctx;
+ tqrid = -1;
info = &ctx->ifc_filter_info;
gtask = &ctx->ifc_admin_task;
tqg = qgroup_if_config_tqg;
- tqrid = -1;
fn = _task_fn_admin;
+ intr_fast = iflib_fast_intr_ctx;
break;
default:
panic("unknown net intr type");
}
- GROUPTASK_INIT(gtask, 0, fn, q);
info->ifi_filter = filter;
info->ifi_filter_arg = filter_arg;
info->ifi_task = gtask;
+ info->ifi_ctx = q;
- /* XXX query cpu that rid belongs to */
-
- err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info, name);
- if (err != 0)
+ err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name);
+ if (err != 0) {
+ device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err);
return (err);
+ }
+ if (type == IFLIB_INTR_ADMIN)
+ return (0);
+
if (tqrid != -1) {
- find_nth(ctx, &cpus, qid);
- taskqgroup_attach_cpu(tqg, gtask, q, CPU_FFS(&cpus), irq->ii_rid, name);
- } else
- taskqgroup_attach(tqg, gtask, q, tqrid, name);
+ err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name);
+ if (err)
+ return (err);
+ } else {
+ taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
+ }
-
return (0);
}
void
-iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name)
+iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name)
{
struct grouptask *gtask;
struct taskqgroup *tqg;
gtask_fn_t *fn;
void *q;
+ int irq_num = -1;
+ int err;
switch (type) {
case IFLIB_INTR_TX:
q = &ctx->ifc_txqs[qid];
gtask = &ctx->ifc_txqs[qid].ift_task;
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_tx;
+ if (irq != NULL)
+ irq_num = rman_get_start(irq->ii_res);
break;
case IFLIB_INTR_RX:
q = &ctx->ifc_rxqs[qid];
gtask = &ctx->ifc_rxqs[qid].ifr_task;
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_rx;
+ if (irq != NULL)
+ irq_num = rman_get_start(irq->ii_res);
break;
- case IFLIB_INTR_ADMIN:
- q = ctx;
- gtask = &ctx->ifc_admin_task;
- tqg = qgroup_if_config_tqg;
- rid = -1;
- fn = _task_fn_admin;
- break;
case IFLIB_INTR_IOV:
q = ctx;
gtask = &ctx->ifc_vflr_task;
tqg = qgroup_if_config_tqg;
- rid = -1;
fn = _task_fn_iov;
break;
default:
panic("unknown net intr type");
}
GROUPTASK_INIT(gtask, 0, fn, q);
- taskqgroup_attach(tqg, gtask, q, rid, name);
+ if (irq_num != -1) {
+ err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name);
+ if (err)
+ taskqgroup_attach(tqg, gtask, q, irq_num, name);
+	} else {
+		taskqgroup_attach(tqg, gtask, q, irq_num, name);
+	}
}
void
@@ -4438,10 +5317,17 @@
void *q;
int err;
+ /*
+ * group taskqueues aren't properly set up until SMP is started
+ * so we disable interrupts until we can handle them post
+ * SI_SUB_SMP
+ */
+ IFDI_INTR_DISABLE(ctx);
+
q = &ctx->ifc_rxqs[0];
info = &rxq[0].ifr_filter_info;
gtask = &rxq[0].ifr_task;
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
tqrid = irq->ii_rid = *rid;
fn = _task_fn_rx;
@@ -4451,16 +5337,13 @@
info->ifi_task = gtask;
/* We allocate a single interrupt resource */
- if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr, NULL, info, name)) != 0)
+ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0)
return (err);
GROUPTASK_INIT(gtask, 0, fn, q);
- taskqgroup_attach(tqg, gtask, q, tqrid, name);
+ taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq);
- taskqgroup_attach(qgroup_softirq, &txq->ift_task, txq, tqrid, "tx");
- GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
- taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin/link");
-
+ taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, rman_get_start(irq->ii_res), "tx");
return (0);
}
@@ -4469,7 +5352,7 @@
{
ctx->ifc_led_dev = led_create(iflib_led_func, ctx,
- device_get_nameunit(ctx->ifc_dev));
+ device_get_nameunit(ctx->ifc_dev));
}
void
@@ -4489,7 +5372,13 @@
void
iflib_admin_intr_deferred(if_ctx_t ctx)
{
+#ifdef INVARIANTS
+ struct grouptask *gtask;
+ gtask = &ctx->ifc_admin_task;
+ MPASS(gtask != NULL && gtask->gt_taskqueue != NULL);
+#endif
+
GROUPTASK_ENQUEUE(&ctx->ifc_admin_task);
}
@@ -4504,7 +5393,7 @@
iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name)
{
- taskqgroup_attach_cpu(qgroup_softirq, gt, uniq, cpu, -1, name);
+ taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name);
}
void
@@ -4529,8 +5418,9 @@
if_t ifp = ctx->ifc_ifp;
iflib_txq_t txq = ctx->ifc_txqs;
-
if_setbaudrate(ifp, baudrate);
+ if (baudrate >= IF_Gbps(10))
+ ctx->ifc_flags |= IFC_PREFETCH;
/* If link down, disable watchdog */
if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) {
@@ -4545,23 +5435,27 @@
iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq)
{
int credits;
+#ifdef INVARIANTS
+ int credits_pre = txq->ift_cidx_processed;
+#endif
if (ctx->isc_txd_credits_update == NULL)
return (0);
- if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, true)) == 0)
+ if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0)
return (0);
txq->ift_processed += credits;
txq->ift_cidx_processed += credits;
+ MPASS(credits_pre + credits == txq->ift_cidx_processed);
if (txq->ift_cidx_processed >= txq->ift_size)
txq->ift_cidx_processed -= txq->ift_size;
return (credits);
}
static int
-iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget)
+iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget)
{
return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
@@ -4599,13 +5493,28 @@
int iflib_num_tx_queues, iflib_num_rx_queues;
int err, admincnt, bar;
- iflib_num_tx_queues = scctx->isc_ntxqsets;
- iflib_num_rx_queues = scctx->isc_nrxqsets;
+ iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs;
+ iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs;
+ device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets));
+
bar = ctx->ifc_softc_ctx.isc_msix_bar;
admincnt = sctx->isc_admin_intrcnt;
+ /* Override by global tuneable */
+ {
+ int i;
+ size_t len = sizeof(i);
+ err = kernel_sysctlbyname(curthread, "hw.pci.enable_msix", &i, &len, NULL, 0, NULL, 0);
+ if (err == 0) {
+ if (i == 0)
+ goto msi;
+		} else {
+			device_printf(dev, "unable to read hw.pci.enable_msix.\n");
+		}
+ }
/* Override by tuneable */
- if (enable_msix == 0)
+ if (scctx->isc_disable_msix)
goto msi;
/*
@@ -4617,18 +5526,20 @@
** successfully initialize us.
*/
{
- uint16_t pci_cmd_word;
int msix_ctrl, rid;
+ pci_enable_busmaster(dev);
rid = 0;
- pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
- pci_cmd_word |= PCIM_CMD_BUSMASTEREN;
- pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2);
- pci_find_cap(dev, PCIY_MSIX, &rid);
- rid += PCIR_MSIX_CTRL;
- msix_ctrl = pci_read_config(dev, rid, 2);
- msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
- pci_write_config(dev, rid, msix_ctrl, 2);
+ if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) {
+ rid += PCIR_MSIX_CTRL;
+ msix_ctrl = pci_read_config(dev, rid, 2);
+ msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
+ pci_write_config(dev, rid, msix_ctrl, 2);
+ } else {
+ device_printf(dev, "PCIY_MSIX capability not found; "
+ "or rid %d == 0.\n", rid);
+ goto msi;
+ }
}
/*
@@ -4661,20 +5572,14 @@
#else
queuemsgs = msgs - admincnt;
#endif
- if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) {
#ifdef RSS
- queues = imin(queuemsgs, rss_getnumbuckets());
+ queues = imin(queuemsgs, rss_getnumbuckets());
#else
- queues = queuemsgs;
+ queues = queuemsgs;
#endif
- queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues);
- device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n",
- CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt);
- } else {
- device_printf(dev, "Unable to fetch CPU list\n");
- /* Figure out a reasonable auto config value */
- queues = min(queuemsgs, mp_ncpus);
- }
+ queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues);
+ device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n",
+ CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt);
#ifdef RSS
/* If we're doing RSS, clamp at the number of RSS buckets */
if (queues > rss_getnumbuckets())
@@ -4684,6 +5589,10 @@
rx_queues = iflib_num_rx_queues;
else
rx_queues = queues;
+
+ if (rx_queues > scctx->isc_nrxqsets)
+ rx_queues = scctx->isc_nrxqsets;
+
/*
* We want this to be all logical CPUs by default
*/
@@ -4692,6 +5601,9 @@
else
tx_queues = mp_ncpus;
+ if (tx_queues > scctx->isc_ntxqsets)
+ tx_queues = scctx->isc_ntxqsets;
+
if (ctx->ifc_sysctl_qs_eq_override == 0) {
#ifdef INVARIANTS
if (tx_queues != rx_queues)
@@ -4773,7 +5685,7 @@
if_ctx_t ctx = (void *)arg1;
enum iflib_ndesc_handler type = arg2;
char buf[256] = {0};
- uint16_t *ndesc;
+ qidx_t *ndesc;
char *p, *next;
int nqs, rc, i;
@@ -4843,6 +5755,12 @@
SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
"permit #txq != #rxq");
+ SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix",
+ CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0,
+ "disable MSIX (default 0)");
+ SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget",
+ CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0,
+ "set the rx budget");
/* XXX change for per-queue sizes */
SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
@@ -4935,27 +5853,26 @@
CTLFLAG_RD,
&txq->ift_cleaned, "total cleaned");
SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state",
- CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br[0]->state),
+ CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state),
0, mp_ring_state_handler, "A", "soft ring state");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues",
- CTLFLAG_RD, &txq->ift_br[0]->enqueues,
+ CTLFLAG_RD, &txq->ift_br->enqueues,
"# of enqueues to the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops",
- CTLFLAG_RD, &txq->ift_br[0]->drops,
+ CTLFLAG_RD, &txq->ift_br->drops,
"# of drops in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts",
- CTLFLAG_RD, &txq->ift_br[0]->starts,
+ CTLFLAG_RD, &txq->ift_br->starts,
"# of normal consumer starts in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls",
- CTLFLAG_RD, &txq->ift_br[0]->stalls,
+ CTLFLAG_RD, &txq->ift_br->stalls,
"# of consumer stalls in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts",
- CTLFLAG_RD, &txq->ift_br[0]->restarts,
+ CTLFLAG_RD, &txq->ift_br->restarts,
"# of consumer restarts in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
- CTLFLAG_RD, &txq->ift_br[0]->abdications,
+ CTLFLAG_RD, &txq->ift_br->abdications,
"# of consumer abdications in the mp_ring for this queue");
-
}
if (scctx->isc_nrxqsets > 100)
@@ -4977,6 +5894,7 @@
CTLFLAG_RD,
&rxq->ifr_cq_cidx, 1, "Consumer Index");
}
+
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j);
fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf,
@@ -5010,3 +5928,30 @@
}
}
+
+#ifndef __NO_STRICT_ALIGNMENT
+static struct mbuf *
+iflib_fixup_rx(struct mbuf *m)
+{
+ struct mbuf *n;
+
+ if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
+ bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
+ m->m_data += ETHER_HDR_LEN;
+ n = m;
+ } else {
+ MGETHDR(n, M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ m_freem(m);
+ return (NULL);
+ }
+ bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
+ m->m_data += ETHER_HDR_LEN;
+ m->m_len -= ETHER_HDR_LEN;
+ n->m_len = ETHER_HDR_LEN;
+ M_MOVE_PKTHDR(n, m);
+ n->m_next = m;
+ }
+ return (n);
+}
+#endif
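
iflib_fixup_rx() above serves strict-alignment platforms (where __NO_STRICT_ALIGNMENT is undefined): the 14-byte Ethernet header leaves the IP header on a 2-byte boundary, so the header is either shifted forward within the cluster (small frames) or copied into a fresh mbuf prepended to the chain (large frames), restoring 32-bit alignment of the network-layer payload. A hedged sketch of where a receive path could apply it; the call shape and error handling are assumptions, not quotes from the patch.

	/* Illustrative call site: run once the received chain is assembled. */
	if ((m = iflib_fixup_rx(m)) == NULL)
		return (ENOMEM);	/* the original chain was already freed */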
Index: sys/net/mp_ring.c
===================================================================
--- sys/net/mp_ring.c
+++ sys/net/mp_ring.c
@@ -454,18 +454,12 @@
do {
os.state = ns.state = r->state;
ns.pidx_tail = pidx_stop;
- ns.flags = BUSY;
+ if (os.flags == IDLE)
+ ns.flags = ABDICATED;
} while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0);
critical_exit();
counter_u64_add(r->enqueues, n);
- /*
- * Turn into a consumer if some other thread isn't active as a consumer
- * already.
- */
- if (os.flags != BUSY)
- drain_ring_lockless(r, ns, os.flags, budget);
-
return (0);
}
#endif
@@ -476,7 +470,9 @@
union ring_state os, ns;
os.state = r->state;
- if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0)
+ if ((os.flags != STALLED && os.flags != ABDICATED) || // Only continue in STALLED and ABDICATED
+ os.pidx_head != os.pidx_tail || // Require work to be available
+ (os.flags != ABDICATED && r->can_drain(r) == 0)) // Can either drain, or everyone left
return;
MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */
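
Together these two hunks change the producer/consumer hand-off in mp_ring: ifmp_ring_enqueue() no longer promotes the enqueuing thread to consumer; when the ring was idle it is marked ABDICATED, and ifmp_ring_check_drainage() (now willing to proceed in the ABDICATED state) performs the actual drain from the TX group task. A hedged sketch of the pairing this relies on, condensed from the iflib changes earlier in this diff:

	/* Producer: hand the mbuf to the soft ring, then kick the tx task. */
	err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE);
	GROUPTASK_ENQUEUE(&txq->ift_task);

	/* Consumer (_task_fn_tx): drain whatever was abdicated to the task. */
	ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);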
Index: sys/sys/_task.h
===================================================================
--- sys/sys/_task.h
+++ sys/sys/_task.h
@@ -65,7 +65,8 @@
void *gt_taskqueue;
LIST_ENTRY(grouptask) gt_list;
void *gt_uniq;
- char *gt_name;
+#define GROUPTASK_NAMELEN 32
+ char gt_name[GROUPTASK_NAMELEN];
int16_t gt_irq;
int16_t gt_cpu;
};
Index: sys/sys/cpuset.h
===================================================================
--- sys/sys/cpuset.h
+++ sys/sys/cpuset.h
@@ -83,6 +83,8 @@
#define CPU_WHICH_IRQ 4 /* Specifies an irq #. */
#define CPU_WHICH_JAIL 5 /* Specifies a jail id. */
#define CPU_WHICH_DOMAIN 6 /* Specifies a NUMA domain id. */
+#define CPU_WHICH_INTRHANDLER 7 /* Specifies an irq # (not ithread). */
+#define CPU_WHICH_ITHREAD 8 /* Specifies an irq's ithread. */
/*
* Reserved cpuset identifiers.
Index: sys/sys/interrupt.h
===================================================================
--- sys/sys/interrupt.h
+++ sys/sys/interrupt.h
@@ -162,6 +162,8 @@
driver_filter_t filter, driver_intr_t handler, void *arg,
u_char pri, enum intr_type flags, void **cookiep);
int intr_event_bind(struct intr_event *ie, int cpu);
+int intr_event_bind_irqonly(struct intr_event *ie, int cpu);
+int intr_event_bind_ithread(struct intr_event *ie, int cpu);
int intr_event_create(struct intr_event **event, void *source,
int flags, int irq, void (*pre_ithread)(void *),
void (*post_ithread)(void *), void (*post_filter)(void *),
@@ -173,9 +175,9 @@
void intr_event_execute_handlers(struct proc *p, struct intr_event *ie);
int intr_event_handle(struct intr_event *ie, struct trapframe *frame);
int intr_event_remove_handler(void *cookie);
-int intr_getaffinity(int irq, void *mask);
+int intr_getaffinity(int irq, int mode, void *mask);
void *intr_handler_source(void *cookie);
-int intr_setaffinity(int irq, void *mask);
+int intr_setaffinity(int irq, int mode, void *mask);
void _intr_drain(int irq); /* Linux compat only. */
int swi_add(struct intr_event **eventp, const char *name,
driver_intr_t handler, void *arg, int pri, enum intr_type flags,
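
The new mode argument to intr_setaffinity() and intr_getaffinity() lets a caller target just the low-level interrupt handler (CPU_WHICH_INTRHANDLER), just the ithread (CPU_WHICH_ITHREAD), or both as before (CPU_WHICH_IRQ). A hedged usage sketch; the irq number, CPU choice and error handling are placeholders.

	cpuset_t mask;
	int error;

	CPU_ZERO(&mask);
	CPU_SET(2, &mask);	/* placeholder CPU */

	/* Pin only the hardware interrupt; the ithread keeps its own affinity. */
	error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask);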
