D15142.id41688.diff
Index: sys/dev/bnxt/bnxt.h
===================================================================
--- sys/dev/bnxt/bnxt.h
+++ sys/dev/bnxt/bnxt.h
@@ -203,6 +203,8 @@
/* Chip info */
#define BNXT_TSO_SIZE UINT16_MAX
+#define BNXT_MIN_FRAME_SIZE 52 /* Frames must be padded to this size for some A0 chips */
+
/* NVRAM access */
enum bnxt_nvm_directory_type {
BNX_DIR_TYPE_UNUSED = 0,
@@ -427,6 +429,7 @@
uint32_t ring_size; /* Must be a power of two */
uint16_t id; /* Logical ID */
uint16_t phys_id;
+ struct bnxt_full_tpa_start *tpa_start;
};
struct bnxt_cp_ring {
@@ -552,7 +555,6 @@
struct sysctl_ctx_list hw_stats;
struct sysctl_oid *hw_stats_oid;
- struct bnxt_full_tpa_start *tpa_start;
struct bnxt_ver_info *ver_info;
struct bnxt_nvram_info *nvm_info;
};
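
The bnxt.h hunks above move the TPA (hardware LRO) reassembly state out of the global softc and into each RX ring, so every queue set tracks its own in-flight aggregations; BNXT_MIN_FRAME_SIZE supports the runt-frame padding path used later in this diff. The following is a rough, self-contained sketch of the per-ring indexing idea only; the slot count is a placeholder, where the driver itself derives it from the generated HSI agg_id mask/shift constants.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define TPA_MAX_AGGS 128 /* placeholder; the driver sizes this from RX_TPA_START_CMPL_AGG_ID_MASK/SFT */

struct tpa_slot {            /* stands in for struct bnxt_full_tpa_start */
	uint64_t low, high;  /* the two halves of the TPA-start completion */
};

struct rx_ring_sketch {
	struct tpa_slot *tpa_start; /* one slot per possible aggregation ID */
};

static int
ring_tpa_alloc(struct rx_ring_sketch *r)
{
	r->tpa_start = calloc(TPA_MAX_AGGS, sizeof(*r->tpa_start));
	return (r->tpa_start == NULL ? -1 : 0);
}

/* A TPA-start completion stashes its halves in the owning ring's slot; the
 * later TPA-end completion for the same agg_id reads them back. */
static struct tpa_slot *
ring_tpa_slot(struct rx_ring_sketch *r, uint16_t agg_id)
{
	assert(agg_id < TPA_MAX_AGGS);
	return (&r->tpa_start[agg_id]);
}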
Index: sys/dev/bnxt/bnxt_hwrm.c
===================================================================
--- sys/dev/bnxt/bnxt_hwrm.c
+++ sys/dev/bnxt/bnxt_hwrm.c
@@ -931,7 +931,7 @@
/* TODO: Calculate this based on ring size? */
req.max_agg_segs = htole16(3);
/* Base this in the allocated TPA start size... */
- req.max_aggs = htole16(2);
+ req.max_aggs = htole16(7);
/*
* TODO: max_agg_timer?
* req.mag_agg_timer = htole32(XXX);
Index: sys/dev/bnxt/bnxt_txrx.c
===================================================================
--- sys/dev/bnxt/bnxt_txrx.c
+++ sys/dev/bnxt/bnxt_txrx.c
@@ -48,17 +48,19 @@
*/
static int bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi);
-static void bnxt_isc_txd_flush(void *sc, uint16_t txqid, uint32_t pidx);
-static int bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, uint32_t cidx,
- bool clear);
+static void bnxt_isc_txd_flush(void *sc, uint16_t txqid, qidx_t pidx);
+static int bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear);
-static void bnxt_isc_rxd_refill(void *sc, uint16_t rxqid, uint8_t flid,
+static void bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru);
+
+/* uint16_t rxqid, uint8_t flid,
uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs, uint16_t count,
uint16_t buf_size);
+*/
static void bnxt_isc_rxd_flush(void *sc, uint16_t rxqid, uint8_t flid,
- uint32_t pidx);
-static int bnxt_isc_rxd_available(void *sc, uint16_t rxqid, uint32_t idx,
- int budget);
+ qidx_t pidx);
+static int bnxt_isc_rxd_available(void *sc, uint16_t rxqid, qidx_t idx,
+ qidx_t budget);
static int bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri);
static int bnxt_intr(void *sc);
@@ -172,7 +174,7 @@
}
static void
-bnxt_isc_txd_flush(void *sc, uint16_t txqid, uint32_t pidx)
+bnxt_isc_txd_flush(void *sc, uint16_t txqid, qidx_t pidx)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_ring *tx_ring = &softc->tx_rings[txqid];
@@ -185,7 +187,7 @@
}
static int
-bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, uint32_t idx, bool clear)
+bnxt_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->tx_cp_rings[txqid];
@@ -249,16 +251,30 @@
}
static void
-bnxt_isc_rxd_refill(void *sc, uint16_t rxqid, uint8_t flid,
- uint32_t pidx, uint64_t *paddrs,
- caddr_t *vaddrs, uint16_t count, uint16_t len)
+bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_ring *rx_ring;
struct rx_prod_pkt_bd *rxbd;
uint16_t type;
uint16_t i;
+ uint16_t rxqid;
+ uint16_t count, len;
+ uint32_t pidx;
+ uint8_t flid;
+ uint64_t *paddrs;
+ caddr_t *vaddrs;
+ qidx_t *frag_idxs;
+ rxqid = iru->iru_qsidx;
+ count = iru->iru_count;
+ len = iru->iru_buf_size;
+ pidx = iru->iru_pidx;
+ flid = iru->iru_flidx;
+ vaddrs = iru->iru_vaddrs;
+ paddrs = iru->iru_paddrs;
+ frag_idxs = iru->iru_idxs;
+
if (flid == 0) {
rx_ring = &softc->rx_rings[rxqid];
type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT;
@@ -273,8 +289,8 @@
rxbd[pidx].flags_type = htole16(type);
rxbd[pidx].len = htole16(len);
/* No need to byte-swap the opaque value */
- rxbd[pidx].opaque = ((rxqid & 0xff) << 24) | (flid << 16)
- | pidx;
+ rxbd[pidx].opaque = (((rxqid & 0xff) << 24) | (flid << 16)
+ | (frag_idxs[i]));
rxbd[pidx].addr = htole64(paddrs[i]);
if (++pidx == rx_ring->ring_size)
pidx = 0;
@@ -284,7 +300,7 @@
static void
bnxt_isc_rxd_flush(void *sc, uint16_t rxqid, uint8_t flid,
- uint32_t pidx)
+ qidx_t pidx)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_ring *rx_ring;
@@ -310,12 +326,11 @@
}
static int
-bnxt_isc_rxd_available(void *sc, uint16_t rxqid, uint32_t idx, int budget)
+bnxt_isc_rxd_available(void *sc, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[rxqid];
struct rx_pkt_cmpl *rcp;
- struct rx_tpa_start_cmpl *rtpa;
struct rx_tpa_end_cmpl *rtpae;
struct cmpl_base *cmp = (struct cmpl_base *)cpr->ring.vaddr;
int avail = 0;
@@ -324,7 +339,6 @@
uint8_t ags;
int i;
uint16_t type;
- uint8_t agg_id;
for (;;) {
NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
@@ -374,18 +388,11 @@
avail++;
break;
case CMPL_BASE_TYPE_RX_TPA_START:
- rtpa = (void *)&cmp[cons];
- agg_id = (rtpa->agg_id &
- RX_TPA_START_CMPL_AGG_ID_MASK) >>
- RX_TPA_START_CMPL_AGG_ID_SFT;
- softc->tpa_start[agg_id].low = *rtpa;
NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
CMPL_PREFETCH_NEXT(cpr, cons);
if (!CMP_VALID(&cmp[cons], v_bit))
goto cmpl_invalid;
- softc->tpa_start[agg_id].high =
- ((struct rx_tpa_start_cmpl_hi *)cmp)[cons];
break;
case CMPL_BASE_TYPE_RX_AGG:
break;
@@ -510,7 +517,7 @@
/* Get the agg_id */
agg_id = (agend->agg_id & RX_TPA_END_CMPL_AGG_ID_MASK) >>
RX_TPA_END_CMPL_AGG_ID_SFT;
- tpas = &softc->tpa_start[agg_id];
+ tpas = &(softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id]);
/* Extract from the first 16-byte BD */
if (le16toh(tpas->low.flags_type) & RX_TPA_START_CMPL_FLAGS_RSS_VALID) {
@@ -530,8 +537,8 @@
RX_TPA_END_CMPL_AGG_BUFS_SFT;
ri->iri_nfrags = ags + 1;
/* No need to byte-swap the opaque value */
- ri->iri_frags[0].irf_flid = (tpas->low.opaque >> 16) & 0xff;
- ri->iri_frags[0].irf_idx = tpas->low.opaque & 0xffff;
+ ri->iri_frags[0].irf_flid = ((tpas->low.opaque >> 16) & 0xff);
+ ri->iri_frags[0].irf_idx = (tpas->low.opaque & 0xffff);
ri->iri_frags[0].irf_len = le16toh(tpas->low.len);
ri->iri_len = le16toh(tpas->low.len);
@@ -567,8 +574,8 @@
acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons];
/* No need to byte-swap the opaque value */
- ri->iri_frags[i].irf_flid = (acp->opaque >> 16) & 0xff;
- ri->iri_frags[i].irf_idx = acp->opaque & 0xffff;
+ ri->iri_frags[i].irf_flid = ((acp->opaque >> 16) & 0xff);
+ ri->iri_frags[i].irf_idx = (acp->opaque & 0xffff);
ri->iri_frags[i].irf_len = le16toh(acp->len);
ri->iri_len += le16toh(acp->len);
}
@@ -576,8 +583,8 @@
/* And finally, the empty BD at the end... */
ri->iri_nfrags++;
/* No need to byte-swap the opaque value */
- ri->iri_frags[i].irf_flid = (agend->opaque >> 16) % 0xff;
- ri->iri_frags[i].irf_idx = agend->opaque & 0xffff;
+ ri->iri_frags[i].irf_flid = ((agend->opaque >> 16) & 0xff);
+ ri->iri_frags[i].irf_idx = (agend->opaque & 0xffff);
ri->iri_frags[i].irf_len = le16toh(agend->len);
ri->iri_len += le16toh(agend->len);
@@ -590,9 +597,12 @@
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[ri->iri_qsidx];
+ struct cmpl_base *cmp_q = (struct cmpl_base *)cpr->ring.vaddr;
struct cmpl_base *cmp;
+ struct rx_tpa_start_cmpl *rtpa;
uint16_t flags_type;
uint16_t type;
+ uint8_t agg_id;
for (;;) {
NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
@@ -609,9 +619,18 @@
case CMPL_BASE_TYPE_RX_TPA_END:
return bnxt_pkt_get_tpa(softc, ri, cpr, flags_type);
case CMPL_BASE_TYPE_RX_TPA_START:
+ rtpa = (void *)&cmp_q[cpr->cons];
+ agg_id = (rtpa->agg_id &
+ RX_TPA_START_CMPL_AGG_ID_MASK) >>
+ RX_TPA_START_CMPL_AGG_ID_SFT;
+ softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].low = *rtpa;
+
NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
CMPL_PREFETCH_NEXT(cpr, cpr->cons);
+
+ softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].high =
+ ((struct rx_tpa_start_cmpl_hi *)cmp_q)[cpr->cons];
break;
default:
device_printf(softc->dev,
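
The bnxt_txrx.c hunks above adopt the new iflib refill contract: instead of a long scalar argument list, ift_rxd_refill() now receives a single if_rxd_update batch descriptor, and each buffer carries its own free-list fragment index (iru_idxs) that the driver encodes into the descriptor's opaque field. Below is a hedged, self-contained sketch of the caller side, roughly what iflib's refill path does; the struct field names match the if_rxd_update added in iflib.h further down, everything else (types trimmed, caddr_t shown as char *) is illustrative.

#include <stdint.h>

typedef uint16_t qidx_t;

struct if_rxd_update {
	uint64_t *iru_paddrs;   /* bus addresses of the new buffers */
	char    **iru_vaddrs;   /* matching KVA pointers */
	qidx_t   *iru_idxs;     /* free-list slot index for each buffer */
	qidx_t    iru_pidx;     /* first producer index to write */
	uint16_t  iru_qsidx;    /* RX queue set */
	uint16_t  iru_count;    /* number of buffers in this batch */
	uint16_t  iru_buf_size; /* cluster size */
	uint8_t   iru_flidx;    /* which free list (0 = normal, 1 = aggregation) */
};

/* Pack one refill batch and hand it to the driver's refill callback. */
static void
refill_batch(void (*rxd_refill)(void *, struct if_rxd_update *), void *sc,
    uint16_t qsidx, uint8_t flid, qidx_t pidx, uint64_t *paddrs, char **vaddrs,
    qidx_t *idxs, uint16_t count, uint16_t bufsz)
{
	struct if_rxd_update iru = {
		.iru_paddrs = paddrs, .iru_vaddrs = vaddrs, .iru_idxs = idxs,
		.iru_pidx = pidx, .iru_qsidx = qsidx, .iru_count = count,
		.iru_buf_size = bufsz, .iru_flidx = flid,
	};

	rxd_refill(sc, &iru);	/* driver walks iru_count entries starting at iru_pidx */
}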
Index: sys/dev/bnxt/if_bnxt.c
===================================================================
--- sys/dev/bnxt/if_bnxt.c
+++ sys/dev/bnxt/if_bnxt.c
@@ -235,6 +235,8 @@
MODULE_DEPEND(bnxt, ether, 1, 1, 1);
MODULE_DEPEND(bnxt, iflib, 1, 1, 1);
+IFLIB_PNP_INFO(pci, bnxt, bnxt_vendor_info_array);
+
static device_method_t bnxt_iflib_methods[] = {
DEVMETHOD(ifdi_tx_queues_alloc, bnxt_tx_queues_alloc),
DEVMETHOD(ifdi_rx_queues_alloc, bnxt_rx_queues_alloc),
@@ -255,7 +257,8 @@
DEVMETHOD(ifdi_update_admin_status, bnxt_update_admin_status),
DEVMETHOD(ifdi_intr_enable, bnxt_intr_enable),
- DEVMETHOD(ifdi_queue_intr_enable, bnxt_queue_intr_enable),
+ DEVMETHOD(ifdi_tx_queue_intr_enable, bnxt_queue_intr_enable),
+ DEVMETHOD(ifdi_rx_queue_intr_enable, bnxt_queue_intr_enable),
DEVMETHOD(ifdi_intr_disable, bnxt_disable_intr),
DEVMETHOD(ifdi_msix_intr_assign, bnxt_msix_intr_assign),
@@ -279,10 +282,9 @@
extern struct if_txrx bnxt_txrx;
static struct if_shared_ctx bnxt_sctx_init = {
.isc_magic = IFLIB_MAGIC,
- .isc_txrx = &bnxt_txrx,
.isc_driver = &bnxt_iflib_driver,
.isc_nfl = 2, // Number of Free Lists
- .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ,
+ .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_NEED_ETHER_PAD,
.isc_q_align = PAGE_SIZE,
.isc_tx_maxsize = BNXT_TSO_SIZE,
.isc_tx_maxsegsize = BNXT_TSO_SIZE,
@@ -494,6 +496,17 @@
softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1];
softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1];
+ /* Allocate the TPA start buffer */
+ softc->rx_rings[i].tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) *
+ (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (softc->rx_rings[i].tpa_start == NULL) {
+ rc = -ENOMEM;
+ device_printf(softc->dev,
+ "Unable to allocate space for TPA\n");
+ goto tpa_alloc_fail;
+ }
+
/* Allocate the AG ring */
softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE;
softc->ag_rings[i].softc = softc;
@@ -559,7 +572,10 @@
iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl);
rss_hash_alloc_fail:
iflib_dma_free(&softc->vnic_info.mc_list);
+tpa_alloc_fail:
mc_list_alloc_fail:
+ for (i = i - 1; i >= 0; i--)
+ free(softc->rx_rings[i].tpa_start, M_DEVBUF);
iflib_dma_free(&softc->rx_stats);
hw_stats_alloc_fail:
free(softc->grp_info, M_DEVBUF);
@@ -623,16 +639,6 @@
if (rc)
goto dma_fail;
- /* Allocate the TPA start buffer */
- softc->tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) *
- (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- if (softc->tpa_start == NULL) {
- rc = ENOMEM;
- device_printf(softc->dev,
- "Unable to allocate space for TPA\n");
- goto tpa_failed;
- }
/* Get firmware version and compare with driver */
softc->ver_info = malloc(sizeof(struct bnxt_ver_info),
@@ -681,6 +687,20 @@
goto failed;
iflib_set_mac(ctx, softc->func.mac_addr);
+ scctx->isc_txrx = &bnxt_txrx;
+ scctx->isc_tx_csum_flags = (CSUM_IP | CSUM_TCP | CSUM_UDP |
+ CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO);
+ scctx->isc_capenable =
+ /* These are translated to hwassist bits */
+ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 |
+ /* These are checked by iflib */
+ IFCAP_LRO | IFCAP_VLAN_HWFILTER |
+ /* These are part of the iflib mask */
+ IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU |
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
+ /* These likely get lost... */
+ IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU;
+
/* Get the queue config */
rc = bnxt_hwrm_queue_qportcfg(softc);
if (rc) {
@@ -700,6 +720,9 @@
scctx->isc_tx_tso_size_max = BNXT_TSO_SIZE;
scctx->isc_tx_tso_segsize_max = BNXT_TSO_SIZE;
scctx->isc_vectors = softc->func.max_cp_rings;
+ scctx->isc_min_frame_size = BNXT_MIN_FRAME_SIZE;
+ scctx->isc_txrx = &bnxt_txrx;
+
if (scctx->isc_nrxd[0] <
((scctx->isc_nrxd[1] * 4) + scctx->isc_nrxd[2]))
device_printf(softc->dev,
@@ -717,12 +740,12 @@
scctx->isc_nrxd[1];
scctx->isc_rxqsizes[2] = sizeof(struct rx_prod_pkt_bd) *
scctx->isc_nrxd[2];
- scctx->isc_max_rxqsets = min(pci_msix_count(softc->dev)-1,
+ scctx->isc_nrxqsets_max = min(pci_msix_count(softc->dev)-1,
softc->func.max_cp_rings - 1);
- scctx->isc_max_rxqsets = min(scctx->isc_max_rxqsets,
+ scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max,
softc->func.max_rx_rings);
- scctx->isc_max_txqsets = min(softc->func.max_rx_rings,
- softc->func.max_cp_rings - scctx->isc_max_rxqsets - 1);
+ scctx->isc_ntxqsets_max = min(softc->func.max_rx_rings,
+ softc->func.max_cp_rings - scctx->isc_nrxqsets_max - 1);
scctx->isc_rss_table_size = HW_HASH_INDEX_SIZE;
scctx->isc_rss_table_mask = scctx->isc_rss_table_size - 1;
@@ -780,8 +803,6 @@
ver_fail:
free(softc->ver_info, M_DEVBUF);
ver_alloc_fail:
- free(softc->tpa_start, M_DEVBUF);
-tpa_failed:
bnxt_free_hwrm_dma_mem(softc);
dma_fail:
BNXT_HWRM_LOCK_DESTROY(softc);
@@ -795,7 +816,6 @@
{
struct bnxt_softc *softc = iflib_get_softc(ctx);
if_t ifp = iflib_get_ifp(ctx);
- int capabilities, enabling;
int rc;
bnxt_create_config_sysctls_post(softc);
@@ -810,26 +830,6 @@
bnxt_add_media_types(softc);
ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO);
- if_sethwassist(ifp, (CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 |
- CSUM_UDP_IPV6 | CSUM_TSO));
-
- capabilities =
- /* These are translated to hwassit bits */
- IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 |
- /* These are checked by iflib */
- IFCAP_LRO | IFCAP_VLAN_HWFILTER |
- /* These are part of the iflib mask */
- IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU |
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
- /* These likely get lost... */
- IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU;
-
- if_setcapabilities(ifp, capabilities);
-
- enabling = capabilities;
-
- if_setcapenable(ifp, enabling);
-
softc->scctx->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
ETHER_CRC_LEN;
@@ -863,7 +863,8 @@
SLIST_FOREACH_SAFE(tag, &softc->vnic_info.vlan_tags, next, tmp)
free(tag, M_DEVBUF);
iflib_dma_free(&softc->def_cp_ring_mem);
- free(softc->tpa_start, M_DEVBUF);
+ for (i = 0; i < softc->nrxqsets; i++)
+ free(softc->rx_rings[i].tpa_start, M_DEVBUF);
free(softc->ver_info, M_DEVBUF);
free(softc->nvm_info, M_DEVBUF);
@@ -995,14 +996,17 @@
if (rc)
goto fail;
-#ifdef notyet
- /* Enable LRO/TPA/GRO */
+ /*
+ * Enable LRO/TPA/GRO
+ * TBD:
+ * Enable / Disable HW_LRO based on
+ * ifconfig lro / ifconfig -lro setting
+ */
rc = bnxt_hwrm_vnic_tpa_cfg(softc, &softc->vnic_info,
(if_getcapenable(iflib_get_ifp(ctx)) & IFCAP_LRO) ?
HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA : 0);
if (rc)
goto fail;
-#endif
for (i = 0; i < softc->ntxqsets; i++) {
/* Allocate the statistics context */
@@ -1489,7 +1493,7 @@
for (i=0; i<softc->scctx->isc_nrxqsets; i++) {
rc = iflib_irq_alloc_generic(ctx, &softc->rx_cp_rings[i].irq,
- softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RX,
+ softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RXTX,
bnxt_handle_rx_cp, &softc->rx_cp_rings[i], i, "rx_cp");
if (rc) {
device_printf(iflib_get_dev(ctx),
@@ -1500,8 +1504,7 @@
}
for (i=0; i<softc->scctx->isc_ntxqsets; i++)
- iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, NULL, i,
- "tx_cp");
+ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, "tx_cp");
return rc;
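
Among the if_bnxt.c changes above, IFLIB_PNP_INFO() exports the PCI ID table so devmatch(8) can auto-load the module, and the checksum/capability defaults move out of attach_post into the shared softc context during attach_pre. A minimal sketch of the PNP registration pattern follows; "exdrv" and the device ID are made up, only the Broadcom vendor ID 0x14e4 is real, PVID_END appears in the iflib.h hunks below, and PVID() is assumed to be its sibling macro.

#include <net/iflib.h>

static pci_vendor_info_t exdrv_vendor_info_array[] = {
	PVID(0x14e4, 0x1234, "Example Broadcom NIC"),	/* vendor, device, description */
	PVID_END
};

/* Expands to MODULE_PNP_INFO(), publishing the table for devmatch(8). */
IFLIB_PNP_INFO(pci, exdrv, exdrv_vendor_info_array);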
Index: sys/kern/kern_cpuset.c
===================================================================
--- sys/kern/kern_cpuset.c
+++ sys/kern/kern_cpuset.c
@@ -1127,6 +1127,8 @@
case CPU_WHICH_JAIL:
break;
case CPU_WHICH_IRQ:
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
case CPU_WHICH_DOMAIN:
error = EINVAL;
goto out;
@@ -1157,7 +1159,9 @@
CPU_COPY(&set->cs_mask, mask);
break;
case CPU_WHICH_IRQ:
- error = intr_getaffinity(id, mask);
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
+ error = intr_getaffinity(id, which, mask);
break;
case CPU_WHICH_DOMAIN:
if (id < 0 || id >= MAXMEMDOM)
@@ -1260,6 +1264,8 @@
case CPU_WHICH_JAIL:
break;
case CPU_WHICH_IRQ:
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
case CPU_WHICH_DOMAIN:
error = EINVAL;
goto out;
@@ -1289,7 +1295,9 @@
}
break;
case CPU_WHICH_IRQ:
- error = intr_setaffinity(id, mask);
+ case CPU_WHICH_INTRHANDLER:
+ case CPU_WHICH_ITHREAD:
+ error = intr_setaffinity(id, which, mask);
break;
default:
error = EINVAL;
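
The kern_cpuset.c plumbing above lets the existing cpuset syscalls address either the interrupt handler or its ithread separately, using the IRQ number as the id just as CPU_WHICH_IRQ already does. A small userland sketch of querying the new target follows, assuming the updated <sys/cpuset.h> exposes CPU_WHICH_ITHREAD; the IRQ number is illustrative.

#include <sys/param.h>
#include <sys/cpuset.h>
#include <stdio.h>

int
main(void)
{
	cpuset_t mask;
	int irq = 16;	/* illustrative interrupt vector number */

	/* Ask for the mask of the ithread servicing this IRQ; CPU_WHICH_INTRHANDLER
	 * would report the handler/controller binding instead. */
	if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_ITHREAD, irq,
	    sizeof(mask), &mask) != 0) {
		perror("cpuset_getaffinity");
		return (1);
	}
	printf("irq %d ithread may run on %d CPU(s)\n", irq, CPU_COUNT(&mask));
	return (0);
}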
Index: sys/kern/kern_intr.c
===================================================================
--- sys/kern/kern_intr.c
+++ sys/kern/kern_intr.c
@@ -287,13 +287,11 @@
/*
* Bind an interrupt event to the specified CPU. Note that not all
* platforms support binding an interrupt to a CPU. For those
- * platforms this request will fail. For supported platforms, any
- * associated ithreads as well as the primary interrupt context will
- * be bound to the specificed CPU. Using a cpu id of NOCPU unbinds
+ * platforms this request will fail. Using a cpu id of NOCPU unbinds
* the interrupt event.
*/
-int
-intr_event_bind(struct intr_event *ie, int cpu)
+static int
+_intr_event_bind(struct intr_event *ie, int cpu, bool bindirq, bool bindithread)
{
lwpid_t id;
int error;
@@ -313,35 +311,75 @@
* If we have any ithreads try to set their mask first to verify
* permissions, etc.
*/
- mtx_lock(&ie->ie_lock);
- if (ie->ie_thread != NULL) {
- id = ie->ie_thread->it_thread->td_tid;
- mtx_unlock(&ie->ie_lock);
- error = cpuset_setithread(id, cpu);
- if (error)
- return (error);
- } else
- mtx_unlock(&ie->ie_lock);
- error = ie->ie_assign_cpu(ie->ie_source, cpu);
- if (error) {
+ if (bindithread) {
mtx_lock(&ie->ie_lock);
if (ie->ie_thread != NULL) {
- cpu = ie->ie_cpu;
id = ie->ie_thread->it_thread->td_tid;
mtx_unlock(&ie->ie_lock);
- (void)cpuset_setithread(id, cpu);
+ error = cpuset_setithread(id, cpu);
+ if (error)
+ return (error);
} else
mtx_unlock(&ie->ie_lock);
+ }
+ if (bindirq)
+ error = ie->ie_assign_cpu(ie->ie_source, cpu);
+ if (error) {
+ if (bindithread) {
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_thread != NULL) {
+ cpu = ie->ie_cpu;
+ id = ie->ie_thread->it_thread->td_tid;
+ mtx_unlock(&ie->ie_lock);
+ (void)cpuset_setithread(id, cpu);
+ } else
+ mtx_unlock(&ie->ie_lock);
+ }
return (error);
}
- mtx_lock(&ie->ie_lock);
- ie->ie_cpu = cpu;
- mtx_unlock(&ie->ie_lock);
+ if (bindirq) {
+ mtx_lock(&ie->ie_lock);
+ ie->ie_cpu = cpu;
+ mtx_unlock(&ie->ie_lock);
+ }
return (error);
}
+/*
+ * Bind an interrupt event to the specified CPU. For supported platforms, any
+ * associated ithreads as well as the primary interrupt context will be bound
+ * to the specified CPU.
+ */
+int
+intr_event_bind(struct intr_event *ie, int cpu)
+{
+
+ return (_intr_event_bind(ie, cpu, true, true));
+}
+
+/*
+ * Bind an interrupt event to the specified CPU, but do not bind associated
+ * ithreads.
+ */
+int
+intr_event_bind_irqonly(struct intr_event *ie, int cpu)
+{
+
+ return (_intr_event_bind(ie, cpu, true, false));
+}
+
+/*
+ * Bind an interrupt event's ithread to the specified CPU.
+ */
+int
+intr_event_bind_ithread(struct intr_event *ie, int cpu)
+{
+
+ return (_intr_event_bind(ie, cpu, false, true));
+}
+
static struct intr_event *
intr_lookup(int irq)
{
@@ -358,7 +396,7 @@
}
int
-intr_setaffinity(int irq, void *m)
+intr_setaffinity(int irq, int mode, void *m)
{
struct intr_event *ie;
cpuset_t *mask;
@@ -382,26 +420,62 @@
ie = intr_lookup(irq);
if (ie == NULL)
return (ESRCH);
- return (intr_event_bind(ie, cpu));
+ switch (mode) {
+ case CPU_WHICH_IRQ:
+ return (intr_event_bind(ie, cpu));
+ case CPU_WHICH_INTRHANDLER:
+ return (intr_event_bind_irqonly(ie, cpu));
+ case CPU_WHICH_ITHREAD:
+ return (intr_event_bind_ithread(ie, cpu));
+ default:
+ return (EINVAL);
+ }
}
int
-intr_getaffinity(int irq, void *m)
+intr_getaffinity(int irq, int mode, void *m)
{
struct intr_event *ie;
+ struct thread *td;
+ struct proc *p;
cpuset_t *mask;
+ lwpid_t id;
+ int error;
mask = m;
ie = intr_lookup(irq);
if (ie == NULL)
return (ESRCH);
+
+ error = 0;
CPU_ZERO(mask);
- mtx_lock(&ie->ie_lock);
- if (ie->ie_cpu == NOCPU)
- CPU_COPY(cpuset_root, mask);
- else
- CPU_SET(ie->ie_cpu, mask);
- mtx_unlock(&ie->ie_lock);
+ switch (mode) {
+ case CPU_WHICH_IRQ:
+ case CPU_WHICH_INTRHANDLER:
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_cpu == NOCPU)
+ CPU_COPY(cpuset_root, mask);
+ else
+ CPU_SET(ie->ie_cpu, mask);
+ mtx_unlock(&ie->ie_lock);
+ break;
+ case CPU_WHICH_ITHREAD:
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_thread == NULL) {
+ mtx_unlock(&ie->ie_lock);
+ CPU_COPY(cpuset_root, mask);
+ } else {
+ id = ie->ie_thread->it_thread->td_tid;
+ mtx_unlock(&ie->ie_lock);
+ error = cpuset_which(CPU_WHICH_TID, id, &p, &td, NULL);
+ if (error != 0)
+ return (error);
+ CPU_COPY(&td->td_cpuset->cs_mask, mask);
+ PROC_UNLOCK(p);
+ }
+ break;
+ default:
+ return (EINVAL);
+ }
return (0);
}
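
kern_intr.c now offers three binding granularities: intr_event_bind() keeps the old behaviour (handler and ithread together), while intr_event_bind_irqonly() and intr_event_bind_ithread() move each half independently, and intr_setaffinity()/intr_getaffinity() dispatch on the new mode argument. A short kernel-side sketch of the split API follows; error handling is trimmed, and the new prototypes are assumed to live alongside intr_event_bind() in <sys/interrupt.h>.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/interrupt.h>

/* Pin the hardware interrupt to one CPU and its servicing ithread to another. */
static int
steer_irq_split(struct intr_event *ie, int hw_cpu, int ithread_cpu)
{
	int error;

	error = intr_event_bind_irqonly(ie, hw_cpu);
	if (error != 0)
		return (error);
	return (intr_event_bind_ithread(ie, ithread_cpu));
}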
Index: sys/kern/subr_gtaskqueue.c
===================================================================
--- sys/kern/subr_gtaskqueue.c
+++ sys/kern/subr_gtaskqueue.c
@@ -48,7 +48,7 @@
#include <sys/unistd.h>
#include <machine/stdarg.h>
-static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
+static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
static void gtaskqueue_thread_enqueue(void *);
static void gtaskqueue_thread_loop(void *arg);
@@ -134,8 +134,10 @@
snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
- if (!queue)
+ if (!queue) {
+ free(tq_name, M_GTASKQUEUE);
return (NULL);
+ }
STAILQ_INIT(&queue->tq_queue);
TAILQ_INIT(&queue->tq_active);
@@ -663,10 +665,10 @@
void *uniq, int irq, char *name)
{
cpuset_t mask;
- int qid;
+ int qid, error;
gtask->gt_uniq = uniq;
- gtask->gt_name = name;
+ snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
gtask->gt_irq = irq;
gtask->gt_cpu = -1;
mtx_lock(&qgroup->tqg_lock);
@@ -679,7 +681,9 @@
CPU_ZERO(&mask);
CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
mtx_unlock(&qgroup->tqg_lock);
- intr_setaffinity(irq, &mask);
+ error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (error)
+ printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error);
} else
mtx_unlock(&qgroup->tqg_lock);
}
@@ -688,7 +692,7 @@
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
cpuset_t mask;
- int qid, cpu;
+ int qid, cpu, error;
mtx_lock(&qgroup->tqg_lock);
qid = taskqgroup_find(qgroup, gtask->gt_uniq);
@@ -698,9 +702,11 @@
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
- intr_setaffinity(gtask->gt_irq, &mask);
-
+ error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
mtx_lock(&qgroup->tqg_lock);
+ if (error)
+ printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error);
+
}
qgroup->tqg_queue[qid].tgc_cnt++;
@@ -716,11 +722,11 @@
void *uniq, int cpu, int irq, char *name)
{
cpuset_t mask;
- int i, qid;
+ int i, qid, error;
qid = -1;
gtask->gt_uniq = uniq;
- gtask->gt_name = name;
+ snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
gtask->gt_irq = irq;
gtask->gt_cpu = cpu;
mtx_lock(&qgroup->tqg_lock);
@@ -732,6 +738,7 @@
}
if (qid == -1) {
mtx_unlock(&qgroup->tqg_lock);
+ printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
return (EINVAL);
}
} else
@@ -744,8 +751,11 @@
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
- if (irq != -1 && tqg_smp_started)
- intr_setaffinity(irq, &mask);
+ if (irq != -1 && tqg_smp_started) {
+ error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (error)
+ printf("%s: setaffinity failed: %d\n", __func__, error);
+ }
return (0);
}
@@ -753,7 +763,7 @@
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
cpuset_t mask;
- int i, qid, irq, cpu;
+ int i, qid, irq, cpu, error;
qid = -1;
irq = gtask->gt_irq;
@@ -767,6 +777,7 @@
}
if (qid == -1) {
mtx_unlock(&qgroup->tqg_lock);
+ printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
return (EINVAL);
}
qgroup->tqg_queue[qid].tgc_cnt++;
@@ -778,8 +789,11 @@
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
- if (irq != -1)
- intr_setaffinity(irq, &mask);
+ if (irq != -1) {
+ error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (error)
+ printf("%s: setaffinity failed: %d\n", __func__, error);
+ }
return (0);
}
@@ -793,7 +807,7 @@
if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
break;
if (i == qgroup->tqg_cnt)
- panic("taskqgroup_detach: task not in group\n");
+ panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name);
qgroup->tqg_queue[i].tgc_cnt--;
LIST_REMOVE(gtask, gt_list);
mtx_unlock(&qgroup->tqg_lock);
@@ -815,7 +829,7 @@
thread_unlock(curthread);
if (error)
- printf("taskqgroup_binder: setaffinity failed: %d\n",
+ printf("%s: setaffinity failed: %d\n", __func__,
error);
free(gtask, M_DEVBUF);
}
@@ -858,7 +872,7 @@
return (EINVAL);
}
if (qgroup->tqg_adjusting) {
- printf("taskqgroup_adjust failed: adjusting\n");
+ printf("%s failed: adjusting\n", __func__);
return (EBUSY);
}
qgroup->tqg_adjusting = 1;
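
The subr_gtaskqueue.c changes above stop storing the caller's name pointer: the name is copied into the grouptask with snprintf(), the attach paths report intr_setaffinity() failures instead of ignoring them, and the allocation path no longer leaks tq_name on failure. The copy implies a struct change along these lines; the real definition lives in sys/gtaskqueue.h, which is not part of the excerpt shown, and the buffer size here is an assumption.

#define GROUPTASK_NAMELEN	32	/* assumed; use whatever sys/gtaskqueue.h defines */

struct grouptask_sketch {
	/* ...queue linkage, gt_uniq, gt_taskqueue, etc. unchanged... */
	char	gt_name[GROUPTASK_NAMELEN];	/* was: char *gt_name, so callers may now pass temporary strings */
	int	gt_irq;
	int	gt_cpu;
};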
Index: sys/net/ifdi_if.m
===================================================================
--- sys/net/ifdi_if.m
+++ sys/net/ifdi_if.m
@@ -195,11 +195,16 @@
if_ctx_t _ctx;
};
-METHOD int queue_intr_enable {
+METHOD int rx_queue_intr_enable {
if_ctx_t _ctx;
uint16_t _qid;
} DEFAULT null_queue_intr_enable;
+METHOD int tx_queue_intr_enable {
+ if_ctx_t _ctx;
+ uint16_t _qid;
+} DEFAULT null_queue_intr_enable;
+
METHOD void link_intr_enable {
if_ctx_t _ctx;
} DEFAULT null_void_op;
@@ -229,6 +234,7 @@
METHOD void crcstrip_set {
if_ctx_t _ctx;
int _onoff;
+ int _strip;
};
#
@@ -332,4 +338,6 @@
if_int_delay_info_t _iidi;
} DEFAULT null_sysctl_int_delay;
-
+METHOD void debug {
+ if_ctx_t _ctx;
+} DEFAULT null_void_op;
Index: sys/net/iflib.h
===================================================================
--- sys/net/iflib.h
+++ sys/net/iflib.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014-2015, Matthew Macy (mmacy@nextbsd.org)
+ * Copyright (c) 2014-2017, Matthew Macy (mmacy@nextbsd.org)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,14 @@
#include <sys/nv.h>
#include <sys/gtaskqueue.h>
-
/*
+ * The value type for indexing; it limits max descriptors
+ * to 65535 and can be conditionally redefined to uint32_t
+ * in the future if the need arises.
+ */
+typedef uint16_t qidx_t;
+#define QIDX_INVALID 0xFFFF
+/*
* Most cards can handle much larger TSO requests
* but the FreeBSD TCP stack will break on larger
* values
@@ -63,7 +69,7 @@
typedef struct if_rxd_frag {
uint8_t irf_flid;
- uint16_t irf_idx;
+ qidx_t irf_idx;
uint16_t irf_len;
} *if_rxd_frag_t;
@@ -73,47 +79,61 @@
uint16_t iri_vtag; /* vlan tag - if flag set */
/* XXX redundant with the new irf_len field */
uint16_t iri_len; /* packet length */
- uint16_t iri_cidx; /* consumer index of cq */
+ qidx_t iri_cidx; /* consumer index of cq */
struct ifnet *iri_ifp; /* some drivers >1 interface per softc */
/* updated by driver */
- uint16_t iri_flags; /* mbuf flags for packet */
+ if_rxd_frag_t iri_frags;
uint32_t iri_flowid; /* RSS hash for packet */
uint32_t iri_csum_flags; /* m_pkthdr csum flags */
+
uint32_t iri_csum_data; /* m_pkthdr csum data */
+ uint8_t iri_flags; /* mbuf flags for packet */
uint8_t iri_nfrags; /* number of fragments in packet */
uint8_t iri_rsstype; /* RSS hash type */
uint8_t iri_pad; /* any padding in the received data */
- if_rxd_frag_t iri_frags;
} *if_rxd_info_t;
+typedef struct if_rxd_update {
+ uint64_t *iru_paddrs;
+ caddr_t *iru_vaddrs;
+ qidx_t *iru_idxs;
+ qidx_t iru_pidx;
+ uint16_t iru_qsidx;
+ uint16_t iru_count;
+ uint16_t iru_buf_size;
+ uint8_t iru_flidx;
+} *if_rxd_update_t;
+
#define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */
#define IPI_TX_IPV4 0x2 /* ethertype IPv4 */
#define IPI_TX_IPV6 0x4 /* ethertype IPv6 */
typedef struct if_pkt_info {
- uint32_t ipi_len; /* packet length */
- bus_dma_segment_t *ipi_segs; /* physical addresses */
- uint16_t ipi_qsidx; /* queue set index */
- uint16_t ipi_nsegs; /* number of segments */
- uint16_t ipi_ndescs; /* number of descriptors used by encap */
- uint16_t ipi_flags; /* iflib per-packet flags */
- uint32_t ipi_pidx; /* start pidx for encap */
- uint32_t ipi_new_pidx; /* next available pidx post-encap */
+ bus_dma_segment_t *ipi_segs; /* physical addresses */
+ uint32_t ipi_len; /* packet length */
+ uint16_t ipi_qsidx; /* queue set index */
+ qidx_t ipi_nsegs; /* number of segments */
+
+ qidx_t ipi_ndescs; /* number of descriptors used by encap */
+ uint16_t ipi_flags; /* iflib per-packet flags */
+ qidx_t ipi_pidx; /* start pidx for encap */
+ qidx_t ipi_new_pidx; /* next available pidx post-encap */
/* offload handling */
- uint64_t ipi_csum_flags; /* packet checksum flags */
- uint16_t ipi_tso_segsz; /* tso segment size */
- uint16_t ipi_mflags; /* packet mbuf flags */
- uint16_t ipi_vtag; /* VLAN tag */
- uint16_t ipi_etype; /* ether header type */
- uint8_t ipi_ehdrlen; /* ether header length */
- uint8_t ipi_ip_hlen; /* ip header length */
- uint8_t ipi_tcp_hlen; /* tcp header length */
- uint8_t ipi_tcp_hflags; /* tcp header flags */
- uint8_t ipi_ipproto; /* ip protocol */
- /* implied padding */
- uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t ipi_tcp_sum; /* tcp csum */
+ uint8_t ipi_ehdrlen; /* ether header length */
+ uint8_t ipi_ip_hlen; /* ip header length */
+ uint8_t ipi_tcp_hlen; /* tcp header length */
+ uint8_t ipi_ipproto; /* ip protocol */
+
+ uint32_t ipi_csum_flags; /* packet checksum flags */
+ uint16_t ipi_tso_segsz; /* tso segment size */
+ uint16_t ipi_vtag; /* VLAN tag */
+ uint16_t ipi_etype; /* ether header type */
+ uint8_t ipi_tcp_hflags; /* tcp header flags */
+ uint8_t ipi_mflags; /* packet mbuf flags */
+
+ uint32_t ipi_tcp_seq; /* tcp seqno */
+ uint32_t ipi_tcp_sum; /* tcp csum */
} *if_pkt_info_t;
typedef struct if_irq {
@@ -154,17 +174,20 @@
#define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name}
#define PVID_END {0, 0, 0, 0, 0, 0, NULL}
+#define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:subvendor;U32:subdevice;" \
+ "U32:revision;U32:class;D:human"
+#define IFLIB_PNP_INFO(b, u, t) \
+ MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, sizeof(t[0]), nitems(t) - 1)
+
typedef struct if_txrx {
int (*ift_txd_encap) (void *, if_pkt_info_t);
- void (*ift_txd_flush) (void *, uint16_t, uint32_t);
- int (*ift_txd_credits_update) (void *, uint16_t, uint32_t, bool);
+ void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx);
+ int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear);
- int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx,
- int budget);
+ int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget);
int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri);
- void (*ift_rxd_refill) (void * , uint16_t qsidx, uint8_t flidx, uint32_t pidx,
- uint64_t *paddrs, caddr_t *vaddrs, uint16_t count, uint16_t buf_size);
- void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, uint32_t pidx);
+ void (*ift_rxd_refill) (void * , if_rxd_update_t iru);
+ void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx);
int (*ift_legacy_intr) (void *);
} *if_txrx_t;
@@ -179,11 +202,15 @@
uint32_t isc_txqsizes[8];
uint32_t isc_rxqsizes[8];
- int isc_max_txqsets;
- int isc_max_rxqsets;
+ /* is there such a thing as a descriptor that is more than 248 bytes? */
+ uint8_t isc_txd_size[8];
+ uint8_t isc_rxd_size[8];
+
int isc_tx_tso_segments_max;
int isc_tx_tso_size_max;
int isc_tx_tso_segsize_max;
+ int isc_tx_csum_flags;
+ int isc_capenable;
int isc_rss_table_size;
int isc_rss_table_mask;
int isc_nrxqsets_max;
@@ -191,32 +218,28 @@
iflib_intr_mode_t isc_intr;
uint16_t isc_max_frame_size; /* set at init time by driver */
+ uint16_t isc_min_frame_size; /* set at init time by driver, only used if
+ IFLIB_NEED_ETHER_PAD is set. */
+ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */
pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */
+ int isc_disable_msix;
+ if_txrx_t isc_txrx;
} *if_softc_ctx_t;
/*
* Initialization values for device
*/
struct if_shared_ctx {
- int isc_magic;
- if_txrx_t isc_txrx;
+ unsigned isc_magic;
driver_t *isc_driver;
- int isc_nfl;
- int isc_flags;
bus_size_t isc_q_align;
bus_size_t isc_tx_maxsize;
bus_size_t isc_tx_maxsegsize;
bus_size_t isc_rx_maxsize;
bus_size_t isc_rx_maxsegsize;
int isc_rx_nsegments;
- int isc_rx_process_limit;
- int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
- int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
int isc_admin_intrcnt; /* # of admin/link interrupts */
-
- int isc_tx_reclaim_thresh;
-
/* fields necessary for probe */
pci_vendor_info_t *isc_vendor_info;
char *isc_driver_version;
@@ -229,6 +252,14 @@
int isc_ntxd_min[8];
int isc_ntxd_default[8];
int isc_ntxd_max[8];
+
+ /* actively used during operation */
+ int isc_nfl __aligned(CACHE_LINE_SIZE);
+ int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
+ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
+ int isc_rx_process_limit;
+ int isc_tx_reclaim_thresh;
+ int isc_flags;
};
typedef struct iflib_dma_info {
@@ -242,8 +273,9 @@
#define IFLIB_MAGIC 0xCAFEF00D
typedef enum {
- IFLIB_INTR_TX,
IFLIB_INTR_RX,
+ IFLIB_INTR_TX,
+ IFLIB_INTR_RXTX,
IFLIB_INTR_ADMIN,
IFLIB_INTR_IOV,
} iflib_intr_type_t;
@@ -256,22 +288,42 @@
/*
* Interface has a separate command queue for RX
*/
-#define IFLIB_HAS_RXCQ 0x1
+#define IFLIB_HAS_RXCQ 0x01
/*
* Driver has already allocated vectors
*/
-#define IFLIB_SKIP_MSIX 0x2
-
+#define IFLIB_SKIP_MSIX 0x02
/*
* Interface is a virtual function
*/
-#define IFLIB_IS_VF 0x4
+#define IFLIB_IS_VF 0x04
/*
* Interface has a separate command queue for TX
*/
-#define IFLIB_HAS_TXCQ 0x8
+#define IFLIB_HAS_TXCQ 0x08
+/*
+ * Interface does checksum in place
+ */
+#define IFLIB_NEED_SCRATCH 0x10
+/*
+ * Interface doesn't expect in_pseudo for th_sum
+ */
+#define IFLIB_TSO_INIT_IP 0x20
+/*
+ * Interface doesn't align IP header
+ */
+#define IFLIB_DO_RX_FIXUP 0x40
+/*
+ * Driver needs csum zeroed for offloading
+ */
+#define IFLIB_NEED_ZERO_CSUM 0x80
+/*
+ * Driver needs frames padded to some minimum length
+ */
+#define IFLIB_NEED_ETHER_PAD 0x100
+
/*
* field accessors
*/
@@ -288,9 +340,6 @@
void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]);
-
-
-
/*
* If the driver can plug cleanly in to newbus use these
*/
@@ -319,7 +368,7 @@
int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
void *filter_arg, int qid, char *name);
-void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name);
+void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name);
void iflib_irq_free(if_ctx_t ctx, if_irq_t irq);
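
The iflib.h reorganization above narrows ring indices to qidx_t, moves if_txrx and the capability/csum defaults into the per-instance softc context, and adds new driver flags, including IFLIB_NEED_ETHER_PAD paired with isc_min_frame_size for parts that cannot pad runt frames themselves (the bnxt usage earlier in this diff). A hedged sketch of how a driver opts in follows; all exdrv_* names are placeholders, and the minimum frame size is taken from the bnxt value.

#include <net/if.h>
#include <net/iflib.h>

extern struct if_txrx exdrv_txrx;	/* driver's descriptor ops, defined elsewhere */

/* Static shared-context template: ask iflib to pad short frames. */
static struct if_shared_ctx exdrv_sctx_init = {
	.isc_magic = IFLIB_MAGIC,
	/* ... */
	.isc_flags = IFLIB_NEED_ETHER_PAD,
};

static int
exdrv_attach_pre(if_ctx_t ctx)
{
	if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx);

	scctx->isc_min_frame_size = 52;		/* pad anything shorter before transmit */
	scctx->isc_txrx = &exdrv_txrx;		/* txrx ops are now per-instance */
	scctx->isc_capenable = IFCAP_RXCSUM | IFCAP_TXCSUM;
	return (0);
}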
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014-2016, Matthew Macy <mmacy@nextbsd.org>
+ * Copyright (c) 2014-2017, Matthew Macy <mmacy@nextbsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_acpi.h"
+#include "opt_sched.h"
#include <sys/param.h>
#include <sys/types.h>
@@ -59,6 +60,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/mp_ring.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
@@ -68,6 +70,8 @@
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
#include <machine/bus.h>
#include <machine/in_cksum.h>
@@ -93,9 +97,10 @@
#include <x86/iommu/busdma_dmar.h>
#endif
-
+#include <sys/bitstring.h>
/*
- * enable accounting of every mbuf as it comes in to and goes out of iflib's software descriptor references
+ * enable accounting of every mbuf as it comes in to and goes out of
+ * iflib's software descriptor references
*/
#define MEMORY_LOGGING 0
/*
@@ -134,10 +139,13 @@
struct iflib_fl;
typedef struct iflib_fl *iflib_fl_t;
+static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid);
+
typedef struct iflib_filter_info {
driver_filter_t *ifi_filter;
void *ifi_filter_arg;
struct grouptask *ifi_task;
+ void *ifi_ctx;
} *iflib_filter_info_t;
struct iflib_ctx {
@@ -156,7 +164,6 @@
struct mtx ifc_mtx;
uint16_t ifc_nhwtxqs;
- uint16_t ifc_nhwrxqs;
iflib_txq_t ifc_txqs;
iflib_rxq_t ifc_rxqs;
@@ -167,7 +174,6 @@
int ifc_link_state;
int ifc_link_irq;
- int ifc_pause_frames;
int ifc_watchdog_events;
struct cdev *ifc_led_dev;
struct resource *ifc_msix_mem;
@@ -182,9 +188,10 @@
uint16_t ifc_sysctl_ntxqs;
uint16_t ifc_sysctl_nrxqs;
uint16_t ifc_sysctl_qs_eq_override;
+ uint16_t ifc_sysctl_rx_budget;
- uint16_t ifc_sysctl_ntxds[8];
- uint16_t ifc_sysctl_nrxds[8];
+ qidx_t ifc_sysctl_ntxds[8];
+ qidx_t ifc_sysctl_nrxds[8];
struct if_txrx ifc_txrx;
#define isc_txd_encap ifc_txrx.ift_txd_encap
#define isc_txd_flush ifc_txrx.ift_txd_flush
@@ -252,7 +259,9 @@
return (ctx->ifc_sctx);
}
+#define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2)
#define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*))
+#define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1)))
#define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP)
#define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF)
@@ -262,62 +271,70 @@
#define RX_SW_DESC_INUSE (1 << 3)
#define TX_SW_DESC_MAPPED (1 << 4)
-typedef struct iflib_sw_rx_desc {
- bus_dmamap_t ifsd_map; /* bus_dma map for packet */
- struct mbuf *ifsd_m; /* rx: uninitialized mbuf */
- caddr_t ifsd_cl; /* direct cluster pointer for rx */
- uint16_t ifsd_flags;
-} *iflib_rxsd_t;
+#define M_TOOBIG M_PROTO1
-typedef struct iflib_sw_tx_desc_val {
- bus_dmamap_t ifsd_map; /* bus_dma map for packet */
- struct mbuf *ifsd_m; /* pkthdr mbuf */
- uint8_t ifsd_flags;
-} *iflib_txsd_val_t;
+typedef struct iflib_sw_rx_desc_array {
+ bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */
+ struct mbuf **ifsd_m; /* pkthdr mbufs */
+ caddr_t *ifsd_cl; /* direct cluster pointer for rx */
+ uint8_t *ifsd_flags;
+} iflib_rxsd_array_t;
typedef struct iflib_sw_tx_desc_array {
bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */
struct mbuf **ifsd_m; /* pkthdr mbufs */
uint8_t *ifsd_flags;
-} iflib_txsd_array_t;
+} if_txsd_vec_t;
/* magic number that should be high enough for any hardware */
#define IFLIB_MAX_TX_SEGS 128
-#define IFLIB_MAX_RX_SEGS 32
+/* bnxt supports 64 with hardware LRO enabled */
+#define IFLIB_MAX_RX_SEGS 64
#define IFLIB_RX_COPY_THRESH 128
#define IFLIB_MAX_RX_REFRESH 32
+/* The minimum descriptors per second before we start coalescing */
+#define IFLIB_MIN_DESC_SEC 16384
+#define IFLIB_DEFAULT_TX_UPDATE_FREQ 16
#define IFLIB_QUEUE_IDLE 0
#define IFLIB_QUEUE_HUNG 1
#define IFLIB_QUEUE_WORKING 2
+/* maximum number of txqs that can share an rx interrupt */
+#define IFLIB_MAX_TX_SHARED_INTR 4
-/* this should really scale with ring size - 32 is a fairly arbitrary value for this */
-#define TX_BATCH_SIZE 16
+/* this should really scale with ring size - this is a fairly arbitrary value */
+#define TX_BATCH_SIZE 32
#define IFLIB_RESTART_BUDGET 8
-#define IFC_LEGACY 0x01
-#define IFC_QFLUSH 0x02
-#define IFC_MULTISEG 0x04
-#define IFC_DMAR 0x08
-#define IFC_SC_ALLOCATED 0x10
+#define IFC_LEGACY 0x001
+#define IFC_QFLUSH 0x002
+#define IFC_MULTISEG 0x004
+#define IFC_DMAR 0x008
+#define IFC_SC_ALLOCATED 0x010
+#define IFC_INIT_DONE 0x020
+#define IFC_PREFETCH 0x040
+#define IFC_DO_RESET 0x080
+#define IFC_CHECK_HUNG 0x100
#define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)
struct iflib_txq {
- uint16_t ift_in_use;
- uint16_t ift_cidx;
- uint16_t ift_cidx_processed;
- uint16_t ift_pidx;
+ qidx_t ift_in_use;
+ qidx_t ift_cidx;
+ qidx_t ift_cidx_processed;
+ qidx_t ift_pidx;
uint8_t ift_gen;
- uint8_t ift_db_pending;
- uint8_t ift_db_pending_queued;
- uint8_t ift_npending;
uint8_t ift_br_offset;
+ uint16_t ift_npending;
+ uint16_t ift_db_pending;
+ uint16_t ift_rs_pending;
/* implicit pad */
+ uint8_t ift_txd_size[8];
uint64_t ift_processed;
uint64_t ift_cleaned;
+ uint64_t ift_cleaned_prev;
#if MEMORY_LOGGING
uint64_t ift_enqueued;
uint64_t ift_dequeued;
@@ -335,19 +352,16 @@
/* constant values */
if_ctx_t ift_ctx;
- struct ifmp_ring **ift_br;
+ struct ifmp_ring *ift_br;
struct grouptask ift_task;
- uint16_t ift_size;
+ qidx_t ift_size;
uint16_t ift_id;
struct callout ift_timer;
- struct callout ift_db_check;
- iflib_txsd_array_t ift_sds;
- uint8_t ift_nbr;
- uint8_t ift_qstatus;
- uint8_t ift_active;
- uint8_t ift_closed;
- int ift_watchdog_time;
+ if_txsd_vec_t ift_sds;
+ uint8_t ift_qstatus;
+ uint8_t ift_closed;
+ uint8_t ift_update_freq;
struct iflib_filter_info ift_filter_info;
bus_dma_tag_t ift_desc_tag;
bus_dma_tag_t ift_tso_desc_tag;
@@ -356,13 +370,17 @@
char ift_mtx_name[MTX_NAME_LEN];
char ift_db_mtx_name[MTX_NAME_LEN];
bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE);
+#ifdef IFLIB_DIAGNOSTICS
+ uint64_t ift_cpu_exec_count[256];
+#endif
} __aligned(CACHE_LINE_SIZE);
struct iflib_fl {
- uint16_t ifl_cidx;
- uint16_t ifl_pidx;
- uint16_t ifl_credits;
+ qidx_t ifl_cidx;
+ qidx_t ifl_pidx;
+ qidx_t ifl_credits;
uint8_t ifl_gen;
+ uint8_t ifl_rxd_size;
#if MEMORY_LOGGING
uint64_t ifl_m_enqueued;
uint64_t ifl_m_dequeued;
@@ -371,24 +389,27 @@
#endif
/* implicit pad */
+ bitstr_t *ifl_rx_bitmap;
+ qidx_t ifl_fragidx;
/* constant */
- uint16_t ifl_size;
+ qidx_t ifl_size;
uint16_t ifl_buf_size;
uint16_t ifl_cltype;
uma_zone_t ifl_zone;
- iflib_rxsd_t ifl_sds;
+ iflib_rxsd_array_t ifl_sds;
iflib_rxq_t ifl_rxq;
uint8_t ifl_id;
bus_dma_tag_t ifl_desc_tag;
iflib_dma_info_t ifl_ifdi;
uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE);
caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH];
+ qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH];
} __aligned(CACHE_LINE_SIZE);
-static inline int
-get_inuse(int size, int cidx, int pidx, int gen)
+static inline qidx_t
+get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen)
{
- int used;
+ qidx_t used;
if (pidx > cidx)
used = pidx - cidx;
@@ -414,9 +435,9 @@
* these are the cq cidx and pidx. Otherwise
* these are unused.
*/
- uint16_t ifr_size;
- uint16_t ifr_cq_cidx;
- uint16_t ifr_cq_pidx;
+ qidx_t ifr_size;
+ qidx_t ifr_cq_cidx;
+ qidx_t ifr_cq_pidx;
uint8_t ifr_cq_gen;
uint8_t ifr_fl_offset;
@@ -426,26 +447,89 @@
uint16_t ifr_id;
uint8_t ifr_lro_enabled;
uint8_t ifr_nfl;
+ uint8_t ifr_ntxqirq;
+ uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR];
struct lro_ctrl ifr_lc;
struct grouptask ifr_task;
struct iflib_filter_info ifr_filter_info;
iflib_dma_info_t ifr_ifdi;
+
/* dynamically allocate if any drivers need a value substantially larger than this */
struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE);
+#ifdef IFLIB_DIAGNOSTICS
+ uint64_t ifr_cpu_exec_count[256];
+#endif
} __aligned(CACHE_LINE_SIZE);
+typedef struct if_rxsd {
+ caddr_t *ifsd_cl;
+ struct mbuf **ifsd_m;
+ iflib_fl_t ifsd_fl;
+ qidx_t ifsd_cidx;
+} *if_rxsd_t;
+
+/* multiple of word size */
+#ifdef __LP64__
+#define PKT_INFO_SIZE 6
+#define RXD_INFO_SIZE 5
+#define PKT_TYPE uint64_t
+#else
+#define PKT_INFO_SIZE 11
+#define RXD_INFO_SIZE 8
+#define PKT_TYPE uint32_t
+#endif
+#define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3)
+#define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4)
+
+typedef struct if_pkt_info_pad {
+ PKT_TYPE pkt_val[PKT_INFO_SIZE];
+} *if_pkt_info_pad_t;
+typedef struct if_rxd_info_pad {
+ PKT_TYPE rxd_val[RXD_INFO_SIZE];
+} *if_rxd_info_pad_t;
+
+CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info));
+CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info));
+
+
+static inline void
+pkt_info_zero(if_pkt_info_t pi)
+{
+ if_pkt_info_pad_t pi_pad;
+
+ pi_pad = (if_pkt_info_pad_t)pi;
+ pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0;
+ pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0;
+#ifndef __LP64__
+ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0;
+ pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0;
+#endif
+}
+
+static inline void
+rxd_info_zero(if_rxd_info_t ri)
+{
+ if_rxd_info_pad_t ri_pad;
+ int i;
+
+ ri_pad = (if_rxd_info_pad_t)ri;
+ for (i = 0; i < RXD_LOOP_BOUND; i += 4) {
+ ri_pad->rxd_val[i] = 0;
+ ri_pad->rxd_val[i+1] = 0;
+ ri_pad->rxd_val[i+2] = 0;
+ ri_pad->rxd_val[i+3] = 0;
+ }
+#ifdef __LP64__
+ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0;
+#endif
+}
+
/*
* Only allow a single packet to take up most 1/nth of the tx ring
*/
#define MAX_SINGLE_PACKET_FRACTION 12
#define IF_BAD_DMA (bus_addr_t)-1
-static int enable_msix = 1;
-
-#define mtx_held(m) (((m)->mtx_lock & ~MTX_FLAGMASK) != (uintptr_t)0)
-
-
-
#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING))
#define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF)
@@ -455,12 +539,6 @@
#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx)
-#define TXDB_LOCK_INIT(txq) mtx_init(&(txq)->ift_db_mtx, (txq)->ift_db_mtx_name, NULL, MTX_DEF)
-#define TXDB_TRYLOCK(txq) mtx_trylock(&(txq)->ift_db_mtx)
-#define TXDB_LOCK(txq) mtx_lock(&(txq)->ift_db_mtx)
-#define TXDB_UNLOCK(txq) mtx_unlock(&(txq)->ift_db_mtx)
-#define TXDB_LOCK_DESTROY(txq) mtx_destroy(&(txq)->ift_db_mtx)
-
#define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx)
#define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx)
@@ -480,6 +558,7 @@
MODULE_DEPEND(iflib, pci, 1, 1, 1);
MODULE_DEPEND(iflib, ether, 1, 1, 1);
+TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1);
TASKQGROUP_DEFINE(if_config_tqg, 1, 1);
#ifndef IFLIB_DEBUG_COUNTERS
@@ -497,9 +576,11 @@
* XXX need to ensure that this can't accidentally cause the head to be moved backwards
*/
static int iflib_min_tx_latency = 0;
-
SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW,
- &iflib_min_tx_latency, 0, "minimize transmit latency at the possibel expense of throughput");
+ &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput");
+static int iflib_no_tx_batch = 0;
+SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW,
+ &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput");
#if IFLIB_DEBUG_COUNTERS
@@ -544,11 +625,14 @@
static int iflib_encap_load_mbuf_fail;
+static int iflib_encap_pad_mbuf_fail;
static int iflib_encap_txq_avail_fail;
static int iflib_encap_txd_encap_fail;
SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD,
&iflib_encap_load_mbuf_fail, 0, "# busdma load failures");
+SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD,
+ &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD,
&iflib_encap_txq_avail_fail, 0, "# txq avail failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD,
@@ -594,10 +678,24 @@
&iflib_verbose_debug, 0, "enable verbose debugging");
#define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1)
+static void
+iflib_debug_reset(void)
+{
+ iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs =
+ iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees =
+ iflib_txq_drain_flushing = iflib_txq_drain_oactive =
+ iflib_txq_drain_notready = iflib_txq_drain_encapfail =
+ iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail =
+ iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail =
+ iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs =
+ iflib_intr_link = iflib_intr_msix = iflib_rx_unavail =
+ iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input =
+ iflib_rx_mbuf_null = iflib_rxd_flush = 0;
+}
#else
#define DBG_COUNTER_INC(name)
-
+static void iflib_debug_reset(void) {}
#endif
@@ -608,7 +706,7 @@
static void iflib_rx_structures_free(if_ctx_t ctx);
static int iflib_queues_alloc(if_ctx_t ctx);
static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
-static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget);
+static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget);
static int iflib_qset_structures_setup(if_ctx_t ctx);
static int iflib_msix_init(if_ctx_t ctx);
static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str);
@@ -618,8 +716,14 @@
static void iflib_init_locked(if_ctx_t ctx);
static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
static void iflib_add_device_sysctl_post(if_ctx_t ctx);
+static void iflib_ifmp_purge(iflib_txq_t txq);
+static void _iflib_pre_assert(if_softc_ctx_t scctx);
+static void iflib_stop(if_ctx_t ctx);
+static void iflib_if_init_locked(if_ctx_t ctx);
+#ifndef __NO_STRICT_ALIGNMENT
+static struct mbuf * iflib_fixup_rx(struct mbuf *m);
+#endif
-
#ifdef DEV_NETMAP
#include <sys/selinfo.h>
#include <net/netmap.h>
@@ -627,6 +731,8 @@
MODULE_DEPEND(iflib, netmap, 1, 1, 1);
+static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init);
+
/*
* device-specific sysctl variables:
*
@@ -662,6 +768,7 @@
{
struct ifnet *ifp = na->ifp;
if_ctx_t ctx = ifp->if_softc;
+ int status;
CTX_LOCK(ctx);
IFDI_INTR_DISABLE(ctx);
@@ -670,7 +777,7 @@
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
if (!CTX_IS_VF(ctx))
- IFDI_CRCSTRIP_SET(ctx, onoff);
+ IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
@@ -678,12 +785,90 @@
} else {
nm_clear_native_flags(na);
}
- IFDI_INIT(ctx);
- IFDI_CRCSTRIP_SET(ctx, onoff); // XXX why twice ?
+ iflib_stop(ctx);
+ iflib_init_locked(ctx);
+ IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ?
+ status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1;
+ if (status)
+ nm_clear_native_flags(na);
CTX_UNLOCK(ctx);
- return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
+ return (status);
}
+static int
+netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init)
+{
+ struct netmap_adapter *na = kring->na;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int head = kring->rhead;
+ struct netmap_ring *ring = kring->ring;
+ bus_dmamap_t *map;
+ struct if_rxd_update iru;
+ if_ctx_t ctx = rxq->ifr_ctx;
+ iflib_fl_t fl = &rxq->ifr_fl[0];
+ uint32_t refill_pidx, nic_i;
+
+ if (nm_i == head && __predict_true(!init))
+ return 0;
+ iru_init(&iru, rxq, 0 /* flid */);
+ map = fl->ifl_sds.ifsd_map;
+ refill_pidx = netmap_idx_k2n(kring, nm_i);
+ /*
+ * IMPORTANT: we must leave one free slot in the ring,
+ * so move head back by one unit
+ */
+ head = nm_prev(head, lim);
+ while (nm_i != head) {
+ for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]);
+ uint32_t nic_i_dma = refill_pidx;
+ nic_i = netmap_idx_k2n(kring, nm_i);
+
+ MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH);
+
+ if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
+ return netmap_ring_reinit(kring);
+
+ fl->ifl_vm_addrs[tmp_pidx] = addr;
+ if (__predict_false(init) && map) {
+ netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
+ } else if (map && (slot->flags & NS_BUF_CHANGED)) {
+ /* buffer has changed, reload map */
+ netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
+ }
+ slot->flags &= ~NS_BUF_CHANGED;
+
+ nm_i = nm_next(nm_i, lim);
+ fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim);
+ if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1)
+ continue;
+
+ iru.iru_pidx = refill_pidx;
+ iru.iru_count = tmp_pidx+1;
+ ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
+
+ refill_pidx = nic_i;
+ if (map == NULL)
+ continue;
+
+ for (int n = 0; n < iru.iru_count; n++) {
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma],
+ BUS_DMASYNC_PREREAD);
+ /* XXX - change this to not use the netmap func*/
+ nic_i_dma = nm_next(nic_i_dma, lim);
+ }
+ }
+ }
+ kring->nr_hwcur = head;
+
+ if (map)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
+ return (0);
+}
+
/*
* Reconcile kernel and user view of the transmit ring.
*
@@ -720,14 +905,11 @@
if_ctx_t ctx = ifp->if_softc;
iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id];
- pi.ipi_segs = txq->ift_segs;
- pi.ipi_qsidx = kring->ring_id;
- pi.ipi_ndescs = 0;
+ if (txq->ift_sds.ifsd_map)
+ bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
/*
* First part: process new packets to send.
* nm_i is the current index in the netmap ring,
@@ -750,13 +932,17 @@
* to prefetch the next slot and txr entry.
*/
- nm_i = kring->nr_hwcur;
+ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);
+ pkt_info_zero(&pi);
+ pi.ipi_segs = txq->ift_segs;
+ pi.ipi_qsidx = kring->ring_id;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
__builtin_prefetch(&ring->slot[nm_i]);
__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
- __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);
+ if (txq->ift_sds.ifsd_map)
+ __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
@@ -768,6 +954,11 @@
IPI_TX_INTR : 0;
/* device-specific */
+ pi.ipi_len = len;
+ pi.ipi_segs[0].ds_addr = paddr;
+ pi.ipi_segs[0].ds_len = len;
+ pi.ipi_nsegs = 1;
+ pi.ipi_ndescs = 0;
pi.ipi_pidx = nic_i;
pi.ipi_flags = flags;
@@ -777,27 +968,28 @@
/* prefetch for next round */
__builtin_prefetch(&ring->slot[nm_i + 1]);
__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
- __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);
+ if (txq->ift_sds.ifsd_map) {
+ __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);
- NM_CHECK_ADDR_LEN(na, addr, len);
+ NM_CHECK_ADDR_LEN(na, addr, len);
- if (slot->flags & NS_BUF_CHANGED) {
- /* buffer has changed, reload map */
- netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr);
+ if (slot->flags & NS_BUF_CHANGED) {
+ /* buffer has changed, reload map */
+ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr);
+ }
+ /* make sure changes to the buffer are synced */
+ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i],
+ BUS_DMASYNC_PREWRITE);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
-
- /* make sure changes to the buffer are synced */
- bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i],
- BUS_DMASYNC_PREWRITE);
-
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
/* synchronize the NIC ring */
- bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
+ if (txq->ift_sds.ifsd_map)
+ bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
@@ -832,30 +1024,29 @@
iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
- struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
- u_int nm_i; /* index into the netmap ring */
- u_int nic_i; /* index into the NIC ring */
+ uint32_t nm_i; /* index into the netmap ring */
+ uint32_t nic_i; /* index into the NIC ring */
u_int i, n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = kring->rhead;
+ u_int const head = netmap_idx_n2k(kring, kring->rhead);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
struct if_rxd_info ri;
- /* device-specific */
+
+ struct ifnet *ifp = na->ifp;
if_ctx_t ctx = ifp->if_softc;
iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id];
iflib_fl_t fl = rxq->ifr_fl;
if (head > lim)
return netmap_ring_reinit(kring);
- bzero(&ri, sizeof(ri));
- ri.iri_qsidx = kring->ring_id;
- ri.iri_ifp = ctx->ifc_ifp;
/* XXX check sync modes */
- for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++)
+ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) {
+ if (fl->ifl_sds.ifsd_map == NULL)
+ continue;
bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
+ }
/*
* First part: import newly received packets.
*
@@ -876,19 +1067,24 @@
int error, avail;
uint16_t slot_flags = kring->nkr_slot_flags;
- for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) {
+ for (i = 0; i < rxq->ifr_nfl; i++) {
+ fl = &rxq->ifr_fl[i];
nic_i = fl->ifl_cidx;
nm_i = netmap_idx_n2k(kring, nic_i);
- avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX);
+ avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX);
for (n = 0; avail > 0; n++, avail--) {
+ rxd_info_zero(&ri);
+ ri.iri_frags = rxq->ifr_frags;
+ ri.iri_qsidx = kring->ring_id;
+ ri.iri_ifp = ctx->ifc_ifp;
+ ri.iri_cidx = nic_i;
+
error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
- if (error)
- ring->slot[nm_i].len = 0;
- else
- ring->slot[nm_i].len = ri.iri_len - crclen;
+ ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen;
ring->slot[nm_i].flags = slot_flags;
- bus_dmamap_sync(fl->ifl_ifdi->idi_tag,
- fl->ifl_sds[nic_i].ifsd_map, BUS_DMASYNC_POSTREAD);
+ if (fl->ifl_sds.ifsd_map)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag,
+ fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
@@ -899,7 +1095,7 @@
iflib_rx_miss_bufs += n;
}
fl->ifl_cidx = nic_i;
- kring->nr_hwtail = nm_i;
+ kring->nr_hwtail = netmap_idx_k2n(kring, nm_i);
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
@@ -913,51 +1109,27 @@
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*/
/* XXX not sure how this will work with multiple free lists */
- nm_i = kring->nr_hwcur;
- if (nm_i != head) {
- nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != head; n++) {
- struct netmap_slot *slot = &ring->slot[nm_i];
- uint64_t paddr;
- caddr_t vaddr;
- void *addr = PNMB(na, slot, &paddr);
+ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);
- if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
- goto ring_reset;
+ return (netmap_fl_refill(rxq, kring, nm_i, false));
+}
- vaddr = addr;
- if (slot->flags & NS_BUF_CHANGED) {
- /* buffer has changed, reload map */
- netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, addr);
- slot->flags &= ~NS_BUF_CHANGED;
- }
- /*
- * XXX we should be batching this operation - TODO
- */
- ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size);
- bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map,
- BUS_DMASYNC_PREREAD);
- nm_i = nm_next(nm_i, lim);
- nic_i = nm_next(nic_i, lim);
- }
- kring->nr_hwcur = head;
+static void
+iflib_netmap_intr(struct netmap_adapter *na, int onoff)
+{
+ struct ifnet *ifp = na->ifp;
+ if_ctx_t ctx = ifp->if_softc;
- bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- /*
- * IMPORTANT: we must leave one free slot in the ring,
- * so move nic_i back by one unit
- */
- nic_i = nm_prev(nic_i, lim);
- ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
+ CTX_LOCK(ctx);
+ if (onoff) {
+ IFDI_INTR_ENABLE(ctx);
+ } else {
+ IFDI_INTR_DISABLE(ctx);
}
-
- return 0;
-
-ring_reset:
- return netmap_ring_reinit(kring);
+ CTX_UNLOCK(ctx);
}
+
static int
iflib_netmap_attach(if_ctx_t ctx)
{
@@ -976,6 +1148,7 @@
na.nm_txsync = iflib_netmap_txsync;
na.nm_rxsync = iflib_netmap_rxsync;
na.nm_register = iflib_netmap_register;
+ na.nm_intr = iflib_netmap_intr;
na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets;
na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets;
return (netmap_attach(&na));
@@ -990,6 +1163,8 @@
slot = netmap_reset(na, NR_TX, txq->ift_id, 0);
if (slot == NULL)
return;
+ if (txq->ift_sds.ifsd_map == NULL)
+ return;
for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {
@@ -1004,37 +1179,20 @@
netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si));
}
}
+
static void
iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
{
struct netmap_adapter *na = NA(ctx->ifc_ifp);
+ struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id];
struct netmap_slot *slot;
- iflib_rxsd_t sd;
- int nrxd;
+ uint32_t nm_i;
slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0);
if (slot == NULL)
return;
- sd = rxq->ifr_fl[0].ifl_sds;
- nrxd = ctx->ifc_softc_ctx.isc_nrxd[0];
- for (int i = 0; i < nrxd; i++, sd++) {
- int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i);
- uint64_t paddr;
- void *addr;
- caddr_t vaddr;
-
- vaddr = addr = PNMB(na, slot + sj, &paddr);
- netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr);
- /* Update descriptor and the cached value */
- ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size);
- }
- /* preserve queue */
- if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id];
- int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
- ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t);
- } else
- ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1);
+ nm_i = netmap_idx_n2k(kring, 0);
+ netmap_fl_refill(rxq, kring, nm_i, true);
}
#define iflib_netmap_detach(ifp) netmap_detach(ifp)
@@ -1046,6 +1204,7 @@
#define iflib_netmap_attach(ctx) (0)
#define netmap_rx_irq(ifp, qid, budget) (0)
+#define netmap_tx_irq(ifp, qid) do {} while (0)
#endif
@@ -1055,11 +1214,34 @@
{
__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
}
+static __inline void
+prefetch2cachelines(void *x)
+{
+ __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
+#if (CACHE_LINE_SIZE < 128)
+ __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long)))));
+#endif
+}
#else
#define prefetch(x)
+#define prefetch2cachelines(x)
#endif
static void
+iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid)
+{
+ iflib_fl_t fl;
+
+ fl = &rxq->ifr_fl[flid];
+ iru->iru_paddrs = fl->ifl_bus_addrs;
+ iru->iru_vaddrs = &fl->ifl_vm_addrs[0];
+ iru->iru_idxs = fl->ifl_rxd_idxs;
+ iru->iru_qsidx = rxq->ifr_id;
+ iru->iru_buf_size = fl->ifl_buf_size;
+ iru->iru_flidx = fl->ifl_id;
+}
+
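For reference, a rough sketch of how a driver-side isc_rxd_refill() callback might consume the if_rxd_update_t that iru_init() above prepares. This is not part of the patch: only the iru_* field names come from the code above; the structure and descriptor-write helper named below are hypothetical placeholders.

/*
 * Illustrative sketch only -- not part of this diff.  The iru_* fields
 * match iru_init() above; example_write_rx_desc() is a hypothetical
 * helper standing in for a driver's descriptor-posting routine.
 */
static void
example_isc_rxd_refill(void *sc, if_rxd_update_t iru)
{
	int i;

	for (i = 0; i < iru->iru_count; i++) {
		/* iru_idxs[] holds the free-list slot iflib picked for each buffer */
		qidx_t idx = iru->iru_idxs[i];

		/* post one physical address to the hardware free/RX ring */
		example_write_rx_desc(sc, iru->iru_qsidx, iru->iru_flidx,
		    idx, iru->iru_paddrs[i], iru->iru_buf_size);
	}
}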
+static void
_iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
if (err)
@@ -1177,6 +1359,8 @@
{
iflib_filter_info_t info = arg;
struct grouptask *gtask = info->ifi_task;
+ if (!iflib_started)
+ return (FILTER_HANDLED);
DBG_COUNTER_INC(fast_intrs);
if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
@@ -1187,19 +1371,77 @@
}
static int
+iflib_fast_intr_rxtx(void *arg)
+{
+ iflib_filter_info_t info = arg;
+ struct grouptask *gtask = info->ifi_task;
+ iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx;
+ if_ctx_t ctx;
+ int i, cidx;
+
+ if (!iflib_started)
+ return (FILTER_HANDLED);
+
+ DBG_COUNTER_INC(fast_intrs);
+ if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
+ return (FILTER_HANDLED);
+
+ for (i = 0; i < rxq->ifr_ntxqirq; i++) {
+ qidx_t txqid = rxq->ifr_txqid[i];
+
+ ctx = rxq->ifr_ctx;
+
+ if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) {
+ IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid);
+ continue;
+ }
+ GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task);
+ }
+ if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ)
+ cidx = rxq->ifr_cq_cidx;
+ else
+ cidx = rxq->ifr_fl[0].ifl_cidx;
+ if (iflib_rxd_avail(ctx, rxq, cidx, 1))
+ GROUPTASK_ENQUEUE(gtask);
+ else
+ IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+ return (FILTER_HANDLED);
+}
+
+
+static int
+iflib_fast_intr_ctx(void *arg)
+{
+ iflib_filter_info_t info = arg;
+ struct grouptask *gtask = info->ifi_task;
+
+ if (!iflib_started)
+ return (FILTER_HANDLED);
+
+ DBG_COUNTER_INC(fast_intrs);
+ if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
+ return (FILTER_HANDLED);
+
+ GROUPTASK_ENQUEUE(gtask);
+ return (FILTER_HANDLED);
+}
+
+static int
_iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
driver_filter_t filter, driver_intr_t handler, void *arg,
char *name)
{
- int rc;
+ int rc, flags;
struct resource *res;
- void *tag;
+ void *tag = NULL;
device_t dev = ctx->ifc_dev;
+ flags = RF_ACTIVE;
+ if (ctx->ifc_flags & IFC_LEGACY)
+ flags |= RF_SHAREABLE;
MPASS(rid < 512);
irq->ii_rid = rid;
- res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid,
- RF_SHAREABLE | RF_ACTIVE);
+ res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags);
if (res == NULL) {
device_printf(dev,
"failed to allocate IRQ for rid %d, name %s.\n", rid, name);
@@ -1265,11 +1507,6 @@
(uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize);
goto fail;
}
-#ifdef IFLIB_DIAGNOSTICS
- device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n",
- sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
-
-#endif
if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
@@ -1286,11 +1523,6 @@
goto fail;
}
-#ifdef IFLIB_DIAGNOSTICS
- device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
- scctx->isc_tx_tso_size_max, ntsosegments,
- scctx->isc_tx_tso_segsize_max);
-#endif
if (!(txq->ift_sds.ifsd_flags =
(uint8_t *) malloc(sizeof(uint8_t) *
scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
@@ -1307,7 +1539,7 @@
}
/* Create the descriptor buffer dma maps */
-#if defined(ACPI_DMAR) || (!(defined(__i386__) && !defined(__amd64__)))
+#if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__)))
if ((ctx->ifc_flags & IFC_DMAR) == 0)
return (0);
@@ -1406,11 +1638,14 @@
iflib_dma_info_t di;
int i;
- /* Set number of descriptors available */
+ /* Set number of descriptors available */
txq->ift_qstatus = IFLIB_QUEUE_IDLE;
+ /* XXX make configurable */
+ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ;
/* Reset indices */
- txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
+ txq->ift_cidx_processed = 0;
+ txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
@@ -1439,7 +1674,6 @@
if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
device_t dev = ctx->ifc_dev;
iflib_fl_t fl;
- iflib_rxsd_t rxsd;
int err;
MPASS(scctx->isc_nrxd[0] > 0);
@@ -1447,13 +1681,6 @@
fl = rxq->ifr_fl;
for (int i = 0; i < rxq->ifr_nfl; i++, fl++) {
- fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) *
- scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB,
- M_WAITOK | M_ZERO);
- if (fl->ifl_sds == NULL) {
- device_printf(dev, "Unable to allocate rx sw desc memory\n");
- return (ENOMEM);
- }
fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */
err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1, 0, /* alignment, bounds */
@@ -1472,16 +1699,48 @@
__func__, err);
goto fail;
}
+ if (!(fl->ifl_sds.ifsd_flags =
+ (uint8_t *) malloc(sizeof(uint8_t) *
+ scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate tx_buffer memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
+ if (!(fl->ifl_sds.ifsd_m =
+ (struct mbuf **) malloc(sizeof(struct mbuf *) *
+ scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate tx_buffer memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
+ if (!(fl->ifl_sds.ifsd_cl =
+ (caddr_t *) malloc(sizeof(caddr_t) *
+ scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate tx_buffer memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
- rxsd = fl->ifl_sds;
- for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) {
- err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map);
- if (err) {
- device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
- __func__, err);
+ /* Create the descriptor buffer dma maps */
+#if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__)))
+ if ((ctx->ifc_flags & IFC_DMAR) == 0)
+ continue;
+
+ if (!(fl->ifl_sds.ifsd_map =
+ (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate tx_buffer map memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
+
+ for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) {
+ err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]);
+ if (err != 0) {
+ device_printf(dev, "Unable to create RX buffer DMA map\n");
goto fail;
}
}
+#endif
}
return (0);
@@ -1531,20 +1790,32 @@
_iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
{
struct mbuf *m;
- int pidx = fl->ifl_pidx;
- iflib_rxsd_t rxsd = &fl->ifl_sds[pidx];
- caddr_t cl;
+ int idx, frag_idx = fl->ifl_fragidx;
+ int pidx = fl->ifl_pidx;
+ caddr_t cl, *sd_cl;
+ struct mbuf **sd_m;
+ uint8_t *sd_flags;
+ struct if_rxd_update iru;
+ bus_dmamap_t *sd_map;
int n, i = 0;
uint64_t bus_addr;
int err;
+ qidx_t credits;
+ sd_m = fl->ifl_sds.ifsd_m;
+ sd_map = fl->ifl_sds.ifsd_map;
+ sd_cl = fl->ifl_sds.ifsd_cl;
+ sd_flags = fl->ifl_sds.ifsd_flags;
+ idx = pidx;
+ credits = fl->ifl_credits;
+
n = count;
MPASS(n > 0);
- MPASS(fl->ifl_credits + n <= fl->ifl_size);
+ MPASS(credits + n <= fl->ifl_size);
if (pidx < fl->ifl_cidx)
MPASS(pidx + n <= fl->ifl_cidx);
- if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size))
+ if (pidx == fl->ifl_cidx && (credits < fl->ifl_size))
MPASS(fl->ifl_gen == 0);
if (pidx > fl->ifl_cidx)
MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx);
@@ -1552,7 +1823,7 @@
DBG_COUNTER_INC(fl_refills);
if (n > 8)
DBG_COUNTER_INC(fl_refills_large);
-
+ iru_init(&iru, fl->ifl_rxq, fl->ifl_id);
while (n--) {
/*
* We allocate an uninitialized mbuf + cluster, mbuf is
@@ -1560,8 +1831,11 @@
*
* If the cluster is still set then we know a minimum sized packet was received
*/
- if ((cl = rxsd->ifsd_cl) == NULL) {
- if ((cl = rxsd->ifsd_cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
+ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx);
+ if ((frag_idx < 0) || (frag_idx >= fl->ifl_size))
+ bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx);
+ if ((cl = sd_cl[frag_idx]) == NULL) {
+ if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
break;
#if MEMORY_LOGGING
fl->ifl_cl_enqueued++;
@@ -1575,19 +1849,6 @@
#endif
DBG_COUNTER_INC(rx_allocs);
-#ifdef notyet
- if ((rxsd->ifsd_flags & RX_SW_DESC_MAP_CREATED) == 0) {
- int err;
-
- if ((err = bus_dmamap_create(fl->ifl_ifdi->idi_tag, 0, &rxsd->ifsd_map))) {
- log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
- uma_zfree(fl->ifl_zone, cl);
- n = 0;
- goto done;
- }
- rxsd->ifsd_flags |= RX_SW_DESC_MAP_CREATED;
- }
-#endif
#if defined(__i386__) || defined(__amd64__)
if (!IS_DMAR(ctx)) {
bus_addr = pmap_kextract((vm_offset_t)cl);
@@ -1599,8 +1860,12 @@
cb_arg.error = 0;
q = fl->ifl_rxq;
- err = bus_dmamap_load(fl->ifl_desc_tag, rxsd->ifsd_map,
+ MPASS(sd_map != NULL);
+ MPASS(sd_map[frag_idx] != NULL);
+ err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx],
cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0);
+ bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx],
+ BUS_DMASYNC_PREREAD);
if (err != 0 || cb_arg.error) {
/*
@@ -1614,36 +1879,52 @@
}
bus_addr = cb_arg.seg.ds_addr;
}
- rxsd->ifsd_flags |= RX_SW_DESC_INUSE;
+ bit_set(fl->ifl_rx_bitmap, frag_idx);
+ sd_flags[frag_idx] |= RX_SW_DESC_INUSE;
- MPASS(rxsd->ifsd_m == NULL);
- rxsd->ifsd_cl = cl;
- rxsd->ifsd_m = m;
+ MPASS(sd_m[frag_idx] == NULL);
+ sd_cl[frag_idx] = cl;
+ sd_m[frag_idx] = m;
+ fl->ifl_rxd_idxs[i] = frag_idx;
fl->ifl_bus_addrs[i] = bus_addr;
fl->ifl_vm_addrs[i] = cl;
- rxsd++;
- fl->ifl_credits++;
+ credits++;
i++;
- MPASS(fl->ifl_credits <= fl->ifl_size);
- if (++fl->ifl_pidx == fl->ifl_size) {
- fl->ifl_pidx = 0;
+ MPASS(credits <= fl->ifl_size);
+ if (++idx == fl->ifl_size) {
fl->ifl_gen = 1;
- rxsd = fl->ifl_sds;
+ idx = 0;
}
if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
- ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx,
- fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size);
+ iru.iru_pidx = pidx;
+ iru.iru_count = i;
+ ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
i = 0;
- pidx = fl->ifl_pidx;
+ pidx = idx;
+ fl->ifl_pidx = idx;
+ fl->ifl_credits = credits;
}
+
}
done:
+ if (i) {
+ iru.iru_pidx = pidx;
+ iru.iru_count = i;
+ ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
+ fl->ifl_pidx = idx;
+ fl->ifl_credits = credits;
+ }
DBG_COUNTER_INC(rxd_flush);
if (fl->ifl_pidx == 0)
pidx = fl->ifl_size - 1;
else
pidx = fl->ifl_pidx - 1;
+
+ if (sd_map)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx);
+ fl->ifl_fragidx = frag_idx;
}
static __inline void
@@ -1669,33 +1950,46 @@
uint32_t i;
for (i = 0; i < fl->ifl_size; i++) {
- iflib_rxsd_t d = &fl->ifl_sds[i];
+ struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i];
+ uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i];
+ caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i];
- if (d->ifsd_flags & RX_SW_DESC_INUSE) {
- bus_dmamap_unload(fl->ifl_desc_tag, d->ifsd_map);
- bus_dmamap_destroy(fl->ifl_desc_tag, d->ifsd_map);
- if (d->ifsd_m != NULL) {
- m_init(d->ifsd_m, M_NOWAIT, MT_DATA, 0);
- uma_zfree(zone_mbuf, d->ifsd_m);
+ if (*sd_flags & RX_SW_DESC_INUSE) {
+ if (fl->ifl_sds.ifsd_map != NULL) {
+ bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i];
+ bus_dmamap_unload(fl->ifl_desc_tag, sd_map);
+ if (fl->ifl_rxq->ifr_ctx->ifc_in_detach)
+ bus_dmamap_destroy(fl->ifl_desc_tag, sd_map);
}
- if (d->ifsd_cl != NULL)
- uma_zfree(fl->ifl_zone, d->ifsd_cl);
- d->ifsd_flags = 0;
+ if (*sd_m != NULL) {
+ m_init(*sd_m, M_NOWAIT, MT_DATA, 0);
+ uma_zfree(zone_mbuf, *sd_m);
+ }
+ if (*sd_cl != NULL)
+ uma_zfree(fl->ifl_zone, *sd_cl);
+ *sd_flags = 0;
} else {
- MPASS(d->ifsd_cl == NULL);
- MPASS(d->ifsd_m == NULL);
+ MPASS(*sd_cl == NULL);
+ MPASS(*sd_m == NULL);
}
#if MEMORY_LOGGING
fl->ifl_m_dequeued++;
fl->ifl_cl_dequeued++;
#endif
- d->ifsd_cl = NULL;
- d->ifsd_m = NULL;
+ *sd_cl = NULL;
+ *sd_m = NULL;
}
+#ifdef INVARIANTS
+ for (i = 0; i < fl->ifl_size; i++) {
+ MPASS(fl->ifl_sds.ifsd_flags[i] == 0);
+ MPASS(fl->ifl_sds.ifsd_cl[i] == NULL);
+ MPASS(fl->ifl_sds.ifsd_m[i] == NULL);
+ }
+#endif
/*
* Reset free list values
*/
- fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = 0;;
+ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0;
bzero(idi->idi_vaddr, idi->idi_size);
}
@@ -1711,6 +2005,7 @@
if_ctx_t ctx = rxq->ifr_ctx;
if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
+ bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1);
/*
** Free current RX buffer structs and their mbufs
*/
@@ -1723,12 +2018,17 @@
*/
if (sctx->isc_max_frame_size <= 2048)
fl->ifl_buf_size = MCLBYTES;
+#ifndef CONTIGMALLOC_WORKS
+ else
+ fl->ifl_buf_size = MJUMPAGESIZE;
+#else
else if (sctx->isc_max_frame_size <= 4096)
fl->ifl_buf_size = MJUMPAGESIZE;
else if (sctx->isc_max_frame_size <= 9216)
fl->ifl_buf_size = MJUM9BYTES;
else
fl->ifl_buf_size = MJUM16BYTES;
+#endif
if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
@@ -1770,10 +2070,14 @@
bus_dma_tag_destroy(fl->ifl_desc_tag);
fl->ifl_desc_tag = NULL;
}
+ free(fl->ifl_sds.ifsd_m, M_IFLIB);
+ free(fl->ifl_sds.ifsd_cl, M_IFLIB);
+ /* XXX destroy maps first */
+ free(fl->ifl_sds.ifsd_map, M_IFLIB);
+ fl->ifl_sds.ifsd_m = NULL;
+ fl->ifl_sds.ifsd_cl = NULL;
+ fl->ifl_sds.ifsd_map = NULL;
}
- if (rxq->ifr_fl->ifl_sds != NULL)
- free(rxq->ifr_fl->ifl_sds, M_IFLIB);
-
free(rxq->ifr_fl, M_IFLIB);
rxq->ifr_fl = NULL;
rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
@@ -1789,7 +2093,7 @@
{
iflib_txq_t txq = arg;
if_ctx_t ctx = txq->ift_ctx;
- if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
+ if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
return;
@@ -1800,28 +2104,32 @@
*/
IFDI_TIMER(ctx, txq->ift_id);
if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) &&
- (ctx->ifc_pause_frames == 0))
+ ((txq->ift_cleaned_prev == txq->ift_cleaned) ||
+ (sctx->isc_pause_frames == 0)))
goto hung;
- if (TXQ_AVAIL(txq) <= 2*scctx->isc_tx_nsegments ||
- ifmp_ring_is_stalled(txq->ift_br[0]))
+ if (ifmp_ring_is_stalled(txq->ift_br))
+ txq->ift_qstatus = IFLIB_QUEUE_HUNG;
+ txq->ift_cleaned_prev = txq->ift_cleaned;
+ /* handle any laggards */
+ if (txq->ift_db_pending)
GROUPTASK_ENQUEUE(&txq->ift_task);
- ctx->ifc_pause_frames = 0;
+ sctx->isc_pause_frames = 0;
if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)
callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
return;
hung:
CTX_LOCK(ctx);
- if_setdrvflagbits(ctx->ifc_ifp, 0, IFF_DRV_RUNNING);
+ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n",
txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx);
IFDI_WATCHDOG_RESET(ctx);
ctx->ifc_watchdog_events++;
- ctx->ifc_pause_frames = 0;
- iflib_init_locked(ctx);
+ ctx->ifc_flags |= IFC_DO_RESET;
+ iflib_admin_intr_deferred(ctx);
CTX_UNLOCK(ctx);
}
@@ -1829,22 +2137,25 @@
iflib_init_locked(if_ctx_t ctx)
{
if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
if_t ifp = ctx->ifc_ifp;
iflib_fl_t fl;
iflib_txq_t txq;
iflib_rxq_t rxq;
- int i, j;
+ int i, j, tx_ip_csum_flags, tx_ip6_csum_flags;
if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
IFDI_INTR_DISABLE(ctx);
+ tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP);
+ tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP);
/* Set hardware offload abilities */
if_clearhwassist(ifp);
if (if_getcapenable(ifp) & IFCAP_TXCSUM)
- if_sethwassistbits(ifp, CSUM_IP | CSUM_TCP | CSUM_UDP, 0);
+ if_sethwassistbits(ifp, tx_ip_csum_flags, 0);
if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
- if_sethwassistbits(ifp, (CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0);
+ if_sethwassistbits(ifp, tx_ip6_csum_flags, 0);
if (if_getcapenable(ifp) & IFCAP_TSO4)
if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
if (if_getcapenable(ifp) & IFCAP_TSO6)
@@ -1853,19 +2164,21 @@
for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) {
CALLOUT_LOCK(txq);
callout_stop(&txq->ift_timer);
- callout_stop(&txq->ift_db_check);
CALLOUT_UNLOCK(txq);
iflib_netmap_txq_init(ctx, txq);
}
- for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
- iflib_netmap_rxq_init(ctx, rxq);
- }
#ifdef INVARIANTS
i = if_getdrvflags(ifp);
#endif
IFDI_INIT(ctx);
MPASS(if_getdrvflags(ifp) == i);
for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
+ /* XXX this should really be done on a per-queue basis */
+ if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+ MPASS(rxq->ifr_id == i);
+ iflib_netmap_rxq_init(ctx, rxq);
+ continue;
+ }
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
if (iflib_fl_setup(fl)) {
device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n");
@@ -1920,102 +2233,159 @@
if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
IFDI_INTR_DISABLE(ctx);
- msleep(ctx, &ctx->ifc_mtx, PUSER, "iflib_init", hz);
+ DELAY(1000);
+ IFDI_STOP(ctx);
+ DELAY(1000);
+ iflib_debug_reset();
/* Wait for current tx queue users to exit to disarm watchdog timer. */
for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) {
/* make sure all transmitters have completed before proceeding XXX */
+ CALLOUT_LOCK(txq);
+ callout_stop(&txq->ift_timer);
+ CALLOUT_UNLOCK(txq);
+
/* clean any enqueued buffers */
- iflib_txq_check_drain(txq, 0);
+ iflib_ifmp_purge(txq);
/* Free any existing tx buffers. */
for (j = 0; j < txq->ift_size; j++) {
iflib_txsd_free(ctx, txq, j);
}
txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
- txq->ift_in_use = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
+ txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0;
txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0;
txq->ift_pullups = 0;
- ifmp_ring_reset_stats(txq->ift_br[0]);
+ ifmp_ring_reset_stats(txq->ift_br);
for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++)
bzero((void *)di->idi_vaddr, di->idi_size);
}
for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
/* make sure all transmitters have completed before proceeding XXX */
- for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++)
+ for (j = 0, di = rxq->ifr_ifdi; j < rxq->ifr_nfl; j++, di++)
bzero((void *)di->idi_vaddr, di->idi_size);
/* also resets the free lists pidx/cidx */
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
iflib_fl_bufs_free(fl);
}
- IFDI_STOP(ctx);
}
-static iflib_rxsd_t
-rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int *cltype, int unload)
+static inline caddr_t
+calc_next_rxd(iflib_fl_t fl, int cidx)
{
+ qidx_t size;
+ int nrxd;
+ caddr_t start, end, cur, next;
+
+ nrxd = fl->ifl_size;
+ size = fl->ifl_rxd_size;
+ start = fl->ifl_ifdi->idi_vaddr;
+
+ if (__predict_false(size == 0))
+ return (start);
+ cur = start + size*cidx;
+ end = start + size*nrxd;
+ next = CACHE_PTR_NEXT(cur);
+ return (next < end ? next : start);
+}
+
+static inline void
+prefetch_pkts(iflib_fl_t fl, int cidx)
+{
+ int nextptr;
+ int nrxd = fl->ifl_size;
+ caddr_t next_rxd;
+
+
+ nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1);
+ prefetch(&fl->ifl_sds.ifsd_m[nextptr]);
+ prefetch(&fl->ifl_sds.ifsd_cl[nextptr]);
+ next_rxd = calc_next_rxd(fl, cidx);
+ prefetch(next_rxd);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]);
+ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]);
+}
+
+static void
+rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
+{
int flid, cidx;
- iflib_rxsd_t sd;
+ bus_dmamap_t map;
iflib_fl_t fl;
iflib_dma_info_t di;
+ int next;
+ map = NULL;
flid = irf->irf_flid;
cidx = irf->irf_idx;
fl = &rxq->ifr_fl[flid];
+ sd->ifsd_fl = fl;
+ sd->ifsd_cidx = cidx;
+ sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx];
+ sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx];
fl->ifl_credits--;
#if MEMORY_LOGGING
fl->ifl_m_dequeued++;
- if (cltype)
- fl->ifl_cl_dequeued++;
#endif
- sd = &fl->ifl_sds[cidx];
- di = fl->ifl_ifdi;
- bus_dmamap_sync(di->idi_tag, di->idi_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH)
+ prefetch_pkts(fl, cidx);
+ if (fl->ifl_sds.ifsd_map != NULL) {
+ next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1);
+ prefetch(&fl->ifl_sds.ifsd_map[next]);
+ map = fl->ifl_sds.ifsd_map[cidx];
+ di = fl->ifl_ifdi;
+ next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1);
+ prefetch(&fl->ifl_sds.ifsd_flags[next]);
+ bus_dmamap_sync(di->idi_tag, di->idi_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/* not valid assert if bxe really does SGE from non-contiguous elements */
- MPASS(fl->ifl_cidx == cidx);
- if (unload)
- bus_dmamap_unload(fl->ifl_desc_tag, sd->ifsd_map);
-
- if (__predict_false(++fl->ifl_cidx == fl->ifl_size)) {
- fl->ifl_cidx = 0;
- fl->ifl_gen = 0;
+ MPASS(fl->ifl_cidx == cidx);
+ if (unload)
+ bus_dmamap_unload(fl->ifl_desc_tag, map);
}
- /* YES ick */
- if (cltype)
- *cltype = fl->ifl_cltype;
- return (sd);
+ fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1);
+ if (__predict_false(fl->ifl_cidx == 0))
+ fl->ifl_gen = 0;
+ if (map != NULL)
+ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ bit_clear(fl->ifl_rx_bitmap, cidx);
}
static struct mbuf *
-assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri)
+assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd)
{
- int i, padlen , flags, cltype;
+ int i, padlen , flags;
struct mbuf *m, *mh, *mt;
- iflib_rxsd_t sd;
caddr_t cl;
i = 0;
mh = NULL;
do {
- sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE);
+ rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd);
- MPASS(sd->ifsd_cl != NULL);
- MPASS(sd->ifsd_m != NULL);
+ MPASS(*sd->ifsd_cl != NULL);
+ MPASS(*sd->ifsd_m != NULL);
/* Don't include zero-length frags */
if (ri->iri_frags[i].irf_len == 0) {
/* XXX we can save the cluster here, but not the mbuf */
- m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
- m_free(sd->ifsd_m);
- sd->ifsd_m = NULL;
+ m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
+ m_free(*sd->ifsd_m);
+ *sd->ifsd_m = NULL;
continue;
}
-
- m = sd->ifsd_m;
+ m = *sd->ifsd_m;
+ *sd->ifsd_m = NULL;
if (mh == NULL) {
flags = M_PKTHDR|M_EXT;
mh = mt = m;
@@ -2027,13 +2397,12 @@
/* assuming padding is only on the first fragment */
padlen = 0;
}
- sd->ifsd_m = NULL;
- cl = sd->ifsd_cl;
- sd->ifsd_cl = NULL;
+ cl = *sd->ifsd_cl;
+ *sd->ifsd_cl = NULL;
/* Can these two be made one ? */
m_init(m, M_NOWAIT, MT_DATA, flags);
- m_cljset(m, cl, cltype);
+ m_cljset(m, cl, sd->ifsd_fl->ifl_cltype);
/*
* These must follow m_init and m_cljset
*/
@@ -2051,20 +2420,24 @@
static struct mbuf *
iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
{
+ struct if_rxsd sd;
struct mbuf *m;
- iflib_rxsd_t sd;
/* should I merge this back in now that the two paths are basically duplicated? */
if (ri->iri_nfrags == 1 &&
ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) {
- sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE);
- m = sd->ifsd_m;
- sd->ifsd_m = NULL;
+ rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd);
+ m = *sd.ifsd_m;
+ *sd.ifsd_m = NULL;
m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
- memcpy(m->m_data, sd->ifsd_cl, ri->iri_len);
+#ifndef __NO_STRICT_ALIGNMENT
+ if (!IP_ALIGNED(m))
+ m->m_data += 2;
+#endif
+ memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len);
m->m_len = ri->iri_frags[0].irf_len;
} else {
- m = assemble_segments(rxq, ri);
+ m = assemble_segments(rxq, ri, &sd);
}
m->m_pkthdr.len = ri->iri_len;
m->m_pkthdr.rcvif = ri->iri_ifp;
@@ -2077,29 +2450,76 @@
return (m);
}
+#if defined(INET6) || defined(INET)
+static void
+iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6)
+{
+ CURVNET_SET(lc->ifp->if_vnet);
+#if defined(INET6)
+ *v6 = VNET(ip6_forwarding);
+#endif
+#if defined(INET)
+ *v4 = VNET(ipforwarding);
+#endif
+ CURVNET_RESTORE();
+}
+
+/*
+ * Returns true if it's possible this packet could be LROed.
+ * If it returns false, it is guaranteed that tcp_lro_rx()
+ * would not return zero.
+ */
static bool
-iflib_rxeof(iflib_rxq_t rxq, int budget)
+iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding)
{
+ struct ether_header *eh;
+ uint16_t eh_type;
+
+ eh = mtod(m, struct ether_header *);
+ eh_type = ntohs(eh->ether_type);
+ switch (eh_type) {
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ return !v6_forwarding;
+#endif
+#if defined (INET)
+ case ETHERTYPE_IP:
+ return !v4_forwarding;
+#endif
+ }
+
+ return false;
+}
+#else
+static void
+iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused)
+{
+}
+#endif
+
+static bool
+iflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
+{
if_ctx_t ctx = rxq->ifr_ctx;
if_shared_ctx_t sctx = ctx->ifc_sctx;
if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
int avail, i;
- uint16_t *cidxp;
+ qidx_t *cidxp;
struct if_rxd_info ri;
int err, budget_left, rx_bytes, rx_pkts;
iflib_fl_t fl;
struct ifnet *ifp;
int lro_enabled;
+ bool lro_possible = false;
+ bool v4_forwarding, v6_forwarding;
+
/*
* XXX early demux data packets so that if_input processing only handles
* acks in interrupt context
*/
- struct mbuf *m, *mh, *mt;
+ struct mbuf *m, *mh, *mt, *mf;
- if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &budget)) {
- return (FALSE);
- }
-
+ ifp = ctx->ifc_ifp;
mh = mt = NULL;
MPASS(budget > 0);
rx_pkts = rx_bytes = 0;
@@ -2122,18 +2542,19 @@
/*
* Reset client set fields to their default values
*/
- bzero(&ri, sizeof(ri));
+ rxd_info_zero(&ri);
ri.iri_qsidx = rxq->ifr_id;
ri.iri_cidx = *cidxp;
- ri.iri_ifp = ctx->ifc_ifp;
+ ri.iri_ifp = ifp;
ri.iri_frags = rxq->ifr_frags;
err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
- /* in lieu of handling correctly - make sure it isn't being unhandled */
- MPASS(err == 0);
+ if (err)
+ goto err;
if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
*cidxp = ri.iri_cidx;
/* Update our consumer index */
+ /* XXX NB: shurd - check if this is still safe */
while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) {
rxq->ifr_cq_cidx -= scctx->isc_nrxd[0];
rxq->ifr_cq_gen = 0;
@@ -2166,20 +2587,52 @@
for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
__iflib_fl_refill_lt(ctx, fl, budget + 8);
- ifp = ctx->ifc_ifp;
lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
+ if (lro_enabled)
+ iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding);
+ mt = mf = NULL;
while (mh != NULL) {
m = mh;
mh = mh->m_nextpkt;
m->m_nextpkt = NULL;
+#ifndef __NO_STRICT_ALIGNMENT
+ if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL)
+ continue;
+#endif
rx_bytes += m->m_pkthdr.len;
rx_pkts++;
#if defined(INET6) || defined(INET)
- if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0)
- continue;
+ if (lro_enabled) {
+ if (!lro_possible) {
+ lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding);
+ if (lro_possible && mf != NULL) {
+ ifp->if_input(ifp, mf);
+ DBG_COUNTER_INC(rx_if_input);
+ mt = mf = NULL;
+ }
+ }
+ if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) ==
+ (CSUM_L4_CALC|CSUM_L4_VALID)) {
+ if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0)
+ continue;
+ }
+ }
#endif
+ if (lro_possible) {
+ ifp->if_input(ifp, m);
+ DBG_COUNTER_INC(rx_if_input);
+ continue;
+ }
+
+ if (mf == NULL)
+ mf = m;
+ if (mt != NULL)
+ mt->m_nextpkt = m;
+ mt = m;
+ }
+ if (mf != NULL) {
+ ifp->if_input(ifp, mf);
DBG_COUNTER_INC(rx_if_input);
- ifp->if_input(ifp, m);
}
if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes);
@@ -2194,45 +2647,78 @@
if (avail)
return true;
return (iflib_rxd_avail(ctx, rxq, *cidxp, 1));
+err:
+ CTX_LOCK(ctx);
+ ctx->ifc_flags |= IFC_DO_RESET;
+ iflib_admin_intr_deferred(ctx);
+ CTX_UNLOCK(ctx);
+ return (false);
}
+#define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1)
+static inline qidx_t
+txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use)
+{
+ qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
+ qidx_t minthresh = txq->ift_size / 8;
+ if (in_use > 4*minthresh)
+ return (notify_count);
+ if (in_use > 2*minthresh)
+ return (notify_count >> 1);
+ if (in_use > minthresh)
+ return (notify_count >> 3);
+ return (0);
+}
+
+static inline qidx_t
+txq_max_rs_deferred(iflib_txq_t txq)
+{
+ qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
+ qidx_t minthresh = txq->ift_size / 8;
+ if (txq->ift_in_use > 4*minthresh)
+ return (notify_count);
+ if (txq->ift_in_use > 2*minthresh)
+ return (notify_count >> 1);
+ if (txq->ift_in_use > minthresh)
+ return (notify_count >> 2);
+ return (2);
+}
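As a rough numeric illustration of the scaling above (values assumed, not taken from this diff): with ift_size = 1024 and ift_update_freq = 16, TXD_NOTIFY_COUNT is 63 and minthresh is 128, so a lightly used ring defers nothing while a ring more than half full may accumulate up to 63 pending descriptors before the doorbell is rung. A small standalone sketch of that arithmetic:

/* Standalone sketch of txq_max_db_deferred() scaling; the ring size and
 * update frequency below are assumed example values. */
#include <stdio.h>

int
main(void)
{
	unsigned size = 1024;		/* assumed ift_size */
	unsigned update_freq = 16;	/* assumed ift_update_freq */
	unsigned notify = (size / update_freq) - 1;	/* TXD_NOTIFY_COUNT */
	unsigned minthresh = size / 8;
	unsigned in_use, max;

	for (in_use = 0; in_use <= size; in_use += minthresh) {
		if (in_use > 4 * minthresh)
			max = notify;
		else if (in_use > 2 * minthresh)
			max = notify >> 1;
		else if (in_use > minthresh)
			max = notify >> 3;
		else
			max = 0;
		printf("in_use=%4u -> defer up to %u pending descriptors\n",
		    in_use, max);
	}
	return (0);
}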
+
#define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags)
#define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG)
-#define TXQ_MAX_DB_DEFERRED(size) (size >> 5)
+
+#define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use))
+#define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq)
#define TXQ_MAX_DB_CONSUMED(size) (size >> 4)
-static __inline void
-iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring)
-{
- uint32_t dbval;
+/* forward compatibility for cxgb */
+#define FIRST_QSET(ctx) 0
+#define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
+#define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
+#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
+#define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
- if (ring || txq->ift_db_pending >=
- TXQ_MAX_DB_DEFERRED(txq->ift_size)) {
+/* XXX we should be setting this to something other than zero */
+#define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
+#define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
- /* the lock will only ever be contended in the !min_latency case */
- if (!TXDB_TRYLOCK(txq))
- return;
+static inline bool
+iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use)
+{
+ qidx_t dbval, max;
+ bool rang;
+
+ rang = false;
+ max = TXQ_MAX_DB_DEFERRED(txq, in_use);
+ if (ring || txq->ift_db_pending >= max) {
dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx;
ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval);
txq->ift_db_pending = txq->ift_npending = 0;
- TXDB_UNLOCK(txq);
+ rang = true;
}
+ return (rang);
}
-static void
-iflib_txd_deferred_db_check(void * arg)
-{
- iflib_txq_t txq = arg;
-
- /* simple non-zero boolean so use bitwise OR */
- if ((txq->ift_db_pending | txq->ift_npending) &&
- txq->ift_db_pending >= txq->ift_db_pending_queued)
- iflib_txd_db_check(txq->ift_ctx, txq, TRUE);
- txq->ift_db_pending_queued = 0;
- if (ifmp_ring_is_stalled(txq->ift_br[0]))
- iflib_txq_check_drain(txq, 4);
-}
-
#ifdef PKT_DEBUG
static void
print_pkt(if_pkt_info_t pi)
@@ -2252,10 +2738,21 @@
static int
iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
{
+ if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
struct ether_vlan_header *eh;
struct mbuf *m, *n;
n = m = *mp;
+ if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
+ M_WRITABLE(m) == 0) {
+ if ((m = m_dup(m, M_NOWAIT)) == NULL) {
+ return (ENOMEM);
+ } else {
+ m_freem(*mp);
+ n = *mp = m;
+ }
+ }
+
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present,
@@ -2319,26 +2816,30 @@
pi->ipi_ipproto = ip->ip_p;
pi->ipi_flags |= IPI_TX_IPV4;
- if (pi->ipi_csum_flags & CSUM_IP)
+ if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP))
ip->ip_sum = 0;
- if (pi->ipi_ipproto == IPPROTO_TCP) {
- if (__predict_false(th == NULL)) {
- txq->ift_pullups++;
- if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
- return (ENOMEM);
- th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
- }
- pi->ipi_tcp_hflags = th->th_flags;
- pi->ipi_tcp_hlen = th->th_off << 2;
- pi->ipi_tcp_seq = th->th_seq;
- }
if (IS_TSO4(pi)) {
+ if (pi->ipi_ipproto == IPPROTO_TCP) {
+ if (__predict_false(th == NULL)) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
+ return (ENOMEM);
+ th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ pi->ipi_tcp_seq = th->th_seq;
+ }
if (__predict_false(ip->ip_p != IPPROTO_TCP))
return (ENXIO);
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(IPPROTO_TCP));
pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
+ if (sctx->isc_flags & IFLIB_TSO_INIT_IP) {
+ ip->ip_sum = 0;
+ ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz);
+ }
}
break;
}
@@ -2360,15 +2861,15 @@
pi->ipi_ipproto = ip6->ip6_nxt;
pi->ipi_flags |= IPI_TX_IPV6;
- if (pi->ipi_ipproto == IPPROTO_TCP) {
- if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
- if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
- return (ENOMEM);
- }
- pi->ipi_tcp_hflags = th->th_flags;
- pi->ipi_tcp_hlen = th->th_off << 2;
- }
if (IS_TSO6(pi)) {
+ if (pi->ipi_ipproto == IPPROTO_TCP) {
+ if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
+ if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
+ return (ENOMEM);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ }
if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP))
return (ENXIO);
@@ -2390,10 +2891,10 @@
break;
}
*mp = m;
+
return (0);
}
-
static __noinline struct mbuf *
collapse_pkthdr(struct mbuf *m0)
{
@@ -2460,8 +2961,8 @@
if_ctx_t ctx;
if_shared_ctx_t sctx;
if_softc_ctx_t scctx;
- int i, next, pidx, mask, err, maxsegsz, ntxd, count;
- struct mbuf *m, *tmp, **ifsd_m, **mp;
+ int i, next, pidx, err, ntxd, count;
+ struct mbuf *m, *tmp, **ifsd_m;
m = *m0;
@@ -2485,28 +2986,46 @@
if (err)
return (err);
ifsd_flags[pidx] |= TX_SW_DESC_MAPPED;
- i = 0;
- next = pidx;
- mask = (txq->ift_size-1);
+ count = 0;
m = *m0;
do {
- mp = &ifsd_m[next];
- *mp = m;
+ if (__predict_false(m->m_len <= 0)) {
+ tmp = m;
+ m = m->m_next;
+ tmp->m_next = NULL;
+ m_free(tmp);
+ continue;
+ }
m = m->m_next;
- if (__predict_false((*mp)->m_len == 0)) {
- m_free(*mp);
- *mp = NULL;
- } else
- next = (pidx + i) & (ntxd-1);
+ count++;
} while (m != NULL);
+ if (count > *nsegs) {
+ ifsd_m[pidx] = *m0;
+ ifsd_m[pidx]->m_flags |= M_TOOBIG;
+ return (0);
+ }
+ m = *m0;
+ count = 0;
+ do {
+ next = (pidx + count) & (ntxd-1);
+ MPASS(ifsd_m[next] == NULL);
+ ifsd_m[next] = m;
+ count++;
+ tmp = m;
+ m = m->m_next;
+ } while (m != NULL);
} else {
- int buflen, sgsize, max_sgsize;
+ int buflen, sgsize, maxsegsz, max_sgsize;
vm_offset_t vaddr;
vm_paddr_t curaddr;
count = i = 0;
- maxsegsz = sctx->isc_tx_maxsize;
m = *m0;
+ if (m->m_pkthdr.csum_flags & CSUM_TSO)
+ maxsegsz = scctx->isc_tx_tso_segsize_max;
+ else
+ maxsegsz = sctx->isc_tx_maxsegsize;
+
do {
if (__predict_false(m->m_len <= 0)) {
tmp = m;
@@ -2528,6 +3047,8 @@
#endif
ifsd_m[next] = m;
while (buflen > 0) {
+ if (i >= max_segs)
+ goto err;
max_sgsize = MIN(buflen, maxsegsz);
curaddr = pmap_kextract(vaddr);
sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
@@ -2537,8 +3058,6 @@
vaddr += sgsize;
buflen -= sgsize;
i++;
- if (i >= max_segs)
- goto err;
}
count++;
tmp = m;
@@ -2552,6 +3071,67 @@
return (EFBIG);
}
+static inline caddr_t
+calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid)
+{
+ qidx_t size;
+ int ntxd;
+ caddr_t start, end, cur, next;
+
+ ntxd = txq->ift_size;
+ size = txq->ift_txd_size[qid];
+ start = txq->ift_ifdi[qid].idi_vaddr;
+
+ if (__predict_false(size == 0))
+ return (start);
+ cur = start + size*cidx;
+ end = start + size*ntxd;
+ next = CACHE_PTR_NEXT(cur);
+ return (next < end ? next : start);
+}
+
+/*
+ * Pad an mbuf to ensure a minimum ethernet frame size.
+ * min_frame_size is the frame size (less CRC) to pad the mbuf to
+ */
+static __noinline int
+iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size)
+{
+ /*
+ * 18 is enough bytes to pad an ARP packet to 46 bytes, and
+ * an ARP message is the smallest common payload I can think of.
+ */
+ static char pad[18]; /* just zeros */
+ int n;
+ struct mbuf *new_head;
+
+ if (!M_WRITABLE(*m_head)) {
+ new_head = m_dup(*m_head, M_NOWAIT);
+ if (new_head == NULL) {
+ m_freem(*m_head);
+ device_printf(dev, "cannot pad short frame, m_dup() failed");
+ DBG_COUNTER_INC(encap_pad_mbuf_fail);
+ return ENOMEM;
+ }
+ m_freem(*m_head);
+ *m_head = new_head;
+ }
+
+ for (n = min_frame_size - (*m_head)->m_pkthdr.len;
+ n > 0; n -= sizeof(pad))
+ if (!m_append(*m_head, min(n, sizeof(pad)), pad))
+ break;
+
+ if (n > 0) {
+ m_freem(*m_head);
+ device_printf(dev, "cannot pad short frame\n");
+ DBG_COUNTER_INC(encap_pad_mbuf_fail);
+ return (ENOBUFS);
+ }
+
+ return 0;
+}
+
static int
iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
{
@@ -2560,6 +3140,7 @@
if_softc_ctx_t scctx;
bus_dma_segment_t *segs;
struct mbuf *m_head;
+ void *next_txd;
bus_dmamap_t map;
struct if_pkt_info pi;
int remap = 0;
@@ -2580,18 +3161,23 @@
*/
cidx = txq->ift_cidx;
pidx = txq->ift_pidx;
- next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
+ if (ctx->ifc_flags & IFC_PREFETCH) {
+ next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
+ if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) {
+ next_txd = calc_next_txd(txq, cidx, 0);
+ prefetch(next_txd);
+ }
- /* prefetch the next cache line of mbuf pointers and flags */
- prefetch(&txq->ift_sds.ifsd_m[next]);
- if (txq->ift_sds.ifsd_map != NULL) {
- prefetch(&txq->ift_sds.ifsd_map[next]);
+ /* prefetch the next cache line of mbuf pointers and flags */
+ prefetch(&txq->ift_sds.ifsd_m[next]);
+ if (txq->ift_sds.ifsd_map != NULL) {
+ prefetch(&txq->ift_sds.ifsd_map[next]);
+ next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
+ prefetch(&txq->ift_sds.ifsd_flags[next]);
+ }
+ } else if (txq->ift_sds.ifsd_map != NULL)
map = txq->ift_sds.ifsd_map[pidx];
- next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
- prefetch(&txq->ift_sds.ifsd_flags[next]);
- }
-
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
desc_tag = txq->ift_tso_desc_tag;
max_segs = scctx->isc_tx_tso_segments_max;
@@ -2599,14 +3185,21 @@
desc_tag = txq->ift_desc_tag;
max_segs = scctx->isc_tx_nsegments;
}
+ if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) &&
+ __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) {
+ err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size);
+ if (err)
+ return err;
+ }
m_head = *m_headp;
- bzero(&pi, sizeof(pi));
- pi.ipi_len = m_head->m_pkthdr.len;
+
+ pkt_info_zero(&pi);
pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST));
- pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
- pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0;
pi.ipi_pidx = pidx;
pi.ipi_qsidx = txq->ift_id;
+ pi.ipi_len = m_head->m_pkthdr.len;
+ pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
+ pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0;
/* deliberate bitwise OR to make one condition */
if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) {
@@ -2662,6 +3255,19 @@
GROUPTASK_ENQUEUE(&txq->ift_task);
return (ENOBUFS);
}
+ /*
+ * On Intel cards we can greatly reduce the number of TX interrupts
+ * we see by only setting report status on every Nth descriptor.
+ * However, this also means that the driver will need to keep track
+ * of the descriptors that RS was set on to check them for the DD bit.
+ */
+ txq->ift_rs_pending += nsegs + 1;
+ if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) ||
+ iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) {
+ pi.ipi_flags |= IPI_TX_INTR;
+ txq->ift_rs_pending = 0;
+ }
+
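To make the comment above concrete, here is a rough sketch (not taken from this patch) of the driver-side bookkeeping it alludes to: the driver remembers which descriptors were queued with RS set and, in its isc_txd_credits_update() callback, polls only those entries for the DD writeback. Every structure, field, and flag name below is a hypothetical placeholder; only the callback signature matches the one used elsewhere in this diff.

/*
 * Hypothetical driver-side sketch, not part of this diff.  rs_idxs[]
 * records which descriptors were submitted with report-status set; the
 * DD bit is checked only on those entries.
 */
static int
example_isc_txd_credits_update(void *sc, uint16_t txqid, bool clear)
{
	struct example_txr *txr = example_get_txr(sc, txqid);
	int credits = 0;

	while (txr->rs_cidx != txr->rs_pidx) {
		uint16_t idx = txr->rs_idxs[txr->rs_cidx];

		/* stop at the first RS descriptor the NIC has not written back */
		if ((txr->descs[idx].status & EXAMPLE_TXD_STAT_DD) == 0)
			break;
		if (!clear)	/* caller only asks whether any work is pending */
			return (1);
		credits += example_descs_between(txr, txr->last_cleaned, idx);
		txr->last_cleaned = idx;
		txr->rs_cidx = (txr->rs_cidx + 1) % txr->num_rs_slots;
	}
	return (credits);
}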
pi.ipi_segs = segs;
pi.ipi_nsegs = nsegs;
@@ -2669,22 +3275,29 @@
#ifdef PKT_DEBUG
print_pkt(&pi);
#endif
+ if (map != NULL)
+ bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE);
if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) {
- bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-
+ if (map != NULL)
+ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
DBG_COUNTER_INC(tx_encap);
- MPASS(pi.ipi_new_pidx >= 0 &&
- pi.ipi_new_pidx < txq->ift_size);
+ MPASS(pi.ipi_new_pidx < txq->ift_size);
ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
if (pi.ipi_new_pidx < pi.ipi_pidx) {
ndesc += txq->ift_size;
txq->ift_gen = 1;
}
+ /*
+ * drivers may need as many as
+ * two sentinels
+ */
+ MPASS(ndesc <= pi.ipi_nsegs + 2);
MPASS(pi.ipi_new_pidx != pidx);
MPASS(ndesc > 0);
txq->ift_in_use += ndesc;
+
/*
* We update the last software descriptor again here because there may
* be a sentinel and/or there may be more mbufs than segments
@@ -2709,36 +3322,6 @@
return (ENOMEM);
}
-/* forward compatibility for cxgb */
-#define FIRST_QSET(ctx) 0
-
-#define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
-#define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
-#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
-#define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
-#define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
-#define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
-
-
-
-/* if there are more than TXQ_MIN_OCCUPANCY packets pending we consider deferring
- * doorbell writes
- *
- * ORing with 2 assures that min occupancy is never less than 2 without any conditional logic
- */
-#define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2)
-
-static inline int
-iflib_txq_min_occupancy(iflib_txq_t txq)
-{
- if_ctx_t ctx;
-
- ctx = txq->ift_ctx;
- return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx,
- txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) +
- MAX_TX_DESC(ctx));
-}
-
static void
iflib_tx_desc_free(iflib_txq_t txq, int n)
{
@@ -2747,6 +3330,7 @@
struct mbuf *m, **ifsd_m;
uint8_t *ifsd_flags;
bus_dmamap_t *ifsd_map;
+ bool do_prefetch;
cidx = txq->ift_cidx;
gen = txq->ift_gen;
@@ -2756,11 +3340,13 @@
ifsd_flags = txq->ift_sds.ifsd_flags;
ifsd_m = txq->ift_sds.ifsd_m;
ifsd_map = txq->ift_sds.ifsd_map;
+ do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH);
while (n--) {
- prefetch(ifsd_m[(cidx + 3) & mask]);
- prefetch(ifsd_m[(cidx + 4) & mask]);
-
+ if (do_prefetch) {
+ prefetch(ifsd_m[(cidx + 3) & mask]);
+ prefetch(ifsd_m[(cidx + 4) & mask]);
+ }
if (ifsd_m[cidx] != NULL) {
prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]);
prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]);
@@ -2775,8 +3361,15 @@
if ((m = ifsd_m[cidx]) != NULL) {
/* XXX we don't support any drivers that batch packets yet */
MPASS(m->m_nextpkt == NULL);
-
- m_free(m);
+ /* If the number of clusters exceeds the number of segments
+ * there won't be space on the ring to save a pointer to each
+ * cluster, so we simply free the list here.
+ */
+ if (m->m_flags & M_TOOBIG) {
+ m_freem(m);
+ } else {
+ m_free(m);
+ }
ifsd_m[cidx] = NULL;
#if MEMORY_LOGGING
txq->ift_dequeued++;
@@ -2823,24 +3416,34 @@
txq->ift_cleaned += reclaim;
txq->ift_in_use -= reclaim;
- if (txq->ift_active == FALSE)
- txq->ift_active = TRUE;
-
return (reclaim);
}
static struct mbuf **
-_ring_peek_one(struct ifmp_ring *r, int cidx, int offset)
+_ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining)
{
+ int next, size;
+ struct mbuf **items;
- return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (r->size-1)]));
+ size = r->size;
+ next = (cidx + CACHE_PTR_INCREMENT) & (size-1);
+ items = __DEVOLATILE(struct mbuf **, &r->items[0]);
+
+ prefetch(items[(cidx + offset) & (size-1)]);
+ if (remaining > 1) {
+ prefetch2cachelines(&items[next]);
+ prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]);
+ prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]);
+ prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]);
+ }
+ return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)]));
}
static void
iflib_txq_check_drain(iflib_txq_t txq, int budget)
{
- ifmp_ring_check_drainage(txq->ift_br[0], budget);
+ ifmp_ring_check_drainage(txq->ift_br, budget);
}
static uint32_t
@@ -2849,8 +3452,8 @@
iflib_txq_t txq = r->cookie;
if_ctx_t ctx = txq->ift_ctx;
- return ((TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx)) ||
- ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, false));
+ return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) ||
+ ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false));
}
static uint32_t
@@ -2858,16 +3461,19 @@
{
iflib_txq_t txq = r->cookie;
if_ctx_t ctx = txq->ift_ctx;
- if_t ifp = ctx->ifc_ifp;
+ struct ifnet *ifp = ctx->ifc_ifp;
struct mbuf **mp, *m;
- int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail, err, in_use_prev, desc_used;
+ int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail;
+ int reclaimed, err, in_use_prev, desc_used;
+ bool do_prefetch, ring, rang;
if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) ||
!LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(txq_drain_notready);
return (0);
}
-
+ reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
+ rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use);
avail = IDXDIFF(pidx, cidx, r->size);
if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) {
DBG_COUNTER_INC(txq_drain_flushing);
@@ -2877,77 +3483,153 @@
}
return (avail);
}
- iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
+
if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) {
txq->ift_qstatus = IFLIB_QUEUE_IDLE;
CALLOUT_LOCK(txq);
callout_stop(&txq->ift_timer);
- callout_stop(&txq->ift_db_check);
CALLOUT_UNLOCK(txq);
DBG_COUNTER_INC(txq_drain_oactive);
return (0);
}
+ if (reclaimed)
+ txq->ift_qstatus = IFLIB_QUEUE_IDLE;
consumed = mcast_sent = bytes_sent = pkt_sent = 0;
count = MIN(avail, TX_BATCH_SIZE);
+#ifdef INVARIANTS
+ if (iflib_verbose_debug)
+ printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__,
+ avail, ctx->ifc_flags, TXQ_AVAIL(txq));
+#endif
+ do_prefetch = (ctx->ifc_flags & IFC_PREFETCH);
+ avail = TXQ_AVAIL(txq);
+ for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) {
+ int pidx_prev, rem = do_prefetch ? count - i : 0;
- for (desc_used = i = 0; i < count && TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2; i++) {
- mp = _ring_peek_one(r, cidx, i);
+ mp = _ring_peek_one(r, cidx, i, rem);
+ MPASS(mp != NULL && *mp != NULL);
+ if (__predict_false(*mp == (struct mbuf *)txq)) {
+ consumed++;
+ reclaimed++;
+ continue;
+ }
in_use_prev = txq->ift_in_use;
+ pidx_prev = txq->ift_pidx;
err = iflib_encap(txq, mp);
- /*
- * What other errors should we bail out for?
- */
- if (err == ENOBUFS) {
+ if (__predict_false(err)) {
DBG_COUNTER_INC(txq_drain_encapfail);
- break;
+ /* no room - bail out */
+ if (err == ENOBUFS)
+ break;
+ consumed++;
+ DBG_COUNTER_INC(txq_drain_encapfail);
+ /* we can't send this packet - skip it */
+ continue;
}
consumed++;
- if (err)
- continue;
-
pkt_sent++;
m = *mp;
DBG_COUNTER_INC(tx_sent);
bytes_sent += m->m_pkthdr.len;
- if (m->m_flags & M_MCAST)
- mcast_sent++;
+ mcast_sent += !!(m->m_flags & M_MCAST);
+ avail = TXQ_AVAIL(txq);
txq->ift_db_pending += (txq->ift_in_use - in_use_prev);
desc_used += (txq->ift_in_use - in_use_prev);
- iflib_txd_db_check(ctx, txq, FALSE);
ETHER_BPF_MTAP(ifp, m);
- if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
+ if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING)))
break;
-
- if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size))
- break;
+ rang = iflib_txd_db_check(ctx, txq, false, in_use_prev);
}
- if ((iflib_min_tx_latency || iflib_txq_min_occupancy(txq)) && txq->ift_db_pending)
- iflib_txd_db_check(ctx, txq, TRUE);
- else if ((txq->ift_db_pending || TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)) &&
- (callout_pending(&txq->ift_db_check) == 0)) {
- txq->ift_db_pending_queued = txq->ift_db_pending;
- callout_reset_on(&txq->ift_db_check, 1, iflib_txd_deferred_db_check,
- txq, txq->ift_db_check.c_cpu);
- }
+ /* deliberate use of bitwise or to avoid gratuitous short-circuit */
+ ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx));
+ iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use);
if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent);
if (mcast_sent)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent);
-
+#ifdef INVARIANTS
+ if (iflib_verbose_debug)
+ printf("consumed=%d\n", consumed);
+#endif
return (consumed);
}
+static uint32_t
+iflib_txq_drain_always(struct ifmp_ring *r)
+{
+ return (1);
+}
+
+static uint32_t
+iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
+{
+ int i, avail;
+ struct mbuf **mp;
+ iflib_txq_t txq;
+
+ txq = r->cookie;
+
+ txq->ift_qstatus = IFLIB_QUEUE_IDLE;
+ CALLOUT_LOCK(txq);
+ callout_stop(&txq->ift_timer);
+ CALLOUT_UNLOCK(txq);
+
+ avail = IDXDIFF(pidx, cidx, r->size);
+ for (i = 0; i < avail; i++) {
+ mp = _ring_peek_one(r, cidx, i, avail - i);
+ if (__predict_false(*mp == (struct mbuf *)txq))
+ continue;
+ m_freem(*mp);
+ }
+ MPASS(ifmp_ring_is_stalled(r) == 0);
+ return (avail);
+}
+
static void
+iflib_ifmp_purge(iflib_txq_t txq)
+{
+ struct ifmp_ring *r;
+
+ r = txq->ift_br;
+ r->drain = iflib_txq_drain_free;
+ r->can_drain = iflib_txq_drain_always;
+
+ ifmp_ring_check_drainage(r, r->size);
+
+ r->drain = iflib_txq_drain;
+ r->can_drain = iflib_txq_can_drain;
+}
+
+static void
_task_fn_tx(void *context)
{
iflib_txq_t txq = context;
if_ctx_t ctx = txq->ift_ctx;
+ struct ifnet *ifp = ctx->ifc_ifp;
+ int rc;
+#ifdef IFLIB_DIAGNOSTICS
+ txq->ift_cpu_exec_count[curcpu]++;
+#endif
if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
return;
- ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
+ if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+ if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false))
+ netmap_tx_irq(ifp, txq->ift_id);
+ IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
+ return;
+ }
+ if (txq->ift_db_pending)
+ ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE);
+ ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
+ if (ctx->ifc_flags & IFC_LEGACY)
+ IFDI_INTR_ENABLE(ctx);
+ else {
+ rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
+ KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
+ }
}
static void
@@ -2957,17 +3639,32 @@
if_ctx_t ctx = rxq->ifr_ctx;
bool more;
int rc;
+ uint16_t budget;
+#ifdef IFLIB_DIAGNOSTICS
+ rxq->ifr_cpu_exec_count[curcpu]++;
+#endif
DBG_COUNTER_INC(task_fn_rxs);
if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
return;
-
- if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) {
+ more = true;
+#ifdef DEV_NETMAP
+ if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) {
+ u_int work = 0;
+ if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) {
+ more = false;
+ }
+ }
+#endif
+ budget = ctx->ifc_sysctl_rx_budget;
+ if (budget == 0)
+ budget = 16; /* XXX */
+ if (more == false || (more = iflib_rxeof(rxq, budget)) == false) {
if (ctx->ifc_flags & IFC_LEGACY)
IFDI_INTR_ENABLE(ctx);
else {
DBG_COUNTER_INC(rx_intr_enables);
- rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+ rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
}
}
@@ -2985,8 +3682,11 @@
iflib_txq_t txq;
int i;
- if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
- return;
+ if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) {
+ if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) {
+ return;
+ }
+ }
CTX_LOCK(ctx);
for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) {
@@ -2998,6 +3698,10 @@
for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++)
callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
IFDI_LINK_INTR_ENABLE(ctx);
+ if (ctx->ifc_flags & IFC_DO_RESET) {
+ ctx->ifc_flags &= ~IFC_DO_RESET;
+ iflib_if_init_locked(ctx);
+ }
CTX_UNLOCK(ctx);
if (LINK_ACTIVE(ctx) == 0)
@@ -3072,7 +3776,7 @@
if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(tx_frees);
m_freem(m);
- return (0);
+ return (ENOBUFS);
}
MPASS(m->m_nextpkt == NULL);
@@ -3119,18 +3823,16 @@
}
#endif
DBG_COUNTER_INC(tx_seen);
- err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE);
+ err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE);
+ GROUPTASK_ENQUEUE(&txq->ift_task);
if (err) {
- GROUPTASK_ENQUEUE(&txq->ift_task);
/* support forthcoming later */
#ifdef DRIVER_BACKPRESSURE
txq->ift_closed = TRUE;
#endif
- ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
+ ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
m_freem(m);
- } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) {
- GROUPTASK_ENQUEUE(&txq->ift_task);
}
return (err);
@@ -3147,7 +3849,7 @@
ctx->ifc_flags |= IFC_QFLUSH;
CTX_UNLOCK(ctx);
for (i = 0; i < NTXQSETS(ctx); i++, txq++)
- while (!(ifmp_ring_is_idle(txq->ift_br[0]) || ifmp_ring_is_stalled(txq->ift_br[0])))
+ while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br)))
iflib_txq_check_drain(txq, 0);
CTX_LOCK(ctx);
ctx->ifc_flags &= ~IFC_QFLUSH;
@@ -3158,11 +3860,9 @@
#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
- IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | \
+ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
-#define IFCAP_REINIT IFCAP_FLAGS
-
static int
iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
{
@@ -3236,8 +3936,6 @@
ctx->ifc_if_flags = if_getflags(ifp);
CTX_UNLOCK(ctx);
break;
-
- break;
case SIOCADDMULTI:
case SIOCDELMULTI:
if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
@@ -3254,6 +3952,7 @@
CTX_UNLOCK(ctx);
/* falls thru */
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command);
break;
case SIOCGI2C:
@@ -3288,6 +3987,8 @@
#endif
setmask |= (mask & IFCAP_FLAGS);
+ if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
+ setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6);
if ((mask & IFCAP_WOL) &&
(if_getcapabilities(ifp) & IFCAP_WOL) != 0)
setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC));
@@ -3298,10 +3999,10 @@
if (setmask) {
CTX_LOCK(ctx);
bits = if_getdrvflags(ifp);
- if (setmask & IFCAP_REINIT)
+ if (bits & IFF_DRV_RUNNING)
iflib_stop(ctx);
if_togglecapenable(ifp, setmask);
- if (setmask & IFCAP_REINIT)
+ if (bits & IFF_DRV_RUNNING)
iflib_init_locked(ctx);
if_setdrvflags(ifp, bits);
CTX_UNLOCK(ctx);
@@ -3353,7 +4054,7 @@
IFDI_VLAN_REGISTER(ctx, vtag);
/* Re-init to load the changes */
if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
- iflib_init_locked(ctx);
+ iflib_if_init_locked(ctx);
CTX_UNLOCK(ctx);
}
@@ -3372,7 +4073,7 @@
IFDI_VLAN_UNREGISTER(ctx, vtag);
/* Re-init to load the changes */
if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
- iflib_init_locked(ctx);
+ iflib_if_init_locked(ctx);
CTX_UNLOCK(ctx);
}
@@ -3462,7 +4163,6 @@
ctx->ifc_sctx = sctx;
ctx->ifc_dev = dev;
- ctx->ifc_txrx = *sctx->isc_txrx;
ctx->ifc_softc = sc;
if ((err = iflib_register(ctx)) != 0) {
@@ -3472,6 +4172,9 @@
iflib_add_device_sysctl_pre(ctx);
scctx = &ctx->ifc_softc_ctx;
+ ifp = ctx->ifc_ifp;
+ ctx->ifc_nhwtxqs = sctx->isc_ntxqs;
+
/*
* XXX sanity check that ntxd & nrxd are a power of 2
*/
@@ -3524,30 +4227,35 @@
device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
return (err);
}
- if (scctx->isc_ntxqsets_max)
- scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max);
- if (scctx->isc_nrxqsets_max)
- scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max);
+ _iflib_pre_assert(scctx);
+ ctx->ifc_txrx = *scctx->isc_txrx;
+#ifdef INVARIANTS
+ MPASS(scctx->isc_capenable);
+ if (scctx->isc_capenable & IFCAP_TXCSUM)
+ MPASS(scctx->isc_tx_csum_flags);
+#endif
+
+ if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS);
+ if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS);
+
+ if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets))
+ scctx->isc_ntxqsets = scctx->isc_ntxqsets_max;
+ if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets))
+ scctx->isc_nrxqsets = scctx->isc_nrxqsets_max;
+
#ifdef ACPI_DMAR
if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL)
ctx->ifc_flags |= IFC_DMAR;
+#elif !(defined(__i386__) || defined(__amd64__))
+ /* set unconditionally for !x86 */
+ ctx->ifc_flags |= IFC_DMAR;
#endif
msix_bar = scctx->isc_msix_bar;
+ main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0;
+ main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0;
- ifp = ctx->ifc_ifp;
-
- if(sctx->isc_flags & IFLIB_HAS_TXCQ)
- main_txq = 1;
- else
- main_txq = 0;
-
- if(sctx->isc_flags & IFLIB_HAS_RXCQ)
- main_rxq = 1;
- else
- main_rxq = 0;
-
/* XXX change for per-queue sizes */
device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
@@ -3590,6 +4298,18 @@
if (scctx->isc_rss_table_size == 0)
scctx->isc_rss_table_size = 64;
scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
+
+ GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
+ /* XXX format name */
+ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin");
+
+ /* Set up cpu set. If it fails, use the set of all CPUs. */
+ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) {
+ device_printf(dev, "Unable to fetch CPU list\n");
+ CPU_COPY(&all_cpus, &ctx->ifc_cpus);
+ }
+ MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0);
+
/*
** Now setup MSI or MSI/X, should
** return us the number of supported
@@ -3598,6 +4318,10 @@
if (sctx->isc_flags & IFLIB_SKIP_MSIX) {
msix = scctx->isc_vectors;
} else if (scctx->isc_msix_bar != 0)
+ /*
+ * The simple fact that isc_msix_bar is not 0 does not mean we
+ * have a good value there that is known to work.
+ */
msix = iflib_msix_init(ctx);
else {
scctx->isc_vectors = 1;
@@ -3617,6 +4341,7 @@
goto fail_queues;
}
+ IFDI_INTR_DISABLE(ctx);
if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) {
device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err);
goto fail_intr_free;
@@ -3679,8 +4404,9 @@
iflib_txq_t txq;
iflib_rxq_t rxq;
device_t dev = ctx->ifc_dev;
- int i;
+ int i, j;
struct taskqgroup *tqg;
+ iflib_fl_t fl;
/* Make sure VLANS are not using driver */
if (if_vlantrunkinuse(ifp)) {
@@ -3706,16 +4432,19 @@
if (ctx->ifc_led_dev != NULL)
led_destroy(ctx->ifc_led_dev);
/* XXX drain any dependent tasks */
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
callout_drain(&txq->ift_timer);
- callout_drain(&txq->ift_db_check);
if (txq->ift_task.gt_uniq != NULL)
taskqgroup_detach(tqg, &txq->ift_task);
}
for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
if (rxq->ifr_task.gt_uniq != NULL)
taskqgroup_detach(tqg, &rxq->ifr_task);
+
+ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
+ free(fl->ifl_rx_bitmap, M_IFLIB);
+
}
tqg = qgroup_if_config_tqg;
if (ctx->ifc_admin_task.gt_uniq != NULL)
@@ -3886,15 +4615,6 @@
MPASS(sctx->isc_rx_nsegments);
MPASS(sctx->isc_rx_maxsegsize);
-
- MPASS(sctx->isc_txrx->ift_txd_encap);
- MPASS(sctx->isc_txrx->ift_txd_flush);
- MPASS(sctx->isc_txrx->ift_txd_credits_update);
- MPASS(sctx->isc_txrx->ift_rxd_available);
- MPASS(sctx->isc_txrx->ift_rxd_pkt_get);
- MPASS(sctx->isc_txrx->ift_rxd_refill);
- MPASS(sctx->isc_txrx->ift_rxd_flush);
-
MPASS(sctx->isc_nrxd_min[0]);
MPASS(sctx->isc_nrxd_max[0]);
MPASS(sctx->isc_nrxd_default[0]);
@@ -3903,6 +4623,19 @@
MPASS(sctx->isc_ntxd_default[0]);
}
+static void
+_iflib_pre_assert(if_softc_ctx_t scctx)
+{
+
+ MPASS(scctx->isc_txrx->ift_txd_encap);
+ MPASS(scctx->isc_txrx->ift_txd_flush);
+ MPASS(scctx->isc_txrx->ift_txd_credits_update);
+ MPASS(scctx->isc_txrx->ift_rxd_available);
+ MPASS(scctx->isc_txrx->ift_rxd_pkt_get);
+ MPASS(scctx->isc_txrx->ift_rxd_refill);
+ MPASS(scctx->isc_txrx->ift_rxd_flush);
+}
+
static int
iflib_register(if_ctx_t ctx)
{
@@ -3937,9 +4670,6 @@
if_setqflushfn(ifp, iflib_if_qflush);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
- if_setcapabilities(ifp, 0);
- if_setcapenable(ifp, 0);
-
ctx->ifc_vlan_attach_event =
EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
EVENTHANDLER_PRI_FIRST);
@@ -3975,7 +4705,6 @@
caddr_t *vaddrs;
uint64_t *paddrs;
struct ifmp_ring **brscp;
- int nbuf_rings = 1; /* XXX determine dynamically */
KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1"));
KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1"));
@@ -4001,11 +4730,6 @@
err = ENOMEM;
goto rx_fail;
}
- if (!(brscp = malloc(sizeof(void *) * nbuf_rings * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
- device_printf(dev, "Unable to buf_ring_sc * memory\n");
- err = ENOMEM;
- goto rx_fail;
- }
ctx->ifc_txqs = txq;
ctx->ifc_rxqs = rxq;
@@ -4028,6 +4752,7 @@
err = ENOMEM;
goto err_tx_desc;
}
+ txq->ift_txd_size[j] = scctx->isc_txd_size[j];
bzero((void *)ifdip->idi_vaddr, txqsizes[j]);
}
txq->ift_ctx = ctx;
@@ -4039,8 +4764,6 @@
}
/* XXX fix this */
txq->ift_timer.c_cpu = cpu;
- txq->ift_db_check.c_cpu = cpu;
- txq->ift_nbr = nbuf_rings;
if (iflib_txsd_alloc(txq)) {
device_printf(dev, "Critical Failure setting up TX buffers\n");
@@ -4053,21 +4776,16 @@
device_get_nameunit(dev), txq->ift_id);
mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF);
callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0);
- callout_init_mtx(&txq->ift_db_check, &txq->ift_mtx, 0);
snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db",
device_get_nameunit(dev), txq->ift_id);
- TXDB_LOCK_INIT(txq);
- txq->ift_br = brscp + i*nbuf_rings;
- for (j = 0; j < nbuf_rings; j++) {
- err = ifmp_ring_alloc(&txq->ift_br[j], 2048, txq, iflib_txq_drain,
- iflib_txq_can_drain, M_IFLIB, M_WAITOK);
- if (err) {
- /* XXX free any allocated rings */
- device_printf(dev, "Unable to allocate buf_ring\n");
- goto err_tx_desc;
- }
+ err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain,
+ iflib_txq_can_drain, M_IFLIB, M_WAITOK);
+ if (err) {
+ /* XXX free any allocated rings */
+ device_printf(dev, "Unable to allocate buf_ring\n");
+ goto err_tx_desc;
}
}
@@ -4081,6 +4799,9 @@
}
rxq->ifr_ifdi = ifdip;
+ /* XXX this needs to be changed if #rx queues != #tx queues */
+ rxq->ifr_ntxqirq = 1;
+ rxq->ifr_txqid[0] = i;
for (j = 0; j < nrxqs; j++, ifdip++) {
if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) {
device_printf(dev, "Unable to allocate Descriptor memory\n");
@@ -4105,10 +4826,10 @@
}
rxq->ifr_fl = fl;
for (j = 0; j < nfree_lists; j++) {
- rxq->ifr_fl[j].ifl_rxq = rxq;
- rxq->ifr_fl[j].ifl_id = j;
- rxq->ifr_fl[j].ifl_ifdi =
- &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
+ fl[j].ifl_rxq = rxq;
+ fl[j].ifl_id = j;
+ fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
+ fl[j].ifl_rxd_size = scctx->isc_rxd_size[j];
}
/* Allocate receive buffers for the ring*/
if (iflib_rxsd_alloc(rxq)) {
@@ -4117,6 +4838,9 @@
err = ENOMEM;
goto err_rx_desc;
}
+
+ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
+ fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO);
}
/* TXQs */
@@ -4294,19 +5018,152 @@
return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
}
-static void
-find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid)
+#ifdef SMP
+static int
+find_nth(if_ctx_t ctx, int qid)
{
- int i, cpuid;
+ cpuset_t cpus;
+ int i, cpuid, eqid, count;
- CPU_COPY(&ctx->ifc_cpus, cpus);
+ CPU_COPY(&ctx->ifc_cpus, &cpus);
+ count = CPU_COUNT(&cpus);
+ eqid = qid % count;
/* clear up to the qid'th bit */
- for (i = 0; i < qid; i++) {
- cpuid = CPU_FFS(cpus);
- CPU_CLR(cpuid, cpus);
+ for (i = 0; i < eqid; i++) {
+ cpuid = CPU_FFS(&cpus);
+ MPASS(cpuid != 0);
+ CPU_CLR(cpuid-1, &cpus);
}
+ cpuid = CPU_FFS(&cpus);
+ MPASS(cpuid != 0);
+ return (cpuid-1);
}
+#ifdef SCHED_ULE
+extern struct cpu_group *cpu_top; /* CPU topology */
+
+static int
+find_child_with_core(int cpu, struct cpu_group *grp)
+{
+ int i;
+
+ if (grp->cg_children == 0)
+ return -1;
+
+ MPASS(grp->cg_child);
+ for (i = 0; i < grp->cg_children; i++) {
+ if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
+ return i;
+ }
+
+ return -1;
+}
+
+/*
+ * Find the nth "close" core to the specified core.
+ * "close" is defined as the deepest level that shares
+ * at least an L2 cache. With threads, this will be
+ * threads on the same core. If the shared cache is L3
+ * or higher, simply returns the same core.
+ */
+static int
+find_close_core(int cpu, int core_offset)
+{
+ struct cpu_group *grp;
+ int i;
+ int fcpu;
+ cpuset_t cs;
+
+ grp = cpu_top;
+ if (grp == NULL)
+ return cpu;
+ i = 0;
+ while ((i = find_child_with_core(cpu, grp)) != -1) {
+ /* If the child only has one cpu, don't descend */
+ if (grp->cg_child[i].cg_count <= 1)
+ break;
+ grp = &grp->cg_child[i];
+ }
+
+ /* If they don't share at least an L2 cache, use the same CPU */
+ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
+ return cpu;
+
+ /* Now pick one */
+ CPU_COPY(&grp->cg_mask, &cs);
+
+ /* Add the selected CPU offset to core offset. */
+ for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) {
+ if (fcpu - 1 == cpu)
+ break;
+ CPU_CLR(fcpu - 1, &cs);
+ }
+ MPASS(fcpu);
+
+ core_offset += i;
+
+ CPU_COPY(&grp->cg_mask, &cs);
+ for (i = core_offset % grp->cg_count; i > 0; i--) {
+ MPASS(CPU_FFS(&cs));
+ CPU_CLR(CPU_FFS(&cs) - 1, &cs);
+ }
+ MPASS(CPU_FFS(&cs));
+ return CPU_FFS(&cs) - 1;
+}
+#else
+static int
+find_close_core(int cpu, int core_offset __unused)
+{
+ return cpu;
+}
+#endif
+
+static int
+get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid)
+{
+ switch (type) {
+ case IFLIB_INTR_TX:
+ /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */
+ /* XXX handle multiple RX threads per core and more than two cores per L2 group */
+ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1;
+ case IFLIB_INTR_RX:
+ case IFLIB_INTR_RXTX:
+ /* RX queues get the specified core */
+ return qid / CPU_COUNT(&ctx->ifc_cpus);
+ default:
+ return -1;
+ }
+}
+#else
+#define get_core_offset(ctx, type, qid) CPU_FIRST()
+#define find_close_core(cpuid, tid) CPU_FIRST()
+#define find_nth(ctx, gid) CPU_FIRST()
+#endif
+
+/* Just to avoid copy/paste */
+static inline int
+iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid,
+ struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name)
+{
+ int cpuid;
+ int err, tid;
+
+ cpuid = find_nth(ctx, qid);
+ tid = get_core_offset(ctx, type, qid);
+ MPASS(tid >= 0);
+ cpuid = find_close_core(cpuid, tid);
+ err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name);
+ if (err) {
+ device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err);
+ return (err);
+ }
+#ifdef notyet
+ if (cpuid > ctx->ifc_cpuid_highest)
+ ctx->ifc_cpuid_highest = cpuid;
+#endif
+ return 0;
+}
+
int
iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
@@ -4315,12 +5172,13 @@
struct grouptask *gtask;
struct taskqgroup *tqg;
iflib_filter_info_t info;
- cpuset_t cpus;
gtask_fn_t *fn;
int tqrid, err;
+ driver_filter_t *intr_fast;
void *q;
info = &ctx->ifc_filter_info;
+ tqrid = rid;
switch (type) {
/* XXX merge tx/rx for netmap? */
@@ -4328,90 +5186,111 @@
q = &ctx->ifc_txqs[qid];
info = &ctx->ifc_txqs[qid].ift_filter_info;
gtask = &ctx->ifc_txqs[qid].ift_task;
- tqg = qgroup_softirq;
- tqrid = irq->ii_rid;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_tx;
+ intr_fast = iflib_fast_intr;
+ GROUPTASK_INIT(gtask, 0, fn, q);
break;
case IFLIB_INTR_RX:
q = &ctx->ifc_rxqs[qid];
info = &ctx->ifc_rxqs[qid].ifr_filter_info;
gtask = &ctx->ifc_rxqs[qid].ifr_task;
- tqg = qgroup_softirq;
- tqrid = irq->ii_rid;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_rx;
+ intr_fast = iflib_fast_intr;
+ GROUPTASK_INIT(gtask, 0, fn, q);
break;
+ case IFLIB_INTR_RXTX:
+ q = &ctx->ifc_rxqs[qid];
+ info = &ctx->ifc_rxqs[qid].ifr_filter_info;
+ gtask = &ctx->ifc_rxqs[qid].ifr_task;
+ tqg = qgroup_if_io_tqg;
+ fn = _task_fn_rx;
+ intr_fast = iflib_fast_intr_rxtx;
+ GROUPTASK_INIT(gtask, 0, fn, q);
+ break;
case IFLIB_INTR_ADMIN:
q = ctx;
+ tqrid = -1;
info = &ctx->ifc_filter_info;
gtask = &ctx->ifc_admin_task;
tqg = qgroup_if_config_tqg;
- tqrid = -1;
fn = _task_fn_admin;
+ intr_fast = iflib_fast_intr_ctx;
break;
default:
panic("unknown net intr type");
}
- GROUPTASK_INIT(gtask, 0, fn, q);
info->ifi_filter = filter;
info->ifi_filter_arg = filter_arg;
info->ifi_task = gtask;
+ info->ifi_ctx = q;
- /* XXX query cpu that rid belongs to */
-
- err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info, name);
- if (err != 0)
+ err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name);
+ if (err != 0) {
+ device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err);
return (err);
+ }
+ if (type == IFLIB_INTR_ADMIN)
+ return (0);
+
if (tqrid != -1) {
- find_nth(ctx, &cpus, qid);
- taskqgroup_attach_cpu(tqg, gtask, q, CPU_FFS(&cpus), irq->ii_rid, name);
- } else
- taskqgroup_attach(tqg, gtask, q, tqrid, name);
+ err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name);
+ if (err)
+ return (err);
+ } else {
+ taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
+ }
-
return (0);
}
void
-iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name)
+iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name)
{
struct grouptask *gtask;
struct taskqgroup *tqg;
gtask_fn_t *fn;
void *q;
+ int irq_num = -1;
+ int err;
switch (type) {
case IFLIB_INTR_TX:
q = &ctx->ifc_txqs[qid];
gtask = &ctx->ifc_txqs[qid].ift_task;
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_tx;
+ if (irq != NULL)
+ irq_num = rman_get_start(irq->ii_res);
break;
case IFLIB_INTR_RX:
q = &ctx->ifc_rxqs[qid];
gtask = &ctx->ifc_rxqs[qid].ifr_task;
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
fn = _task_fn_rx;
+ if (irq != NULL)
+ irq_num = rman_get_start(irq->ii_res);
break;
- case IFLIB_INTR_ADMIN:
- q = ctx;
- gtask = &ctx->ifc_admin_task;
- tqg = qgroup_if_config_tqg;
- rid = -1;
- fn = _task_fn_admin;
- break;
case IFLIB_INTR_IOV:
q = ctx;
gtask = &ctx->ifc_vflr_task;
tqg = qgroup_if_config_tqg;
- rid = -1;
fn = _task_fn_iov;
break;
default:
panic("unknown net intr type");
}
GROUPTASK_INIT(gtask, 0, fn, q);
- taskqgroup_attach(tqg, gtask, q, rid, name);
+ if (irq_num != -1) {
+ err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name);
+ if (err)
+ taskqgroup_attach(tqg, gtask, q, irq_num, name);
+ }
+ else {
+ taskqgroup_attach(tqg, gtask, q, irq_num, name);
+ }
}
void
@@ -4438,10 +5317,17 @@
void *q;
int err;
+ /*
+ * group taskqueues aren't properly set up until SMP is started
+ * so we disable interrupts until we can handle them post
+ * SI_SUB_SMP
+ */
+ IFDI_INTR_DISABLE(ctx);
+
q = &ctx->ifc_rxqs[0];
info = &rxq[0].ifr_filter_info;
gtask = &rxq[0].ifr_task;
- tqg = qgroup_softirq;
+ tqg = qgroup_if_io_tqg;
tqrid = irq->ii_rid = *rid;
fn = _task_fn_rx;
@@ -4451,16 +5337,13 @@
info->ifi_task = gtask;
/* We allocate a single interrupt resource */
- if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr, NULL, info, name)) != 0)
+ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0)
return (err);
GROUPTASK_INIT(gtask, 0, fn, q);
- taskqgroup_attach(tqg, gtask, q, tqrid, name);
+ taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq);
- taskqgroup_attach(qgroup_softirq, &txq->ift_task, txq, tqrid, "tx");
- GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
- taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin/link");
-
+ taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, rman_get_start(irq->ii_res), "tx");
return (0);
}
@@ -4469,7 +5352,7 @@
{
ctx->ifc_led_dev = led_create(iflib_led_func, ctx,
- device_get_nameunit(ctx->ifc_dev));
+ device_get_nameunit(ctx->ifc_dev));
}
void
@@ -4489,7 +5372,13 @@
void
iflib_admin_intr_deferred(if_ctx_t ctx)
{
+#ifdef INVARIANTS
+ struct grouptask *gtask;
+ gtask = &ctx->ifc_admin_task;
+ MPASS(gtask != NULL && gtask->gt_taskqueue != NULL);
+#endif
+
GROUPTASK_ENQUEUE(&ctx->ifc_admin_task);
}
@@ -4504,7 +5393,7 @@
iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name)
{
- taskqgroup_attach_cpu(qgroup_softirq, gt, uniq, cpu, -1, name);
+ taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name);
}
void
@@ -4529,8 +5418,9 @@
if_t ifp = ctx->ifc_ifp;
iflib_txq_t txq = ctx->ifc_txqs;
-
if_setbaudrate(ifp, baudrate);
+ if (baudrate >= IF_Gbps(10))
+ ctx->ifc_flags |= IFC_PREFETCH;
/* If link down, disable watchdog */
if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) {
@@ -4545,23 +5435,27 @@
iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq)
{
int credits;
+#ifdef INVARIANTS
+ int credits_pre = txq->ift_cidx_processed;
+#endif
if (ctx->isc_txd_credits_update == NULL)
return (0);
- if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, true)) == 0)
+ if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0)
return (0);
txq->ift_processed += credits;
txq->ift_cidx_processed += credits;
+ MPASS(credits_pre + credits == txq->ift_cidx_processed);
if (txq->ift_cidx_processed >= txq->ift_size)
txq->ift_cidx_processed -= txq->ift_size;
return (credits);
}
static int
-iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget)
+iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget)
{
return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
@@ -4599,13 +5493,28 @@
int iflib_num_tx_queues, iflib_num_rx_queues;
int err, admincnt, bar;
- iflib_num_tx_queues = scctx->isc_ntxqsets;
- iflib_num_rx_queues = scctx->isc_nrxqsets;
+ iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs;
+ iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs;
+ device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets));
+
bar = ctx->ifc_softc_ctx.isc_msix_bar;
admincnt = sctx->isc_admin_intrcnt;
+ /* Override by global tuneable */
+ {
+ int i;
+ size_t len = sizeof(i);
+ err = kernel_sysctlbyname(curthread, "hw.pci.enable_msix", &i, &len, NULL, 0, NULL, 0);
+ if (err == 0) {
+ if (i == 0)
+ goto msi;
+ }
+ else {
+ device_printf(dev, "unable to read hw.pci.enable_msix.");
+ }
+ }
/* Override by tuneable */
- if (enable_msix == 0)
+ if (scctx->isc_disable_msix)
goto msi;
/*
@@ -4617,18 +5526,20 @@
** successfully initialize us.
*/
{
- uint16_t pci_cmd_word;
int msix_ctrl, rid;
+ pci_enable_busmaster(dev);
rid = 0;
- pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
- pci_cmd_word |= PCIM_CMD_BUSMASTEREN;
- pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2);
- pci_find_cap(dev, PCIY_MSIX, &rid);
- rid += PCIR_MSIX_CTRL;
- msix_ctrl = pci_read_config(dev, rid, 2);
- msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
- pci_write_config(dev, rid, msix_ctrl, 2);
+ if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) {
+ rid += PCIR_MSIX_CTRL;
+ msix_ctrl = pci_read_config(dev, rid, 2);
+ msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
+ pci_write_config(dev, rid, msix_ctrl, 2);
+ } else {
+ device_printf(dev, "PCIY_MSIX capability not found; "
+ "or rid %d == 0.\n", rid);
+ goto msi;
+ }
}
/*
@@ -4661,20 +5572,14 @@
#else
queuemsgs = msgs - admincnt;
#endif
- if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) {
#ifdef RSS
- queues = imin(queuemsgs, rss_getnumbuckets());
+ queues = imin(queuemsgs, rss_getnumbuckets());
#else
- queues = queuemsgs;
+ queues = queuemsgs;
#endif
- queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues);
- device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n",
- CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt);
- } else {
- device_printf(dev, "Unable to fetch CPU list\n");
- /* Figure out a reasonable auto config value */
- queues = min(queuemsgs, mp_ncpus);
- }
+ queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues);
+ device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n",
+ CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt);
#ifdef RSS
/* If we're doing RSS, clamp at the number of RSS buckets */
if (queues > rss_getnumbuckets())
@@ -4684,6 +5589,10 @@
rx_queues = iflib_num_rx_queues;
else
rx_queues = queues;
+
+ if (rx_queues > scctx->isc_nrxqsets)
+ rx_queues = scctx->isc_nrxqsets;
+
/*
* We want this to be all logical CPUs by default
*/
@@ -4692,6 +5601,9 @@
else
tx_queues = mp_ncpus;
+ if (tx_queues > scctx->isc_ntxqsets)
+ tx_queues = scctx->isc_ntxqsets;
+
if (ctx->ifc_sysctl_qs_eq_override == 0) {
#ifdef INVARIANTS
if (tx_queues != rx_queues)
@@ -4773,7 +5685,7 @@
if_ctx_t ctx = (void *)arg1;
enum iflib_ndesc_handler type = arg2;
char buf[256] = {0};
- uint16_t *ndesc;
+ qidx_t *ndesc;
char *p, *next;
int nqs, rc, i;
@@ -4843,6 +5755,12 @@
SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
"permit #txq != #rxq");
+ SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix",
+ CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0,
+ "disable MSIX (default 0)");
+ SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget",
+ CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0,
+ "set the rx budget");
/* XXX change for per-queue sizes */
SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
@@ -4935,27 +5853,26 @@
CTLFLAG_RD,
&txq->ift_cleaned, "total cleaned");
SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state",
- CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br[0]->state),
+ CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state),
0, mp_ring_state_handler, "A", "soft ring state");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues",
- CTLFLAG_RD, &txq->ift_br[0]->enqueues,
+ CTLFLAG_RD, &txq->ift_br->enqueues,
"# of enqueues to the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops",
- CTLFLAG_RD, &txq->ift_br[0]->drops,
+ CTLFLAG_RD, &txq->ift_br->drops,
"# of drops in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts",
- CTLFLAG_RD, &txq->ift_br[0]->starts,
+ CTLFLAG_RD, &txq->ift_br->starts,
"# of normal consumer starts in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls",
- CTLFLAG_RD, &txq->ift_br[0]->stalls,
+ CTLFLAG_RD, &txq->ift_br->stalls,
"# of consumer stalls in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts",
- CTLFLAG_RD, &txq->ift_br[0]->restarts,
+ CTLFLAG_RD, &txq->ift_br->restarts,
"# of consumer restarts in the mp_ring for this queue");
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
- CTLFLAG_RD, &txq->ift_br[0]->abdications,
+ CTLFLAG_RD, &txq->ift_br->abdications,
"# of consumer abdications in the mp_ring for this queue");
-
}
if (scctx->isc_nrxqsets > 100)
@@ -4977,6 +5894,7 @@
CTLFLAG_RD,
&rxq->ifr_cq_cidx, 1, "Consumer Index");
}
+
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j);
fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf,
@@ -5010,3 +5928,30 @@
}
}
+
+#ifndef __NO_STRICT_ALIGNMENT
+static struct mbuf *
+iflib_fixup_rx(struct mbuf *m)
+{
+ struct mbuf *n;
+
+ if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
+ bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
+ m->m_data += ETHER_HDR_LEN;
+ n = m;
+ } else {
+ MGETHDR(n, M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ m_freem(m);
+ return (NULL);
+ }
+ bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
+ m->m_data += ETHER_HDR_LEN;
+ m->m_len -= ETHER_HDR_LEN;
+ n->m_len = ETHER_HDR_LEN;
+ M_MOVE_PKTHDR(n, m);
+ n->m_next = m;
+ }
+ return (n);
+}
+#endif
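
Illustrative sketch (not part of the patch above): iflib_softirq_alloc_generic() now takes an if_irq_t rather than a bare rid, so the TX soft task can be attached near the CPU of the paired RX vector through iflib_irq_set_affinity(). The fragment below shows the updated calling convention from a hypothetical driver's MSI-X assign routine; the softc/queue field names (rx_queues, tx_queues, que_irq) and the filter my_rxq_intr are assumptions for illustration only:

	for (i = 0; i < scctx->isc_nrxqsets; i++) {
		struct my_rx_queue *que = &sc->rx_queues[i];

		/* The RX queue gets a real MSI-X vector and fast filter... */
		err = iflib_irq_alloc_generic(ctx, &que->que_irq, i + 1,
		    IFLIB_INTR_RX, my_rxq_intr, que, i, "rxq");
		if (err)
			return (err);
		/* ...and the TX soft task is grouped near that vector's CPU. */
		iflib_softirq_alloc_generic(ctx, &que->que_irq, IFLIB_INTR_TX,
		    &sc->tx_queues[i], i, "txq");
	}
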
Index: sys/net/mp_ring.c
===================================================================
--- sys/net/mp_ring.c
+++ sys/net/mp_ring.c
@@ -454,18 +454,12 @@
do {
os.state = ns.state = r->state;
ns.pidx_tail = pidx_stop;
- ns.flags = BUSY;
+ if (os.flags == IDLE)
+ ns.flags = ABDICATED;
} while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0);
critical_exit();
counter_u64_add(r->enqueues, n);
- /*
- * Turn into a consumer if some other thread isn't active as a consumer
- * already.
- */
- if (os.flags != BUSY)
- drain_ring_lockless(r, ns, os.flags, budget);
-
return (0);
}
#endif
@@ -476,7 +470,9 @@
union ring_state os, ns;
os.state = r->state;
- if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0)
+ if ((os.flags != STALLED && os.flags != ABDICATED) || // Only continue in STALLED and ABDICATED
+ os.pidx_head != os.pidx_tail || // Require work to be available
+ (os.flags != ABDICATED && r->can_drain(r) == 0)) // Can either drain, or everyone left
return;
MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */
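
Illustrative sketch (not part of the patch above): with the two mp_ring.c hunks, ifmp_ring_enqueue() no longer turns the enqueuing thread into a consumer; an IDLE ring is simply marked ABDICATED, and ifmp_ring_check_drainage() now accepts ABDICATED (in addition to STALLED) as a state it may drain from. The iflib changes earlier in the diff pair the two sides roughly as follows:

	/* Producer side (if_transmit path): hand off the mbuf, then make
	 * sure the consumer group task runs, since enqueue itself no
	 * longer drains the ring. */
	err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE);
	GROUPTASK_ENQUEUE(&txq->ift_task);

	/* Consumer side (_task_fn_tx): drain whatever was abdicated. */
	ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
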
Index: sys/sys/_task.h
===================================================================
--- sys/sys/_task.h
+++ sys/sys/_task.h
@@ -65,7 +65,8 @@
void *gt_taskqueue;
LIST_ENTRY(grouptask) gt_list;
void *gt_uniq;
- char *gt_name;
+#define GROUPTASK_NAMELEN 32
+ char gt_name[GROUPTASK_NAMELEN];
int16_t gt_irq;
int16_t gt_cpu;
};
Index: sys/sys/cpuset.h
===================================================================
--- sys/sys/cpuset.h
+++ sys/sys/cpuset.h
@@ -83,6 +83,8 @@
#define CPU_WHICH_IRQ 4 /* Specifies an irq #. */
#define CPU_WHICH_JAIL 5 /* Specifies a jail id. */
#define CPU_WHICH_DOMAIN 6 /* Specifies a NUMA domain id. */
+#define CPU_WHICH_INTRHANDLER 7 /* Specifies an irq # (not ithread). */
+#define CPU_WHICH_ITHREAD 8 /* Specifies an irq's ithread. */
/*
* Reserved cpuset identifiers.
Index: sys/sys/interrupt.h
===================================================================
--- sys/sys/interrupt.h
+++ sys/sys/interrupt.h
@@ -162,6 +162,8 @@
driver_filter_t filter, driver_intr_t handler, void *arg,
u_char pri, enum intr_type flags, void **cookiep);
int intr_event_bind(struct intr_event *ie, int cpu);
+int intr_event_bind_irqonly(struct intr_event *ie, int cpu);
+int intr_event_bind_ithread(struct intr_event *ie, int cpu);
int intr_event_create(struct intr_event **event, void *source,
int flags, int irq, void (*pre_ithread)(void *),
void (*post_ithread)(void *), void (*post_filter)(void *),
@@ -173,9 +175,9 @@
void intr_event_execute_handlers(struct proc *p, struct intr_event *ie);
int intr_event_handle(struct intr_event *ie, struct trapframe *frame);
int intr_event_remove_handler(void *cookie);
-int intr_getaffinity(int irq, void *mask);
+int intr_getaffinity(int irq, int mode, void *mask);
void *intr_handler_source(void *cookie);
-int intr_setaffinity(int irq, void *mask);
+int intr_setaffinity(int irq, int mode, void *mask);
void _intr_drain(int irq); /* Linux compat only. */
int swi_add(struct intr_event **eventp, const char *name,
driver_intr_t handler, void *arg, int pri, enum intr_type flags,
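
Illustrative sketch (not part of the patch above): the cpuset.h and interrupt.h hunks add a mode argument so interrupt affinity can target the low-level handler and its ithread separately. A minimal sketch of the new selectors, assuming an existing irq number, a cpu index, and error/mask variables provided by the caller:

	cpuset_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); /* handler only */
	error = intr_setaffinity(irq, CPU_WHICH_ITHREAD, &mask);     /* ithread only */
	error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);         /* both, legacy behaviour */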