Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F142275577
D54356.id169266.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D54356.id169266.diff
View Options
diff --git a/share/man/man4/iflib.4 b/share/man/man4/iflib.4
--- a/share/man/man4/iflib.4
+++ b/share/man/man4/iflib.4
@@ -1,4 +1,4 @@
-.Dd August 20, 2025
+.Dd January 7, 2026
.Dt IFLIB 4
.Os
.Sh NAME
@@ -94,6 +94,12 @@
ring is serviced by a different task.
This returns control to the caller faster and under high receive load,
may result in fewer dropped RX frames.
+.It Va tx_defer_mfree
+Controls the threshold in packets before iflib will free the memory
+(mbufs) for the packets that it has transmitted.
+When this is nonzero, mbufs will be freed outside the transmit lock.
+Setting this can reduce lock contention and CPU use when using simple_tx.
+Note that this applies only when simple_tx is enabled.
.It Va tx_reclaim_thresh
Controls the threshold in packets before iflib will ask the driver
how many transmitted packets can be reclaimed.
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -202,6 +202,7 @@
uint16_t ifc_sysctl_extra_msix_vectors;
bool ifc_cpus_are_physical_cores;
bool ifc_sysctl_simple_tx;
+ bool ifc_sysctl_tx_defer_mfree;
uint16_t ifc_sysctl_tx_reclaim_thresh;
uint16_t ifc_sysctl_tx_reclaim_ticks;
@@ -298,6 +299,8 @@
bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */
bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */
struct mbuf **ifsd_m; /* pkthdr mbufs */
+ struct mbuf **ifsd_m_defer; /* deferred mbuf ptr */
+ struct mbuf **ifsd_m_deferb;/* deferred mbuf backing ptr */
} if_txsd_vec_t;
/* magic number that should be high enough for any hardware */
@@ -318,6 +321,20 @@
#define IFLIB_RESTART_BUDGET 8
+
+/*
+ * Encode TSO or !TSO in the low bits of the tx ifsd_m pointer so as
+ * to avoid defref'ing the mbuf to determine the correct busdma resources
+ * to release
+ */
+#define IFLIB_TSO (1ULL << 0)
+#define IFLIB_NO_TSO (2ULL << 0)
+#define IFLIB_FLAGS_MASK (0x3ULL)
+#define IFLIB_SAVE_MBUF(mbuf, flags) ((void *)(((uintptr_t)mbuf) | flags))
+#define IFLIB_GET_FLAGS(a) ((uintptr_t)a & IFLIB_FLAGS_MASK)
+#define IFLIB_GET_MBUF(a) ((struct mbuf *)((uintptr_t)a & ~IFLIB_FLAGS_MASK))
+
+
#define IFC_LEGACY 0x001
#define IFC_QFLUSH 0x002
#define IFC_MULTISEG 0x004
@@ -343,7 +360,9 @@
qidx_t ift_cidx_processed;
qidx_t ift_pidx;
uint8_t ift_gen;
- uint8_t ift_br_offset;
+ uint8_t ift_br_offset:1,
+ ift_defer_mfree:1,
+ ift_spare_bits0:6;
uint16_t ift_npending;
uint16_t ift_db_pending;
uint16_t ift_rs_pending;
@@ -735,7 +754,8 @@
#ifndef __NO_STRICT_ALIGNMENT
static struct mbuf *iflib_fixup_rx(struct mbuf *m);
#endif
-static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq);
+static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq,
+ struct mbuf **m_defer);
static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets =
SLIST_HEAD_INITIALIZER(cpu_offsets);
@@ -1786,7 +1806,16 @@
err = ENOMEM;
goto fail;
}
-
+ if (ctx->ifc_sysctl_simple_tx) {
+ if (!(txq->ift_sds.ifsd_m_defer =
+ (struct mbuf **) malloc(sizeof(struct mbuf *) *
+ scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
+ device_printf(dev, "Unable to allocate TX mbuf map memory\n");
+ err = ENOMEM;
+ goto fail;
+ }
+ }
+ txq->ift_sds.ifsd_m_deferb = txq->ift_sds.ifsd_m_defer;
/*
* Create the DMA maps for TX buffers.
*/
@@ -1879,6 +1908,10 @@
free(txq->ift_sds.ifsd_m, M_IFLIB);
txq->ift_sds.ifsd_m = NULL;
}
+ if (txq->ift_sds.ifsd_m_defer != NULL) {
+ free(txq->ift_sds.ifsd_m_defer, M_IFLIB);
+ txq->ift_sds.ifsd_m_defer = NULL;
+ }
if (txq->ift_buf_tag != NULL) {
bus_dma_tag_destroy(txq->ift_buf_tag);
txq->ift_buf_tag = NULL;
@@ -1895,10 +1928,10 @@
static void
iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i)
{
- struct mbuf **mp;
+ struct mbuf *m;
- mp = &txq->ift_sds.ifsd_m[i];
- if (*mp == NULL)
+ m = IFLIB_GET_MBUF(txq->ift_sds.ifsd_m[i]);
+ if (m == NULL)
return;
if (txq->ift_sds.ifsd_map != NULL) {
@@ -1912,9 +1945,8 @@
bus_dmamap_unload(txq->ift_tso_buf_tag,
txq->ift_sds.ifsd_tso_map[i]);
}
- m_freem(*mp);
+ m_freem(m);
DBG_COUNTER_INC(tx_frees);
- *mp = NULL;
}
static int
@@ -3440,7 +3472,7 @@
ntxd = txq->ift_size;
pidx = txq->ift_pidx & (ntxd - 1);
ifsd_m = txq->ift_sds.ifsd_m;
- m = ifsd_m[pidx];
+ m = IFLIB_GET_MBUF(ifsd_m[pidx]);
ifsd_m[pidx] = NULL;
bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]);
if (txq->ift_sds.ifsd_tso_map != NULL)
@@ -3507,6 +3539,7 @@
struct mbuf *m_head, **ifsd_m;
bus_dmamap_t map;
struct if_pkt_info pi;
+ uintptr_t flags;
int remap = 0;
int err, nsegs, ndesc, max_segs, pidx;
@@ -3530,10 +3563,12 @@
map = txq->ift_sds.ifsd_tso_map[pidx];
MPASS(buf_tag != NULL);
MPASS(max_segs > 0);
+ flags = IFLIB_TSO;
} else {
buf_tag = txq->ift_buf_tag;
max_segs = scctx->isc_tx_nsegments;
map = txq->ift_sds.ifsd_map[pidx];
+ flags = IFLIB_NO_TSO;
}
if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) &&
__predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) {
@@ -3606,7 +3641,7 @@
DBG_COUNTER_INC(encap_txd_encap_fail);
return (err);
}
- ifsd_m[pidx] = m_head;
+ ifsd_m[pidx] = IFLIB_SAVE_MBUF(m_head, flags);
if (m_head->m_pkthdr.csum_flags & CSUM_SND_TAG)
pi.ipi_mbuf = m_head;
else
@@ -3617,7 +3652,7 @@
* cxgb
*/
if (__predict_false(nsegs > TXQ_AVAIL(txq))) {
- (void)iflib_completed_tx_reclaim(txq);
+ (void)iflib_completed_tx_reclaim(txq, NULL);
if (__predict_false(nsegs > TXQ_AVAIL(txq))) {
txq->ift_no_desc_avail++;
bus_dmamap_unload(buf_tag, map);
@@ -3707,19 +3742,22 @@
}
static void
-iflib_tx_desc_free(iflib_txq_t txq, int n)
+iflib_tx_desc_free(iflib_txq_t txq, int n, struct mbuf **m_defer)
{
uint32_t qsize, cidx, gen;
struct mbuf *m, **ifsd_m;
+ uintptr_t flags;
cidx = txq->ift_cidx;
gen = txq->ift_gen;
qsize = txq->ift_size;
- ifsd_m = txq->ift_sds.ifsd_m;
+ ifsd_m =txq->ift_sds.ifsd_m;
while (n-- > 0) {
- if ((m = ifsd_m[cidx]) != NULL) {
- if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+ if ((m = IFLIB_GET_MBUF(ifsd_m[cidx])) != NULL) {
+ flags = IFLIB_GET_FLAGS(ifsd_m[cidx]);
+ MPASS(flags != 0);
+ if (flags & IFLIB_TSO) {
bus_dmamap_sync(txq->ift_tso_buf_tag,
txq->ift_sds.ifsd_tso_map[cidx],
BUS_DMASYNC_POSTWRITE);
@@ -3734,7 +3772,12 @@
}
/* XXX we don't support any drivers that batch packets yet */
MPASS(m->m_nextpkt == NULL);
- m_freem(m);
+ if (m_defer == NULL) {
+ m_freem(m);
+ } else if (m != NULL) {
+ *m_defer = m;
+ m_defer++;
+ }
ifsd_m[cidx] = NULL;
#if MEMORY_LOGGING
txq->ift_dequeued++;
@@ -3751,28 +3794,20 @@
}
static __inline int
-iflib_completed_tx_reclaim(iflib_txq_t txq)
+iflib_txq_can_reclaim(iflib_txq_t txq)
{
int reclaim, thresh;
- uint32_t now;
- if_ctx_t ctx = txq->ift_ctx;
thresh = txq->ift_reclaim_thresh;
KASSERT(thresh >= 0, ("invalid threshold to reclaim"));
MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size);
- now = ticks;
- if (now <= (txq->ift_last_reclaim + txq->ift_reclaim_ticks) &&
+ if (ticks <= (txq->ift_last_reclaim + txq->ift_reclaim_ticks) &&
txq->ift_in_use < thresh)
- return (0);
- txq->ift_last_reclaim = now;
- /*
- * Need a rate-limiting check so that this isn't called every time
- */
- iflib_tx_credits_update(ctx, txq);
+ return (false);
+ iflib_tx_credits_update(txq->ift_ctx, txq);
reclaim = DESC_RECLAIMABLE(txq);
-
- if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) {
+ if (reclaim <= thresh) {
#ifdef INVARIANTS
if (iflib_verbose_debug) {
printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __func__,
@@ -3782,10 +3817,27 @@
#endif
return (0);
}
- iflib_tx_desc_free(txq, reclaim);
+ return (reclaim);
+}
+
+static __inline void
+_iflib_completed_tx_reclaim(iflib_txq_t txq, struct mbuf **m_defer, int reclaim)
+{
+ txq->ift_last_reclaim = ticks;
+ iflib_tx_desc_free(txq, reclaim, m_defer);
txq->ift_cleaned += reclaim;
txq->ift_in_use -= reclaim;
+}
+
+static __inline int
+iflib_completed_tx_reclaim(iflib_txq_t txq, struct mbuf **m_defer)
+{
+ int reclaim;
+ reclaim = iflib_txq_can_reclaim(txq);
+ if (reclaim == 0)
+ return (0);
+ _iflib_completed_tx_reclaim(txq, m_defer, reclaim);
return (reclaim);
}
@@ -3846,7 +3898,7 @@
DBG_COUNTER_INC(txq_drain_notready);
return (0);
}
- reclaimed = iflib_completed_tx_reclaim(txq);
+ reclaimed = iflib_completed_tx_reclaim(txq, NULL);
rang = iflib_txd_db_check(txq, reclaimed && txq->ift_db_pending);
avail = IDXDIFF(pidx, cidx, r->size);
@@ -4005,7 +4057,7 @@
#endif
if (ctx->ifc_sysctl_simple_tx) {
mtx_lock(&txq->ift_mtx);
- (void)iflib_completed_tx_reclaim(txq);
+ (void)iflib_completed_tx_reclaim(txq, NULL);
mtx_unlock(&txq->ift_mtx);
goto skip_ifmp;
}
@@ -6817,6 +6869,34 @@
return (err);
}
+static int
+iflib_handle_tx_defer_mfree(SYSCTL_HANDLER_ARGS)
+{
+ if_ctx_t ctx = (void *)arg1;
+ iflib_txq_t txq;
+ int i, err;
+ int defer;
+
+ defer = ctx->ifc_sysctl_tx_defer_mfree;
+ err = sysctl_handle_int(oidp, &defer, arg2, req);
+ if (err != 0) {
+ return err;
+ }
+
+ if (defer == ctx->ifc_sysctl_tx_defer_mfree)
+ return 0;
+
+ ctx->ifc_sysctl_tx_defer_mfree = defer;
+ if (ctx->ifc_txqs == NULL)
+ return (err);
+
+ txq = &ctx->ifc_txqs[0];
+ for (i = 0; i < NTXQSETS(ctx); i++, txq++) {
+ txq->ift_defer_mfree = defer;
+ }
+ return (err);
+}
+
#define NAME_BUFLEN 32
static void
iflib_add_device_sysctl_pre(if_ctx_t ctx)
@@ -6915,6 +6995,11 @@
0, iflib_handle_tx_reclaim_ticks, "I",
"Number of ticks before a TX reclaim is forced");
+ SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "tx_defer_mfree",
+ CTLTYPE_INT | CTLFLAG_RWTUN, ctx,
+ 0, iflib_handle_tx_defer_mfree, "I",
+ "Free completed transmits outside of TX ring lock");
+
if (scctx->isc_ntxqsets > 100)
qfmt = "txq%03d";
else if (scctx->isc_ntxqsets > 10)
@@ -7162,7 +7247,7 @@
return (EBUSY);
txq = &ctx->ifc_txqs[0];
- (void)iflib_completed_tx_reclaim(txq);
+ (void)iflib_completed_tx_reclaim(txq, NULL);
NET_EPOCH_ENTER(et);
for (i = 0; i < scctx->isc_nrxqsets; i++)
@@ -7190,7 +7275,8 @@
{
if_ctx_t ctx;
iflib_txq_t txq;
- int error;
+ struct mbuf **m_defer;
+ int error, i, reclaimable;
int bytes_sent = 0, pkt_sent = 0, mcast_sent = 0;
@@ -7216,8 +7302,34 @@
else
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
- (void)iflib_completed_tx_reclaim(txq);
+ m_defer = NULL;
+ reclaimable = iflib_txq_can_reclaim(txq);
+ if (reclaimable != 0) {
+ /*
+ * Try to set m_defer to the deferred mbuf reclaim array. If
+ * we can, the frees will happen outside the tx lock. If we
+ * can't, it means another thread is still proccessing frees.
+ */
+ if (txq->ift_defer_mfree &&
+ atomic_cmpset_acq_ptr((uintptr_t *)&txq->ift_sds.ifsd_m_defer,
+ (uintptr_t )txq->ift_sds.ifsd_m_deferb, 0)) {
+ m_defer = txq->ift_sds.ifsd_m_deferb;
+ }
+ _iflib_completed_tx_reclaim(txq, m_defer, reclaimable);
+ }
mtx_unlock(&txq->ift_mtx);
+
+ /*
+ * Process mbuf frees outside the tx lock
+ */
+ if (m_defer != NULL) {
+ for (i = 0; m_defer[i] != NULL; i++) {
+ m_freem(m_defer[i]);
+ m_defer[i] = NULL;
+ }
+ atomic_store_rel_ptr((uintptr_t *)&txq->ift_sds.ifsd_m_defer,
+ (uintptr_t)m_defer);
+ }
if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent);
if (mcast_sent)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Jan 19, 2:00 AM (7 h, 41 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27717668
Default Alt Text
D54356.id169266.diff (11 KB)
Attached To
Mode
D54356: iflib: Drop tx lock when freeing mbufs using simple_transmit
Attached
Detach File
Event Timeline
Log In to Comment