D25454.diff

Index: head/sys/dev/cxgbe/adapter.h
===================================================================
--- head/sys/dev/cxgbe/adapter.h
+++ head/sys/dev/cxgbe/adapter.h
@@ -550,6 +550,23 @@
struct mp_ring;
+struct txpkts {
+ uint8_t wr_type; /* type 0 or type 1 */
+ uint8_t npkt; /* # of packets in this work request */
+ uint8_t len16; /* # of 16B pieces used by this work request */
+ uint8_t score; /* 1-10. coalescing attempted if score > 3 */
+ uint8_t max_npkt; /* maximum number of packets allowed */
+ uint16_t plen; /* total payload (sum of all packets) */
+
+ /* straight from fw_eth_tx_pkts_vm_wr. */
+ __u8 ethmacdst[6];
+ __u8 ethmacsrc[6];
+ __be16 ethtype;
+ __be16 vlantci;
+
+ struct mbuf *mb[15];
+};
+
/* txq: SGE egress queue + what's needed for Ethernet NIC */
struct sge_txq {
struct sge_eq eq; /* MUST be first */
@@ -560,6 +577,7 @@
struct sglist *gl;
__be32 cpl_ctrl0; /* for convenience */
int tc_idx; /* traffic class */
+ struct txpkts txp;
struct task tx_reclaim_task;
/* stats for common events first */
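
The new struct txpkts above is a per-txq accumulator: packets are parked in mb[] until they are flushed as a single coalesced work request, bounded by max_npkt (at most the 15 slots in mb[]) and by the 16-bit payload total plen. A minimal userland sketch of those two limits follows; txp_try_add() and main() are illustrative only, not driver code.

/*
 * Minimal userland sketch of the txpkts accumulator added to struct sge_txq.
 * The field names mirror the diff (npkt, max_npkt, plen, mb[]); txp_try_add()
 * and main() are illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct pkt { uint16_t len; };

struct txpkts_sketch {
	uint8_t npkt;		/* packets currently held */
	uint8_t max_npkt;	/* from the firmware, capped at nitems(mb) */
	uint16_t plen;		/* total payload of the held packets */
	struct pkt *mb[15];	/* held packets, flushed as one work request */
};

/* Hold another packet if the work request limits allow it. */
static bool
txp_try_add(struct txpkts_sketch *txp, struct pkt *m)
{
	if (txp->npkt == txp->max_npkt)
		return (false);			/* mb[] is full */
	if ((uint32_t)txp->plen + m->len > 65535)
		return (false);			/* plen is only 16 bits wide */
	txp->mb[txp->npkt++] = m;
	txp->plen += m->len;
	return (true);
}

int
main(void)
{
	struct txpkts_sketch txp = { .max_npkt = 15 };
	struct pkt a = { .len = 1500 }, b = { .len = 9000 };

	printf("held a: %d\n", txp_try_add(&txp, &a));
	printf("held b: %d (npkt=%d, plen=%d)\n", txp_try_add(&txp, &b),
	    txp.npkt, txp.plen);
	return (0);
}
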
Index: head/sys/dev/cxgbe/common/common.h
===================================================================
--- head/sys/dev/cxgbe/common/common.h
+++ head/sys/dev/cxgbe/common/common.h
@@ -389,6 +389,7 @@
bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */
bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */
bool viid_smt_extn_support; /* FW returns vin, vfvld & smt index? */
+ unsigned int max_pkts_per_eth_tx_pkts_wr;
};
#define CHELSIO_T4 0x4
Index: head/sys/dev/cxgbe/t4_main.c
===================================================================
--- head/sys/dev/cxgbe/t4_main.c
+++ head/sys/dev/cxgbe/t4_main.c
@@ -2191,7 +2191,7 @@
vi->rsrv_noflowq);
items[0] = m;
- rc = mp_ring_enqueue(txq->r, items, 1, 4096);
+ rc = mp_ring_enqueue(txq->r, items, 1, 256);
if (__predict_false(rc != 0))
m_freem(m);
@@ -2212,7 +2212,7 @@
txq->eq.flags |= EQ_QFLUSH;
TXQ_UNLOCK(txq);
while (!mp_ring_is_idle(txq->r)) {
- mp_ring_check_drainage(txq->r, 0);
+ mp_ring_check_drainage(txq->r, 4096);
pause("qflush", 1);
}
TXQ_LOCK(txq);
@@ -2261,7 +2261,7 @@
struct sge_txq *txq;
for_each_txq(vi, i, txq)
- drops += counter_u64_fetch(txq->r->drops);
+ drops += counter_u64_fetch(txq->r->dropped);
}
return (drops);
@@ -2326,7 +2326,7 @@
struct sge_txq *txq;
for_each_txq(vi, i, txq)
- drops += counter_u64_fetch(txq->r->drops);
+ drops += counter_u64_fetch(txq->r->dropped);
}
return (drops);
@@ -4457,6 +4457,13 @@
else
sc->params.fr_nsmr_tpte_wr_support = false;
+ param[0] = FW_PARAM_PFVF(MAX_PKTS_PER_ETH_TX_PKTS_WR);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
+ if (rc == 0)
+ sc->params.max_pkts_per_eth_tx_pkts_wr = val[0];
+ else
+ sc->params.max_pkts_per_eth_tx_pkts_wr = 15;
+
/* get capabilites */
bzero(&caps, sizeof(caps));
caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
@@ -5965,7 +5972,7 @@
/* Wait for the mp_ring to empty. */
while (!mp_ring_is_idle(txq->r)) {
- mp_ring_check_drainage(txq->r, 0);
+ mp_ring_check_drainage(txq->r, 4096);
pause("rquiesce", 1);
}
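
Both the PF attach path above and the VF path at the end of this diff query FW_PARAM_PFVF(MAX_PKTS_PER_ETH_TX_PKTS_WR) and fall back to a fixed limit (15 for the PF, 14 for the VF) when the firmware does not answer. A small sketch of that query-with-fallback pattern; fw_query_param() is a stand-in for t4_query_params()/t4vf_query_params() and the mocked failure is an assumption.

/*
 * Sketch of the query-with-fallback pattern used for
 * MAX_PKTS_PER_ETH_TX_PKTS_WR.  fw_query_param() is a stand-in for the real
 * firmware query; here it simply pretends the firmware is too old.
 */
#include <stdint.h>
#include <stdio.h>

static int
fw_query_param(uint32_t param, uint32_t *val)
{
	(void)param;
	(void)val;
	return (-1);	/* older firmware: the query fails */
}

int
main(void)
{
	uint32_t max_pkts, val;

	if (fw_query_param(0 /* MAX_PKTS_PER_ETH_TX_PKTS_WR */, &val) == 0)
		max_pkts = val;
	else
		max_pkts = 15;	/* PF default; the VF path uses 14 */
	printf("max_pkts_per_eth_tx_pkts_wr = %u\n", max_pkts);
	return (0);
}
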
Index: head/sys/dev/cxgbe/t4_mp_ring.h
===================================================================
--- head/sys/dev/cxgbe/t4_mp_ring.h
+++ head/sys/dev/cxgbe/t4_mp_ring.h
@@ -36,33 +36,38 @@
#endif
struct mp_ring;
-typedef u_int (*ring_drain_t)(struct mp_ring *, u_int, u_int);
+typedef u_int (*ring_drain_t)(struct mp_ring *, u_int, u_int, bool *);
typedef u_int (*ring_can_drain_t)(struct mp_ring *);
struct mp_ring {
volatile uint64_t state __aligned(CACHE_LINE_SIZE);
+ struct malloc_type * mt;
int size __aligned(CACHE_LINE_SIZE);
void * cookie;
- struct malloc_type * mt;
ring_drain_t drain;
ring_can_drain_t can_drain; /* cheap, may be unreliable */
- counter_u64_t enqueues;
- counter_u64_t drops;
- counter_u64_t starts;
- counter_u64_t stalls;
- counter_u64_t restarts; /* recovered after stalling */
+ struct mtx * cons_lock;
+ counter_u64_t dropped;
+ counter_u64_t consumer[4];
+ counter_u64_t not_consumer;
counter_u64_t abdications;
+ counter_u64_t consumed;
+ counter_u64_t cons_idle;
+ counter_u64_t cons_idle2;
+ counter_u64_t stalls;
void * volatile items[] __aligned(CACHE_LINE_SIZE);
};
int mp_ring_alloc(struct mp_ring **, int, void *, ring_drain_t,
- ring_can_drain_t, struct malloc_type *, int);
+ ring_can_drain_t, struct malloc_type *, struct mtx *, int);
void mp_ring_free(struct mp_ring *);
int mp_ring_enqueue(struct mp_ring *, void **, int, int);
void mp_ring_check_drainage(struct mp_ring *, int);
void mp_ring_reset_stats(struct mp_ring *);
-int mp_ring_is_idle(struct mp_ring *);
+bool mp_ring_is_idle(struct mp_ring *);
+void mp_ring_sysctls(struct mp_ring *, struct sysctl_ctx_list *,
+ struct sysctl_oid_list *);
#endif
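
The ring's whole state (producer head and tail, consumer index, flags) still lives in a single 64-bit word that is read and updated with compare-and-swap. Below is a userland model of that layout using C11 stdatomic in place of the kernel's atomic_fcmpset_64(); the modulo wraparound in reserve() is a simplification of increment_idx().

/*
 * Userland model of the mp_ring state word: four 16-bit fields packed into
 * one 64-bit value and updated atomically.  The field layout mirrors the
 * diff's union ring_state; everything else is illustrative.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

union ring_state {
	struct {
		uint16_t pidx_head;	/* producer reservation head */
		uint16_t pidx_tail;	/* items up to here are visible */
		uint16_t cidx;		/* consumer index */
		uint16_t flags;		/* IDLE, BUSY, ... */
	};
	uint64_t state;
};

static _Atomic uint64_t ring_state_word;

/* Reserve n slots by advancing pidx_head with a CAS loop. */
static uint16_t
reserve(uint16_t n, uint16_t ring_size)
{
	union ring_state os, ns;

	os.state = atomic_load(&ring_state_word);
	do {
		ns.state = os.state;
		ns.pidx_head = (uint16_t)((os.pidx_head + n) % ring_size);
	} while (!atomic_compare_exchange_weak(&ring_state_word, &os.state,
	    ns.state));
	return (os.pidx_head);	/* start of our reservation */
}

int
main(void)
{
	printf("reserved at %d\n", reserve(4, 1024));
	printf("reserved at %d\n", reserve(4, 1024));
	return (0);
}
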
Index: head/sys/dev/cxgbe/t4_mp_ring.c
===================================================================
--- head/sys/dev/cxgbe/t4_mp_ring.c
+++ head/sys/dev/cxgbe/t4_mp_ring.c
@@ -34,6 +34,8 @@
#include <sys/counter.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
#include <machine/cpu.h>
#include "t4_mp_ring.h"
@@ -43,6 +45,23 @@
#define atomic_cmpset_rel_64 atomic_cmpset_64
#endif
+/*
+ * mp_ring handles multiple threads (producers) enqueueing data to a tx queue.
+ * The thread that is writing the hardware descriptors is the consumer and it
+ * runs with the consumer lock held. A producer becomes the consumer if there
+ * isn't one already. The consumer runs with the flags sets to BUSY and
+ * consumes everything (IDLE or COALESCING) or gets STALLED. If it is running
+ * over its budget it sets flags to TOO_BUSY. A producer that observes a
+ * TOO_BUSY consumer will become the new consumer by setting flags to
+ * TAKING_OVER. The original consumer stops and sets the flags back to BUSY for
+ * the new consumer.
+ *
+ * COALESCING is the same as IDLE except there are items being held in the hope
+ * that they can be coalesced with items that follow. The driver must arrange
+ * for a tx update or some other event that transmits all the held items in a
+ * timely manner if nothing else is enqueued.
+ */
+
union ring_state {
struct {
uint16_t pidx_head;
@@ -54,13 +73,21 @@
};
enum {
- IDLE = 0, /* consumer ran to completion, nothing more to do. */
+ IDLE = 0, /* tx is all caught up, nothing to do. */
+ COALESCING, /* IDLE, but tx frames are being held for coalescing */
BUSY, /* consumer is running already, or will be shortly. */
+ TOO_BUSY, /* consumer is running and is beyond its budget */
+ TAKING_OVER, /* new consumer taking over from a TOO_BUSY consumer */
STALLED, /* consumer stopped due to lack of resources. */
- ABDICATED, /* consumer stopped even though there was work to be
- done because it wants another thread to take over. */
};
+enum {
+ C_FAST = 0,
+ C_2,
+ C_3,
+ C_TAKEOVER,
+};
+
static inline uint16_t
space_available(struct mp_ring *r, union ring_state s)
{
@@ -83,93 +110,104 @@
return (x > n ? idx + n : n - x);
}
-/* Consumer is about to update the ring's state to s */
-static inline uint16_t
-state_to_flags(union ring_state s, int abdicate)
-{
-
- if (s.cidx == s.pidx_tail)
- return (IDLE);
- else if (abdicate && s.pidx_tail != s.pidx_head)
- return (ABDICATED);
-
- return (BUSY);
-}
-
/*
- * Caller passes in a state, with a guarantee that there is work to do and that
- * all items up to the pidx_tail in the state are visible.
+ * Consumer. Called with the consumer lock held and a guarantee that there is
+ * work to do.
*/
static void
-drain_ring(struct mp_ring *r, union ring_state os, uint16_t prev, int budget)
+drain_ring(struct mp_ring *r, int budget)
{
- union ring_state ns;
+ union ring_state os, ns;
int n, pending, total;
- uint16_t cidx = os.cidx;
- uint16_t pidx = os.pidx_tail;
+ uint16_t cidx;
+ uint16_t pidx;
+ bool coalescing;
+ mtx_assert(r->cons_lock, MA_OWNED);
+
+ os.state = atomic_load_acq_64(&r->state);
MPASS(os.flags == BUSY);
+
+ cidx = os.cidx;
+ pidx = os.pidx_tail;
MPASS(cidx != pidx);
- if (prev == IDLE)
- counter_u64_add(r->starts, 1);
pending = 0;
total = 0;
while (cidx != pidx) {
/* Items from cidx to pidx are available for consumption. */
- n = r->drain(r, cidx, pidx);
+ n = r->drain(r, cidx, pidx, &coalescing);
if (n == 0) {
critical_enter();
- os.state = r->state;
+ os.state = atomic_load_64(&r->state);
do {
ns.state = os.state;
ns.cidx = cidx;
- ns.flags = STALLED;
+
+ MPASS(os.flags == BUSY ||
+ os.flags == TOO_BUSY ||
+ os.flags == TAKING_OVER);
+
+ if (os.flags == TAKING_OVER)
+ ns.flags = BUSY;
+ else
+ ns.flags = STALLED;
} while (atomic_fcmpset_64(&r->state, &os.state,
ns.state) == 0);
critical_exit();
- if (prev != STALLED)
+ if (os.flags == TAKING_OVER)
+ counter_u64_add(r->abdications, 1);
+ else if (ns.flags == STALLED)
counter_u64_add(r->stalls, 1);
- else if (total > 0) {
- counter_u64_add(r->restarts, 1);
- counter_u64_add(r->stalls, 1);
- }
break;
}
cidx = increment_idx(r, cidx, n);
pending += n;
total += n;
+ counter_u64_add(r->consumed, n);
- /*
- * We update the cidx only if we've caught up with the pidx, the
- * real cidx is getting too far ahead of the one visible to
- * everyone else, or we have exceeded our budget.
- */
- if (cidx != pidx && pending < 64 && total < budget)
- continue;
- critical_enter();
- os.state = r->state;
+ os.state = atomic_load_64(&r->state);
do {
+ MPASS(os.flags == BUSY || os.flags == TOO_BUSY ||
+ os.flags == TAKING_OVER);
+
ns.state = os.state;
ns.cidx = cidx;
- ns.flags = state_to_flags(ns, total >= budget);
+ if (__predict_false(os.flags == TAKING_OVER)) {
+ MPASS(total >= budget);
+ ns.flags = BUSY;
+ continue;
+ }
+ if (cidx == os.pidx_tail) {
+ ns.flags = coalescing ? COALESCING : IDLE;
+ continue;
+ }
+ if (total >= budget) {
+ ns.flags = TOO_BUSY;
+ continue;
+ }
+ MPASS(os.flags == BUSY);
+ if (pending < 32)
+ break;
} while (atomic_fcmpset_acq_64(&r->state, &os.state, ns.state) == 0);
- critical_exit();
- if (ns.flags == ABDICATED)
+ if (__predict_false(os.flags == TAKING_OVER)) {
+ MPASS(ns.flags == BUSY);
counter_u64_add(r->abdications, 1);
- if (ns.flags != BUSY) {
- /* Wrong loop exit if we're going to stall. */
- MPASS(ns.flags != STALLED);
- if (prev == STALLED) {
- MPASS(total > 0);
- counter_u64_add(r->restarts, 1);
- }
break;
}
+ if (ns.flags == IDLE || ns.flags == COALESCING) {
+ MPASS(ns.pidx_tail == cidx);
+ if (ns.pidx_head != ns.pidx_tail)
+ counter_u64_add(r->cons_idle2, 1);
+ else
+ counter_u64_add(r->cons_idle, 1);
+ break;
+ }
+
/*
* The acquire style atomic above guarantees visibility of items
* associated with any pidx change that we notice here.
@@ -177,13 +215,55 @@
pidx = ns.pidx_tail;
pending = 0;
}
+
+#ifdef INVARIANTS
+ if (os.flags == TAKING_OVER)
+ MPASS(ns.flags == BUSY);
+ else {
+ MPASS(ns.flags == IDLE || ns.flags == COALESCING ||
+ ns.flags == STALLED);
+ }
+#endif
}
+static void
+drain_txpkts(struct mp_ring *r, union ring_state os, int budget)
+{
+ union ring_state ns;
+ uint16_t cidx = os.cidx;
+ uint16_t pidx = os.pidx_tail;
+ bool coalescing;
+
+ mtx_assert(r->cons_lock, MA_OWNED);
+ MPASS(os.flags == BUSY);
+ MPASS(cidx == pidx);
+
+ r->drain(r, cidx, pidx, &coalescing);
+ MPASS(coalescing == false);
+ critical_enter();
+ os.state = atomic_load_64(&r->state);
+ do {
+ ns.state = os.state;
+ MPASS(os.flags == BUSY);
+ MPASS(os.cidx == cidx);
+ if (ns.cidx == ns.pidx_tail)
+ ns.flags = IDLE;
+ else
+ ns.flags = BUSY;
+ } while (atomic_fcmpset_acq_64(&r->state, &os.state, ns.state) == 0);
+ critical_exit();
+
+ if (ns.flags == BUSY)
+ drain_ring(r, budget);
+}
+
int
mp_ring_alloc(struct mp_ring **pr, int size, void *cookie, ring_drain_t drain,
- ring_can_drain_t can_drain, struct malloc_type *mt, int flags)
+ ring_can_drain_t can_drain, struct malloc_type *mt, struct mtx *lck,
+ int flags)
{
struct mp_ring *r;
+ int i;
/* All idx are 16b so size can be 65536 at most */
if (pr == NULL || size < 2 || size > 65536 || drain == NULL ||
@@ -201,43 +281,59 @@
r->mt = mt;
r->drain = drain;
r->can_drain = can_drain;
- r->enqueues = counter_u64_alloc(flags);
- r->drops = counter_u64_alloc(flags);
- r->starts = counter_u64_alloc(flags);
- r->stalls = counter_u64_alloc(flags);
- r->restarts = counter_u64_alloc(flags);
- r->abdications = counter_u64_alloc(flags);
- if (r->enqueues == NULL || r->drops == NULL || r->starts == NULL ||
- r->stalls == NULL || r->restarts == NULL ||
- r->abdications == NULL) {
- mp_ring_free(r);
- return (ENOMEM);
+ r->cons_lock = lck;
+ if ((r->dropped = counter_u64_alloc(flags)) == NULL)
+ goto failed;
+ for (i = 0; i < nitems(r->consumer); i++) {
+ if ((r->consumer[i] = counter_u64_alloc(flags)) == NULL)
+ goto failed;
}
-
+ if ((r->not_consumer = counter_u64_alloc(flags)) == NULL)
+ goto failed;
+ if ((r->abdications = counter_u64_alloc(flags)) == NULL)
+ goto failed;
+ if ((r->stalls = counter_u64_alloc(flags)) == NULL)
+ goto failed;
+ if ((r->consumed = counter_u64_alloc(flags)) == NULL)
+ goto failed;
+ if ((r->cons_idle = counter_u64_alloc(flags)) == NULL)
+ goto failed;
+ if ((r->cons_idle2 = counter_u64_alloc(flags)) == NULL)
+ goto failed;
*pr = r;
return (0);
+failed:
+ mp_ring_free(r);
+ return (ENOMEM);
}
void
mp_ring_free(struct mp_ring *r)
{
+ int i;
if (r == NULL)
return;
- if (r->enqueues != NULL)
- counter_u64_free(r->enqueues);
- if (r->drops != NULL)
- counter_u64_free(r->drops);
- if (r->starts != NULL)
- counter_u64_free(r->starts);
- if (r->stalls != NULL)
- counter_u64_free(r->stalls);
- if (r->restarts != NULL)
- counter_u64_free(r->restarts);
+ if (r->dropped != NULL)
+ counter_u64_free(r->dropped);
+ for (i = 0; i < nitems(r->consumer); i++) {
+ if (r->consumer[i] != NULL)
+ counter_u64_free(r->consumer[i]);
+ }
+ if (r->not_consumer != NULL)
+ counter_u64_free(r->not_consumer);
if (r->abdications != NULL)
counter_u64_free(r->abdications);
+ if (r->stalls != NULL)
+ counter_u64_free(r->stalls);
+ if (r->consumed != NULL)
+ counter_u64_free(r->consumed);
+ if (r->cons_idle != NULL)
+ counter_u64_free(r->cons_idle);
+ if (r->cons_idle2 != NULL)
+ counter_u64_free(r->cons_idle2);
free(r, r->mt);
}
@@ -252,7 +348,8 @@
{
union ring_state os, ns;
uint16_t pidx_start, pidx_stop;
- int i;
+ int i, nospc, cons;
+ bool consumer;
MPASS(items != NULL);
MPASS(n > 0);
@@ -261,26 +358,70 @@
* Reserve room for the new items. Our reservation, if successful, is
* from 'pidx_start' to 'pidx_stop'.
*/
- os.state = r->state;
+ nospc = 0;
+ os.state = atomic_load_64(&r->state);
for (;;) {
- if (n >= space_available(r, os)) {
- counter_u64_add(r->drops, n);
+ for (;;) {
+ if (__predict_true(space_available(r, os) >= n))
+ break;
+
+ /* Not enough room in the ring. */
+
MPASS(os.flags != IDLE);
+ MPASS(os.flags != COALESCING);
+ if (__predict_false(++nospc > 100)) {
+ counter_u64_add(r->dropped, n);
+ return (ENOBUFS);
+ }
if (os.flags == STALLED)
- mp_ring_check_drainage(r, 0);
- return (ENOBUFS);
+ mp_ring_check_drainage(r, 64);
+ else
+ cpu_spinwait();
+ os.state = atomic_load_64(&r->state);
}
+
+ /* There is room in the ring. */
+
+ cons = -1;
ns.state = os.state;
ns.pidx_head = increment_idx(r, os.pidx_head, n);
+ if (os.flags == IDLE || os.flags == COALESCING) {
+ MPASS(os.pidx_tail == os.cidx);
+ if (os.pidx_head == os.pidx_tail) {
+ cons = C_FAST;
+ ns.pidx_tail = increment_idx(r, os.pidx_tail, n);
+ } else
+ cons = C_2;
+ ns.flags = BUSY;
+ } else if (os.flags == TOO_BUSY) {
+ cons = C_TAKEOVER;
+ ns.flags = TAKING_OVER;
+ }
critical_enter();
if (atomic_fcmpset_64(&r->state, &os.state, ns.state))
break;
critical_exit();
cpu_spinwait();
- }
+ };
+
pidx_start = os.pidx_head;
pidx_stop = ns.pidx_head;
+ if (cons == C_FAST) {
+ i = pidx_start;
+ do {
+ r->items[i] = *items++;
+ if (__predict_false(++i == r->size))
+ i = 0;
+ } while (i != pidx_stop);
+ critical_exit();
+ counter_u64_add(r->consumer[C_FAST], 1);
+ mtx_lock(r->cons_lock);
+ drain_ring(r, budget);
+ mtx_unlock(r->cons_lock);
+ return (0);
+ }
+
/*
* Wait for other producers who got in ahead of us to enqueue their
* items, one producer at a time. It is our turn when the ring's
@@ -288,7 +429,7 @@
*/
while (ns.pidx_tail != pidx_start) {
cpu_spinwait();
- ns.state = r->state;
+ ns.state = atomic_load_64(&r->state);
}
/* Now it is our turn to fill up the area we reserved earlier. */
@@ -303,21 +444,33 @@
* Update the ring's pidx_tail. The release style atomic guarantees
* that the items are visible to any thread that sees the updated pidx.
*/
- os.state = r->state;
+ os.state = atomic_load_64(&r->state);
do {
+ consumer = false;
ns.state = os.state;
ns.pidx_tail = pidx_stop;
- ns.flags = BUSY;
+ if (os.flags == IDLE || os.flags == COALESCING ||
+ (os.flags == STALLED && r->can_drain(r))) {
+ MPASS(cons == -1);
+ consumer = true;
+ ns.flags = BUSY;
+ }
} while (atomic_fcmpset_rel_64(&r->state, &os.state, ns.state) == 0);
critical_exit();
- counter_u64_add(r->enqueues, n);
- /*
- * Turn into a consumer if some other thread isn't active as a consumer
- * already.
- */
- if (os.flags != BUSY)
- drain_ring(r, ns, os.flags, budget);
+ if (cons == -1) {
+ if (consumer)
+ cons = C_3;
+ else {
+ counter_u64_add(r->not_consumer, 1);
+ return (0);
+ }
+ }
+ MPASS(cons > C_FAST && cons < nitems(r->consumer));
+ counter_u64_add(r->consumer[cons], 1);
+ mtx_lock(r->cons_lock);
+ drain_ring(r, budget);
+ mtx_unlock(r->cons_lock);
return (0);
}
@@ -327,46 +480,96 @@
{
union ring_state os, ns;
- os.state = r->state;
- if (os.flags != STALLED || os.pidx_head != os.pidx_tail ||
- r->can_drain(r) == 0)
- return;
-
- MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */
- ns.state = os.state;
- ns.flags = BUSY;
-
- /*
- * The acquire style atomic guarantees visibility of items associated
- * with the pidx that we read here.
- */
- if (!atomic_cmpset_acq_64(&r->state, os.state, ns.state))
- return;
-
- drain_ring(r, ns, os.flags, budget);
+ os.state = atomic_load_64(&r->state);
+ if (os.flags == STALLED && r->can_drain(r)) {
+ MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */
+ ns.state = os.state;
+ ns.flags = BUSY;
+ if (atomic_cmpset_acq_64(&r->state, os.state, ns.state)) {
+ mtx_lock(r->cons_lock);
+ drain_ring(r, budget);
+ mtx_unlock(r->cons_lock);
+ }
+ } else if (os.flags == COALESCING) {
+ MPASS(os.cidx == os.pidx_tail);
+ ns.state = os.state;
+ ns.flags = BUSY;
+ if (atomic_cmpset_acq_64(&r->state, os.state, ns.state)) {
+ mtx_lock(r->cons_lock);
+ drain_txpkts(r, ns, budget);
+ mtx_unlock(r->cons_lock);
+ }
+ }
}
void
mp_ring_reset_stats(struct mp_ring *r)
{
+ int i;
- counter_u64_zero(r->enqueues);
- counter_u64_zero(r->drops);
- counter_u64_zero(r->starts);
- counter_u64_zero(r->stalls);
- counter_u64_zero(r->restarts);
+ counter_u64_zero(r->dropped);
+ for (i = 0; i < nitems(r->consumer); i++)
+ counter_u64_zero(r->consumer[i]);
+ counter_u64_zero(r->not_consumer);
counter_u64_zero(r->abdications);
+ counter_u64_zero(r->stalls);
+ counter_u64_zero(r->consumed);
+ counter_u64_zero(r->cons_idle);
+ counter_u64_zero(r->cons_idle2);
}
-int
+bool
mp_ring_is_idle(struct mp_ring *r)
{
union ring_state s;
- s.state = r->state;
+ s.state = atomic_load_64(&r->state);
if (s.pidx_head == s.pidx_tail && s.pidx_tail == s.cidx &&
s.flags == IDLE)
- return (1);
+ return (true);
- return (0);
+ return (false);
+}
+
+void
+mp_ring_sysctls(struct mp_ring *r, struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *children)
+{
+ struct sysctl_oid *oid;
+
+ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "mp_ring", CTLFLAG_RD |
+ CTLFLAG_MPSAFE, NULL, "mp_ring statistics");
+ children = SYSCTL_CHILDREN(oid);
+
+ SYSCTL_ADD_U64(ctx, children, OID_AUTO, "state", CTLFLAG_RD,
+ __DEVOLATILE(uint64_t *, &r->state), 0, "ring state");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "dropped", CTLFLAG_RD,
+ &r->dropped, "# of items dropped");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumed",
+ CTLFLAG_RD, &r->consumed, "# of items consumed");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "fast_consumer",
+ CTLFLAG_RD, &r->consumer[C_FAST],
+ "# of times producer became consumer (fast)");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumer2",
+ CTLFLAG_RD, &r->consumer[C_2],
+ "# of times producer became consumer (2)");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumer3",
+ CTLFLAG_RD, &r->consumer[C_3],
+ "# of times producer became consumer (3)");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "takeovers",
+ CTLFLAG_RD, &r->consumer[C_TAKEOVER],
+ "# of times producer took over from another consumer.");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "not_consumer",
+ CTLFLAG_RD, &r->not_consumer,
+ "# of times producer did not become consumer");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "abdications",
+ CTLFLAG_RD, &r->abdications, "# of consumer abdications");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "stalls",
+ CTLFLAG_RD, &r->stalls, "# of consumer stalls");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "cons_idle",
+ CTLFLAG_RD, &r->cons_idle,
+ "# of times consumer ran fully to completion");
+ SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "cons_idle2",
+ CTLFLAG_RD, &r->cons_idle2,
+ "# of times consumer idled when another enqueue was in progress");
}
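
With the rework above, a producer decides during its enqueue CAS whether it also becomes the consumer, and the per-path counters (C_FAST, C_2, C_3, C_TAKEOVER, not_consumer) record which way it went. A compact model of that decision follows; decide() is an illustrative flattening of logic the kernel performs across its two CAS loops on the state word.

/*
 * Model of the producer-side decision in mp_ring_enqueue(): which consumer
 * path, if any, an enqueuing thread takes based on the flags it observed.
 * The enum values and C_* names follow the diff; decide() is a simplification.
 */
#include <stdbool.h>
#include <stdio.h>

enum flags { IDLE, COALESCING, BUSY, TOO_BUSY, TAKING_OVER, STALLED };
enum consumer { C_NONE = -1, C_FAST, C_2, C_3, C_TAKEOVER };

static enum consumer
decide(enum flags observed, bool ring_was_empty, bool can_drain)
{
	switch (observed) {
	case IDLE:
	case COALESCING:
		/* No consumer active: this producer becomes one. */
		return (ring_was_empty ? C_FAST : C_2);
	case TOO_BUSY:
		/* The running consumer is over budget: take over from it. */
		return (C_TAKEOVER);
	case STALLED:
		/* Restart only if resources are available again. */
		return (can_drain ? C_3 : C_NONE);
	default:
		/* BUSY/TAKING_OVER: someone else will drain our items. */
		return (C_NONE);
	}
}

int
main(void)
{
	printf("idle + empty -> %d (C_FAST)\n", decide(IDLE, true, false));
	printf("too busy     -> %d (C_TAKEOVER)\n", decide(TOO_BUSY, false, false));
	printf("busy         -> %d (none)\n", decide(BUSY, false, false));
	return (0);
}
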
Index: head/sys/dev/cxgbe/t4_sge.c
===================================================================
--- head/sys/dev/cxgbe/t4_sge.c
+++ head/sys/dev/cxgbe/t4_sge.c
@@ -203,19 +203,6 @@
SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0,
"Enable presorting of LRO frames");
-struct txpkts {
- u_int wr_type; /* type 0 or type 1 */
- u_int npkt; /* # of packets in this work request */
- u_int plen; /* total payload (sum of all packets) */
- u_int len16; /* # of 16B pieces used by this work request */
-};
-
-/* A packet's SGL. This + m_pkthdr has all info needed for tx */
-struct sgl {
- struct sglist sg;
- struct sglist_seg seg[TX_SGL_SEGS];
-};
-
static int service_iq(struct sge_iq *, int);
static int service_iq_fl(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
@@ -284,14 +271,16 @@
static inline u_int txpkts0_len16(u_int);
static inline u_int txpkts1_len16(void);
static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
-static u_int write_txpkt_wr(struct adapter *, struct sge_txq *,
- struct fw_eth_tx_pkt_wr *, struct mbuf *, u_int);
+static u_int write_txpkt_wr(struct adapter *, struct sge_txq *, struct mbuf *,
+ u_int);
static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *,
- struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int);
-static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int);
-static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int);
-static u_int write_txpkts_wr(struct adapter *, struct sge_txq *,
- struct fw_eth_tx_pkts_wr *, struct mbuf *, const struct txpkts *, u_int);
+ struct mbuf *);
+static int add_to_txpkts_vf(struct adapter *, struct sge_txq *, struct mbuf *,
+ int, bool *);
+static int add_to_txpkts_pf(struct adapter *, struct sge_txq *, struct mbuf *,
+ int, bool *);
+static u_int write_txpkts_wr(struct adapter *, struct sge_txq *);
+static u_int write_txpkts_vm_wr(struct adapter *, struct sge_txq *);
static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int);
@@ -2839,7 +2828,7 @@
return (total_available_tx_desc(eq) > eq->sidx / 8);
}
-static inline int
+static inline bool
cannot_use_txpkts(struct mbuf *m)
{
/* maybe put a GL limit too, to avoid silliness? */
@@ -2855,8 +2844,9 @@
}
static inline int
-wr_can_update_eq(struct fw_eth_tx_pkts_wr *wr)
+wr_can_update_eq(void *p)
{
+ struct fw_eth_tx_pkts_wr *wr = p;
switch (G_FW_WR_OP(be32toh(wr->op_pkd))) {
case FW_ULPTX_WR:
@@ -2864,159 +2854,232 @@
case FW_ETH_TX_PKTS_WR:
case FW_ETH_TX_PKTS2_WR:
case FW_ETH_TX_PKT_VM_WR:
+ case FW_ETH_TX_PKTS_VM_WR:
return (1);
default:
return (0);
}
}
+static inline void
+set_txupdate_flags(struct sge_txq *txq, u_int avail,
+ struct fw_eth_tx_pkt_wr *wr)
+{
+ struct sge_eq *eq = &txq->eq;
+ struct txpkts *txp = &txq->txp;
+
+ if ((txp->npkt > 0 || avail < eq->sidx / 2) &&
+ atomic_cmpset_int(&eq->equiq, 0, 1)) {
+ wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
+ eq->equeqidx = eq->pidx;
+ } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) {
+ wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
+ eq->equeqidx = eq->pidx;
+ }
+}
+
/*
* r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to
* be consumed. Return the actual number consumed. 0 indicates a stall.
*/
static u_int
-eth_tx(struct mp_ring *r, u_int cidx, u_int pidx)
+eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
{
struct sge_txq *txq = r->cookie;
- struct sge_eq *eq = &txq->eq;
struct ifnet *ifp = txq->ifp;
+ struct sge_eq *eq = &txq->eq;
+ struct txpkts *txp = &txq->txp;
struct vi_info *vi = ifp->if_softc;
struct adapter *sc = vi->adapter;
u_int total, remaining; /* # of packets */
- u_int available, dbdiff; /* # of hardware descriptors */
- u_int n, next_cidx;
- struct mbuf *m0, *tail;
- struct txpkts txp;
- struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */
+ u_int n, avail, dbdiff; /* # of hardware descriptors */
+ int i, rc;
+ struct mbuf *m0;
+ bool snd;
+ void *wr; /* start of the last WR written to the ring */
- remaining = IDXDIFF(pidx, cidx, r->size);
- MPASS(remaining > 0); /* Must not be called without work to do. */
- total = 0;
+ TXQ_LOCK_ASSERT_OWNED(txq);
- TXQ_LOCK(txq);
+ remaining = IDXDIFF(pidx, cidx, r->size);
if (__predict_false(discard_tx(eq))) {
+ for (i = 0; i < txp->npkt; i++)
+ m_freem(txp->mb[i]);
+ txp->npkt = 0;
while (cidx != pidx) {
m0 = r->items[cidx];
m_freem(m0);
if (++cidx == r->size)
cidx = 0;
}
- reclaim_tx_descs(txq, 2048);
- total = remaining;
- goto done;
+ reclaim_tx_descs(txq, eq->sidx);
+ *coalescing = false;
+ return (remaining); /* emptied */
}
/* How many hardware descriptors do we have readily available. */
- if (eq->pidx == eq->cidx)
- available = eq->sidx - 1;
- else
- available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
- dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx);
+ if (eq->pidx == eq->cidx) {
+ avail = eq->sidx - 1;
+ if (txp->score++ >= 5)
+ txp->score = 5; /* tx is completely idle, reset. */
+ } else
+ avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
- while (remaining > 0) {
+ total = 0;
+ if (remaining == 0) {
+ if (txp->score-- == 1) /* egr_update had to drain txpkts */
+ txp->score = 1;
+ goto send_txpkts;
+ }
+ dbdiff = 0;
+ MPASS(remaining > 0);
+ while (remaining > 0) {
m0 = r->items[cidx];
M_ASSERTPKTHDR(m0);
MPASS(m0->m_nextpkt == NULL);
- if (available < tx_len16_to_desc(mbuf_len16(m0))) {
- available += reclaim_tx_descs(txq, 64);
- if (available < tx_len16_to_desc(mbuf_len16(m0)))
- break; /* out of descriptors */
+ if (avail < 2 * SGE_MAX_WR_NDESC)
+ avail += reclaim_tx_descs(txq, 64);
+
+ if (txp->npkt > 0 || remaining > 1 || txp->score > 3 ||
+ atomic_load_int(&txq->eq.equiq) != 0) {
+ if (sc->flags & IS_VF)
+ rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd);
+ else
+ rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd);
+ } else {
+ snd = false;
+ rc = EINVAL;
}
+ if (snd) {
+ MPASS(txp->npkt > 0);
+ for (i = 0; i < txp->npkt; i++)
+ ETHER_BPF_MTAP(ifp, txp->mb[i]);
+ if (txp->npkt > 1) {
+ if (txp->score++ >= 10)
+ txp->score = 10;
+ MPASS(avail >= tx_len16_to_desc(txp->len16));
+ if (sc->flags & IS_VF)
+ n = write_txpkts_vm_wr(sc, txq);
+ else
+ n = write_txpkts_wr(sc, txq);
+ } else {
+ MPASS(avail >=
+ tx_len16_to_desc(mbuf_len16(txp->mb[0])));
+ if (sc->flags & IS_VF)
+ n = write_txpkt_vm_wr(sc, txq,
+ txp->mb[0]);
+ else
+ n = write_txpkt_wr(sc, txq, txp->mb[0],
+ avail);
+ }
+ MPASS(n <= SGE_MAX_WR_NDESC);
+ avail -= n;
+ dbdiff += n;
+ wr = &eq->desc[eq->pidx];
+ IDXINCR(eq->pidx, n, eq->sidx);
+ txp->npkt = 0; /* emptied */
+ }
+ if (rc == 0) {
+ /* m0 was coalesced into txq->txpkts. */
+ goto next_mbuf;
+ }
+ if (rc == EAGAIN) {
+ /*
+ * m0 is suitable for tx coalescing but could not be
+ * combined with the existing txq->txpkts, which has now
+ * been transmitted. Start a new txpkts with m0.
+ */
+ MPASS(snd);
+ MPASS(txp->npkt == 0);
+ continue;
+ }
- next_cidx = cidx + 1;
- if (__predict_false(next_cidx == r->size))
- next_cidx = 0;
-
- wr = (void *)&eq->desc[eq->pidx];
+ MPASS(rc != 0 && rc != EAGAIN);
+ MPASS(txp->npkt == 0);
+ wr = &eq->desc[eq->pidx];
if (mbuf_cflags(m0) & MC_RAW_WR) {
- total++;
- remaining--;
- n = write_raw_wr(txq, (void *)wr, m0, available);
+ n = write_raw_wr(txq, wr, m0, avail);
#ifdef KERN_TLS
} else if (mbuf_cflags(m0) & MC_TLS) {
- total++;
- remaining--;
ETHER_BPF_MTAP(ifp, m0);
- n = t6_ktls_write_wr(txq,(void *)wr, m0,
- mbuf_nsegs(m0), available);
+ n = t6_ktls_write_wr(txq, wr, m0, mbuf_nsegs(m0),
+ avail);
#endif
- } else if (sc->flags & IS_VF) {
- total++;
- remaining--;
- ETHER_BPF_MTAP(ifp, m0);
- n = write_txpkt_vm_wr(sc, txq, (void *)wr, m0,
- available);
- } else if (remaining > 1 &&
- try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) {
-
- /* pkts at cidx, next_cidx should both be in txp. */
- MPASS(txp.npkt == 2);
- tail = r->items[next_cidx];
- MPASS(tail->m_nextpkt == NULL);
- ETHER_BPF_MTAP(ifp, m0);
- ETHER_BPF_MTAP(ifp, tail);
- m0->m_nextpkt = tail;
-
- if (__predict_false(++next_cidx == r->size))
- next_cidx = 0;
-
- while (next_cidx != pidx) {
- if (add_to_txpkts(r->items[next_cidx], &txp,
- available) != 0)
- break;
- tail->m_nextpkt = r->items[next_cidx];
- tail = tail->m_nextpkt;
- ETHER_BPF_MTAP(ifp, tail);
- if (__predict_false(++next_cidx == r->size))
- next_cidx = 0;
- }
-
- n = write_txpkts_wr(sc, txq, wr, m0, &txp, available);
- total += txp.npkt;
- remaining -= txp.npkt;
} else {
- total++;
- remaining--;
- ETHER_BPF_MTAP(ifp, m0);
- n = write_txpkt_wr(sc, txq, (void *)wr, m0, available);
+ n = tx_len16_to_desc(mbuf_len16(m0));
+ if (__predict_false(avail < n)) {
+ avail += reclaim_tx_descs(txq, 32);
+ if (avail < n)
+ break; /* out of descriptors */
+ }
+ if (sc->flags & IS_VF)
+ n = write_txpkt_vm_wr(sc, txq, m0);
+ else
+ n = write_txpkt_wr(sc, txq, m0, avail);
}
- MPASS(n >= 1 && n <= available);
+ MPASS(n >= 1 && n <= avail);
if (!(mbuf_cflags(m0) & MC_TLS))
MPASS(n <= SGE_MAX_WR_NDESC);
- available -= n;
+ avail -= n;
dbdiff += n;
IDXINCR(eq->pidx, n, eq->sidx);
- if (wr_can_update_eq(wr)) {
- if (total_available_tx_desc(eq) < eq->sidx / 4 &&
- atomic_cmpset_int(&eq->equiq, 0, 1)) {
- wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
- F_FW_WR_EQUEQ);
- eq->equeqidx = eq->pidx;
- } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >=
- 32) {
- wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
- eq->equeqidx = eq->pidx;
- }
- }
-
- if (dbdiff >= 16 && remaining >= 4) {
+ if (dbdiff >= 512 / EQ_ESIZE) { /* X_FETCHBURSTMAX_512B */
+ if (wr_can_update_eq(wr))
+ set_txupdate_flags(txq, avail, wr);
ring_eq_db(sc, eq, dbdiff);
- available += reclaim_tx_descs(txq, 4 * dbdiff);
+ avail += reclaim_tx_descs(txq, 32);
dbdiff = 0;
}
-
- cidx = next_cidx;
+next_mbuf:
+ total++;
+ remaining--;
+ if (__predict_false(++cidx == r->size))
+ cidx = 0;
}
if (dbdiff != 0) {
+ if (wr_can_update_eq(wr))
+ set_txupdate_flags(txq, avail, wr);
ring_eq_db(sc, eq, dbdiff);
reclaim_tx_descs(txq, 32);
+ } else if (eq->pidx == eq->cidx && txp->npkt > 0 &&
+ atomic_load_int(&txq->eq.equiq) == 0) {
+ /*
+ * If nothing was submitted to the chip for tx (it was coalesced
+ * into txpkts instead) and there is no tx update outstanding
+ * then we need to send txpkts now.
+ */
+send_txpkts:
+ MPASS(txp->npkt > 0);
+ for (i = 0; i < txp->npkt; i++)
+ ETHER_BPF_MTAP(ifp, txp->mb[i]);
+ if (txp->npkt > 1) {
+ MPASS(avail >= tx_len16_to_desc(txp->len16));
+ if (sc->flags & IS_VF)
+ n = write_txpkts_vm_wr(sc, txq);
+ else
+ n = write_txpkts_wr(sc, txq);
+ } else {
+ MPASS(avail >=
+ tx_len16_to_desc(mbuf_len16(txp->mb[0])));
+ if (sc->flags & IS_VF)
+ n = write_txpkt_vm_wr(sc, txq, txp->mb[0]);
+ else
+ n = write_txpkt_wr(sc, txq, txp->mb[0], avail);
+ }
+ MPASS(n <= SGE_MAX_WR_NDESC);
+ wr = &eq->desc[eq->pidx];
+ IDXINCR(eq->pidx, n, eq->sidx);
+ txp->npkt = 0; /* emptied */
+
+ MPASS(wr_can_update_eq(wr));
+ set_txupdate_flags(txq, avail - n, wr);
+ ring_eq_db(sc, eq, n);
+ reclaim_tx_descs(txq, 32);
}
-done:
- TXQ_UNLOCK(txq);
+ *coalescing = txp->npkt > 0;
return (total);
}
@@ -4106,11 +4169,12 @@
struct port_info *pi = vi->pi;
struct adapter *sc = pi->adapter;
struct sge_eq *eq = &txq->eq;
+ struct txpkts *txp;
char name[16];
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx,
- M_CXGBE, M_WAITOK);
+ M_CXGBE, &eq->eq_lock, M_WAITOK);
if (rc != 0) {
device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc);
return (rc);
@@ -4147,6 +4211,12 @@
txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE,
M_ZERO | M_WAITOK);
+ txp = &txq->txp;
+ txp->score = 5;
+ MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr);
+ txq->txp.max_npkt = min(nitems(txp->mb),
+ sc->params.max_pkts_per_eth_tx_pkts_wr);
+
snprintf(name, sizeof(name), "%d", idx);
oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name,
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queue");
@@ -4242,26 +4312,8 @@
"# of NIC TLS sessions using AES-GCM");
}
#endif
+ mp_ring_sysctls(txq->r, &vi->ctx, children);
- SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues",
- CTLFLAG_RD, &txq->r->enqueues,
- "# of enqueues to the mp_ring for this queue");
- SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops",
- CTLFLAG_RD, &txq->r->drops,
- "# of drops in the mp_ring for this queue");
- SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts",
- CTLFLAG_RD, &txq->r->starts,
- "# of normal consumer starts in the mp_ring for this queue");
- SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls",
- CTLFLAG_RD, &txq->r->stalls,
- "# of consumer stalls in the mp_ring for this queue");
- SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts",
- CTLFLAG_RD, &txq->r->restarts,
- "# of consumer restarts in the mp_ring for this queue");
- SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications",
- CTLFLAG_RD, &txq->r->abdications,
- "# of consumer abdications in the mp_ring for this queue");
-
return (0);
}
@@ -4655,10 +4707,10 @@
* The return value is the # of hardware descriptors used.
*/
static u_int
-write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq,
- struct fw_eth_tx_pkt_vm_wr *wr, struct mbuf *m0, u_int available)
+write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0)
{
- struct sge_eq *eq = &txq->eq;
+ struct sge_eq *eq;
+ struct fw_eth_tx_pkt_vm_wr *wr;
struct tx_sdesc *txsd;
struct cpl_tx_pkt_core *cpl;
uint32_t ctrl; /* used in many unrelated places */
@@ -4668,7 +4720,6 @@
TXQ_LOCK_ASSERT_OWNED(txq);
M_ASSERTPKTHDR(m0);
- MPASS(available > 0 && available < eq->sidx);
len16 = mbuf_len16(m0);
nsegs = mbuf_nsegs(m0);
@@ -4677,10 +4728,10 @@
if (needs_tso(m0))
ctrl += sizeof(struct cpl_tx_pkt_lso_core);
ndesc = tx_len16_to_desc(len16);
- MPASS(ndesc <= available);
/* Firmware work request header */
- MPASS(wr == (void *)&eq->desc[eq->pidx]);
+ eq = &txq->eq;
+ wr = (void *)&eq->desc[eq->pidx];
wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
@@ -4760,7 +4811,6 @@
} else
write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
txq->sgl_wrs++;
-
txq->txpkt_wrs++;
txsd = &txq->sdesc[eq->pidx];
@@ -4811,10 +4861,11 @@
* The return value is the # of hardware descriptors used.
*/
static u_int
-write_txpkt_wr(struct adapter *sc, struct sge_txq *txq,
- struct fw_eth_tx_pkt_wr *wr, struct mbuf *m0, u_int available)
+write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
+ u_int available)
{
- struct sge_eq *eq = &txq->eq;
+ struct sge_eq *eq;
+ struct fw_eth_tx_pkt_wr *wr;
struct tx_sdesc *txsd;
struct cpl_tx_pkt_core *cpl;
uint32_t ctrl; /* used in many unrelated places */
@@ -4824,7 +4875,6 @@
TXQ_LOCK_ASSERT_OWNED(txq);
M_ASSERTPKTHDR(m0);
- MPASS(available > 0 && available < eq->sidx);
len16 = mbuf_len16(m0);
nsegs = mbuf_nsegs(m0);
@@ -4844,7 +4894,8 @@
MPASS(ndesc <= available);
/* Firmware work request header */
- MPASS(wr == (void *)&eq->desc[eq->pidx]);
+ eq = &txq->eq;
+ wr = (void *)&eq->desc[eq->pidx];
wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
@@ -4927,71 +4978,151 @@
return (ndesc);
}
+static inline bool
+cmp_l2hdr(struct txpkts *txp, struct mbuf *m)
+{
+ int len;
+
+ MPASS(txp->npkt > 0);
+ MPASS(m->m_len >= 16); /* type1 implies 1 GL with all of the frame. */
+
+ if (txp->ethtype == be16toh(ETHERTYPE_VLAN))
+ len = sizeof(struct ether_vlan_header);
+ else
+ len = sizeof(struct ether_header);
+
+ return (memcmp(m->m_data, &txp->ethmacdst[0], len) != 0);
+}
+
+static inline void
+save_l2hdr(struct txpkts *txp, struct mbuf *m)
+{
+ MPASS(m->m_len >= 16); /* type1 implies 1 GL with all of the frame. */
+
+ memcpy(&txp->ethmacdst[0], mtod(m, const void *), 16);
+}
+
static int
-try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available)
+add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m,
+ int avail, bool *send)
{
- u_int needed, nsegs1, nsegs2, l1, l2;
+ struct txpkts *txp = &txq->txp;
- if (cannot_use_txpkts(m) || cannot_use_txpkts(n))
- return (1);
+ MPASS(sc->flags & IS_VF);
- nsegs1 = mbuf_nsegs(m);
- nsegs2 = mbuf_nsegs(n);
- if (nsegs1 + nsegs2 == 2) {
- txp->wr_type = 1;
- l1 = l2 = txpkts1_len16();
- } else {
- txp->wr_type = 0;
- l1 = txpkts0_len16(nsegs1);
- l2 = txpkts0_len16(nsegs2);
+ /* Cannot have TSO and coalesce at the same time. */
+ if (cannot_use_txpkts(m)) {
+cannot_coalesce:
+ *send = txp->npkt > 0;
+ return (EINVAL);
}
- txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2;
- needed = tx_len16_to_desc(txp->len16);
- if (needed > SGE_MAX_WR_NDESC || needed > available)
- return (1);
- txp->plen = m->m_pkthdr.len + n->m_pkthdr.len;
- if (txp->plen > 65535)
- return (1);
+ /* VF allows coalescing of type 1 (1 GL) only */
+ if (mbuf_nsegs(m) > 1)
+ goto cannot_coalesce;
- txp->npkt = 2;
- set_mbuf_len16(m, l1);
- set_mbuf_len16(n, l2);
+ *send = false;
+ if (txp->npkt > 0) {
+ MPASS(tx_len16_to_desc(txp->len16) <= avail);
+ MPASS(txp->npkt < txp->max_npkt);
+ MPASS(txp->wr_type == 1); /* VF supports type 1 only */
+ if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > avail) {
+retry_after_send:
+ *send = true;
+ return (EAGAIN);
+ }
+ if (m->m_pkthdr.len + txp->plen > 65535)
+ goto retry_after_send;
+ if (cmp_l2hdr(txp, m))
+ goto retry_after_send;
+
+ txp->len16 += txpkts1_len16();
+ txp->plen += m->m_pkthdr.len;
+ txp->mb[txp->npkt++] = m;
+ if (txp->npkt == txp->max_npkt)
+ *send = true;
+ } else {
+ txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_vm_wr), 16) +
+ txpkts1_len16();
+ if (tx_len16_to_desc(txp->len16) > avail)
+ goto cannot_coalesce;
+ txp->npkt = 1;
+ txp->wr_type = 1;
+ txp->plen = m->m_pkthdr.len;
+ txp->mb[0] = m;
+ save_l2hdr(txp, m);
+ }
return (0);
}
static int
-add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available)
+add_to_txpkts_pf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m,
+ int avail, bool *send)
{
- u_int plen, len16, needed, nsegs;
+ struct txpkts *txp = &txq->txp;
+ int nsegs;
- MPASS(txp->wr_type == 0 || txp->wr_type == 1);
+ MPASS(!(sc->flags & IS_VF));
- if (cannot_use_txpkts(m))
- return (1);
+ /* Cannot have TSO and coalesce at the same time. */
+ if (cannot_use_txpkts(m)) {
+cannot_coalesce:
+ *send = txp->npkt > 0;
+ return (EINVAL);
+ }
+ *send = false;
nsegs = mbuf_nsegs(m);
- if (txp->wr_type == 1 && nsegs != 1)
- return (1);
+ if (txp->npkt == 0) {
+ if (m->m_pkthdr.len > 65535)
+ goto cannot_coalesce;
+ if (nsegs > 1) {
+ txp->wr_type = 0;
+ txp->len16 =
+ howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) +
+ txpkts0_len16(nsegs);
+ } else {
+ txp->wr_type = 1;
+ txp->len16 =
+ howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) +
+ txpkts1_len16();
+ }
+ if (tx_len16_to_desc(txp->len16) > avail)
+ goto cannot_coalesce;
+ txp->npkt = 1;
+ txp->plen = m->m_pkthdr.len;
+ txp->mb[0] = m;
+ } else {
+ MPASS(tx_len16_to_desc(txp->len16) <= avail);
+ MPASS(txp->npkt < txp->max_npkt);
- plen = txp->plen + m->m_pkthdr.len;
- if (plen > 65535)
- return (1);
+ if (m->m_pkthdr.len + txp->plen > 65535) {
+retry_after_send:
+ *send = true;
+ return (EAGAIN);
+ }
- if (txp->wr_type == 0)
- len16 = txpkts0_len16(nsegs);
- else
- len16 = txpkts1_len16();
- needed = tx_len16_to_desc(txp->len16 + len16);
- if (needed > SGE_MAX_WR_NDESC || needed > available)
- return (1);
+ MPASS(txp->wr_type == 0 || txp->wr_type == 1);
+ if (txp->wr_type == 0) {
+ if (tx_len16_to_desc(txp->len16 +
+ txpkts0_len16(nsegs)) > min(avail, SGE_MAX_WR_NDESC))
+ goto retry_after_send;
+ txp->len16 += txpkts0_len16(nsegs);
+ } else {
+ if (nsegs != 1)
+ goto retry_after_send;
+ if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) >
+ avail)
+ goto retry_after_send;
+ txp->len16 += txpkts1_len16();
+ }
- txp->npkt++;
- txp->plen = plen;
- txp->len16 += len16;
- set_mbuf_len16(m, len16);
-
+ txp->plen += m->m_pkthdr.len;
+ txp->mb[txp->npkt++] = m;
+ if (txp->npkt == txp->max_npkt)
+ *send = true;
+ }
return (0);
}
@@ -5003,34 +5134,25 @@
* The return value is the # of hardware descriptors used.
*/
static u_int
-write_txpkts_wr(struct adapter *sc, struct sge_txq *txq,
- struct fw_eth_tx_pkts_wr *wr, struct mbuf *m0, const struct txpkts *txp,
- u_int available)
+write_txpkts_wr(struct adapter *sc, struct sge_txq *txq)
{
+ const struct txpkts *txp = &txq->txp;
struct sge_eq *eq = &txq->eq;
+ struct fw_eth_tx_pkts_wr *wr;
struct tx_sdesc *txsd;
struct cpl_tx_pkt_core *cpl;
- uint32_t ctrl;
uint64_t ctrl1;
- int ndesc, checkwrap;
- struct mbuf *m;
+ int ndesc, i, checkwrap;
+ struct mbuf *m, *last;
void *flitp;
TXQ_LOCK_ASSERT_OWNED(txq);
MPASS(txp->npkt > 0);
- MPASS(txp->plen < 65536);
- MPASS(m0 != NULL);
- MPASS(m0->m_nextpkt != NULL);
MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16));
- MPASS(available > 0 && available < eq->sidx);
- ndesc = tx_len16_to_desc(txp->len16);
- MPASS(ndesc <= available);
-
- MPASS(wr == (void *)&eq->desc[eq->pidx]);
+ wr = (void *)&eq->desc[eq->pidx];
wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
- ctrl = V_FW_WR_LEN16(txp->len16);
- wr->equiq_to_len16 = htobe32(ctrl);
+ wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16));
wr->plen = htobe16(txp->plen);
wr->npkt = txp->npkt;
wr->r3 = 0;
@@ -5042,8 +5164,11 @@
* set then we know the WR is going to wrap around somewhere. We'll
* check for that at appropriate points.
*/
+ ndesc = tx_len16_to_desc(txp->len16);
+ last = NULL;
checkwrap = eq->sidx - ndesc < eq->pidx;
- for (m = m0; m != NULL; m = m->m_nextpkt) {
+ for (i = 0; i < txp->npkt; i++) {
+ m = txp->mb[i];
if (txp->wr_type == 0) {
struct ulp_txpkt *ulpmc;
struct ulptx_idata *ulpsc;
@@ -5052,7 +5177,7 @@
ulpmc = flitp;
ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid));
- ulpmc->len = htobe32(mbuf_len16(m));
+ ulpmc->len = htobe32(txpkts0_len16(mbuf_nsegs(m)));
/* ULP subcommand */
ulpsc = (void *)(ulpmc + 1);
@@ -5093,8 +5218,12 @@
write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap);
+ if (last != NULL)
+ last->m_nextpkt = m;
+ last = m;
}
+ txq->sgl_wrs++;
if (txp->wr_type == 0) {
txq->txpkts0_pkts += txp->npkt;
txq->txpkts0_wrs++;
@@ -5104,12 +5233,92 @@
}
txsd = &txq->sdesc[eq->pidx];
- txsd->m = m0;
+ txsd->m = txp->mb[0];
txsd->desc_used = ndesc;
return (ndesc);
}
+static u_int
+write_txpkts_vm_wr(struct adapter *sc, struct sge_txq *txq)
+{
+ const struct txpkts *txp = &txq->txp;
+ struct sge_eq *eq = &txq->eq;
+ struct fw_eth_tx_pkts_vm_wr *wr;
+ struct tx_sdesc *txsd;
+ struct cpl_tx_pkt_core *cpl;
+ uint64_t ctrl1;
+ int ndesc, i;
+ struct mbuf *m, *last;
+ void *flitp;
+
+ TXQ_LOCK_ASSERT_OWNED(txq);
+ MPASS(txp->npkt > 0);
+ MPASS(txp->wr_type == 1); /* VF supports type 1 only */
+ MPASS(txp->mb[0] != NULL);
+ MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16));
+
+ wr = (void *)&eq->desc[eq->pidx];
+ wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR));
+ wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16));
+ wr->r3 = 0;
+ wr->plen = htobe16(txp->plen);
+ wr->npkt = txp->npkt;
+ wr->r4 = 0;
+ memcpy(&wr->ethmacdst[0], &txp->ethmacdst[0], 16);
+ flitp = wr + 1;
+
+ /*
+ * At this point we are 32B into a hardware descriptor. Each mbuf in
+ * the WR will take 32B so we check for the end of the descriptor ring
+ * before writing odd mbufs (mb[1], 3, 5, ..)
+ */
+ ndesc = tx_len16_to_desc(txp->len16);
+ last = NULL;
+ for (i = 0; i < txp->npkt; i++) {
+ m = txp->mb[i];
+ if (i & 1 && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx])
+ flitp = &eq->desc[0];
+ cpl = flitp;
+
+ /* Checksum offload */
+ ctrl1 = csum_to_ctrl(sc, m);
+ if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
+ txq->txcsum++; /* some hardware assistance provided */
+
+ /* VLAN tag insertion */
+ if (needs_vlan_insertion(m)) {
+ ctrl1 |= F_TXPKT_VLAN_VLD |
+ V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
+ txq->vlan_insertion++;
+ }
+
+ /* CPL header */
+ cpl->ctrl0 = txq->cpl_ctrl0;
+ cpl->pack = 0;
+ cpl->len = htobe16(m->m_pkthdr.len);
+ cpl->ctrl1 = htobe64(ctrl1);
+
+ flitp = cpl + 1;
+ MPASS(mbuf_nsegs(m) == 1);
+ write_gl_to_txd(txq, m, (caddr_t *)(&flitp), 0);
+
+ if (last != NULL)
+ last->m_nextpkt = m;
+ last = m;
+ }
+
+ txq->sgl_wrs++;
+ txq->txpkts1_pkts += txp->npkt;
+ txq->txpkts1_wrs++;
+
+ txsd = &txq->sdesc[eq->pidx];
+ txsd->m = txp->mb[0];
+ txsd->desc_used = ndesc;
+
+ return (ndesc);
+}
+
/*
* If the SGL ends on an address that is not 16 byte aligned, this function will
* add a 0 filled flit at the end.
@@ -5444,8 +5653,10 @@
MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH);
atomic_readandclear_int(&eq->equiq);
- mp_ring_check_drainage(txq->r, 0);
- taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task);
+ if (mp_ring_is_idle(txq->r))
+ taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task);
+ else
+ mp_ring_check_drainage(txq->r, 64);
}
static int
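
eth_tx() above keeps a small per-queue score (1 to 10) that tunes how eagerly it coalesces: coalescing is attempted when the score is above 3, the score climbs each time a multi-packet work request goes out, drops when an egress update had to drain the held packets, and drifts back toward 5 while the hardware queue is idle. A standalone sketch of that heuristic with the same thresholds; the event enum and helper names are assumptions.

/*
 * Sketch of the per-queue coalescing "score" heuristic from eth_tx().
 * The thresholds (1..10, coalesce when > 3, drift toward 5 when idle) follow
 * the diff; update_score() and should_coalesce() are illustrative names.
 */
#include <stdbool.h>
#include <stdio.h>

enum tx_event {
	HW_QUEUE_EMPTY,		/* eq->pidx == eq->cidx: tx completely idle */
	DRAINED_BY_EGR_UPDATE,	/* called with nothing new to transmit */
	SENT_COALESCED_WR,	/* a multi-packet work request went out */
};

static void
update_score(int *score, enum tx_event ev)
{
	switch (ev) {
	case HW_QUEUE_EMPTY:
		*score = *score >= 5 ? 5 : *score + 1;	/* drift toward 5 */
		break;
	case DRAINED_BY_EGR_UPDATE:
		*score = *score <= 1 ? 1 : *score - 1;	/* back off */
		break;
	case SENT_COALESCED_WR:
		*score = *score >= 10 ? 10 : *score + 1; /* coalescing pays off */
		break;
	}
}

static bool
should_coalesce(int score, int npkt_held, int remaining, bool equiq_pending)
{
	return (npkt_held > 0 || remaining > 1 || score > 3 || equiq_pending);
}

int
main(void)
{
	int score = 5;	/* initial value set when the txq is created */

	update_score(&score, SENT_COALESCED_WR);
	printf("score=%d coalesce(single pkt)=%d\n", score,
	    should_coalesce(score, 0, 1, false));
	update_score(&score, DRAINED_BY_EGR_UPDATE);
	update_score(&score, DRAINED_BY_EGR_UPDATE);
	update_score(&score, DRAINED_BY_EGR_UPDATE);
	printf("score=%d coalesce(single pkt)=%d\n", score,
	    should_coalesce(score, 0, 1, false));
	return (0);
}
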
Index: head/sys/dev/cxgbe/t4_vf.c
===================================================================
--- head/sys/dev/cxgbe/t4_vf.c
+++ head/sys/dev/cxgbe/t4_vf.c
@@ -231,6 +231,7 @@
get_params__post_init(struct adapter *sc)
{
int rc;
+ uint32_t param, val;
rc = -t4vf_get_sge_params(sc);
if (rc != 0) {
@@ -281,6 +282,13 @@
return (EINVAL);
}
sc->params.portvec = sc->params.vfres.pmask;
+
+ param = FW_PARAM_PFVF(MAX_PKTS_PER_ETH_TX_PKTS_WR);
+ rc = -t4vf_query_params(sc, 1, &param, &val);
+ if (rc == 0)
+ sc->params.max_pkts_per_eth_tx_pkts_wr = val;
+ else
+ sc->params.max_pkts_per_eth_tx_pkts_wr = 14;
return (0);
}
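
On the VF, packets may only be coalesced into an FW_ETH_TX_PKTS_VM_WR when they share the Ethernet header that is carried once in the work request itself, which is what cmp_l2hdr()/save_l2hdr() in the t4_sge.c hunk check against the 16 bytes saved in struct txpkts. A simplified standalone model of that check follows; it compares at most the 16 saved bytes, and the names mirror the diff but operate on plain byte buffers here.

/*
 * Simplified model of the VF-only L2 header check used when coalescing into
 * FW_ETH_TX_PKTS_VM_WR: the first 16 bytes of the frame (dst MAC, src MAC,
 * ethertype, VLAN TCI) are saved with the first packet and later packets must
 * match.  Standalone sketch, not driver code.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct l2hdr {			/* the 16 bytes kept in struct txpkts */
	uint8_t ethmacdst[6];
	uint8_t ethmacsrc[6];
	uint16_t ethtype;	/* network byte order in the driver */
	uint16_t vlantci;
};

static void
save_l2hdr(struct l2hdr *saved, const uint8_t *frame)
{
	memcpy(saved, frame, 16);
}

/* Returns true when the new frame's header differs and coalescing must stop. */
static bool
l2hdr_differs(const struct l2hdr *saved, const uint8_t *frame)
{
	const uint8_t *p = (const uint8_t *)saved;
	size_t len;

	/* Ethertype bytes 0x81 0x00 mean an 802.1Q tag follows. */
	len = (p[12] == 0x81 && p[13] == 0x00) ? 16 : 14;
	return (memcmp(frame, p, len) != 0);
}

int
main(void)
{
	uint8_t f1[16] = { 1, 2, 3, 4, 5, 6,  7, 8, 9, 10, 11, 12,  0x08, 0x00 };
	uint8_t f2[16] = { 1, 2, 3, 4, 5, 6,  7, 8, 9, 10, 11, 12,  0x86, 0xdd };
	struct l2hdr saved;

	save_l2hdr(&saved, f1);
	printf("same frame differs: %d\n", l2hdr_differs(&saved, f1));
	printf("other type differs: %d\n", l2hdr_differs(&saved, f2));
	return (0);
}
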
