Changeset View
Changeset View
Standalone View
Standalone View
head/sys/dev/cxgbe/t4_sge.c
Show First 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | |||||
#include "t4_mp_ring.h" | #include "t4_mp_ring.h" | ||||
#ifdef T4_PKT_TIMESTAMP | #ifdef T4_PKT_TIMESTAMP | ||||
#define RX_COPY_THRESHOLD (MINCLSIZE - 8) | #define RX_COPY_THRESHOLD (MINCLSIZE - 8) | ||||
#else | #else | ||||
#define RX_COPY_THRESHOLD MINCLSIZE | #define RX_COPY_THRESHOLD MINCLSIZE | ||||
#endif | #endif | ||||
/* Internal mbuf flags stored in PH_loc.eight[4] (see mbuf_cflags()). */
#define MC_RAW_WR 0x02 | |||||
/* | /* | ||||
* Ethernet frames are DMA'd at this byte offset into the freelist buffer. | * Ethernet frames are DMA'd at this byte offset into the freelist buffer. | ||||
* 0-7 are valid values. | * 0-7 are valid values. | ||||
*/ | */ | ||||
static int fl_pktshift = 0; | static int fl_pktshift = 0; | ||||
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift); | TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift); | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 172 Lines • ▼ Show 20 Lines | |||||
static void find_safe_refill_source(struct adapter *, struct sge_fl *); | static void find_safe_refill_source(struct adapter *, struct sge_fl *); | ||||
static void add_fl_to_sfl(struct adapter *, struct sge_fl *); | static void add_fl_to_sfl(struct adapter *, struct sge_fl *); | ||||
static inline void get_pkt_gl(struct mbuf *, struct sglist *); | static inline void get_pkt_gl(struct mbuf *, struct sglist *); | ||||
static inline u_int txpkt_len16(u_int, u_int); | static inline u_int txpkt_len16(u_int, u_int); | ||||
static inline u_int txpkt_vm_len16(u_int, u_int); | static inline u_int txpkt_vm_len16(u_int, u_int); | ||||
static inline u_int txpkts0_len16(u_int); | static inline u_int txpkts0_len16(u_int); | ||||
static inline u_int txpkts1_len16(void); | static inline u_int txpkts1_len16(void); | ||||
static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int); | |||||
static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, | static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, | ||||
struct mbuf *, u_int); | struct mbuf *, u_int); | ||||
static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, | static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, | ||||
struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int); | struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int); | ||||
static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); | static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); | ||||
static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); | static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); | ||||
static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, | static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, | ||||
struct mbuf *, const struct txpkts *, u_int); | struct mbuf *, const struct txpkts *, u_int); | ||||
▲ Show 20 Lines • Show All 1,910 Lines • ▼ Show 20 Lines | |||||
set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) | set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) | ||||
{ | { | ||||
M_ASSERTPKTHDR(m); | M_ASSERTPKTHDR(m); | ||||
m->m_pkthdr.l5hlen = nsegs; | m->m_pkthdr.l5hlen = nsegs; | ||||
} | } | ||||
static inline int | static inline int | ||||
mbuf_cflags(struct mbuf *m) | |||||
{ | |||||
M_ASSERTPKTHDR(m); | |||||
return (m->m_pkthdr.PH_loc.eight[4]); | |||||
} | |||||
static inline void | |||||
set_mbuf_cflags(struct mbuf *m, uint8_t flags) | |||||
{ | |||||
M_ASSERTPKTHDR(m); | |||||
m->m_pkthdr.PH_loc.eight[4] = flags; | |||||
} | |||||
static inline int | |||||
mbuf_len16(struct mbuf *m) | mbuf_len16(struct mbuf *m) | ||||
{ | { | ||||
int n; | int n; | ||||
M_ASSERTPKTHDR(m); | M_ASSERTPKTHDR(m); | ||||
n = m->m_pkthdr.PH_loc.eight[0]; | n = m->m_pkthdr.PH_loc.eight[0]; | ||||
MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); | MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | |||||
static inline int | static inline int | ||||
needs_eo(struct mbuf *m) | needs_eo(struct mbuf *m) | ||||
{ | { | ||||
return (m->m_pkthdr.snd_tag != NULL); | return (m->m_pkthdr.snd_tag != NULL); | ||||
} | } | ||||
#endif | #endif | ||||
/* | |||||
* Try to allocate an mbuf to contain a raw work request. To make it | |||||
* easy to construct the work request, don't allocate a chain but a | |||||
* single mbuf. | |||||
*/ | |||||
struct mbuf * | |||||
alloc_wr_mbuf(int len, int how) | |||||
{ | |||||
struct mbuf *m; | |||||
if (len <= MHLEN) | |||||
m = m_gethdr(how, MT_DATA); | |||||
else if (len <= MCLBYTES) | |||||
m = m_getcl(how, MT_DATA, M_PKTHDR); | |||||
else | |||||
m = NULL; | |||||
if (m == NULL) | |||||
return (NULL); | |||||
m->m_pkthdr.len = len; | |||||
m->m_len = len; | |||||
set_mbuf_cflags(m, MC_RAW_WR); | |||||
set_mbuf_len16(m, howmany(len, 16)); | |||||
return (m); | |||||
} | |||||
static inline int | static inline int | ||||
needs_tso(struct mbuf *m) | needs_tso(struct mbuf *m) | ||||
{ | { | ||||
M_ASSERTPKTHDR(m); | M_ASSERTPKTHDR(m); | ||||
return (m->m_pkthdr.csum_flags & CSUM_TSO); | return (m->m_pkthdr.csum_flags & CSUM_TSO); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 157 Lines • ▼ Show 20 Lines | if (m0 == NULL) { | ||||
/* Should have left well enough alone. */ | /* Should have left well enough alone. */ | ||||
rc = EFBIG; | rc = EFBIG; | ||||
goto fail; | goto fail; | ||||
} | } | ||||
*mp = m0; /* update caller's copy after pullup */ | *mp = m0; /* update caller's copy after pullup */ | ||||
goto restart; | goto restart; | ||||
} | } | ||||
set_mbuf_nsegs(m0, nsegs); | set_mbuf_nsegs(m0, nsegs); | ||||
set_mbuf_cflags(m0, 0); | |||||
if (sc->flags & IS_VF) | if (sc->flags & IS_VF) | ||||
set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); | set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); | ||||
else | else | ||||
set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); | set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); | ||||
#ifdef RATELIMIT | #ifdef RATELIMIT | ||||
/* | /* | ||||
* Ethofld is limited to TCP and UDP for now, and only when L4 hw | * Ethofld is limited to TCP and UDP for now, and only when L4 hw | ||||
▲ Show 20 Lines • Show All 226 Lines • ▼ Show 20 Lines | can_resume_eth_tx(struct mp_ring *r) | ||||
return (total_available_tx_desc(eq) > eq->sidx / 8); | return (total_available_tx_desc(eq) > eq->sidx / 8); | ||||
} | } | ||||
static inline int | static inline int | ||||
cannot_use_txpkts(struct mbuf *m) | cannot_use_txpkts(struct mbuf *m) | ||||
{ | { | ||||
/* maybe put a GL limit too, to avoid silliness? */ | /* maybe put a GL limit too, to avoid silliness? */ | ||||
return (needs_tso(m)); | return (needs_tso(m) || (mbuf_cflags(m) & MC_RAW_WR) != 0); | ||||
} | } | ||||
static inline int | static inline int | ||||
discard_tx(struct sge_eq *eq) | discard_tx(struct sge_eq *eq) | ||||
{ | { | ||||
return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); | return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); | ||||
} | } | ||||
static inline int | |||||
wr_can_update_eq(struct fw_eth_tx_pkts_wr *wr) | |||||
{ | |||||
switch (G_FW_WR_OP(be32toh(wr->op_pkd))) { | |||||
case FW_ULPTX_WR: | |||||
case FW_ETH_TX_PKT_WR: | |||||
case FW_ETH_TX_PKTS_WR: | |||||
case FW_ETH_TX_PKT_VM_WR: | |||||
return (1); | |||||
default: | |||||
return (0); | |||||
} | |||||
} | |||||
/* | /* | ||||
* r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to | * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to | ||||
* be consumed. Return the actual number consumed. 0 indicates a stall. | * be consumed. Return the actual number consumed. 0 indicates a stall. | ||||
*/ | */ | ||||
static u_int | static u_int | ||||
eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) | eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) | ||||
{ | { | ||||
struct sge_txq *txq = r->cookie; | struct sge_txq *txq = r->cookie; | ||||
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | if (sc->flags & IS_VF) { | ||||
ETHER_BPF_MTAP(ifp, tail); | ETHER_BPF_MTAP(ifp, tail); | ||||
if (__predict_false(++next_cidx == r->size)) | if (__predict_false(++next_cidx == r->size)) | ||||
next_cidx = 0; | next_cidx = 0; | ||||
} | } | ||||
n = write_txpkts_wr(txq, wr, m0, &txp, available); | n = write_txpkts_wr(txq, wr, m0, &txp, available); | ||||
total += txp.npkt; | total += txp.npkt; | ||||
remaining -= txp.npkt; | remaining -= txp.npkt; | ||||
} else if (mbuf_cflags(m0) & MC_RAW_WR) { | |||||
total++; | |||||
remaining--; | |||||
n = write_raw_wr(txq, (void *)wr, m0, available); | |||||
} else { | } else { | ||||
total++; | total++; | ||||
remaining--; | remaining--; | ||||
ETHER_BPF_MTAP(ifp, m0); | ETHER_BPF_MTAP(ifp, m0); | ||||
n = write_txpkt_wr(txq, (void *)wr, m0, available); | n = write_txpkt_wr(txq, (void *)wr, m0, available); | ||||
} | } | ||||
MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); | MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); | ||||
available -= n; | available -= n; | ||||
dbdiff += n; | dbdiff += n; | ||||
IDXINCR(eq->pidx, n, eq->sidx); | IDXINCR(eq->pidx, n, eq->sidx); | ||||
if (wr_can_update_eq(wr)) { | |||||
if (total_available_tx_desc(eq) < eq->sidx / 4 && | if (total_available_tx_desc(eq) < eq->sidx / 4 && | ||||
atomic_cmpset_int(&eq->equiq, 0, 1)) { | atomic_cmpset_int(&eq->equiq, 0, 1)) { | ||||
wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | | wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | | ||||
F_FW_WR_EQUEQ); | F_FW_WR_EQUEQ); | ||||
eq->equeqidx = eq->pidx; | eq->equeqidx = eq->pidx; | ||||
} else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { | } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= | ||||
32) { | |||||
wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); | wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); | ||||
eq->equeqidx = eq->pidx; | eq->equeqidx = eq->pidx; | ||||
} | } | ||||
} | |||||
if (dbdiff >= 16 && remaining >= 4) { | if (dbdiff >= 16 && remaining >= 4) { | ||||
ring_eq_db(sc, eq, dbdiff); | ring_eq_db(sc, eq, dbdiff); | ||||
available += reclaim_tx_descs(txq, 4 * dbdiff); | available += reclaim_tx_descs(txq, 4 * dbdiff); | ||||
dbdiff = 0; | dbdiff = 0; | ||||
} | } | ||||
cidx = next_cidx; | cidx = next_cidx; | ||||
▲ Show 20 Lines • Show All 1,177 Lines • ▼ Show 20 Lines | SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", | ||||
CTLFLAG_RD, &txq->txpkts1_wrs, | CTLFLAG_RD, &txq->txpkts1_wrs, | ||||
"# of txpkts (type 1) work requests"); | "# of txpkts (type 1) work requests"); | ||||
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", | SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", | ||||
CTLFLAG_RD, &txq->txpkts0_pkts, | CTLFLAG_RD, &txq->txpkts0_pkts, | ||||
"# of frames tx'd using type0 txpkts work requests"); | "# of frames tx'd using type0 txpkts work requests"); | ||||
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", | SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", | ||||
CTLFLAG_RD, &txq->txpkts1_pkts, | CTLFLAG_RD, &txq->txpkts1_pkts, | ||||
"# of frames tx'd using type1 txpkts work requests"); | "# of frames tx'd using type1 txpkts work requests"); | ||||
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD, | |||||
&txq->raw_wrs, "# of raw work requests (non-packets)"); | |||||
SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", | SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", | ||||
CTLFLAG_RD, &txq->r->enqueues, | CTLFLAG_RD, &txq->r->enqueues, | ||||
"# of enqueues to the mp_ring for this queue"); | "# of enqueues to the mp_ring for this queue"); | ||||
SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", | SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", | ||||
CTLFLAG_RD, &txq->r->drops, | CTLFLAG_RD, &txq->r->drops, | ||||
"# of drops in the mp_ring for this queue"); | "# of drops in the mp_ring for this queue"); | ||||
SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", | SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", | ||||
▲ Show 20 Lines • Show All 522 Lines • ▼ Show 20 Lines | #endif | ||||
if (dst == (void *)&eq->desc[eq->sidx]) { | if (dst == (void *)&eq->desc[eq->sidx]) { | ||||
dst = (void *)&eq->desc[0]; | dst = (void *)&eq->desc[0]; | ||||
write_gl_to_txd(txq, m0, &dst, 0); | write_gl_to_txd(txq, m0, &dst, 0); | ||||
} else | } else | ||||
write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); | write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); | ||||
txq->sgl_wrs++; | txq->sgl_wrs++; | ||||
txq->txpkt_wrs++; | txq->txpkt_wrs++; | ||||
txsd = &txq->sdesc[eq->pidx]; | |||||
txsd->m = m0; | |||||
txsd->desc_used = ndesc; | |||||
return (ndesc); | |||||
} | |||||
/* | |||||
* Write a raw WR to the hardware descriptors, update the software | |||||
* descriptor, and advance the pidx. It is guaranteed that enough | |||||
* descriptors are available. | |||||
* | |||||
* The return value is the # of hardware descriptors used. | |||||
*/ | |||||
static u_int | |||||
write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available) | |||||
{ | |||||
struct sge_eq *eq = &txq->eq; | |||||
struct tx_sdesc *txsd; | |||||
struct mbuf *m; | |||||
caddr_t dst; | |||||
int len16, ndesc; | |||||
len16 = mbuf_len16(m0); | |||||
ndesc = howmany(len16, EQ_ESIZE / 16); | |||||
MPASS(ndesc <= available); | |||||
dst = wr; | |||||
for (m = m0; m != NULL; m = m->m_next) | |||||
copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); | |||||
txq->raw_wrs++; | |||||
txsd = &txq->sdesc[eq->pidx]; | txsd = &txq->sdesc[eq->pidx]; | ||||
txsd->m = m0; | txsd->m = m0; | ||||
txsd->desc_used = ndesc; | txsd->desc_used = ndesc; | ||||
return (ndesc); | return (ndesc); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,361 Lines • Show Last 20 Lines |