diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -619,6 +619,48 @@ __func__, nsegs, start, stop)); } +bool +t4_push_raw_wr(struct adapter *sc, struct toepcb *toep, struct mbuf *m) +{ +#ifdef INVARIANTS + struct inpcb *inp = toep->inp; +#endif + struct wrqe *wr; + struct ofld_tx_sdesc *txsd; + u_int credits, plen; + + INP_WLOCK_ASSERT(inp); + MPASS(mbuf_raw_wr(m)); + plen = m->m_pkthdr.len; + credits = howmany(plen, 16); + if (credits > toep->tx_credits) + return (false); + + wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq); + if (wr == NULL) + return (false); + + m_copydata(m, 0, plen, wrtod(wr)); + m_freem(m); + + toep->tx_credits -= credits; + if (toep->tx_credits < MIN_OFLD_TX_CREDITS) + toep->flags |= TPF_TX_SUSPENDED; + + KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); + KASSERT(credits <= MAX_OFLD_TX_SDESC_CREDITS, + ("%s: tx_credits %u too large", __func__, credits)); + txsd = &toep->txsd[toep->txsd_pidx]; + txsd->plen = 0; + txsd->tx_credits = credits; + if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) + toep->txsd_pidx = 0; + toep->txsd_avail--; + + t4_wrq_tx(sc, wr); + return (true); +} + /* * Max number of SGL entries an offload tx work request can have. This is 41 * (1 + 40) for a full 512B work request. @@ -651,6 +693,7 @@ struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; + struct mbufq *pduq = &toep->ulp_pduq; int tx_credits, shove, compl, sowwakeup; struct ofld_tx_sdesc *txsd; bool nomap_mbuf_seen; @@ -695,6 +738,19 @@ max_imm = max_imm_payload(tx_credits, 0); max_nsegs = max_dsgl_nsegs(tx_credits, 0); + if (__predict_false((sndptr = mbufq_first(pduq)) != NULL)) { + if (!t4_push_raw_wr(sc, toep, sndptr)) { + toep->flags |= TPF_TX_SUSPENDED; + return; + } + + m = mbufq_dequeue(pduq); + MPASS(m == sndptr); + + txsd = &toep->txsd[toep->txsd_pidx]; + continue; + } + SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { @@ -1253,6 +1309,35 @@ t4_push_frames(sc, toep, drop); } +void +t4_raw_wr_tx(struct adapter *sc, struct toepcb *toep, struct mbuf *m) +{ +#ifdef INVARIANTS + struct inpcb *inp = toep->inp; +#endif + + INP_WLOCK_ASSERT(inp); + + /* + * If there are other raw WRs enqueued, enqueue to preserve + * FIFO ordering. + */ + if (!mbufq_empty(&toep->ulp_pduq)) { + mbufq_enqueue(&toep->ulp_pduq, m); + return; + } + + /* + * Cannot call t4_push_data here as that will lock so_snd and + * some callers of this run in rx handlers with so_rcv locked. + * Instead, just try to transmit this WR. + */ + if (!t4_push_raw_wr(sc, toep, m)) { + mbufq_enqueue(&toep->ulp_pduq, m); + toep->flags |= TPF_TX_SUSPENDED; + } +} + int t4_tod_output(struct toedev *tod, struct tcpcb *tp) { diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c --- a/sys/dev/cxgbe/tom/t4_tls.c +++ b/sys/dev/cxgbe/tom/t4_tls.c @@ -496,6 +496,7 @@ struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; + struct mbufq *pduq = &toep->ulp_pduq; int tls_size, tx_credits, shove, sowwakeup; struct ofld_tx_sdesc *txsd; char *buf; @@ -538,6 +539,18 @@ for (;;) { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); + if (__predict_false((m = mbufq_first(pduq)) != NULL)) { + if (!t4_push_raw_wr(sc, toep, m)) { + toep->flags |= TPF_TX_SUSPENDED; + return; + } + + (void)mbufq_dequeue(pduq); + + txsd = &toep->txsd[toep->txsd_pidx]; + continue; + } + SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -529,6 +529,8 @@ void t4_set_tcb_field(struct adapter *, struct sge_wrq *, struct toepcb *, uint16_t, uint64_t, uint64_t, int, int); void t4_push_pdus(struct adapter *, struct toepcb *, int); +bool t4_push_raw_wr(struct adapter *, struct toepcb *, struct mbuf *); +void t4_raw_wr_tx(struct adapter *, struct toepcb *, struct mbuf *); /* t4_ddp.c */ int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int,