Index: sys/dev/cxgbe/cxgbei/icl_cxgbei.c
===================================================================
--- sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -844,8 +844,8 @@
 		goto no_ddp;
 	}
 
-	rc = t4_write_page_pods_for_buf(sc, &toep->ofld_txq->wrq, toep->tid,
-	    prsv, (vm_offset_t)csio->data_ptr, csio->dxfer_len);
+	rc = t4_write_page_pods_for_buf(sc, toep, prsv,
+	    (vm_offset_t)csio->data_ptr, csio->dxfer_len);
 	if (rc != 0) {
 		t4_free_page_pods(prsv);
 		uma_zfree(prsv_zone, prsv);
@@ -959,8 +959,7 @@
 			goto no_ddp;
 		}
 
-		rc = t4_write_page_pods_for_buf(sc, &toep->ofld_txq->wrq,
-		    toep->tid, prsv, buf, xferlen);
+		rc = t4_write_page_pods_for_buf(sc, toep, prsv, buf, xferlen);
 		if (rc != 0) {
 			t4_free_page_pods(prsv);
 			uma_zfree(prsv_zone, prsv);
Index: sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- sys/dev/cxgbe/tom/t4_cpl_io.c
+++ sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -927,10 +927,10 @@
 	}
 }
 
-void
-t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
+static struct wrqe *
+write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 {
-	struct mbuf *sndptr, *m;
+	struct mbuf *m;
 	struct fw_ofld_tx_data_wr *txwr;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
@@ -938,10 +938,130 @@
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	int tx_credits, shove;
-	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
-	struct mbufq *pduq = &toep->ulp_pduq;
 	static const u_int ulp_extra_len[] = {0, 4, 4, 8};
 
+	M_ASSERTPKTHDR(sndptr);
+
+	tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
+	if (mbuf_raw_wr(sndptr)) {
+		plen = sndptr->m_pkthdr.len;
+		KASSERT(plen <= SGE_MAX_WR_LEN,
+		    ("raw WR len %u is greater than max WR len", plen));
+		if (plen > tx_credits * 16)
+			return (NULL);
+
+		wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq);
+		if (__predict_false(wr == NULL))
+			return (NULL);
+
+		m_copydata(sndptr, 0, plen, wrtod(wr));
+		return (wr);
+	}
+
+	max_imm = max_imm_payload(tx_credits);
+	max_nsegs = max_dsgl_nsegs(tx_credits);
+
+	plen = 0;
+	nsegs = 0;
+	max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
+	for (m = sndptr; m != NULL; m = m->m_next) {
+		int n = sglist_count(mtod(m, void *), m->m_len);
+
+		nsegs += n;
+		plen += m->m_len;
+
+		/*
+		 * This mbuf would send us _over_ the nsegs limit.
+		 * Suspend tx because the PDU can't be sent out.
+		 */
+		if (plen > max_imm && nsegs > max_nsegs)
+			return (NULL);
+
+		if (max_nsegs_1mbuf < n)
+			max_nsegs_1mbuf = n;
+	}
+
+	if (__predict_false(toep->flags & TPF_FIN_SENT))
+		panic("%s: excess tx.", __func__);
+
+	/*
+	 * We have a PDU to send.  All of it goes out in one WR so 'm'
+	 * is NULL.  A PDU's length is always a multiple of 4.
+	 */
+	MPASS(m == NULL);
+	MPASS((plen & 3) == 0);
+	MPASS(sndptr->m_pkthdr.len == plen);
+
+	shove = !(tp->t_flags & TF_MORETOCOME);
+	ulp_submode = mbuf_ulp_submode(sndptr);
+	MPASS(ulp_submode < nitems(ulp_extra_len));
+
+	/*
+	 * plen doesn't include header and data digests, which are
+	 * generated and inserted in the right places by the TOE, but
+	 * they do occupy TCP sequence space and need to be accounted
+	 * for.
+	 */
+	adjusted_plen = plen + ulp_extra_len[ulp_submode];
+	if (plen <= max_imm) {
+
+		/* Immediate data tx */
+
+		wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
+		    &toep->ofld_txq->wrq);
+		if (wr == NULL) {
+			/* XXX: how will we recover from this? */
+			return (NULL);
+		}
+		txwr = wrtod(wr);
+		credits = howmany(wr->wr_len, 16);
+		write_tx_wr(txwr, toep, plen, adjusted_plen, credits,
+		    shove, ulp_submode);
+		m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
+		nsegs = 0;
+	} else {
+		int wr_len;
+
+		/* DSGL tx */
+		wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
+		    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
+		wr = alloc_wrqe(roundup2(wr_len, 16),
+		    &toep->ofld_txq->wrq);
+		if (wr == NULL) {
+			/* XXX: how will we recover from this? */
+			return (NULL);
+		}
+		txwr = wrtod(wr);
+		credits = howmany(wr_len, 16);
+		write_tx_wr(txwr, toep, 0, adjusted_plen, credits,
+		    shove, ulp_submode);
+		write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf);
+		if (wr_len & 0xf) {
+			uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len);
+			*pad = 0;
+		}
+	}
+
+	tp->snd_nxt += adjusted_plen;
+	tp->snd_max += adjusted_plen;
+
+	counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, 1);
+	counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen);
+
+	return (wr);
+}
+
+void
+t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
+{
+	struct mbuf *sndptr, *m;
+	struct fw_wr_hdr *wrhdr;
+	struct wrqe *wr;
+	u_int plen, credits;
+	struct inpcb *inp = toep->inp;
+	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
+	struct mbufq *pduq = &toep->ulp_pduq;
+
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
@@ -965,99 +1085,14 @@
 		rqdrop_locked(&toep->ulp_pdu_reclaimq, drop);
 
 	while ((sndptr = mbufq_first(pduq)) != NULL) {
-		M_ASSERTPKTHDR(sndptr);
-
-		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
-		max_imm = max_imm_payload(tx_credits);
-		max_nsegs = max_dsgl_nsegs(tx_credits);
-
-		plen = 0;
-		nsegs = 0;
-		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
-		for (m = sndptr; m != NULL; m = m->m_next) {
-			int n = sglist_count(mtod(m, void *), m->m_len);
-
-			nsegs += n;
-			plen += m->m_len;
-
-			/*
-			 * This mbuf would send us _over_ the nsegs limit.
-			 * Suspend tx because the PDU can't be sent out.
-			 */
-			if (plen > max_imm && nsegs > max_nsegs) {
-				toep->flags |= TPF_TX_SUSPENDED;
-				return;
-			}
-
-			if (max_nsegs_1mbuf < n)
-				max_nsegs_1mbuf = n;
-		}
-
-		if (__predict_false(toep->flags & TPF_FIN_SENT))
-			panic("%s: excess tx.", __func__);
-
-		/*
-		 * We have a PDU to send.  All of it goes out in one WR so 'm'
-		 * is NULL.  A PDU's length is always a multiple of 4.
-		 */
-		MPASS(m == NULL);
-		MPASS((plen & 3) == 0);
-		MPASS(sndptr->m_pkthdr.len == plen);
-
-		shove = !(tp->t_flags & TF_MORETOCOME);
-		ulp_submode = mbuf_ulp_submode(sndptr);
-		MPASS(ulp_submode < nitems(ulp_extra_len));
-
-		/*
-		 * plen doesn't include header and data digests, which are
-		 * generated and inserted in the right places by the TOE, but
-		 * they do occupy TCP sequence space and need to be accounted
-		 * for.
-		 */
-		adjusted_plen = plen + ulp_extra_len[ulp_submode];
-		if (plen <= max_imm) {
-
-			/* Immediate data tx */
-
-			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
-			    &toep->ofld_txq->wrq);
-			if (wr == NULL) {
-				/* XXX: how will we recover from this? */
-				toep->flags |= TPF_TX_SUSPENDED;
-				return;
-			}
-			txwr = wrtod(wr);
-			credits = howmany(wr->wr_len, 16);
-			write_tx_wr(txwr, toep, plen, adjusted_plen, credits,
-			    shove, ulp_submode);
-			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
-			nsegs = 0;
-		} else {
-			int wr_len;
-
-			/* DSGL tx */
-			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
-			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
-			wr = alloc_wrqe(roundup2(wr_len, 16),
-			    &toep->ofld_txq->wrq);
-			if (wr == NULL) {
-				/* XXX: how will we recover from this? */
-				toep->flags |= TPF_TX_SUSPENDED;
-				return;
-			}
-			txwr = wrtod(wr);
-			credits = howmany(wr_len, 16);
-			write_tx_wr(txwr, toep, 0, adjusted_plen, credits,
-			    shove, ulp_submode);
-			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
-			    max_nsegs_1mbuf);
-			if (wr_len & 0xf) {
-				uint64_t *pad = (uint64_t *)
-				    ((uintptr_t)txwr + wr_len);
-				*pad = 0;
-			}
+		wr = write_iscsi_mbuf_wr(toep, sndptr);
+		if (wr == NULL) {
+			toep->flags |= TPF_TX_SUSPENDED;
+			return;
 		}
 
+		plen = sndptr->m_pkthdr.len;
+		credits = howmany(wr->wr_len, 16);
 		KASSERT(toep->tx_credits >= credits,
 		    ("%s: not enough credits", __func__));
 
@@ -1068,16 +1103,21 @@
 		toep->tx_credits -= credits;
 		toep->tx_nocompl += credits;
 		toep->plen_nocompl += plen;
-		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
+
+		/*
+		 * Since we are also sending ulp_tx WRs through the
+		 * offload queue, make sure that this condition sets the
+		 * F_FW_WR_COMPL flag when the available credits are <=
+		 * the max ulp_tx WR size.
+		 */
+		if (toep->tx_credits <= toep->tx_total * 5 / 8 &&
 		    toep->tx_nocompl >= toep->tx_total / 4) {
-			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
+			wrhdr = wrtod(wr);
+			wrhdr->hi |= htobe32(F_FW_WR_COMPL);
 			toep->tx_nocompl = 0;
 			toep->plen_nocompl = 0;
 		}
 
-		tp->snd_nxt += adjusted_plen;
-		tp->snd_max += adjusted_plen;
-
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
 			toep->flags |= TPF_TX_SUSPENDED;
@@ -1092,9 +1132,6 @@
 		}
 		toep->txsd_avail--;
 
-		counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, 1);
-		counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen);
-
 		t4_l2t_send(sc, wr, toep->l2te);
 	}
 
Index: sys/dev/cxgbe/tom/t4_ddp.c
===================================================================
--- sys/dev/cxgbe/tom/t4_ddp.c
+++ sys/dev/cxgbe/tom/t4_ddp.c
@@ -1081,11 +1081,30 @@
 	return (0);
 }
 
+static struct mbuf *
+alloc_raw_wr_mbuf(int len)
+{
+	struct mbuf *m;
+
+	if (len <= MHLEN)
+		m = m_gethdr(M_NOWAIT, MT_DATA);
+	else if (len <= MCLBYTES)
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+	else
+		m = NULL;
+	if (m == NULL)
+		return (NULL);
+	m->m_pkthdr.len = len;
+	m->m_len = len;
+	set_mbuf_raw_wr(m, true);
+	return (m);
+}
+
 int
-t4_write_page_pods_for_buf(struct adapter *sc, struct sge_wrq *wrq, int tid,
+t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep,
     struct ppod_reservation *prsv, vm_offset_t buf, int buflen)
 {
-	struct wrqe *wr;
+	struct inpcb *inp = toep->inp;
 	struct ulp_mem_io *ulpmc;
 	struct ulptx_idata *ulpsc;
 	struct pagepod *ppod;
@@ -1094,6 +1113,8 @@
 	uint32_t cmd;
 	struct ppod_region *pr = prsv->prsv_pr;
 	uintptr_t end_pva, pva, pa;
+	struct mbuf *m;
+	struct mbufq wrq;
 
 	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
 	if (is_t4(sc))
@@ -1105,6 +1126,7 @@
 	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
 	pva = trunc_page(buf);
 	end_pva = trunc_page(buf + buflen - 1);
+	mbufq_init(&wrq, INT_MAX);
 	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
 
 		/* How many page pods are we writing in this cycle */
@@ -1113,12 +1135,14 @@
 		chunk = PPOD_SZ(n);
 		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
 
-		wr = alloc_wrqe(len, wrq);
-		if (wr == NULL)
-			return (ENOMEM);	/* ok to just bail out */
-		ulpmc = wrtod(wr);
+		m = alloc_raw_wr_mbuf(len);
+		if (m == NULL) {
+			mbufq_drain(&wrq);
+			return (ENOMEM);
+		}
+		ulpmc = mtod(m, struct ulp_mem_io *);
 
-		INIT_ULPTX_WR(ulpmc, len, 0, 0);
+		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
 		ulpmc->cmd = cmd;
 		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
@@ -1131,7 +1155,7 @@
 		ppod = (struct pagepod *)(ulpsc + 1);
 		for (j = 0; j < n; i++, j++, ppod++) {
 			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
-			    V_PPOD_TID(tid) |
+			    V_PPOD_TID(toep->tid) |
 			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
 			ppod->len_offset = htobe64(V_PPOD_LEN(buflen) |
 			    V_PPOD_OFST(offset));
@@ -1148,7 +1172,7 @@
 #if 0
 				CTR5(KTR_CXGBE,
 				    "%s: tid %d ppod[%d]->addr[%d] = %p",
-				    __func__, tid, i, k,
+				    __func__, toep->tid, i, k,
 				    htobe64(ppod->addr[k]));
 #endif
 			}
@@ -1161,9 +1185,13 @@
 			pva -= ddp_pgsz;
 		}
 
-		t4_wrq_tx(sc, wr);
+		mbufq_enqueue(&wrq, m);
 	}
 
+	INP_WLOCK(inp);
+	mbufq_concat(&toep->ulp_pduq, &wrq);
+	INP_WUNLOCK(inp);
+
 	MPASS(pva <= end_pva);
 
 	return (0);
Index: sys/dev/cxgbe/tom/t4_tom.h
===================================================================
--- sys/dev/cxgbe/tom/t4_tom.h
+++ sys/dev/cxgbe/tom/t4_tom.h
@@ -330,6 +330,22 @@
 	return (td->tod.tod_softc);
 }
 
+static inline void
+set_mbuf_raw_wr(struct mbuf *m, bool raw)
+{
+
+	M_ASSERTPKTHDR(m);
+	m->m_pkthdr.PH_per.eight[6] = raw;
+}
+
+static inline bool
+mbuf_raw_wr(struct mbuf *m)
+{
+
+	M_ASSERTPKTHDR(m);
+	return (m->m_pkthdr.PH_per.eight[6]);
+}
+
 static inline void
 set_mbuf_ulp_submode(struct mbuf *m, uint8_t ulp_submode)
 {
@@ -423,7 +439,7 @@
     struct ppod_reservation *);
 int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int,
     struct pageset *);
-int t4_write_page_pods_for_buf(struct adapter *, struct sge_wrq *, int,
+int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *,
     struct ppod_reservation *, vm_offset_t, int);
 void t4_free_page_pods(struct ppod_reservation *);
 int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
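
Note on the mechanism this patch introduces: page pod work requests are now pre-built
ULP_TX WRs that travel down the same ulp_pduq as ordinary iSCSI PDUs, and
write_iscsi_mbuf_wr() tells the two apart via a flag stashed in a spare per-packet
header byte (PH_per.eight[6]). Below is a minimal userspace sketch of that flag
round-trip, using stand-in types (struct mbuf_stub and struct pkthdr_stub are
hypothetical stubs, not the kernel's structures); it is illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for the mbuf packet header: the patch reuses the spare
 * PH_per.eight[6] byte to mark an mbuf as a pre-built ("raw") WR.
 */
struct pkthdr_stub {
	uint8_t eight[8];
};

struct mbuf_stub {
	struct pkthdr_stub m_pkthdr;
	int m_len;
};

/* Mirrors set_mbuf_raw_wr() from t4_tom.h. */
static void
set_raw_wr(struct mbuf_stub *m, bool raw)
{
	m->m_pkthdr.eight[6] = raw;
}

/* Mirrors mbuf_raw_wr() from t4_tom.h. */
static bool
is_raw_wr(const struct mbuf_stub *m)
{
	return (m->m_pkthdr.eight[6] != 0);
}

int
main(void)
{
	struct mbuf_stub pdu = { .m_len = 512 };
	struct mbuf_stub ppod_wr = { .m_len = 256 };
	struct mbuf_stub *q[2] = { &ppod_wr, &pdu };

	set_raw_wr(&ppod_wr, true);

	/*
	 * Dispatch decision modeled on write_iscsi_mbuf_wr(): a raw WR
	 * is copied out verbatim (no FW_OFLD_TX_DATA_WR header and no
	 * TCP sequence space consumed); everything else is framed as
	 * an iSCSI PDU.
	 */
	for (int i = 0; i < 2; i++) {
		if (is_raw_wr(q[i]))
			printf("mbuf %d: copy %d bytes as pre-built WR\n",
			    i, q[i]->m_len);
		else
			printf("mbuf %d: frame %d bytes as iSCSI PDU\n",
			    i, q[i]->m_len);
	}
	return (0);
}

Queuing the page pod WRs on ulp_pduq (instead of sending them immediately with
t4_wrq_tx()) keeps them ordered ahead of the PDUs that depend on them and makes
them subject to the same tx credit accounting, which is also why the
F_FW_WR_COMPL threshold above is raised from 3/8 to 5/8 of tx_total.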