diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 732754d07f8f..a1bc88bdea7f 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -1,2376 +1,2361 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012, 2015 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_ratelimit.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" static void t4_aiotx_cancel(struct kaiocb *job); static void t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep); void send_flowc_wr(struct toepcb *toep, struct tcpcb *tp) { struct wrqe *wr; struct fw_flowc_wr *flowc; unsigned int nparams, flowclen, paramidx; struct vi_info *vi = toep->vi; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; unsigned int pfvf = sc->pf << S_FW_VIID_PFN; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), ("%s: flowc for tid %u sent already", __func__, toep->tid)); if (tp != NULL) nparams = 8; else nparams = 6; if (ulp_mode(toep) == ULP_MODE_TLS) nparams++; if (toep->tls.fcplenmax != 0) nparams++; if (toep->params.tc_idx != -1) { MPASS(toep->params.tc_idx >= 0 && toep->params.tc_idx < sc->chip_params->nsched_cls); nparams++; } flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); #define FLOWC_PARAM(__m, __v) \ do { \ flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ flowc->mnemval[paramidx].val = htobe32(__v); \ paramidx++; \ } while (0) paramidx = 0; FLOWC_PARAM(PFNVFN, pfvf); FLOWC_PARAM(CH, pi->tx_chan); FLOWC_PARAM(PORT, pi->tx_chan); FLOWC_PARAM(IQID, toep->ofld_rxq->iq.abs_id); FLOWC_PARAM(SNDBUF, toep->params.sndbuf); if (tp) { FLOWC_PARAM(MSS, toep->params.emss); FLOWC_PARAM(SNDNXT, tp->snd_nxt); FLOWC_PARAM(RCVNXT, tp->rcv_nxt); } else FLOWC_PARAM(MSS, 512); CTR6(KTR_CXGBE, "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", __func__, toep->tid, toep->params.emss, toep->params.sndbuf, tp ? tp->snd_nxt : 0, tp ? tp->rcv_nxt : 0); if (ulp_mode(toep) == ULP_MODE_TLS) FLOWC_PARAM(ULP_MODE, ulp_mode(toep)); if (toep->tls.fcplenmax != 0) FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax); if (toep->params.tc_idx != -1) FLOWC_PARAM(SCHEDCLASS, toep->params.tc_idx); #undef FLOWC_PARAM KASSERT(paramidx == nparams, ("nparams mismatch")); txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; toep->flags |= TPF_FLOWC_WR_SENT; t4_wrq_tx(sc, wr); } #ifdef RATELIMIT /* * Input is Bytes/second (so_max_pacing_rate), chip counts in Kilobits/second. */ static int update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps) { int tc_idx, rc; const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000; const int port_id = toep->vi->pi->port_id; CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps); if (kbps == 0) { /* unbind */ tc_idx = -1; } else { rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx); if (rc != 0) return (rc); MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls); } if (toep->params.tc_idx != tc_idx) { struct wrqe *wr; struct fw_flowc_wr *flowc; int nparams = 1, flowclen, flowclen16; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); flowclen16 = howmany(flowclen, 16); if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0 || (wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq)) == NULL) { if (tc_idx >= 0) t4_release_cl_rl(sc, port_id, tc_idx); return (ENOMEM); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; if (tc_idx == -1) flowc->mnemval[0].val = htobe32(0xff); else flowc->mnemval[0].val = htobe32(tc_idx); txsd->tx_credits = flowclen16; txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); } if (toep->params.tc_idx >= 0) t4_release_cl_rl(sc, port_id, toep->params.tc_idx); toep->params.tc_idx = tc_idx; return (0); } #endif void send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) { struct wrqe *wr; struct cpl_abort_req *req; int tid = toep->tid; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ INP_WLOCK_ASSERT(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", __func__, toep->tid, inp->inp_flags & INP_DROPPED ? "inp dropped" : tcpstates[tp->t_state], toep->flags, inp->inp_flags, toep->flags & TPF_ABORT_SHUTDOWN ? " (abort already in progress)" : ""); if (toep->flags & TPF_ABORT_SHUTDOWN) return; /* abort already in progress */ toep->flags |= TPF_ABORT_SHUTDOWN; KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %d.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); if (inp->inp_flags & INP_DROPPED) req->rsvd0 = htobe32(snd_nxt); else req->rsvd0 = htobe32(tp->snd_nxt); req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); req->cmd = CPL_ABORT_SEND_RST; /* * XXX: What's the correct way to tell that the inp hasn't been detached * from its socket? Should I even be flushing the snd buffer here? */ if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) /* because I'm not sure. See comment above */ sbflush(&so->so_snd); } t4_l2t_send(sc, wr, toep->l2te); } /* * Called when a connection is established to translate the TCP options * reported by HW to FreeBSD's native format. */ static void assign_rxopt(struct tcpcb *tp, uint16_t opt) { struct toepcb *toep = tp->t_toe; struct inpcb *inp = tp->t_inpcb; struct adapter *sc = td_adapter(toep->td); INP_LOCK_ASSERT(inp); toep->params.mtu_idx = G_TCPOPT_MSS(opt); tp->t_maxseg = sc->params.mtus[toep->params.mtu_idx]; if (inp->inp_inc.inc_flags & INC_ISIPV6) tp->t_maxseg -= sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else tp->t_maxseg -= sizeof(struct ip) + sizeof(struct tcphdr); toep->params.emss = tp->t_maxseg; if (G_TCPOPT_TSTAMP(opt)) { toep->params.tstamp = 1; toep->params.emss -= TCPOLEN_TSTAMP_APPA; tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ tp->ts_recent = 0; /* hmmm */ tp->ts_recent_age = tcp_ts_getticks(); } else toep->params.tstamp = 0; if (G_TCPOPT_SACK(opt)) { toep->params.sack = 1; tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ } else { toep->params.sack = 0; tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ } if (G_TCPOPT_WSCALE_OK(opt)) tp->t_flags |= TF_RCVD_SCALE; /* Doing window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); } else toep->params.wscale = 0; CTR6(KTR_CXGBE, "assign_rxopt: tid %d, mtu_idx %u, emss %u, ts %u, sack %u, wscale %u", toep->tid, toep->params.mtu_idx, toep->params.emss, toep->params.tstamp, toep->params.sack, toep->params.wscale); } /* * Completes some final bits of initialization for just established connections * and changes their state to TCPS_ESTABLISHED. * * The ISNs are from the exchange of SYNs. */ void make_established(struct toepcb *toep, uint32_t iss, uint32_t irs, uint16_t opt) { struct inpcb *inp = toep->inp; struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); uint16_t tcpopt = be16toh(opt); INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED, ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p", __func__, toep->tid, so, inp, tp, toep); tcp_state_change(tp, TCPS_ESTABLISHED); tp->t_starttime = ticks; TCPSTAT_INC(tcps_connects); tp->irs = irs; tcp_rcvseqinit(tp); tp->rcv_wnd = (u_int)toep->params.opt0_bufsize << 10; tp->rcv_adv += tp->rcv_wnd; tp->last_ack_sent = tp->rcv_nxt; tp->iss = iss; tcp_sendseqinit(tp); tp->snd_una = iss + 1; tp->snd_nxt = iss + 1; tp->snd_max = iss + 1; assign_rxopt(tp, tcpopt); send_flowc_wr(toep, tp); soisconnected(so); if (ulp_mode(toep) == ULP_MODE_TLS) tls_establish(toep); } int send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) { struct wrqe *wr; struct cpl_rx_data_ack *req; uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return (0); req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); t4_wrq_tx(sc, wr); return (credits); } void send_rx_modulate(struct adapter *sc, struct toepcb *toep) { struct wrqe *wr; struct cpl_rx_data_ack *req; wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return; req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); req->credit_dack = htobe32(F_RX_MODULATE_RX); t4_wrq_tx(sc, wr); } void t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; struct toepcb *toep = tp->t_toe; int rx_credits; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK_ASSERT(sb); rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0; if (rx_credits > 0 && (tp->rcv_wnd <= 32 * 1024 || rx_credits >= 64 * 1024 || (rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || sbused(sb) + tp->rcv_wnd < sb->sb_lowat)) { rx_credits = send_rx_credits(sc, toep, rx_credits); tp->rcv_wnd += rx_credits; tp->rcv_adv += rx_credits; } else if (toep->flags & TPF_FORCE_CREDITS) send_rx_modulate(sc, toep); } void t4_rcvd(struct toedev *tod, struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; SOCKBUF_LOCK(sb); t4_rcvd_locked(tod, tp); SOCKBUF_UNLOCK(sb); } /* * Close a connection by sending a CPL_CLOSE_CON_REQ message. */ int t4_close_conn(struct adapter *sc, struct toepcb *toep) { struct wrqe *wr; struct cpl_close_con_req *req; unsigned int tid = toep->tid; CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, toep->flags & TPF_FIN_SENT ? ", IGNORED" : ""); if (toep->flags & TPF_FIN_SENT) return (0); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | V_FW_WR_FLOWID(tid)); req->wr.wr_lo = cpu_to_be64(0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); req->rsvd = 0; toep->flags |= TPF_FIN_SENT; toep->flags &= ~TPF_SEND_FIN; t4_l2t_send(sc, wr, toep->l2te); return (0); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) /* Maximum amount of immediate data we could stuff in a WR */ static inline int max_imm_payload(int tx_credits) { const int n = 1; /* Use no more than one desc for imm. data WR */ KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); if (tx_credits >= (n * EQ_ESIZE) / 16) return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); else return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); } /* Maximum number of SGL entries we could stuff in a WR */ static inline int max_dsgl_nsegs(int tx_credits) { int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); nseg += 2 * (sge_pair_credits * 16 / 24); if ((sge_pair_credits * 16) % 24 == 16) nseg++; return (nseg); } static inline void write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, unsigned int plen, uint8_t credits, int shove, int ulp_submode) { struct fw_ofld_tx_data_wr *txwr = dst; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | V_FW_WR_IMMDLEN(immdlen)); txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | V_FW_WR_LEN16(credits)); txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ulp_mode(toep)) | V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove)); txwr->plen = htobe32(plen); if (toep->params.tx_align > 0) { if (plen < 2 * toep->params.emss) txwr->lsodisable_to_flags |= htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); else txwr->lsodisable_to_flags |= htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | (toep->params.nagle == 0 ? 0 : F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); } } /* * Generate a DSGL from a starting mbuf. The total number of segments and the * maximum segments in any one mbuf are provided. */ static void write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) { struct mbuf *m; struct ulptx_sgl *usgl = dst; int i, j, rc; struct sglist sg; struct sglist_seg segs[n]; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); sglist_init(&sg, n, segs); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); i = -1; for (m = start; m != stop; m = m->m_next) { if (m->m_flags & M_EXTPG) rc = sglist_append_mbuf_epg(&sg, m, mtod(m, vm_offset_t), m->m_len); else rc = sglist_append(&sg, mtod(m, void *), m->m_len); if (__predict_false(rc != 0)) panic("%s: sglist_append %d", __func__, rc); for (j = 0; j < sg.sg_nseg; i++, j++) { if (i < 0) { usgl->len0 = htobe32(segs[j].ss_len); usgl->addr0 = htobe64(segs[j].ss_paddr); } else { usgl->sge[i / 2].len[i & 1] = htobe32(segs[j].ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(segs[j].ss_paddr); } #ifdef INVARIANTS nsegs--; #endif } sglist_reset(&sg); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", __func__, nsegs, start, stop)); } /* * Max number of SGL entries an offload tx work request can have. This is 41 * (1 + 40) for a full 512B work request. * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) */ #define OFLD_SGL_LEN (41) /* * Send data and/or a FIN to the peer. * * The socket's so_snd buffer consists of a stream of data starting with sb_mb * and linked together with m_next. sb_sndptr, if set, is the last mbuf that * was transmitted. * * drop indicates the number of bytes that should be dropped from the head of * the send buffer. It is an optimization that lets do_fw4_ack avoid creating * contention on the send buffer lock (before this change it used to do * sowwakeup and then t4_push_frames right after that when recovering from tx * stalls). When drop is set this function MUST drop the bytes and wake up any * writers. */ void t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m, *sb_sndptr; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tx_credits, shove, compl, sowwakeup; struct ofld_tx_sdesc *txsd; bool nomap_mbuf_seen; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(ulp_mode(toep) == ULP_MODE_NONE || ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS || ulp_mode(toep) == ULP_MODE_RDMA, ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep)); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", __func__, toep->tid, toep->flags, tp->t_flags, drop); #endif if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; #ifdef RATELIMIT if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; } #endif /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } txsd = &toep->txsd[toep->txsd_pidx]; do { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { sbdrop_locked(sb, drop); drop = 0; } sb_sndptr = sb->sb_sndptr; sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb; plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ nomap_mbuf_seen = false; for (m = sndptr; m != NULL; m = m->m_next) { int n; if ((m->m_flags & M_NOTAVAIL) != 0) break; if (m->m_flags & M_EXTPG) { #ifdef KERN_TLS if (m->m_epg_tls != NULL) { toep->flags |= TPF_KTLS; if (plen == 0) { SOCKBUF_UNLOCK(sb); t4_push_ktls(sc, toep, 0); return; } break; } #endif n = sglist_count_mbuf_epg(m, mtod(m, vm_offset_t), m->m_len); } else n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* This mbuf sent us _over_ the nsegs limit, back out */ if (plen > max_imm && nsegs > max_nsegs) { nsegs -= n; plen -= m->m_len; if (plen == 0) { /* Too few credits */ toep->flags |= TPF_TX_SUSPENDED; if (sowwakeup) { if (!TAILQ_EMPTY( &toep->aiotx_jobq)) t4_aiotx_queue_toep(so, toep); sowwakeup_locked(so); } else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); return; } break; } if (m->m_flags & M_EXTPG) nomap_mbuf_seen = true; if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ /* This mbuf put us right at the max_nsegs limit */ if (plen > max_imm && nsegs == max_nsegs) { m = m->m_next; break; } } if (sbused(sb) > sb->sb_hiwat * 5 / 8 && toep->plen_nocompl + plen >= sb->sb_hiwat / 4) compl = 1; else compl = 0; if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && sbused(sb) >= sb->sb_hiwat * 7 / 8) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ } if (sowwakeup) { if (!TAILQ_EMPTY(&toep->aiotx_jobq)) t4_aiotx_queue_toep(so, toep); sowwakeup_locked(so); } else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); /* nothing to send */ if (plen == 0) { KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0, ("%s: nothing to send, but m != NULL is ready", __func__)); break; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); if (plen <= max_imm && !nomap_mbuf_seen) { /* Immediate data tx */ wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, plen, credits, shove, 0); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); nsegs = 0; } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, plen, credits, shove, 0); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; if (toep->tx_credits <= toep->tx_total * 3 / 8 && toep->tx_nocompl >= toep->tx_total / 4) compl = 1; if (compl || ulp_mode(toep) == ULP_MODE_RDMA) { txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } tp->snd_nxt += plen; tp->snd_max += plen; SOCKBUF_LOCK(sb); KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); sb->sb_sndptr = sb_sndptr; SOCKBUF_UNLOCK(sb); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0); /* Send a FIN if requested, but only if there's no more data to send */ if (m == NULL && toep->flags & TPF_SEND_FIN) t4_close_conn(sc, toep); } static inline void rqdrop_locked(struct mbufq *q, int plen) { struct mbuf *m; while (plen > 0) { m = mbufq_dequeue(q); /* Too many credits. */ MPASS(m != NULL); M_ASSERTPKTHDR(m); /* Partial credits. */ MPASS(plen >= m->m_pkthdr.len); plen -= m->m_pkthdr.len; m_freem(m); } } static struct wrqe * write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr) { struct mbuf *m; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; u_int adjusted_plen, ulp_submode; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); int tx_credits, shove; static const u_int ulp_extra_len[] = {0, 4, 4, 8}; M_ASSERTPKTHDR(sndptr); tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); if (mbuf_raw_wr(sndptr)) { plen = sndptr->m_pkthdr.len; KASSERT(plen <= SGE_MAX_WR_LEN, ("raw WR len %u is greater than max WR len", plen)); if (plen > tx_credits * 16) return (NULL); wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq); if (__predict_false(wr == NULL)) return (NULL); m_copydata(sndptr, 0, plen, wrtod(wr)); return (wr); } max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ for (m = sndptr; m != NULL; m = m->m_next) { int n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* * This mbuf would send us _over_ the nsegs limit. * Suspend tx because the PDU can't be sent out. */ if (plen > max_imm && nsegs > max_nsegs) return (NULL); if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); /* * We have a PDU to send. All of it goes out in one WR so 'm' * is NULL. A PDU's length is always a multiple of 4. */ MPASS(m == NULL); MPASS((plen & 3) == 0); MPASS(sndptr->m_pkthdr.len == plen); shove = !(tp->t_flags & TF_MORETOCOME); ulp_submode = mbuf_ulp_submode(sndptr); MPASS(ulp_submode < nitems(ulp_extra_len)); /* * plen doesn't include header and data digests, which are * generated and inserted in the right places by the TOE, but * they do occupy TCP sequence space and need to be accounted * for. */ adjusted_plen = plen + ulp_extra_len[ulp_submode]; if (plen <= max_imm) { /* Immediate data tx */ wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX: how will we recover from this? */ return (NULL); } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, adjusted_plen, credits, shove, ulp_submode); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); nsegs = 0; } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX: how will we recover from this? */ return (NULL); } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, adjusted_plen, credits, shove, ulp_submode); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len); *pad = 0; } } tp->snd_nxt += adjusted_plen; tp->snd_max += adjusted_plen; counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, 1); counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen); return (wr); } void t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m; struct fw_wr_hdr *wrhdr; struct wrqe *wr; u_int plen, credits; struct inpcb *inp = toep->inp; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; struct mbufq *pduq = &toep->ulp_pduq; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(ulp_mode(toep) == ULP_MODE_ISCSI, ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep)); if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } if (drop) { struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int sbu; /* * An unlocked read is ok here as the data should only * transition from a non-zero value to either another * non-zero value or zero. Once it is zero it should * stay zero. */ if (__predict_false(sbused(sb)) > 0) { SOCKBUF_LOCK(sb); sbu = sbused(sb); if (sbu > 0) { /* * The data transmitted before the * tid's ULP mode changed to ISCSI is * still in so_snd. Incoming credits * should account for so_snd first. */ sbdrop_locked(sb, min(sbu, drop)); drop -= min(sbu, drop); } sowwakeup_locked(so); /* unlocks so_snd */ } rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); } while ((sndptr = mbufq_first(pduq)) != NULL) { wr = write_iscsi_mbuf_wr(toep, sndptr); if (wr == NULL) { toep->flags |= TPF_TX_SUSPENDED; return; } plen = sndptr->m_pkthdr.len; credits = howmany(wr->wr_len, 16); KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); m = mbufq_dequeue(pduq); MPASS(m == sndptr); mbufq_enqueue(&toep->ulp_pdu_reclaimq, m); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; /* * Ensure there are enough credits for a full-sized WR * as page pod WRs can be full-sized. */ if (toep->tx_credits <= SGE_MAX_WR_LEN * 5 / 4 && toep->tx_nocompl >= toep->tx_total / 4) { wrhdr = wrtod(wr); wrhdr->hi |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } /* Send a FIN if requested, but only if there are no more PDUs to send */ if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN) t4_close_conn(sc, toep); } static inline void t4_push_data(struct adapter *sc, struct toepcb *toep, int drop) { if (ulp_mode(toep) == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, drop); - else if (tls_tx_key(toep) && toep->tls.mode == TLS_MODE_TLSOM) - t4_push_tls_records(sc, toep, drop); -#ifdef KERN_TLS else if (toep->flags & TPF_KTLS) t4_push_ktls(sc, toep, drop); -#endif else t4_push_frames(sc, toep, drop); } int t4_tod_output(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); t4_push_data(sc, toep, 0); return (0); } int t4_send_fin(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); toep->flags |= TPF_SEND_FIN; if (tp->t_state >= TCPS_ESTABLISHED) t4_push_data(sc, toep, 0); return (0); } int t4_send_rst(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #if defined(INVARIANTS) struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); /* hmmmm */ KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc for tid %u [%s] not sent already", __func__, toep->tid, tcpstates[tp->t_state])); send_reset(sc, toep, 0); return (0); } /* * Peer has sent us a FIN. */ static int do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_peer_close *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so; struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PEER_CLOSE, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (__predict_false(toep->flags & TPF_SYNQE)) { /* * do_pass_establish must have run before do_peer_close and if * this is still a synqe instead of a toepcb then the connection * must be getting aborted. */ MPASS(toep->flags & TPF_ABORT_SHUTDOWN); CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); tp = intotcpcb(inp); CTR6(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, ddp_flags 0x%x, inp %p", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, toep->ddp.flags, inp); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; tp->rcv_nxt++; /* FIN */ so = inp->inp_socket; socantrcvmore(so); if (ulp_mode(toep) == ULP_MODE_TCPDDP) { DDP_LOCK(toep); if (__predict_false(toep->ddp.flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) handle_ddp_close(toep, tp, cpl->rcv_nxt); DDP_UNLOCK(toep); } if (ulp_mode(toep) != ULP_MODE_RDMA) { KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, be32toh(cpl->rcv_nxt))); } switch (tp->t_state) { case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tcp_state_change(tp, TCPS_CLOSE_WAIT); break; case TCPS_FIN_WAIT_1: tcp_state_change(tp, TCPS_CLOSING); break; case TCPS_FIN_WAIT_2: restore_so_proto(so, inp->inp_vflag & INP_IPV6); tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ NET_EPOCH_EXIT(et); CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); return (0); default: log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", __func__, tid, tp->t_state); } done: INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } /* * Peer has ACK'd our FIN. */ static int do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so = NULL; struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_CLOSE_CON_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); tp = intotcpcb(inp); CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; so = inp->inp_socket; tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ switch (tp->t_state) { case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ restore_so_proto(so, inp->inp_vflag & INP_IPV6); tcp_twstart(tp); release: INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ NET_EPOCH_EXIT(et); CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); /* no more CPLs expected */ return (0); case TCPS_LAST_ACK: if (tcp_close(tp)) INP_WUNLOCK(inp); goto release; case TCPS_FIN_WAIT_1: if (so->so_rcv.sb_state & SBS_CANTRCVMORE) soisdisconnected(so); tcp_state_change(tp, TCPS_FIN_WAIT_2); break; default: log(LOG_ERR, "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", __func__, tid, tcpstates[tp->t_state]); } done: INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } void send_abort_rpl(struct adapter *sc, struct sge_ofld_txq *ofld_txq, int tid, int rst_status) { struct wrqe *wr; struct cpl_abort_rpl *cpl; wr = alloc_wrqe(sizeof(*cpl), &ofld_txq->wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } cpl = wrtod(wr); INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); cpl->cmd = rst_status; t4_wrq_tx(sc, wr); } static int abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) { switch (abort_reason) { case CPL_ERR_BAD_SYN: case CPL_ERR_CONN_RESET: return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); case CPL_ERR_XMIT_TIMEDOUT: case CPL_ERR_PERSIST_TIMEDOUT: case CPL_ERR_FINWAIT2_TIMEDOUT: case CPL_ERR_KEEPALIVE_TIMEDOUT: return (ETIMEDOUT); default: return (EIO); } } /* * TCP RST from the peer, timeout, or some other such critical error. */ static int do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct sge_ofld_txq *ofld_txq = toep->ofld_txq; struct inpcb *inp; struct tcpcb *tp; struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_REQ_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_req_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); if (negative_advice(cpl->status)) { CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", __func__, cpl->status, tid, toep->flags); return (0); /* Ignore negative advice */ } inp = toep->inp; CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); /* for tcp_close */ INP_WLOCK(inp); tp = intotcpcb(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp->inp_flags, cpl->status); /* * If we'd initiated an abort earlier the reply to it is responsible for * cleaning up resources. Otherwise we tear everything down right here * right now. We owe the T4 a CPL_ABORT_RPL no matter what. */ if (toep->flags & TPF_ABORT_SHUTDOWN) { INP_WUNLOCK(inp); goto done; } toep->flags |= TPF_ABORT_SHUTDOWN; if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) so_error_set(so, abort_status_to_errno(tp, cpl->status)); tp = tcp_close(tp); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ } final_cpl_received(toep); done: NET_EPOCH_EXIT(et); CURVNET_RESTORE(); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } /* * Reply to the CPL_ABORT_REQ (send_reset) */ static int do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_RPL_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_rpl_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", __func__, tid, toep, inp, cpl->status); KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: wasn't expecting abort reply", __func__)); INP_WLOCK(inp); final_cpl_received(toep); return (0); } static int do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct epoch_tracker et; int len, rx_credits; uint32_t ddp_placed = 0; if (__predict_false(toep->flags & TPF_SYNQE)) { /* * do_pass_establish must have run before do_rx_data and if this * is still a synqe instead of a toepcb then the connection must * be getting aborted. */ MPASS(toep->flags & TPF_ABORT_SHUTDOWN); CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); m_freem(m); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } tp = intotcpcb(inp); if (__predict_false(ulp_mode(toep) == ULP_MODE_TLS && toep->flags & TPF_TLS_RECEIVE)) { /* Received "raw" data on a TLS socket. */ CTR3(KTR_CXGBE, "%s: tid %u, raw TLS data (%d bytes)", __func__, tid, len); do_rx_data_tls(cpl, toep, m); return (0); } if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; tp->rcv_nxt += len; if (tp->rcv_wnd < len) { KASSERT(ulp_mode(toep) == ULP_MODE_RDMA, ("%s: negative window size", __func__)); } tp->rcv_wnd -= len; tp->t_rcvtime = ticks; if (ulp_mode(toep) == ULP_MODE_TCPDDP) DDP_LOCK(toep); so = inp_inpcbtosocket(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, len); m_freem(m); SOCKBUF_UNLOCK(sb); if (ulp_mode(toep) == ULP_MODE_TCPDDP) DDP_UNLOCK(toep); INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } if (ulp_mode(toep) == ULP_MODE_TCPDDP) { int changed = !(toep->ddp.flags & DDP_ON) ^ cpl->ddp_off; if (toep->ddp.waiting_count != 0 || toep->ddp.active_count != 0) CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__, tid, len); if (changed) { if (toep->ddp.flags & DDP_SC_REQ) toep->ddp.flags ^= DDP_ON | DDP_SC_REQ; else { KASSERT(cpl->ddp_off == 1, ("%s: DDP switched on by itself.", __func__)); /* Fell out of DDP mode */ toep->ddp.flags &= ~DDP_ON; CTR1(KTR_CXGBE, "%s: fell out of DDP mode", __func__); insert_ddp_data(toep, ddp_placed); } } if (toep->ddp.flags & DDP_ON) { /* * CPL_RX_DATA with DDP on can only be an indicate. * Start posting queued AIO requests via DDP. The * payload that arrived in this indicate is appended * to the socket buffer as usual. */ handle_ddp_indicate(toep); } } sbappendstream_locked(sb, m, 0); rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0; if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) { rx_credits = send_rx_credits(sc, toep, rx_credits); tp->rcv_wnd += rx_credits; tp->rcv_adv += rx_credits; } if (ulp_mode(toep) == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 && sbavail(sb) != 0) { CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__, tid); ddp_queue_toep(toep); } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); if (ulp_mode(toep) == ULP_MODE_TCPDDP) DDP_UNLOCK(toep); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } static int do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp; struct tcpcb *tp; struct socket *so; uint8_t credits = cpl->credits; struct ofld_tx_sdesc *txsd; int plen; #ifdef INVARIANTS unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); #endif /* * Very unusual case: we'd sent a flowc + abort_req for a synq entry and * now this comes back carrying the credits for the flowc. */ if (__predict_false(toep->flags & TPF_SYNQE)) { KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: credits for a synq entry %p", __func__, toep)); return (0); } inp = toep->inp; KASSERT(opcode == CPL_FW4_ACK, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_WLOCK(inp); if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { INP_WUNLOCK(inp); return (0); } KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); tp = intotcpcb(inp); if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { tcp_seq snd_una = be32toh(cpl->snd_una); #ifdef INVARIANTS if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { log(LOG_ERR, "%s: unexpected seq# %x for TID %u, snd_una %x\n", __func__, snd_una, toep->tid, tp->snd_una); } #endif if (tp->snd_una != snd_una) { tp->snd_una = snd_una; tp->ts_recent_age = tcp_ts_getticks(); } } #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits); #endif so = inp->inp_socket; txsd = &toep->txsd[toep->txsd_cidx]; plen = 0; while (credits) { KASSERT(credits >= txsd->tx_credits, ("%s: too many (or partial) credits", __func__)); credits -= txsd->tx_credits; toep->tx_credits += txsd->tx_credits; plen += txsd->plen; - if (txsd->iv_buffer) { - free(txsd->iv_buffer, M_CXGBE); - txsd->iv_buffer = NULL; - } txsd++; toep->txsd_avail++; KASSERT(toep->txsd_avail <= toep->txsd_total, ("%s: txsd avail > total", __func__)); if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { txsd = &toep->txsd[0]; toep->txsd_cidx = 0; } } if (toep->tx_credits == toep->tx_total) { toep->tx_nocompl = 0; toep->plen_nocompl = 0; } if (toep->flags & TPF_TX_SUSPENDED && toep->tx_credits >= toep->tx_total / 4) { #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__, tid); #endif toep->flags &= ~TPF_TX_SUSPENDED; CURVNET_SET(toep->vnet); t4_push_data(sc, toep, plen); CURVNET_RESTORE(); } else if (plen > 0) { struct sockbuf *sb = &so->so_snd; int sbu; SOCKBUF_LOCK(sb); sbu = sbused(sb); if (ulp_mode(toep) == ULP_MODE_ISCSI) { if (__predict_false(sbu > 0)) { /* * The data transmitted before the * tid's ULP mode changed to ISCSI is * still in so_snd. Incoming credits * should account for so_snd first. */ sbdrop_locked(sb, min(sbu, plen)); plen -= min(sbu, plen); } sowwakeup_locked(so); /* unlocks so_snd */ rqdrop_locked(&toep->ulp_pdu_reclaimq, plen); } else { #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__, tid, plen); #endif sbdrop_locked(sb, plen); - if (tls_tx_key(toep) && - toep->tls.mode == TLS_MODE_TLSOM) { - struct tls_ofld_info *tls_ofld = &toep->tls; - - MPASS(tls_ofld->sb_off >= plen); - tls_ofld->sb_off -= plen; - } if (!TAILQ_EMPTY(&toep->aiotx_jobq)) t4_aiotx_queue_toep(so, toep); sowwakeup_locked(so); /* unlocks so_snd */ } SOCKBUF_UNLOCK_ASSERT(sb); } INP_WUNLOCK(inp); return (0); } void t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie) { struct wrqe *wr; struct cpl_set_tcb_field *req; struct ofld_tx_sdesc *txsd; MPASS((cookie & ~M_COOKIE) == 0); if (reply) { MPASS(cookie != CPL_COOKIE_RESERVED); } wr = alloc_wrqe(sizeof(*req), wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id)); if (reply == 0) req->reply_ctrl |= htobe16(F_NO_REPLY); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie)); req->mask = htobe64(mask); req->val = htobe64(val); if (wrq->eq.type == EQ_OFLD) { txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = howmany(sizeof(*req), 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; } t4_wrq_tx(sc, wr); } void t4_init_cpl_io_handlers(void) { t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl, CPL_COOKIE_TOM); t4_register_cpl_handler(CPL_RX_DATA, do_rx_data); t4_register_shared_cpl_handler(CPL_FW4_ACK, do_fw4_ack, CPL_COOKIE_TOM); } void t4_uninit_cpl_io_handlers(void) { t4_register_cpl_handler(CPL_PEER_CLOSE, NULL); t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL); t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL); t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, NULL, CPL_COOKIE_TOM); t4_register_cpl_handler(CPL_RX_DATA, NULL); t4_register_shared_cpl_handler(CPL_FW4_ACK, NULL, CPL_COOKIE_TOM); } /* * Use the 'backend1' field in AIO jobs to hold an error that should * be reported when the job is completed, the 'backend3' field to * store the amount of data sent by the AIO job so far, and the * 'backend4' field to hold a reference count on the job. * * Each unmapped mbuf holds a reference on the job as does the queue * so long as the job is queued. */ #define aio_error backend1 #define aio_sent backend3 #define aio_refs backend4 #define jobtotid(job) \ (((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid) static void aiotx_free_job(struct kaiocb *job) { long status; int error; if (refcount_release(&job->aio_refs) == 0) return; error = (intptr_t)job->aio_error; status = job->aio_sent; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__, jobtotid(job), job, status, error); #endif if (error != 0 && status != 0) error = 0; if (error == ECANCELED) aio_cancel(job); else if (error) aio_complete(job, -1, error); else { job->msgsnd = 1; aio_complete(job, status, 0); } } static void aiotx_free_pgs(struct mbuf *m) { struct kaiocb *job; vm_page_t pg; M_ASSERTEXTPG(m); job = m->m_ext.ext_arg1; #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__, m->m_len, jobtotid(job)); #endif for (int i = 0; i < m->m_epg_npgs; i++) { pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); vm_page_unwire(pg, PQ_ACTIVE); } aiotx_free_job(job); } /* * Allocate a chain of unmapped mbufs describing the next 'len' bytes * of an AIO job. */ static struct mbuf * alloc_aiotx_mbuf(struct kaiocb *job, int len) { struct vmspace *vm; vm_page_t pgs[MBUF_PEXT_MAX_PGS]; struct mbuf *m, *top, *last; vm_map_t map; vm_offset_t start; int i, mlen, npages, pgoff; KASSERT(job->aio_sent + len <= job->uaiocb.aio_nbytes, ("%s(%p, %d): request to send beyond end of buffer", __func__, job, len)); /* * The AIO subsystem will cancel and drain all requests before * permitting a process to exit or exec, so p_vmspace should * be stable here. */ vm = job->userproc->p_vmspace; map = &vm->vm_map; start = (uintptr_t)job->uaiocb.aio_buf + job->aio_sent; pgoff = start & PAGE_MASK; top = NULL; last = NULL; while (len > 0) { mlen = imin(len, MBUF_PEXT_MAX_PGS * PAGE_SIZE - pgoff); KASSERT(mlen == len || ((start + mlen) & PAGE_MASK) == 0, ("%s: next start (%#jx + %#x) is not page aligned", __func__, (uintmax_t)start, mlen)); npages = vm_fault_quick_hold_pages(map, start, mlen, VM_PROT_WRITE, pgs, nitems(pgs)); if (npages < 0) break; m = mb_alloc_ext_pgs(M_WAITOK, aiotx_free_pgs); if (m == NULL) { vm_page_unhold_pages(pgs, npages); break; } m->m_epg_1st_off = pgoff; m->m_epg_npgs = npages; if (npages == 1) { KASSERT(mlen + pgoff <= PAGE_SIZE, ("%s: single page is too large (off %d len %d)", __func__, pgoff, mlen)); m->m_epg_last_len = mlen; } else { m->m_epg_last_len = mlen - (PAGE_SIZE - pgoff) - (npages - 2) * PAGE_SIZE; } for (i = 0; i < npages; i++) m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]); m->m_len = mlen; m->m_ext.ext_size = npages * PAGE_SIZE; m->m_ext.ext_arg1 = job; refcount_acquire(&job->aio_refs); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d, new mbuf %p for job %p, npages %d", __func__, jobtotid(job), m, job, npages); #endif if (top == NULL) top = m; else last->m_next = m; last = m; len -= mlen; start += mlen; pgoff = 0; } return (top); } static void t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job) { struct sockbuf *sb; struct file *fp; struct inpcb *inp; struct tcpcb *tp; struct mbuf *m; int error, len; bool moretocome, sendmore; sb = &so->so_snd; SOCKBUF_UNLOCK(sb); fp = job->fd_file; m = NULL; #ifdef MAC error = mac_socket_check_send(fp->f_cred, so); if (error != 0) goto out; #endif /* Inline sosend_generic(). */ error = sblock(sb, SBL_WAIT); MPASS(error == 0); sendanother: SOCKBUF_LOCK(sb); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(sb); sbunlock(sb); if ((so->so_options & SO_NOSIGPIPE) == 0) { PROC_LOCK(job->userproc); kern_psignal(job->userproc, SIGPIPE); PROC_UNLOCK(job->userproc); } error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(sb); sbunlock(sb); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { SOCKBUF_UNLOCK(sb); sbunlock(sb); error = ENOTCONN; goto out; } if (sbspace(sb) < sb->sb_lowat) { MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO)); /* * Don't block if there is too little room in the socket * buffer. Instead, requeue the request. */ if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { SOCKBUF_UNLOCK(sb); sbunlock(sb); error = ECANCELED; goto out; } TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); SOCKBUF_UNLOCK(sb); sbunlock(sb); goto out; } /* * Write as much data as the socket permits, but no more than a * a single sndbuf at a time. */ len = sbspace(sb); if (len > job->uaiocb.aio_nbytes - job->aio_sent) { len = job->uaiocb.aio_nbytes - job->aio_sent; moretocome = false; } else moretocome = true; if (len > toep->params.sndbuf) { len = toep->params.sndbuf; sendmore = true; } else sendmore = false; if (!TAILQ_EMPTY(&toep->aiotx_jobq)) moretocome = true; SOCKBUF_UNLOCK(sb); MPASS(len != 0); m = alloc_aiotx_mbuf(job, len); if (m == NULL) { sbunlock(sb); error = EFAULT; goto out; } /* Inlined tcp_usr_send(). */ inp = toep->inp; INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); sbunlock(sb); error = ECONNRESET; goto out; } job->aio_sent += m_length(m, NULL); sbappendstream(sb, m, 0); m = NULL; if (!(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); if (moretocome) tp->t_flags |= TF_MORETOCOME; error = tp->t_fb->tfb_tcp_output(tp); if (moretocome) tp->t_flags &= ~TF_MORETOCOME; } INP_WUNLOCK(inp); if (sendmore) goto sendanother; sbunlock(sb); if (error) goto out; /* * If this is a blocking socket and the request has not been * fully completed, requeue it until the socket is ready * again. */ if (job->aio_sent < job->uaiocb.aio_nbytes && !(so->so_state & SS_NBIO)) { SOCKBUF_LOCK(sb); if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { SOCKBUF_UNLOCK(sb); error = ECANCELED; goto out; } TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); return; } /* * If the request will not be requeued, drop the queue's * reference to the job. Any mbufs in flight should still * hold a reference, but this drops the reference that the * queue owns while it is waiting to queue mbufs to the * socket. */ aiotx_free_job(job); out: if (error) { job->aio_error = (void *)(intptr_t)error; aiotx_free_job(job); } m_freem(m); SOCKBUF_LOCK(sb); } static void t4_aiotx_task(void *context, int pending) { struct toepcb *toep = context; struct socket *so; struct kaiocb *job; so = toep->aiotx_so; CURVNET_SET(toep->vnet); SOCKBUF_LOCK(&so->so_snd); while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) { job = TAILQ_FIRST(&toep->aiotx_jobq); TAILQ_REMOVE(&toep->aiotx_jobq, job, list); if (!aio_clear_cancel_function(job)) continue; t4_aiotx_process_job(toep, so, job); } toep->aiotx_so = NULL; SOCKBUF_UNLOCK(&so->so_snd); CURVNET_RESTORE(); free_toepcb(toep); SOCK_LOCK(so); sorele(so); } static void t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep) { SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd); #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s", __func__, toep->tid, toep->aiotx_so != NULL ? "true" : "false"); #endif if (toep->aiotx_so != NULL) return; soref(so); toep->aiotx_so = so; hold_toepcb(toep); soaio_enqueue(&toep->aiotx_task); } static void t4_aiotx_cancel(struct kaiocb *job) { struct socket *so; struct sockbuf *sb; struct tcpcb *tp; struct toepcb *toep; so = job->fd_file->f_data; tp = so_sototcpcb(so); toep = tp->t_toe; MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE); sb = &so->so_snd; SOCKBUF_LOCK(sb); if (!aio_cancel_cleared(job)) TAILQ_REMOVE(&toep->aiotx_jobq, job, list); SOCKBUF_UNLOCK(sb); job->aio_error = (void *)(intptr_t)ECANCELED; aiotx_free_job(job); } int t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job) { struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); /* This only handles writes. */ if (job->uaiocb.aio_lio_opcode != LIO_WRITE) return (EOPNOTSUPP); if (!sc->tt.tx_zcopy) return (EOPNOTSUPP); if (tls_tx_key(toep)) return (EOPNOTSUPP); SOCKBUF_LOCK(&so->so_snd); #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: queueing %p for tid %u", __func__, job, toep->tid); #endif if (!aio_set_cancel_function(job, t4_aiotx_cancel)) panic("new job was cancelled"); refcount_init(&job->aio_refs, 1); TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list); if (sowriteable(so)) t4_aiotx_queue_toep(so, toep); SOCKBUF_UNLOCK(&so->so_snd); return (0); } void aiotx_init_toep(struct toepcb *toep) { TAILQ_INIT(&toep->aiotx_jobq); TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep); } #endif diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c index bbd905d8acc3..be47dbac7ae5 100644 --- a/sys/dev/cxgbe/tom/t4_tls.c +++ b/sys/dev/cxgbe/tom/t4_tls.c @@ -1,2417 +1,1542 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017-2018 Chelsio Communications, Inc. * All rights reserved. * Written by: John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_kern_tls.h" #include __FBSDID("$FreeBSD$"); +#ifdef KERN_TLS #include #include -#ifdef KERN_TLS #include -#endif #include #include #include #include #include #include #include #include -#ifdef KERN_TLS #include #include -#endif #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_tcb.h" #include "crypto/t4_crypto.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* * The TCP sequence number of a CPL_TLS_DATA mbuf is saved here while * the mbuf is in the ulp_pdu_reclaimq. */ #define tls_tcp_seq PH_loc.thirtytwo[0] static void t4_set_tls_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) { struct adapter *sc = td_adapter(toep->td); t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, word, mask, val, 0, 0); } /* TLS and DTLS common routines */ bool can_tls_offload(struct adapter *sc) { return (sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS); } int tls_tx_key(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; return (tls_ofld->tx_key_addr >= 0); } -int -tls_rx_key(struct toepcb *toep) -{ - struct tls_ofld_info *tls_ofld = &toep->tls; - - return (tls_ofld->rx_key_addr >= 0); -} - -static int -key_size(struct toepcb *toep) -{ - struct tls_ofld_info *tls_ofld = &toep->tls; - - return ((tls_ofld->key_location == TLS_SFO_WR_CONTEXTLOC_IMMEDIATE) ? - tls_ofld->k_ctx.tx_key_info_size : KEY_IN_DDR_SIZE); -} - /* Set TLS Key-Id in TCB */ static void t4_set_tls_keyid(struct toepcb *toep, unsigned int key_id) { t4_set_tls_tcb_field(toep, W_TCB_RX_TLS_KEY_TAG, V_TCB_RX_TLS_KEY_TAG(M_TCB_RX_TLS_BUF_TAG), V_TCB_RX_TLS_KEY_TAG(key_id)); } /* Clear TF_RX_QUIESCE to re-enable receive. */ static void t4_clear_rx_quiesce(struct toepcb *toep) { t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0); } static void tls_clr_ofld_mode(struct toepcb *toep) { tls_stop_handshake_timer(toep); KASSERT(toep->tls.rx_key_addr == -1, ("%s: tid %d has RX key", __func__, toep->tid)); /* Switch to plain TOE mode. */ t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)), V_TCB_ULP_RAW(V_TF_TLS_ENABLE(0))); t4_set_tls_tcb_field(toep, W_TCB_ULP_TYPE, V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), V_TCB_ULP_TYPE(ULP_MODE_NONE)); t4_clear_rx_quiesce(toep); toep->flags &= ~(TPF_FORCE_CREDITS | TPF_TLS_ESTABLISHED); toep->params.ulp_mode = ULP_MODE_NONE; } -static void -tls_clr_quiesce(struct toepcb *toep) -{ - - tls_stop_handshake_timer(toep); - t4_clear_rx_quiesce(toep); -} - -/* - * Calculate the TLS data expansion size - */ -static int -tls_expansion_size(struct toepcb *toep, int data_len, int full_pdus_only, - unsigned short *pdus_per_ulp) -{ - struct tls_ofld_info *tls_ofld = &toep->tls; - struct tls_scmd *scmd = &tls_ofld->scmd0; - int expn_size = 0, frag_count = 0, pad_per_pdu = 0, - pad_last_pdu = 0, last_frag_size = 0, max_frag_size = 0; - int exp_per_pdu = 0; - int hdr_len = TLS_HEADER_LENGTH; - - do { - max_frag_size = tls_ofld->k_ctx.frag_size; - if (G_SCMD_CIPH_MODE(scmd->seqno_numivs) == - SCMD_CIPH_MODE_AES_GCM) { - frag_count = (data_len / max_frag_size); - exp_per_pdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + - hdr_len; - expn_size = frag_count * exp_per_pdu; - if (full_pdus_only) { - *pdus_per_ulp = data_len / (exp_per_pdu + - max_frag_size); - if (*pdus_per_ulp > 32) - *pdus_per_ulp = 32; - else if(!*pdus_per_ulp) - *pdus_per_ulp = 1; - expn_size = (*pdus_per_ulp) * exp_per_pdu; - break; - } - if ((last_frag_size = data_len % max_frag_size) > 0) { - frag_count += 1; - expn_size += exp_per_pdu; - } - break; - } else if (G_SCMD_CIPH_MODE(scmd->seqno_numivs) != - SCMD_CIPH_MODE_NOP) { - /* Calculate the number of fragments we can make */ - frag_count = (data_len / max_frag_size); - if (frag_count > 0) { - pad_per_pdu = (((howmany((max_frag_size + - tls_ofld->mac_length), - CIPHER_BLOCK_SIZE)) * - CIPHER_BLOCK_SIZE) - - (max_frag_size + - tls_ofld->mac_length)); - if (!pad_per_pdu) - pad_per_pdu = CIPHER_BLOCK_SIZE; - exp_per_pdu = pad_per_pdu + - tls_ofld->mac_length + - hdr_len + CIPHER_BLOCK_SIZE; - expn_size = frag_count * exp_per_pdu; - } - if (full_pdus_only) { - *pdus_per_ulp = data_len / (exp_per_pdu + - max_frag_size); - if (*pdus_per_ulp > 32) - *pdus_per_ulp = 32; - else if (!*pdus_per_ulp) - *pdus_per_ulp = 1; - expn_size = (*pdus_per_ulp) * exp_per_pdu; - break; - } - /* Consider the last fragment */ - if ((last_frag_size = data_len % max_frag_size) > 0) { - pad_last_pdu = (((howmany((last_frag_size + - tls_ofld->mac_length), - CIPHER_BLOCK_SIZE)) * - CIPHER_BLOCK_SIZE) - - (last_frag_size + - tls_ofld->mac_length)); - if (!pad_last_pdu) - pad_last_pdu = CIPHER_BLOCK_SIZE; - expn_size += (pad_last_pdu + - tls_ofld->mac_length + hdr_len + - CIPHER_BLOCK_SIZE); - } - } - } while (0); - - return (expn_size); -} - -/* Copy Key to WR */ -static void -tls_copy_tx_key(struct toepcb *toep, void *dst) -{ - struct tls_ofld_info *tls_ofld = &toep->tls; - struct ulptx_sc_memrd *sc_memrd; - struct ulptx_idata *sc; - - if (tls_ofld->k_ctx.tx_key_info_size <= 0) - return; - - if (tls_ofld->key_location == TLS_SFO_WR_CONTEXTLOC_DDR) { - sc = dst; - sc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); - sc->len = htobe32(0); - sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); - sc_memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) | - V_ULP_TX_SC_MORE(1) | - V_ULPTX_LEN16(tls_ofld->k_ctx.tx_key_info_size >> 4)); - sc_memrd->addr = htobe32(tls_ofld->tx_key_addr >> 5); - } else if (tls_ofld->key_location == TLS_SFO_WR_CONTEXTLOC_IMMEDIATE) { - memcpy(dst, &tls_ofld->k_ctx.tx, - tls_ofld->k_ctx.tx_key_info_size); - } -} - /* TLS/DTLS content type for CPL SFO */ static inline unsigned char tls_content_type(unsigned char content_type) { /* * XXX: Shouldn't this map CONTENT_TYPE_APP_DATA to DATA and * default to "CUSTOM" for all other types including * heartbeat? */ switch (content_type) { case CONTENT_TYPE_CCS: return CPL_TX_TLS_SFO_TYPE_CCS; case CONTENT_TYPE_ALERT: return CPL_TX_TLS_SFO_TYPE_ALERT; case CONTENT_TYPE_HANDSHAKE: return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; case CONTENT_TYPE_HEARTBEAT: return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; } return CPL_TX_TLS_SFO_TYPE_DATA; } -static unsigned char -get_cipher_key_size(unsigned int ck_size) +static int +tls_key_info_size(struct ktls_session *tls) +{ + u_int key_info_size, mac_key_size; + + key_info_size = sizeof(struct tx_keyctx_hdr) + + tls->params.cipher_key_len; + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) { + key_info_size += GMAC_BLOCK_LEN; + } else { + switch (tls->params.auth_algorithm) { + case CRYPTO_SHA1_HMAC: + mac_key_size = SHA1_HASH_LEN; + break; + case CRYPTO_SHA2_256_HMAC: + mac_key_size = SHA2_256_HASH_LEN; + break; + case CRYPTO_SHA2_384_HMAC: + mac_key_size = SHA2_512_HASH_LEN; + break; + default: + __assert_unreachable(); + } + key_info_size += roundup2(mac_key_size, 16) * 2; + } + return (key_info_size); +} + +static int +tls_proto_ver(struct ktls_session *tls) +{ + if (tls->params.tls_vminor == TLS_MINOR_VER_ONE) + return (SCMD_PROTO_VERSION_TLS_1_1); + else + return (SCMD_PROTO_VERSION_TLS_1_2); +} + +static int +tls_cipher_mode(struct ktls_session *tls) { - switch (ck_size) { - case AES_NOP: /* NOP */ - return 15; - case AES_128: /* AES128 */ - return CH_CK_SIZE_128; - case AES_192: /* AES192 */ - return CH_CK_SIZE_192; - case AES_256: /* AES256 */ - return CH_CK_SIZE_256; + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + return (SCMD_CIPH_MODE_AES_CBC); + case CRYPTO_AES_NIST_GCM_16: + return (SCMD_CIPH_MODE_AES_GCM); default: - return CH_CK_SIZE_256; + return (SCMD_CIPH_MODE_NOP); } } -static unsigned char -get_mac_key_size(unsigned int mk_size) +static int +tls_auth_mode(struct ktls_session *tls) { - switch (mk_size) { - case SHA_NOP: /* NOP */ - return CH_MK_SIZE_128; - case SHA_GHASH: /* GHASH */ - case SHA_512: /* SHA512 */ - return CH_MK_SIZE_512; - case SHA_224: /* SHA2-224 */ - return CH_MK_SIZE_192; - case SHA_256: /* SHA2-256*/ - return CH_MK_SIZE_256; - case SHA_384: /* SHA384 */ - return CH_MK_SIZE_512; - case SHA1: /* SHA1 */ + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + switch (tls->params.auth_algorithm) { + case CRYPTO_SHA1_HMAC: + return (SCMD_AUTH_MODE_SHA1); + case CRYPTO_SHA2_256_HMAC: + return (SCMD_AUTH_MODE_SHA256); + case CRYPTO_SHA2_384_HMAC: + return (SCMD_AUTH_MODE_SHA512_384); + default: + return (SCMD_AUTH_MODE_NOP); + } + case CRYPTO_AES_NIST_GCM_16: + return (SCMD_AUTH_MODE_GHASH); + default: + return (SCMD_AUTH_MODE_NOP); + } +} + +static int +tls_hmac_ctrl(struct ktls_session *tls) +{ + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + return (SCMD_HMAC_CTRL_NO_TRUNC); + case CRYPTO_AES_NIST_GCM_16: + return (SCMD_HMAC_CTRL_NOP); default: - return CH_MK_SIZE_160; + return (SCMD_HMAC_CTRL_NOP); } } -static unsigned int -get_proto_ver(int proto_ver) +static int +tls_cipher_key_size(struct ktls_session *tls) { - switch (proto_ver) { - case TLS1_2_VERSION: - return TLS_1_2_VERSION; - case TLS1_1_VERSION: - return TLS_1_1_VERSION; - case DTLS1_2_VERSION: - return DTLS_1_2_VERSION; + switch (tls->params.cipher_key_len) { + case 128 / 8: + return (CHCR_KEYCTX_CIPHER_KEY_SIZE_128); + case 192 / 8: + return (CHCR_KEYCTX_CIPHER_KEY_SIZE_192); + case 256 / 8: + return (CHCR_KEYCTX_CIPHER_KEY_SIZE_256); default: - return TLS_VERSION_MAX; + __assert_unreachable(); + } +} + +static int +tls_mac_key_size(struct ktls_session *tls) +{ + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) + /* + * XXX: This used to use 128 (SHA_NOP) for TOE, + * but NIC TLS has always used 512. + */ + return (CHCR_KEYCTX_MAC_KEY_SIZE_512); + else { + switch (tls->params.auth_algorithm) { + case CRYPTO_SHA1_HMAC: + return (CHCR_KEYCTX_MAC_KEY_SIZE_160); + case CRYPTO_SHA2_256_HMAC: + return (CHCR_KEYCTX_MAC_KEY_SIZE_256); + case CRYPTO_SHA2_384_HMAC: + return (CHCR_KEYCTX_MAC_KEY_SIZE_512); + default: + __assert_unreachable(); + } + } +} + +static void +prepare_tls_keys(char *key, char *salt, struct ktls_session *tls, + int direction) +{ + struct auth_hash *axf; + u_int mac_key_size; + char *hash; + + if (direction == KTLS_RX && + tls->params.cipher_algorithm == CRYPTO_AES_CBC) + t4_aes_getdeckey(key, tls->params.cipher_key, + tls->params.cipher_key_len * 8); + else + memcpy(key, tls->params.cipher_key, + tls->params.cipher_key_len); + hash = key + tls->params.cipher_key_len; + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) { + memcpy(salt, tls->params.iv, SALT_SIZE); + t4_init_gmac_hash(tls->params.cipher_key, + tls->params.cipher_key_len, hash); + } else { + switch (tls->params.auth_algorithm) { + case CRYPTO_SHA1_HMAC: + axf = &auth_hash_hmac_sha1; + mac_key_size = SHA1_HASH_LEN; + break; + case CRYPTO_SHA2_256_HMAC: + axf = &auth_hash_hmac_sha2_256; + mac_key_size = SHA2_256_HASH_LEN; + break; + case CRYPTO_SHA2_384_HMAC: + axf = &auth_hash_hmac_sha2_384; + mac_key_size = SHA2_512_HASH_LEN; + break; + default: + __assert_unreachable(); + } + t4_init_hmac_digest(axf, mac_key_size, tls->params.auth_key, + tls->params.auth_key_len, hash); } } +/* Rx key */ static void -tls_rxkey_flit1(struct tls_keyctx *kwr, struct tls_key_context *kctx) +prepare_rxkey_wr(struct tls_keyctx *kwr, struct ktls_session *tls) { - if (kctx->state.enc_mode == CH_EVP_CIPH_GCM_MODE) { + kwr->u.rxhdr.flitcnt_hmacctrl = + ((tls_key_info_size(tls) / 16) << 3) | tls_hmac_ctrl(tls); + + kwr->u.rxhdr.protover_ciphmode = + V_TLS_KEYCTX_TX_WR_PROTOVER(tls_proto_ver(tls)) | + V_TLS_KEYCTX_TX_WR_CIPHMODE(tls_cipher_mode(tls)); + + kwr->u.rxhdr.authmode_to_rxvalid = + V_TLS_KEYCTX_TX_WR_AUTHMODE(tls_auth_mode(tls)) | + V_TLS_KEYCTX_TX_WR_SEQNUMCTRL(3) | + V_TLS_KEYCTX_TX_WR_RXVALID(1); + + kwr->u.rxhdr.ivpresent_to_rxmk_size = + V_TLS_KEYCTX_TX_WR_IVPRESENT(0) | + V_TLS_KEYCTX_TX_WR_RXCK_SIZE(tls_cipher_key_size(tls)) | + V_TLS_KEYCTX_TX_WR_RXMK_SIZE(tls_mac_key_size(tls)); + + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) { kwr->u.rxhdr.ivinsert_to_authinsrt = htobe64(V_TLS_KEYCTX_TX_WR_IVINSERT(6ULL) | V_TLS_KEYCTX_TX_WR_AADSTRTOFST(1ULL) | V_TLS_KEYCTX_TX_WR_AADSTOPOFST(5ULL) | V_TLS_KEYCTX_TX_WR_AUTHSRTOFST(14ULL) | V_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(16ULL) | V_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(14ULL) | V_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(0ULL) | V_TLS_KEYCTX_TX_WR_AUTHINSRT(16ULL)); - kwr->u.rxhdr.ivpresent_to_rxmk_size &= - ~(V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(1)); - kwr->u.rxhdr.authmode_to_rxvalid &= - ~(V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(1)); } else { + kwr->u.rxhdr.authmode_to_rxvalid |= + V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(1); + kwr->u.rxhdr.ivpresent_to_rxmk_size |= + V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(1); kwr->u.rxhdr.ivinsert_to_authinsrt = htobe64(V_TLS_KEYCTX_TX_WR_IVINSERT(6ULL) | V_TLS_KEYCTX_TX_WR_AADSTRTOFST(1ULL) | V_TLS_KEYCTX_TX_WR_AADSTOPOFST(5ULL) | V_TLS_KEYCTX_TX_WR_AUTHSRTOFST(22ULL) | V_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(0ULL) | V_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(22ULL) | V_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(0ULL) | V_TLS_KEYCTX_TX_WR_AUTHINSRT(0ULL)); } -} - -/* Rx key */ -static void -prepare_rxkey_wr(struct tls_keyctx *kwr, struct tls_key_context *kctx) -{ - unsigned int ck_size = kctx->cipher_secret_size; - unsigned int mk_size = kctx->mac_secret_size; - int proto_ver = kctx->proto_ver; - - kwr->u.rxhdr.flitcnt_hmacctrl = - ((kctx->rx_key_info_size >> 4) << 3) | kctx->hmac_ctrl; - kwr->u.rxhdr.protover_ciphmode = - V_TLS_KEYCTX_TX_WR_PROTOVER(get_proto_ver(proto_ver)) | - V_TLS_KEYCTX_TX_WR_CIPHMODE(kctx->state.enc_mode); - - kwr->u.rxhdr.authmode_to_rxvalid = - V_TLS_KEYCTX_TX_WR_AUTHMODE(kctx->state.auth_mode) | - V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(1) | - V_TLS_KEYCTX_TX_WR_SEQNUMCTRL(3) | - V_TLS_KEYCTX_TX_WR_RXVALID(1); - - kwr->u.rxhdr.ivpresent_to_rxmk_size = - V_TLS_KEYCTX_TX_WR_IVPRESENT(0) | - V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(1) | - V_TLS_KEYCTX_TX_WR_RXCK_SIZE(get_cipher_key_size(ck_size)) | - V_TLS_KEYCTX_TX_WR_RXMK_SIZE(get_mac_key_size(mk_size)); - - tls_rxkey_flit1(kwr, kctx); - - /* No key reversal for GCM */ - if (kctx->state.enc_mode != CH_EVP_CIPH_GCM_MODE) { - t4_aes_getdeckey(kwr->keys.edkey, kctx->rx.key, - (kctx->cipher_secret_size << 3)); - memcpy(kwr->keys.edkey + kctx->cipher_secret_size, - kctx->rx.key + kctx->cipher_secret_size, - (IPAD_SIZE + OPAD_SIZE)); - } else { - memcpy(kwr->keys.edkey, kctx->rx.key, - (kctx->rx_key_info_size - SALT_SIZE)); - memcpy(kwr->u.rxhdr.rxsalt, kctx->rx.salt, SALT_SIZE); - } + prepare_tls_keys(kwr->keys.edkey, kwr->u.rxhdr.rxsalt, tls, KTLS_RX); } /* Tx key */ static void -prepare_txkey_wr(struct tls_keyctx *kwr, struct tls_key_context *kctx) +prepare_txkey_wr(struct tls_keyctx *kwr, struct ktls_session *tls) { - unsigned int ck_size = kctx->cipher_secret_size; - unsigned int mk_size = kctx->mac_secret_size; - kwr->u.txhdr.ctxlen = - (kctx->tx_key_info_size >> 4); + kwr->u.txhdr.ctxlen = tls_key_info_size(tls) / 16; kwr->u.txhdr.dualck_to_txvalid = - V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1) | V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1) | - V_TLS_KEYCTX_TX_WR_TXCK_SIZE(get_cipher_key_size(ck_size)) | - V_TLS_KEYCTX_TX_WR_TXMK_SIZE(get_mac_key_size(mk_size)) | + V_TLS_KEYCTX_TX_WR_TXCK_SIZE(tls_cipher_key_size(tls)) | + V_TLS_KEYCTX_TX_WR_TXMK_SIZE(tls_mac_key_size(tls)) | V_TLS_KEYCTX_TX_WR_TXVALID(1); - - memcpy(kwr->keys.edkey, kctx->tx.key, HDR_KCTX_SIZE); - if (kctx->state.enc_mode == CH_EVP_CIPH_GCM_MODE) { - memcpy(kwr->u.txhdr.txsalt, kctx->tx.salt, SALT_SIZE); - kwr->u.txhdr.dualck_to_txvalid &= - ~(V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1)); - } + if (tls->params.cipher_algorithm == CRYPTO_AES_CBC) + kwr->u.txhdr.dualck_to_txvalid |= + V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1); kwr->u.txhdr.dualck_to_txvalid = htons(kwr->u.txhdr.dualck_to_txvalid); + + prepare_tls_keys(kwr->keys.edkey, kwr->u.txhdr.txsalt, tls, KTLS_TX); } /* TLS Key memory management */ static int get_new_keyid(struct toepcb *toep) { struct adapter *sc = td_adapter(toep->td); vmem_addr_t addr; if (vmem_alloc(sc->key_map, TLS_KEY_CONTEXT_SZ, M_NOWAIT | M_FIRSTFIT, &addr) != 0) return (-1); return (addr); } static void free_keyid(struct toepcb *toep, int keyid) { struct adapter *sc = td_adapter(toep->td); vmem_free(sc->key_map, keyid, TLS_KEY_CONTEXT_SZ); } static void clear_tls_keyid(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; if (tls_ofld->rx_key_addr >= 0) { free_keyid(toep, tls_ofld->rx_key_addr); tls_ofld->rx_key_addr = -1; } if (tls_ofld->tx_key_addr >= 0) { free_keyid(toep, tls_ofld->tx_key_addr); tls_ofld->tx_key_addr = -1; } } static int -get_keyid(struct tls_ofld_info *tls_ofld, unsigned int ops) -{ - return (ops & KEY_WRITE_RX ? tls_ofld->rx_key_addr : - ((ops & KEY_WRITE_TX) ? tls_ofld->tx_key_addr : -1)); -} - -static int -get_tp_plen_max(struct tls_ofld_info *tls_ofld) +get_tp_plen_max(struct ktls_session *tls) { int plen = ((min(3*4096, TP_TX_PG_SZ))/1448) * 1448; - return (tls_ofld->k_ctx.frag_size <= 8192 ? plen : FC_TP_PLEN_MAX); + return (tls->params.max_frame_len <= 8192 ? plen : FC_TP_PLEN_MAX); } /* Send request to get the key-id */ static int -tls_program_key_id(struct toepcb *toep, struct tls_key_context *k_ctx) +tls_program_key_id(struct toepcb *toep, struct ktls_session *tls, + int direction) { struct tls_ofld_info *tls_ofld = &toep->tls; struct adapter *sc = td_adapter(toep->td); struct ofld_tx_sdesc *txsd; int kwrlen, kctxlen, keyid, len; struct wrqe *wr; struct tls_key_req *kwr; struct tls_keyctx *kctx; kwrlen = sizeof(*kwr); kctxlen = roundup2(sizeof(*kctx), 32); len = roundup2(kwrlen + kctxlen, 16); if (toep->txsd_avail == 0) return (EAGAIN); - /* Dont initialize key for re-neg */ - if (!G_KEY_CLR_LOC(k_ctx->l_p_key)) { - if ((keyid = get_new_keyid(toep)) < 0) { - return (ENOSPC); - } - } else { - keyid = get_keyid(tls_ofld, k_ctx->l_p_key); + if ((keyid = get_new_keyid(toep)) < 0) { + return (ENOSPC); } wr = alloc_wrqe(len, &toep->ofld_txq->wrq); if (wr == NULL) { free_keyid(toep, keyid); return (ENOMEM); } kwr = wrtod(wr); memset(kwr, 0, kwrlen); kwr->wr_hi = htobe32(V_FW_WR_OP(FW_ULPTX_WR) | F_FW_WR_COMPL | F_FW_WR_ATOMIC); kwr->wr_mid = htobe32(V_FW_WR_LEN16(DIV_ROUND_UP(len, 16)) | V_FW_WR_FLOWID(toep->tid)); - kwr->protocol = get_proto_ver(k_ctx->proto_ver); - kwr->mfs = htons(k_ctx->frag_size); - kwr->reneg_to_write_rx = k_ctx->l_p_key; + kwr->protocol = tls_proto_ver(tls); + kwr->mfs = htons(tls->params.max_frame_len); + kwr->reneg_to_write_rx = V_KEY_GET_LOC(direction == KTLS_TX ? + KEY_WRITE_TX : KEY_WRITE_RX); /* master command */ kwr->cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | V_T5_ULP_MEMIO_ORDER(1) | V_T5_ULP_MEMIO_IMM(1)); kwr->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(kctxlen >> 5)); kwr->len16 = htobe32((toep->tid << 8) | DIV_ROUND_UP(len - sizeof(struct work_request_hdr), 16)); kwr->kaddr = htobe32(V_ULP_MEMIO_ADDR(keyid >> 5)); /* sub command */ kwr->sc_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); kwr->sc_len = htobe32(kctxlen); kctx = (struct tls_keyctx *)(kwr + 1); memset(kctx, 0, kctxlen); - if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_TX) { + if (direction == KTLS_TX) { tls_ofld->tx_key_addr = keyid; - prepare_txkey_wr(kctx, k_ctx); - } else if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) { + prepare_txkey_wr(kctx, tls); + } else { tls_ofld->rx_key_addr = keyid; - prepare_rxkey_wr(kctx, k_ctx); + prepare_rxkey_wr(kctx, tls); } txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = DIV_ROUND_UP(len, 16); txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); return (0); } -/* Store a key received from SSL in DDR. */ -static int -program_key_context(struct tcpcb *tp, struct toepcb *toep, - struct tls_key_context *uk_ctx) -{ - struct adapter *sc = td_adapter(toep->td); - struct tls_ofld_info *tls_ofld = &toep->tls; - struct tls_key_context *k_ctx; - int error, key_offset; - - if (tp->t_state != TCPS_ESTABLISHED) { - /* - * XXX: Matches Linux driver, but not sure this is a - * very appropriate error. - */ - return (ENOENT); - } - - /* Stop timer on handshake completion */ - tls_stop_handshake_timer(toep); - - toep->flags &= ~TPF_FORCE_CREDITS; - - CTR4(KTR_CXGBE, "%s: tid %d %s proto_ver %#x", __func__, toep->tid, - G_KEY_GET_LOC(uk_ctx->l_p_key) == KEY_WRITE_RX ? "KEY_WRITE_RX" : - "KEY_WRITE_TX", uk_ctx->proto_ver); - - if (G_KEY_GET_LOC(uk_ctx->l_p_key) == KEY_WRITE_RX && - ulp_mode(toep) != ULP_MODE_TLS) - return (EOPNOTSUPP); - - /* Don't copy the 'tx' and 'rx' fields. */ - k_ctx = &tls_ofld->k_ctx; - memcpy(&k_ctx->l_p_key, &uk_ctx->l_p_key, - sizeof(*k_ctx) - offsetof(struct tls_key_context, l_p_key)); - - /* TLS version != 1.1 and !1.2 OR DTLS != 1.2 */ - if (get_proto_ver(k_ctx->proto_ver) > DTLS_1_2_VERSION) { - if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) { - tls_ofld->rx_key_addr = -1; - t4_clear_rx_quiesce(toep); - } else { - tls_ofld->tx_key_addr = -1; - } - return (0); - } - - if (k_ctx->state.enc_mode == CH_EVP_CIPH_GCM_MODE) { - k_ctx->iv_size = 4; - k_ctx->mac_first = 0; - k_ctx->hmac_ctrl = 0; - } else { - k_ctx->iv_size = 8; /* for CBC, iv is 16B, unit of 2B */ - k_ctx->mac_first = 1; - } - - tls_ofld->scmd0.seqno_numivs = - (V_SCMD_SEQ_NO_CTRL(3) | - V_SCMD_PROTO_VERSION(get_proto_ver(k_ctx->proto_ver)) | - V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | - V_SCMD_CIPH_AUTH_SEQ_CTRL((k_ctx->mac_first == 0)) | - V_SCMD_CIPH_MODE(k_ctx->state.enc_mode) | - V_SCMD_AUTH_MODE(k_ctx->state.auth_mode) | - V_SCMD_HMAC_CTRL(k_ctx->hmac_ctrl) | - V_SCMD_IV_SIZE(k_ctx->iv_size)); - - tls_ofld->scmd0.ivgen_hdrlen = - (V_SCMD_IV_GEN_CTRL(k_ctx->iv_ctrl) | - V_SCMD_KEY_CTX_INLINE(0) | - V_SCMD_TLS_FRAG_ENABLE(1)); - - tls_ofld->mac_length = k_ctx->mac_secret_size; - - if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) { - k_ctx->rx = uk_ctx->rx; - /* Dont initialize key for re-neg */ - if (!G_KEY_CLR_LOC(k_ctx->l_p_key)) - tls_ofld->rx_key_addr = -1; - } else { - k_ctx->tx = uk_ctx->tx; - /* Dont initialize key for re-neg */ - if (!G_KEY_CLR_LOC(k_ctx->l_p_key)) - tls_ofld->tx_key_addr = -1; - } - - /* Flush pending data before new Tx key becomes active */ - if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_TX) { - struct sockbuf *sb; - - /* XXX: This might not drain everything. */ - t4_push_frames(sc, toep, 0); - sb = &toep->inp->inp_socket->so_snd; - SOCKBUF_LOCK(sb); - - /* XXX: This asserts that everything has been pushed. */ - MPASS(sb->sb_sndptr == NULL || sb->sb_sndptr->m_next == NULL); - sb->sb_sndptr = NULL; - tls_ofld->sb_off = sbavail(sb); - SOCKBUF_UNLOCK(sb); - tls_ofld->tx_seq_no = 0; - } - - if ((G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) || - (tls_ofld->key_location == TLS_SFO_WR_CONTEXTLOC_DDR)) { - - /* - * XXX: The userland library sets tx_key_info_size, not - * rx_key_info_size. - */ - k_ctx->rx_key_info_size = k_ctx->tx_key_info_size; - - error = tls_program_key_id(toep, k_ctx); - if (error) { - /* XXX: Only clear quiesce for KEY_WRITE_RX? */ - t4_clear_rx_quiesce(toep); - return (error); - } - } - - if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) { - /* - * RX key tags are an index into the key portion of MA - * memory stored as an offset from the base address in - * units of 64 bytes. - */ - key_offset = tls_ofld->rx_key_addr - sc->vres.key.start; - t4_set_tls_keyid(toep, key_offset / 64); - t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, - V_TCB_ULP_RAW(M_TCB_ULP_RAW), - V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | - V_TF_TLS_CONTROL(1) | - V_TF_TLS_ACTIVE(1) | - V_TF_TLS_ENABLE(1)))); - t4_set_tls_tcb_field(toep, W_TCB_TLS_SEQ, - V_TCB_TLS_SEQ(M_TCB_TLS_SEQ), - V_TCB_TLS_SEQ(0)); - t4_clear_rx_quiesce(toep); - - toep->flags |= TPF_TLS_RECEIVE; - } else { - unsigned short pdus_per_ulp; - - if (tls_ofld->key_location == TLS_SFO_WR_CONTEXTLOC_IMMEDIATE) - tls_ofld->tx_key_addr = 1; - - tls_ofld->fcplenmax = get_tp_plen_max(tls_ofld); - tls_ofld->expn_per_ulp = tls_expansion_size(toep, - tls_ofld->fcplenmax, 1, &pdus_per_ulp); - tls_ofld->pdus_per_ulp = pdus_per_ulp; - tls_ofld->adjusted_plen = tls_ofld->pdus_per_ulp * - ((tls_ofld->expn_per_ulp/tls_ofld->pdus_per_ulp) + - tls_ofld->k_ctx.frag_size); - } - - return (0); -} - /* * In some cases a client connection can hang without sending the * ServerHelloDone message from the NIC to the host. Send a dummy * RX_DATA_ACK with RX_MODULATE to unstick the connection. */ static void tls_send_handshake_ack(void *arg) { struct toepcb *toep = arg; struct tls_ofld_info *tls_ofld = &toep->tls; struct adapter *sc = td_adapter(toep->td); /* Bail without rescheduling if the connection has closed. */ if ((toep->flags & (TPF_FIN_SENT | TPF_ABORT_SHUTDOWN)) != 0) return; /* * If this connection has timed out without receiving more * data, downgrade to plain TOE mode and don't re-arm the * timer. */ if (sc->tt.tls_rx_timeout != 0) { struct inpcb *inp; struct tcpcb *tp; inp = toep->inp; tp = intotcpcb(inp); if ((ticks - tp->t_rcvtime) >= sc->tt.tls_rx_timeout) { CTR2(KTR_CXGBE, "%s: tid %d clr_ofld_mode", __func__, toep->tid); tls_clr_ofld_mode(toep); return; } } /* * XXX: Does not have the t4_get_tcb() checks to refine the * workaround. */ callout_schedule(&tls_ofld->handshake_timer, TLS_SRV_HELLO_RD_TM * hz); CTR2(KTR_CXGBE, "%s: tid %d sending RX_DATA_ACK", __func__, toep->tid); send_rx_modulate(sc, toep); } static void tls_start_handshake_timer(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; INP_WLOCK_ASSERT(toep->inp); callout_reset(&tls_ofld->handshake_timer, TLS_SRV_HELLO_BKOFF_TM * hz, tls_send_handshake_ack, toep); } void tls_stop_handshake_timer(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; INP_WLOCK_ASSERT(toep->inp); callout_stop(&tls_ofld->handshake_timer); } -int -t4_ctloutput_tls(struct socket *so, struct sockopt *sopt) -{ - struct tls_key_context uk_ctx; - struct inpcb *inp; - struct tcpcb *tp; - struct toepcb *toep; - int error, optval; - - error = 0; - if (sopt->sopt_dir == SOPT_SET && - sopt->sopt_name == TCP_TLSOM_SET_TLS_CONTEXT) { - error = sooptcopyin(sopt, &uk_ctx, sizeof(uk_ctx), - sizeof(uk_ctx)); - if (error) - return (error); - } - - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); - INP_WLOCK(inp); - if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { - INP_WUNLOCK(inp); - return (ECONNRESET); - } - tp = intotcpcb(inp); - toep = tp->t_toe; - switch (sopt->sopt_dir) { - case SOPT_SET: - switch (sopt->sopt_name) { - case TCP_TLSOM_SET_TLS_CONTEXT: - if (toep->tls.mode == TLS_MODE_KTLS) - error = EINVAL; - else { - error = program_key_context(tp, toep, &uk_ctx); - if (error == 0) - toep->tls.mode = TLS_MODE_TLSOM; - } - INP_WUNLOCK(inp); - break; - case TCP_TLSOM_CLR_TLS_TOM: - if (toep->tls.mode == TLS_MODE_KTLS) - error = EINVAL; - else if (ulp_mode(toep) == ULP_MODE_TLS) { - CTR2(KTR_CXGBE, "%s: tid %d CLR_TLS_TOM", - __func__, toep->tid); - tls_clr_ofld_mode(toep); - } else - error = EOPNOTSUPP; - INP_WUNLOCK(inp); - break; - case TCP_TLSOM_CLR_QUIES: - if (toep->tls.mode == TLS_MODE_KTLS) - error = EINVAL; - else if (ulp_mode(toep) == ULP_MODE_TLS) { - CTR2(KTR_CXGBE, "%s: tid %d CLR_QUIES", - __func__, toep->tid); - tls_clr_quiesce(toep); - } else - error = EOPNOTSUPP; - INP_WUNLOCK(inp); - break; - default: - INP_WUNLOCK(inp); - error = EOPNOTSUPP; - break; - } - break; - case SOPT_GET: - switch (sopt->sopt_name) { - case TCP_TLSOM_GET_TLS_TOM: - /* - * TLS TX is permitted on any TOE socket, but - * TLS RX requires a TLS ULP mode. - */ - optval = TLS_TOM_NONE; - if (can_tls_offload(td_adapter(toep->td)) && - toep->tls.mode != TLS_MODE_KTLS) { - switch (ulp_mode(toep)) { - case ULP_MODE_NONE: - case ULP_MODE_TCPDDP: - optval = TLS_TOM_TXONLY; - break; - case ULP_MODE_TLS: - optval = TLS_TOM_BOTH; - break; - } - } - CTR3(KTR_CXGBE, "%s: tid %d GET_TLS_TOM = %d", - __func__, toep->tid, optval); - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof(optval)); - break; - default: - INP_WUNLOCK(inp); - error = EOPNOTSUPP; - break; - } - break; - } - return (error); -} - -#ifdef KERN_TLS -static void -init_ktls_key_context(struct ktls_session *tls, struct tls_key_context *k_ctx, - int direction) -{ - struct auth_hash *axf; - u_int key_info_size, mac_key_size; - char *hash, *key; - - k_ctx->l_p_key = V_KEY_GET_LOC(direction == KTLS_TX ? KEY_WRITE_TX : - KEY_WRITE_RX); - k_ctx->proto_ver = tls->params.tls_vmajor << 8 | tls->params.tls_vminor; - k_ctx->cipher_secret_size = tls->params.cipher_key_len; - key_info_size = sizeof(struct tx_keyctx_hdr) + - k_ctx->cipher_secret_size; - if (direction == KTLS_TX) - key = k_ctx->tx.key; - else - key = k_ctx->rx.key; - memcpy(key, tls->params.cipher_key, tls->params.cipher_key_len); - hash = key + tls->params.cipher_key_len; - if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) { - k_ctx->state.auth_mode = SCMD_AUTH_MODE_GHASH; - k_ctx->state.enc_mode = SCMD_CIPH_MODE_AES_GCM; - k_ctx->iv_size = 4; - k_ctx->mac_first = 0; - k_ctx->hmac_ctrl = SCMD_HMAC_CTRL_NOP; - key_info_size += GMAC_BLOCK_LEN; - k_ctx->mac_secret_size = 0; - if (direction == KTLS_TX) - memcpy(k_ctx->tx.salt, tls->params.iv, SALT_SIZE); - else - memcpy(k_ctx->rx.salt, tls->params.iv, SALT_SIZE); - t4_init_gmac_hash(tls->params.cipher_key, - tls->params.cipher_key_len, hash); - } else { - switch (tls->params.auth_algorithm) { - case CRYPTO_SHA1_HMAC: - axf = &auth_hash_hmac_sha1; - mac_key_size = SHA1_HASH_LEN; - k_ctx->state.auth_mode = SCMD_AUTH_MODE_SHA1; - break; - case CRYPTO_SHA2_256_HMAC: - axf = &auth_hash_hmac_sha2_256; - mac_key_size = SHA2_256_HASH_LEN; - k_ctx->state.auth_mode = SCMD_AUTH_MODE_SHA256; - break; - case CRYPTO_SHA2_384_HMAC: - axf = &auth_hash_hmac_sha2_384; - mac_key_size = SHA2_512_HASH_LEN; - k_ctx->state.auth_mode = SCMD_AUTH_MODE_SHA512_384; - break; - default: - panic("bad auth mode"); - } - k_ctx->state.enc_mode = SCMD_CIPH_MODE_AES_CBC; - k_ctx->iv_size = 8; /* for CBC, iv is 16B, unit of 2B */ - k_ctx->mac_first = 1; - k_ctx->hmac_ctrl = SCMD_HMAC_CTRL_NO_TRUNC; - key_info_size += roundup2(mac_key_size, 16) * 2; - k_ctx->mac_secret_size = mac_key_size; - t4_init_hmac_digest(axf, mac_key_size, tls->params.auth_key, - tls->params.auth_key_len, hash); - } - - if (direction == KTLS_TX) - k_ctx->tx_key_info_size = key_info_size; - else - k_ctx->rx_key_info_size = key_info_size; - k_ctx->frag_size = tls->params.max_frame_len; - k_ctx->iv_ctrl = 1; -} - int tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction) { struct adapter *sc = td_adapter(toep->td); - struct tls_key_context *k_ctx; - int error, key_offset; + int error, explicit_iv_size, key_offset, mac_first; - if (toep->tls.mode == TLS_MODE_TLSOM) - return (EINVAL); if (!can_tls_offload(td_adapter(toep->td))) return (EINVAL); switch (ulp_mode(toep)) { case ULP_MODE_TLS: break; case ULP_MODE_NONE: case ULP_MODE_TCPDDP: if (direction != KTLS_TX) return (EINVAL); break; default: return (EINVAL); } switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: /* XXX: Explicitly ignore any provided IV. */ switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: error = EINVAL; goto clr_ofld; } switch (tls->params.auth_algorithm) { case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: break; default: error = EPROTONOSUPPORT; goto clr_ofld; } + explicit_iv_size = AES_BLOCK_LEN; + mac_first = 1; break; case CRYPTO_AES_NIST_GCM_16: if (tls->params.iv_len != SALT_SIZE) { error = EINVAL; goto clr_ofld; } switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: error = EINVAL; goto clr_ofld; } + explicit_iv_size = 8; + mac_first = 0; break; default: error = EPROTONOSUPPORT; goto clr_ofld; } /* Only TLS 1.1 and TLS 1.2 are currently supported. */ if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE || tls->params.tls_vminor < TLS_MINOR_VER_ONE || tls->params.tls_vminor > TLS_MINOR_VER_TWO) { error = EPROTONOSUPPORT; goto clr_ofld; } /* Bail if we already have a key. */ if (direction == KTLS_TX) { if (toep->tls.tx_key_addr != -1) return (EOPNOTSUPP); } else { if (toep->tls.rx_key_addr != -1) return (EOPNOTSUPP); } - /* - * XXX: This assumes no key renegotation. If KTLS ever supports - * that we will want to allocate TLS sessions dynamically rather - * than as a static member of toep. - */ - k_ctx = &toep->tls.k_ctx; - init_ktls_key_context(tls, k_ctx, direction); - - error = tls_program_key_id(toep, k_ctx); + error = tls_program_key_id(toep, tls, direction); if (error) { if (direction == KTLS_RX) goto clr_ofld; return (error); } if (direction == KTLS_TX) { toep->tls.scmd0.seqno_numivs = (V_SCMD_SEQ_NO_CTRL(3) | - V_SCMD_PROTO_VERSION(get_proto_ver(k_ctx->proto_ver)) | + V_SCMD_PROTO_VERSION(tls_proto_ver(tls)) | V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | - V_SCMD_CIPH_AUTH_SEQ_CTRL((k_ctx->mac_first == 0)) | - V_SCMD_CIPH_MODE(k_ctx->state.enc_mode) | - V_SCMD_AUTH_MODE(k_ctx->state.auth_mode) | - V_SCMD_HMAC_CTRL(k_ctx->hmac_ctrl) | - V_SCMD_IV_SIZE(k_ctx->iv_size)); + V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) | + V_SCMD_CIPH_MODE(tls_cipher_mode(tls)) | + V_SCMD_AUTH_MODE(tls_auth_mode(tls)) | + V_SCMD_HMAC_CTRL(tls_hmac_ctrl(tls)) | + V_SCMD_IV_SIZE(explicit_iv_size / 2)); toep->tls.scmd0.ivgen_hdrlen = - (V_SCMD_IV_GEN_CTRL(k_ctx->iv_ctrl) | + (V_SCMD_IV_GEN_CTRL(1) | V_SCMD_KEY_CTX_INLINE(0) | V_SCMD_TLS_FRAG_ENABLE(1)); if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) toep->tls.iv_len = 8; else toep->tls.iv_len = AES_BLOCK_LEN; - toep->tls.mac_length = k_ctx->mac_secret_size; - - toep->tls.fcplenmax = get_tp_plen_max(&toep->tls); + toep->tls.frag_size = tls->params.max_frame_len; + toep->tls.fcplenmax = get_tp_plen_max(tls); toep->tls.expn_per_ulp = tls->params.tls_hlen + tls->params.tls_tlen; toep->tls.pdus_per_ulp = 1; toep->tls.adjusted_plen = toep->tls.expn_per_ulp + - toep->tls.k_ctx.frag_size; + tls->params.max_frame_len; + toep->tls.tx_key_info_size = tls_key_info_size(tls); } else { /* Stop timer on handshake completion */ tls_stop_handshake_timer(toep); toep->flags &= ~TPF_FORCE_CREDITS; toep->flags |= TPF_TLS_RECEIVE; + toep->tls.rx_version = tls->params.tls_vmajor << 8 | + tls->params.tls_vminor; /* * RX key tags are an index into the key portion of MA * memory stored as an offset from the base address in * units of 64 bytes. */ key_offset = toep->tls.rx_key_addr - sc->vres.key.start; t4_set_tls_keyid(toep, key_offset / 64); t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW), V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | V_TF_TLS_CONTROL(1) | V_TF_TLS_ACTIVE(1) | V_TF_TLS_ENABLE(1)))); t4_set_tls_tcb_field(toep, W_TCB_TLS_SEQ, V_TCB_TLS_SEQ(M_TCB_TLS_SEQ), V_TCB_TLS_SEQ(0)); t4_clear_rx_quiesce(toep); } - toep->tls.mode = TLS_MODE_KTLS; - return (0); clr_ofld: if (ulp_mode(toep) == ULP_MODE_TLS) { CTR2(KTR_CXGBE, "%s: tid %d clr_ofld_mode", __func__, toep->tid); tls_clr_ofld_mode(toep); } return (error); } -#endif void tls_init_toep(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; - tls_ofld->mode = TLS_MODE_OFF; - tls_ofld->key_location = TLS_SFO_WR_CONTEXTLOC_DDR; tls_ofld->rx_key_addr = -1; tls_ofld->tx_key_addr = -1; } void tls_establish(struct toepcb *toep) { /* * Enable PDU extraction. * * XXX: Supposedly this should be done by the firmware when * the ULP_MODE FLOWC parameter is set in send_flowc_wr(), but * in practice this seems to be required. */ CTR2(KTR_CXGBE, "%s: tid %d setting TLS_ENABLE", __func__, toep->tid); t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW), V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1))); toep->flags |= TPF_FORCE_CREDITS | TPF_TLS_ESTABLISHED; callout_init_rw(&toep->tls.handshake_timer, &toep->inp->inp_lock, 0); tls_start_handshake_timer(toep); } void tls_detach(struct toepcb *toep) { if (toep->flags & TPF_TLS_ESTABLISHED) { tls_stop_handshake_timer(toep); toep->flags &= ~TPF_TLS_ESTABLISHED; } } void tls_uninit_toep(struct toepcb *toep) { MPASS((toep->flags & TPF_TLS_ESTABLISHED) == 0); clear_tls_keyid(toep); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TLSTX_CREDITS(toep) \ - (howmany(sizeof(struct fw_tlstx_data_wr) + \ - sizeof(struct cpl_tx_tls_sfo) + key_size((toep)) + \ - CIPHER_BLOCK_SIZE + 1, 16)) + (howmany(sizeof(struct fw_tlstx_data_wr) + \ + sizeof(struct cpl_tx_tls_sfo) + sizeof(struct ulptx_idata) + \ + sizeof(struct ulptx_sc_memrd) + \ + AES_BLOCK_LEN + 1, 16)) static inline u_int max_imm_tls_space(int tx_credits) { const int n = 2; /* Use only up to 2 desc for imm. data WR */ int space; KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits >= (n * EQ_ESIZE) / 16) space = (n * EQ_ESIZE); else space = tx_credits * 16; return (space); } -static int -count_mbuf_segs(struct mbuf *m, int skip, int len, int *max_nsegs_1mbufp) -{ - int max_nsegs_1mbuf, n, nsegs; - - while (skip >= m->m_len) { - skip -= m->m_len; - m = m->m_next; - } - - nsegs = 0; - max_nsegs_1mbuf = 0; - while (len > 0) { - n = sglist_count(mtod(m, char *) + skip, m->m_len - skip); - if (n > max_nsegs_1mbuf) - max_nsegs_1mbuf = n; - nsegs += n; - len -= m->m_len - skip; - skip = 0; - m = m->m_next; - } - *max_nsegs_1mbufp = max_nsegs_1mbuf; - return (nsegs); -} - static void write_tlstx_wr(struct fw_tlstx_data_wr *txwr, struct toepcb *toep, unsigned int immdlen, unsigned int plen, unsigned int expn, unsigned int pdus, uint8_t credits, int shove, int imm_ivs) { struct tls_ofld_info *tls_ofld = &toep->tls; unsigned int len = plen + expn; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_TLSTX_DATA_WR) | V_FW_TLSTX_DATA_WR_COMPL(1) | V_FW_TLSTX_DATA_WR_IMMDLEN(immdlen)); txwr->flowid_len16 = htobe32(V_FW_TLSTX_DATA_WR_FLOWID(toep->tid) | V_FW_TLSTX_DATA_WR_LEN16(credits)); txwr->plen = htobe32(len); txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ULP_MODE_TLS) | V_TX_URG(0) | /* F_T6_TX_FORCE | */ V_TX_SHOVE(shove)); txwr->ctxloc_to_exp = htobe32(V_FW_TLSTX_DATA_WR_NUMIVS(pdus) | V_FW_TLSTX_DATA_WR_EXP(expn) | - V_FW_TLSTX_DATA_WR_CTXLOC(tls_ofld->key_location) | + V_FW_TLSTX_DATA_WR_CTXLOC(TLS_SFO_WR_CONTEXTLOC_DDR) | V_FW_TLSTX_DATA_WR_IVDSGL(!imm_ivs) | - V_FW_TLSTX_DATA_WR_KEYSIZE(tls_ofld->k_ctx.tx_key_info_size >> 4)); - txwr->mfs = htobe16(tls_ofld->k_ctx.frag_size); + V_FW_TLSTX_DATA_WR_KEYSIZE(tls_ofld->tx_key_info_size >> 4)); + txwr->mfs = htobe16(tls_ofld->frag_size); txwr->adjustedplen_pkd = htobe16( V_FW_TLSTX_DATA_WR_ADJUSTEDPLEN(tls_ofld->adjusted_plen)); txwr->expinplenmax_pkd = htobe16( V_FW_TLSTX_DATA_WR_EXPINPLENMAX(tls_ofld->expn_per_ulp)); txwr->pdusinplenmax_pkd = V_FW_TLSTX_DATA_WR_PDUSINPLENMAX(tls_ofld->pdus_per_ulp); } static void write_tlstx_cpl(struct cpl_tx_tls_sfo *cpl, struct toepcb *toep, struct tls_hdr *tls_hdr, unsigned int plen, unsigned int pdus) { struct tls_ofld_info *tls_ofld = &toep->tls; int data_type, seglen; - if (plen < tls_ofld->k_ctx.frag_size) + if (plen < tls_ofld->frag_size) seglen = plen; else - seglen = tls_ofld->k_ctx.frag_size; + seglen = tls_ofld->frag_size; data_type = tls_content_type(tls_hdr->type); cpl->op_to_seg_len = htobe32(V_CPL_TX_TLS_SFO_OPCODE(CPL_TX_TLS_SFO) | V_CPL_TX_TLS_SFO_DATA_TYPE(data_type) | V_CPL_TX_TLS_SFO_CPL_LEN(2) | V_CPL_TX_TLS_SFO_SEG_LEN(seglen)); cpl->pld_len = htobe32(plen); if (data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) cpl->type_protover = htobe32( V_CPL_TX_TLS_SFO_TYPE(tls_hdr->type)); cpl->seqno_numivs = htobe32(tls_ofld->scmd0.seqno_numivs | V_SCMD_NUM_IVS(pdus)); cpl->ivgen_hdrlen = htobe32(tls_ofld->scmd0.ivgen_hdrlen); cpl->scmd1 = htobe64(tls_ofld->tx_seq_no); tls_ofld->tx_seq_no += pdus; } -/* - * Similar to write_tx_sgl() except that it accepts an optional - * trailer buffer for IVs. - */ -static void -write_tlstx_sgl(void *dst, struct mbuf *start, int skip, int plen, - void *iv_buffer, int iv_len, int nsegs, int n) -{ - struct mbuf *m; - struct ulptx_sgl *usgl = dst; - int i, j, rc; - struct sglist sg; - struct sglist_seg segs[n]; - - KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); - - sglist_init(&sg, n, segs); - usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | - V_ULPTX_NSGE(nsegs)); - - for (m = start; skip >= m->m_len; m = m->m_next) - skip -= m->m_len; - - i = -1; - for (m = start; plen > 0; m = m->m_next) { - rc = sglist_append(&sg, mtod(m, char *) + skip, - m->m_len - skip); - if (__predict_false(rc != 0)) - panic("%s: sglist_append %d", __func__, rc); - plen -= m->m_len - skip; - skip = 0; - - for (j = 0; j < sg.sg_nseg; i++, j++) { - if (i < 0) { - usgl->len0 = htobe32(segs[j].ss_len); - usgl->addr0 = htobe64(segs[j].ss_paddr); - } else { - usgl->sge[i / 2].len[i & 1] = - htobe32(segs[j].ss_len); - usgl->sge[i / 2].addr[i & 1] = - htobe64(segs[j].ss_paddr); - } -#ifdef INVARIANTS - nsegs--; -#endif - } - sglist_reset(&sg); - } - if (iv_buffer != NULL) { - rc = sglist_append(&sg, iv_buffer, iv_len); - if (__predict_false(rc != 0)) - panic("%s: sglist_append %d", __func__, rc); - - for (j = 0; j < sg.sg_nseg; i++, j++) { - if (i < 0) { - usgl->len0 = htobe32(segs[j].ss_len); - usgl->addr0 = htobe64(segs[j].ss_paddr); - } else { - usgl->sge[i / 2].len[i & 1] = - htobe32(segs[j].ss_len); - usgl->sge[i / 2].addr[i & 1] = - htobe64(segs[j].ss_paddr); - } -#ifdef INVARIANTS - nsegs--; -#endif - } - } - if (i & 1) - usgl->sge[i / 2].len[1] = htobe32(0); - KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, iv_buffer %p", - __func__, nsegs, start, iv_buffer)); -} - -/* - * Similar to t4_push_frames() but handles TLS sockets when TLS offload - * is enabled. Rather than transmitting bulk data, the socket buffer - * contains TLS records. The work request requires a full TLS record, - * so batch mbufs up until a full TLS record is seen. This requires - * reading the TLS header out of the start of each record to determine - * its length. - */ -void -t4_push_tls_records(struct adapter *sc, struct toepcb *toep, int drop) -{ - struct tls_hdr thdr; - struct mbuf *sndptr; - struct fw_tlstx_data_wr *txwr; - struct cpl_tx_tls_sfo *cpl; - struct wrqe *wr; - u_int plen, nsegs, credits, space, max_nsegs_1mbuf, wr_len; - u_int expn_size, iv_len, pdus, sndptroff; - struct tls_ofld_info *tls_ofld = &toep->tls; - struct inpcb *inp = toep->inp; - struct tcpcb *tp = intotcpcb(inp); - struct socket *so = inp->inp_socket; - struct sockbuf *sb = &so->so_snd; - int tls_size, tx_credits, shove, /* compl,*/ sowwakeup; - struct ofld_tx_sdesc *txsd; - bool imm_ivs, imm_payload; - void *iv_buffer, *iv_dst, *buf; - - INP_WLOCK_ASSERT(inp); - KASSERT(toep->flags & TPF_FLOWC_WR_SENT, - ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); - - KASSERT(ulp_mode(toep) == ULP_MODE_NONE || - ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS, - ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep)); - KASSERT(tls_tx_key(toep), - ("%s: TX key not set for toep %p", __func__, toep)); - -#ifdef VERBOSE_TRACES - CTR4(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", - __func__, toep->tid, toep->flags, tp->t_flags); -#endif - if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) - return; - -#ifdef RATELIMIT - if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && - (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { - inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; - } -#endif - - /* - * This function doesn't resume by itself. Someone else must clear the - * flag and call this function. - */ - if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { - KASSERT(drop == 0, - ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); - return; - } - - txsd = &toep->txsd[toep->txsd_pidx]; - for (;;) { - tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); - space = max_imm_tls_space(tx_credits); - wr_len = sizeof(struct fw_tlstx_data_wr) + - sizeof(struct cpl_tx_tls_sfo) + key_size(toep); - if (wr_len + CIPHER_BLOCK_SIZE + 1 > space) { -#ifdef VERBOSE_TRACES - CTR5(KTR_CXGBE, - "%s: tid %d tx_credits %d min_wr %d space %d", - __func__, toep->tid, tx_credits, wr_len + - CIPHER_BLOCK_SIZE + 1, space); -#endif - return; - } - - SOCKBUF_LOCK(sb); - sowwakeup = drop; - if (drop) { - sbdrop_locked(sb, drop); - MPASS(tls_ofld->sb_off >= drop); - tls_ofld->sb_off -= drop; - drop = 0; - } - - /* - * Send a FIN if requested, but only if there's no - * more data to send. - */ - if (sbavail(sb) == tls_ofld->sb_off && - toep->flags & TPF_SEND_FIN) { - if (sowwakeup) - sowwakeup_locked(so); - else - SOCKBUF_UNLOCK(sb); - SOCKBUF_UNLOCK_ASSERT(sb); - t4_close_conn(sc, toep); - return; - } - - if (sbavail(sb) < tls_ofld->sb_off + TLS_HEADER_LENGTH) { - /* - * A full TLS header is not yet queued, stop - * for now until more data is added to the - * socket buffer. However, if the connection - * has been closed, we will never get the rest - * of the header so just discard the partial - * header and close the connection. - */ -#ifdef VERBOSE_TRACES - CTR5(KTR_CXGBE, "%s: tid %d sbavail %d sb_off %d%s", - __func__, toep->tid, sbavail(sb), tls_ofld->sb_off, - toep->flags & TPF_SEND_FIN ? "" : " SEND_FIN"); -#endif - if (sowwakeup) - sowwakeup_locked(so); - else - SOCKBUF_UNLOCK(sb); - SOCKBUF_UNLOCK_ASSERT(sb); - if (toep->flags & TPF_SEND_FIN) - t4_close_conn(sc, toep); - return; - } - - /* Read the header of the next TLS record. */ - sndptr = sbsndmbuf(sb, tls_ofld->sb_off, &sndptroff); - m_copydata(sndptr, sndptroff, sizeof(thdr), (caddr_t)&thdr); - tls_size = htons(thdr.length); - plen = TLS_HEADER_LENGTH + tls_size; - pdus = howmany(tls_size, tls_ofld->k_ctx.frag_size); - iv_len = pdus * CIPHER_BLOCK_SIZE; - - if (sbavail(sb) < tls_ofld->sb_off + plen) { - /* - * The full TLS record is not yet queued, stop - * for now until more data is added to the - * socket buffer. However, if the connection - * has been closed, we will never get the rest - * of the record so just discard the partial - * record and close the connection. - */ -#ifdef VERBOSE_TRACES - CTR6(KTR_CXGBE, - "%s: tid %d sbavail %d sb_off %d plen %d%s", - __func__, toep->tid, sbavail(sb), tls_ofld->sb_off, - plen, toep->flags & TPF_SEND_FIN ? "" : - " SEND_FIN"); -#endif - if (sowwakeup) - sowwakeup_locked(so); - else - SOCKBUF_UNLOCK(sb); - SOCKBUF_UNLOCK_ASSERT(sb); - if (toep->flags & TPF_SEND_FIN) - t4_close_conn(sc, toep); - return; - } - - /* Shove if there is no additional data pending. */ - shove = (sbavail(sb) == tls_ofld->sb_off + plen) && - !(tp->t_flags & TF_MORETOCOME); - - if (sb->sb_flags & SB_AUTOSIZE && - V_tcp_do_autosndbuf && - sb->sb_hiwat < V_tcp_autosndbuf_max && - sbused(sb) >= sb->sb_hiwat * 7 / 8) { - int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, - V_tcp_autosndbuf_max); - - if (!sbreserve_locked(sb, newsize, so, NULL)) - sb->sb_flags &= ~SB_AUTOSIZE; - else - sowwakeup = 1; /* room available */ - } - if (sowwakeup) - sowwakeup_locked(so); - else - SOCKBUF_UNLOCK(sb); - SOCKBUF_UNLOCK_ASSERT(sb); - - if (__predict_false(toep->flags & TPF_FIN_SENT)) - panic("%s: excess tx.", __func__); - - /* Determine whether to use immediate vs SGL. */ - imm_payload = false; - imm_ivs = false; - if (wr_len + iv_len <= space) { - imm_ivs = true; - wr_len += iv_len; - if (wr_len + tls_size <= space) { - wr_len += tls_size; - imm_payload = true; - } - } - - /* Allocate space for IVs if needed. */ - if (!imm_ivs) { - iv_buffer = malloc(iv_len, M_CXGBE, M_NOWAIT); - if (iv_buffer == NULL) { - /* - * XXX: How to restart this? - */ - if (sowwakeup) - sowwakeup_locked(so); - else - SOCKBUF_UNLOCK(sb); - SOCKBUF_UNLOCK_ASSERT(sb); - CTR3(KTR_CXGBE, - "%s: tid %d failed to alloc IV space len %d", - __func__, toep->tid, iv_len); - return; - } - } else - iv_buffer = NULL; - - /* Determine size of SGL. */ - nsegs = 0; - max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ - if (!imm_payload) { - nsegs = count_mbuf_segs(sndptr, sndptroff + - TLS_HEADER_LENGTH, tls_size, &max_nsegs_1mbuf); - if (!imm_ivs) { - int n = sglist_count(iv_buffer, iv_len); - nsegs += n; - if (n > max_nsegs_1mbuf) - max_nsegs_1mbuf = n; - } - - /* Account for SGL in work request length. */ - wr_len += sizeof(struct ulptx_sgl) + - ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; - } - - wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq); - if (wr == NULL) { - /* XXX: how will we recover from this? */ - toep->flags |= TPF_TX_SUSPENDED; - return; - } - -#ifdef VERBOSE_TRACES - CTR5(KTR_CXGBE, "%s: tid %d TLS record %d len %#x pdus %d", - __func__, toep->tid, thdr.type, tls_size, pdus); -#endif - txwr = wrtod(wr); - cpl = (struct cpl_tx_tls_sfo *)(txwr + 1); - memset(txwr, 0, roundup2(wr_len, 16)); - credits = howmany(wr_len, 16); - expn_size = tls_expansion_size(toep, tls_size, 0, NULL); - write_tlstx_wr(txwr, toep, imm_payload ? tls_size : 0, - tls_size, expn_size, pdus, credits, shove, imm_ivs ? 1 : 0); - write_tlstx_cpl(cpl, toep, &thdr, tls_size, pdus); - tls_copy_tx_key(toep, cpl + 1); - - /* Generate random IVs */ - buf = (char *)(cpl + 1) + key_size(toep); - if (imm_ivs) { - MPASS(iv_buffer == NULL); - iv_dst = buf; - buf = (char *)iv_dst + iv_len; - } else - iv_dst = iv_buffer; - arc4rand(iv_dst, iv_len, 0); - - if (imm_payload) { - m_copydata(sndptr, sndptroff + TLS_HEADER_LENGTH, - tls_size, buf); - } else { - write_tlstx_sgl(buf, sndptr, - sndptroff + TLS_HEADER_LENGTH, tls_size, iv_buffer, - iv_len, nsegs, max_nsegs_1mbuf); - } - - KASSERT(toep->tx_credits >= credits, - ("%s: not enough credits", __func__)); - - toep->tx_credits -= credits; - - tp->snd_nxt += plen; - tp->snd_max += plen; - - SOCKBUF_LOCK(sb); - sbsndptr_adv(sb, sb->sb_sndptr, plen); - tls_ofld->sb_off += plen; - SOCKBUF_UNLOCK(sb); - - toep->flags |= TPF_TX_DATA_SENT; - if (toep->tx_credits < MIN_OFLD_TLSTX_CREDITS(toep)) - toep->flags |= TPF_TX_SUSPENDED; - - KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); - txsd->plen = plen; - txsd->tx_credits = credits; - txsd->iv_buffer = iv_buffer; - txsd++; - if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { - toep->txsd_pidx = 0; - txsd = &toep->txsd[0]; - } - toep->txsd_avail--; - - counter_u64_add(toep->ofld_txq->tx_toe_tls_records, 1); - counter_u64_add(toep->ofld_txq->tx_toe_tls_octets, plen); - - t4_l2t_send(sc, wr, toep->l2te); - } -} - -#ifdef KERN_TLS static int count_ext_pgs_segs(struct mbuf *m) { vm_paddr_t nextpa; u_int i, nsegs; MPASS(m->m_epg_npgs > 0); nsegs = 1; nextpa = m->m_epg_pa[0] + PAGE_SIZE; for (i = 1; i < m->m_epg_npgs; i++) { if (nextpa != m->m_epg_pa[i]) nsegs++; nextpa = m->m_epg_pa[i] + PAGE_SIZE; } return (nsegs); } static void write_ktlstx_sgl(void *dst, struct mbuf *m, int nsegs) { struct ulptx_sgl *usgl = dst; vm_paddr_t pa; uint32_t len; int i, j; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); /* Figure out the first S/G length. */ pa = m->m_epg_pa[0] + m->m_epg_1st_off; usgl->addr0 = htobe64(pa); len = m_epg_pagelen(m, 0, m->m_epg_1st_off); pa += len; for (i = 1; i < m->m_epg_npgs; i++) { if (m->m_epg_pa[i] != pa) break; len += m_epg_pagelen(m, i, 0); pa += m_epg_pagelen(m, i, 0); } usgl->len0 = htobe32(len); #ifdef INVARIANTS nsegs--; #endif j = -1; for (; i < m->m_epg_npgs; i++) { if (j == -1 || m->m_epg_pa[i] != pa) { if (j >= 0) usgl->sge[j / 2].len[j & 1] = htobe32(len); j++; #ifdef INVARIANTS nsegs--; #endif pa = m->m_epg_pa[i]; usgl->sge[j / 2].addr[j & 1] = htobe64(pa); len = m_epg_pagelen(m, i, 0); pa += len; } else { len += m_epg_pagelen(m, i, 0); pa += m_epg_pagelen(m, i, 0); } } if (j >= 0) { usgl->sge[j / 2].len[j & 1] = htobe32(len); if ((j & 1) == 0) usgl->sge[j / 2].len[1] = htobe32(0); } KASSERT(nsegs == 0, ("%s: nsegs %d, m %p", __func__, nsegs, m)); } /* * Similar to t4_push_frames() but handles sockets that contain TLS - * record mbufs. Unlike TLSOM, each mbuf is a complete TLS record and - * corresponds to a single work request. + * record mbufs. */ void t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop) { struct tls_hdr *thdr; struct fw_tlstx_data_wr *txwr; struct cpl_tx_tls_sfo *cpl; + struct ulptx_idata *idata; + struct ulptx_sc_memrd *memrd; struct wrqe *wr; struct mbuf *m; u_int nsegs, credits, wr_len; u_int expn_size; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tls_size, tx_credits, shove, sowwakeup; struct ofld_tx_sdesc *txsd; char *buf; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(ulp_mode(toep) == ULP_MODE_NONE || ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS, ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep)); KASSERT(tls_tx_key(toep), ("%s: TX key not set for toep %p", __func__, toep)); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", __func__, toep->tid, toep->flags, tp->t_flags); #endif if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; #ifdef RATELIMIT if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; } #endif /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } txsd = &toep->txsd[toep->txsd_pidx]; for (;;) { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { sbdrop_locked(sb, drop); drop = 0; } m = sb->sb_sndptr != NULL ? sb->sb_sndptr->m_next : sb->sb_mb; /* * Send a FIN if requested, but only if there's no * more data to send. */ if (m == NULL && toep->flags & TPF_SEND_FIN) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); t4_close_conn(sc, toep); return; } /* * If there is no ready data to send, wait until more * data arrives. */ if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d no ready data to send", __func__, toep->tid); #endif return; } KASSERT(m->m_flags & M_EXTPG, ("%s: mbuf %p is not NOMAP", __func__, m)); KASSERT(m->m_epg_tls != NULL, ("%s: mbuf %p doesn't have TLS session", __func__, m)); /* Calculate WR length. */ wr_len = sizeof(struct fw_tlstx_data_wr) + - sizeof(struct cpl_tx_tls_sfo) + key_size(toep); + sizeof(struct cpl_tx_tls_sfo) + + sizeof(struct ulptx_idata) + sizeof(struct ulptx_sc_memrd); /* Explicit IVs for AES-CBC and AES-GCM are <= 16. */ MPASS(toep->tls.iv_len <= AES_BLOCK_LEN); wr_len += AES_BLOCK_LEN; /* Account for SGL in work request length. */ nsegs = count_ext_pgs_segs(m); wr_len += sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; /* Not enough credits for this work request. */ if (howmany(wr_len, 16) > tx_credits) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d mbuf %p requires %d credits, but only %d available", __func__, toep->tid, m, howmany(wr_len, 16), tx_credits); #endif toep->flags |= TPF_TX_SUSPENDED; return; } /* Shove if there is no additional data pending. */ shove = ((m->m_next == NULL || (m->m_next->m_flags & M_NOTAVAIL) != 0)) && (tp->t_flags & TF_MORETOCOME) == 0; if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && sbused(sb) >= sb->sb_hiwat * 7 / 8) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ } if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } thdr = (struct tls_hdr *)&m->m_epg_hdr; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x", __func__, toep->tid, m->m_epg_seqno, thdr->type, m->m_len); #endif txwr = wrtod(wr); cpl = (struct cpl_tx_tls_sfo *)(txwr + 1); memset(txwr, 0, roundup2(wr_len, 16)); credits = howmany(wr_len, 16); expn_size = m->m_epg_hdrlen + m->m_epg_trllen; tls_size = m->m_len - expn_size; write_tlstx_wr(txwr, toep, 0, tls_size, expn_size, 1, credits, shove, 1); toep->tls.tx_seq_no = m->m_epg_seqno; write_tlstx_cpl(cpl, toep, thdr, tls_size, 1); - tls_copy_tx_key(toep, cpl + 1); + + idata = (struct ulptx_idata *)(cpl + 1); + idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + idata->len = htobe32(0); + memrd = (struct ulptx_sc_memrd *)(idata + 1); + memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) | + V_ULP_TX_SC_MORE(1) | + V_ULPTX_LEN16(toep->tls.tx_key_info_size >> 4)); + memrd->addr = htobe32(toep->tls.tx_key_addr >> 5); /* Copy IV. */ - buf = (char *)(cpl + 1) + key_size(toep); + buf = (char *)(memrd + 1); memcpy(buf, thdr + 1, toep->tls.iv_len); buf += AES_BLOCK_LEN; write_ktlstx_sgl(buf, m, nsegs); KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; tp->snd_nxt += m->m_len; tp->snd_max += m->m_len; SOCKBUF_LOCK(sb); sb->sb_sndptr = m; SOCKBUF_UNLOCK(sb); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TLSTX_CREDITS(toep)) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = m->m_len; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; counter_u64_add(toep->ofld_txq->tx_toe_tls_records, 1); counter_u64_add(toep->ofld_txq->tx_toe_tls_octets, m->m_len); t4_l2t_send(sc, wr, toep->l2te); } } -#endif /* * For TLS data we place received mbufs received via CPL_TLS_DATA into * an mbufq in the TLS offload state. When CPL_RX_TLS_CMP is * received, the completed PDUs are placed into the socket receive * buffer. * * The TLS code reuses the ulp_pdu_reclaimq to hold the pending mbufs. */ static int do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_tls_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; int len; /* XXX: Should this match do_rx_data instead? */ KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; toep->ofld_rxq->rx_toe_tls_octets += len; KASSERT(len == G_CPL_TLS_DATA_LENGTH(be32toh(cpl->length_pkd)), ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } /* Save TCP sequence number. */ m->m_pkthdr.tls_tcp_seq = be32toh(cpl->seq); if (mbufq_enqueue(&toep->ulp_pdu_reclaimq, m)) { #ifdef INVARIANTS panic("Failed to queue TLS data packet"); #else printf("%s: Failed to queue TLS data packet\n", __func__); INP_WUNLOCK(inp); m_freem(m); return (0); #endif } tp = intotcpcb(inp); tp->t_rcvtime = ticks; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %u len %d seq %u", __func__, tid, len, be32toh(cpl->seq)); #endif INP_WUNLOCK(inp); return (0); } static int do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_tls_cmp *cpl = mtod(m, const void *); struct tlsrx_hdr_pkt *tls_hdr_pkt; unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct mbuf *tls_data; -#ifdef KERN_TLS struct tls_get_record *tgr; struct mbuf *control; -#endif int len, pdu_length, rx_credits; KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; toep->ofld_rxq->rx_toe_tls_records++; KASSERT(len == G_CPL_RX_TLS_CMP_LENGTH(be32toh(cpl->pdulength_length)), ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } pdu_length = G_CPL_RX_TLS_CMP_PDULENGTH(be32toh(cpl->pdulength_length)); so = inp_inpcbtosocket(inp); tp = intotcpcb(inp); #ifdef VERBOSE_TRACES CTR6(KTR_CXGBE, "%s: tid %u PDU len %d len %d seq %u, rcv_nxt %u", __func__, tid, pdu_length, len, be32toh(cpl->seq), tp->rcv_nxt); #endif tp->rcv_nxt += pdu_length; KASSERT(tp->rcv_wnd >= pdu_length, ("%s: negative window size", __func__)); tp->rcv_wnd -= pdu_length; /* XXX: Not sure what to do about urgent data. */ /* * The payload of this CPL is the TLS header followed by * additional fields. */ KASSERT(m->m_len >= sizeof(*tls_hdr_pkt), ("%s: payload too small", __func__)); tls_hdr_pkt = mtod(m, void *); tls_data = mbufq_dequeue(&toep->ulp_pdu_reclaimq); if (tls_data != NULL) { KASSERT(be32toh(cpl->seq) == tls_data->m_pkthdr.tls_tcp_seq, ("%s: sequence mismatch", __func__)); } -#ifdef KERN_TLS - if (toep->tls.mode == TLS_MODE_KTLS) { - /* Report decryption errors as EBADMSG. */ - if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != - 0) { - m_freem(m); - m_freem(tls_data); + /* Report decryption errors as EBADMSG. */ + if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != 0) { + m_freem(m); + m_freem(tls_data); - CURVNET_SET(toep->vnet); - so->so_error = EBADMSG; - sorwakeup(so); + CURVNET_SET(toep->vnet); + so->so_error = EBADMSG; + sorwakeup(so); - INP_WUNLOCK(inp); - CURVNET_RESTORE(); + INP_WUNLOCK(inp); + CURVNET_RESTORE(); - return (0); - } + return (0); + } - /* Allocate the control message mbuf. */ - control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD, - IPPROTO_TCP); - if (control == NULL) { - m_freem(m); - m_freem(tls_data); + /* Allocate the control message mbuf. */ + control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD, + IPPROTO_TCP); + if (control == NULL) { + m_freem(m); + m_freem(tls_data); - CURVNET_SET(toep->vnet); - so->so_error = ENOBUFS; - sorwakeup(so); + CURVNET_SET(toep->vnet); + so->so_error = ENOBUFS; + sorwakeup(so); - INP_WUNLOCK(inp); - CURVNET_RESTORE(); + INP_WUNLOCK(inp); + CURVNET_RESTORE(); - return (0); - } + return (0); + } - tgr = (struct tls_get_record *) - CMSG_DATA(mtod(control, struct cmsghdr *)); - tgr->tls_type = tls_hdr_pkt->type; - tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8; - tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff; + tgr = (struct tls_get_record *) + CMSG_DATA(mtod(control, struct cmsghdr *)); + tgr->tls_type = tls_hdr_pkt->type; + tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8; + tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff; - m_freem(m); + m_freem(m); - if (tls_data != NULL) { - m_last(tls_data)->m_flags |= M_EOR; - tgr->tls_length = htobe16(tls_data->m_pkthdr.len); - } else - tgr->tls_length = 0; - m = tls_data; + if (tls_data != NULL) { + m_last(tls_data)->m_flags |= M_EOR; + tgr->tls_length = htobe16(tls_data->m_pkthdr.len); } else -#endif - { - /* - * Only the TLS header is sent to OpenSSL, so report - * errors by altering the record type. - */ - if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != - 0) - tls_hdr_pkt->type = CONTENT_TYPE_ERROR; - - /* Trim this CPL's mbuf to only include the TLS header. */ - KASSERT(m->m_len == len && m->m_next == NULL, - ("%s: CPL spans multiple mbufs", __func__)); - m->m_len = TLS_HEADER_LENGTH; - m->m_pkthdr.len = TLS_HEADER_LENGTH; - - if (tls_data != NULL) { - /* - * Update the TLS header length to be the length of - * the payload data. - */ - tls_hdr_pkt->length = htobe16(tls_data->m_pkthdr.len); - - m->m_next = tls_data; - m->m_pkthdr.len += tls_data->m_len; - } - -#ifdef KERN_TLS - control = NULL; -#endif - } + tgr->tls_length = 0; + m = tls_data; sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { struct epoch_tracker et; CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, pdu_length); m_freem(m); -#ifdef KERN_TLS m_freem(control); -#endif SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } /* * Not all of the bytes on the wire are included in the socket buffer * (e.g. the MAC of the TLS record). However, those bytes are included * in the TCP sequence space. */ /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && m->m_pkthdr.len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } -#ifdef KERN_TLS - if (control != NULL) - sbappendcontrol_locked(sb, m, control, 0); - else -#endif - sbappendstream_locked(sb, m, 0); + sbappendcontrol_locked(sb, m, control, 0); rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %u rx_credits %u rcv_wnd %u", __func__, tid, rx_credits, tp->rcv_wnd); #endif if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) { rx_credits = send_rx_credits(sc, toep, rx_credits); tp->rcv_wnd += rx_credits; tp->rcv_adv += rx_credits; } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } void do_rx_data_tls(const struct cpl_rx_data *cpl, struct toepcb *toep, struct mbuf *m) { struct inpcb *inp = toep->inp; struct tls_ofld_info *tls_ofld = &toep->tls; struct tls_hdr *hdr; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; - int error, len, rx_credits; + int len, rx_credits; len = m->m_pkthdr.len; INP_WLOCK_ASSERT(inp); so = inp_inpcbtosocket(inp); tp = intotcpcb(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); CURVNET_SET(toep->vnet); tp->rcv_nxt += len; KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; /* Do we have a full TLS header? */ if (len < sizeof(*hdr)) { CTR3(KTR_CXGBE, "%s: tid %u len %d: too short for a TLS header", __func__, toep->tid, len); so->so_error = EMSGSIZE; goto out; } hdr = mtod(m, struct tls_hdr *); /* Is the header valid? */ - if (be16toh(hdr->version) != tls_ofld->k_ctx.proto_ver) { + if (be16toh(hdr->version) != tls_ofld->rx_version) { CTR3(KTR_CXGBE, "%s: tid %u invalid version %04x", __func__, toep->tid, be16toh(hdr->version)); - error = EINVAL; - goto report_error; + so->so_error = EINVAL; + goto out; } if (be16toh(hdr->length) < sizeof(*hdr)) { CTR3(KTR_CXGBE, "%s: tid %u invalid length %u", __func__, toep->tid, be16toh(hdr->length)); - error = EBADMSG; - goto report_error; + so->so_error = EBADMSG; + goto out; } /* Did we get a truncated record? */ if (len < be16toh(hdr->length)) { CTR4(KTR_CXGBE, "%s: tid %u truncated TLS record (%d vs %u)", __func__, toep->tid, len, be16toh(hdr->length)); - error = EMSGSIZE; - goto report_error; + so->so_error = EMSGSIZE; + goto out; } /* Is the header type unknown? */ switch (hdr->type) { case CONTENT_TYPE_CCS: case CONTENT_TYPE_ALERT: case CONTENT_TYPE_APP_DATA: case CONTENT_TYPE_HANDSHAKE: break; default: CTR3(KTR_CXGBE, "%s: tid %u invalid TLS record type %u", __func__, toep->tid, hdr->type); - error = EBADMSG; - goto report_error; + so->so_error = EBADMSG; + goto out; } /* * Just punt. Although this could fall back to software * decryption, this case should never really happen. */ CTR4(KTR_CXGBE, "%s: tid %u dropping TLS record type %u, length %u", __func__, toep->tid, hdr->type, be16toh(hdr->length)); - error = EBADMSG; - -report_error: -#ifdef KERN_TLS - if (toep->tls.mode == TLS_MODE_KTLS) - so->so_error = error; - else -#endif - { - /* - * Report errors by sending an empty TLS record - * with an error record type. - */ - hdr->type = CONTENT_TYPE_ERROR; - - /* Trim this CPL's mbuf to only include the TLS header. */ - KASSERT(m->m_len == len && m->m_next == NULL, - ("%s: CPL spans multiple mbufs", __func__)); - m->m_len = TLS_HEADER_LENGTH; - m->m_pkthdr.len = TLS_HEADER_LENGTH; - - sbappendstream_locked(sb, m, 0); - m = NULL; - } + so->so_error = EBADMSG; out: /* * This connection is going to die anyway, so probably don't * need to bother with returning credits. */ rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %u rx_credits %u rcv_wnd %u", __func__, toep->tid, rx_credits, tp->rcv_wnd); #endif if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) { rx_credits = send_rx_credits(toep->vi->adapter, toep, rx_credits); tp->rcv_wnd += rx_credits; tp->rcv_adv += rx_credits; } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); CURVNET_RESTORE(); m_freem(m); } void t4_tls_mod_load(void) { t4_register_cpl_handler(CPL_TLS_DATA, do_tls_data); t4_register_cpl_handler(CPL_RX_TLS_CMP, do_rx_tls_cmp); } void t4_tls_mod_unload(void) { t4_register_cpl_handler(CPL_TLS_DATA, NULL); t4_register_cpl_handler(CPL_RX_TLS_CMP, NULL); } #endif /* TCP_OFFLOAD */ +#endif /* KERN_TLS */ diff --git a/sys/dev/cxgbe/tom/t4_tls.h b/sys/dev/cxgbe/tom/t4_tls.h index 37266206c31f..2770526704aa 100644 --- a/sys/dev/cxgbe/tom/t4_tls.h +++ b/sys/dev/cxgbe/tom/t4_tls.h @@ -1,590 +1,420 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017-2018 Chelsio Communications, Inc. * All rights reserved. * Written by: John Baldwin , Atul Gupta * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_TLS_H__ #define __T4_TLS_H__ -#define TLS1_VERSION 0x0301 -#define TLS1_1_VERSION 0x0302 -#define TLS1_2_VERSION 0x0303 -#define TLS_MAX_VERSION TLS1_2_VERSION - -#define DTLS1_VERSION 0xFEFF -#define DTLS1_2_VERSION 0xFEFD -#define DTLS_MAX_VERSION DTLS1_2_VERSION -#define DTLS1_VERSION_MAJOR 0xFE - -/* Custom socket options for TLS+TOE. */ - -#define MAX_MAC_KSZ 64 /*512 bits */ -#define MAX_CIPHER_KSZ 32 /* 256 bits */ -#define CIPHER_BLOCK_SZ 16 #define SALT_SIZE 4 -/* Can accomodate 16, 11-15 are reserved */ -enum { - CHSSL_SHA_NOP, - CHSSL_SHA1, - CHSSL_SHA224, - CHSSL_SHA256, - CHSSL_GHASH, - CHSSL_SHA512_224, - CHSSL_SHA512_256, - CHSSL_SHA512_384, - CHSSL_SHA512_512, - CHSSL_CBCMAC, - CHSSL_CMAC, -}; - -/* Can accomodate 16, 8-15 are reserved */ -enum { - CHSSL_CIPH_NOP, - CHSSL_AES_CBC, - CHSSL_AES_GCM, - CHSSL_AES_CTR, - CHSSL_AES_GEN, - CHSSL_IPSEC_ESP, - CHSSL_AES_XTS, - CHSSL_AES_CCM, -}; - /* Key Context Programming Operation type */ #define KEY_WRITE_RX 0x1 #define KEY_WRITE_TX 0x2 #define KEY_DELETE_RX 0x4 #define KEY_DELETE_TX 0x8 #define S_KEY_CLR_LOC 4 #define M_KEY_CLR_LOC 0xf #define V_KEY_CLR_LOC(x) ((x) << S_KEY_CLR_LOC) #define G_KEY_CLR_LOC(x) (((x) >> S_KEY_CLR_LOC) & M_KEY_CLR_LOC) #define F_KEY_CLR_LOC V_KEY_CLR_LOC(1U) #define S_KEY_GET_LOC 0 #define M_KEY_GET_LOC 0xf #define V_KEY_GET_LOC(x) ((x) << S_KEY_GET_LOC) #define G_KEY_GET_LOC(x) (((x) >> S_KEY_GET_LOC) & M_KEY_GET_LOC) -struct tls_ofld_state { - unsigned char enc_mode; - unsigned char mac_mode; - unsigned char key_loc; - unsigned char ofld_mode; - unsigned char auth_mode; - unsigned char resv[3]; -}; - -struct tls_tx_ctxt { - unsigned char salt[SALT_SIZE]; - unsigned char key[MAX_CIPHER_KSZ]; - unsigned char ipad[MAX_MAC_KSZ]; - unsigned char opad[MAX_MAC_KSZ]; -}; - -struct tls_rx_ctxt { - unsigned char salt[SALT_SIZE]; - unsigned char key[MAX_CIPHER_KSZ]; - unsigned char ipad[MAX_MAC_KSZ]; - unsigned char opad[MAX_MAC_KSZ]; -}; - -struct tls_key_context { - struct tls_tx_ctxt tx; - struct tls_rx_ctxt rx; - - unsigned char l_p_key; - unsigned char hmac_ctrl; - unsigned char mac_first; - unsigned char iv_size; - unsigned char iv_ctrl; - unsigned char iv_algo; - unsigned char tx_seq_no; - unsigned char rx_seq_no; - - struct tls_ofld_state state; - - unsigned int tx_key_info_size; - unsigned int rx_key_info_size; - unsigned int frag_size; - unsigned int mac_secret_size; - unsigned int cipher_secret_size; - int proto_ver; - unsigned int sock_fd; - unsigned short dtls_epoch; - unsigned short rsv; -}; - -/* Set with 'struct tls_key_context'. */ -#define TCP_TLSOM_SET_TLS_CONTEXT (TCP_VENDOR) - -/* Get returns int of enabled (1) / disabled (0). */ -#define TCP_TLSOM_GET_TLS_TOM (TCP_VENDOR + 1) - -enum { - TLS_TOM_NONE = 0, - TLS_TOM_TXONLY, - TLS_TOM_BOTH -}; - -/* Set with no value. */ -#define TCP_TLSOM_CLR_TLS_TOM (TCP_VENDOR + 2) - -/* Set with no value. */ -#define TCP_TLSOM_CLR_QUIES (TCP_VENDOR + 3) - #ifdef _KERNEL /* Timeouts for handshake timer in seconds. */ #define TLS_SRV_HELLO_DONE 9 #define TLS_SRV_HELLO_RD_TM 5 #define TLS_SRV_HELLO_BKOFF_TM 15 #define CONTENT_TYPE_CCS 20 #define CONTENT_TYPE_ALERT 21 #define CONTENT_TYPE_HANDSHAKE 22 #define CONTENT_TYPE_APP_DATA 23 #define CONTENT_TYPE_HEARTBEAT 24 #define CONTENT_TYPE_KEY_CONTEXT 32 #define CONTENT_TYPE_ERROR 127 -#define GCM_TAG_SIZE 16 -#define AEAD_EXPLICIT_DATA_SIZE 8 #define TLS_HEADER_LENGTH 5 #define TP_TX_PG_SZ 65536 #define FC_TP_PLEN_MAX 17408 -#define IPAD_SIZE 64 -#define OPAD_SIZE 64 -#define KEY_SIZE 32 -#define CIPHER_BLOCK_SIZE 16 -#define HDR_KCTX_SIZE (IPAD_SIZE + OPAD_SIZE + KEY_SIZE) - -#define KEY_IN_DDR_SIZE 16 -#define TLS_KEY_CONTEXT_SZ roundup2(sizeof(struct tls_tx_ctxt), 32) - -/* MAC KEY SIZE */ -#define SHA_NOP 0 -#define SHA_GHASH 16 -#define SHA_224 28 -#define SHA_256 32 -#define SHA_384 48 -#define SHA_512 64 -#define SHA1 20 - -/* CIPHER KEY SIZE */ -#define AES_NOP 0 -#define AES_128 16 -#define AES_192 24 -#define AES_256 32 - -enum { - TLS_1_2_VERSION, - TLS_1_1_VERSION, - DTLS_1_2_VERSION, - TLS_VERSION_MAX, -}; - -enum { - CH_EVP_CIPH_STREAM_CIPHER, - CH_EVP_CIPH_CBC_MODE, - CH_EVP_CIPH_GCM_MODE, - CH_EVP_CIPH_CTR_MODE, -}; +#define TLS_KEY_CONTEXT_SZ roundup2(sizeof(struct tls_keyctx), 32) enum { TLS_SFO_WR_CONTEXTLOC_DSGL, TLS_SFO_WR_CONTEXTLOC_IMMEDIATE, TLS_SFO_WR_CONTEXTLOC_DDR, }; enum { CPL_TX_TLS_SFO_TYPE_CCS, CPL_TX_TLS_SFO_TYPE_ALERT, CPL_TX_TLS_SFO_TYPE_HANDSHAKE, CPL_TX_TLS_SFO_TYPE_DATA, CPL_TX_TLS_SFO_TYPE_HEARTBEAT, /* XXX: Shouldn't this be "CUSTOM"? */ }; -enum { - CH_CK_SIZE_128, - CH_CK_SIZE_192, - CH_CK_SIZE_256, - CH_CK_SIZE_NOP, -}; - -enum { - CH_MK_SIZE_128, - CH_MK_SIZE_160, - CH_MK_SIZE_192, - CH_MK_SIZE_256, - CH_MK_SIZE_512, - CH_MK_SIZE_NOP, -}; - struct tls_scmd { __be32 seqno_numivs; __be32 ivgen_hdrlen; }; -enum tls_mode { - TLS_MODE_OFF, - TLS_MODE_TLSOM, - TLS_MODE_KTLS, -}; - struct tls_ofld_info { - struct tls_key_context k_ctx; + unsigned int frag_size; int key_location; - int mac_length; int rx_key_addr; int tx_key_addr; uint64_t tx_seq_no; + uint16_t rx_version; unsigned short fcplenmax; unsigned short adjusted_plen; unsigned short expn_per_ulp; unsigned short pdus_per_ulp; struct tls_scmd scmd0; u_int iv_len; - enum tls_mode mode; + unsigned int tx_key_info_size; struct callout handshake_timer; - u_int sb_off; }; struct tls_key_req { __be32 wr_hi; __be32 wr_mid; __be32 ftid; __u8 reneg_to_write_rx; __u8 protocol; __be16 mfs; /* master command */ __be32 cmd; __be32 len16; /* command length */ __be32 dlen; /* data length in 32-byte units */ __be32 kaddr; /* sub-command */ __be32 sc_more; __be32 sc_len; }__packed; struct tls_keyctx { union key_ctx { struct tx_keyctx_hdr { __u8 ctxlen; __u8 r2; __be16 dualck_to_txvalid; __u8 txsalt[4]; __be64 r5; } txhdr; struct rx_keyctx_hdr { __u8 flitcnt_hmacctrl; __u8 protover_ciphmode; __u8 authmode_to_rxvalid; __u8 ivpresent_to_rxmk_size; __u8 rxsalt[4]; __be64 ivinsert_to_authinsrt; } rxhdr; } u; struct keys { __u8 edkey[32]; __u8 ipad[64]; __u8 opad[64]; } keys; }; #define S_TLS_KEYCTX_TX_WR_DUALCK 12 #define M_TLS_KEYCTX_TX_WR_DUALCK 0x1 #define V_TLS_KEYCTX_TX_WR_DUALCK(x) ((x) << S_TLS_KEYCTX_TX_WR_DUALCK) #define G_TLS_KEYCTX_TX_WR_DUALCK(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_DUALCK) & M_TLS_KEYCTX_TX_WR_DUALCK) #define F_TLS_KEYCTX_TX_WR_DUALCK V_TLS_KEYCTX_TX_WR_DUALCK(1U) #define S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 11 #define M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 0x1 #define V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) #define G_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) & \ M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) #define F_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT \ V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1U) #define S_TLS_KEYCTX_TX_WR_SALT_PRESENT 10 #define M_TLS_KEYCTX_TX_WR_SALT_PRESENT 0x1 #define V_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_SALT_PRESENT) #define G_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_SALT_PRESENT) & \ M_TLS_KEYCTX_TX_WR_SALT_PRESENT) #define F_TLS_KEYCTX_TX_WR_SALT_PRESENT \ V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1U) #define S_TLS_KEYCTX_TX_WR_TXCK_SIZE 6 #define M_TLS_KEYCTX_TX_WR_TXCK_SIZE 0xf #define V_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXCK_SIZE) #define G_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXCK_SIZE) & \ M_TLS_KEYCTX_TX_WR_TXCK_SIZE) #define S_TLS_KEYCTX_TX_WR_TXMK_SIZE 2 #define M_TLS_KEYCTX_TX_WR_TXMK_SIZE 0xf #define V_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXMK_SIZE) #define G_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXMK_SIZE) & \ M_TLS_KEYCTX_TX_WR_TXMK_SIZE) #define S_TLS_KEYCTX_TX_WR_TXVALID 0 #define M_TLS_KEYCTX_TX_WR_TXVALID 0x1 #define V_TLS_KEYCTX_TX_WR_TXVALID(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXVALID) #define G_TLS_KEYCTX_TX_WR_TXVALID(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXVALID) & M_TLS_KEYCTX_TX_WR_TXVALID) #define F_TLS_KEYCTX_TX_WR_TXVALID V_TLS_KEYCTX_TX_WR_TXVALID(1U) #define S_TLS_KEYCTX_TX_WR_FLITCNT 3 #define M_TLS_KEYCTX_TX_WR_FLITCNT 0x1f #define V_TLS_KEYCTX_TX_WR_FLITCNT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_FLITCNT) #define G_TLS_KEYCTX_TX_WR_FLITCNT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_FLITCNT) & M_TLS_KEYCTX_TX_WR_FLITCNT) #define S_TLS_KEYCTX_TX_WR_HMACCTRL 0 #define M_TLS_KEYCTX_TX_WR_HMACCTRL 0x7 #define V_TLS_KEYCTX_TX_WR_HMACCTRL(x) \ ((x) << S_TLS_KEYCTX_TX_WR_HMACCTRL) #define G_TLS_KEYCTX_TX_WR_HMACCTRL(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_HMACCTRL) & M_TLS_KEYCTX_TX_WR_HMACCTRL) #define S_TLS_KEYCTX_TX_WR_PROTOVER 4 #define M_TLS_KEYCTX_TX_WR_PROTOVER 0xf #define V_TLS_KEYCTX_TX_WR_PROTOVER(x) \ ((x) << S_TLS_KEYCTX_TX_WR_PROTOVER) #define G_TLS_KEYCTX_TX_WR_PROTOVER(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_PROTOVER) & M_TLS_KEYCTX_TX_WR_PROTOVER) #define S_TLS_KEYCTX_TX_WR_CIPHMODE 0 #define M_TLS_KEYCTX_TX_WR_CIPHMODE 0xf #define V_TLS_KEYCTX_TX_WR_CIPHMODE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_CIPHMODE) #define G_TLS_KEYCTX_TX_WR_CIPHMODE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_CIPHMODE) & M_TLS_KEYCTX_TX_WR_CIPHMODE) #define S_TLS_KEYCTX_TX_WR_AUTHMODE 4 #define M_TLS_KEYCTX_TX_WR_AUTHMODE 0xf #define V_TLS_KEYCTX_TX_WR_AUTHMODE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_AUTHMODE) #define G_TLS_KEYCTX_TX_WR_AUTHMODE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_AUTHMODE) & M_TLS_KEYCTX_TX_WR_AUTHMODE) #define S_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL 3 #define M_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL 0x1 #define V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(x) \ ((x) << S_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL) #define G_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL) & \ M_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL) #define F_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL \ V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(1U) #define S_TLS_KEYCTX_TX_WR_SEQNUMCTRL 1 #define M_TLS_KEYCTX_TX_WR_SEQNUMCTRL 0x3 #define V_TLS_KEYCTX_TX_WR_SEQNUMCTRL(x) \ ((x) << S_TLS_KEYCTX_TX_WR_SEQNUMCTRL) #define G_TLS_KEYCTX_TX_WR_SEQNUMCTRL(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_SEQNUMCTRL) & \ M_TLS_KEYCTX_TX_WR_SEQNUMCTRL) #define S_TLS_KEYCTX_TX_WR_RXVALID 0 #define M_TLS_KEYCTX_TX_WR_RXVALID 0x1 #define V_TLS_KEYCTX_TX_WR_RXVALID(x) \ ((x) << S_TLS_KEYCTX_TX_WR_RXVALID) #define G_TLS_KEYCTX_TX_WR_RXVALID(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_RXVALID) & M_TLS_KEYCTX_TX_WR_RXVALID) #define F_TLS_KEYCTX_TX_WR_RXVALID V_TLS_KEYCTX_TX_WR_RXVALID(1U) #define S_TLS_KEYCTX_TX_WR_IVPRESENT 7 #define M_TLS_KEYCTX_TX_WR_IVPRESENT 0x1 #define V_TLS_KEYCTX_TX_WR_IVPRESENT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_IVPRESENT) #define G_TLS_KEYCTX_TX_WR_IVPRESENT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_IVPRESENT) & \ M_TLS_KEYCTX_TX_WR_IVPRESENT) #define F_TLS_KEYCTX_TX_WR_IVPRESENT V_TLS_KEYCTX_TX_WR_IVPRESENT(1U) #define S_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT 6 #define M_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT 0x1 #define V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT) #define G_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT) & \ M_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT) #define F_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT \ V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(1U) #define S_TLS_KEYCTX_TX_WR_RXCK_SIZE 3 #define M_TLS_KEYCTX_TX_WR_RXCK_SIZE 0x7 #define V_TLS_KEYCTX_TX_WR_RXCK_SIZE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_RXCK_SIZE) #define G_TLS_KEYCTX_TX_WR_RXCK_SIZE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_RXCK_SIZE) & \ M_TLS_KEYCTX_TX_WR_RXCK_SIZE) #define S_TLS_KEYCTX_TX_WR_RXMK_SIZE 0 #define M_TLS_KEYCTX_TX_WR_RXMK_SIZE 0x7 #define V_TLS_KEYCTX_TX_WR_RXMK_SIZE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_RXMK_SIZE) #define G_TLS_KEYCTX_TX_WR_RXMK_SIZE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_RXMK_SIZE) & \ M_TLS_KEYCTX_TX_WR_RXMK_SIZE) #define S_TLS_KEYCTX_TX_WR_IVINSERT 55 #define M_TLS_KEYCTX_TX_WR_IVINSERT 0x1ffULL #define V_TLS_KEYCTX_TX_WR_IVINSERT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_IVINSERT) #define G_TLS_KEYCTX_TX_WR_IVINSERT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_IVINSERT) & M_TLS_KEYCTX_TX_WR_IVINSERT) #define S_TLS_KEYCTX_TX_WR_AADSTRTOFST 47 #define M_TLS_KEYCTX_TX_WR_AADSTRTOFST 0xffULL #define V_TLS_KEYCTX_TX_WR_AADSTRTOFST(x) \ ((x) << S_TLS_KEYCTX_TX_WR_AADSTRTOFST) #define G_TLS_KEYCTX_TX_WR_AADSTRTOFST(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_AADSTRTOFST) & \ M_TLS_KEYCTX_TX_WR_AADSTRTOFST) #define S_TLS_KEYCTX_TX_WR_AADSTOPOFST 39 #define M_TLS_KEYCTX_TX_WR_AADSTOPOFST 0xffULL #define V_TLS_KEYCTX_TX_WR_AADSTOPOFST(x) \ ((x) << S_TLS_KEYCTX_TX_WR_AADSTOPOFST) #define G_TLS_KEYCTX_TX_WR_AADSTOPOFST(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_AADSTOPOFST) & \ M_TLS_KEYCTX_TX_WR_AADSTOPOFST) #define S_TLS_KEYCTX_TX_WR_CIPHERSRTOFST 30 #define M_TLS_KEYCTX_TX_WR_CIPHERSRTOFST 0x1ffULL #define V_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(x) \ ((x) << S_TLS_KEYCTX_TX_WR_CIPHERSRTOFST) #define G_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_CIPHERSRTOFST) & \ M_TLS_KEYCTX_TX_WR_CIPHERSRTOFST) #define S_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST 23 #define M_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST 0x7f #define V_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(x) \ ((x) << S_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST) #define G_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST) & \ M_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST) #define S_TLS_KEYCTX_TX_WR_AUTHSRTOFST 14 #define M_TLS_KEYCTX_TX_WR_AUTHSRTOFST 0x1ff #define V_TLS_KEYCTX_TX_WR_AUTHSRTOFST(x) \ ((x) << S_TLS_KEYCTX_TX_WR_AUTHSRTOFST) #define G_TLS_KEYCTX_TX_WR_AUTHSRTOFST(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_AUTHSRTOFST) & \ M_TLS_KEYCTX_TX_WR_AUTHSRTOFST) #define S_TLS_KEYCTX_TX_WR_AUTHSTOPOFST 7 #define M_TLS_KEYCTX_TX_WR_AUTHSTOPOFST 0x7f #define V_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(x) \ ((x) << S_TLS_KEYCTX_TX_WR_AUTHSTOPOFST) #define G_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_AUTHSTOPOFST) & \ M_TLS_KEYCTX_TX_WR_AUTHSTOPOFST) #define S_TLS_KEYCTX_TX_WR_AUTHINSRT 0 #define M_TLS_KEYCTX_TX_WR_AUTHINSRT 0x7f #define V_TLS_KEYCTX_TX_WR_AUTHINSRT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_AUTHINSRT) #define G_TLS_KEYCTX_TX_WR_AUTHINSRT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_AUTHINSRT) & \ M_TLS_KEYCTX_TX_WR_AUTHINSRT) struct tls_hdr { __u8 type; __be16 version; __be16 length; } __packed; struct tlsrx_hdr_pkt { __u8 type; __be16 version; __be16 length; __be64 tls_seq; __be16 reserved1; __u8 res_to_mac_error; } __packed; /* res_to_mac_error fields */ #define S_TLSRX_HDR_PKT_INTERNAL_ERROR 4 #define M_TLSRX_HDR_PKT_INTERNAL_ERROR 0x1 #define V_TLSRX_HDR_PKT_INTERNAL_ERROR(x) \ ((x) << S_TLSRX_HDR_PKT_INTERNAL_ERROR) #define G_TLSRX_HDR_PKT_INTERNAL_ERROR(x) \ (((x) >> S_TLSRX_HDR_PKT_INTERNAL_ERROR) & M_TLSRX_HDR_PKT_INTERNAL_ERROR) #define F_TLSRX_HDR_PKT_INTERNAL_ERROR V_TLSRX_HDR_PKT_INTERNAL_ERROR(1U) #define S_TLSRX_HDR_PKT_SPP_ERROR 3 #define M_TLSRX_HDR_PKT_SPP_ERROR 0x1 #define V_TLSRX_HDR_PKT_SPP_ERROR(x) ((x) << S_TLSRX_HDR_PKT_SPP_ERROR) #define G_TLSRX_HDR_PKT_SPP_ERROR(x) \ (((x) >> S_TLSRX_HDR_PKT_SPP_ERROR) & M_TLSRX_HDR_PKT_SPP_ERROR) #define F_TLSRX_HDR_PKT_SPP_ERROR V_TLSRX_HDR_PKT_SPP_ERROR(1U) #define S_TLSRX_HDR_PKT_CCDX_ERROR 2 #define M_TLSRX_HDR_PKT_CCDX_ERROR 0x1 #define V_TLSRX_HDR_PKT_CCDX_ERROR(x) ((x) << S_TLSRX_HDR_PKT_CCDX_ERROR) #define G_TLSRX_HDR_PKT_CCDX_ERROR(x) \ (((x) >> S_TLSRX_HDR_PKT_CCDX_ERROR) & M_TLSRX_HDR_PKT_CCDX_ERROR) #define F_TLSRX_HDR_PKT_CCDX_ERROR V_TLSRX_HDR_PKT_CCDX_ERROR(1U) #define S_TLSRX_HDR_PKT_PAD_ERROR 1 #define M_TLSRX_HDR_PKT_PAD_ERROR 0x1 #define V_TLSRX_HDR_PKT_PAD_ERROR(x) ((x) << S_TLSRX_HDR_PKT_PAD_ERROR) #define G_TLSRX_HDR_PKT_PAD_ERROR(x) \ (((x) >> S_TLSRX_HDR_PKT_PAD_ERROR) & M_TLSRX_HDR_PKT_PAD_ERROR) #define F_TLSRX_HDR_PKT_PAD_ERROR V_TLSRX_HDR_PKT_PAD_ERROR(1U) #define S_TLSRX_HDR_PKT_MAC_ERROR 0 #define M_TLSRX_HDR_PKT_MAC_ERROR 0x1 #define V_TLSRX_HDR_PKT_MAC_ERROR(x) ((x) << S_TLSRX_HDR_PKT_MAC_ERROR) #define G_TLSRX_HDR_PKT_MAC_ERROR(x) \ (((x) >> S_TLSRX_HDR_PKT_MAC_ERROR) & M_TLSRX_HDR_PKT_MAC_ERROR) #define F_TLSRX_HDR_PKT_MAC_ERROR V_TLSRX_HDR_PKT_MAC_ERROR(1U) #define M_TLSRX_HDR_PKT_ERROR 0x1F #endif /* _KERNEL */ #endif /* !__T4_TLS_H__ */ diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 1c9999be339d..f5c2c6804aa2 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -1,2114 +1,2094 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_tcb.h" #include "t4_clip.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" #include "tom/t4_tls.h" static struct protosw *tcp_protosw; static struct protosw toe_protosw; static struct pr_usrreqs toe_usrreqs; static struct protosw *tcp6_protosw; static struct protosw toe6_protosw; static struct pr_usrreqs toe6_usrreqs; /* Module ops */ static int t4_tom_mod_load(void); static int t4_tom_mod_unload(void); static int t4_tom_modevent(module_t, int, void *); /* ULD ops and helpers */ static int t4_tom_activate(struct adapter *); static int t4_tom_deactivate(struct adapter *); static struct uld_info tom_uld_info = { .uld_id = ULD_TOM, .activate = t4_tom_activate, .deactivate = t4_tom_deactivate, }; static void release_offload_resources(struct toepcb *); static int alloc_tid_tabs(struct tid_info *); static void free_tid_tabs(struct tid_info *); static void free_tom_data(struct adapter *, struct tom_data *); static void reclaim_wr_resources(void *, int); struct toepcb * alloc_toepcb(struct vi_info *vi, int flags) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct toepcb *toep; int tx_credits, txsd_total, len; /* * The firmware counts tx work request credits in units of 16 bytes * each. Reserve room for an ABORT_REQ so the driver never has to worry * about tx credits if it wants to abort a connection. */ tx_credits = sc->params.ofldq_wr_cred; tx_credits -= howmany(sizeof(struct cpl_abort_req), 16); /* * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte * immediate payload, and firmware counts tx work request credits in * units of 16 byte. Calculate the maximum work requests possible. */ txsd_total = tx_credits / howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16); len = offsetof(struct toepcb, txsd) + txsd_total * sizeof(struct ofld_tx_sdesc); toep = malloc(len, M_CXGBE, M_ZERO | flags); if (toep == NULL) return (NULL); refcount_init(&toep->refcount, 1); toep->td = sc->tom_softc; toep->vi = vi; toep->tid = -1; toep->tx_total = tx_credits; toep->tx_credits = tx_credits; mbufq_init(&toep->ulp_pduq, INT_MAX); mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX); toep->txsd_total = txsd_total; toep->txsd_avail = txsd_total; toep->txsd_pidx = 0; toep->txsd_cidx = 0; aiotx_init_toep(toep); return (toep); } /* * Initialize a toepcb after its params have been filled out. */ int init_toepcb(struct vi_info *vi, struct toepcb *toep) { struct conn_params *cp = &toep->params; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tx_cl_rl_params *tc; if (cp->tc_idx >= 0 && cp->tc_idx < sc->chip_params->nsched_cls) { tc = &pi->sched_params->cl_rl[cp->tc_idx]; mtx_lock(&sc->tc_lock); if (tc->flags & CLRL_ERR) { log(LOG_ERR, "%s: failed to associate traffic class %u with tid %u\n", device_get_nameunit(vi->dev), cp->tc_idx, toep->tid); cp->tc_idx = -1; } else { tc->refcount++; } mtx_unlock(&sc->tc_lock); } toep->ofld_txq = &sc->sge.ofld_txq[cp->txq_idx]; toep->ofld_rxq = &sc->sge.ofld_rxq[cp->rxq_idx]; toep->ctrlq = &sc->sge.ctrlq[pi->port_id]; tls_init_toep(toep); if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_init_toep(toep); toep->flags |= TPF_INITIALIZED; return (0); } struct toepcb * hold_toepcb(struct toepcb *toep) { refcount_acquire(&toep->refcount); return (toep); } void free_toepcb(struct toepcb *toep) { if (refcount_release(&toep->refcount) == 0) return; KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: attached to an inpcb", __func__)); KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: CPL pending", __func__)); if (toep->flags & TPF_INITIALIZED) { if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_uninit_toep(toep); tls_uninit_toep(toep); } free(toep, M_CXGBE); } /* * Set up the socket for TCP offload. */ void offload_socket(struct socket *so, struct toepcb *toep) { struct tom_data *td = toep->td; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb; INP_WLOCK_ASSERT(inp); /* Update socket */ sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; if (inp->inp_vflag & INP_IPV6) so->so_proto = &toe6_protosw; else so->so_proto = &toe_protosw; SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ tp->tod = &td->tod; tp->t_toe = toep; tp->t_flags |= TF_TOE; /* Install an extra hold on inp */ toep->inp = inp; toep->flags |= TPF_ATTACHED; in_pcbref(inp); /* Add the TOE PCB to the active list */ mtx_lock(&td->toep_list_lock); TAILQ_INSERT_HEAD(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } void restore_so_proto(struct socket *so, bool v6) { if (v6) so->so_proto = tcp6_protosw; else so->so_proto = tcp_protosw; } /* This is _not_ the normal way to "unoffload" a socket. */ void undo_offload_socket(struct socket *so) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; struct tom_data *td = toep->td; struct sockbuf *sb; INP_WLOCK_ASSERT(inp); sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; restore_so_proto(so, inp->inp_vflag & INP_IPV6); SOCKBUF_UNLOCK(sb); tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->inp = NULL; toep->flags &= ~TPF_ATTACHED; if (in_pcbrele_wlocked(inp)) panic("%s: inp freed.", __func__); mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } static void release_offload_resources(struct toepcb *toep) { struct tom_data *td = toep->td; struct adapter *sc = td_adapter(td); int tid = toep->tid; KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: %p has CPL pending.", __func__, toep)); KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: %p is still attached.", __func__, toep)); CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", __func__, toep, tid, toep->l2te, toep->ce); /* * These queues should have been emptied at approximately the same time * that a normal connection's socket's so_snd would have been purged or * drained. Do _not_ clean up here. */ MPASS(mbufq_len(&toep->ulp_pduq) == 0); MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0); #ifdef INVARIANTS if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_assert_empty(toep); #endif MPASS(TAILQ_EMPTY(&toep->aiotx_jobq)); if (toep->l2te) t4_l2t_release(toep->l2te); if (tid >= 0) { remove_tid(sc, tid, toep->ce ? 2 : 1); release_tid(sc, tid, toep->ctrlq); } if (toep->ce) t4_release_clip_entry(sc, toep->ce); if (toep->params.tc_idx != -1) t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->params.tc_idx); mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); free_toepcb(toep); } /* * The kernel is done with the TCP PCB and this is our opportunity to unhook the * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no * pending CPL) then it is time to release all resources tied to the toepcb. * * Also gets called when an offloaded active open fails and the TOM wants the * kernel to take the TCP PCB back. */ static void t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) { #if defined(KTR) || defined(INVARIANTS) struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); KASSERT(toep->flags & TPF_ATTACHED, ("%s: not attached", __func__)); #ifdef KTR if (tp->t_state == TCPS_SYN_SENT) { CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); } else { CTR6(KTR_CXGBE, "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, inp->inp_flags); } #endif if (ulp_mode(toep) == ULP_MODE_TLS) tls_detach(toep); tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->flags &= ~TPF_ATTACHED; if (!(toep->flags & TPF_CPL_PENDING)) release_offload_resources(toep); } /* * setsockopt handler. */ static void t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; if (dir == SOPT_GET) return; CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name); switch (name) { case TCP_NODELAY: if (tp->t_state != TCPS_ESTABLISHED) break; toep->params.nagle = tp->t_flags & TF_NODELAY ? 0 : 1; t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, V_TF_NAGLE(1), V_TF_NAGLE(toep->params.nagle), 0, 0); break; default: break; } } static inline uint64_t get_tcb_tflags(const uint64_t *tcb) { return ((be64toh(tcb[14]) << 32) | (be64toh(tcb[15]) >> 32)); } static inline uint32_t get_tcb_field(const uint64_t *tcb, u_int word, uint32_t mask, u_int shift) { #define LAST_WORD ((TCB_SIZE / 4) - 1) uint64_t t1, t2; int flit_idx; MPASS(mask != 0); MPASS(word <= LAST_WORD); MPASS(shift < 32); flit_idx = (LAST_WORD - word) / 2; if (word & 0x1) shift += 32; t1 = be64toh(tcb[flit_idx]) >> shift; t2 = 0; if (fls(mask) > 64 - shift) { /* * Will spill over into the next logical flit, which is the flit * before this one. The flit_idx before this one must be valid. */ MPASS(flit_idx > 0); t2 = be64toh(tcb[flit_idx - 1]) << (64 - shift); } return ((t2 | t1) & mask); #undef LAST_WORD } #define GET_TCB_FIELD(tcb, F) \ get_tcb_field(tcb, W_TCB_##F, M_TCB_##F, S_TCB_##F) /* * Issues a CPL_GET_TCB to read the entire TCB for the tid. */ static int send_get_tcb(struct adapter *sc, u_int tid) { struct cpl_get_tcb *cpl; struct wrq_cookie cookie; MPASS(tid < sc->tids.ntids); cpl = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*cpl), 16), &cookie); if (__predict_false(cpl == NULL)) return (ENOMEM); bzero(cpl, sizeof(*cpl)); INIT_TP_WR(cpl, tid); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_GET_TCB, tid)); cpl->reply_ctrl = htobe16(V_REPLY_CHAN(0) | V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id)); cpl->cookie = 0xff; commit_wrq_wr(&sc->sge.ctrlq[0], cpl, &cookie); return (0); } static struct tcb_histent * alloc_tcb_histent(struct adapter *sc, u_int tid, int flags) { struct tcb_histent *te; MPASS(flags == M_NOWAIT || flags == M_WAITOK); te = malloc(sizeof(*te), M_CXGBE, M_ZERO | flags); if (te == NULL) return (NULL); mtx_init(&te->te_lock, "TCB entry", NULL, MTX_DEF); callout_init_mtx(&te->te_callout, &te->te_lock, 0); te->te_adapter = sc; te->te_tid = tid; return (te); } static void free_tcb_histent(struct tcb_histent *te) { mtx_destroy(&te->te_lock); free(te, M_CXGBE); } /* * Start tracking the tid in the TCB history. */ int add_tid_to_history(struct adapter *sc, u_int tid) { struct tcb_histent *te = NULL; struct tom_data *td = sc->tom_softc; int rc; MPASS(tid < sc->tids.ntids); if (td->tcb_history == NULL) return (ENXIO); rw_wlock(&td->tcb_history_lock); if (td->tcb_history[tid] != NULL) { rc = EEXIST; goto done; } te = alloc_tcb_histent(sc, tid, M_NOWAIT); if (te == NULL) { rc = ENOMEM; goto done; } mtx_lock(&te->te_lock); rc = send_get_tcb(sc, tid); if (rc == 0) { te->te_flags |= TE_RPL_PENDING; td->tcb_history[tid] = te; } else { free(te, M_CXGBE); } mtx_unlock(&te->te_lock); done: rw_wunlock(&td->tcb_history_lock); return (rc); } static void remove_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; rw_assert(&td->tcb_history_lock, RA_WLOCKED); mtx_assert(&te->te_lock, MA_OWNED); MPASS(td->tcb_history[te->te_tid] == te); td->tcb_history[te->te_tid] = NULL; free_tcb_histent(te); rw_wunlock(&td->tcb_history_lock); } static inline struct tcb_histent * lookup_tcb_histent(struct adapter *sc, u_int tid, bool addrem) { struct tcb_histent *te; struct tom_data *td = sc->tom_softc; MPASS(tid < sc->tids.ntids); if (td->tcb_history == NULL) return (NULL); if (addrem) rw_wlock(&td->tcb_history_lock); else rw_rlock(&td->tcb_history_lock); te = td->tcb_history[tid]; if (te != NULL) { mtx_lock(&te->te_lock); return (te); /* with both locks held */ } if (addrem) rw_wunlock(&td->tcb_history_lock); else rw_runlock(&td->tcb_history_lock); return (te); } static inline void release_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; mtx_assert(&te->te_lock, MA_OWNED); mtx_unlock(&te->te_lock); rw_assert(&td->tcb_history_lock, RA_RLOCKED); rw_runlock(&td->tcb_history_lock); } static void request_tcb(void *arg) { struct tcb_histent *te = arg; mtx_assert(&te->te_lock, MA_OWNED); /* Noone else is supposed to update the histent. */ MPASS(!(te->te_flags & TE_RPL_PENDING)); if (send_get_tcb(te->te_adapter, te->te_tid) == 0) te->te_flags |= TE_RPL_PENDING; else callout_schedule(&te->te_callout, hz / 100); } static void update_tcb_histent(struct tcb_histent *te, const uint64_t *tcb) { struct tom_data *td = te->te_adapter->tom_softc; uint64_t tflags = get_tcb_tflags(tcb); uint8_t sample = 0; if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != GET_TCB_FIELD(tcb, SND_UNA_RAW)) { if (GET_TCB_FIELD(tcb, T_RXTSHIFT) != 0) sample |= TS_RTO; if (GET_TCB_FIELD(tcb, T_DUPACKS) != 0) sample |= TS_DUPACKS; if (GET_TCB_FIELD(tcb, T_DUPACKS) >= td->dupack_threshold) sample |= TS_FASTREXMT; } if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != 0) { uint32_t snd_wnd; sample |= TS_SND_BACKLOGGED; /* for whatever reason. */ snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (tflags & V_TF_RECV_SCALE(1)) snd_wnd <<= GET_TCB_FIELD(tcb, RCV_SCALE); if (GET_TCB_FIELD(tcb, SND_CWND) < snd_wnd) sample |= TS_CWND_LIMITED; /* maybe due to CWND */ } if (tflags & V_TF_CCTRL_ECN(1)) { /* * CE marker on incoming IP hdr, echoing ECE back in the TCP * hdr. Indicates congestion somewhere on the way from the peer * to this node. */ if (tflags & V_TF_CCTRL_ECE(1)) sample |= TS_ECN_ECE; /* * ECE seen and CWR sent (or about to be sent). Might indicate * congestion on the way to the peer. This node is reducing its * congestion window in response. */ if (tflags & (V_TF_CCTRL_CWR(1) | V_TF_CCTRL_RFR(1))) sample |= TS_ECN_CWR; } te->te_sample[te->te_pidx] = sample; if (++te->te_pidx == nitems(te->te_sample)) te->te_pidx = 0; memcpy(te->te_tcb, tcb, TCB_SIZE); te->te_flags |= TE_ACTIVE; } static int do_get_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_get_tcb_rpl *cpl = mtod(m, const void *); const uint64_t *tcb = (const uint64_t *)(const void *)(cpl + 1); struct tcb_histent *te; const u_int tid = GET_TID(cpl); bool remove; remove = GET_TCB_FIELD(tcb, T_STATE) == TCPS_CLOSED; te = lookup_tcb_histent(sc, tid, remove); if (te == NULL) { /* Not in the history. Who issued the GET_TCB for this? */ device_printf(sc->dev, "tcb %u: flags 0x%016jx, state %u, " "srtt %u, sscale %u, rscale %u, cookie 0x%x\n", tid, (uintmax_t)get_tcb_tflags(tcb), GET_TCB_FIELD(tcb, T_STATE), GET_TCB_FIELD(tcb, T_SRTT), GET_TCB_FIELD(tcb, SND_SCALE), GET_TCB_FIELD(tcb, RCV_SCALE), cpl->cookie); goto done; } MPASS(te->te_flags & TE_RPL_PENDING); te->te_flags &= ~TE_RPL_PENDING; if (remove) { remove_tcb_histent(te); } else { update_tcb_histent(te, tcb); callout_reset(&te->te_callout, hz / 10, request_tcb, te); release_tcb_histent(te); } done: m_freem(m); return (0); } static void fill_tcp_info_from_tcb(struct adapter *sc, uint64_t *tcb, struct tcp_info *ti) { uint32_t v; ti->tcpi_state = GET_TCB_FIELD(tcb, T_STATE); v = GET_TCB_FIELD(tcb, T_SRTT); ti->tcpi_rtt = tcp_ticks_to_us(sc, v); v = GET_TCB_FIELD(tcb, T_RTTVAR); ti->tcpi_rttvar = tcp_ticks_to_us(sc, v); ti->tcpi_snd_ssthresh = GET_TCB_FIELD(tcb, SND_SSTHRESH); ti->tcpi_snd_cwnd = GET_TCB_FIELD(tcb, SND_CWND); ti->tcpi_rcv_nxt = GET_TCB_FIELD(tcb, RCV_NXT); v = GET_TCB_FIELD(tcb, TX_MAX); ti->tcpi_snd_nxt = v - GET_TCB_FIELD(tcb, SND_NXT_RAW); /* Receive window being advertised by us. */ ti->tcpi_rcv_wscale = GET_TCB_FIELD(tcb, SND_SCALE); /* Yes, SND. */ ti->tcpi_rcv_space = GET_TCB_FIELD(tcb, RCV_WND); /* Send window */ ti->tcpi_snd_wscale = GET_TCB_FIELD(tcb, RCV_SCALE); /* Yes, RCV. */ ti->tcpi_snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (get_tcb_tflags(tcb) & V_TF_RECV_SCALE(1)) ti->tcpi_snd_wnd <<= ti->tcpi_snd_wscale; else ti->tcpi_snd_wscale = 0; } static void fill_tcp_info_from_history(struct adapter *sc, struct tcb_histent *te, struct tcp_info *ti) { fill_tcp_info_from_tcb(sc, te->te_tcb, ti); } /* * Reads the TCB for the given tid using a memory window and copies it to 'buf' * in the same format as CPL_GET_TCB_RPL. */ static void read_tcb_using_memwin(struct adapter *sc, u_int tid, uint64_t *buf) { int i, j, k, rc; uint32_t addr; u_char *tcb, tmp; MPASS(tid < sc->tids.ntids); addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE; rc = read_via_memwin(sc, 2, addr, (uint32_t *)buf, TCB_SIZE); if (rc != 0) return; tcb = (u_char *)buf; for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) { for (k = 0; k < 16; k++) { tmp = tcb[i + k]; tcb[i + k] = tcb[j + k]; tcb[j + k] = tmp; } } } static void fill_tcp_info(struct adapter *sc, u_int tid, struct tcp_info *ti) { uint64_t tcb[TCB_SIZE / sizeof(uint64_t)]; struct tcb_histent *te; ti->tcpi_toe_tid = tid; te = lookup_tcb_histent(sc, tid, false); if (te != NULL) { fill_tcp_info_from_history(sc, te, ti); release_tcb_histent(te); } else { if (!(sc->debug_flags & DF_DISABLE_TCB_CACHE)) { /* XXX: tell firmware to flush TCB cache. */ } read_tcb_using_memwin(sc, tid, tcb); fill_tcp_info_from_tcb(sc, tcb, ti); } } /* * Called by the kernel to allow the TOE driver to "refine" values filled up in * the tcp_info for an offloaded connection. */ static void t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(tp->t_inpcb); MPASS(ti != NULL); fill_tcp_info(sc, toep->tid, ti); } #ifdef KERN_TLS static int t4_alloc_tls_session(struct toedev *tod, struct tcpcb *tp, struct ktls_session *tls, int direction) { struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(tp->t_inpcb); MPASS(tls != NULL); return (tls_alloc_ktls(toep, tls, direction)); } #endif /* SET_TCB_FIELD sent as a ULP command looks like this */ #define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) static void * mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, uint64_t word, uint64_t mask, uint64_t val, uint32_t tid) { struct ulptx_idata *ulpsc; struct cpl_set_tcb_field_core *req; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*req)); req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); req->reply_ctrl = htobe16(V_NO_REPLY(1)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); req->mask = htobe64(mask); req->val = htobe64(val); ulpsc = (struct ulptx_idata *)(req + 1); if (LEN__SET_TCB_FIELD_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } static void send_mss_flowc_wr(struct adapter *sc, struct toepcb *toep) { struct wrq_cookie cookie; struct fw_flowc_wr *flowc; struct ofld_tx_sdesc *txsd; const int flowclen = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval); const int flowclen16 = howmany(flowclen, 16); if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0) { CH_ERR(sc, "%s: tid %u out of tx credits (%d, %d).\n", __func__, toep->tid, toep->tx_credits, toep->txsd_avail); return; } flowc = start_wrq_wr(&toep->ofld_txq->wrq, flowclen16, &cookie); if (__predict_false(flowc == NULL)) { CH_ERR(sc, "ENOMEM in %s for tid %u.\n", __func__, toep->tid); return; } flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(1)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[0].val = htobe32(toep->params.emss); txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = flowclen16; txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; commit_wrq_wr(&toep->ofld_txq->wrq, flowc, &cookie); } static void t4_pmtu_update(struct toedev *tod, struct tcpcb *tp, tcp_seq seq, int mtu) { struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int idx, len; struct wrq_cookie cookie; struct inpcb *inp = tp->t_inpcb; struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); unsigned short *mtus = &sc->params.mtus[0]; INP_WLOCK_ASSERT(inp); MPASS(mtu > 0); /* kernel is supposed to provide something usable. */ /* tp->snd_una and snd_max are in host byte order too. */ seq = be32toh(seq); CTR6(KTR_CXGBE, "%s: tid %d, seq 0x%08x, mtu %u, mtu_idx %u (%d)", __func__, toep->tid, seq, mtu, toep->params.mtu_idx, mtus[toep->params.mtu_idx]); if (ulp_mode(toep) == ULP_MODE_NONE && /* XXX: Read TCB otherwise? */ (SEQ_LT(seq, tp->snd_una) || SEQ_GEQ(seq, tp->snd_max))) { CTR5(KTR_CXGBE, "%s: tid %d, seq 0x%08x not in range [0x%08x, 0x%08x).", __func__, toep->tid, seq, tp->snd_una, tp->snd_max); return; } /* Find the best mtu_idx for the suggested MTU. */ for (idx = 0; idx < NMTUS - 1 && mtus[idx + 1] <= mtu; idx++) continue; if (idx >= toep->params.mtu_idx) return; /* Never increase the PMTU (just like the kernel). */ /* * We'll send a compound work request with 2 SET_TCB_FIELDs -- the first * one updates the mtu_idx and the second one triggers a retransmit. */ len = sizeof(*wrh) + 2 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); wrh = start_wrq_wr(toep->ctrlq, howmany(len, 16), &cookie); if (wrh == NULL) { CH_ERR(sc, "failed to change mtu_idx of tid %d (%u -> %u).\n", toep->tid, toep->params.mtu_idx, idx); return; } INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_T_MAXSEG, V_TCB_T_MAXSEG(M_TCB_T_MAXSEG), V_TCB_T_MAXSEG(idx), toep->tid); ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_TIMESTAMP, V_TCB_TIMESTAMP(0x7FFFFULL << 11), 0, toep->tid); commit_wrq_wr(toep->ctrlq, wrh, &cookie); /* Update the software toepcb and tcpcb. */ toep->params.mtu_idx = idx; tp->t_maxseg = mtus[toep->params.mtu_idx]; if (inp->inp_inc.inc_flags & INC_ISIPV6) tp->t_maxseg -= sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else tp->t_maxseg -= sizeof(struct ip) + sizeof(struct tcphdr); toep->params.emss = tp->t_maxseg; if (tp->t_flags & TF_RCVD_TSTMP) toep->params.emss -= TCPOLEN_TSTAMP_APPA; /* Update the firmware flowc. */ send_mss_flowc_wr(sc, toep); /* Update the MTU in the kernel's hostcache. */ if (sc->tt.update_hc_on_pmtu_change != 0) { struct in_conninfo inc = {0}; inc.inc_fibnum = inp->inp_inc.inc_fibnum; if (inp->inp_inc.inc_flags & INC_ISIPV6) { inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = inp->inp_inc.inc6_faddr; } else { inc.inc_faddr = inp->inp_inc.inc_faddr; } tcp_hc_updatemtu(&inc, mtu); } CTR6(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u), t_maxseg %u, emss %u", __func__, toep->tid, toep->params.mtu_idx, mtus[toep->params.mtu_idx], tp->t_maxseg, toep->params.emss); } /* * The TOE driver will not receive any more CPLs for the tid associated with the * toepcb; release the hold on the inpcb. */ void final_cpl_received(struct toepcb *toep) { struct inpcb *inp = toep->inp; KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_CPL_PENDING, ("%s: CPL not pending already?", __func__)); CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); if (ulp_mode(toep) == ULP_MODE_TCPDDP) release_ddp_resources(toep); else if (ulp_mode(toep) == ULP_MODE_TLS) tls_detach(toep); toep->inp = NULL; toep->flags &= ~TPF_CPL_PENDING; mbufq_drain(&toep->ulp_pduq); mbufq_drain(&toep->ulp_pdu_reclaimq); if (!(toep->flags & TPF_ATTACHED)) release_offload_resources(toep); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); } void insert_tid(struct adapter *sc, int tid, void *ctx, int ntids) { struct tid_info *t = &sc->tids; MPASS(tid >= t->tid_base); MPASS(tid - t->tid_base < t->ntids); t->tid_tab[tid - t->tid_base] = ctx; atomic_add_int(&t->tids_in_use, ntids); } void * lookup_tid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; return (t->tid_tab[tid - t->tid_base]); } void update_tid(struct adapter *sc, int tid, void *ctx) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = ctx; } void remove_tid(struct adapter *sc, int tid, int ntids) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = NULL; atomic_subtract_int(&t->tids_in_use, ntids); } /* * What mtu_idx to use, given a 4-tuple. Note that both s->mss and tcp_mssopt * have the MSS that we should advertise in our SYN. Advertised MSS doesn't * account for any TCP options so the effective MSS (only payload, no headers or * options) could be different. */ static int find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, struct offload_settings *s) { unsigned short *mtus = &sc->params.mtus[0]; int i, mss, mtu; MPASS(inc != NULL); mss = s->mss > 0 ? s->mss : tcp_mssopt(inc); if (inc->inc_flags & INC_ISIPV6) mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr); for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++) continue; return (i); } /* * Determine the receive window size for a socket. */ u_long select_rcv_wnd(struct socket *so) { unsigned long wnd; SOCKBUF_LOCK_ASSERT(&so->so_rcv); wnd = sbspace(&so->so_rcv); if (wnd < MIN_RCV_WND) wnd = MIN_RCV_WND; return min(wnd, MAX_RCV_WND); } int select_rcv_wscale(void) { int wscale = 0; unsigned long space = sb_max; if (space > MAX_RCV_WND) space = MAX_RCV_WND; while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) wscale++; return (wscale); } __be64 calc_options0(struct vi_info *vi, struct conn_params *cp) { uint64_t opt0 = 0; opt0 |= F_TCAM_BYPASS; MPASS(cp->wscale >= 0 && cp->wscale <= M_WND_SCALE); opt0 |= V_WND_SCALE(cp->wscale); MPASS(cp->mtu_idx >= 0 && cp->mtu_idx < NMTUS); opt0 |= V_MSS_IDX(cp->mtu_idx); MPASS(cp->ulp_mode >= 0 && cp->ulp_mode <= M_ULP_MODE); opt0 |= V_ULP_MODE(cp->ulp_mode); MPASS(cp->opt0_bufsize >= 0 && cp->opt0_bufsize <= M_RCV_BUFSIZ); opt0 |= V_RCV_BUFSIZ(cp->opt0_bufsize); MPASS(cp->l2t_idx >= 0 && cp->l2t_idx < vi->adapter->vres.l2t.size); opt0 |= V_L2T_IDX(cp->l2t_idx); opt0 |= V_SMAC_SEL(vi->smt_idx); opt0 |= V_TX_CHAN(vi->pi->tx_chan); MPASS(cp->keepalive == 0 || cp->keepalive == 1); opt0 |= V_KEEP_ALIVE(cp->keepalive); MPASS(cp->nagle == 0 || cp->nagle == 1); opt0 |= V_NAGLE(cp->nagle); return (htobe64(opt0)); } __be32 calc_options2(struct vi_info *vi, struct conn_params *cp) { uint32_t opt2 = 0; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; /* * rx flow control, rx coalesce, congestion control, and tx pace are all * explicitly set by the driver. On T5+ the ISS is also set by the * driver to the value picked by the kernel. */ if (is_t4(sc)) { opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID; opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID; } else { opt2 |= F_T5_OPT_2_VALID; /* all 4 valid */ opt2 |= F_T5_ISS; /* ISS provided in CPL */ } MPASS(cp->sack == 0 || cp->sack == 1); opt2 |= V_SACK_EN(cp->sack); MPASS(cp->tstamp == 0 || cp->tstamp == 1); opt2 |= V_TSTAMPS_EN(cp->tstamp); if (cp->wscale > 0) opt2 |= F_WND_SCALE_EN; MPASS(cp->ecn == 0 || cp->ecn == 1); opt2 |= V_CCTRL_ECN(cp->ecn); /* XXX: F_RX_CHANNEL for multiple rx c-chan support goes here. */ opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]); opt2 |= V_PACE(0); opt2 |= F_RSS_QUEUE_VALID; opt2 |= V_RSS_QUEUE(sc->sge.ofld_rxq[cp->rxq_idx].iq.abs_id); MPASS(cp->cong_algo >= 0 && cp->cong_algo <= M_CONG_CNTRL); opt2 |= V_CONG_CNTRL(cp->cong_algo); MPASS(cp->rx_coalesce == 0 || cp->rx_coalesce == 1); if (cp->rx_coalesce == 1) opt2 |= V_RX_COALESCE(M_RX_COALESCE); opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0); #ifdef USE_DDP_RX_FLOW_CONTROL if (cp->ulp_mode == ULP_MODE_TCPDDP) opt2 |= F_RX_FC_DDP; #endif return (htobe32(opt2)); } uint64_t select_ntuple(struct vi_info *vi, struct l2t_entry *e) { struct adapter *sc = vi->adapter; struct tp_params *tp = &sc->params.tp; uint64_t ntuple = 0; /* * Initialize each of the fields which we care about which are present * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && EVL_VLANOFTAG(e->vlan) != CPL_L2T_VLAN_NONE) ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift; if (tp->port_shift >= 0) ntuple |= (uint64_t)e->lport << tp->port_shift; if (tp->protocol_shift >= 0) ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; if (tp->vnic_shift >= 0 && tp->vnic_mode == FW_VNIC_MODE_PF_VF) { ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vi->vin) | V_FT_VNID_ID_PF(sc->pf) | V_FT_VNID_ID_VLD(vi->vfvld)) << tp->vnic_shift; } if (is_t4(sc)) return (htobe32((uint32_t)ntuple)); else return (htobe64(V_FILTER_TUPLE(ntuple))); } static int is_tls_sock(struct socket *so, struct adapter *sc) { struct inpcb *inp = sotoinpcb(so); int i, rc; /* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */ rc = 0; ADAPTER_LOCK(sc); for (i = 0; i < sc->tt.num_tls_rx_ports; i++) { if (inp->inp_lport == htons(sc->tt.tls_rx_ports[i]) || inp->inp_fport == htons(sc->tt.tls_rx_ports[i])) { rc = 1; break; } } ADAPTER_UNLOCK(sc); return (rc); } /* * Initialize various connection parameters. */ void init_conn_params(struct vi_info *vi , struct offload_settings *s, struct in_conninfo *inc, struct socket *so, const struct tcp_options *tcpopt, int16_t l2t_idx, struct conn_params *cp) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tom_tunables *tt = &sc->tt; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); u_long wnd; MPASS(s->offload != 0); /* Congestion control algorithm */ if (s->cong_algo >= 0) cp->cong_algo = s->cong_algo & M_CONG_CNTRL; else if (sc->tt.cong_algorithm >= 0) cp->cong_algo = tt->cong_algorithm & M_CONG_CNTRL; else { struct cc_algo *cc = CC_ALGO(tp); if (strcasecmp(cc->name, "reno") == 0) cp->cong_algo = CONG_ALG_RENO; else if (strcasecmp(cc->name, "tahoe") == 0) cp->cong_algo = CONG_ALG_TAHOE; if (strcasecmp(cc->name, "newreno") == 0) cp->cong_algo = CONG_ALG_NEWRENO; if (strcasecmp(cc->name, "highspeed") == 0) cp->cong_algo = CONG_ALG_HIGHSPEED; else { /* * Use newreno in case the algorithm selected by the * host stack is not supported by the hardware. */ cp->cong_algo = CONG_ALG_NEWRENO; } } /* Tx traffic scheduling class. */ if (s->sched_class >= 0 && s->sched_class < sc->chip_params->nsched_cls) { cp->tc_idx = s->sched_class; } else cp->tc_idx = -1; /* Nagle's algorithm. */ if (s->nagle >= 0) cp->nagle = s->nagle > 0 ? 1 : 0; else cp->nagle = tp->t_flags & TF_NODELAY ? 0 : 1; /* TCP Keepalive. */ if (V_tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE) cp->keepalive = 1; else cp->keepalive = 0; /* Optimization that's specific to T5 @ 40G. */ if (tt->tx_align >= 0) cp->tx_align = tt->tx_align > 0 ? 1 : 0; else if (chip_id(sc) == CHELSIO_T5 && (port_top_speed(pi) > 10 || sc->params.nports > 2)) cp->tx_align = 1; else cp->tx_align = 0; /* ULP mode. */ if (can_tls_offload(sc) && (s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc)))) cp->ulp_mode = ULP_MODE_TLS; else if (s->ddp > 0 || (s->ddp < 0 && sc->tt.ddp && (so_options_get(so) & SO_NO_DDP) == 0)) cp->ulp_mode = ULP_MODE_TCPDDP; else cp->ulp_mode = ULP_MODE_NONE; /* Rx coalescing. */ if (s->rx_coalesce >= 0) cp->rx_coalesce = s->rx_coalesce > 0 ? 1 : 0; else if (cp->ulp_mode == ULP_MODE_TLS) cp->rx_coalesce = 0; else if (tt->rx_coalesce >= 0) cp->rx_coalesce = tt->rx_coalesce > 0 ? 1 : 0; else cp->rx_coalesce = 1; /* default */ /* * Index in the PMTU table. This controls the MSS that we announce in * our SYN initially, but after ESTABLISHED it controls the MSS that we * use to send data. */ cp->mtu_idx = find_best_mtu_idx(sc, inc, s); /* Tx queue for this connection. */ if (s->txq >= 0 && s->txq < vi->nofldtxq) cp->txq_idx = s->txq; else cp->txq_idx = arc4random() % vi->nofldtxq; cp->txq_idx += vi->first_ofld_txq; /* Rx queue for this connection. */ if (s->rxq >= 0 && s->rxq < vi->nofldrxq) cp->rxq_idx = s->rxq; else cp->rxq_idx = arc4random() % vi->nofldrxq; cp->rxq_idx += vi->first_ofld_rxq; if (SOLISTENING(so)) { /* Passive open */ MPASS(tcpopt != NULL); /* TCP timestamp option */ if (tcpopt->tstamp && (s->tstamp > 0 || (s->tstamp < 0 && V_tcp_do_rfc1323))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (tcpopt->sack && (s->sack > 0 || (s->sack < 0 && V_tcp_do_sack))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling. */ if (tcpopt->wsf > 0 && tcpopt->wsf < 15 && V_tcp_do_rfc1323) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (tcpopt->ecn && /* XXX: review. */ (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn))) cp->ecn = 1; else cp->ecn = 0; wnd = max(so->sol_sbrcv_hiwat, MIN_RCV_WND); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else if (so->sol_sbsnd_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->sol_sbsnd_hiwat; } else { /* Active open */ /* TCP timestamp option */ if (s->tstamp > 0 || (s->tstamp < 0 && (tp->t_flags & TF_REQ_TSTMP))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (s->sack > 0 || (s->sack < 0 && (tp->t_flags & TF_SACK_PERMIT))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling */ if (tp->t_flags & TF_REQ_SCALE) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn == 1)) cp->ecn = 1; else cp->ecn = 0; SOCKBUF_LOCK(&so->so_rcv); wnd = max(select_rcv_wnd(so), MIN_RCV_WND); SOCKBUF_UNLOCK(&so->so_rcv); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); } } cp->l2t_idx = l2t_idx; /* This will be initialized on ESTABLISHED. */ cp->emss = 0; } int negative_advice(int status) { return (status == CPL_ERR_RTX_NEG_ADVICE || status == CPL_ERR_PERSIST_NEG_ADVICE || status == CPL_ERR_KEEPALV_NEG_ADVICE); } static int alloc_tid_tab(struct tid_info *t, int flags) { MPASS(t->ntids > 0); MPASS(t->tid_tab == NULL); t->tid_tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE, M_ZERO | flags); if (t->tid_tab == NULL) return (ENOMEM); atomic_store_rel_int(&t->tids_in_use, 0); return (0); } static void free_tid_tab(struct tid_info *t) { KASSERT(t->tids_in_use == 0, ("%s: %d tids still in use.", __func__, t->tids_in_use)); free(t->tid_tab, M_CXGBE); t->tid_tab = NULL; } static int alloc_stid_tab(struct tid_info *t, int flags) { MPASS(t->nstids > 0); MPASS(t->stid_tab == NULL); t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE, M_ZERO | flags); if (t->stid_tab == NULL) return (ENOMEM); mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); t->stids_in_use = 0; TAILQ_INIT(&t->stids); t->nstids_free_head = t->nstids; return (0); } static void free_stid_tab(struct tid_info *t) { KASSERT(t->stids_in_use == 0, ("%s: %d tids still in use.", __func__, t->stids_in_use)); if (mtx_initialized(&t->stid_lock)) mtx_destroy(&t->stid_lock); free(t->stid_tab, M_CXGBE); t->stid_tab = NULL; } static void free_tid_tabs(struct tid_info *t) { free_tid_tab(t); free_stid_tab(t); } static int alloc_tid_tabs(struct tid_info *t) { int rc; rc = alloc_tid_tab(t, M_NOWAIT); if (rc != 0) goto failed; rc = alloc_stid_tab(t, M_NOWAIT); if (rc != 0) goto failed; return (0); failed: free_tid_tabs(t); return (rc); } static inline void alloc_tcb_history(struct adapter *sc, struct tom_data *td) { if (sc->tids.ntids == 0 || sc->tids.ntids > 1024) return; rw_init(&td->tcb_history_lock, "TCB history"); td->tcb_history = malloc(sc->tids.ntids * sizeof(*td->tcb_history), M_CXGBE, M_ZERO | M_NOWAIT); td->dupack_threshold = G_DUPACKTHRESH(t4_read_reg(sc, A_TP_PARA_REG0)); } static inline void free_tcb_history(struct adapter *sc, struct tom_data *td) { #ifdef INVARIANTS int i; if (td->tcb_history != NULL) { for (i = 0; i < sc->tids.ntids; i++) { MPASS(td->tcb_history[i] == NULL); } } #endif free(td->tcb_history, M_CXGBE); if (rw_initialized(&td->tcb_history_lock)) rw_destroy(&td->tcb_history_lock); } static void free_tom_data(struct adapter *sc, struct tom_data *td) { ASSERT_SYNCHRONIZED_OP(sc); KASSERT(TAILQ_EMPTY(&td->toep_list), ("%s: TOE PCB list is not empty.", __func__)); KASSERT(td->lctx_count == 0, ("%s: lctx hash table is not empty.", __func__)); t4_free_ppod_region(&td->pr); if (td->listen_mask != 0) hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); if (mtx_initialized(&td->unsent_wr_lock)) mtx_destroy(&td->unsent_wr_lock); if (mtx_initialized(&td->lctx_hash_lock)) mtx_destroy(&td->lctx_hash_lock); if (mtx_initialized(&td->toep_list_lock)) mtx_destroy(&td->toep_list_lock); free_tcb_history(sc, td); free_tid_tabs(&sc->tids); free(td, M_CXGBE); } static char * prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen, int *buflen) { char *pkt; struct tcphdr *th; int ipv6, len; const int maxlen = max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) + max(sizeof(struct ip), sizeof(struct ip6_hdr)) + sizeof(struct tcphdr); MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN); pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT); if (pkt == NULL) return (NULL); ipv6 = inp->inp_vflag & INP_IPV6; len = 0; if (EVL_VLANOFTAG(vtag) == 0xfff) { struct ether_header *eh = (void *)pkt; if (ipv6) eh->ether_type = htons(ETHERTYPE_IPV6); else eh->ether_type = htons(ETHERTYPE_IP); len += sizeof(*eh); } else { struct ether_vlan_header *evh = (void *)pkt; evh->evl_encap_proto = htons(ETHERTYPE_VLAN); evh->evl_tag = htons(vtag); if (ipv6) evh->evl_proto = htons(ETHERTYPE_IPV6); else evh->evl_proto = htons(ETHERTYPE_IP); len += sizeof(*evh); } if (ipv6) { struct ip6_hdr *ip6 = (void *)&pkt[len]; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; if (open_type == OPEN_TYPE_ACTIVE) { ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } else if (open_type == OPEN_TYPE_LISTEN) { ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = ip6->ip6_src; } len += sizeof(*ip6); } else { struct ip *ip = (void *)&pkt[len]; ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr)); ip->ip_ttl = inp->inp_ip_ttl; ip->ip_p = IPPROTO_TCP; if (open_type == OPEN_TYPE_ACTIVE) { ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } else if (open_type == OPEN_TYPE_LISTEN) { ip->ip_src = inp->inp_laddr; ip->ip_dst = ip->ip_src; } len += sizeof(*ip); } th = (void *)&pkt[len]; if (open_type == OPEN_TYPE_ACTIVE) { th->th_sport = inp->inp_lport; /* network byte order already */ th->th_dport = inp->inp_fport; /* ditto */ } else if (open_type == OPEN_TYPE_LISTEN) { th->th_sport = inp->inp_lport; /* network byte order already */ th->th_dport = th->th_sport; } len += sizeof(th); *pktlen = *buflen = len; return (pkt); } const struct offload_settings * lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m, uint16_t vtag, struct inpcb *inp) { const struct t4_offload_policy *op; char *pkt; struct offload_rule *r; int i, matched, pktlen, buflen; static const struct offload_settings allow_offloading_settings = { .offload = 1, .rx_coalesce = -1, .cong_algo = -1, .sched_class = -1, .tstamp = -1, .sack = -1, .nagle = -1, .ecn = -1, .ddp = -1, .tls = -1, .txq = -1, .rxq = -1, .mss = -1, }; static const struct offload_settings disallow_offloading_settings = { .offload = 0, /* rest is irrelevant when offload is off. */ }; rw_assert(&sc->policy_lock, RA_LOCKED); /* * If there's no Connection Offloading Policy attached to the device * then we need to return a default static policy. If * "cop_managed_offloading" is true, then we need to disallow * offloading until a COP is attached to the device. Otherwise we * allow offloading ... */ op = sc->policy; if (op == NULL) { if (sc->tt.cop_managed_offloading) return (&disallow_offloading_settings); else return (&allow_offloading_settings); } switch (open_type) { case OPEN_TYPE_ACTIVE: case OPEN_TYPE_LISTEN: pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen); break; case OPEN_TYPE_PASSIVE: MPASS(m != NULL); pkt = mtod(m, char *); MPASS(*pkt == CPL_PASS_ACCEPT_REQ); pkt += sizeof(struct cpl_pass_accept_req); pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req); buflen = m->m_len - sizeof(struct cpl_pass_accept_req); break; default: MPASS(0); return (&disallow_offloading_settings); } if (pkt == NULL || pktlen == 0 || buflen == 0) return (&disallow_offloading_settings); matched = 0; r = &op->rule[0]; for (i = 0; i < op->nrules; i++, r++) { if (r->open_type != open_type && r->open_type != OPEN_TYPE_DONTCARE) { continue; } matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen); if (matched) break; } if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN) free(pkt, M_CXGBE); return (matched ? &r->settings : &disallow_offloading_settings); } static void reclaim_wr_resources(void *arg, int count) { struct tom_data *td = arg; STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list); struct cpl_act_open_req *cpl; u_int opcode, atid, tid; struct wrqe *wr; struct adapter *sc = td_adapter(td); mtx_lock(&td->unsent_wr_lock); STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe); mtx_unlock(&td->unsent_wr_lock); while ((wr = STAILQ_FIRST(&twr_list)) != NULL) { STAILQ_REMOVE_HEAD(&twr_list, link); cpl = wrtod(wr); opcode = GET_OPCODE(cpl); switch (opcode) { case CPL_ACT_OPEN_REQ: case CPL_ACT_OPEN_REQ6: atid = G_TID_TID(be32toh(OPCODE_TID(cpl))); CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid); act_open_failure_cleanup(sc, atid, EHOSTUNREACH); free(wr, M_CXGBE); break; case CPL_PASS_ACCEPT_RPL: tid = GET_TID(cpl); CTR2(KTR_CXGBE, "%s: tid %u ", __func__, tid); synack_failure_cleanup(sc, tid); free(wr, M_CXGBE); break; default: log(LOG_ERR, "%s: leaked work request %p, wr_len %d, " "opcode %x\n", __func__, wr, wr->wr_len, opcode); /* WR not freed here; go look at it with a debugger. */ } } } /* * Ground control to Major TOM * Commencing countdown, engines on */ static int t4_tom_activate(struct adapter *sc) { struct tom_data *td; struct toedev *tod; struct vi_info *vi; int i, rc, v; ASSERT_SYNCHRONIZED_OP(sc); /* per-adapter softc for TOM */ td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); if (td == NULL) return (ENOMEM); /* List of TOE PCBs and associated lock */ mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); TAILQ_INIT(&td->toep_list); /* Listen context */ mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, &td->listen_mask, HASH_NOWAIT); /* List of WRs for which L2 resolution failed */ mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF); STAILQ_INIT(&td->unsent_wr_list); TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td); /* TID tables */ rc = alloc_tid_tabs(&sc->tids); if (rc != 0) goto done; rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp, t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods"); if (rc != 0) goto done; t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK, V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask); alloc_tcb_history(sc, td); /* toedev ops */ tod = &td->tod; init_toedev(tod); tod->tod_softc = sc; tod->tod_connect = t4_connect; tod->tod_listen_start = t4_listen_start; tod->tod_listen_stop = t4_listen_stop; tod->tod_rcvd = t4_rcvd; tod->tod_output = t4_tod_output; tod->tod_send_rst = t4_send_rst; tod->tod_send_fin = t4_send_fin; tod->tod_pcb_detach = t4_pcb_detach; tod->tod_l2_update = t4_l2_update; tod->tod_syncache_added = t4_syncache_added; tod->tod_syncache_removed = t4_syncache_removed; tod->tod_syncache_respond = t4_syncache_respond; tod->tod_offload_socket = t4_offload_socket; tod->tod_ctloutput = t4_ctloutput; tod->tod_tcp_info = t4_tcp_info; #ifdef KERN_TLS tod->tod_alloc_tls_session = t4_alloc_tls_session; #endif tod->tod_pmtu_update = t4_pmtu_update; for_each_port(sc, i) { for_each_vi(sc->port[i], v, vi) { TOEDEV(vi->ifp) = &td->tod; } } sc->tom_softc = td; register_toedev(sc->tom_softc); done: if (rc != 0) free_tom_data(sc, td); return (rc); } static int t4_tom_deactivate(struct adapter *sc) { int rc = 0; struct tom_data *td = sc->tom_softc; ASSERT_SYNCHRONIZED_OP(sc); if (td == NULL) return (0); /* XXX. KASSERT? */ if (sc->offload_map != 0) return (EBUSY); /* at least one port has IFCAP_TOE enabled */ if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI)) return (EBUSY); /* both iWARP and iSCSI rely on the TOE. */ mtx_lock(&td->toep_list_lock); if (!TAILQ_EMPTY(&td->toep_list)) rc = EBUSY; mtx_unlock(&td->toep_list_lock); mtx_lock(&td->lctx_hash_lock); if (td->lctx_count > 0) rc = EBUSY; mtx_unlock(&td->lctx_hash_lock); taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); mtx_lock(&td->unsent_wr_lock); if (!STAILQ_EMPTY(&td->unsent_wr_list)) rc = EBUSY; mtx_unlock(&td->unsent_wr_lock); if (rc == 0) { unregister_toedev(sc->tom_softc); free_tom_data(sc, td); sc->tom_softc = NULL; } return (rc); } static int t4_aio_queue_tom(struct socket *so, struct kaiocb *job) { struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; int error; if (ulp_mode(toep) == ULP_MODE_TCPDDP) { error = t4_aio_queue_ddp(so, job); if (error != EOPNOTSUPP) return (error); } return (t4_aio_queue_aiotx(so, job)); } -static int -t4_ctloutput_tom(struct socket *so, struct sockopt *sopt) -{ - - if (sopt->sopt_level != IPPROTO_TCP) - return (tcp_ctloutput(so, sopt)); - - switch (sopt->sopt_name) { - case TCP_TLSOM_SET_TLS_CONTEXT: - case TCP_TLSOM_GET_TLS_TOM: - case TCP_TLSOM_CLR_TLS_TOM: - case TCP_TLSOM_CLR_QUIES: - return (t4_ctloutput_tls(so, sopt)); - default: - return (tcp_ctloutput(so, sopt)); - } -} - static int t4_tom_mod_load(void) { /* CPL handlers */ t4_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2, CPL_COOKIE_TOM); t4_init_connect_cpl_handlers(); t4_init_listen_cpl_handlers(); t4_init_cpl_io_handlers(); t4_ddp_mod_load(); t4_tls_mod_load(); tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); if (tcp_protosw == NULL) return (ENOPROTOOPT); bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw)); bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs)); toe_usrreqs.pru_aio_queue = t4_aio_queue_tom; - toe_protosw.pr_ctloutput = t4_ctloutput_tom; toe_protosw.pr_usrreqs = &toe_usrreqs; tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM); if (tcp6_protosw == NULL) return (ENOPROTOOPT); bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw)); bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs)); toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom; - toe6_protosw.pr_ctloutput = t4_ctloutput_tom; toe6_protosw.pr_usrreqs = &toe6_usrreqs; return (t4_register_uld(&tom_uld_info)); } static void tom_uninit(struct adapter *sc, void *arg __unused) { if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun")) return; /* Try to free resources (works only if no port has IFCAP_TOE) */ if (uld_active(sc, ULD_TOM)) t4_deactivate_uld(sc, ULD_TOM); end_synchronized_op(sc, 0); } static int t4_tom_mod_unload(void) { t4_iterate(tom_uninit, NULL); if (t4_unregister_uld(&tom_uld_info) == EBUSY) return (EBUSY); t4_tls_mod_unload(); t4_ddp_mod_unload(); t4_uninit_connect_cpl_handlers(); t4_uninit_listen_cpl_handlers(); t4_uninit_cpl_io_handlers(); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, NULL, CPL_COOKIE_TOM); t4_register_cpl_handler(CPL_GET_TCB_RPL, NULL); return (0); } #endif /* TCP_OFFLOAD */ static int t4_tom_modevent(module_t mod, int cmd, void *arg) { int rc = 0; #ifdef TCP_OFFLOAD switch (cmd) { case MOD_LOAD: rc = t4_tom_mod_load(); break; case MOD_UNLOAD: rc = t4_tom_mod_unload(); break; default: rc = EINVAL; } #else printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); rc = EOPNOTSUPP; #endif return (rc); } static moduledata_t t4_tom_moddata= { "t4_tom", t4_tom_modevent, 0 }; MODULE_VERSION(t4_tom, 1); MODULE_DEPEND(t4_tom, toecore, 1, 1, 1); MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY); diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index 21cfb1df6e16..5dd04ff548e5 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -1,483 +1,479 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012, 2015 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_TOM_H__ #define __T4_TOM_H__ #include #include "common/t4_hw.h" #include "common/t4_msg.h" #include "tom/t4_tls.h" #define LISTEN_HASH_SIZE 32 /* * Min receive window. We want it to be large enough to accommodate receive * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. */ #define MIN_RCV_WND (24 * 1024U) /* * Max receive window supported by HW in bytes. Only a small part of it can * be set through option0, the rest needs to be set through RX_DATA_ACK. */ #define MAX_RCV_WND ((1U << 27) - 1) #define DDP_RSVD_WIN (16 * 1024U) #define SB_DDP_INDICATE SB_IN_TOE /* soreceive must respond to indicate */ #define USE_DDP_RX_FLOW_CONTROL #define PPOD_SZ(n) ((n) * sizeof(struct pagepod)) #define PPOD_SIZE (PPOD_SZ(1)) /* TOE PCB flags */ enum { TPF_ATTACHED = (1 << 0), /* a tcpcb refers to this toepcb */ TPF_FLOWC_WR_SENT = (1 << 1), /* firmware flow context WR sent */ TPF_TX_DATA_SENT = (1 << 2), /* some data sent */ TPF_TX_SUSPENDED = (1 << 3), /* tx suspended for lack of resources */ TPF_SEND_FIN = (1 << 4), /* send FIN after all pending data */ TPF_FIN_SENT = (1 << 5), /* FIN has been sent */ TPF_ABORT_SHUTDOWN = (1 << 6), /* connection abort is in progress */ TPF_CPL_PENDING = (1 << 7), /* haven't received the last CPL */ TPF_SYNQE = (1 << 8), /* synq_entry, not really a toepcb */ TPF_SYNQE_EXPANDED = (1 << 9), /* toepcb ready, tid context updated */ TPF_FORCE_CREDITS = (1 << 10), /* always send credits */ TPF_KTLS = (1 << 11), /* send TLS records from KTLS */ TPF_INITIALIZED = (1 << 12), /* init_toepcb has been called */ TPF_TLS_RECEIVE = (1 << 13), /* should receive TLS records */ TPF_TLS_ESTABLISHED = (1 << 14), /* TLS handshake timer initialized */ }; enum { DDP_OK = (1 << 0), /* OK to turn on DDP */ DDP_SC_REQ = (1 << 1), /* state change (on/off) requested */ DDP_ON = (1 << 2), /* DDP is turned on */ DDP_BUF0_ACTIVE = (1 << 3), /* buffer 0 in use (not invalidated) */ DDP_BUF1_ACTIVE = (1 << 4), /* buffer 1 in use (not invalidated) */ DDP_TASK_ACTIVE = (1 << 5), /* requeue task is queued / running */ DDP_DEAD = (1 << 6), /* toepcb is shutting down */ }; struct ctl_sg_entry; struct sockopt; struct offload_settings; /* * Connection parameters for an offloaded connection. These are mostly (but not * all) hardware TOE parameters. */ struct conn_params { int8_t rx_coalesce; int8_t cong_algo; int8_t tc_idx; int8_t tstamp; int8_t sack; int8_t nagle; int8_t keepalive; int8_t wscale; int8_t ecn; int8_t mtu_idx; int8_t ulp_mode; int8_t tx_align; int16_t txq_idx; /* ofld_txq = &sc->sge.ofld_txq[txq_idx] */ int16_t rxq_idx; /* ofld_rxq = &sc->sge.ofld_rxq[rxq_idx] */ int16_t l2t_idx; uint16_t emss; uint16_t opt0_bufsize; u_int sndbuf; /* controls TP tx pages */ }; struct ofld_tx_sdesc { uint32_t plen; /* payload length */ uint8_t tx_credits; /* firmware tx credits (unit is 16B) */ - void *iv_buffer; /* optional buffer holding IVs for TLS */ }; struct ppod_region { u_int pr_start; u_int pr_len; u_int pr_page_shift[4]; uint32_t pr_tag_mask; /* hardware tagmask for this region. */ uint32_t pr_invalid_bit; /* OR with this to invalidate tag. */ uint32_t pr_alias_mask; /* AND with tag to get alias bits. */ u_int pr_alias_shift; /* shift this much for first alias bit. */ vmem_t *pr_arena; }; struct ppod_reservation { struct ppod_region *prsv_pr; uint32_t prsv_tag; /* Full tag: pgsz, alias, tag, color */ u_int prsv_nppods; }; struct pageset { TAILQ_ENTRY(pageset) link; vm_page_t *pages; int npages; int flags; int offset; /* offset in first page */ int len; struct ppod_reservation prsv; struct vmspace *vm; vm_offset_t start; u_int vm_timestamp; }; TAILQ_HEAD(pagesetq, pageset); #define PS_PPODS_WRITTEN 0x0001 /* Page pods written to the card. */ struct ddp_buffer { struct pageset *ps; struct kaiocb *job; int cancel_pending; }; struct ddp_pcb { u_int flags; struct ddp_buffer db[2]; TAILQ_HEAD(, pageset) cached_pagesets; TAILQ_HEAD(, kaiocb) aiojobq; u_int waiting_count; u_int active_count; u_int cached_count; int active_id; /* the currently active DDP buffer */ struct task requeue_task; struct kaiocb *queueing; struct mtx lock; }; struct toepcb { struct tom_data *td; struct inpcb *inp; /* backpointer to host stack's PCB */ u_int flags; /* miscellaneous flags */ TAILQ_ENTRY(toepcb) link; /* toep_list */ int refcount; struct vnet *vnet; struct vi_info *vi; /* virtual interface */ struct sge_ofld_txq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ctrlq; struct l2t_entry *l2te; /* L2 table entry used by this connection */ struct clip_entry *ce; /* CLIP table entry used by this tid */ int tid; /* Connection identifier */ /* tx credit handling */ u_int tx_total; /* total tx WR credits (in 16B units) */ u_int tx_credits; /* tx WR credits (in 16B units) available */ u_int tx_nocompl; /* tx WR credits since last compl request */ u_int plen_nocompl; /* payload since last compl request */ struct conn_params params; void *ulpcb; void *ulpcb2; struct mbufq ulp_pduq; /* PDUs waiting to be sent out. */ struct mbufq ulp_pdu_reclaimq; struct ddp_pcb ddp; struct tls_ofld_info tls; TAILQ_HEAD(, kaiocb) aiotx_jobq; struct task aiotx_task; struct socket *aiotx_so; /* Tx software descriptor */ uint8_t txsd_total; uint8_t txsd_pidx; uint8_t txsd_cidx; uint8_t txsd_avail; struct ofld_tx_sdesc txsd[]; }; static inline int ulp_mode(struct toepcb *toep) { return (toep->params.ulp_mode); } #define DDP_LOCK(toep) mtx_lock(&(toep)->ddp.lock) #define DDP_UNLOCK(toep) mtx_unlock(&(toep)->ddp.lock) #define DDP_ASSERT_LOCKED(toep) mtx_assert(&(toep)->ddp.lock, MA_OWNED) /* * Compressed state for embryonic connections for a listener. */ struct synq_entry { struct listen_ctx *lctx; /* backpointer to listen ctx */ struct mbuf *syn; int flags; /* same as toepcb's tp_flags */ volatile int ok_to_respond; volatile u_int refcnt; int tid; uint32_t iss; uint32_t irs; uint32_t ts; uint32_t rss_hash; __be16 tcp_opt; /* from cpl_pass_establish */ struct toepcb *toep; struct conn_params params; }; /* listen_ctx flags */ #define LCTX_RPL_PENDING 1 /* waiting for a CPL_PASS_OPEN_RPL */ struct listen_ctx { LIST_ENTRY(listen_ctx) link; /* listen hash linkage */ volatile int refcount; int stid; struct stid_region stid_region; int flags; struct inpcb *inp; /* listening socket's inp */ struct vnet *vnet; struct sge_wrq *ctrlq; struct sge_ofld_rxq *ofld_rxq; struct clip_entry *ce; }; /* tcb_histent flags */ #define TE_RPL_PENDING 1 #define TE_ACTIVE 2 /* bits in one 8b tcb_histent sample. */ #define TS_RTO (1 << 0) #define TS_DUPACKS (1 << 1) #define TS_FASTREXMT (1 << 2) #define TS_SND_BACKLOGGED (1 << 3) #define TS_CWND_LIMITED (1 << 4) #define TS_ECN_ECE (1 << 5) #define TS_ECN_CWR (1 << 6) #define TS_RESERVED (1 << 7) /* Unused. */ struct tcb_histent { struct mtx te_lock; struct callout te_callout; uint64_t te_tcb[TCB_SIZE / sizeof(uint64_t)]; struct adapter *te_adapter; u_int te_flags; u_int te_tid; uint8_t te_pidx; uint8_t te_sample[100]; }; struct tom_data { struct toedev tod; /* toepcb's associated with this TOE device */ struct mtx toep_list_lock; TAILQ_HEAD(, toepcb) toep_list; struct mtx lctx_hash_lock; LIST_HEAD(, listen_ctx) *listen_hash; u_long listen_mask; int lctx_count; /* # of lctx in the hash table */ struct ppod_region pr; struct rwlock tcb_history_lock __aligned(CACHE_LINE_SIZE); struct tcb_histent **tcb_history; int dupack_threshold; /* WRs that will not be sent to the chip because L2 resolution failed */ struct mtx unsent_wr_lock; STAILQ_HEAD(, wrqe) unsent_wr_list; struct task reclaim_wr_resources; }; static inline struct tom_data * tod_td(struct toedev *tod) { return (__containerof(tod, struct tom_data, tod)); } static inline struct adapter * td_adapter(struct tom_data *td) { return (td->tod.tod_softc); } static inline void set_mbuf_raw_wr(struct mbuf *m, bool raw) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_per.eight[6] = raw; } static inline bool mbuf_raw_wr(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.PH_per.eight[6]); } static inline void set_mbuf_ulp_submode(struct mbuf *m, uint8_t ulp_submode) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_per.eight[0] = ulp_submode; } static inline uint8_t mbuf_ulp_submode(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.PH_per.eight[0]); } /* t4_tom.c */ struct toepcb *alloc_toepcb(struct vi_info *, int); int init_toepcb(struct vi_info *, struct toepcb *); struct toepcb *hold_toepcb(struct toepcb *); void free_toepcb(struct toepcb *); void offload_socket(struct socket *, struct toepcb *); void restore_so_proto(struct socket *, bool); void undo_offload_socket(struct socket *); void final_cpl_received(struct toepcb *); void insert_tid(struct adapter *, int, void *, int); void *lookup_tid(struct adapter *, int); void update_tid(struct adapter *, int, void *); void remove_tid(struct adapter *, int, int); u_long select_rcv_wnd(struct socket *); int select_rcv_wscale(void); void init_conn_params(struct vi_info *, struct offload_settings *, struct in_conninfo *, struct socket *, const struct tcp_options *, int16_t, struct conn_params *cp); __be64 calc_options0(struct vi_info *, struct conn_params *); __be32 calc_options2(struct vi_info *, struct conn_params *); uint64_t select_ntuple(struct vi_info *, struct l2t_entry *); int negative_advice(int); int add_tid_to_history(struct adapter *, u_int); /* t4_connect.c */ void t4_init_connect_cpl_handlers(void); void t4_uninit_connect_cpl_handlers(void); int t4_connect(struct toedev *, struct socket *, struct nhop_object *, struct sockaddr *); void act_open_failure_cleanup(struct adapter *, u_int, u_int); /* t4_listen.c */ void t4_init_listen_cpl_handlers(void); void t4_uninit_listen_cpl_handlers(void); int t4_listen_start(struct toedev *, struct tcpcb *); int t4_listen_stop(struct toedev *, struct tcpcb *); void t4_syncache_added(struct toedev *, void *); void t4_syncache_removed(struct toedev *, void *); int t4_syncache_respond(struct toedev *, void *, struct mbuf *); int do_abort_req_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); int do_abort_rpl_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); void t4_offload_socket(struct toedev *, void *, struct socket *); void synack_failure_cleanup(struct adapter *, int); /* t4_cpl_io.c */ void aiotx_init_toep(struct toepcb *); int t4_aio_queue_aiotx(struct socket *, struct kaiocb *); void t4_init_cpl_io_handlers(void); void t4_uninit_cpl_io_handlers(void); void send_abort_rpl(struct adapter *, struct sge_ofld_txq *, int , int); void send_flowc_wr(struct toepcb *, struct tcpcb *); void send_reset(struct adapter *, struct toepcb *, uint32_t); int send_rx_credits(struct adapter *, struct toepcb *, int); void send_rx_modulate(struct adapter *, struct toepcb *); void make_established(struct toepcb *, uint32_t, uint32_t, uint16_t); int t4_close_conn(struct adapter *, struct toepcb *); void t4_rcvd(struct toedev *, struct tcpcb *); void t4_rcvd_locked(struct toedev *, struct tcpcb *); int t4_tod_output(struct toedev *, struct tcpcb *); int t4_send_fin(struct toedev *, struct tcpcb *); int t4_send_rst(struct toedev *, struct tcpcb *); void t4_set_tcb_field(struct adapter *, struct sge_wrq *, struct toepcb *, uint16_t, uint64_t, uint64_t, int, int); void t4_push_frames(struct adapter *, struct toepcb *, int); void t4_push_pdus(struct adapter *, struct toepcb *, int); /* t4_ddp.c */ int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int, const char *); void t4_free_ppod_region(struct ppod_region *); int t4_alloc_page_pods_for_ps(struct ppod_region *, struct pageset *); int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int, struct ppod_reservation *); int t4_alloc_page_pods_for_sgl(struct ppod_region *, struct ctl_sg_entry *, int, struct ppod_reservation *); int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int, struct pageset *); int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *, struct ppod_reservation *, vm_offset_t, int, struct mbufq *); int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *, struct ppod_reservation *, struct ctl_sg_entry *, int, int, struct mbufq *); void t4_free_page_pods(struct ppod_reservation *); int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *); int t4_aio_queue_ddp(struct socket *, struct kaiocb *); void t4_ddp_mod_load(void); void t4_ddp_mod_unload(void); void ddp_assert_empty(struct toepcb *); void ddp_init_toep(struct toepcb *); void ddp_uninit_toep(struct toepcb *); void ddp_queue_toep(struct toepcb *); void release_ddp_resources(struct toepcb *toep); void handle_ddp_close(struct toepcb *, struct tcpcb *, uint32_t); void handle_ddp_indicate(struct toepcb *); void insert_ddp_data(struct toepcb *, uint32_t); const struct offload_settings *lookup_offload_policy(struct adapter *, int, struct mbuf *, uint16_t, struct inpcb *); /* t4_tls.c */ bool can_tls_offload(struct adapter *); void do_rx_data_tls(const struct cpl_rx_data *, struct toepcb *, struct mbuf *); -int t4_ctloutput_tls(struct socket *, struct sockopt *); -void t4_push_tls_records(struct adapter *, struct toepcb *, int); void t4_push_ktls(struct adapter *, struct toepcb *, int); void t4_tls_mod_load(void); void t4_tls_mod_unload(void); void tls_detach(struct toepcb *); void tls_establish(struct toepcb *); void tls_init_toep(struct toepcb *); -int tls_rx_key(struct toepcb *); void tls_stop_handshake_timer(struct toepcb *); int tls_tx_key(struct toepcb *); void tls_uninit_toep(struct toepcb *); int tls_alloc_ktls(struct toepcb *, struct ktls_session *, int); #endif