Index: head/sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- head/sys/dev/cxgbe/tom/t4_cpl_io.c	(revision 366853)
+++ head/sys/dev/cxgbe/tom/t4_cpl_io.c	(revision 366854)
@@ -1,2304 +1,2294 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012, 2015 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_kern_tls.h"
 #include "opt_ratelimit.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/aio.h>
 #include <sys/file.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sglist.h>
 #include <sys/taskqueue.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_var.h>
 #include <netinet/toecore.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "common/t4_tcb.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
 static void	t4_aiotx_cancel(struct kaiocb *job);
 static void	t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep);
 
 /*
  * Build and transmit the FW_FLOWC_WR that describes a tid's flow.
  *
  * The WR always carries PFNVFN, CH, PORT, IQID, SNDBUF and MSS.  When tp is
  * non-NULL the MSS is the connection's emss and SNDNXT/RCVNXT are included
  * too; when tp is NULL a fixed MSS of 512 is used.  ULP_MODE (TLS only),
  * TXDATAPLEN_MAX (tls.fcplenmax != 0) and SCHEDCLASS (tc_idx != -1) are
  * appended when applicable.
  *
  * One tx descriptor and howmany(flowclen, 16) tx credits are consumed from
  * the toep, and TPF_FLOWC_WR_SENT is set before the WR is queued.  Panics if
  * the WR cannot be allocated.
  */
 void
 send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
 {
 	struct vi_info *vi = toep->vi;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 	const unsigned int pfvf = sc->pf << S_FW_VIID_PFN;
 	struct fw_flowc_wr *flowc;
 	struct wrqe *wr;
 	unsigned int nparams, flowclen, flowclen16, paramidx;
 
 	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
 	    ("%s: flowc for tid %u sent already", __func__, toep->tid));
 
 	/* Count the mnemonic/value pairs up front; the count sizes the WR. */
 	nparams = (tp != NULL) ? 8 : 6;
 	if (ulp_mode(toep) == ULP_MODE_TLS)
 		nparams++;
 	if (toep->tls.fcplenmax != 0)
 		nparams++;
 	if (toep->params.tc_idx != -1) {
 		MPASS(toep->params.tc_idx >= 0 &&
 		    toep->params.tc_idx < sc->chip_params->nsched_cls);
 		nparams++;
 	}
 
 	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
 	flowclen16 = howmany(flowclen, 16);
 
 	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	flowc = wrtod(wr);
 	memset(flowc, 0, wr->wr_len);
 
 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 	    V_FW_FLOWC_WR_NPARAMS(nparams));
 	flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(flowclen16) |
 	    V_FW_WR_FLOWID(toep->tid));
 
 	/* Append one mnemonic/value pair and advance the write index. */
 #define ADD_MNEMVAL(mnem, value) \
 	do { \
 		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##mnem; \
 		flowc->mnemval[paramidx].val = htobe32(value); \
 		paramidx++; \
 	} while (0)
 
 	paramidx = 0;
 
 	ADD_MNEMVAL(PFNVFN, pfvf);
 	ADD_MNEMVAL(CH, pi->tx_chan);
 	ADD_MNEMVAL(PORT, pi->tx_chan);
 	ADD_MNEMVAL(IQID, toep->ofld_rxq->iq.abs_id);
 	ADD_MNEMVAL(SNDBUF, toep->params.sndbuf);
 	if (tp != NULL) {
 		ADD_MNEMVAL(MSS, toep->params.emss);
 		ADD_MNEMVAL(SNDNXT, tp->snd_nxt);
 		ADD_MNEMVAL(RCVNXT, tp->rcv_nxt);
 	} else {
 		ADD_MNEMVAL(MSS, 512);
 	}
 	CTR6(KTR_CXGBE,
 	    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
 	    __func__, toep->tid, toep->params.emss, toep->params.sndbuf,
 	    tp ? tp->snd_nxt : 0, tp ? tp->rcv_nxt : 0);
 
 	if (ulp_mode(toep) == ULP_MODE_TLS) {
 		ADD_MNEMVAL(ULP_MODE, ulp_mode(toep));
 	}
 	if (toep->tls.fcplenmax != 0) {
 		ADD_MNEMVAL(TXDATAPLEN_MAX, toep->tls.fcplenmax);
 	}
 	if (toep->params.tc_idx != -1) {
 		ADD_MNEMVAL(SCHEDCLASS, toep->params.tc_idx);
 	}
 #undef ADD_MNEMVAL
 
 	/* The pairs written must match the count used to size the WR. */
 	KASSERT(paramidx == nparams, ("nparams mismatch"));
 
 	/* Charge the WR against the toep's credits and tx descriptor ring. */
 	txsd->tx_credits = flowclen16;
 	txsd->plen = 0;
 	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
 	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
 	toep->tx_credits -= txsd->tx_credits;
 	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
 		toep->txsd_pidx = 0;
 	toep->txsd_avail--;
 
 	toep->flags |= TPF_FLOWC_WR_SENT;
 	t4_wrq_tx(sc, wr);
 }
 
 #ifdef RATELIMIT
 /*
  * Move a tid to the scheduling class that matches a new pacing rate.
  *
  * Bps is in bytes/second (so_max_pacing_rate); it is converted to the
  * kilobits/second that the chip counts in.  A rate of 0 unbinds the tid from
  * any class.  If the class changes, a one-parameter FLOWC WR (SCHEDCLASS) is
  * sent, which costs tx credits and one tx descriptor.
  *
  * Returns 0 on success, the error from t4_reserve_cl_rl_kbps() if a class
  * could not be reserved, or ENOMEM if there are not enough tx credits, no
  * free tx descriptor, or the WR could not be allocated (the newly reserved
  * class is released in that case).
  */
 static int
 update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps)
 {
 	const int port_id = toep->vi->pi->port_id;
 	const u_int kbps = (u_int)((uint64_t)Bps * 8ULL / 1000);
 	int rc, tc_idx;
 
 	CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps);
 
 	if (kbps != 0) {
 		rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx);
 		if (rc != 0)
 			return (rc);
 		MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls);
 	} else {
 		/* unbind */
 		tc_idx = -1;
 	}
 
 	if (toep->params.tc_idx != tc_idx) {
 		struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 		struct fw_flowc_wr *flowc;
 		struct wrqe *wr = NULL;
 		int nparams = 1, flowclen, flowclen16;
 
 		flowclen = sizeof(*flowc) +
 		    nparams * sizeof(struct fw_flowc_mnemval);
 		flowclen16 = howmany(flowclen, 16);
 
 		/* Only try to allocate when credits and a descriptor exist. */
 		if (toep->tx_credits >= flowclen16 && toep->txsd_avail != 0)
 			wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
 		if (wr == NULL) {
 			/* Give back the class reserved above. */
 			if (tc_idx >= 0)
 				t4_release_cl_rl(sc, port_id, tc_idx);
 			return (ENOMEM);
 		}
 
 		flowc = wrtod(wr);
 		memset(flowc, 0, wr->wr_len);
 
 		flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 		    V_FW_FLOWC_WR_NPARAMS(nparams));
 		flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(flowclen16) |
 		    V_FW_WR_FLOWID(toep->tid));
 
 		/* 0xff is what gets sent when the tid is being unbound. */
 		flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
 		flowc->mnemval[0].val = htobe32(tc_idx == -1 ? 0xff : tc_idx);
 
 		txsd->tx_credits = flowclen16;
 		txsd->plen = 0;
 		toep->tx_credits -= txsd->tx_credits;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
 			toep->txsd_pidx = 0;
 		toep->txsd_avail--;
 		t4_wrq_tx(sc, wr);
 	}
 
 	/* Drop the reference on whatever class the tid held before. */
 	if (toep->params.tc_idx >= 0)
 		t4_release_cl_rl(sc, port_id, toep->params.tc_idx);
 	toep->params.tc_idx = tc_idx;
 
 	return (0);
 }
 #endif
 
 /*
  * Abort an offloaded connection by sending a CPL_ABORT_REQ that asks for a
  * RST to be sent.
  *
  * Does nothing if an abort is already in progress (TPF_ABORT_SHUTDOWN);
  * otherwise sets that flag.  The snd_nxt argument is used only when the inp
  * has been dropped, in which case the tcpcb must not be touched; otherwise
  * tp->snd_nxt is used.  The socket's send buffer is flushed if the inp is
  * neither dropped nor in timewait and still has a socket.
  *
  * Must be called with the inp write-locked.  Panics if the WR cannot be
  * allocated.
  */
 void
 send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
 {
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);	/* don't use if INP_DROPPED */
 	const int tid = toep->tid;
 	struct cpl_abort_req *req;
 	struct socket *so;
 	struct wrqe *wr;
 
 	INP_WLOCK_ASSERT(inp);
 
 	CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
 	    __func__, toep->tid,
 	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
 	    tcpstates[tp->t_state],
 	    toep->flags, inp->inp_flags,
 	    toep->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 
 	/* abort already in progress */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		return;
 	toep->flags |= TPF_ABORT_SHUTDOWN;
 
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
 	req->rsvd0 = htobe32((inp->inp_flags & INP_DROPPED) ? snd_nxt :
 	    tp->snd_nxt);
 	req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	/*
 	 * XXX: What's the correct way to tell that the inp hasn't been detached
 	 * from its socket?  Should I even be flushing the snd buffer here?
 	 */
 	if (!(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
 		so = inp->inp_socket;
 		/* NULL check because of the uncertainty noted above. */
 		if (so != NULL)
 			sbflush(&so->so_snd);
 	}
 
 	t4_l2t_send(sc, wr, toep->l2te);
 }
 
 /*
  * Translate the TCP options that the hardware reported for a newly
  * established connection (opt, host byte order) into the tcpcb and the
  * toep's params: MTU index / maxseg / effective MSS, timestamps, SACK, and
  * window scaling.
  *
  * Must be called with the inp locked.
  */
 static void
 assign_rxopt(struct tcpcb *tp, uint16_t opt)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = td_adapter(toep->td);
 	const u_int scale_flags = TF_RCVD_SCALE | TF_REQ_SCALE;
 	size_t hdrlen;
 
 	INP_LOCK_ASSERT(inp);
 
 	/* The reported MSS is an index into the adapter's MTU table. */
 	toep->params.mtu_idx = G_TCPOPT_MSS(opt);
 	if (inp->inp_inc.inc_flags & INC_ISIPV6)
 		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 	else
 		hdrlen = sizeof(struct ip) + sizeof(struct tcphdr);
 	tp->t_maxseg = sc->params.mtus[toep->params.mtu_idx];
 	tp->t_maxseg -= hdrlen;
 
 	/* emss additionally excludes the timestamp option, if in use. */
 	toep->params.emss = tp->t_maxseg;
 	if (!G_TCPOPT_TSTAMP(opt)) {
 		toep->params.tstamp = 0;
 	} else {
 		toep->params.tstamp = 1;
 		toep->params.emss -= TCPOLEN_TSTAMP_APPA;
 		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
 		tp->ts_recent = 0;		/* hmmm */
 		tp->ts_recent_age = tcp_ts_getticks();
 	}
 
 	if (!G_TCPOPT_SACK(opt)) {
 		toep->params.sack = 0;
 		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */
 	} else {
 		toep->params.sack = 1;
 		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
 	}
 
 	if (G_TCPOPT_WSCALE_OK(opt))
 		tp->t_flags |= TF_RCVD_SCALE;
 
 	/* Window scaling is on only if it was both requested and received. */
 	if ((tp->t_flags & scale_flags) == scale_flags) {
 		tp->rcv_scale = tp->request_r_scale;
 		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
 	} else {
 		toep->params.wscale = 0;
 	}
 
 	CTR6(KTR_CXGBE,
 	    "assign_rxopt: tid %d, mtu_idx %u, emss %u, ts %u, sack %u, wscale %u",
 	    toep->tid, toep->params.mtu_idx, toep->params.emss,
 	    toep->params.tstamp, toep->params.sack, toep->params.wscale);
 }
 
 /*
  * Finish initializing a connection that has just been established and move
  * it to TCPS_ESTABLISHED.
  *
  * iss and irs are the initial send/receive sequence numbers from the SYN
  * exchange.  opt is the hardware's TCP option summary in big-endian order; it
  * is converted to host order here before being handed to assign_rxopt().
  * The FLOWC WR for the tid is sent from here as well.
  *
  * Must be called with the inp write-locked, in SYN_SENT or SYN_RECEIVED.
  */
 void
 make_established(struct toepcb *toep, uint32_t iss, uint32_t irs, uint16_t opt)
 {
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	const uint16_t hw_opt = be16toh(opt);
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(tp->t_state == TCPS_SYN_SENT ||
 	    tp->t_state == TCPS_SYN_RECEIVED,
 	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));
 
 	CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p",
 	    __func__, toep->tid, so, inp, tp, toep);
 
 	tcp_state_change(tp, TCPS_ESTABLISHED);
 	tp->t_starttime = ticks;
 	TCPSTAT_INC(tcps_connects);
 
 	/* Receive side: sequence space and the initial window. */
 	tp->irs = irs;
 	tcp_rcvseqinit(tp);
 	tp->rcv_wnd = (u_int)toep->params.opt0_bufsize << 10;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	/* Send side: everything starts just past our SYN. */
 	tp->iss = iss;
 	tcp_sendseqinit(tp);
 	tp->snd_una = tp->snd_nxt = tp->snd_max = iss + 1;
 
 	assign_rxopt(tp, hw_opt);
 	send_flowc_wr(toep, tp);
 
 	soisconnected(so);
 }
 
 /*
  * Return rx credits to the chip with a CPL_RX_DATA_ACK on the control
  * queue.  The same message also requests a delayed-ACK mode change to mode 1.
  *
  * Returns the number of credits actually returned: 'credits' on success, or
  * 0 if the work request could not be allocated.
  */
 int
 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
 {
 	const uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
 	struct cpl_rx_data_ack *ack;
 	struct wrqe *wr;
 
 	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
 
 	wr = alloc_wrqe(sizeof(*ack), toep->ctrlq);
 	if (wr == NULL) {
 		/* Nothing was sent, so nothing was returned. */
 		return (0);
 	}
 
 	ack = wrtod(wr);
 	INIT_TP_WR_MIT_CPL(ack, CPL_RX_DATA_ACK, toep->tid);
 	ack->credit_dack = htobe32(dack | V_RX_CREDITS(credits));
 	t4_wrq_tx(sc, wr);
 
 	return (credits);
 }
 
 /*
  * Send a CPL_RX_DATA_ACK that carries only the F_RX_MODULATE_RX flag (no
  * credits) on the control queue.  Silently does nothing if the work request
  * cannot be allocated.
  */
 void
 send_rx_modulate(struct adapter *sc, struct toepcb *toep)
 {
 	struct cpl_rx_data_ack *ack;
 	struct wrqe *wr;
 
 	wr = alloc_wrqe(sizeof(*ack), toep->ctrlq);
 	if (wr != NULL) {
 		ack = wrtod(wr);
 		INIT_TP_WR_MIT_CPL(ack, CPL_RX_DATA_ACK, toep->tid);
 		ack->credit_dack = htobe32(F_RX_MODULATE_RX);
 		t4_wrq_tx(sc, wr);
 	}
 }
 
 /*
  * Decide whether to return receive-window credits to the chip, and do so.
  *
  * The candidate credit is the amount by which the free space in so_rcv
  * exceeds tp->rcv_wnd (0 if it doesn't).  Credits are sent only when one of
  * the thresholds below is met; tp->rcv_wnd and tp->rcv_adv are then advanced
  * by what send_rx_credits() reports was actually returned (0 if it could not
  * send).  If no credits are sent and TPF_FORCE_CREDITS is set, an rx
  * modulate message is sent instead.
  *
  * Must be called with the inp write-locked and the so_rcv sockbuf locked.
  */
 void
 t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_rcv;
 	struct toepcb *toep = tp->t_toe;
 	int rx_credits;
 
 	INP_WLOCK_ASSERT(inp);
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
-	if (ulp_mode(toep) == ULP_MODE_TLS) {
-		if (toep->tls.rcv_over >= rx_credits) {
-			toep->tls.rcv_over -= rx_credits;
-			rx_credits = 0;
-		} else {
-			rx_credits -= toep->tls.rcv_over;
-			toep->tls.rcv_over = 0;
-		}
-	}
-
 	/*
 	 * Send when the window is small (<= 32KB), the update is large
 	 * (>= 64KB), the update is moderate (>= 16KB) and the window is not
 	 * large (<= 128KB), or buffered data plus the window is below the
 	 * receive low-water mark.
 	 */
 	if (rx_credits > 0 &&
 	    (tp->rcv_wnd <= 32 * 1024 || rx_credits >= 64 * 1024 ||
 	    (rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
 	    sbused(sb) + tp->rcv_wnd < sb->sb_lowat)) {
 		rx_credits = send_rx_credits(sc, toep, rx_credits);
 		tp->rcv_wnd += rx_credits;
 		tp->rcv_adv += rx_credits;
 	} else if (toep->flags & TPF_FORCE_CREDITS)
 		send_rx_modulate(sc, toep);
 }
 
 /*
  * Wrapper around t4_rcvd_locked() that takes and releases the lock on the
  * connection's receive sockbuf around the call.
  */
 void
 t4_rcvd(struct toedev *tod, struct tcpcb *tp)
 {
 	struct sockbuf *rcvbuf = &tp->t_inpcb->inp_socket->so_rcv;
 
 	SOCKBUF_LOCK(rcvbuf);
 	t4_rcvd_locked(tod, tp);
 	SOCKBUF_UNLOCK(rcvbuf);
 }
 
 /*
  * Close the send side of a connection by sending a CPL_CLOSE_CON_REQ.
  *
  * A no-op if TPF_FIN_SENT is already set.  Otherwise TPF_FIN_SENT is set and
  * TPF_SEND_FIN is cleared before the request is handed to the L2T layer.
  * Always returns 0; panics if the WR cannot be allocated.
  */
 int
 t4_close_conn(struct adapter *sc, struct toepcb *toep)
 {
 	const unsigned int tid = toep->tid;
 	struct cpl_close_con_req *req;
 	struct wrqe *wr;
 
 	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
 	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");
 
 	if (toep->flags & TPF_FIN_SENT)
 		return (0);
 
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	/* WR header: everything after it is immediate data. */
 	req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |
 	    V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
 	req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
 	    V_FW_WR_FLOWID(tid));
 	req->wr.wr_lo = cpu_to_be64(0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
 	req->rsvd = 0;
 
 	toep->flags = (toep->flags | TPF_FIN_SENT) & ~TPF_SEND_FIN;
 	t4_l2t_send(sc, wr, toep->l2te);
 
 	return (0);
 }
 
 /*
  * Tx credits are counted in 16-byte units of work request.
  * MAX_OFLD_TX_CREDITS is the credit cost of a maximum-length WR;
  * MIN_OFLD_TX_CREDITS is the cost of a tx data WR header followed by a
  * single byte of payload.
  */
 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
 #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
 
 /*
  * Largest immediate-data payload, in bytes, that fits in a tx data WR given
  * the available tx credits.  Returns 0 when there aren't enough credits for
  * even a minimal WR.  The WR is capped at one descriptor (EQ_ESIZE bytes).
  */
 static inline int
 max_imm_payload(int tx_credits)
 {
 	const int n = 1;	/* Use no more than one desc for imm. data WR */
 
 	KASSERT(tx_credits >= 0 &&
 		tx_credits <= MAX_OFLD_TX_CREDITS,
 		("%s: %d credits", __func__, tx_credits));
 
 	if (tx_credits < MIN_OFLD_TX_CREDITS)
 		return (0);
 
 	/* WR size is limited by the descriptor cap or by the credits. */
 	return ((tx_credits >= (n * EQ_ESIZE) / 16 ?
 	    (n * EQ_ESIZE) : tx_credits * 16) -
 	    sizeof(struct fw_ofld_tx_data_wr));
 }
 
 /*
  * Largest number of scatter/gather entries that fit in a tx data WR given
  * the available tx credits.  Returns 0 when there aren't enough credits for
  * even a minimal WR.
  *
  * The ulptx_sgl itself holds one entry.  Credits beyond the minimum are
  * filled with ulptx_sge_pairs: every 24 bytes holds two more entries, and a
  * 16-byte remainder holds one more.
  */
 static inline int
 max_dsgl_nsegs(int tx_credits)
 {
 	int pair_bytes;
 
 	KASSERT(tx_credits >= 0 &&
 		tx_credits <= MAX_OFLD_TX_CREDITS,
 		("%s: %d credits", __func__, tx_credits));
 
 	if (tx_credits < MIN_OFLD_TX_CREDITS)
 		return (0);
 
 	pair_bytes = (int)(tx_credits - MIN_OFLD_TX_CREDITS) * 16;
 
 	return (1 + 2 * (pair_bytes / 24) + (pair_bytes % 24 == 16 ? 1 : 0));
 }
 
 /*
  * Fill in the fw_ofld_tx_data_wr header at dst.
  *
  * immdlen is the immediate-data length recorded in the WR header, plen the
  * payload length, credits the WR length in 16-byte units, shove and
  * ulp_submode go into the flags word along with the toep's ULP mode.
  *
  * When the connection has tx_align enabled, payloads shorter than two emss
  * get LSODISABLE; longer ones get ALIGNPLD, plus ALIGNPLDSHOVE if
  * params.nagle is non-zero.
  */
 static inline void
 write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
     unsigned int plen, uint8_t credits, int shove, int ulp_submode)
 {
 	struct fw_ofld_tx_data_wr *hdr = dst;
 	uint32_t flags;
 
 	/* Accumulate the flags in host order and convert once at the end. */
 	flags = V_TX_ULP_MODE(ulp_mode(toep)) |
 	    V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove);
 	if (toep->params.tx_align > 0) {
 		if (plen < 2 * toep->params.emss) {
 			flags |= F_FW_OFLD_TX_DATA_WR_LSODISABLE;
 		} else {
 			flags |= F_FW_OFLD_TX_DATA_WR_ALIGNPLD;
 			if (toep->params.nagle != 0)
 				flags |= F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE;
 		}
 	}
 
 	hdr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) |
 	    V_FW_WR_IMMDLEN(immdlen));
 	hdr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
 	    V_FW_WR_LEN16(credits));
 	hdr->lsodisable_to_flags = htobe32(flags);
 	hdr->plen = htobe32(plen);
 }
 
 /*
  * Generate a DSGL from a starting mbuf.  The total number of segments and the
  * maximum segments in any one mbuf are provided.
  *
  * dst:   where the ulptx_sgl is written.
  * start: first mbuf to include.
  * stop:  mbuf at which to stop (exclusive); NULL walks to the end of the
  *        chain.
  * nsegs: total number of segments, recorded in the SGL header.  It is also
  *        counted down and verified under INVARIANTS.
  * n:     size of the on-stack scratch sglist; it must be able to hold all
  *        the segments of any single mbuf in the range.
  *
  * Panics if a segment cannot be appended to the scratch sglist.
  */
 static void
 write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
 {
 	struct mbuf *m;
 	struct ulptx_sgl *usgl = dst;
 	int i, j, rc;
 	struct sglist sg;
 	struct sglist_seg segs[n];
 
 	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
 
 	sglist_init(&sg, n, segs);
 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
 	    V_ULPTX_NSGE(nsegs));
 
 	/*
 	 * i indexes the entries that follow the first one: -1 means "the
 	 * len0/addr0 slot in the ulptx_sgl itself", and i >= 0 selects slot
 	 * (i & 1) of sge pair (i / 2).
 	 */
 	i = -1;
 	for (m = start; m != stop; m = m->m_next) {
 		/* Break this one mbuf into physical segments in the scratch list. */
 		if (m->m_flags & M_EXTPG)
 			rc = sglist_append_mbuf_epg(&sg, m,
 			    mtod(m, vm_offset_t), m->m_len);
 		else
 			rc = sglist_append(&sg, mtod(m, void *), m->m_len);
 		if (__predict_false(rc != 0))
 			panic("%s: sglist_append %d", __func__, rc);
 
 		for (j = 0; j < sg.sg_nseg; i++, j++) {
 			if (i < 0) {
 				usgl->len0 = htobe32(segs[j].ss_len);
 				usgl->addr0 = htobe64(segs[j].ss_paddr);
 			} else {
 				usgl->sge[i / 2].len[i & 1] =
 				    htobe32(segs[j].ss_len);
 				usgl->sge[i / 2].addr[i & 1] =
 				    htobe64(segs[j].ss_paddr);
 			}
 #ifdef INVARIANTS
 			nsegs--;
 #endif
 		}
 		/* Reuse the scratch list for the next mbuf. */
 		sglist_reset(&sg);
 	}
 	/*
 	 * An odd i means the last sge pair had only its first slot filled;
 	 * zero the length of the unused second slot.
 	 */
 	if (i & 1)
 		usgl->sge[i / 2].len[1] = htobe32(0);
 	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
 	    __func__, nsegs, start, stop));
 }
 
 /*
  * Max number of SGL entries an offload tx work request can have.  This is 41
  * (1 + 40) for a full 512B work request.
  * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40)
  */
 #define OFLD_SGL_LEN (41)
 
 /*
  * Send data and/or a FIN to the peer.
  *
  * The socket's so_snd buffer consists of a stream of data starting with sb_mb
  * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
  * was transmitted.
  *
  * drop indicates the number of bytes that should be dropped from the head of
  * the send buffer.  It is an optimization that lets do_fw4_ack avoid creating
  * contention on the send buffer lock (before this change it used to do
  * sowwakeup and then t4_push_frames right after that when recovering from tx
  * stalls).  When drop is set this function MUST drop the bytes and wake up any
  * writers.
  *
  * Each pass of the main loop builds one tx data work request, either with the
  * payload copied in as immediate data or with a DSGL that points at the
  * mbufs, and charges it against the toep's tx credits and tx descriptor ring.
  * The function returns early, without sending, if an abort is in progress or
  * tx is suspended, and it suspends tx itself when credits run too low or a
  * work request cannot be allocated.
  *
  * Must be called with the inp write-locked; the so_snd lock is taken and
  * released internally.
  */
 void
 t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct mbuf *sndptr, *m, *sb_sndptr;
 	struct fw_ofld_tx_data_wr *txwr;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_snd;
 	int tx_credits, shove, compl, sowwakeup;
 	struct ofld_tx_sdesc *txsd;
 	bool nomap_mbuf_seen;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 
 	KASSERT(ulp_mode(toep) == ULP_MODE_NONE ||
 	    ulp_mode(toep) == ULP_MODE_TCPDDP ||
 	    ulp_mode(toep) == ULP_MODE_TLS ||
 	    ulp_mode(toep) == ULP_MODE_RDMA,
 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
 
 #ifdef VERBOSE_TRACES
 	CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
 	    __func__, toep->tid, toep->flags, tp->t_flags, drop);
 #endif
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 #ifdef RATELIMIT
 	/*
 	 * Apply a pending pacing-rate change.  The "changed" flag is cleared
 	 * only if the update succeeded, so a failed update is retried on a
 	 * later call.
 	 */
 	if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) &&
 	    (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) {
 		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
 	}
 #endif
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	txsd = &toep->txsd[toep->txsd_pidx];
 	do {
 		/* Limits for this WR, given the credits currently available. */
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 		max_imm = max_imm_payload(tx_credits);
 		max_nsegs = max_dsgl_nsegs(tx_credits);
 
 		SOCKBUF_LOCK(sb);
 		/* A non-zero drop is honored once, on the first pass only. */
 		sowwakeup = drop;
 		if (drop) {
 			sbdrop_locked(sb, drop);
 			drop = 0;
 		}
 		/* Resume right after the last mbuf that was transmitted. */
 		sb_sndptr = sb->sb_sndptr;
 		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
 		plen = 0;
 		nsegs = 0;
 		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
 		nomap_mbuf_seen = false;
 		/*
 		 * Gather as many whole mbufs as fit in one WR.  On exit from
 		 * the loop m is the first mbuf NOT included (NULL if
 		 * everything was).
 		 */
 		for (m = sndptr; m != NULL; m = m->m_next) {
 			int n;
 
 			if (m->m_flags & M_EXTPG) {
 #ifdef KERN_TLS
 				/*
 				 * An mbuf with a TLS session attached: from
 				 * here on the connection is flagged for the
 				 * KTLS tx path.  If nothing has been gathered
 				 * yet, hand over to it right away; otherwise
 				 * send what has been gathered first.
 				 */
 				if (m->m_epg_tls != NULL) {
 					toep->flags |= TPF_KTLS;
 					if (plen == 0) {
 						SOCKBUF_UNLOCK(sb);
 						t4_push_ktls(sc, toep, 0);
 						return;
 					}
 					break;
 				}
 #endif
 				n = sglist_count_mbuf_epg(m,
 				    mtod(m, vm_offset_t), m->m_len);
 			} else
 				n = sglist_count(mtod(m, void *), m->m_len);
 
 			nsegs += n;
 			plen += m->m_len;
 
 			/* This mbuf sent us _over_ the nsegs limit, back out */
 			if (plen > max_imm && nsegs > max_nsegs) {
 				nsegs -= n;
 				plen -= m->m_len;
 				if (plen == 0) {
 					/* Too few credits */
 					toep->flags |= TPF_TX_SUSPENDED;
 					/*
 					 * Bytes may have been dropped above,
 					 * so writers still get their wakeup.
 					 * sowwakeup_locked() releases the
 					 * sockbuf lock (asserted below).
 					 */
 					if (sowwakeup) {
 						if (!TAILQ_EMPTY(
 						    &toep->aiotx_jobq))
 							t4_aiotx_queue_toep(so,
 							    toep);
 						sowwakeup_locked(so);
 					} else
 						SOCKBUF_UNLOCK(sb);
 					SOCKBUF_UNLOCK_ASSERT(sb);
 					return;
 				}
 				break;
 			}
 
 			if (m->m_flags & M_EXTPG)
 				nomap_mbuf_seen = true;
 			if (max_nsegs_1mbuf < n)
 				max_nsegs_1mbuf = n;
 			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */
 
 			/* This mbuf put us right at the max_nsegs limit */
 			if (plen > max_imm && nsegs == max_nsegs) {
 				m = m->m_next;
 				break;
 			}
 		}
 
 		/*
 		 * Ask for a completion when the send buffer is more than 5/8
 		 * full and at least a quarter of its high-water mark would be
 		 * outstanding without one.
 		 */
 		if (sbused(sb) > sb->sb_hiwat * 5 / 8 &&
 		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
 			compl = 1;
 		else
 			compl = 0;
 
 		/*
 		 * Grow an auto-sized send buffer that is at least 7/8 full,
 		 * up to the configured maximum.  Auto-sizing is turned off
 		 * for this buffer if the reservation fails.
 		 */
 		if (sb->sb_flags & SB_AUTOSIZE &&
 		    V_tcp_do_autosndbuf &&
 		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
 		    sbused(sb) >= sb->sb_hiwat * 7 / 8) {
 			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
 			    V_tcp_autosndbuf_max);
 
 			if (!sbreserve_locked(sb, newsize, so, NULL))
 				sb->sb_flags &= ~SB_AUTOSIZE;
 			else
 				sowwakeup = 1;	/* room available */
 		}
 		if (sowwakeup) {
 			if (!TAILQ_EMPTY(&toep->aiotx_jobq))
 				t4_aiotx_queue_toep(so, toep);
 			sowwakeup_locked(so);
 		} else
 			SOCKBUF_UNLOCK(sb);
 		SOCKBUF_UNLOCK_ASSERT(sb);
 
 		/* nothing to send */
 		if (plen == 0) {
 			KASSERT(m == NULL,
 			    ("%s: nothing to send, but m != NULL", __func__));
 			break;
 		}
 
 		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		/* Shove only if this WR empties the buffer and no more is coming. */
 		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
 		/* Data in M_EXTPG mbufs is never copied; it always goes by DSGL. */
 		if (plen <= max_imm && !nomap_mbuf_seen) {
 
 			/* Immediate data tx */
 
 			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
 					toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr->wr_len, 16);
 			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0);
 			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
 			nsegs = 0;
 		} else {
 			int wr_len;
 
 			/* DSGL tx */
 
 			/*
 			 * The ulptx_sgl holds the first segment.  The other
 			 * nsegs - 1 are packed two to a 24-byte pair, with a
 			 * final unpaired segment taking 16 bytes.
 			 */
 			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
 			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr_len, 16);
 			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0);
 			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
 			    max_nsegs_1mbuf);
 			/* Zero the padding if the WR isn't a multiple of 16. */
 			if (wr_len & 0xf) {
 				uint64_t *pad = (uint64_t *)
 				    ((uintptr_t)txwr + wr_len);
 				*pad = 0;
 			}
 		}
 
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		toep->tx_credits -= credits;
 		toep->tx_nocompl += credits;
 		toep->plen_nocompl += plen;
 		/*
 		 * Also ask for a completion when credits are running low
 		 * (3/8 or less of the total left) and a quarter of the total
 		 * has gone out without one.
 		 */
 		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
 		    toep->tx_nocompl >= toep->tx_total / 4)
 			compl = 1;
 
 		/* RDMA mode always requests a completion. */
 		if (compl || ulp_mode(toep) == ULP_MODE_RDMA) {
 			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
 			toep->tx_nocompl = 0;
 			toep->plen_nocompl = 0;
 		}
 
 		tp->snd_nxt += plen;
 		tp->snd_max += plen;
 
 		/* Record how far into the send buffer we have transmitted. */
 		SOCKBUF_LOCK(sb);
 		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
 		sb->sb_sndptr = sb_sndptr;
 		SOCKBUF_UNLOCK(sb);
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/* Remember what this WR cost; the descriptor ring wraps. */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	} while (m != NULL);
 
 	/* Send a FIN if requested, but only if there's no more data to send */
 	if (m == NULL && toep->flags & TPF_SEND_FIN)
 		t4_close_conn(sc, toep);
 }
 
 /*
  * Free whole packets from the head of q until plen bytes have been
  * accounted for.  plen is expected to be exactly the sum of the lengths of
  * the packets removed; running out of packets or landing in the middle of
  * one trips an assertion.
  */
 static inline void
 rqdrop_locked(struct mbufq *q, int plen)
 {
 	struct mbuf *pdu;
 	int pdu_len;
 
 	for (; plen > 0; plen -= pdu_len) {
 		pdu = mbufq_dequeue(q);
 
 		/* Too many credits. */
 		MPASS(pdu != NULL);
 		M_ASSERTPKTHDR(pdu);
 
 		/* Partial credits. */
 		pdu_len = pdu->m_pkthdr.len;
 		MPASS(plen >= pdu_len);
 
 		m_freem(pdu);
 	}
 }
 
 /*
  * Send queued PDUs (and/or a FIN) on an iSCSI-mode connection.
  *
  * PDUs are taken from the head of toep->ulp_pduq one at a time.  Each PDU is
  * sent whole in a single tx data work request, either as immediate data or
  * via a DSGL, and is then moved to toep->ulp_pdu_reclaimq.  If the PDU at the
  * head doesn't fit in the credits available, tx is suspended and the PDU
  * stays on the queue.
  *
  * drop is the number of bytes' worth of previously sent PDUs to free from the
  * reclaim queue before sending.
  *
  * Must be called with the inp write-locked.  Returns early, without sending,
  * if an abort is in progress or tx is suspended.
  */
 void
 t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct mbuf *sndptr, *m;
 	struct fw_ofld_tx_data_wr *txwr;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
 	u_int adjusted_plen, ulp_submode;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	int tx_credits, shove;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 	struct mbufq *pduq = &toep->ulp_pduq;
 	/* Extra sequence space per PDU, indexed by ULP submode (see below). */
 	static const u_int ulp_extra_len[] = {0, 4, 4, 8};
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 	KASSERT(ulp_mode(toep) == ULP_MODE_ISCSI,
 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
 
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	if (drop)
 		rqdrop_locked(&toep->ulp_pdu_reclaimq, drop);
 
 	while ((sndptr = mbufq_first(pduq)) != NULL) {
 		M_ASSERTPKTHDR(sndptr);
 
 		/* Limits for this WR, given the credits currently available. */
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 		max_imm = max_imm_payload(tx_credits);
 		max_nsegs = max_dsgl_nsegs(tx_credits);
 
 		plen = 0;
 		nsegs = 0;
 		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
 		/* Total up the length and segment count of the whole PDU. */
 		for (m = sndptr; m != NULL; m = m->m_next) {
 			int n = sglist_count(mtod(m, void *), m->m_len);
 
 			nsegs += n;
 			plen += m->m_len;
 
 			/*
 			 * This mbuf would send us _over_ the nsegs limit.
 			 * Suspend tx because the PDU can't be sent out.
 			 */
 			if (plen > max_imm && nsegs > max_nsegs) {
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 
 			if (max_nsegs_1mbuf < n)
 				max_nsegs_1mbuf = n;
 		}
 
 		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		/*
 		 * We have a PDU to send.  All of it goes out in one WR so 'm'
 		 * is NULL.  A PDU's length is always a multiple of 4.
 		 */
 		MPASS(m == NULL);
 		MPASS((plen & 3) == 0);
 		MPASS(sndptr->m_pkthdr.len == plen);
 
 		shove = !(tp->t_flags & TF_MORETOCOME);
 		ulp_submode = mbuf_ulp_submode(sndptr);
 		MPASS(ulp_submode < nitems(ulp_extra_len));
 
 		/*
 		 * plen doesn't include header and data digests, which are
 		 * generated and inserted in the right places by the TOE, but
 		 * they do occupy TCP sequence space and need to be accounted
 		 * for.
 		 */
 		adjusted_plen = plen + ulp_extra_len[ulp_submode];
 		if (plen <= max_imm) {
 
 			/* Immediate data tx */
 
 			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
 					toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr->wr_len, 16);
 			write_tx_wr(txwr, toep, plen, adjusted_plen, credits,
 			    shove, ulp_submode);
 			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
 			nsegs = 0;
 		} else {
 			int wr_len;
 
 			/* DSGL tx */
 			/*
 			 * The ulptx_sgl holds the first segment.  The other
 			 * nsegs - 1 are packed two to a 24-byte pair, with a
 			 * final unpaired segment taking 16 bytes.
 			 */
 			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
 			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr_len, 16);
 			write_tx_wr(txwr, toep, 0, adjusted_plen, credits,
 			    shove, ulp_submode);
 			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
 			    max_nsegs_1mbuf);
 			/* Zero the padding if the WR isn't a multiple of 16. */
 			if (wr_len & 0xf) {
 				uint64_t *pad = (uint64_t *)
 				    ((uintptr_t)txwr + wr_len);
 				*pad = 0;
 			}
 		}
 
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		/*
 		 * The PDU is committed: move it to the reclaim queue, where it
 		 * stays until rqdrop_locked() frees it on a later call.
 		 */
 		m = mbufq_dequeue(pduq);
 		MPASS(m == sndptr);
 		mbufq_enqueue(&toep->ulp_pdu_reclaimq, m);
 
 		toep->tx_credits -= credits;
 		toep->tx_nocompl += credits;
 		toep->plen_nocompl += plen;
 		/*
 		 * Ask for a completion when credits are running low (3/8 or
 		 * less of the total left) and a quarter of the total has gone
 		 * out without one.
 		 */
 		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
 		    toep->tx_nocompl >= toep->tx_total / 4) {
 			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
 			toep->tx_nocompl = 0;
 			toep->plen_nocompl = 0;
 		}
 
 		/* Sequence space advances by the digest-inclusive length. */
 		tp->snd_nxt += adjusted_plen;
 		tp->snd_max += adjusted_plen;
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/* Remember what this WR cost; the descriptor ring wraps. */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	}
 
 	/* Send a FIN if requested, but only if there are no more PDUs to send */
 	if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN)
 		t4_close_conn(sc, toep);
 }
 
 /*
  * Dispatch to the tx routine that matches the connection: PDUs for iSCSI,
  * TLS records when a tx key is set and the TLS mode is TLS_MODE_TLSOM, the
  * KTLS path when TPF_KTLS is set (KERN_TLS kernels only), and plain frames
  * otherwise.  The checks are made in that order.
  */
 static inline void
 t4_push_data(struct adapter *sc, struct toepcb *toep, int drop)
 {
 
 	if (ulp_mode(toep) == ULP_MODE_ISCSI) {
 		t4_push_pdus(sc, toep, drop);
 		return;
 	}
 	if (tls_tx_key(toep) && toep->tls.mode == TLS_MODE_TLSOM) {
 		t4_push_tls_records(sc, toep, drop);
 		return;
 	}
 #ifdef KERN_TLS
 	if (toep->flags & TPF_KTLS) {
 		t4_push_ktls(sc, toep, drop);
 		return;
 	}
 #endif
 	t4_push_frames(sc, toep, drop);
 }
 
 /*
  * Push any pending transmit work for the offloaded connection behind 'tp'.
  * Asserts that the inp is write-locked and has not been dropped, and that a
  * toepcb is attached.  Always returns 0.
  */
 int
 t4_tod_output(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc;
 	struct toepcb *toep;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	sc = tod->tod_softc;
 	toep = tp->t_toe;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(inp->inp_flags & INP_DROPPED),
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	t4_push_data(sc, toep, 0);
 	return (0);
 }
 
 /*
  * Record that a FIN must be sent (TPF_SEND_FIN) on the offloaded connection
  * behind 'tp'.  If the connection has reached at least TCPS_ESTABLISHED the
  * transmit path is kicked right away.  Asserts that the inp is write-locked
  * and has not been dropped.  Always returns 0.
  */
 int
 t4_send_fin(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc;
 	struct toepcb *toep;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	sc = tod->tod_softc;
 	toep = tp->t_toe;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(inp->inp_flags & INP_DROPPED),
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	toep->flags |= TPF_SEND_FIN;
 
 	/* Not established yet: leave the flag set and do nothing more now. */
 	if (tp->t_state < TCPS_ESTABLISHED)
 		return (0);
 
 	t4_push_data(sc, toep, 0);
 	return (0);
 }
 
 /*
  * Reset the offloaded connection behind 'tp' via send_reset().  Asserts that
  * the inp is write-locked and not dropped, and that the flowc work request
  * for the tid has already been sent.  Always returns 0.
  */
 int
 t4_send_rst(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc;
 	struct toepcb *toep;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	sc = tod->tod_softc;
 	toep = tp->t_toe;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(inp->inp_flags & INP_DROPPED),
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	/* hmmmm */
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc for tid %u [%s] not sent already",
 	    __func__, toep->tid, tcpstates[tp->t_state]));
 
 	send_reset(sc, toep, 0);
 
 	return (0);
 }
 
 /*
  * Peer has sent us a FIN.
  *
  * Handler for CPL_PEER_CLOSE.  Accounts for the FIN in rcv_nxt, marks the
  * socket as unable to receive more, lets DDP deal with any active buffer,
  * and then advances the TCP state.  Nothing is done beyond tracing if the
  * connection is already being aborted (TPF_ABORT_SHUTDOWN).  Always returns 0.
  */
 static int
 do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = NULL;
 	struct socket *so;
 	struct epoch_tracker et;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PEER_CLOSE,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 		/*
 		 * do_pass_establish must have run before do_peer_close and if
 		 * this is still a synqe instead of a toepcb then the connection
 		 * must be getting aborted.
 		 */
 		MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
 		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
 		    toep, toep->flags);
 		return (0);
 	}
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	/* Order here: vnet context, then net epoch, then the inp write lock. */
 	CURVNET_SET(toep->vnet);
 	NET_EPOCH_ENTER(et);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: tid %u (%s), toep_flags 0x%x, ddp_flags 0x%x, inp %p",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
 	    toep->ddp.flags, inp);
 
 	/* An abort is already in progress; ignore the FIN. */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
 	tp->rcv_nxt++;	/* FIN */
 
 	so = inp->inp_socket;
 	socantrcvmore(so);
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP) {
 		DDP_LOCK(toep);
 		/* cpl->rcv_nxt is passed on as-is (not byte-swapped here). */
 		if (__predict_false(toep->ddp.flags &
 		    (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)))
 			handle_ddp_close(toep, tp, cpl->rcv_nxt);
 		DDP_UNLOCK(toep);
 	}
 
 	/* The rcv_nxt cross-check is skipped for RDMA connections. */
 	if (ulp_mode(toep) != ULP_MODE_RDMA) {
 		KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
 	    		("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
 	    		be32toh(cpl->rcv_nxt)));
 	}
 
 	switch (tp->t_state) {
 	case TCPS_SYN_RECEIVED:
 		tp->t_starttime = ticks;
 		/* FALLTHROUGH */
 
 	case TCPS_ESTABLISHED:
 		tcp_state_change(tp, TCPS_CLOSE_WAIT);
 		break;
 
 	case TCPS_FIN_WAIT_1:
 		tcp_state_change(tp, TCPS_CLOSING);
 		break;
 
 	case TCPS_FIN_WAIT_2:
 		/*
 		 * The inp is asserted to be unlocked once tcp_twstart()
 		 * returns.  It is locked again for final_cpl_received(); no
 		 * unlock follows here, so that routine presumably releases
 		 * the lock itself (NOTE(review): confirm).
 		 */
 		tcp_twstart(tp);
 		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);
 		return (0);
 
 	default:
 		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
 		    __func__, tid, tp->t_state);
 	}
 done:
 	/* Common exit: undo lock, epoch and vnet in reverse order. */
 	INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Peer has ACK'd our FIN.
  *
  * Handler for CPL_CLOSE_CON_RPL.  Sets snd_una from the sequence number in
  * the reply (less one for the FIN) and advances the TCP state.  Nothing is
  * done beyond tracing if the connection is already being aborted
  * (TPF_ABORT_SHUTDOWN).  Always returns 0.
  */
 static int
 do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
 	struct epoch_tracker et;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_CLOSE_CON_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	/* Order here: vnet context, then net epoch, then the inp write lock. */
 	CURVNET_SET(toep->vnet);
 	NET_EPOCH_ENTER(et);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
 
 	/* An abort is already in progress; ignore the reply. */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
 	so = inp->inp_socket;
 	tp->snd_una = be32toh(cpl->snd_nxt) - 1;	/* exclude FIN */
 
 	switch (tp->t_state) {
 	case TCPS_CLOSING:	/* see TCPS_FIN_WAIT_2 in do_peer_close too */
 		tcp_twstart(tp);
 release:
 		/*
 		 * Shared by TCPS_CLOSING and TCPS_LAST_ACK: the inp must be
 		 * unlocked on arrival here.  It is locked again only to call
 		 * final_cpl_received(); no unlock follows, so that routine
 		 * presumably releases the lock (NOTE(review): confirm).
 		 */
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);	/* no more CPLs expected */
 
 		return (0);
 	case TCPS_LAST_ACK:
 		/*
 		 * A non-NULL return from tcp_close() leaves the inp locked, so
 		 * unlock it to satisfy the assertion at 'release'.
 		 */
 		if (tcp_close(tp))
 			INP_WUNLOCK(inp);
 		goto release;
 
 	case TCPS_FIN_WAIT_1:
 		/* Receive side already shut: the socket is now disconnected. */
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			soisdisconnected(so);
 		tcp_state_change(tp, TCPS_FIN_WAIT_2);
 		break;
 
 	default:
 		log(LOG_ERR,
 		    "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
 		    __func__, tid, tcpstates[tp->t_state]);
 	}
 done:
 	/* Common exit: undo lock, epoch and vnet in reverse order. */
 	INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Build a CPL_ABORT_RPL for 'tid' with 'rst_status' as its command and
  * transmit it on 'ofld_txq'.  Panics if the work request cannot be allocated.
  */
 void
 send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid,
     int rst_status)
 {
 	struct cpl_abort_rpl *rpl;
 	struct wrqe *wr = alloc_wrqe(sizeof(*rpl), ofld_txq);
 
 	/* XXX: there is no recovery path for an allocation failure. */
 	if (wr == NULL)
 		panic("%s: allocation failure.", __func__);
 
 	rpl = wrtod(wr);
 	INIT_TP_WR_MIT_CPL(rpl, CPL_ABORT_RPL, tid);
 	rpl->cmd = rst_status;
 
 	t4_wrq_tx(sc, wr);
 }
 
 /*
  * Translate the status carried by an abort request into an errno value.
  *
  * A bad SYN or connection reset maps to EPIPE if the connection is in
  * TCPS_CLOSE_WAIT and to ECONNRESET otherwise; the four timeout statuses map
  * to ETIMEDOUT; everything else maps to EIO.
  */
 static int
 abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
 {
 	int error;
 
 	switch (abort_reason) {
 	case CPL_ERR_BAD_SYN:
 	case CPL_ERR_CONN_RESET:
 		if (tp->t_state == TCPS_CLOSE_WAIT)
 			error = EPIPE;
 		else
 			error = ECONNRESET;
 		break;
 	case CPL_ERR_XMIT_TIMEDOUT:
 	case CPL_ERR_PERSIST_TIMEDOUT:
 	case CPL_ERR_FINWAIT2_TIMEDOUT:
 	case CPL_ERR_KEEPALIVE_TIMEDOUT:
 		error = ETIMEDOUT;
 		break;
 	default:
 		error = EIO;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * TCP RST from the peer, timeout, or some other such critical error.
  *
  * Handler for CPL_ABORT_REQ_RSS.  Synq entries are handed to
  * do_abort_req_synqe() and negative advice is ignored.  Otherwise, unless an
  * abort was already in progress, the socket error is set from the abort
  * status, the tcpcb is closed and final_cpl_received() is called.  Whether or
  * not an abort was already in progress, a CPL_ABORT_RPL is sent back.  Returns
  * 0, or whatever do_abort_req_synqe() returns for a synq entry.
  */
 static int
 do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	/*
 	 * Saved now and used for the reply at the very end, after
 	 * final_cpl_received() -- presumably because toep may not be usable
 	 * by then (NOTE(review): confirm).
 	 */
 	struct sge_wrq *ofld_txq = toep->ofld_txq;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct epoch_tracker et;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (toep->flags & TPF_SYNQE)
 		return (do_abort_req_synqe(iq, rss, m));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	if (negative_advice(cpl->status)) {
 		CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
 		    __func__, cpl->status, tid, toep->flags);
 		return (0);	/* Ignore negative advice */
 	}
 
 	inp = toep->inp;
 	CURVNET_SET(toep->vnet);
 	NET_EPOCH_ENTER(et);	/* for tcp_close */
 	INP_WLOCK(inp);
 
 	tp = intotcpcb(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
 	    inp->inp_flags, cpl->status);
 
 	/*
 	 * If we'd initiated an abort earlier the reply to it is responsible for
 	 * cleaning up resources.  Otherwise we tear everything down right here
 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
 	 */
 	if (toep->flags & TPF_ABORT_SHUTDOWN) {
 		INP_WUNLOCK(inp);
 		goto done;
 	}
 	toep->flags |= TPF_ABORT_SHUTDOWN;
 
 	/* Only touch the socket/tcpcb if the inp is still live. */
 	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
 		struct socket *so = inp->inp_socket;
 
 		if (so != NULL)
 			so_error_set(so, abort_status_to_errno(tp,
 			    cpl->status));
 		/*
 		 * When tcp_close() returns NULL the inp lock is taken again so
 		 * that final_cpl_received() below is entered with it held.
 		 */
 		tp = tcp_close(tp);
 		if (tp == NULL)
 			INP_WLOCK(inp);	/* re-acquire */
 	}
 
 	/* No INP_WUNLOCK follows on this path (NOTE(review): confirm callee). */
 	final_cpl_received(toep);
 done:
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
 
 /*
  * Reply to the CPL_ABORT_REQ (send_reset)
  *
  * Handler for CPL_ABORT_RPL_RSS.  Synq entries are passed on to
  * do_abort_rpl_synqe().  For a full toepcb, which must already be marked
  * TPF_ABORT_SHUTDOWN, the inp is write-locked and final_cpl_received() is
  * called.
  */
 static int
 do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	const struct cpl_abort_rpl_rss *cpl;
 	struct adapter *sc;
 	struct toepcb *toep;
 	struct inpcb *inp;
 	unsigned int tid;
 #ifdef INVARIANTS
 	unsigned int opcode;
 #endif
 
 	sc = iq->adapter;
 	cpl = (const void *)(rss + 1);
 	tid = GET_TID(cpl);
 	toep = lookup_tid(sc, tid);
 	inp = toep->inp;
 #ifdef INVARIANTS
 	opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if ((toep->flags & TPF_SYNQE) != 0)
 		return (do_abort_rpl_synqe(iq, rss, m));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
 	    __func__, tid, toep, inp, cpl->status);
 
 	KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
 	    ("%s: wasn't expecting abort reply", __func__));
 
 	/* final_cpl_received() is entered with the inp write-locked. */
 	INP_WLOCK(inp);
 	final_cpl_received(toep);
 
 	return (0);
 }
 
 /*
  * Handler for CPL_RX_DATA: payload received on an offloaded connection.
  *
  * The CPL header is stripped and the rest of the mbuf is appended to the
  * socket's receive buffer after rcv_nxt and rcv_wnd have been updated.  Along
  * the way this may grow the receive buffer (autosize), track DDP being
  * switched on or off for ULP_MODE_TCPDDP connections, send rx credits, and
  * queue the DDP task.  Data for a dropped or timewait inp is discarded, and
  * data that arrives after the socket can no longer receive causes the
  * connection to be dropped with ECONNRESET.  Always returns 0; on every path
  * 'm' is either freed or appended to the socket buffer.
  */
 static int
 do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_rx_data *cpl = mtod(m, const void *);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	struct sockbuf *sb;
 	struct epoch_tracker et;
 	int len, rx_credits;
 	uint32_t ddp_placed = 0;
 
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 		/*
 		 * do_pass_establish must have run before do_rx_data and if this
 		 * is still a synqe instead of a toepcb then the connection must
 		 * be getting aborted.
 		 */
 		MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
 		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
 		    toep, toep->flags);
 		m_freem(m);
 		return (0);
 	}
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	/* strip off CPL header */
 	m_adj(m, sizeof(*cpl));
 	len = m->m_pkthdr.len;
 
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
 		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
 		    __func__, tid, len, inp->inp_flags);
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 	}
 
 	tp = intotcpcb(inp);
 
 	/*
 	 * A gap between our rcv_nxt and the sequence number in the CPL is
 	 * remembered as ddp_placed; it is only consumed below, when the
 	 * connection falls out of DDP mode.
 	 */
 	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
 		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;
 
 	tp->rcv_nxt += len;
 	/* Receiving more than the window allows is tolerated only for RDMA. */
 	if (tp->rcv_wnd < len) {
 		KASSERT(ulp_mode(toep) == ULP_MODE_RDMA,
 				("%s: negative window size", __func__));
 	}
 
 	tp->rcv_wnd -= len;
 	tp->t_rcvtime = ticks;
 
 	/* Lock order used here: inp, then DDP (if applicable), then so_rcv. */
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 		DDP_LOCK(toep);
 	so = inp_inpcbtosocket(inp);
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 
 	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
 		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
 		    __func__, tid, len);
 		m_freem(m);
 		SOCKBUF_UNLOCK(sb);
 		if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 			DDP_UNLOCK(toep);
 		INP_WUNLOCK(inp);
 
 		/*
 		 * All locks were released above; the inp lock is taken again
 		 * inside the vnet context and net epoch set up for tcp_drop().
 		 * The inp is unlocked here only if tcp_drop() returns
 		 * non-NULL.
 		 */
 		CURVNET_SET(toep->vnet);
 		NET_EPOCH_ENTER(et);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		if (tp)
 			INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 
 		return (0);
 	}
 
 	/* receive buffer autosize */
 	MPASS(toep->vnet == so->so_vnet);
 	CURVNET_SET(toep->vnet);
 	if (sb->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&
 	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
 	    len > (sbspace(sb) / 8 * 7)) {
 		unsigned int hiwat = sb->sb_hiwat;
 		unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
 		/* Stop autosizing this buffer if it could not be grown. */
 		if (!sbreserve_locked(sb, newsize, so, NULL))
 			sb->sb_flags &= ~SB_AUTOSIZE;
 	}
 
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP) {
 		/* Non-zero when our DDP_ON flag disagrees with the CPL. */
 		int changed = !(toep->ddp.flags & DDP_ON) ^ cpl->ddp_off;
 
 		if (toep->ddp.waiting_count != 0 || toep->ddp.active_count != 0)
 			CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)",
 			    __func__, tid, len);
 
 		if (changed) {
 			/* Flip DDP_ON and clear/toggle the pending request. */
 			if (toep->ddp.flags & DDP_SC_REQ)
 				toep->ddp.flags ^= DDP_ON | DDP_SC_REQ;
 			else {
 				KASSERT(cpl->ddp_off == 1,
 				    ("%s: DDP switched on by itself.",
 				    __func__));
 
 				/* Fell out of DDP mode */
 				toep->ddp.flags &= ~DDP_ON;
 				CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
 				    __func__);
 
 				insert_ddp_data(toep, ddp_placed);
 			}
 		}
 
 		if (toep->ddp.flags & DDP_ON) {
 			/*
 			 * CPL_RX_DATA with DDP on can only be an indicate.
 			 * Start posting queued AIO requests via DDP.  The
 			 * payload that arrived in this indicate is appended
 			 * to the socket buffer as usual.
 			 */
 			handle_ddp_indicate(toep);
 		}
 	}
 
 	sbappendstream_locked(sb, m, 0);
 	/*
 	 * Credits are the buffer space beyond the current window.  They are
 	 * sent only while buffered data plus the window is below the low
 	 * water mark, and the window grows by what send_rx_credits() returns.
 	 */
 	rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
 	if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) {
 		rx_credits = send_rx_credits(sc, toep, rx_credits);
 		tp->rcv_wnd += rx_credits;
 		tp->rcv_adv += rx_credits;
 	}
 
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 &&
 	    sbavail(sb) != 0) {
 		CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
 		    tid);
 		ddp_queue_toep(toep);
 	}
 	/* The socket buffer is asserted to be unlocked after this wakeup. */
 	sorwakeup_locked(so);
 	SOCKBUF_UNLOCK_ASSERT(sb);
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 		DDP_UNLOCK(toep);
 
 	INP_WUNLOCK(inp);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Handler for CPL_FW4_ACK: tx credits coming back for this tid.
  *
  * Updates snd_una if the message carries a valid sequence number, walks the
  * tx descriptors covered by the returned credits (giving the credits back to
  * the toepcb and totalling the payload they described), and then either
  * resumes a suspended transmit path or drops the acknowledged payload from
  * the send buffer / iSCSI reclaim queue and wakes up writers.  Nothing is
  * done for a synq entry or for a connection that is being aborted.  Always
  * returns 0.
  */
 static int
 do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
 	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	uint8_t credits = cpl->credits;
 	struct ofld_tx_sdesc *txsd;
 	int plen;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	/*
 	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
 	 * now this comes back carrying the credits for the flowc.
 	 */
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
 		    ("%s: credits for a synq entry %p", __func__, toep));
 		return (0);
 	}
 
 	inp = toep->inp;
 
 	KASSERT(opcode == CPL_FW4_ACK,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	INP_WLOCK(inp);
 
 	/* Credits for a connection that is being aborted are ignored. */
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
 		INP_WUNLOCK(inp);
 		return (0);
 	}
 
 	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
 	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));
 
 	tp = intotcpcb(inp);
 
 	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
 		tcp_seq snd_una = be32toh(cpl->snd_una);
 
 #ifdef INVARIANTS
 		/* A snd_una that moves backwards is logged but still applied. */
 		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
 			log(LOG_ERR,
 			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
 			    __func__, snd_una, toep->tid, tp->snd_una);
 		}
 #endif
 
 		if (tp->snd_una != snd_una) {
 			tp->snd_una = snd_una;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 	}
 
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits);
 #endif
 	so = inp->inp_socket;
 	txsd = &toep->txsd[toep->txsd_cidx];
 	plen = 0;
 	/*
 	 * Retire tx descriptors, oldest first, until the returned credits are
 	 * used up.  Credits must cover whole descriptors (asserted).  The
 	 * descriptor array is a ring: txsd_cidx wraps at txsd_total.
 	 */
 	while (credits) {
 		KASSERT(credits >= txsd->tx_credits,
 		    ("%s: too many (or partial) credits", __func__));
 		credits -= txsd->tx_credits;
 		toep->tx_credits += txsd->tx_credits;
 		plen += txsd->plen;
 		if (txsd->iv_buffer) {
 			free(txsd->iv_buffer, M_CXGBE);
 			txsd->iv_buffer = NULL;
 		}
 		txsd++;
 		toep->txsd_avail++;
 		KASSERT(toep->txsd_avail <= toep->txsd_total,
 		    ("%s: txsd avail > total", __func__));
 		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
 			txsd = &toep->txsd[0];
 			toep->txsd_cidx = 0;
 		}
 	}
 
 	/* Every credit is back: nothing is waiting on a completion. */
 	if (toep->tx_credits == toep->tx_total) {
 		toep->tx_nocompl = 0;
 		toep->plen_nocompl = 0;
 	}
 
 	if (toep->flags & TPF_TX_SUSPENDED &&
 	    toep->tx_credits >= toep->tx_total / 4) {
 #ifdef VERBOSE_TRACES
 		CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__,
 		    tid);
 #endif
 		/*
 		 * Tx was suspended and at least a quarter of the credits are
 		 * available again: resume, passing plen as the 'drop' argument
 		 * of the push routine.
 		 */
 		toep->flags &= ~TPF_TX_SUSPENDED;
 		CURVNET_SET(toep->vnet);
 		t4_push_data(sc, toep, plen);
 		CURVNET_RESTORE();
 	} else if (plen > 0) {
 		struct sockbuf *sb = &so->so_snd;
 		int sbu;
 
 		SOCKBUF_LOCK(sb);
 		sbu = sbused(sb);
 		if (ulp_mode(toep) == ULP_MODE_ISCSI) {
 
 			if (__predict_false(sbu > 0)) {
 				/*
 				 * The data transmitted before the tid's ULP mode
 				 * changed to ISCSI is still in so_snd.
 				 * Incoming credits should account for so_snd
 				 * first.
 				 */
 				sbdrop_locked(sb, min(sbu, plen));
 				plen -= min(sbu, plen);
 			}
 			sowwakeup_locked(so);	/* unlocks so_snd */
 			/* Whatever is left of plen is reclaimed from the PDU queue. */
 			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
 		} else {
 #ifdef VERBOSE_TRACES
 			CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__,
 			    tid, plen);
 #endif
 			sbdrop_locked(sb, plen);
 			/* Keep the TLS offload's send buffer offset in step. */
 			if (tls_tx_key(toep) &&
 			    toep->tls.mode == TLS_MODE_TLSOM) {
 				struct tls_ofld_info *tls_ofld = &toep->tls;
 
 				MPASS(tls_ofld->sb_off >= plen);
 				tls_ofld->sb_off -= plen;
 			}
 			/* Queued AIO tx jobs may be able to make progress now. */
 			if (!TAILQ_EMPTY(&toep->aiotx_jobq))
 				t4_aiotx_queue_toep(so, toep);
 			sowwakeup_locked(so);	/* unlocks so_snd */
 		}
 		SOCKBUF_UNLOCK_ASSERT(sb);
 	}
 
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 
 /*
  * Send a CPL_SET_TCB_FIELD for toep's tid on 'wrq', carrying 'word', 'mask',
  * 'val' and 'cookie'.  The request is marked F_NO_REPLY when 'reply' is 0;
  * when a reply is requested 'cookie' must not be CPL_COOKIE_RESERVED.  If
  * 'wrq' is an offload queue (EQ_OFLD) the request is also charged against the
  * toepcb's tx credits and takes up one tx descriptor with no payload.  Panics
  * if the work request cannot be allocated.
  */
 void
 t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
     uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
 {
 	struct cpl_set_tcb_field *req;
 	struct wrqe *wr;
 
 	MPASS((cookie & ~M_COOKIE) == 0);
 	if (reply) {
 		MPASS(cookie != CPL_COOKIE_RESERVED);
 	}
 
 	wr = alloc_wrqe(sizeof(*req), wrq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
 	req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id));
 	if (!reply)
 		req->reply_ctrl |= htobe16(F_NO_REPLY);
 	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
 	req->mask = htobe64(mask);
 	req->val = htobe64(val);
 
 	if ((wrq->eq.flags & EQ_TYPEMASK) == EQ_OFLD) {
 		struct ofld_tx_sdesc *sd = &toep->txsd[toep->txsd_pidx];
 
 		/* Describe the request: credits for the WR, no payload. */
 		sd->plen = 0;
 		sd->tx_credits = howmany(sizeof(*req), 16);
 		KASSERT(toep->tx_credits >= sd->tx_credits &&
 		    toep->txsd_avail > 0,
 		    ("%s: not enough credits (%d)", __func__,
 		    toep->tx_credits));
 		toep->tx_credits -= sd->tx_credits;
 		toep->txsd_avail--;
 
 		/* Advance the producer index around the descriptor ring. */
 		toep->txsd_pidx++;
 		if (__predict_false(toep->txsd_pidx == toep->txsd_total))
 			toep->txsd_pidx = 0;
 	}
 
 	t4_wrq_tx(sc, wr);
 }
 
 /*
  * Install the CPL handlers implemented in this file.  CPL_ABORT_RPL_RSS and
  * CPL_FW4_ACK are registered as shared handlers under CPL_COOKIE_TOM; the
  * rest are registered directly.
  */
 void
 t4_init_cpl_io_handlers(void)
 {
 
 	t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
 	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
 	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
 	t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl,
 	    CPL_COOKIE_TOM);
 	t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
 	t4_register_shared_cpl_handler(CPL_FW4_ACK, do_fw4_ack, CPL_COOKIE_TOM);
 }
 
 /*
  * Counterpart of t4_init_cpl_io_handlers(): registers a NULL handler for
  * each of the same opcodes (and the same CPL_COOKIE_TOM cookie for the
  * shared ones).
  */
 void
 t4_uninit_cpl_io_handlers(void)
 {
 
 	t4_register_cpl_handler(CPL_PEER_CLOSE, NULL);
 	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL);
 	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL);
 	t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, NULL, CPL_COOKIE_TOM);
 	t4_register_cpl_handler(CPL_RX_DATA, NULL);
 	t4_register_shared_cpl_handler(CPL_FW4_ACK, NULL, CPL_COOKIE_TOM);
 }
 
 /*
  * AIO tx jobs keep their per-job state in the generic 'backend' fields of
  * the job:
  *
  *   backend1 (aio_error) - an error that should be reported when the job
  *                          is completed;
  *   backend3 (aio_sent)  - the amount of data sent by the job so far;
  *   backend4 (aio_refs)  - a reference count on the job.
  *
  * Each unmapped mbuf holds a reference on the job as does the queue
  * so long as the job is queued.
  */
 #define	aio_error	backend1
 #define	aio_sent	backend3
 #define	aio_refs	backend4
 
 /*
  * tid of the toepcb reached from a job through its file's socket and tcpcb.
  * Only used by the VERBOSE_TRACES tracepoints of the AIO tx code.
  */
 #define	jobtotid(job)							\
 	(((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)
 
 /*
  * Drop one reference on an AIO tx job.  When the last reference goes away
  * the job is finished: it completes with the number of bytes sent if any
  * data was sent (a recorded error is discarded in that case), is cancelled
  * if the recorded error is ECANCELED, and otherwise fails with the recorded
  * error.
  */
 static void
 aiotx_free_job(struct kaiocb *job)
 {
 	long sent;
 	int err;
 
 	if (!refcount_release(&job->aio_refs))
 		return;
 
 	err = (intptr_t)job->aio_error;
 	sent = job->aio_sent;
 #ifdef VERBOSE_TRACES
 	CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
 	    jobtotid(job), job, sent, err);
 #endif
 
 	/* A partial send is reported as success with a short count. */
 	if (sent != 0)
 		err = 0;
 
 	switch (err) {
 	case 0:
 		job->msgsnd = 1;
 		aio_complete(job, sent, 0);
 		break;
 	case ECANCELED:
 		aio_cancel(job);
 		break;
 	default:
 		aio_complete(job, -1, err);
 		break;
 	}
 }
 
 /*
  * Free routine for the unmapped mbufs created by alloc_aiotx_mbuf(): unwire
  * every page the mbuf refers to and drop the mbuf's reference on the AIO job
  * stored in ext_arg1.
  */
 static void
 aiotx_free_pgs(struct mbuf *m)
 {
 	struct kaiocb *job;
 	int idx;
 
 	M_ASSERTEXTPG(m);
 	job = m->m_ext.ext_arg1;
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
 	    m->m_len, jobtotid(job));
 #endif
 
 	idx = 0;
 	while (idx < m->m_epg_npgs) {
 		vm_page_unwire(PHYS_TO_VM_PAGE(m->m_epg_pa[idx]), PQ_ACTIVE);
 		idx++;
 	}
 
 	aiotx_free_job(job);
 }
 
 /*
  * Allocate a chain of unmapped mbufs describing the next 'len' bytes
  * of an AIO job.
  *
  * The bytes start at offset aio_sent into the job's user buffer.  Each mbuf
  * covers at most MBUF_PEXT_MAX_PGS pages, uses aiotx_free_pgs() as its free
  * routine, and holds its own reference on the job.  Returns the head of the
  * chain.  The chain is cut short (and may be NULL) if the user pages cannot
  * be held, so callers must check how much was actually described.
  */
 static struct mbuf *
 alloc_aiotx_mbuf(struct kaiocb *job, int len)
 {
 	struct vmspace *vm;
 	vm_page_t pgs[MBUF_PEXT_MAX_PGS];
 	struct mbuf *m, *top, *last;
 	vm_map_t map;
 	vm_offset_t start;
 	int i, mlen, npages, pgoff;
 
 	KASSERT(job->aio_sent + len <= job->uaiocb.aio_nbytes,
 	    ("%s(%p, %d): request to send beyond end of buffer", __func__,
 	    job, len));
 
 	/*
 	 * The AIO subsystem will cancel and drain all requests before
 	 * permitting a process to exit or exec, so p_vmspace should
 	 * be stable here.
 	 */
 	vm = job->userproc->p_vmspace;
 	map = &vm->vm_map;
 	start = (uintptr_t)job->uaiocb.aio_buf + job->aio_sent;
 	/* Only the first mbuf can start in the middle of a page. */
 	pgoff = start & PAGE_MASK;
 
 	top = NULL;
 	last = NULL;
 	while (len > 0) {
 		/* As much as fits in one mbuf, allowing for the page offset. */
 		mlen = imin(len, MBUF_PEXT_MAX_PGS * PAGE_SIZE - pgoff);
 		KASSERT(mlen == len || ((start + mlen) & PAGE_MASK) == 0,
 		    ("%s: next start (%#jx + %#x) is not page aligned",
 		    __func__, (uintmax_t)start, mlen));
 
 		npages = vm_fault_quick_hold_pages(map, start, mlen,
 		    VM_PROT_WRITE, pgs, nitems(pgs));
 		/* Could not hold the pages: return what was built so far. */
 		if (npages < 0)
 			break;
 
 		m = mb_alloc_ext_pgs(M_WAITOK, aiotx_free_pgs);
 		if (m == NULL) {
 			vm_page_unhold_pages(pgs, npages);
 			break;
 		}
 
 		m->m_epg_1st_off = pgoff;
 		m->m_epg_npgs = npages;
 		if (npages == 1) {
 			KASSERT(mlen + pgoff <= PAGE_SIZE,
 			    ("%s: single page is too large (off %d len %d)",
 			    __func__, pgoff, mlen));
 			m->m_epg_last_len = mlen;
 		} else {
 			/*
 			 * Bytes in the last page: the total less the
 			 * (possibly partial) first page and the full pages
 			 * in between.
 			 */
 			m->m_epg_last_len = mlen - (PAGE_SIZE - pgoff) -
 			    (npages - 2) * PAGE_SIZE;
 		}
 		for (i = 0; i < npages; i++)
 			m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]);
 
 		m->m_len = mlen;
 		m->m_ext.ext_size = npages * PAGE_SIZE;
 		/* aiotx_free_pgs() reads the job back from ext_arg1. */
 		m->m_ext.ext_arg1 = job;
 		refcount_acquire(&job->aio_refs);
 
 #ifdef VERBOSE_TRACES
 		CTR5(KTR_CXGBE, "%s: tid %d, new mbuf %p for job %p, npages %d",
 		    __func__, jobtotid(job), m, job, npages);
 #endif
 
 		/* Append to the chain. */
 		if (top == NULL)
 			top = m;
 		else
 			last->m_next = m;
 		last = m;
 
 		len -= mlen;
 		start += mlen;
 		pgoff = 0;
 	}
 
 	return (top);
 }
 
 /*
  * Try to make progress on one AIO tx job that has been taken off the toepcb's
  * job queue.
  *
  * Called with the socket's send buffer locked.  The lock is dropped on entry
  * and held again on every return path.  The job is either completed (all of
  * it queued, or a non-blocking socket took only part of it), failed with an
  * error, or put back at the head of the job queue to be retried later (too
  * little room in the send buffer, or a blocking socket with data left).
  *
  * Data is moved by building unmapped mbufs over the user's pages with
  * alloc_aiotx_mbuf() and appending them to the send buffer, at most one
  * sndbuf's worth per pass.
  */
 static void
 t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
 {
 	struct sockbuf *sb;
 	struct file *fp;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct mbuf *m;
 	int error, len;
 	bool moretocome, sendmore;
 
 	sb = &so->so_snd;
 	SOCKBUF_UNLOCK(sb);
 	fp = job->fd_file;
 	m = NULL;
 
 #ifdef MAC
 	error = mac_socket_check_send(fp->f_cred, so);
 	if (error != 0)
 		goto out;
 #endif
 
 	/* Inline sosend_generic(). */
 
 	error = sblock(sb, SBL_WAIT);
 	MPASS(error == 0);
 
 sendanother:
 	SOCKBUF_LOCK(sb);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		SOCKBUF_UNLOCK(sb);
 		sbunlock(sb);
 		if ((so->so_options & SO_NOSIGPIPE) == 0) {
 			PROC_LOCK(job->userproc);
 			kern_psignal(job->userproc, SIGPIPE);
 			PROC_UNLOCK(job->userproc);
 		}
 		error = EPIPE;
 		goto out;
 	}
 	if (so->so_error) {
 		/* Report the pending socket error once and clear it. */
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(sb);
 		sbunlock(sb);
 		goto out;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		SOCKBUF_UNLOCK(sb);
 		sbunlock(sb);
 		error = ENOTCONN;
 		goto out;
 	}
 	if (sbspace(sb) < sb->sb_lowat) {
 		MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));
 
 		/*
 		 * Don't block if there is too little room in the socket
 		 * buffer.  Instead, requeue the request.
 		 */
 		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
 			SOCKBUF_UNLOCK(sb);
 			sbunlock(sb);
 			error = ECANCELED;
 			goto out;
 		}
 		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
 		SOCKBUF_UNLOCK(sb);
 		sbunlock(sb);
 		goto out;
 	}
 
 	/*
 	 * Write as much data as the socket permits, but no more than a
 	 * single sndbuf at a time.
 	 */
 	len = sbspace(sb);
 	if (len > job->uaiocb.aio_nbytes - job->aio_sent) {
 		len = job->uaiocb.aio_nbytes - job->aio_sent;
 		moretocome = false;
 	} else
 		moretocome = true;
 	if (len > toep->params.sndbuf) {
 		len = toep->params.sndbuf;
 		sendmore = true;
 	} else
 		sendmore = false;
 
 	/* Other jobs are waiting, so more data will follow this one. */
 	if (!TAILQ_EMPTY(&toep->aiotx_jobq))
 		moretocome = true;
 	SOCKBUF_UNLOCK(sb);
 	MPASS(len != 0);
 
 	/* May describe fewer than 'len' bytes; NULL means none at all. */
 	m = alloc_aiotx_mbuf(job, len);
 	if (m == NULL) {
 		sbunlock(sb);
 		error = EFAULT;
 		goto out;
 	}
 
 	/* Inlined tcp_usr_send(). */
 
 	inp = toep->inp;
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		INP_WUNLOCK(inp);
 		sbunlock(sb);
 		error = ECONNRESET;
 		goto out;
 	}
 
 	/* Account for what the chain actually covers, not for 'len'. */
 	job->aio_sent += m_length(m, NULL);
 
 	/* The chain belongs to the socket buffer from here on. */
 	sbappendstream(sb, m, 0);
 	m = NULL;
 
 	if (!(inp->inp_flags & INP_DROPPED)) {
 		tp = intotcpcb(inp);
 		if (moretocome)
 			tp->t_flags |= TF_MORETOCOME;
 		error = tp->t_fb->tfb_tcp_output(tp);
 		if (moretocome)
 			tp->t_flags &= ~TF_MORETOCOME;
 	}
 
 	INP_WUNLOCK(inp);
 	if (sendmore)
 		goto sendanother;
 	sbunlock(sb);
 
 	if (error)
 		goto out;
 
 	/*
 	 * If this is a blocking socket and the request has not been
 	 * fully completed, requeue it until the socket is ready
 	 * again.
 	 */
 	if (job->aio_sent < job->uaiocb.aio_nbytes &&
 	    !(so->so_state & SS_NBIO)) {
 		SOCKBUF_LOCK(sb);
 		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
 			SOCKBUF_UNLOCK(sb);
 			error = ECANCELED;
 			goto out;
 		}
 		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
 		/* Return with the send buffer locked, as the caller expects. */
 		return;
 	}
 
 	/*
 	 * If the request will not be requeued, drop the queue's
 	 * reference to the job.  Any mbufs in flight should still
 	 * hold a reference, but this drops the reference that the
 	 * queue owns while it is waiting to queue mbufs to the
 	 * socket.
 	 */
 	aiotx_free_job(job);
 
 out:
 	if (error) {
 		job->aio_error = (void *)(intptr_t)error;
 		aiotx_free_job(job);
 	}
 	/*
 	 * 'm' can be a chain of several mbufs, so the whole chain has to be
 	 * freed: each mbuf holds wired pages and a reference on the job, and
 	 * the job cannot complete until all of those references are gone.
 	 */
 	if (m != NULL)
 		m_freem(m);
 	SOCKBUF_LOCK(sb);
 }
 
 /*
  * Task that services a toepcb's queue of AIO tx jobs for as long as jobs
  * remain and the socket is writeable.  Afterwards it clears aiotx_so and
  * releases the toepcb and socket references that t4_aiotx_queue_toep() took
  * when it scheduled the task.
  */
 static void
 t4_aiotx_task(void *context, int pending)
 {
 	struct toepcb *toep = context;
 	struct socket *so = toep->aiotx_so;
 	struct kaiocb *job;
 
 	CURVNET_SET(toep->vnet);
 	SOCKBUF_LOCK(&so->so_snd);
 	for (;;) {
 		job = TAILQ_FIRST(&toep->aiotx_jobq);
 		if (job == NULL || !sowriteable(so))
 			break;
 		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
 
 		/* Skip jobs whose cancel function could not be cleared. */
 		if (aio_clear_cancel_function(job))
 			t4_aiotx_process_job(toep, so, job);
 	}
 	toep->aiotx_so = NULL;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	CURVNET_RESTORE();
 
 	free_toepcb(toep);
 	SOCK_LOCK(so);
 	sorele(so);
 }
 
 /*
  * Schedule the AIO tx task for 'toep' unless it is already scheduled
  * (aiotx_so is set).  A socket reference and a hold on the toepcb are taken
  * on behalf of the task.  The send buffer lock must be held (asserted).
  */
 static void
 t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep)
 {
 
 	SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd);
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s",
 	    __func__, toep->tid, toep->aiotx_so != NULL ? "true" : "false");
 #endif
 	if (toep->aiotx_so == NULL) {
 		soref(so);
 		toep->aiotx_so = so;
 		hold_toepcb(toep);
 		soaio_enqueue(&toep->aiotx_task);
 	}
 }
 
 static void
 t4_aiotx_cancel(struct kaiocb *job)
 {
 	struct socket *so;
 	struct sockbuf *sb;
 	struct tcpcb *tp;
 	struct toepcb *toep;
 
 	so = job->fd_file->f_data;
 	tp = so_sototcpcb(so);
 	toep = tp->t_toe;
 	MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE);
 	sb = &so->so_snd;
 
 	SOCKBUF_LOCK(sb);
 	if (!aio_cancel_cleared(job))
 		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
 	SOCKBUF_UNLOCK(sb);
 
 	job->aio_error = (void *)(intptr_t)ECANCELED;
 	aiotx_free_job(job);
 }
 
 /*
  * Queue an AIO write job on the toepcb behind 'so'.  Returns EOPNOTSUPP for
  * anything but LIO_WRITE, when tx_zcopy is off for the adapter, or when the
  * connection has a TLS tx key.  Otherwise the job is given a cancel routine
  * and an initial reference, appended to the job queue, and 0 is returned;
  * the AIO tx task is scheduled right away if the socket is writeable.
  */
 int
 t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job)
 {
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = td_adapter(toep->td);
 	struct sockbuf *sb = &so->so_snd;
 
 	/* Only writes, with tx_zcopy enabled, on connections without TLS tx. */
 	if (job->uaiocb.aio_lio_opcode != LIO_WRITE || !sc->tt.tx_zcopy ||
 	    tls_tx_key(toep))
 		return (EOPNOTSUPP);
 
 	SOCKBUF_LOCK(sb);
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: queueing %p for tid %u", __func__, job, toep->tid);
 #endif
 	if (!aio_set_cancel_function(job, t4_aiotx_cancel))
 		panic("new job was cancelled");
 
 	/* This first reference belongs to the queue. */
 	refcount_init(&job->aio_refs, 1);
 	TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list);
 	if (sowriteable(so))
 		t4_aiotx_queue_toep(so, toep);
 	SOCKBUF_UNLOCK(sb);
 
 	return (0);
 }
 
 /*
  * Set up the AIO tx state of a toepcb: an empty job queue and the task that
  * runs t4_aiotx_task() with the toepcb as its argument.
  */
 void
 aiotx_init_toep(struct toepcb *toep)
 {
 
 	TAILQ_INIT(&toep->aiotx_jobq);
 	TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep);
 }
 #endif
Index: head/sys/dev/cxgbe/tom/t4_tls.c
===================================================================
--- head/sys/dev/cxgbe/tom/t4_tls.c	(revision 366853)
+++ head/sys/dev/cxgbe/tom/t4_tls.c	(revision 366854)
@@ -1,2242 +1,2240 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2017-2018 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: John Baldwin <jhb@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_inet.h"
 #include "opt_kern_tls.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/ktr.h>
 #ifdef KERN_TLS
 #include <sys/ktls.h>
 #endif
 #include <sys/sglist.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/systm.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp_var.h>
 #include <netinet/toecore.h>
 #ifdef KERN_TLS
 #include <opencrypto/cryptodev.h>
 #include <opencrypto/xform.h>
 #endif
 
 #ifdef TCP_OFFLOAD
 #include "common/common.h"
 #include "common/t4_tcb.h"
 #include "crypto/t4_crypto.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
 /*
  * The TCP sequence number of a CPL_TLS_DATA mbuf is saved here while
  * the mbuf is in the ulp_pdu_reclaimq.
  */
 #define	tls_tcp_seq	PH_loc.thirtytwo[0]
 
 /*
  * Handshake lock used for the handshake timer.  Having a global lock
  * is perhaps not ideal, but it avoids having to use callout_drain()
  * in tls_uninit_toep() which can't block.  Also, the timer shouldn't
  * actually fire for most connections.
  */
 static struct mtx tls_handshake_lock;
 
 /*
  * Convenience wrapper around t4_set_tcb_field() that targets this
  * connection's offload tx queue and passes 0 for the two trailing
  * arguments.
  */
 static void
 t4_set_tls_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask,
     uint64_t val)
 {
 
 	t4_set_tcb_field(td_adapter(toep->td), toep->ofld_txq, toep, word,
 	    mask, val, 0, 0);
 }
 
 /* TLS and DTLS common routines */
 bool
 can_tls_offload(struct adapter *sc)
 {
 
 	return (sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS);
 }
 
 /*
  * Non-zero when a transmit key address is recorded for this connection
  * (tx_key_addr is -1 while no key is set).
  */
 int
 tls_tx_key(struct toepcb *toep)
 {
 
 	return (toep->tls.tx_key_addr >= 0);
 }
 
 /*
  * Non-zero when a receive key address is recorded for this connection
  * (rx_key_addr is -1 while no key is set).
  */
 int
 tls_rx_key(struct toepcb *toep)
 {
 
 	return (toep->tls.rx_key_addr >= 0);
 }
 
 /*
  * Size of the key material carried in a TLS tx work request: the full
  * tx key context when the key is sent as immediate data, otherwise
  * KEY_IN_DDR_SIZE.
  */
 static int
 key_size(struct toepcb *toep)
 {
 	struct tls_ofld_info *tls = &toep->tls;
 
 	if (tls->key_location == TLS_SFO_WR_CONTEXTLOC_IMMEDIATE)
 		return (tls->k_ctx.tx_key_info_size);
 	return (KEY_IN_DDR_SIZE);
 }
 
 /* Set TLS Key-Id in TCB */
 static void
 t4_set_tls_keyid(struct toepcb *toep, unsigned int key_id)
 {
 
 	t4_set_tls_tcb_field(toep, W_TCB_RX_TLS_KEY_TAG,
 			 V_TCB_RX_TLS_KEY_TAG(M_TCB_RX_TLS_BUF_TAG),
 			 V_TCB_RX_TLS_KEY_TAG(key_id));
 }
 
 /* Clear TF_RX_QUIESCE to re-enable receive. */
 static void
 t4_clear_rx_quiesce(struct toepcb *toep)
 {
 
 	t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0);
 }
 
 static void
 tls_clr_ofld_mode(struct toepcb *toep)
 {
 
 	tls_stop_handshake_timer(toep);
 
 	/* Operate in PDU extraction mode only. */
 	t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW,
 	    V_TCB_ULP_RAW(M_TCB_ULP_RAW),
 	    V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)));
 	t4_clear_rx_quiesce(toep);
 }
 
 /*
  * Handler for TCP_TLSOM_CLR_QUIES: stop the handshake timer and then
  * clear TF_RX_QUIESCE so that receive is re-enabled.
  */
 static void
 tls_clr_quiesce(struct toepcb *toep)
 {
 
 	tls_stop_handshake_timer(toep);
 	t4_clear_rx_quiesce(toep);
 }
 
 /*
  * Calculate the TLS data expansion size, i.e. the number of bytes that
  * TLS framing adds when 'data_len' bytes are split into records of at
  * most k_ctx.frag_size bytes.
  *
  * The cipher mode is taken from scmd0.seqno_numivs:
  *  - AES-GCM: each record grows by the GCM tag, the explicit AEAD data
  *    and the TLS header.
  *  - Any other mode except NOP: each record grows by padding up to the
  *    cipher block size (a full block when already aligned), the MAC,
  *    the TLS header and one cipher block.
  *  - NOP: the result is 0.
  *
  * If 'full_pdus_only' is non-zero, only whole records are considered:
  * *pdus_per_ulp is set to the number of records that fit in 'data_len'
  * (clamped to the range 1..32) and the return value is the expansion for
  * exactly that many records.  Otherwise a trailing partial record is
  * accounted for as well and *pdus_per_ulp is left untouched.
  *
  * NOTE(review): the divisions assume k_ctx.frag_size is non-zero; nothing
  * in this function checks that.
  */
 static int
 tls_expansion_size(struct toepcb *toep, int data_len, int full_pdus_only,
     unsigned short *pdus_per_ulp)
 {
 	struct tls_ofld_info *tls_ofld = &toep->tls;
 	struct tls_scmd *scmd = &tls_ofld->scmd0;
 	int expn_size = 0, frag_count = 0, pad_per_pdu = 0,
 	    pad_last_pdu = 0, last_frag_size = 0, max_frag_size = 0;
 	int exp_per_pdu = 0;
 	int hdr_len = TLS_HEADER_LENGTH;
 
 	/* Single-pass loop; 'break' is used as an early exit. */
 	do {
 		max_frag_size = tls_ofld->k_ctx.frag_size;
 		if (G_SCMD_CIPH_MODE(scmd->seqno_numivs) ==
 		   SCMD_CIPH_MODE_AES_GCM) {
 			/* Number of full-sized records. */
 			frag_count = (data_len / max_frag_size);
 			exp_per_pdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
 				hdr_len;
 			expn_size =  frag_count * exp_per_pdu;
 			if (full_pdus_only) {
 				/* Records that fit including their overhead. */
 				*pdus_per_ulp = data_len / (exp_per_pdu +
 					max_frag_size);
 				if (*pdus_per_ulp > 32)
 					*pdus_per_ulp = 32;
 				else if(!*pdus_per_ulp)
 					*pdus_per_ulp = 1;
 				expn_size = (*pdus_per_ulp) * exp_per_pdu;
 				break;
 			}
 			/* A trailing partial record costs a full overhead. */
 			if ((last_frag_size = data_len % max_frag_size) > 0) {
 				frag_count += 1;
 				expn_size += exp_per_pdu;
 			}
 			break;
 		} else if (G_SCMD_CIPH_MODE(scmd->seqno_numivs) !=
 			   SCMD_CIPH_MODE_NOP) {
 			/* Calculate the number of fragments we can make */
 			frag_count  = (data_len / max_frag_size);
 			if (frag_count > 0) {
 				/* Padding needed to block-align data + MAC. */
 				pad_per_pdu = (((howmany((max_frag_size +
 						       tls_ofld->mac_length),
 						      CIPHER_BLOCK_SIZE)) *
 						CIPHER_BLOCK_SIZE) -
 					       (max_frag_size +
 						tls_ofld->mac_length));
 				/* Already aligned: a whole block of padding. */
 				if (!pad_per_pdu)
 					pad_per_pdu = CIPHER_BLOCK_SIZE;
 				exp_per_pdu = pad_per_pdu +
 				       	tls_ofld->mac_length +
 					hdr_len + CIPHER_BLOCK_SIZE;
 				expn_size = frag_count * exp_per_pdu;
 			}
 			/*
 			 * Note that exp_per_pdu is still 0 here when
 			 * data_len is smaller than one full fragment.
 			 */
 			if (full_pdus_only) {
 				*pdus_per_ulp = data_len / (exp_per_pdu +
 					max_frag_size);
 				if (*pdus_per_ulp > 32)
 					*pdus_per_ulp = 32;
 				else if (!*pdus_per_ulp)
 					*pdus_per_ulp = 1;
 				expn_size = (*pdus_per_ulp) * exp_per_pdu;
 				break;
 			}
 			/* Consider the last fragment */
 			if ((last_frag_size = data_len % max_frag_size) > 0) {
 				pad_last_pdu = (((howmany((last_frag_size +
 							tls_ofld->mac_length),
 						       CIPHER_BLOCK_SIZE)) *
 						 CIPHER_BLOCK_SIZE) -
 						(last_frag_size +
 						 tls_ofld->mac_length));
 				if (!pad_last_pdu)
 					pad_last_pdu = CIPHER_BLOCK_SIZE;
 				expn_size += (pad_last_pdu +
 					      tls_ofld->mac_length + hdr_len +
 					      CIPHER_BLOCK_SIZE);
 			}
 		}
 	} while (0);
 
 	return (expn_size);
 }
 
 /*
  * Place the transmit key in a work request at 'dst'.
  *
  * Nothing is written if no tx key context is present.  When the key
  * lives in adapter memory (TLS_SFO_WR_CONTEXTLOC_DDR) a NOOP sub-command
  * followed by a MEMRD sub-command referencing tx_key_addr is emitted.
  * When the key is sent inline (TLS_SFO_WR_CONTEXTLOC_IMMEDIATE) the
  * stored tx key context is copied verbatim.
  */
 static void
 tls_copy_tx_key(struct toepcb *toep, void *dst)
 {
 	struct tls_ofld_info *tls = &toep->tls;
 	struct ulptx_idata *noop;
 	struct ulptx_sc_memrd *memrd;
 
 	if (tls->k_ctx.tx_key_info_size <= 0)
 		return;
 
 	if (tls->key_location == TLS_SFO_WR_CONTEXTLOC_DDR) {
 		noop = dst;
 		noop->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
 		noop->len = htobe32(0);
 
 		/* The memory-read sub-command directly follows the NOOP. */
 		memrd = (struct ulptx_sc_memrd *)(noop + 1);
 		memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) |
 		    V_ULP_TX_SC_MORE(1) |
 		    V_ULPTX_LEN16(tls->k_ctx.tx_key_info_size >> 4));
 		memrd->addr = htobe32(tls->tx_key_addr >> 5);
 		return;
 	}
 
 	if (tls->key_location == TLS_SFO_WR_CONTEXTLOC_IMMEDIATE)
 		memcpy(dst, &tls->k_ctx.tx, tls->k_ctx.tx_key_info_size);
 }
 
 /*
  * Map a TLS/DTLS record content type to the type used in CPL_TX_TLS_SFO.
  * Anything that is not CCS, alert, handshake or heartbeat is reported
  * as data.
  */
 static inline unsigned char
 tls_content_type(unsigned char content_type)
 {
 	/*
 	 * XXX: Shouldn't this map CONTENT_TYPE_APP_DATA to DATA and
 	 * default to "CUSTOM" for all other types including
 	 * heartbeat?
 	 */
 	if (content_type == CONTENT_TYPE_CCS)
 		return (CPL_TX_TLS_SFO_TYPE_CCS);
 	if (content_type == CONTENT_TYPE_ALERT)
 		return (CPL_TX_TLS_SFO_TYPE_ALERT);
 	if (content_type == CONTENT_TYPE_HANDSHAKE)
 		return (CPL_TX_TLS_SFO_TYPE_HANDSHAKE);
 	if (content_type == CONTENT_TYPE_HEARTBEAT)
 		return (CPL_TX_TLS_SFO_TYPE_HEARTBEAT);
 	return (CPL_TX_TLS_SFO_TYPE_DATA);
 }
 
 /*
  * Translate a cipher key size selector into the CH_CK_SIZE_* encoding.
  * AES_NOP yields 15; unknown selectors are treated like AES_256.
  */
 static unsigned char
 get_cipher_key_size(unsigned int ck_size)
 {
 	unsigned char encoded;
 
 	switch (ck_size) {
 	case AES_NOP:
 		encoded = 15;
 		break;
 	case AES_128:
 		encoded = CH_CK_SIZE_128;
 		break;
 	case AES_192:
 		encoded = CH_CK_SIZE_192;
 		break;
 	case AES_256:
 	default:
 		encoded = CH_CK_SIZE_256;
 		break;
 	}
 	return (encoded);
 }
 
 /*
  * Translate a MAC algorithm selector into the CH_MK_SIZE_* encoding.
  * SHA1 and unknown selectors map to CH_MK_SIZE_160.
  */
 static unsigned char
 get_mac_key_size(unsigned int mk_size)
 {
 	unsigned char encoded;
 
 	switch (mk_size) {
 	case SHA_NOP:
 		encoded = CH_MK_SIZE_128;
 		break;
 	case SHA_224:
 		encoded = CH_MK_SIZE_192;
 		break;
 	case SHA_256:
 		encoded = CH_MK_SIZE_256;
 		break;
 	case SHA_GHASH:
 	case SHA_384:
 	case SHA_512:
 		/* GHASH, SHA-384 and SHA-512 all use the 512-bit slot. */
 		encoded = CH_MK_SIZE_512;
 		break;
 	case SHA1:
 	default:
 		encoded = CH_MK_SIZE_160;
 		break;
 	}
 	return (encoded);
 }
 
 /*
  * Convert a wire protocol version into the driver's version constant.
  * Versions other than TLS 1.1, TLS 1.2 and DTLS 1.2 yield
  * TLS_VERSION_MAX.
  */
 static unsigned int
 get_proto_ver(int proto_ver)
 {
 
 	if (proto_ver == TLS1_2_VERSION)
 		return (TLS_1_2_VERSION);
 	if (proto_ver == TLS1_1_VERSION)
 		return (TLS_1_1_VERSION);
 	if (proto_ver == DTLS1_2_VERSION)
 		return (DTLS_1_2_VERSION);
 	return (TLS_VERSION_MAX);
 }
 
 static void
 tls_rxkey_flit1(struct tls_keyctx *kwr, struct tls_key_context *kctx)
 {
 
 	if (kctx->state.enc_mode == CH_EVP_CIPH_GCM_MODE) {
 		kwr->u.rxhdr.ivinsert_to_authinsrt =
 		    htobe64(V_TLS_KEYCTX_TX_WR_IVINSERT(6ULL) |
 			V_TLS_KEYCTX_TX_WR_AADSTRTOFST(1ULL) |
 			V_TLS_KEYCTX_TX_WR_AADSTOPOFST(5ULL) |
 			V_TLS_KEYCTX_TX_WR_AUTHSRTOFST(14ULL) |
 			V_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(16ULL) |
 			V_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(14ULL) |
 			V_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(0ULL) |
 			V_TLS_KEYCTX_TX_WR_AUTHINSRT(16ULL));
 		kwr->u.rxhdr.ivpresent_to_rxmk_size &=
 			~(V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(1));
 		kwr->u.rxhdr.authmode_to_rxvalid &=
 			~(V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(1));
 	} else {
 		kwr->u.rxhdr.ivinsert_to_authinsrt =
 		    htobe64(V_TLS_KEYCTX_TX_WR_IVINSERT(6ULL) |
 			V_TLS_KEYCTX_TX_WR_AADSTRTOFST(1ULL) |
 			V_TLS_KEYCTX_TX_WR_AADSTOPOFST(5ULL) |
 			V_TLS_KEYCTX_TX_WR_AUTHSRTOFST(22ULL) |
 			V_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(0ULL) |
 			V_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(22ULL) |
 			V_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(0ULL) |
 			V_TLS_KEYCTX_TX_WR_AUTHINSRT(0ULL));
 	}
 }
 
 /* Rx key */
 static void
 prepare_rxkey_wr(struct tls_keyctx *kwr, struct tls_key_context *kctx)
 {
 	unsigned int ck_size = kctx->cipher_secret_size;
 	unsigned int mk_size = kctx->mac_secret_size;
 	int proto_ver = kctx->proto_ver;
 
 	kwr->u.rxhdr.flitcnt_hmacctrl =
 		((kctx->rx_key_info_size >> 4) << 3) | kctx->hmac_ctrl;
 
 	kwr->u.rxhdr.protover_ciphmode =
 		V_TLS_KEYCTX_TX_WR_PROTOVER(get_proto_ver(proto_ver)) |
 		V_TLS_KEYCTX_TX_WR_CIPHMODE(kctx->state.enc_mode);
 
 	kwr->u.rxhdr.authmode_to_rxvalid =
 		V_TLS_KEYCTX_TX_WR_AUTHMODE(kctx->state.auth_mode) |
 		V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(1) |
 		V_TLS_KEYCTX_TX_WR_SEQNUMCTRL(3) |
 		V_TLS_KEYCTX_TX_WR_RXVALID(1);
 
 	kwr->u.rxhdr.ivpresent_to_rxmk_size =
 		V_TLS_KEYCTX_TX_WR_IVPRESENT(0) |
 		V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(1) |
 		V_TLS_KEYCTX_TX_WR_RXCK_SIZE(get_cipher_key_size(ck_size)) |
 		V_TLS_KEYCTX_TX_WR_RXMK_SIZE(get_mac_key_size(mk_size));
 
 	tls_rxkey_flit1(kwr, kctx);
 
 	/* No key reversal for GCM */
 	if (kctx->state.enc_mode != CH_EVP_CIPH_GCM_MODE) {
 		t4_aes_getdeckey(kwr->keys.edkey, kctx->rx.key,
 				 (kctx->cipher_secret_size << 3));
 		memcpy(kwr->keys.edkey + kctx->cipher_secret_size,
 		       kctx->rx.key + kctx->cipher_secret_size,
 		       (IPAD_SIZE + OPAD_SIZE));
 	} else {
 		memcpy(kwr->keys.edkey, kctx->rx.key,
 		       (kctx->rx_key_info_size - SALT_SIZE));
 		memcpy(kwr->u.rxhdr.rxsalt, kctx->rx.salt, SALT_SIZE);
 	}
 }
 
 /*
  * Build the transmit key context 'kwr' from the software key state
  * 'kctx'.  The flags word is assembled in host order and stored with
  * htons(); for GCM the salt is copied in and TXOPAD_PRESENT is dropped.
  */
 static void
 prepare_txkey_wr(struct tls_keyctx *kwr, struct tls_key_context *kctx)
 {
 	unsigned int cipher_sel = kctx->cipher_secret_size;
 	unsigned int mac_sel = kctx->mac_secret_size;
 	unsigned int flags;
 
 	kwr->u.txhdr.ctxlen =
 		(kctx->tx_key_info_size >> 4);
 
 	flags = V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1) |
 		V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1) |
 		V_TLS_KEYCTX_TX_WR_TXCK_SIZE(get_cipher_key_size(cipher_sel)) |
 		V_TLS_KEYCTX_TX_WR_TXMK_SIZE(get_mac_key_size(mac_sel)) |
 		V_TLS_KEYCTX_TX_WR_TXVALID(1);
 
 	memcpy(kwr->keys.edkey, kctx->tx.key, HDR_KCTX_SIZE);
 	if (kctx->state.enc_mode == CH_EVP_CIPH_GCM_MODE) {
 		memcpy(kwr->u.txhdr.txsalt, kctx->tx.salt, SALT_SIZE);
 		flags &= ~(V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1));
 	}
 	kwr->u.txhdr.dualck_to_txvalid = htons(flags);
 }
 
 /* TLS Key memory management */
 
 /*
  * Reserve TLS_KEY_CONTEXT_SZ bytes from the adapter's key_map arena
  * without sleeping.  Returns the allocated address, or -1 if the
  * allocation failed.
  */
 static int
 get_new_keyid(struct toepcb *toep)
 {
 	struct adapter *sc = td_adapter(toep->td);
 	vmem_addr_t key_addr;
 	int rc;
 
 	rc = vmem_alloc(sc->key_map, TLS_KEY_CONTEXT_SZ,
 	    M_NOWAIT | M_FIRSTFIT, &key_addr);
 	if (rc != 0)
 		return (-1);
 
 	return (key_addr);
 }
 
 /*
  * Return a key slot of TLS_KEY_CONTEXT_SZ bytes at 'keyid' to the
  * adapter's key_map arena.
  */
 static void
 free_keyid(struct toepcb *toep, int keyid)
 {
 
 	vmem_free(td_adapter(toep->td)->key_map, keyid, TLS_KEY_CONTEXT_SZ);
 }
 
 /*
  * Release whichever of the connection's receive and transmit key slots
  * are in use and mark them as unset (-1).
  */
 static void
 clear_tls_keyid(struct toepcb *toep)
 {
 	struct tls_ofld_info *tls = &toep->tls;
 
 	/* Receive key first, then transmit key. */
 	if (tls->rx_key_addr >= 0) {
 		free_keyid(toep, tls->rx_key_addr);
 		tls->rx_key_addr = -1;
 	}
 
 	if (tls->tx_key_addr >= 0) {
 		free_keyid(toep, tls->tx_key_addr);
 		tls->tx_key_addr = -1;
 	}
 }
 
 /*
  * Look up the existing key address selected by 'ops': the receive key
  * if KEY_WRITE_RX is set, else the transmit key if KEY_WRITE_TX is set,
  * else -1.
  */
 static int
 get_keyid(struct tls_ofld_info *tls_ofld, unsigned int ops)
 {
 
 	if (ops & KEY_WRITE_RX)
 		return (tls_ofld->rx_key_addr);
 	if (ops & KEY_WRITE_TX)
 		return (tls_ofld->tx_key_addr);
 	return (-1);
 }
 
 /*
  * Maximum payload length per TLS work request.  For fragment sizes up
  * to 8192 this is min(3 * 4096, TP_TX_PG_SZ) rounded down to a multiple
  * of 1448; larger fragment sizes use FC_TP_PLEN_MAX.
  */
 static int
 get_tp_plen_max(struct tls_ofld_info *tls_ofld)
 {
 	int rounded = ((min(3*4096, TP_TX_PG_SZ))/1448) * 1448;
 
 	if (tls_ofld->k_ctx.frag_size <= 8192)
 		return (rounded);
 	return (FC_TP_PLEN_MAX);
 }
 
 /*
  * Build and send the ULP_TX memory-write work request that stores a TLS
  * key context in adapter key memory.
  *
  * When the KEY_CLR_LOC bits of k_ctx->l_p_key are clear a new key slot is
  * allocated; otherwise (renegotiation) the slot already recorded in the
  * toep for the requested direction is reused.  On success the slot is
  * recorded in tx_key_addr or rx_key_addr according to the KEY_GET_LOC
  * bits, one tx descriptor and the matching credits are consumed, and the
  * request is handed to t4_wrq_tx().
  *
  * Returns 0 on success, EAGAIN if no tx descriptor is available, ENOSPC
  * if no key slot could be allocated, or ENOMEM if the work request could
  * not be allocated.
  */
 static int
 tls_program_key_id(struct toepcb *toep, struct tls_key_context *k_ctx)
 {
 	struct tls_ofld_info *tls_ofld = &toep->tls;
 	struct adapter *sc = td_adapter(toep->td);
 	struct ofld_tx_sdesc *txsd;
 	int kwrlen, kctxlen, keyid, len;
 	struct wrqe *wr;
 	struct tls_key_req *kwr;
 	struct tls_keyctx *kctx;
 	bool new_keyid;
 
 	kwrlen = sizeof(*kwr);
 	kctxlen = roundup2(sizeof(*kctx), 32);
 	len = roundup2(kwrlen + kctxlen, 16);
 
 	if (toep->txsd_avail == 0)
 		return (EAGAIN);
 
 	/* Don't allocate a new key slot for re-negotiation. */
 	new_keyid = !G_KEY_CLR_LOC(k_ctx->l_p_key);
 	if (new_keyid) {
 		if ((keyid = get_new_keyid(toep)) < 0) {
 			return (ENOSPC);
 		}
 	} else {
 		/*
 		 * NOTE(review): get_keyid() returns -1 when no key has been
 		 * recorded for the requested direction; that value is not
 		 * checked here.
 		 */
 		keyid = get_keyid(tls_ofld, k_ctx->l_p_key);
 	}
 
 	wr = alloc_wrqe(len, toep->ofld_txq);
 	if (wr == NULL) {
 		/*
 		 * Only give back a slot allocated above.  A reused slot is
 		 * still referenced by rx_key_addr/tx_key_addr and will be
 		 * released by clear_tls_keyid(); freeing it here as well
 		 * would release it twice.
 		 */
 		if (new_keyid)
 			free_keyid(toep, keyid);
 		return (ENOMEM);
 	}
 	kwr = wrtod(wr);
 	memset(kwr, 0, kwrlen);
 
 	kwr->wr_hi = htobe32(V_FW_WR_OP(FW_ULPTX_WR) | F_FW_WR_COMPL |
 	    F_FW_WR_ATOMIC);
 	kwr->wr_mid = htobe32(V_FW_WR_LEN16(DIV_ROUND_UP(len, 16)) |
 	    V_FW_WR_FLOWID(toep->tid));
 	kwr->protocol = get_proto_ver(k_ctx->proto_ver);
 	kwr->mfs = htons(k_ctx->frag_size);
 	kwr->reneg_to_write_rx = k_ctx->l_p_key;
 
 	/* master command */
 	kwr->cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) |
 	    V_T5_ULP_MEMIO_ORDER(1) | V_T5_ULP_MEMIO_IMM(1));
 	kwr->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(kctxlen >> 5));
 	kwr->len16 = htobe32((toep->tid << 8) |
 	    DIV_ROUND_UP(len - sizeof(struct work_request_hdr), 16));
 	kwr->kaddr = htobe32(V_ULP_MEMIO_ADDR(keyid >> 5));
 
 	/* sub command */
 	kwr->sc_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
 	kwr->sc_len = htobe32(kctxlen);
 
 	/* The key context immediately follows the request header. */
 	kctx = (struct tls_keyctx *)(kwr + 1);
 	memset(kctx, 0, kctxlen);
 
 	if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_TX) {
 		tls_ofld->tx_key_addr = keyid;
 		prepare_txkey_wr(kctx, k_ctx);
 	} else if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) {
 		tls_ofld->rx_key_addr = keyid;
 		prepare_rxkey_wr(kctx, k_ctx);
 	}
 
 	/* Account for the descriptor and credits this request consumes. */
 	txsd = &toep->txsd[toep->txsd_pidx];
 	txsd->tx_credits = DIV_ROUND_UP(len, 16);
 	txsd->plen = 0;
 	toep->tx_credits -= txsd->tx_credits;
 	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
 		toep->txsd_pidx = 0;
 	toep->txsd_avail--;
 
 	t4_wrq_tx(sc, wr);
 
 	return (0);
 }
 
 /*
  * Store a key received from SSL in DDR.
  *
  * Installs the key context 'uk_ctx' supplied through the
  * TCP_TLSOM_SET_TLS_CONTEXT socket option.  The direction (RX or TX) is
  * taken from the KEY_GET_LOC bits of uk_ctx->l_p_key.
  *
  * Returns ENOENT if the connection is not ESTABLISHED, EOPNOTSUPP for an
  * RX key on a connection that is not in TLS ULP mode, the error from
  * tls_program_key_id() if programming the key fails, and 0 otherwise.
  * An unsupported protocol version is not an error: the key address for
  * that direction is reset to -1 and 0 is returned.
  */
 static int
 program_key_context(struct tcpcb *tp, struct toepcb *toep,
     struct tls_key_context *uk_ctx)
 {
 	struct adapter *sc = td_adapter(toep->td);
 	struct tls_ofld_info *tls_ofld = &toep->tls;
 	struct tls_key_context *k_ctx;
 	int error, key_offset;
 
 	if (tp->t_state != TCPS_ESTABLISHED) {
 		/*
 		 * XXX: Matches Linux driver, but not sure this is a
 		 * very appropriate error.
 		 */
 		return (ENOENT);
 	}
 
 	/* Stop timer on handshake completion */
 	tls_stop_handshake_timer(toep);
 
 	toep->flags &= ~TPF_FORCE_CREDITS;
 
 	CTR4(KTR_CXGBE, "%s: tid %d %s proto_ver %#x", __func__, toep->tid,
 	    G_KEY_GET_LOC(uk_ctx->l_p_key) == KEY_WRITE_RX ? "KEY_WRITE_RX" :
 	    "KEY_WRITE_TX", uk_ctx->proto_ver);
 
 	/* RX offload is only possible in TLS ULP mode. */
 	if (G_KEY_GET_LOC(uk_ctx->l_p_key) == KEY_WRITE_RX &&
 	    ulp_mode(toep) != ULP_MODE_TLS)
 		return (EOPNOTSUPP);
 
 	/* Don't copy the 'tx' and 'rx' fields. */
 	k_ctx = &tls_ofld->k_ctx;
 	memcpy(&k_ctx->l_p_key, &uk_ctx->l_p_key,
 	    sizeof(*k_ctx) - offsetof(struct tls_key_context, l_p_key));
 
 	/* TLS version != 1.1 and !1.2 OR DTLS != 1.2 */
 	if (get_proto_ver(k_ctx->proto_ver) > DTLS_1_2_VERSION) {
 		if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) {
 			tls_ofld->rx_key_addr = -1;
 			t4_clear_rx_quiesce(toep);
 		} else {
 			tls_ofld->tx_key_addr = -1;
 		}
 		return (0);
 	}
 
 	/* Mode-dependent settings that are not taken from userland. */
 	if (k_ctx->state.enc_mode == CH_EVP_CIPH_GCM_MODE) {
 		k_ctx->iv_size = 4;
 		k_ctx->mac_first = 0;
 		k_ctx->hmac_ctrl = 0;
 	} else {
 		k_ctx->iv_size = 8; /* for CBC, iv is 16B, unit of 2B */
 		k_ctx->mac_first = 1;
 	}
 
 	/* Precompute the SCMD words used for this connection. */
 	tls_ofld->scmd0.seqno_numivs =
 		(V_SCMD_SEQ_NO_CTRL(3) |
 		 V_SCMD_PROTO_VERSION(get_proto_ver(k_ctx->proto_ver)) |
 		 V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
 		 V_SCMD_CIPH_AUTH_SEQ_CTRL((k_ctx->mac_first == 0)) |
 		 V_SCMD_CIPH_MODE(k_ctx->state.enc_mode) |
 		 V_SCMD_AUTH_MODE(k_ctx->state.auth_mode) |
 		 V_SCMD_HMAC_CTRL(k_ctx->hmac_ctrl) |
 		 V_SCMD_IV_SIZE(k_ctx->iv_size));
 
 	tls_ofld->scmd0.ivgen_hdrlen =
 		(V_SCMD_IV_GEN_CTRL(k_ctx->iv_ctrl) |
 		 V_SCMD_KEY_CTX_INLINE(0) |
 		 V_SCMD_TLS_FRAG_ENABLE(1));
 
 	tls_ofld->mac_length = k_ctx->mac_secret_size;
 
 	/* Copy in the key material for the requested direction only. */
 	if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) {
 		k_ctx->rx = uk_ctx->rx;
 		/* Don't initialize key for re-neg */
 		if (!G_KEY_CLR_LOC(k_ctx->l_p_key))
 			tls_ofld->rx_key_addr = -1;
 	} else {
 		k_ctx->tx = uk_ctx->tx;
 		/* Don't initialize key for re-neg */
 		if (!G_KEY_CLR_LOC(k_ctx->l_p_key))
 			tls_ofld->tx_key_addr = -1;
 	}
 
 	/* Flush pending data before new Tx key becomes active */
 	if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_TX) {
 		struct sockbuf *sb;
 
 		/* XXX: This might not drain everything. */
 		t4_push_frames(sc, toep, 0);
 		sb = &toep->inp->inp_socket->so_snd;
 		SOCKBUF_LOCK(sb);
 
 		/* XXX: This asserts that everything has been pushed. */
 		MPASS(sb->sb_sndptr == NULL || sb->sb_sndptr->m_next == NULL);
 		sb->sb_sndptr = NULL;
 		/* Remember how much of the send buffer predates the key. */
 		tls_ofld->sb_off = sbavail(sb);
 		SOCKBUF_UNLOCK(sb);
 		tls_ofld->tx_seq_no = 0;
 	}
 
 	/*
 	 * RX keys are always written to adapter memory; TX keys only when
 	 * the key location is DDR.
 	 */
 	if ((G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) ||
 	    (tls_ofld->key_location == TLS_SFO_WR_CONTEXTLOC_DDR)) {
 
 		/*
 		 * XXX: The userland library sets tx_key_info_size, not
 		 * rx_key_info_size.
 		 */
 		k_ctx->rx_key_info_size = k_ctx->tx_key_info_size;
 
 		error = tls_program_key_id(toep, k_ctx);
 		if (error) {
 			/* XXX: Only clear quiesce for KEY_WRITE_RX? */
 			t4_clear_rx_quiesce(toep);
 			return (error);
 		}
 	}
 
 	if (G_KEY_GET_LOC(k_ctx->l_p_key) == KEY_WRITE_RX) {
 		/*
 		 * RX key tags are an index into the key portion of MA
 		 * memory stored as an offset from the base address in
 		 * units of 64 bytes.
 		 */
 		key_offset = tls_ofld->rx_key_addr - sc->vres.key.start;
 		t4_set_tls_keyid(toep, key_offset / 64);
 		t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW,
 				 V_TCB_ULP_RAW(M_TCB_ULP_RAW),
 				 V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) |
 						V_TF_TLS_CONTROL(1) |
 						V_TF_TLS_ACTIVE(1) |
 						V_TF_TLS_ENABLE(1))));
 		/* Restart the TLS sequence number in the TCB at 0. */
 		t4_set_tls_tcb_field(toep, W_TCB_TLS_SEQ,
 				 V_TCB_TLS_SEQ(M_TCB_TLS_SEQ),
 				 V_TCB_TLS_SEQ(0));
 		t4_clear_rx_quiesce(toep);
 	} else {
 		unsigned short pdus_per_ulp;
 
 		/*
 		 * An inline key has no address in adapter memory; any
 		 * non-negative value makes tls_tx_key() report a key.
 		 */
 		if (tls_ofld->key_location == TLS_SFO_WR_CONTEXTLOC_IMMEDIATE)
 			tls_ofld->tx_key_addr = 1;
 
 		/* Cache the per-work-request sizing used on transmit. */
 		tls_ofld->fcplenmax = get_tp_plen_max(tls_ofld);
 		tls_ofld->expn_per_ulp = tls_expansion_size(toep,
 				tls_ofld->fcplenmax, 1, &pdus_per_ulp);
 		tls_ofld->pdus_per_ulp = pdus_per_ulp;
 		tls_ofld->adjusted_plen = tls_ofld->pdus_per_ulp *
 			((tls_ofld->expn_per_ulp/tls_ofld->pdus_per_ulp) +
 			 tls_ofld->k_ctx.frag_size);
 	}
 
 	return (0);
 }
 
 /*
  * Handshake timer callback.
  *
  * In some cases a client connection can hang without sending the
  * ServerHelloDone message from the NIC to the host.  To unstick such a
  * connection, re-arm the timer and send a dummy RX_DATA_ACK with
  * RX_MODULATE.
  */
 static void
 tls_send_handshake_ack(void *arg)
 {
 	struct toepcb *toep = arg;
 
 	/*
 	 * XXX: Does not have the t4_get_tcb() checks to refine the
 	 * workaround.
 	 */
 	callout_schedule(&toep->tls.handshake_timer,
 	    TLS_SRV_HELLO_RD_TM * hz);
 
 	CTR2(KTR_CXGBE, "%s: tid %d sending RX_DATA_ACK", __func__, toep->tid);
 	send_rx_modulate(td_adapter(toep->td), toep);
 }
 
 /*
  * Arm the handshake timer so that tls_send_handshake_ack() runs for
  * this toep after TLS_SRV_HELLO_BKOFF_TM seconds.
  */
 static void
 tls_start_handshake_timer(struct toepcb *toep)
 {
 
 	mtx_lock(&tls_handshake_lock);
 	callout_reset(&toep->tls.handshake_timer,
 	    TLS_SRV_HELLO_BKOFF_TM * hz, tls_send_handshake_ack, toep);
 	mtx_unlock(&tls_handshake_lock);
 }
 
 /* Stop the handshake timer while holding the global handshake lock. */
 void
 tls_stop_handshake_timer(struct toepcb *toep)
 {
 
 	mtx_lock(&tls_handshake_lock);
 	callout_stop(&toep->tls.handshake_timer);
 	mtx_unlock(&tls_handshake_lock);
 }
 
 /*
  * Socket option handler for the TCP_TLSOM_* options.
  *
  * SOPT_SET:
  *   TCP_TLSOM_SET_TLS_CONTEXT  install a key context from userland
  *   TCP_TLSOM_CLR_TLS_TOM      drop back to PDU extraction only
  *   TCP_TLSOM_CLR_QUIES        clear the receive quiesce
  * SOPT_GET:
  *   TCP_TLSOM_GET_TLS_TOM      report TLS_TOM_NONE/TXONLY/BOTH
  *
  * All of the SET options fail with EINVAL once the connection is in
  * TLS_MODE_KTLS.  Unknown options yield EOPNOTSUPP, and a dropped or
  * TIME_WAIT connection yields ECONNRESET.  The inpcb write lock is taken
  * here and released on every SOPT_SET and SOPT_GET path before returning.
  */
 int
 t4_ctloutput_tls(struct socket *so, struct sockopt *sopt)
 {
 	struct tls_key_context uk_ctx;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct toepcb *toep;
 	int error, optval;
 
 	error = 0;
 	/* Copy the key context in before taking the inpcb lock. */
 	if (sopt->sopt_dir == SOPT_SET &&
 	    sopt->sopt_name == TCP_TLSOM_SET_TLS_CONTEXT) {
 		error = sooptcopyin(sopt, &uk_ctx, sizeof(uk_ctx),
 		    sizeof(uk_ctx));
 		if (error)
 			return (error);
 	}
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		INP_WUNLOCK(inp);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 	toep = tp->t_toe;
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case TCP_TLSOM_SET_TLS_CONTEXT:
 			if (toep->tls.mode == TLS_MODE_KTLS)
 				error = EINVAL;
 			else {
 				error = program_key_context(tp, toep, &uk_ctx);
 				/* Success commits the connection to TLSOM. */
 				if (error == 0)
 					toep->tls.mode = TLS_MODE_TLSOM;
 			}
 			INP_WUNLOCK(inp);
 			break;
 		case TCP_TLSOM_CLR_TLS_TOM:
 			if (toep->tls.mode == TLS_MODE_KTLS)
 				error = EINVAL;
 			else if (ulp_mode(toep) == ULP_MODE_TLS) {
 				CTR2(KTR_CXGBE, "%s: tid %d CLR_TLS_TOM",
 				    __func__, toep->tid);
 				tls_clr_ofld_mode(toep);
 			} else
 				error = EOPNOTSUPP;
 			INP_WUNLOCK(inp);
 			break;
 		case TCP_TLSOM_CLR_QUIES:
 			if (toep->tls.mode == TLS_MODE_KTLS)
 				error = EINVAL;
 			else if (ulp_mode(toep) == ULP_MODE_TLS) {
 				CTR2(KTR_CXGBE, "%s: tid %d CLR_QUIES",
 				    __func__, toep->tid);
 				tls_clr_quiesce(toep);
 			} else
 				error = EOPNOTSUPP;
 			INP_WUNLOCK(inp);
 			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = EOPNOTSUPP;
 			break;
 		}
 		break;
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case TCP_TLSOM_GET_TLS_TOM:
 			/*
 			 * TLS TX is permitted on any TOE socket, but
 			 * TLS RX requires a TLS ULP mode.
 			 */
 			optval = TLS_TOM_NONE;
 			if (can_tls_offload(td_adapter(toep->td)) &&
 			    toep->tls.mode != TLS_MODE_KTLS) {
 				switch (ulp_mode(toep)) {
 				case ULP_MODE_NONE:
 				case ULP_MODE_TCPDDP:
 					optval = TLS_TOM_TXONLY;
 					break;
 				case ULP_MODE_TLS:
 					optval = TLS_TOM_BOTH;
 					break;
 				}
 			}
 			CTR3(KTR_CXGBE, "%s: tid %d GET_TLS_TOM = %d",
 			    __func__, toep->tid, optval);
 			/* Drop the lock before copying out to userland. */
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = EOPNOTSUPP;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 #ifdef KERN_TLS
 /*
  * Populate the driver key context 'k_ctx' from a KTLS session for the
  * given direction (KTLS_TX or KTLS_RX).
  *
  * The cipher key is copied into the tx or rx key buffer and is followed
  * by hash material: the output of t4_init_gmac_hash() for AES-GCM, or
  * the output of t4_init_hmac_digest() for the HMAC algorithms.  The
  * resulting key context size is stored in tx_key_info_size or
  * rx_key_info_size.  Panics on an auth algorithm other than
  * SHA1/SHA2-256/SHA2-384 HMAC in the non-GCM case.
  */
 static void
 init_ktls_key_context(struct ktls_session *tls, struct tls_key_context *k_ctx,
     int direction)
 {
 	struct auth_hash *axf;
 	u_int key_info_size, mac_key_size;
 	char *hash, *key;
 
 	k_ctx->l_p_key = V_KEY_GET_LOC(direction == KTLS_TX ? KEY_WRITE_TX :
 	    KEY_WRITE_RX);
 	/* Version is encoded as major in the high byte, minor in the low. */
 	k_ctx->proto_ver = tls->params.tls_vmajor << 8 | tls->params.tls_vminor;
 	k_ctx->cipher_secret_size = tls->params.cipher_key_len;
 	key_info_size = sizeof(struct tx_keyctx_hdr) +
 	    k_ctx->cipher_secret_size;
 	if (direction == KTLS_TX)
 		key = k_ctx->tx.key;
 	else
 		key = k_ctx->rx.key;
 	memcpy(key, tls->params.cipher_key, tls->params.cipher_key_len);
 	/* Hash material is stored right after the cipher key. */
 	hash = key + tls->params.cipher_key_len;
 	if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) {
 		k_ctx->state.auth_mode = SCMD_AUTH_MODE_GHASH;
 		k_ctx->state.enc_mode = SCMD_CIPH_MODE_AES_GCM;
 		k_ctx->iv_size = 4;
 		k_ctx->mac_first = 0;
 		k_ctx->hmac_ctrl = SCMD_HMAC_CTRL_NOP;
 		key_info_size += GMAC_BLOCK_LEN;
 		k_ctx->mac_secret_size = 0;
 		/* The session IV is used as the salt. */
 		if (direction == KTLS_TX)
 			memcpy(k_ctx->tx.salt, tls->params.iv, SALT_SIZE);
 		else
 			memcpy(k_ctx->rx.salt, tls->params.iv, SALT_SIZE);
 		t4_init_gmac_hash(tls->params.cipher_key,
 		    tls->params.cipher_key_len, hash);
 	} else {
 		switch (tls->params.auth_algorithm) {
 		case CRYPTO_SHA1_HMAC:
 			axf = &auth_hash_hmac_sha1;
 			mac_key_size = SHA1_HASH_LEN;
 			k_ctx->state.auth_mode = SCMD_AUTH_MODE_SHA1;
 			break;
 		case CRYPTO_SHA2_256_HMAC:
 			axf = &auth_hash_hmac_sha2_256;
 			mac_key_size = SHA2_256_HASH_LEN;
 			k_ctx->state.auth_mode = SCMD_AUTH_MODE_SHA256;
 			break;
 		case CRYPTO_SHA2_384_HMAC:
 			axf = &auth_hash_hmac_sha2_384;
 			/*
 			 * NOTE(review): SHA2-384 uses the SHA2-512 length
 			 * here; presumably intentional, confirm.
 			 */
 			mac_key_size = SHA2_512_HASH_LEN;
 			k_ctx->state.auth_mode = SCMD_AUTH_MODE_SHA512_384;
 			break;
 		default:
 			panic("bad auth mode");
 		}
 		k_ctx->state.enc_mode = SCMD_CIPH_MODE_AES_CBC;
 		k_ctx->iv_size = 8; /* for CBC, iv is 16B, unit of 2B */
 		k_ctx->mac_first = 1;
 		k_ctx->hmac_ctrl = SCMD_HMAC_CTRL_NO_TRUNC;
 		/* Room for two 16-byte-aligned blocks of MAC key size. */
 		key_info_size += roundup2(mac_key_size, 16) * 2;
 		k_ctx->mac_secret_size = mac_key_size;
 		t4_init_hmac_digest(axf, mac_key_size, tls->params.auth_key,
 		    tls->params.auth_key_len, hash);
 	}
 
 	if (direction == KTLS_TX)
 		k_ctx->tx_key_info_size = key_info_size;
 	else
 		k_ctx->rx_key_info_size = key_info_size;
 	k_ctx->frag_size = tls->params.max_frame_len;
 	k_ctx->iv_ctrl = 1;
 }
 
 /*
  * Set up KTLS offload on a TOE connection for one direction.
  *
  * Validates the connection mode and the session parameters, builds the
  * driver key context, programs the key into the adapter and then
  * configures either the transmit state (KTLS_TX) or the TCB receive
  * state (otherwise).  On success the connection is marked TLS_MODE_KTLS.
  *
  * Returns 0 on success; EINVAL if the connection is already in TLSOM
  * mode, the adapter cannot offload TLS, the ULP mode does not allow the
  * requested direction, or a key/IV length is unsupported;
  * EPROTONOSUPPORT for unsupported algorithms or TLS versions; EOPNOTSUPP
  * if a key is already installed for the direction; or the error from
  * tls_program_key_id().
  */
 int
 tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
 {
 	struct adapter *sc = td_adapter(toep->td);
 	struct tls_key_context *k_ctx;
 	int error, key_offset;
 
 	if (toep->tls.mode == TLS_MODE_TLSOM)
 		return (EINVAL);
 	if (!can_tls_offload(td_adapter(toep->td)))
 		return (EINVAL);
 	/* Receive offload requires TLS ULP mode; transmit does not. */
 	switch (ulp_mode(toep)) {
 	case ULP_MODE_TLS:
 		break;
 	case ULP_MODE_NONE:
 	case ULP_MODE_TCPDDP:
 		if (direction != KTLS_TX)
 			return (EINVAL);
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	switch (tls->params.cipher_algorithm) {
 	case CRYPTO_AES_CBC:
 		/* XXX: Explicitly ignore any provided IV. */
 		switch (tls->params.cipher_key_len) {
 		case 128 / 8:
 		case 192 / 8:
 		case 256 / 8:
 			break;
 		default:
 			return (EINVAL);
 		}
 		switch (tls->params.auth_algorithm) {
 		case CRYPTO_SHA1_HMAC:
 		case CRYPTO_SHA2_256_HMAC:
 		case CRYPTO_SHA2_384_HMAC:
 			break;
 		default:
 			return (EPROTONOSUPPORT);
 		}
 		break;
 	case CRYPTO_AES_NIST_GCM_16:
 		/* The IV is used as the salt and must be exactly that size. */
 		if (tls->params.iv_len != SALT_SIZE)
 			return (EINVAL);
 		switch (tls->params.cipher_key_len) {
 		case 128 / 8:
 		case 192 / 8:
 		case 256 / 8:
 			break;
 		default:
 			return (EINVAL);
 		}
 		break;
 	default:
 		return (EPROTONOSUPPORT);
 	}
 
 	/* Only TLS 1.1 and TLS 1.2 are currently supported. */
 	if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
 	    tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
 	    tls->params.tls_vminor > TLS_MINOR_VER_TWO)
 		return (EPROTONOSUPPORT);
 
 	/* Bail if we already have a key. */
 	if (direction == KTLS_TX) {
 		if (toep->tls.tx_key_addr != -1)
 			return (EOPNOTSUPP);
 	} else {
 		if (toep->tls.rx_key_addr != -1)
 			return (EOPNOTSUPP);
 	}
 
 	/*
 	 * XXX: This assumes no key renegotiation.  If KTLS ever supports
 	 * that we will want to allocate TLS sessions dynamically rather
 	 * than as a static member of toep.
 	 */
 	k_ctx = &toep->tls.k_ctx;
 	init_ktls_key_context(tls, k_ctx, direction);
 
 	error = tls_program_key_id(toep, k_ctx);
 	if (error)
 		return (error);
 
 	if (direction == KTLS_TX) {
 		/* Precompute the SCMD words used when sending records. */
 		toep->tls.scmd0.seqno_numivs =
 			(V_SCMD_SEQ_NO_CTRL(3) |
 			 V_SCMD_PROTO_VERSION(get_proto_ver(k_ctx->proto_ver)) |
 			 V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
 			 V_SCMD_CIPH_AUTH_SEQ_CTRL((k_ctx->mac_first == 0)) |
 			 V_SCMD_CIPH_MODE(k_ctx->state.enc_mode) |
 			 V_SCMD_AUTH_MODE(k_ctx->state.auth_mode) |
 			 V_SCMD_HMAC_CTRL(k_ctx->hmac_ctrl) |
 			 V_SCMD_IV_SIZE(k_ctx->iv_size));
 
 		toep->tls.scmd0.ivgen_hdrlen =
 			(V_SCMD_IV_GEN_CTRL(k_ctx->iv_ctrl) |
 			 V_SCMD_KEY_CTX_INLINE(0) |
 			 V_SCMD_TLS_FRAG_ENABLE(1));
 
 		if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16)
 			toep->tls.iv_len = 8;
 		else
 			toep->tls.iv_len = AES_BLOCK_LEN;
 
 		toep->tls.mac_length = k_ctx->mac_secret_size;
 
 		/* KTLS sends one record per work request. */
 		toep->tls.fcplenmax = get_tp_plen_max(&toep->tls);
 		toep->tls.expn_per_ulp = tls->params.tls_hlen +
 		    tls->params.tls_tlen;
 		toep->tls.pdus_per_ulp = 1;
 		toep->tls.adjusted_plen = toep->tls.expn_per_ulp +
 		    toep->tls.k_ctx.frag_size;
 	} else {
 		/* Stop timer on handshake completion */
 		tls_stop_handshake_timer(toep);
 
 		toep->flags &= ~TPF_FORCE_CREDITS;
 
 		/*
 		 * RX key tags are an index into the key portion of MA
 		 * memory stored as an offset from the base address in
 		 * units of 64 bytes.
 		 */
 		key_offset = toep->tls.rx_key_addr - sc->vres.key.start;
 		t4_set_tls_keyid(toep, key_offset / 64);
 		t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW,
 				 V_TCB_ULP_RAW(M_TCB_ULP_RAW),
 				 V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) |
 						V_TF_TLS_CONTROL(1) |
 						V_TF_TLS_ACTIVE(1) |
 						V_TF_TLS_ENABLE(1))));
 		/* Restart the TLS sequence number in the TCB at 0. */
 		t4_set_tls_tcb_field(toep, W_TCB_TLS_SEQ,
 				 V_TCB_TLS_SEQ(M_TCB_TLS_SEQ),
 				 V_TCB_TLS_SEQ(0));
 		t4_clear_rx_quiesce(toep);
 	}
 
 	toep->tls.mode = TLS_MODE_KTLS;
 
 	return (0);
 }
 #endif
 
 /*
  * Reset the per-connection TLS state: TLS off, keys located in DDR, and
  * no receive or transmit key.  Connections in TLS ULP mode also get
  * their handshake timer initialized against the global handshake lock.
  */
 void
 tls_init_toep(struct toepcb *toep)
 {
 	struct tls_ofld_info *tls = &toep->tls;
 
 	tls->rx_key_addr = -1;
 	tls->tx_key_addr = -1;
 	tls->key_location = TLS_SFO_WR_CONTEXTLOC_DDR;
 	tls->mode = TLS_MODE_OFF;
 
 	if (ulp_mode(toep) != ULP_MODE_TLS)
 		return;
 	callout_init_mtx(&tls->handshake_timer, &tls_handshake_lock, 0);
 }
 
 void
 tls_establish(struct toepcb *toep)
 {
 
 	/*
 	 * Enable PDU extraction.
 	 *
 	 * XXX: Supposedly this should be done by the firmware when
 	 * the ULP_MODE FLOWC parameter is set in send_flowc_wr(), but
 	 * in practice this seems to be required.
 	 */
 	CTR2(KTR_CXGBE, "%s: tid %d setting TLS_ENABLE", __func__, toep->tid);
 	t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW),
 	    V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)));
 
 	toep->flags |= TPF_FORCE_CREDITS;
 
 	tls_start_handshake_timer(toep);
 }
 
 /*
  * Tear down the per-connection TLS state: stop the handshake timer (it
  * only exists for connections in TLS ULP mode) and release any key
  * slots still held by the connection.
  */
 void
 tls_uninit_toep(struct toepcb *toep)
 {
 
 	if (ulp_mode(toep) == ULP_MODE_TLS)
 		tls_stop_handshake_timer(toep);
 	clear_tls_keyid(toep);
 }
 
 /*
  * TX credits are counted in 16-byte units in this file (work request
  * lengths are converted with howmany(len, 16)), so this is the credit
  * count of a work request of SGE_MAX_WR_LEN bytes.
  */
 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
 /*
  * Credits needed for the smallest TLS TX work request considered by
  * t4_push_tls_records(): the fw_tlstx_data_wr header, the cpl_tx_tls_sfo
  * CPL, the key context for this connection, plus CIPHER_BLOCK_SIZE + 1
  * bytes.
  */
 #define	MIN_OFLD_TLSTX_CREDITS(toep)					\
 	(howmany(sizeof(struct fw_tlstx_data_wr) +			\
 	    sizeof(struct cpl_tx_tls_sfo) + key_size((toep)) +		\
 	    CIPHER_BLOCK_SIZE + 1, 16))
 
 /*
  * Return how many bytes of a TLS work request may be used when sending
  * data as immediate data, given the number of available TX credits
  * (16 bytes each).  The result is capped at two egress queue
  * descriptors (2 * EQ_ESIZE bytes).
  */
 static inline u_int
 max_imm_tls_space(int tx_credits)
 {
 	const int max_desc = 2;	/* Use only up to 2 desc for imm. data WR */
 
 	KASSERT(tx_credits >= 0 &&
 		tx_credits <= MAX_OFLD_TX_CREDITS,
 		("%s: %d credits", __func__, tx_credits));
 
 	/* Fewer credits than the cap: everything available may be used. */
 	if (tx_credits < (max_desc * EQ_ESIZE) / 16)
 		return (tx_credits * 16);
 	return (max_desc * EQ_ESIZE);
 }
 
 /*
  * Count the scatter/gather segments needed to describe 'len' bytes of an
  * mbuf chain, starting 'skip' bytes into the chain headed by 'm'.
  *
  * Returns the total number of segments as reported by sglist_count() for
  * each mbuf visited.  The largest count reported for any single mbuf is
  * stored in *max_nsegs_1mbufp.
  */
 static int
 count_mbuf_segs(struct mbuf *m, int skip, int len, int *max_nsegs_1mbufp)
 {
 	int chunk, cnt, most, total;
 
 	/* Advance to the mbuf that contains the first byte of interest. */
 	for (; skip >= m->m_len; m = m->m_next)
 		skip -= m->m_len;
 
 	total = 0;
 	most = 0;
 	for (; len > 0; m = m->m_next) {
 		/* Only the first mbuf visited has a non-zero skip. */
 		chunk = m->m_len - skip;
 		cnt = sglist_count(mtod(m, char *) + skip, chunk);
 		if (most < cnt)
 			most = cnt;
 		total += cnt;
 		len -= chunk;
 		skip = 0;
 	}
 	*max_nsegs_1mbufp = most;
 	return (total);
 }
 
 /*
  * Fill in the FW_TLSTX_DATA_WR header of a TLS transmit work request.
  *
  * immdlen is the number of immediate payload bytes in the WR, plen the
  * TLS payload length, expn the expansion added to it (plen + expn is
  * written as the WR's plen), pdus the number of IVs, credits the WR
  * length in 16-byte units, shove the TX_SHOVE setting, and imm_ivs is
  * non-zero when the IVs are carried as immediate data rather than via
  * the SGL.  Per-connection values come from toep->tls.
  */
 static void
 write_tlstx_wr(struct fw_tlstx_data_wr *txwr, struct toepcb *toep,
     unsigned int immdlen, unsigned int plen, unsigned int expn,
     unsigned int pdus, uint8_t credits, int shove, int imm_ivs)
 {
 	struct tls_ofld_info *tls = &toep->tls;
 
 	/* Opcode, completion request and immediate data length. */
 	txwr->op_to_immdlen = htobe32(V_WR_OP(FW_TLSTX_DATA_WR) |
 	    V_FW_TLSTX_DATA_WR_COMPL(1) |
 	    V_FW_TLSTX_DATA_WR_IMMDLEN(immdlen));
 
 	/* Connection id and WR length in 16-byte units. */
 	txwr->flowid_len16 = htobe32(V_FW_TLSTX_DATA_WR_FLOWID(toep->tid) |
 	    V_FW_TLSTX_DATA_WR_LEN16(credits));
 
 	/* Payload plus expansion. */
 	txwr->plen = htobe32(plen + expn);
 
 	txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ULP_MODE_TLS) |
 	    V_TX_URG(0) | /* F_T6_TX_FORCE | */ V_TX_SHOVE(shove));
 
 	/* IVDSGL is set when the IVs are NOT immediate data. */
 	txwr->ctxloc_to_exp = htobe32(V_FW_TLSTX_DATA_WR_NUMIVS(pdus) |
 	    V_FW_TLSTX_DATA_WR_EXP(expn) |
 	    V_FW_TLSTX_DATA_WR_CTXLOC(tls->key_location) |
 	    V_FW_TLSTX_DATA_WR_IVDSGL(!imm_ivs) |
 	    V_FW_TLSTX_DATA_WR_KEYSIZE(tls->k_ctx.tx_key_info_size >> 4));
 
 	/* Per-connection fragment size and per-ULP limits. */
 	txwr->mfs = htobe16(tls->k_ctx.frag_size);
 	txwr->adjustedplen_pkd = htobe16(
 	    V_FW_TLSTX_DATA_WR_ADJUSTEDPLEN(tls->adjusted_plen));
 	txwr->expinplenmax_pkd = htobe16(
 	    V_FW_TLSTX_DATA_WR_EXPINPLENMAX(tls->expn_per_ulp));
 	txwr->pdusinplenmax_pkd =
 	    V_FW_TLSTX_DATA_WR_PDUSINPLENMAX(tls->pdus_per_ulp);
 }
 
 /*
  * Fill in the CPL_TX_TLS_SFO message that follows the work request
  * header.  plen is the TLS payload length and pdus the number of IVs.
  * The record type comes from tls_hdr; the SCMD fields and sequence
  * number come from toep->tls.
  *
  * Side effect: advances toep->tls.tx_seq_no by pdus.
  */
 static void
 write_tlstx_cpl(struct cpl_tx_tls_sfo *cpl, struct toepcb *toep,
     struct tls_hdr *tls_hdr, unsigned int plen, unsigned int pdus)
 {
 	struct tls_ofld_info *tls = &toep->tls;
 	int rec_type, seg;
 
 	/* The segment length is the payload length capped at frag_size. */
 	if (plen >= tls->k_ctx.frag_size)
 		seg = tls->k_ctx.frag_size;
 	else
 		seg = plen;
 
 	rec_type = tls_content_type(tls_hdr->type);
 	cpl->op_to_seg_len = htobe32(V_CPL_TX_TLS_SFO_OPCODE(CPL_TX_TLS_SFO) |
 	    V_CPL_TX_TLS_SFO_DATA_TYPE(rec_type) |
 	    V_CPL_TX_TLS_SFO_CPL_LEN(2) | V_CPL_TX_TLS_SFO_SEG_LEN(seg));
 	cpl->pld_len = htobe32(plen);
 
 	/* Only heartbeat records carry the raw TLS type in the CPL. */
 	if (rec_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) {
 		cpl->type_protover = htobe32(
 		    V_CPL_TX_TLS_SFO_TYPE(tls_hdr->type));
 	}
 
 	cpl->seqno_numivs = htobe32(tls->scmd0.seqno_numivs |
 	    V_SCMD_NUM_IVS(pdus));
 	cpl->ivgen_hdrlen = htobe32(tls->scmd0.ivgen_hdrlen);
 
 	/* Record the current sequence number, then account for this WR. */
 	cpl->scmd1 = htobe64(tls->tx_seq_no);
 	tls->tx_seq_no += pdus;
 }
 
 /*
  * Similar to write_tx_sgl() except that it accepts an optional
  * trailer buffer for IVs.
  *
  * dst is where the ulptx_sgl is written.  The payload is plen bytes of
  * the mbuf chain headed by start, beginning skip bytes into the chain.
  * If iv_buffer is not NULL, iv_len bytes from it are appended after the
  * payload.  nsegs is the total number of S/G entries (written into the
  * SGL header and cross-checked under INVARIANTS) and n is the capacity
  * of the temporary sglist, i.e. the largest number of segments that any
  * single mbuf or the IV buffer maps to.
  */
 static void
 write_tlstx_sgl(void *dst, struct mbuf *start, int skip, int plen,
     void *iv_buffer, int iv_len, int nsegs, int n)
 {
 	struct mbuf *m;
 	struct ulptx_sgl *usgl = dst;
 	int i, j, rc;
 	struct sglist sg;
 	struct sglist_seg segs[n];
 
 	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
 
 	sglist_init(&sg, n, segs);
 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
 	    V_ULPTX_NSGE(nsegs));
 
 	/* Find the mbuf that holds the first payload byte. */
 	for (m = start; skip >= m->m_len; m = m->m_next)
 		skip -= m->m_len;
 
 	/*
 	 * i is the index of the next slot in usgl->sge[]; -1 means the
 	 * next segment goes into the addr0/len0 slot of the header.
 	 *
 	 * Continue from the mbuf located above.  Restarting at 'start'
 	 * would apply the reduced skip to the wrong mbuf whenever the
 	 * payload begins past the first mbuf of the chain.
 	 */
 	i = -1;
 	for (; plen > 0; m = m->m_next) {
 		rc = sglist_append(&sg, mtod(m, char *) + skip,
 		    m->m_len - skip);
 		if (__predict_false(rc != 0))
 			panic("%s: sglist_append %d", __func__, rc);
 		plen -= m->m_len - skip;
 		skip = 0;
 
 		for (j = 0; j < sg.sg_nseg; i++, j++) {
 			if (i < 0) {
 				usgl->len0 = htobe32(segs[j].ss_len);
 				usgl->addr0 = htobe64(segs[j].ss_paddr);
 			} else {
 				usgl->sge[i / 2].len[i & 1] =
 				    htobe32(segs[j].ss_len);
 				usgl->sge[i / 2].addr[i & 1] =
 				    htobe64(segs[j].ss_paddr);
 			}
 #ifdef INVARIANTS
 			nsegs--;
 #endif
 		}
 		/* The temporary sglist is reused for every mbuf. */
 		sglist_reset(&sg);
 	}
 
 	/* Optional trailer: the IVs follow the payload in the SGL. */
 	if (iv_buffer != NULL) {
 		rc = sglist_append(&sg, iv_buffer, iv_len);
 		if (__predict_false(rc != 0))
 			panic("%s: sglist_append %d", __func__, rc);
 
 		for (j = 0; j < sg.sg_nseg; i++, j++) {
 			if (i < 0) {
 				usgl->len0 = htobe32(segs[j].ss_len);
 				usgl->addr0 = htobe64(segs[j].ss_paddr);
 			} else {
 				usgl->sge[i / 2].len[i & 1] =
 				    htobe32(segs[j].ss_len);
 				usgl->sge[i / 2].addr[i & 1] =
 				    htobe64(segs[j].ss_paddr);
 			}
 #ifdef INVARIANTS
 			nsegs--;
 #endif
 		}
 	}
 
 	/* An odd count leaves the last pair half used; zero its 2nd length. */
 	if (i & 1)
 		usgl->sge[i / 2].len[1] = htobe32(0);
 	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, iv_buffer %p",
 	    __func__, nsegs, start, iv_buffer));
 }
 
 /*
  * Similar to t4_push_frames() but handles TLS sockets when TLS offload
  * is enabled.  Rather than transmitting bulk data, the socket buffer
  * contains TLS records.  The work request requires a full TLS record,
  * so batch mbufs up until a full TLS record is seen.  This requires
  * reading the TLS header out of the start of each record to determine
  * its length.
  *
  * 'drop' is the number of bytes to remove from the front of the send
  * buffer before looking for records to send; it must be 0 if
  * transmission is suspended.  The caller must hold the inpcb write lock.
  *
  * One work request is sent per complete TLS record until the send
  * buffer holds no further complete record, credits run out, or an
  * allocation fails.
  */
 void
 t4_push_tls_records(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct tls_hdr thdr;
 	struct mbuf *sndptr;
 	struct fw_tlstx_data_wr *txwr;
 	struct cpl_tx_tls_sfo *cpl;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, space, max_nsegs_1mbuf, wr_len;
 	u_int expn_size, iv_len, pdus, sndptroff;
 	struct tls_ofld_info *tls_ofld = &toep->tls;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_snd;
 	int tls_size, tx_credits, shove, /* compl,*/ sowwakeup;
 	struct ofld_tx_sdesc *txsd;
 	bool imm_ivs, imm_payload;
 	void *iv_buffer, *iv_dst, *buf;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 
 	KASSERT(ulp_mode(toep) == ULP_MODE_NONE ||
 	    ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS,
 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
 	KASSERT(tls_tx_key(toep),
 	    ("%s: TX key not set for toep %p", __func__, toep));
 
 #ifdef VERBOSE_TRACES
 	/* Five conversions in the format, so this must be CTR5 with drop. */
 	CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
 	    __func__, toep->tid, toep->flags, tp->t_flags, drop);
 #endif
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 #ifdef RATELIMIT
 	if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) &&
 	    (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) {
 		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
 	}
 #endif
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	txsd = &toep->txsd[toep->txsd_pidx];
 	for (;;) {
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 		space = max_imm_tls_space(tx_credits);
 
 		/* Fixed part of every WR: header, CPL and key context. */
 		wr_len = sizeof(struct fw_tlstx_data_wr) +
 		    sizeof(struct cpl_tx_tls_sfo) + key_size(toep);
 		if (wr_len + CIPHER_BLOCK_SIZE + 1 > space) {
 #ifdef VERBOSE_TRACES
 			CTR5(KTR_CXGBE,
 			    "%s: tid %d tx_credits %d min_wr %d space %d",
 			    __func__, toep->tid, tx_credits, wr_len +
 			    CIPHER_BLOCK_SIZE + 1, space);
 #endif
 			return;
 		}
 
 		SOCKBUF_LOCK(sb);
 		sowwakeup = drop;
 		if (drop) {
 			sbdrop_locked(sb, drop);
 			MPASS(tls_ofld->sb_off >= drop);
 			tls_ofld->sb_off -= drop;
 			drop = 0;
 		}
 
 		/*
 		 * Send a FIN if requested, but only if there's no
 		 * more data to send.
 		 */
 		if (sbavail(sb) == tls_ofld->sb_off &&
 		    toep->flags & TPF_SEND_FIN) {
 			if (sowwakeup)
 				sowwakeup_locked(so);
 			else
 				SOCKBUF_UNLOCK(sb);
 			SOCKBUF_UNLOCK_ASSERT(sb);
 			t4_close_conn(sc, toep);
 			return;
 		}
 
 		if (sbavail(sb) < tls_ofld->sb_off + TLS_HEADER_LENGTH) {
 			/*
 			 * A full TLS header is not yet queued, stop
 			 * for now until more data is added to the
 			 * socket buffer.  However, if the connection
 			 * has been closed, we will never get the rest
 			 * of the header so just discard the partial
 			 * header and close the connection.
 			 */
 #ifdef VERBOSE_TRACES
 			CTR5(KTR_CXGBE, "%s: tid %d sbavail %d sb_off %d%s",
 			    __func__, toep->tid, sbavail(sb), tls_ofld->sb_off,
 			    toep->flags & TPF_SEND_FIN ? " SEND_FIN" : "");
 #endif
 			if (sowwakeup)
 				sowwakeup_locked(so);
 			else
 				SOCKBUF_UNLOCK(sb);
 			SOCKBUF_UNLOCK_ASSERT(sb);
 			if (toep->flags & TPF_SEND_FIN)
 				t4_close_conn(sc, toep);
 			return;
 		}
 
 		/* Read the header of the next TLS record. */
 		sndptr = sbsndmbuf(sb, tls_ofld->sb_off, &sndptroff);
 		m_copydata(sndptr, sndptroff, sizeof(thdr), (caddr_t)&thdr);
 		/* The record length is big-endian in the TLS header. */
 		tls_size = be16toh(thdr.length);
 		plen = TLS_HEADER_LENGTH + tls_size;
 		pdus = howmany(tls_size, tls_ofld->k_ctx.frag_size);
 		iv_len = pdus * CIPHER_BLOCK_SIZE;
 
 		if (sbavail(sb) < tls_ofld->sb_off + plen) {
 			/*
 			 * The full TLS record is not yet queued, stop
 			 * for now until more data is added to the
 			 * socket buffer.  However, if the connection
 			 * has been closed, we will never get the rest
 			 * of the record so just discard the partial
 			 * record and close the connection.
 			 */
 #ifdef VERBOSE_TRACES
 			CTR6(KTR_CXGBE,
 			    "%s: tid %d sbavail %d sb_off %d plen %d%s",
 			    __func__, toep->tid, sbavail(sb), tls_ofld->sb_off,
 			    plen, toep->flags & TPF_SEND_FIN ? " SEND_FIN" :
 			    "");
 #endif
 			if (sowwakeup)
 				sowwakeup_locked(so);
 			else
 				SOCKBUF_UNLOCK(sb);
 			SOCKBUF_UNLOCK_ASSERT(sb);
 			if (toep->flags & TPF_SEND_FIN)
 				t4_close_conn(sc, toep);
 			return;
 		}
 
 		/* Shove if there is no additional data pending. */
 		shove = (sbavail(sb) == tls_ofld->sb_off + plen) &&
 		    !(tp->t_flags & TF_MORETOCOME);
 
 		if (sb->sb_flags & SB_AUTOSIZE &&
 		    V_tcp_do_autosndbuf &&
 		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
 		    sbused(sb) >= sb->sb_hiwat * 7 / 8) {
 			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
 			    V_tcp_autosndbuf_max);
 
 			if (!sbreserve_locked(sb, newsize, so, NULL))
 				sb->sb_flags &= ~SB_AUTOSIZE;
 			else
 				sowwakeup = 1;	/* room available */
 		}
 
 		/* The send buffer lock is released here in either case. */
 		if (sowwakeup)
 			sowwakeup_locked(so);
 		else
 			SOCKBUF_UNLOCK(sb);
 		SOCKBUF_UNLOCK_ASSERT(sb);
 
 		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		/* Determine whether to use immediate vs SGL. */
 		imm_payload = false;
 		imm_ivs = false;
 		if (wr_len + iv_len <= space) {
 			imm_ivs = true;
 			wr_len += iv_len;
 			if (wr_len + tls_size <= space) {
 				wr_len += tls_size;
 				imm_payload = true;
 			}
 		}
 
 		/* Allocate space for IVs if needed. */
 		if (!imm_ivs) {
 			iv_buffer = malloc(iv_len, M_CXGBE, M_NOWAIT);
 			if (iv_buffer == NULL) {
 				/*
 				 * XXX: How to restart this?
 				 *
 				 * The send buffer was already unlocked
 				 * (and any wakeup delivered) above, so it
 				 * must not be unlocked a second time here.
 				 */
 				CTR3(KTR_CXGBE,
 			    "%s: tid %d failed to alloc IV space len %d",
 				    __func__, toep->tid, iv_len);
 				return;
 			}
 		} else
 			iv_buffer = NULL;
 
 		/* Determine size of SGL. */
 		nsegs = 0;
 		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
 		if (!imm_payload) {
 			nsegs = count_mbuf_segs(sndptr, sndptroff +
 			    TLS_HEADER_LENGTH, tls_size, &max_nsegs_1mbuf);
 			if (!imm_ivs) {
 				int n = sglist_count(iv_buffer, iv_len);
 				nsegs += n;
 				if (n > max_nsegs_1mbuf)
 					max_nsegs_1mbuf = n;
 			}
 
 			/* Account for SGL in work request length. */
 			wr_len += sizeof(struct ulptx_sgl) +
 			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 		}
 
 		wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
 		if (wr == NULL) {
 			/* XXX: how will we recover from this? */
 			toep->flags |= TPF_TX_SUSPENDED;
 			/* Nothing references the IV buffer yet; don't leak. */
 			free(iv_buffer, M_CXGBE);
 			return;
 		}
 
 #ifdef VERBOSE_TRACES
 		CTR5(KTR_CXGBE, "%s: tid %d TLS record %d len %#x pdus %d",
 		    __func__, toep->tid, thdr.type, tls_size, pdus);
 #endif
 		txwr = wrtod(wr);
 		cpl = (struct cpl_tx_tls_sfo *)(txwr + 1);
 		memset(txwr, 0, roundup2(wr_len, 16));
 		credits = howmany(wr_len, 16);
 		expn_size = tls_expansion_size(toep, tls_size, 0, NULL);
 		write_tlstx_wr(txwr, toep, imm_payload ? tls_size : 0,
 		    tls_size, expn_size, pdus, credits, shove, imm_ivs ? 1 : 0);
 		write_tlstx_cpl(cpl, toep, &thdr, tls_size, pdus);
 		tls_copy_tx_key(toep, cpl + 1);
 
 		/* Generate random IVs */
 		buf = (char *)(cpl + 1) + key_size(toep);
 		if (imm_ivs) {
 			MPASS(iv_buffer == NULL);
 			iv_dst = buf;
 			buf = (char *)iv_dst + iv_len;
 		} else
 			iv_dst = iv_buffer;
 		arc4rand(iv_dst, iv_len, 0);
 
 		/* The payload follows as immediate data or as an SGL. */
 		if (imm_payload) {
 			m_copydata(sndptr, sndptroff + TLS_HEADER_LENGTH,
 			    tls_size, buf);
 		} else {
 			write_tlstx_sgl(buf, sndptr,
 			    sndptroff + TLS_HEADER_LENGTH, tls_size, iv_buffer,
 			    iv_len, nsegs, max_nsegs_1mbuf);
 		}
 
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		toep->tx_credits -= credits;
 
 		tp->snd_nxt += plen;
 		tp->snd_max += plen;
 
 		SOCKBUF_LOCK(sb);
 		sbsndptr_adv(sb, sb->sb_sndptr, plen);
 		tls_ofld->sb_off += plen;
 		SOCKBUF_UNLOCK(sb);
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TLSTX_CREDITS(toep))
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/* Record what this WR consumed in the next tx descriptor. */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd->iv_buffer = iv_buffer;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		atomic_add_long(&toep->vi->pi->tx_toe_tls_records, 1);
 		atomic_add_long(&toep->vi->pi->tx_toe_tls_octets, plen);
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	}
 }
 
 #ifdef KERN_TLS
 /*
  * Count the S/G segments needed for the pages of an M_EXTPG mbuf.  Pages
  * whose address is exactly PAGE_SIZE past the previous page's address
  * are counted as part of the same segment.  The mbuf must have at least
  * one page.
  */
 static int
 count_ext_pgs_segs(struct mbuf *m)
 {
 	vm_paddr_t expected;
 	u_int pg, runs;
 
 	MPASS(m->m_epg_npgs > 0);
 
 	/* The first page always starts a segment. */
 	runs = 1;
 	for (pg = 1; pg < m->m_epg_npgs; pg++) {
 		expected = m->m_epg_pa[pg - 1] + PAGE_SIZE;
 		if (m->m_epg_pa[pg] != expected)
 			runs++;
 	}
 	return (runs);
 }
 
 /*
  * Write a ULP_TX_SC_DSGL scatter/gather list at dst describing the pages
  * of the M_EXTPG mbuf m.  Physically contiguous pages are merged into a
  * single entry.  nsegs is the number of entries (as computed by
  * count_ext_pgs_segs()); it is written into the SGL header and, under
  * INVARIANTS, checked against the number of entries actually produced.
  */
 static void
 write_ktlstx_sgl(void *dst, struct mbuf *m, int nsegs)
 {
 	struct ulptx_sgl *usgl = dst;
 	vm_paddr_t pa;
 	uint32_t len;
 	int i, j;
 
 	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
 
 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
 	    V_ULPTX_NSGE(nsegs));
 
 	/* Figure out the first S/G length. */
 	pa = m->m_epg_pa[0] + m->m_epg_1st_off;
 	usgl->addr0 = htobe64(pa);
 	len = m_epg_pagelen(m, 0, m->m_epg_1st_off);
 	pa += len;
 	/* Extend the first entry over every page contiguous with it. */
 	for (i = 1; i < m->m_epg_npgs; i++) {
 		if (m->m_epg_pa[i] != pa)
 			break;
 		len += m_epg_pagelen(m, i, 0);
 		pa += m_epg_pagelen(m, i, 0);
 	}
 	usgl->len0 = htobe32(len);
 #ifdef INVARIANTS
 	nsegs--;
 #endif
 
 	/*
 	 * Remaining pages go into usgl->sge[], two entries per element.
 	 * j is the index of the entry being built (-1: none started yet);
 	 * pa is the address that would continue the current entry and len
 	 * its accumulated length.
 	 */
 	j = -1;
 	for (; i < m->m_epg_npgs; i++) {
 		if (j == -1 || m->m_epg_pa[i] != pa) {
 			/* Close the previous entry before starting a new one. */
 			if (j >= 0)
 				usgl->sge[j / 2].len[j & 1] = htobe32(len);
 			j++;
 #ifdef INVARIANTS
 			nsegs--;
 #endif
 			pa = m->m_epg_pa[i];
 			usgl->sge[j / 2].addr[j & 1] = htobe64(pa);
 			len = m_epg_pagelen(m, i, 0);
 			pa += len;
 		} else {
 			len += m_epg_pagelen(m, i, 0);
 			pa += m_epg_pagelen(m, i, 0);
 		}
 	}
 	if (j >= 0) {
 		/* Close the last entry. */
 		usgl->sge[j / 2].len[j & 1] = htobe32(len);
 
 		/* Last pair only half used: zero the unused length. */
 		if ((j & 1) == 0)
 			usgl->sge[j / 2].len[1] = htobe32(0);
 	}
 	KASSERT(nsegs == 0, ("%s: nsegs %d, m %p", __func__, nsegs, m));
 }
 
 /*
  * Similar to t4_push_frames() but handles sockets that contain TLS
  * record mbufs.  Unlike TLSOM, each mbuf is a complete TLS record and
  * corresponds to a single work request.
  *
  * 'drop' is the number of bytes to remove from the front of the send
  * buffer before looking for records to send; it must be 0 if
  * transmission is suspended.  The caller must hold the inpcb write lock.
  */
 void
 t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct tls_hdr *thdr;
 	struct fw_tlstx_data_wr *txwr;
 	struct cpl_tx_tls_sfo *cpl;
 	struct wrqe *wr;
 	struct mbuf *m;
 	u_int nsegs, credits, wr_len;
 	u_int expn_size;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_snd;
 	int tls_size, tx_credits, shove, sowwakeup;
 	struct ofld_tx_sdesc *txsd;
 	char *buf;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 
 	KASSERT(ulp_mode(toep) == ULP_MODE_NONE ||
 	    ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS,
 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
 	KASSERT(tls_tx_key(toep),
 	    ("%s: TX key not set for toep %p", __func__, toep));
 
 #ifdef VERBOSE_TRACES
 	/* Five conversions in the format, so this must be CTR5 with drop. */
 	CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
 	    __func__, toep->tid, toep->flags, tp->t_flags, drop);
 #endif
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 #ifdef RATELIMIT
 	if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) &&
 	    (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) {
 		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
 	}
 #endif
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	txsd = &toep->txsd[toep->txsd_pidx];
 	for (;;) {
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 
 		SOCKBUF_LOCK(sb);
 		sowwakeup = drop;
 		if (drop) {
 			sbdrop_locked(sb, drop);
 			drop = 0;
 		}
 
 		/*
 		 * sb_sndptr is the last mbuf already handed to the card
 		 * (it is set to the mbuf just sent further below), so the
 		 * next record is its successor, or the head of the buffer
 		 * if nothing is outstanding.
 		 */
 		m = sb->sb_sndptr != NULL ? sb->sb_sndptr->m_next : sb->sb_mb;
 
 		/*
 		 * Send a FIN if requested, but only if there's no
 		 * more data to send.
 		 */
 		if (m == NULL && toep->flags & TPF_SEND_FIN) {
 			if (sowwakeup)
 				sowwakeup_locked(so);
 			else
 				SOCKBUF_UNLOCK(sb);
 			SOCKBUF_UNLOCK_ASSERT(sb);
 			t4_close_conn(sc, toep);
 			return;
 		}
 
 		/*
 		 * If there is no ready data to send, wait until more
 		 * data arrives.
 		 */
 		if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) {
 			if (sowwakeup)
 				sowwakeup_locked(so);
 			else
 				SOCKBUF_UNLOCK(sb);
 			SOCKBUF_UNLOCK_ASSERT(sb);
 #ifdef VERBOSE_TRACES
 			CTR2(KTR_CXGBE, "%s: tid %d no ready data to send",
 			    __func__, toep->tid);
 #endif
 			return;
 		}
 
 		KASSERT(m->m_flags & M_EXTPG, ("%s: mbuf %p is not NOMAP",
 		    __func__, m));
 		KASSERT(m->m_epg_tls != NULL,
 		    ("%s: mbuf %p doesn't have TLS session", __func__, m));
 
 		/* Calculate WR length. */
 		wr_len = sizeof(struct fw_tlstx_data_wr) +
 		    sizeof(struct cpl_tx_tls_sfo) + key_size(toep);
 
 		/* Explicit IVs for AES-CBC and AES-GCM are <= 16. */
 		MPASS(toep->tls.iv_len <= AES_BLOCK_LEN);
 		wr_len += AES_BLOCK_LEN;
 
 		/* Account for SGL in work request length. */
 		nsegs = count_ext_pgs_segs(m);
 		wr_len += sizeof(struct ulptx_sgl) +
 		    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 
 		/* Not enough credits for this work request. */
 		if (howmany(wr_len, 16) > tx_credits) {
 			if (sowwakeup)
 				sowwakeup_locked(so);
 			else
 				SOCKBUF_UNLOCK(sb);
 			SOCKBUF_UNLOCK_ASSERT(sb);
 #ifdef VERBOSE_TRACES
 			CTR5(KTR_CXGBE,
 	    "%s: tid %d mbuf %p requires %d credits, but only %d available",
 			    __func__, toep->tid, m, howmany(wr_len, 16),
 			    tx_credits);
 #endif
 			toep->flags |= TPF_TX_SUSPENDED;
 			return;
 		}
 
 		/* Shove if there is no additional data pending. */
 		shove = ((m->m_next == NULL ||
 		    (m->m_next->m_flags & M_NOTAVAIL) != 0)) &&
 		    (tp->t_flags & TF_MORETOCOME) == 0;
 
 		if (sb->sb_flags & SB_AUTOSIZE &&
 		    V_tcp_do_autosndbuf &&
 		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
 		    sbused(sb) >= sb->sb_hiwat * 7 / 8) {
 			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
 			    V_tcp_autosndbuf_max);
 
 			if (!sbreserve_locked(sb, newsize, so, NULL))
 				sb->sb_flags &= ~SB_AUTOSIZE;
 			else
 				sowwakeup = 1;	/* room available */
 		}
 
 		/* The send buffer lock is released here in either case. */
 		if (sowwakeup)
 			sowwakeup_locked(so);
 		else
 			SOCKBUF_UNLOCK(sb);
 		SOCKBUF_UNLOCK_ASSERT(sb);
 
 		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
 		if (wr == NULL) {
 			/* XXX: how will we recover from this? */
 			toep->flags |= TPF_TX_SUSPENDED;
 			return;
 		}
 
 		thdr = (struct tls_hdr *)&m->m_epg_hdr;
 #ifdef VERBOSE_TRACES
 		CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x",
 		    __func__, toep->tid, m->m_epg_seqno, thdr->type,
 		    m->m_len);
 #endif
 		txwr = wrtod(wr);
 		cpl = (struct cpl_tx_tls_sfo *)(txwr + 1);
 		memset(txwr, 0, roundup2(wr_len, 16));
 		credits = howmany(wr_len, 16);
 
 		/* The mbuf length covers header, payload and trailer. */
 		expn_size = m->m_epg_hdrlen +
 		    m->m_epg_trllen;
 		tls_size = m->m_len - expn_size;
 		write_tlstx_wr(txwr, toep, 0,
 		    tls_size, expn_size, 1, credits, shove, 1);
 
 		/* The sequence number is taken from the mbuf for each record. */
 		toep->tls.tx_seq_no = m->m_epg_seqno;
 		write_tlstx_cpl(cpl, toep, thdr, tls_size, 1);
 		tls_copy_tx_key(toep, cpl + 1);
 
 		/*
 		 * Copy IV.  Only iv_len bytes are copied but a full
 		 * AES_BLOCK_LEN slot is reserved for it in the WR.
 		 */
 		buf = (char *)(cpl + 1) + key_size(toep);
 		memcpy(buf, thdr + 1, toep->tls.iv_len);
 		buf += AES_BLOCK_LEN;
 
 		write_ktlstx_sgl(buf, m, nsegs);
 
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		toep->tx_credits -= credits;
 
 		tp->snd_nxt += m->m_len;
 		tp->snd_max += m->m_len;
 
 		SOCKBUF_LOCK(sb);
 		sb->sb_sndptr = m;
 		SOCKBUF_UNLOCK(sb);
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TLSTX_CREDITS(toep))
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/* Record what this WR consumed in the next tx descriptor. */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = m->m_len;
 		txsd->tx_credits = credits;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		atomic_add_long(&toep->vi->pi->tx_toe_tls_records, 1);
 		atomic_add_long(&toep->vi->pi->tx_toe_tls_octets, m->m_len);
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	}
 }
 #endif
 
 /*
  * For TLS data we place received mbufs received via CPL_TLS_DATA into
  * an mbufq in the TLS offload state.  When CPL_RX_TLS_CMP is
  * received, the completed PDUs are placed into the socket receive
  * buffer.
  *
  * The TLS code reuses the ulp_pdu_reclaimq to hold the pending mbufs.
  *
  * This is the CPL_TLS_DATA handler.  It takes ownership of m: the mbuf
  * is either queued on toep->ulp_pdu_reclaimq or freed.  Always returns 0.
  */
 static int
 do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_tls_data *cpl = mtod(m, const void *);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp;
 	int len;
 
 	/* XXX: Should this match do_rx_data instead? */
 	KASSERT(!(toep->flags & TPF_SYNQE),
 	    ("%s: toep %p claims to be a synq entry", __func__, toep));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
 
 	/*
 	 * strip off CPL header
 	 *
 	 * NOTE(review): cpl is still dereferenced below; presumably m_adj()
 	 * only advances the mbuf's data pointer and leaves the header bytes
 	 * in place -- confirm.
 	 */
 	m_adj(m, sizeof(*cpl));
 	len = m->m_pkthdr.len;
 
 	atomic_add_long(&toep->vi->pi->rx_toe_tls_octets, len);
 
 	KASSERT(len == G_CPL_TLS_DATA_LENGTH(be32toh(cpl->length_pkd)),
 	    ("%s: payload length mismatch", __func__));
 
 	INP_WLOCK(inp);
 	/* Data for a dropped or TIME_WAIT connection is discarded. */
 	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
 		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
 		    __func__, tid, len, inp->inp_flags);
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 	}
 
 	/* Save TCP sequence number. */
 	m->m_pkthdr.tls_tcp_seq = be32toh(cpl->seq);
 
 	/* Hold the data until the matching CPL_RX_TLS_CMP arrives. */
 	if (mbufq_enqueue(&toep->ulp_pdu_reclaimq, m)) {
 #ifdef INVARIANTS
 		panic("Failed to queue TLS data packet");
 #else
 		printf("%s: Failed to queue TLS data packet\n", __func__);
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 #endif
 	}
 
 	tp = intotcpcb(inp);
 	tp->t_rcvtime = ticks;
 
 #ifdef VERBOSE_TRACES
 	CTR4(KTR_CXGBE, "%s: tid %u len %d seq %u", __func__, tid, len,
 	    be32toh(cpl->seq));
 #endif
 
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 /*
  * CPL_RX_TLS_CMP handler.  Advances the receive sequence state by the
  * PDU length reported in the CPL, dequeues the record payload (if any)
  * that do_tls_data() queued on toep->ulp_pdu_reclaimq, and appends the
  * record to the socket receive buffer:
  *
  *  - in TLS_MODE_KTLS the payload is appended together with a
  *    TLS_GET_RECORD control message built from the TLS header;
  *  - otherwise the CPL's mbuf is trimmed to the TLS header and the
  *    payload is chained behind it.
  *
  * Finally, receive credits are returned when appropriate.  Takes
  * ownership of m.  Always returns 0.
  */
 static int
 do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_rx_tls_cmp *cpl = mtod(m, const void *);
 	struct tlsrx_hdr_pkt *tls_hdr_pkt;
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	struct sockbuf *sb;
 	struct mbuf *tls_data;
 #ifdef KERN_TLS
 	struct tls_get_record *tgr;
 	struct mbuf *control;
 #endif
 	int len, pdu_length, rx_credits;
 
 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
 	KASSERT(!(toep->flags & TPF_SYNQE),
 	    ("%s: toep %p claims to be a synq entry", __func__, toep));
 
 	/* strip off CPL header */
 	m_adj(m, sizeof(*cpl));
 	len = m->m_pkthdr.len;
 
 	atomic_add_long(&toep->vi->pi->rx_toe_tls_records, 1);
 
 	KASSERT(len == G_CPL_RX_TLS_CMP_LENGTH(be32toh(cpl->pdulength_length)),
 	    ("%s: payload length mismatch", __func__));
 
 	INP_WLOCK(inp);
 	/* Records for a dropped or TIME_WAIT connection are discarded. */
 	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
 		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
 		    __func__, tid, len, inp->inp_flags);
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 	}
 
 	pdu_length = G_CPL_RX_TLS_CMP_PDULENGTH(be32toh(cpl->pdulength_length));
 
 	so = inp_inpcbtosocket(inp);
 	tp = intotcpcb(inp);
 
 #ifdef VERBOSE_TRACES
 	CTR6(KTR_CXGBE, "%s: tid %u PDU len %d len %d seq %u, rcv_nxt %u",
 	    __func__, tid, pdu_length, len, be32toh(cpl->seq), tp->rcv_nxt);
 #endif
 
 	tp->rcv_nxt += pdu_length;
-	if (tp->rcv_wnd < pdu_length) {
-		toep->tls.rcv_over += pdu_length - tp->rcv_wnd;
-		tp->rcv_wnd = 0;
-	} else
-		tp->rcv_wnd -= pdu_length;
+	KASSERT(tp->rcv_wnd >= pdu_length,
+	    ("%s: negative window size", __func__));
+	tp->rcv_wnd -= pdu_length;
 
 	/* XXX: Not sure what to do about urgent data. */
 
 	/*
 	 * The payload of this CPL is the TLS header followed by
 	 * additional fields.
 	 */
 	KASSERT(m->m_len >= sizeof(*tls_hdr_pkt),
 	    ("%s: payload too small", __func__));
 	tls_hdr_pkt = mtod(m, void *);
 
 	/* The record payload, if any, was queued by do_tls_data(). */
 	tls_data = mbufq_dequeue(&toep->ulp_pdu_reclaimq);
 	if (tls_data != NULL) {
 		KASSERT(be32toh(cpl->seq) == tls_data->m_pkthdr.tls_tcp_seq,
 		    ("%s: sequence mismatch", __func__));
 	}
 
 #ifdef KERN_TLS
 	if (toep->tls.mode == TLS_MODE_KTLS) {
 		/* Report decryption errors as EBADMSG. */
 		if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) !=
 		    0) {
 			m_freem(m);
 			m_freem(tls_data);
 
 			CURVNET_SET(toep->vnet);
 			so->so_error = EBADMSG;
 			sorwakeup(so);
 
 			INP_WUNLOCK(inp);
 			CURVNET_RESTORE();
 
 			return (0);
 		}
 
 		/* Allocate the control message mbuf. */
 		control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD,
 		    IPPROTO_TCP);
 		if (control == NULL) {
 			/* The record is dropped and ENOBUFS is reported. */
 			m_freem(m);
 			m_freem(tls_data);
 
 			CURVNET_SET(toep->vnet);
 			so->so_error = ENOBUFS;
 			sorwakeup(so);
 
 			INP_WUNLOCK(inp);
 			CURVNET_RESTORE();
 
 			return (0);
 		}
 
 		/* Describe the record in the control message. */
 		tgr = (struct tls_get_record *)
 		    CMSG_DATA(mtod(control, struct cmsghdr *));
 		tgr->tls_type = tls_hdr_pkt->type;
 		tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8;
 		tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff;
 
 		/* The header mbuf is no longer needed; tls_hdr_pkt is stale. */
 		m_freem(m);
 
 		if (tls_data != NULL) {
 			m_last(tls_data)->m_flags |= M_EOR;
 			tgr->tls_length = htobe16(tls_data->m_pkthdr.len);
 		} else
 			tgr->tls_length = 0;
 		/* From here on m is the payload chain and may be NULL. */
 		m = tls_data;
 	} else
 #endif
 	{
 		/*
 		 * Only the TLS header is sent to OpenSSL, so report
 		 * errors by altering the record type.
 		 */
 		if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) !=
 		    0)
 			tls_hdr_pkt->type = CONTENT_TYPE_ERROR;
 
 		/* Trim this CPL's mbuf to only include the TLS header. */
 		KASSERT(m->m_len == len && m->m_next == NULL,
 		    ("%s: CPL spans multiple mbufs", __func__));
 		m->m_len = TLS_HEADER_LENGTH;
 		m->m_pkthdr.len = TLS_HEADER_LENGTH;
 
 		if (tls_data != NULL) {
 			/*
 			 * Update the TLS header length to be the length of
 			 * the payload data.
 			 */
 			tls_hdr_pkt->length = htobe16(tls_data->m_pkthdr.len);
 
 			/*
 			 * NOTE(review): this adds only the first mbuf's
 			 * m_len, while the header length above uses
 			 * m_pkthdr.len; the two differ if tls_data is a
 			 * chain of more than one mbuf -- confirm.
 			 */
 			m->m_next = tls_data;
 			m->m_pkthdr.len += tls_data->m_len;
 		}
 
 #ifdef KERN_TLS
 		control = NULL;
 #endif
 	}
 
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 
 	/* The socket can't receive any more: drop the connection. */
 	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
 		struct epoch_tracker et;
 
 		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
 		    __func__, tid, pdu_length);
 		m_freem(m);
 #ifdef KERN_TLS
 		m_freem(control);
 #endif
 		SOCKBUF_UNLOCK(sb);
 		INP_WUNLOCK(inp);
 
 		CURVNET_SET(toep->vnet);
 		NET_EPOCH_ENTER(et);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		/* The inpcb is only unlocked if tcp_drop() returned a tcpcb. */
 		if (tp)
 			INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 
 		return (0);
 	}
 
 	/*
 	 * Not all of the bytes on the wire are included in the socket buffer
 	 * (e.g. the MAC of the TLS record).  However, those bytes are included
 	 * in the TCP sequence space.
 	 */
 
 	/*
 	 * receive buffer autosize
 	 *
 	 * NOTE(review): in TLS_MODE_KTLS m was set to tls_data above, which
 	 * may be NULL; m->m_pkthdr.len below would then dereference NULL
 	 * when the preceding conditions hold -- confirm whether that case
 	 * can occur.
 	 */
 	MPASS(toep->vnet == so->so_vnet);
 	CURVNET_SET(toep->vnet);
 	if (sb->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&
 	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
 	    m->m_pkthdr.len > (sbspace(sb) / 8 * 7)) {
 		unsigned int hiwat = sb->sb_hiwat;
 		unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
 		if (!sbreserve_locked(sb, newsize, so, NULL))
 			sb->sb_flags &= ~SB_AUTOSIZE;
 	}
 
 #ifdef KERN_TLS
 	if (control != NULL)
 		sbappendcontrol_locked(sb, m, control, 0);
 	else
 #endif
 		sbappendstream_locked(sb, m, 0);
 
 	/* Buffer space beyond the current window can be returned as credits. */
 	rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
 #ifdef VERBOSE_TRACES
 	CTR4(KTR_CXGBE, "%s: tid %u rx_credits %u rcv_wnd %u",
 	    __func__, tid, rx_credits, tp->rcv_wnd);
 #endif
 	if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) {
 		/* send_rx_credits() returns the credits actually sent. */
 		rx_credits = send_rx_credits(sc, toep, rx_credits);
 		tp->rcv_wnd += rx_credits;
 		tp->rcv_adv += rx_credits;
 	}
 
 	/* sorwakeup_locked() releases the receive buffer lock. */
 	sorwakeup_locked(so);
 	SOCKBUF_UNLOCK_ASSERT(sb);
 
 	INP_WUNLOCK(inp);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Module load hook for the TLS code: initializes the handshake lock and
  * registers the handlers for the CPL_TLS_DATA and CPL_RX_TLS_CMP
  * messages.
  */
 void
 t4_tls_mod_load(void)
 {
 
 	mtx_init(&tls_handshake_lock, "t4tls handshake", NULL, MTX_DEF);
 	t4_register_cpl_handler(CPL_TLS_DATA, do_tls_data);
 	t4_register_cpl_handler(CPL_RX_TLS_CMP, do_rx_tls_cmp);
 }
 
 /*
  * Module unload hook for the TLS code: unregisters both CPL handlers
  * (by registering NULL) and then destroys the handshake lock, reversing
  * t4_tls_mod_load().
  */
 void
 t4_tls_mod_unload(void)
 {
 
 	t4_register_cpl_handler(CPL_TLS_DATA, NULL);
 	t4_register_cpl_handler(CPL_RX_TLS_CMP, NULL);
 	mtx_destroy(&tls_handshake_lock);
 }
 #endif	/* TCP_OFFLOAD */
Index: head/sys/dev/cxgbe/tom/t4_tls.h
===================================================================
--- head/sys/dev/cxgbe/tom/t4_tls.h	(revision 366853)
+++ head/sys/dev/cxgbe/tom/t4_tls.h	(revision 366854)
@@ -1,591 +1,590 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2017-2018 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: John Baldwin <jhb@FreeBSD.org>, Atul Gupta
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 #ifndef __T4_TLS_H__
 #define __T4_TLS_H__
 
 #define TLS1_VERSION                    0x0301
 #define TLS1_1_VERSION                  0x0302
 #define TLS1_2_VERSION                  0x0303
 #define TLS_MAX_VERSION                 TLS1_2_VERSION
 
 #define DTLS1_VERSION                   0xFEFF
 #define DTLS1_2_VERSION                 0xFEFD
 #define DTLS_MAX_VERSION                DTLS1_2_VERSION
 #define DTLS1_VERSION_MAJOR             0xFE
 
 /* Custom socket options for TLS+TOE. */
 
 /* Sizes, in bytes, of the key material arrays in tls_tx_ctxt/tls_rx_ctxt. */
 #define MAX_MAC_KSZ		64	/* 512 bits */
 #define MAX_CIPHER_KSZ		32	/* 256 bits */
 #define CIPHER_BLOCK_SZ		16	/* 128 bits */
 #define SALT_SIZE		4	/* 32 bits */
 
 /*
  * CHSSL_* hash/MAC algorithm codes, numbered sequentially from 0
  * (CHSSL_SHA_NOP) to 10 (CHSSL_CMAC).  Can accommodate 16; 11-15 are
  * reserved.
  */
 enum {
     CHSSL_SHA_NOP,
     CHSSL_SHA1,
     CHSSL_SHA224,
     CHSSL_SHA256,
     CHSSL_GHASH,
     CHSSL_SHA512_224,
     CHSSL_SHA512_256,
     CHSSL_SHA512_384,
     CHSSL_SHA512_512,
     CHSSL_CBCMAC,
     CHSSL_CMAC,
 };
 
 /*
  * CHSSL_* cipher codes, numbered sequentially from 0 (CHSSL_CIPH_NOP) to 7
  * (CHSSL_AES_CCM).  Can accommodate 16; 8-15 are reserved.
  */
 enum {
     CHSSL_CIPH_NOP,
     CHSSL_AES_CBC,
     CHSSL_AES_GCM,
     CHSSL_AES_CTR,
     CHSSL_AES_GEN,
     CHSSL_IPSEC_ESP,
     CHSSL_AES_XTS,
     CHSSL_AES_CCM,
 };
 
 /* Key Context Programming Operation type */
 #define KEY_WRITE_RX			0x1
 #define KEY_WRITE_TX			0x2
 #define KEY_DELETE_RX			0x4
 #define KEY_DELETE_TX			0x8
 
 #define S_KEY_CLR_LOC		4
 #define M_KEY_CLR_LOC		0xf
 #define V_KEY_CLR_LOC(x)	((x) << S_KEY_CLR_LOC)
 #define G_KEY_CLR_LOC(x)	(((x) >> S_KEY_CLR_LOC) & M_KEY_CLR_LOC)
 #define F_KEY_CLR_LOC		V_KEY_CLR_LOC(1U)
 
 #define S_KEY_GET_LOC           0
 #define M_KEY_GET_LOC           0xf
 #define V_KEY_GET_LOC(x)        ((x) << S_KEY_GET_LOC)
 #define G_KEY_GET_LOC(x)        (((x) >> S_KEY_GET_LOC) & M_KEY_GET_LOC)
 
 /*
  * Mode selectors carried inside struct tls_key_context (its 'state' member).
  * Five one-byte fields plus three reserved bytes, for 8 bytes in total.
  * NOTE(review): enc_mode and mac_mode/auth_mode presumably hold the
  * CHSSL_* cipher and hash codes defined above -- confirm against users.
  */
 struct tls_ofld_state {
     unsigned char enc_mode;
     unsigned char mac_mode;
     unsigned char key_loc;
     unsigned char ofld_mode;
     unsigned char auth_mode;
     unsigned char resv[3];
 };
 
 /*
  * Transmit-side key material: salt, cipher key, and the two MAC pads, each
  * sized by the SALT_SIZE / MAX_CIPHER_KSZ / MAX_MAC_KSZ limits above.
  * TLS_KEY_CONTEXT_SZ is derived from the size of this structure.
  */
 struct tls_tx_ctxt {
     unsigned char   salt[SALT_SIZE];
     unsigned char key[MAX_CIPHER_KSZ];
     unsigned char ipad[MAX_MAC_KSZ];
     unsigned char opad[MAX_MAC_KSZ];
 };
 
 /*
  * Receive-side key material.  The layout is member-for-member identical to
  * struct tls_tx_ctxt.
  */
 struct tls_rx_ctxt {
     unsigned char   salt[SALT_SIZE];
     unsigned char key[MAX_CIPHER_KSZ];
     unsigned char ipad[MAX_MAC_KSZ];
     unsigned char opad[MAX_MAC_KSZ];
 };
 
 /*
  * Complete key context for one connection: tx and rx key material, a set of
  * one-byte control fields, the mode selectors (state), and assorted sizes.
  * This is the structure supplied with the TCP_TLSOM_SET_TLS_CONTEXT socket
  * option defined below.
  */
 struct tls_key_context {
     struct tls_tx_ctxt tx;
     struct tls_rx_ctxt rx;
 
     unsigned char l_p_key;
     unsigned char hmac_ctrl;
     unsigned char mac_first;
     unsigned char iv_size;
     unsigned char iv_ctrl;
     unsigned char iv_algo;
     unsigned char tx_seq_no;
     unsigned char rx_seq_no;
 
     struct tls_ofld_state state;
 
     unsigned int tx_key_info_size;
     unsigned int rx_key_info_size;
     unsigned int frag_size;
     unsigned int mac_secret_size;
     unsigned int cipher_secret_size;
     int proto_ver;	/* presumably a TLS1_*_VERSION / DTLS1_*_VERSION value -- TODO confirm */
     unsigned int sock_fd;
     unsigned short dtls_epoch;
     unsigned short rsv;
 };
 
 /* Set with 'struct tls_key_context'. */
 #define	TCP_TLSOM_SET_TLS_CONTEXT	(TCP_VENDOR)
 
 /* Get returns int of enabled (1) / disabled (0). */
 #define	TCP_TLSOM_GET_TLS_TOM		(TCP_VENDOR + 1)
 
 enum {
 	TLS_TOM_NONE = 0,
 	TLS_TOM_TXONLY,
 	TLS_TOM_BOTH
 };
 
 /* Set with no value. */
 #define	TCP_TLSOM_CLR_TLS_TOM		(TCP_VENDOR + 2)
 
 /* Set with no value. */
 #define	TCP_TLSOM_CLR_QUIES		(TCP_VENDOR + 3)
 
 #ifdef _KERNEL
 /* Timeouts for handshake timer in seconds. */
 #define TLS_SRV_HELLO_DONE		9
 #define TLS_SRV_HELLO_RD_TM		5
 #define TLS_SRV_HELLO_BKOFF_TM		15
 
 #define CONTENT_TYPE_CCS		20
 #define CONTENT_TYPE_ALERT		21
 #define CONTENT_TYPE_HANDSHAKE		22
 #define CONTENT_TYPE_APP_DATA		23
 #define CONTENT_TYPE_HEARTBEAT		24
 #define CONTENT_TYPE_KEY_CONTEXT	32
 #define CONTENT_TYPE_ERROR		127
 
 #define GCM_TAG_SIZE			16
 #define AEAD_EXPLICIT_DATA_SIZE		8
 #define TLS_HEADER_LENGTH		5
 #define TP_TX_PG_SZ			65536
 #define FC_TP_PLEN_MAX			17408
 
 #define IPAD_SIZE			64
 #define OPAD_SIZE			64
 #define KEY_SIZE			32
 #define CIPHER_BLOCK_SIZE		16
 #define HDR_KCTX_SIZE   (IPAD_SIZE + OPAD_SIZE + KEY_SIZE)
 
 #define KEY_IN_DDR_SIZE			16
 #define	TLS_KEY_CONTEXT_SZ	roundup2(sizeof(struct tls_tx_ctxt), 32)
 
 /* MAC KEY SIZE */
 #define SHA_NOP				0
 #define SHA_GHASH			16
 #define SHA_224				28
 #define SHA_256				32
 #define SHA_384				48
 #define SHA_512				64
 #define SHA1				20
 
 /* CIPHER KEY SIZE */
 #define AES_NOP				0
 #define AES_128				16
 #define AES_192				24
 #define AES_256				32
 
 /*
  * Compact, 0-based protocol version codes.  These are distinct from the
  * TLS1_*_VERSION / DTLS1_*_VERSION values defined at the top of this file;
  * TLS_VERSION_MAX equals the number of codes that precede it.
  */
 enum {
 	TLS_1_2_VERSION,
 	TLS_1_1_VERSION,
 	DTLS_1_2_VERSION,
 	TLS_VERSION_MAX,
 };
 
 /* Cipher mode codes (stream, CBC, GCM, CTR), numbered from 0. */
 enum {
 	CH_EVP_CIPH_STREAM_CIPHER,
 	CH_EVP_CIPH_CBC_MODE,
 	CH_EVP_CIPH_GCM_MODE,
 	CH_EVP_CIPH_CTR_MODE,
 };
 
 /*
  * Key context location codes, numbered from 0.
  * NOTE(review): judging by the names these select where a TLS_SFO work
  * request finds its key context (DSGL, immediate data, or DDR) -- confirm.
  */
 enum {
 	TLS_SFO_WR_CONTEXTLOC_DSGL,
 	TLS_SFO_WR_CONTEXTLOC_IMMEDIATE,
 	TLS_SFO_WR_CONTEXTLOC_DDR,
 };
 
 /*
  * Record type codes for CPL_TX_TLS_SFO.  The codes run 0-4 in the same order
  * as CONTENT_TYPE_CCS (20) through CONTENT_TYPE_HEARTBEAT (24) above, i.e.
  * each one equals the matching CONTENT_TYPE_* value minus CONTENT_TYPE_CCS.
  */
 enum {
 	CPL_TX_TLS_SFO_TYPE_CCS,
 	CPL_TX_TLS_SFO_TYPE_ALERT,
 	CPL_TX_TLS_SFO_TYPE_HANDSHAKE,
 	CPL_TX_TLS_SFO_TYPE_DATA,
 	CPL_TX_TLS_SFO_TYPE_HEARTBEAT,	/* XXX: Shouldn't this be "CUSTOM"? */
 };
 
 /*
  * Cipher key size codes, numbered from 0.  The names give the key size in
  * bits (compare AES_128/AES_192/AES_256 above, which are in bytes).
  */
 enum {
 	CH_CK_SIZE_128,
 	CH_CK_SIZE_192,
 	CH_CK_SIZE_256,
 	CH_CK_SIZE_NOP,
 };
 
 /* MAC key size codes, numbered from 0.  The names give the size in bits. */
 enum {
 	CH_MK_SIZE_128,
 	CH_MK_SIZE_160,
 	CH_MK_SIZE_192,
 	CH_MK_SIZE_256,
 	CH_MK_SIZE_512,
 	CH_MK_SIZE_NOP,
 };
 
 /*
  * Two big-endian 32-bit command words.  The member names list the first and
  * last bit-fields packed into each word.
  */
 struct tls_scmd {
 	__be32 seqno_numivs;
 	__be32 ivgen_hdrlen;
 };
 
 /*
  * Value of tls_ofld_info.mode.  TLS_MODE_OFF is 0, so a zeroed
  * tls_ofld_info starts out with TLS disabled.
  */
 enum tls_mode {
 	TLS_MODE_OFF,
 	TLS_MODE_TLSOM,
 	TLS_MODE_KTLS,
 };
 
 /*
  * Driver-side TLS offload state: the key context, key addresses, transmit
  * sequence number, sizing parameters, the cached tls_scmd words, the current
  * tls_mode, and the handshake timer callout.
  * NOTE(review): presumably one instance lives in each toepcb -- confirm.
  */
 struct tls_ofld_info {
 	struct tls_key_context k_ctx;
 	int key_location;
 	int mac_length;
 	int rx_key_addr;
 	int tx_key_addr;
 	uint64_t tx_seq_no;
 	unsigned short fcplenmax;
 	unsigned short adjusted_plen;
 	unsigned short expn_per_ulp;
 	unsigned short pdus_per_ulp;
 	struct tls_scmd scmd0;
 	u_int iv_len;
 	enum tls_mode mode;
 	struct callout handshake_timer;
 	u_int sb_off;
-	u_int rcv_over;
 };
 
 /*
  * Packed request used to program a key context.  It consists of a work
  * request header (wr_hi .. mfs), a master command (cmd .. kaddr), and a
  * sub-command (sc_more, sc_len).  Multi-byte members are big-endian.
  */
 struct tls_key_req {
 	__be32 wr_hi;
 	__be32 wr_mid;
         __be32 ftid;
         __u8   reneg_to_write_rx;
         __u8   protocol;
         __be16 mfs;
 	/* master command */
 	__be32 cmd;
 	__be32 len16;             /* command length */
 	__be32 dlen;              /* data length in 32-byte units */
 	__be32 kaddr;
 	/* sub-command */
 	__be32 sc_more;
 	__be32 sc_len;
 }__packed;
 
 /*
  * Key context image: a 16-byte header, interpreted as either the tx or the
  * rx layout, followed by the key (32 bytes) and the two 64-byte pads.  The
  * key/pad sizes match KEY_SIZE, IPAD_SIZE and OPAD_SIZE above.
  * NOTE(review): the header bit-fields are presumably accessed with the
  * TLS_KEYCTX_TX_WR_* macros that follow -- confirm against users.
  */
 struct tls_keyctx {
         union key_ctx {
                 struct tx_keyctx_hdr {
                         __u8   ctxlen;
                         __u8   r2;
                         __be16 dualck_to_txvalid;
                         __u8   txsalt[4];
                         __be64 r5;
                 } txhdr;
                 struct rx_keyctx_hdr {
                         __u8   flitcnt_hmacctrl;
                         __u8   protover_ciphmode;
                         __u8   authmode_to_rxvalid;
                         __u8   ivpresent_to_rxmk_size;
                         __u8   rxsalt[4];
                         __be64 ivinsert_to_authinsrt;
                 } rxhdr;
         } u;
         struct keys {
                 __u8   edkey[32];
                 __u8   ipad[64];
                 __u8   opad[64];
         } keys;
 };
 
 #define S_TLS_KEYCTX_TX_WR_DUALCK    12
 #define M_TLS_KEYCTX_TX_WR_DUALCK    0x1
 #define V_TLS_KEYCTX_TX_WR_DUALCK(x) ((x) << S_TLS_KEYCTX_TX_WR_DUALCK)
 #define G_TLS_KEYCTX_TX_WR_DUALCK(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_DUALCK) & M_TLS_KEYCTX_TX_WR_DUALCK)
 #define F_TLS_KEYCTX_TX_WR_DUALCK    V_TLS_KEYCTX_TX_WR_DUALCK(1U)
 
 #define S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 11
 #define M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 0x1
 #define V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT)
 #define G_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) & \
      M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT)
 #define F_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT \
     V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1U)
 
 #define S_TLS_KEYCTX_TX_WR_SALT_PRESENT 10
 #define M_TLS_KEYCTX_TX_WR_SALT_PRESENT 0x1
 #define V_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_SALT_PRESENT)
 #define G_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_SALT_PRESENT) & \
      M_TLS_KEYCTX_TX_WR_SALT_PRESENT)
 #define F_TLS_KEYCTX_TX_WR_SALT_PRESENT \
     V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1U)
 
 #define S_TLS_KEYCTX_TX_WR_TXCK_SIZE 6
 #define M_TLS_KEYCTX_TX_WR_TXCK_SIZE 0xf
 #define V_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_TXCK_SIZE)
 #define G_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_TXCK_SIZE) & \
      M_TLS_KEYCTX_TX_WR_TXCK_SIZE)
 
 #define S_TLS_KEYCTX_TX_WR_TXMK_SIZE 2
 #define M_TLS_KEYCTX_TX_WR_TXMK_SIZE 0xf
 #define V_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_TXMK_SIZE)
 #define G_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_TXMK_SIZE) & \
      M_TLS_KEYCTX_TX_WR_TXMK_SIZE)
 
 #define S_TLS_KEYCTX_TX_WR_TXVALID   0
 #define M_TLS_KEYCTX_TX_WR_TXVALID   0x1
 #define V_TLS_KEYCTX_TX_WR_TXVALID(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_TXVALID)
 #define G_TLS_KEYCTX_TX_WR_TXVALID(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_TXVALID) & M_TLS_KEYCTX_TX_WR_TXVALID)
 #define F_TLS_KEYCTX_TX_WR_TXVALID   V_TLS_KEYCTX_TX_WR_TXVALID(1U)
 
 #define S_TLS_KEYCTX_TX_WR_FLITCNT   3
 #define M_TLS_KEYCTX_TX_WR_FLITCNT   0x1f
 #define V_TLS_KEYCTX_TX_WR_FLITCNT(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_FLITCNT)
 #define G_TLS_KEYCTX_TX_WR_FLITCNT(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_FLITCNT) & M_TLS_KEYCTX_TX_WR_FLITCNT)
 
 #define S_TLS_KEYCTX_TX_WR_HMACCTRL  0
 #define M_TLS_KEYCTX_TX_WR_HMACCTRL  0x7
 #define V_TLS_KEYCTX_TX_WR_HMACCTRL(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_HMACCTRL)
 #define G_TLS_KEYCTX_TX_WR_HMACCTRL(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_HMACCTRL) & M_TLS_KEYCTX_TX_WR_HMACCTRL)
 
 #define S_TLS_KEYCTX_TX_WR_PROTOVER  4
 #define M_TLS_KEYCTX_TX_WR_PROTOVER  0xf
 #define V_TLS_KEYCTX_TX_WR_PROTOVER(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_PROTOVER)
 #define G_TLS_KEYCTX_TX_WR_PROTOVER(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_PROTOVER) & M_TLS_KEYCTX_TX_WR_PROTOVER)
 
 #define S_TLS_KEYCTX_TX_WR_CIPHMODE  0
 #define M_TLS_KEYCTX_TX_WR_CIPHMODE  0xf
 #define V_TLS_KEYCTX_TX_WR_CIPHMODE(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_CIPHMODE)
 #define G_TLS_KEYCTX_TX_WR_CIPHMODE(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_CIPHMODE) & M_TLS_KEYCTX_TX_WR_CIPHMODE)
 
 #define S_TLS_KEYCTX_TX_WR_AUTHMODE  4
 #define M_TLS_KEYCTX_TX_WR_AUTHMODE  0xf
 #define V_TLS_KEYCTX_TX_WR_AUTHMODE(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_AUTHMODE)
 #define G_TLS_KEYCTX_TX_WR_AUTHMODE(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_AUTHMODE) & M_TLS_KEYCTX_TX_WR_AUTHMODE)
 
 #define S_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL 3
 #define M_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL 0x1
 #define V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL)
 #define G_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL) & \
      M_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL)
 #define F_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL \
     V_TLS_KEYCTX_TX_WR_CIPHAUTHSEQCTRL(1U)
 
 #define S_TLS_KEYCTX_TX_WR_SEQNUMCTRL 1
 #define M_TLS_KEYCTX_TX_WR_SEQNUMCTRL 0x3
 #define V_TLS_KEYCTX_TX_WR_SEQNUMCTRL(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_SEQNUMCTRL)
 #define G_TLS_KEYCTX_TX_WR_SEQNUMCTRL(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_SEQNUMCTRL) & \
      M_TLS_KEYCTX_TX_WR_SEQNUMCTRL)
 
 #define S_TLS_KEYCTX_TX_WR_RXVALID   0
 #define M_TLS_KEYCTX_TX_WR_RXVALID   0x1
 #define V_TLS_KEYCTX_TX_WR_RXVALID(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_RXVALID)
 #define G_TLS_KEYCTX_TX_WR_RXVALID(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_RXVALID) & M_TLS_KEYCTX_TX_WR_RXVALID)
 #define F_TLS_KEYCTX_TX_WR_RXVALID   V_TLS_KEYCTX_TX_WR_RXVALID(1U)
 
 #define S_TLS_KEYCTX_TX_WR_IVPRESENT 7
 #define M_TLS_KEYCTX_TX_WR_IVPRESENT 0x1
 #define V_TLS_KEYCTX_TX_WR_IVPRESENT(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_IVPRESENT)
 #define G_TLS_KEYCTX_TX_WR_IVPRESENT(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_IVPRESENT) & \
      M_TLS_KEYCTX_TX_WR_IVPRESENT)
 #define F_TLS_KEYCTX_TX_WR_IVPRESENT V_TLS_KEYCTX_TX_WR_IVPRESENT(1U)
 
 #define S_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT 6
 #define M_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT 0x1
 #define V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT)
 #define G_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT) & \
      M_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT)
 #define F_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT \
     V_TLS_KEYCTX_TX_WR_RXOPAD_PRESENT(1U)
 
 #define S_TLS_KEYCTX_TX_WR_RXCK_SIZE 3
 #define M_TLS_KEYCTX_TX_WR_RXCK_SIZE 0x7
 #define V_TLS_KEYCTX_TX_WR_RXCK_SIZE(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_RXCK_SIZE)
 #define G_TLS_KEYCTX_TX_WR_RXCK_SIZE(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_RXCK_SIZE) & \
      M_TLS_KEYCTX_TX_WR_RXCK_SIZE)
 
 #define S_TLS_KEYCTX_TX_WR_RXMK_SIZE 0
 #define M_TLS_KEYCTX_TX_WR_RXMK_SIZE 0x7
 #define V_TLS_KEYCTX_TX_WR_RXMK_SIZE(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_RXMK_SIZE)
 #define G_TLS_KEYCTX_TX_WR_RXMK_SIZE(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_RXMK_SIZE) & \
      M_TLS_KEYCTX_TX_WR_RXMK_SIZE)
 
 #define S_TLS_KEYCTX_TX_WR_IVINSERT  55
 #define M_TLS_KEYCTX_TX_WR_IVINSERT  0x1ffULL
 #define V_TLS_KEYCTX_TX_WR_IVINSERT(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_IVINSERT)
 #define G_TLS_KEYCTX_TX_WR_IVINSERT(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_IVINSERT) & M_TLS_KEYCTX_TX_WR_IVINSERT)
 
 #define S_TLS_KEYCTX_TX_WR_AADSTRTOFST 47
 #define M_TLS_KEYCTX_TX_WR_AADSTRTOFST 0xffULL
 #define V_TLS_KEYCTX_TX_WR_AADSTRTOFST(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_AADSTRTOFST)
 #define G_TLS_KEYCTX_TX_WR_AADSTRTOFST(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_AADSTRTOFST) & \
      M_TLS_KEYCTX_TX_WR_AADSTRTOFST)
 
 #define S_TLS_KEYCTX_TX_WR_AADSTOPOFST 39
 #define M_TLS_KEYCTX_TX_WR_AADSTOPOFST 0xffULL
 #define V_TLS_KEYCTX_TX_WR_AADSTOPOFST(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_AADSTOPOFST)
 #define G_TLS_KEYCTX_TX_WR_AADSTOPOFST(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_AADSTOPOFST) & \
      M_TLS_KEYCTX_TX_WR_AADSTOPOFST)
 
 #define S_TLS_KEYCTX_TX_WR_CIPHERSRTOFST 30
 #define M_TLS_KEYCTX_TX_WR_CIPHERSRTOFST 0x1ffULL
 #define V_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_CIPHERSRTOFST)
 #define G_TLS_KEYCTX_TX_WR_CIPHERSRTOFST(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_CIPHERSRTOFST) & \
      M_TLS_KEYCTX_TX_WR_CIPHERSRTOFST)
 
 #define S_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST 23
 #define M_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST 0x7f
 #define V_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST)
 #define G_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST) & \
      M_TLS_KEYCTX_TX_WR_CIPHERSTOPOFST)
 
 #define S_TLS_KEYCTX_TX_WR_AUTHSRTOFST 14
 #define M_TLS_KEYCTX_TX_WR_AUTHSRTOFST 0x1ff
 #define V_TLS_KEYCTX_TX_WR_AUTHSRTOFST(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_AUTHSRTOFST)
 #define G_TLS_KEYCTX_TX_WR_AUTHSRTOFST(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_AUTHSRTOFST) & \
      M_TLS_KEYCTX_TX_WR_AUTHSRTOFST)
 
 #define S_TLS_KEYCTX_TX_WR_AUTHSTOPOFST 7
 #define M_TLS_KEYCTX_TX_WR_AUTHSTOPOFST 0x7f
 #define V_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_AUTHSTOPOFST)
 #define G_TLS_KEYCTX_TX_WR_AUTHSTOPOFST(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_AUTHSTOPOFST) & \
      M_TLS_KEYCTX_TX_WR_AUTHSTOPOFST)
 
 #define S_TLS_KEYCTX_TX_WR_AUTHINSRT 0
 #define M_TLS_KEYCTX_TX_WR_AUTHINSRT 0x7f
 #define V_TLS_KEYCTX_TX_WR_AUTHINSRT(x) \
     ((x) << S_TLS_KEYCTX_TX_WR_AUTHINSRT)
 #define G_TLS_KEYCTX_TX_WR_AUTHINSRT(x) \
     (((x) >> S_TLS_KEYCTX_TX_WR_AUTHINSRT) & \
      M_TLS_KEYCTX_TX_WR_AUTHINSRT)
 
 /*
  * Packed 5-byte record header: type, version, length.  The size matches
  * TLS_HEADER_LENGTH above; version and length are big-endian.
  */
 struct tls_hdr {
 	__u8   type;
 	__be16 version;
 	__be16 length;
 } __packed;
 
 /*
  * Packed receive header: the same leading type/version/length members as
  * struct tls_hdr, followed by a 64-bit sequence number, a reserved field,
  * and a status byte.  The status byte is decoded with the TLSRX_HDR_PKT_*
  * macros below.
  */
 struct tlsrx_hdr_pkt {
 	__u8   type;
 	__be16 version;
 	__be16 length;
 
 	__be64 tls_seq;
 	__be16 reserved1;
 	__u8   res_to_mac_error;
 } __packed;
 
 /* res_to_mac_error fields */
 #define S_TLSRX_HDR_PKT_INTERNAL_ERROR   4
 #define M_TLSRX_HDR_PKT_INTERNAL_ERROR   0x1
 #define V_TLSRX_HDR_PKT_INTERNAL_ERROR(x) \
 	((x) << S_TLSRX_HDR_PKT_INTERNAL_ERROR)
 #define G_TLSRX_HDR_PKT_INTERNAL_ERROR(x) \
 (((x) >> S_TLSRX_HDR_PKT_INTERNAL_ERROR) & M_TLSRX_HDR_PKT_INTERNAL_ERROR)
 #define F_TLSRX_HDR_PKT_INTERNAL_ERROR   V_TLSRX_HDR_PKT_INTERNAL_ERROR(1U)
 
 #define S_TLSRX_HDR_PKT_SPP_ERROR        3
 #define M_TLSRX_HDR_PKT_SPP_ERROR        0x1
 #define V_TLSRX_HDR_PKT_SPP_ERROR(x)     ((x) << S_TLSRX_HDR_PKT_SPP_ERROR)
 #define G_TLSRX_HDR_PKT_SPP_ERROR(x)     \
 (((x) >> S_TLSRX_HDR_PKT_SPP_ERROR) & M_TLSRX_HDR_PKT_SPP_ERROR)
 #define F_TLSRX_HDR_PKT_SPP_ERROR        V_TLSRX_HDR_PKT_SPP_ERROR(1U)
 
 #define S_TLSRX_HDR_PKT_CCDX_ERROR       2
 #define M_TLSRX_HDR_PKT_CCDX_ERROR       0x1
 #define V_TLSRX_HDR_PKT_CCDX_ERROR(x)    ((x) << S_TLSRX_HDR_PKT_CCDX_ERROR)
 #define G_TLSRX_HDR_PKT_CCDX_ERROR(x)    \
 (((x) >> S_TLSRX_HDR_PKT_CCDX_ERROR) & M_TLSRX_HDR_PKT_CCDX_ERROR)
 #define F_TLSRX_HDR_PKT_CCDX_ERROR       V_TLSRX_HDR_PKT_CCDX_ERROR(1U)
 
 #define S_TLSRX_HDR_PKT_PAD_ERROR        1
 #define M_TLSRX_HDR_PKT_PAD_ERROR        0x1
 #define V_TLSRX_HDR_PKT_PAD_ERROR(x)     ((x) << S_TLSRX_HDR_PKT_PAD_ERROR)
 #define G_TLSRX_HDR_PKT_PAD_ERROR(x)     \
 (((x) >> S_TLSRX_HDR_PKT_PAD_ERROR) & M_TLSRX_HDR_PKT_PAD_ERROR)
 #define F_TLSRX_HDR_PKT_PAD_ERROR        V_TLSRX_HDR_PKT_PAD_ERROR(1U)
 
 #define S_TLSRX_HDR_PKT_MAC_ERROR        0
 #define M_TLSRX_HDR_PKT_MAC_ERROR        0x1
 #define V_TLSRX_HDR_PKT_MAC_ERROR(x)     ((x) << S_TLSRX_HDR_PKT_MAC_ERROR)
 #define G_TLSRX_HDR_PKT_MAC_ERROR(x)     \
 (((x) >> S_TLSRX_HDR_PKT_MAC_ERROR) & M_TLSRX_HDR_PKT_MAC_ERROR)
 #define F_TLSRX_HDR_PKT_MAC_ERROR        V_TLSRX_HDR_PKT_MAC_ERROR(1U)
 
 #define M_TLSRX_HDR_PKT_ERROR		0x1F
 
 #endif /* _KERNEL */
 
 #endif /* !__T4_TLS_H__ */
Index: head/sys/dev/cxgbe/tom/t4_tom.c
===================================================================
--- head/sys/dev/cxgbe/tom/t4_tom.c	(revision 366853)
+++ head/sys/dev/cxgbe/tom/t4_tom.c	(revision 366854)
@@ -1,1938 +1,1936 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_kern_tls.h"
 #include "opt_ratelimit.h"
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/limits.h>
 #include <sys/module.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
 #include <sys/refcount.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet6/scope6_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/toecore.h>
 #include <netinet/cc/cc.h>
 
 #ifdef TCP_OFFLOAD
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "common/t4_regs_values.h"
 #include "common/t4_tcb.h"
 #include "t4_clip.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 #include "tom/t4_tls.h"
 
 static struct protosw toe_protosw;
 static struct pr_usrreqs toe_usrreqs;
 
 static struct protosw toe6_protosw;
 static struct pr_usrreqs toe6_usrreqs;
 
 /* Module ops */
 static int t4_tom_mod_load(void);
 static int t4_tom_mod_unload(void);
 static int t4_tom_modevent(module_t, int, void *);
 
 /* ULD ops and helpers */
 static int t4_tom_activate(struct adapter *);
 static int t4_tom_deactivate(struct adapter *);
 
 static struct uld_info tom_uld_info = {
 	.uld_id = ULD_TOM,
 	.activate = t4_tom_activate,
 	.deactivate = t4_tom_deactivate,
 };
 
 static void release_offload_resources(struct toepcb *);
 static int alloc_tid_tabs(struct tid_info *);
 static void free_tid_tabs(struct tid_info *);
 static void free_tom_data(struct adapter *, struct tom_data *);
 static void reclaim_wr_resources(void *, int);
 
 /*
  * Allocate a zeroed toepcb with a trailing tx descriptor array sized for the
  * adapter's offload work request credits, and set up its reference count,
  * credit counters, PDU queues and descriptor ring indices.  'flags' is passed
  * to malloc(9) together with M_ZERO.  Returns NULL if the allocation fails.
  */
 struct toepcb *
 alloc_toepcb(struct vi_info *vi, int flags)
 {
 	struct adapter *sc = vi->pi->adapter;
 	struct toepcb *toep;
 	int credits, nsdesc, nbytes;
 
 	/*
 	 * Tx work request credits are counted by the firmware in units of 16
 	 * bytes.  Hold back enough for one ABORT_REQ so that aborting a
 	 * connection never has to wait for credits.
 	 */
 	credits = sc->params.ofldq_wr_cred;
 	credits -= howmany(sizeof(struct cpl_abort_req), 16);
 
 	/*
 	 * The smallest tx work request is a fw_ofld_tx_data_wr carrying one
 	 * byte of immediate data, so this is the largest number of work
 	 * requests that can ever be outstanding.
 	 */
 	nsdesc = credits / howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);
 
 	nbytes = offsetof(struct toepcb, txsd) +
 	    nsdesc * sizeof(struct ofld_tx_sdesc);
 
 	toep = malloc(nbytes, M_CXGBE, M_ZERO | flags);
 	if (toep != NULL) {
 		refcount_init(&toep->refcount, 1);
 		toep->td = sc->tom_softc;
 		toep->vi = vi;
 		toep->tid = -1;
 		toep->tx_total = credits;
 		toep->tx_credits = credits;
 		mbufq_init(&toep->ulp_pduq, INT_MAX);
 		mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX);
 		toep->txsd_total = nsdesc;
 		toep->txsd_avail = nsdesc;
 		toep->txsd_pidx = 0;
 		toep->txsd_cidx = 0;
 		aiotx_init_toep(toep);
 	}
 
 	return (toep);
 }
 
 /*
  * Initialize a toepcb after its params have been filled out.
  *
  * Takes a reference on the traffic class named by params.tc_idx (or resets
  * tc_idx to -1 if that class is in error), binds the toepcb to its offload
  * tx/rx queues and the port's control queue, runs the TLS (and, in TCPDDP
  * mode, DDP) per-toepcb setup, and finally marks it TPF_INITIALIZED.
  * Always returns 0.
  */
 int
 init_toepcb(struct vi_info *vi, struct toepcb *toep)
 {
 	struct conn_params *cp = &toep->params;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct tx_cl_rl_params *tc;
 
 	/* Only act on a tc_idx that names a valid scheduling class. */
 	if (cp->tc_idx >= 0 && cp->tc_idx < sc->chip_params->nsched_cls) {
 		tc = &pi->sched_params->cl_rl[cp->tc_idx];
 		mtx_lock(&sc->tc_lock);
 		if (tc->flags & CLRL_ERR) {
 			log(LOG_ERR,
 			    "%s: failed to associate traffic class %u with tid %u\n",
 			    device_get_nameunit(vi->dev), cp->tc_idx,
 			    toep->tid);
 			/* -1 means no traffic class is held by this toepcb. */
 			cp->tc_idx = -1;
 		} else {
 			tc->refcount++;
 		}
 		mtx_unlock(&sc->tc_lock);
 	}
 	toep->ofld_txq = &sc->sge.ofld_txq[cp->txq_idx];
 	toep->ofld_rxq = &sc->sge.ofld_rxq[cp->rxq_idx];
 	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
 
 	tls_init_toep(toep);
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 		ddp_init_toep(toep);
 
 	/* free_toepcb() uses this flag to decide whether to run the uninit hooks. */
 	toep->flags |= TPF_INITIALIZED;
 
 	return (0);
 }
 
 /*
  * Acquire an additional reference on the toepcb and return it.  Each hold is
  * dropped with free_toepcb().
  */
 struct toepcb *
 hold_toepcb(struct toepcb *toep)
 {
 
 	refcount_acquire(&toep->refcount);
 	return (toep);
 }
 
 /*
  * Drop one reference on the toepcb.  Nothing else happens unless
  * refcount_release() reports that this was the final reference; in that case
  * the toepcb must already be detached with no CPL pending, and it is torn
  * down (DDP/TLS state first, if it was ever initialized) and freed.
  */
 void
 free_toepcb(struct toepcb *toep)
 {
 
 	if (!refcount_release(&toep->refcount))
 		return;
 
 	KASSERT((toep->flags & TPF_ATTACHED) == 0,
 	    ("%s: attached to an inpcb", __func__));
 	KASSERT((toep->flags & TPF_CPL_PENDING) == 0,
 	    ("%s: CPL pending", __func__));
 
 	/* The uninit hooks only apply if init_toepcb() ran to completion. */
 	if ((toep->flags & TPF_INITIALIZED) != 0) {
 		if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 			ddp_uninit_toep(toep);
 		tls_uninit_toep(toep);
 	}
 
 	free(toep, M_CXGBE);
 }
 
 /*
  * Set up the socket for TCP offload.
  *
  * The caller must hold the inp write lock.  Marks both socket buffers
  * SB_NOCOALESCE, switches the socket to the TOE protosw for its address
  * family, links the tcpcb and the toepcb to each other, takes an extra
  * reference on the inp, and adds the toepcb to the tom_data's list.
  */
 void
 offload_socket(struct socket *so, struct toepcb *toep)
 {
 	struct tom_data *td = toep->td;
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
 	struct sockbuf *sb;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Update socket */
 	sb = &so->so_snd;
 	SOCKBUF_LOCK(sb);
 	sb->sb_flags |= SB_NOCOALESCE;
 	SOCKBUF_UNLOCK(sb);
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 	sb->sb_flags |= SB_NOCOALESCE;
 	/* so_proto is switched while the receive buffer lock is held. */
 	if (inp->inp_vflag & INP_IPV6)
 		so->so_proto = &toe6_protosw;
 	else
 		so->so_proto = &toe_protosw;
 	SOCKBUF_UNLOCK(sb);
 
 	/* Update TCP PCB */
 	tp->tod = &td->tod;
 	tp->t_toe = toep;
 	tp->t_flags |= TF_TOE;
 
 	/* Install an extra hold on inp */
 	toep->inp = inp;
 	toep->flags |= TPF_ATTACHED;
 	in_pcbref(inp);
 
 	/* Add the TOE PCB to the active list */
 	mtx_lock(&td->toep_list_lock);
 	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
 	mtx_unlock(&td->toep_list_lock);
 }
 
 /*
  * This is _not_ the normal way to "unoffload" a socket.
  *
  * Reverses offload_socket(): clears SB_NOCOALESCE on both socket buffers,
  * unlinks the tcpcb and toepcb from each other, drops the extra inp
  * reference (panicking if that turns out to be the last one), and removes the
  * toepcb from the tom_data's list.  The caller must hold the inp write lock.
  *
  * NOTE(review): so->so_proto, which offload_socket() points at the TOE
  * protosw, is not restored here -- confirm that this is intended.
  */
 void
 undo_offload_socket(struct socket *so)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
 	struct toepcb *toep = tp->t_toe;
 	struct tom_data *td = toep->td;
 	struct sockbuf *sb;
 
 	INP_WLOCK_ASSERT(inp);
 
 	sb = &so->so_snd;
 	SOCKBUF_LOCK(sb);
 	sb->sb_flags &= ~SB_NOCOALESCE;
 	SOCKBUF_UNLOCK(sb);
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 	sb->sb_flags &= ~SB_NOCOALESCE;
 	SOCKBUF_UNLOCK(sb);
 
 	tp->tod = NULL;
 	tp->t_toe = NULL;
 	tp->t_flags &= ~TF_TOE;
 
 	toep->inp = NULL;
 	toep->flags &= ~TPF_ATTACHED;
 	/* The caller is expected to hold its own reference on the inp. */
 	if (in_pcbrele_wlocked(inp))
 		panic("%s: inp freed.", __func__);
 
 	mtx_lock(&td->toep_list_lock);
 	TAILQ_REMOVE(&td->toep_list, toep, link);
 	mtx_unlock(&td->toep_list_lock);
 }
 
 /*
  * Release everything the toepcb holds: its L2T entry, its tid, its CLIP
  * entry, and its traffic class reference.  Then remove it from the
  * tom_data's list and drop a reference on it.  The toepcb must be detached
  * and must not have a CPL pending.
  */
 static void
 release_offload_resources(struct toepcb *toep)
 {
 	struct tom_data *td = toep->td;
 	struct adapter *sc = td_adapter(td);
 	int tid = toep->tid;
 
 	KASSERT(!(toep->flags & TPF_CPL_PENDING),
 	    ("%s: %p has CPL pending.", __func__, toep));
 	KASSERT(!(toep->flags & TPF_ATTACHED),
 	    ("%s: %p is still attached.", __func__, toep));
 
 	CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
 	    __func__, toep, tid, toep->l2te, toep->ce);
 
 	/*
 	 * These queues should have been emptied at approximately the same time
 	 * that a normal connection's socket's so_snd would have been purged or
 	 * drained.  Do _not_ clean up here.
 	 */
 	MPASS(mbufq_len(&toep->ulp_pduq) == 0);
 	MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0);
 #ifdef INVARIANTS
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 		ddp_assert_empty(toep);
 #endif
 	MPASS(TAILQ_EMPTY(&toep->aiotx_jobq));
 
 	if (toep->l2te)
 		t4_l2t_release(toep->l2te);
 
 	/* alloc_toepcb() starts tid at -1; a negative tid was never assigned. */
 	if (tid >= 0) {
 		/* The count passed is 2 when a CLIP entry is held, 1 otherwise. */
 		remove_tid(sc, tid, toep->ce ? 2 : 1);
 		release_tid(sc, tid, toep->ctrlq);
 	}
 
 	if (toep->ce)
 		t4_release_lip(sc, toep->ce);
 
 	/* init_toepcb() leaves tc_idx at -1 when no traffic class is held. */
 	if (toep->params.tc_idx != -1)
 		t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->params.tc_idx);
 
 	mtx_lock(&td->toep_list_lock);
 	TAILQ_REMOVE(&td->toep_list, toep, link);
 	mtx_unlock(&td->toep_list_lock);
 
 	free_toepcb(toep);
 }
 
 /*
  * The kernel is done with the TCP PCB and this is our opportunity to unhook the
  * toepcb hanging off of it.  If the TOE driver is also done with the toepcb (no
  * pending CPL) then it is time to release all resources tied to the toepcb.
  *
  * Also gets called when an offloaded active open fails and the TOM wants the
  * kernel to take the TCP PCB back.
  *
  * The caller must hold the inp write lock.
  */
 static void
 t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
 {
 	/*
 	 * inp is declared only when tracing or assertions are compiled in.
 	 * NOTE(review): the unconditional INP_WLOCK_ASSERT(inp) below
 	 * therefore relies on the macro discarding its argument in other
 	 * builds -- confirm.
 	 */
 #if defined(KTR) || defined(INVARIANTS)
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 	struct toepcb *toep = tp->t_toe;
 
 	INP_WLOCK_ASSERT(inp);
 
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 	KASSERT(toep->flags & TPF_ATTACHED,
 	    ("%s: not attached", __func__));
 
 #ifdef KTR
 	/* In SYN_SENT the toepcb's tid field is traced as an atid. */
 	if (tp->t_state == TCPS_SYN_SENT) {
 		CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
 		    __func__, toep->tid, toep, toep->flags, inp,
 		    inp->inp_flags);
 	} else {
 		CTR6(KTR_CXGBE,
 		    "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
 		    toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
 		    inp->inp_flags);
 	}
 #endif
 
 	tp->t_toe = NULL;
 	tp->t_flags &= ~TF_TOE;
 	toep->flags &= ~TPF_ATTACHED;
 
 	/* With a CPL still pending, the release is left for later. */
 	if (!(toep->flags & TPF_CPL_PENDING))
 		release_offload_resources(toep);
 }
 
 /*
  * setsockopt handler.
  *
  * Only TCP_NODELAY on an established connection is acted on: the toepcb's
  * nagle parameter is recomputed from TF_NODELAY and written to the
  * connection's TCB.  Every other option, and every SOPT_GET, is ignored.
  */
 static void
 t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct toepcb *toep = tp->t_toe;
 
 	if (dir == SOPT_GET)
 		return;
 
 	CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name);
 
 	if (name != TCP_NODELAY || tp->t_state != TCPS_ESTABLISHED)
 		return;
 
 	/* Nagle is enabled exactly when TF_NODELAY is clear. */
 	if (tp->t_flags & TF_NODELAY)
 		toep->params.nagle = 0;
 	else
 		toep->params.nagle = 1;
 
 	t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS,
 	    V_TF_NAGLE(1), V_TF_NAGLE(toep->params.nagle), 0, 0);
 }
 
 /*
  * Assemble the 64-bit t_flags value from a TCB image: the low 32 bits of
  * flit 14 become the upper half and the high 32 bits of flit 15 become the
  * lower half.  Both flits are converted from big-endian first.
  */
 static inline uint64_t
 get_tcb_tflags(const uint64_t *tcb)
 {
 	const uint64_t upper = be64toh(tcb[14]) << 32;
 	const uint64_t lower = be64toh(tcb[15]) >> 32;
 
 	return (upper | lower);
 }
 
 /*
  * Extract one field from a TCB image.  'word' is the 32-bit word the field
  * starts in, 'shift' its bit offset within that word (< 32), and 'mask' its
  * right-justified width mask.  Words are numbered from the end of the image:
  * word 0 is in the last flit, and an odd word is the upper half of its flit.
  */
 static inline uint32_t
 get_tcb_field(const uint64_t *tcb, u_int word, uint32_t mask, u_int shift)
 {
 #define LAST_WORD ((TCB_SIZE / 4) - 1)
 	uint64_t val;
 	int flit_idx;
 
 	MPASS(mask != 0);
 	MPASS(word <= LAST_WORD);
 	MPASS(shift < 32);
 
 	flit_idx = (LAST_WORD - word) / 2;
 	/* Odd words start at bit 32 of their flit. */
 	if ((word & 0x1) != 0)
 		shift += 32;
 
 	val = be64toh(tcb[flit_idx]) >> shift;
 	if (fls(mask) > 64 - shift) {
 		/*
 		 * The field runs past the top of this flit and continues in
 		 * the next logical flit, which is stored just before this one.
 		 * That index must be valid.
 		 */
 		MPASS(flit_idx > 0);
 		val |= be64toh(tcb[flit_idx - 1]) << (64 - shift);
 	}
 
 	return (val & mask);
 #undef LAST_WORD
 }
 #define GET_TCB_FIELD(tcb, F) \
     get_tcb_field(tcb, W_TCB_##F, M_TCB_##F, S_TCB_##F)
 
 /*
  * Issues a CPL_GET_TCB to read the entire TCB for the tid.
  *
  * The request is written to control queue 0 and asks for the reply to be
  * delivered to the ingress queue of offload rx queue 0.  Returns ENOMEM if no
  * work request space could be obtained, and 0 once the request is committed.
  */
 static int
 send_get_tcb(struct adapter *sc, u_int tid)
 {
 	struct cpl_get_tcb *cpl;
 	struct wrq_cookie cookie;
 
 	MPASS(tid < sc->tids.ntids);
 
 	/* The length is given in 16-byte units. */
 	cpl = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*cpl), 16),
 	    &cookie);
 	if (__predict_false(cpl == NULL))
 		return (ENOMEM);
 	bzero(cpl, sizeof(*cpl));
 	INIT_TP_WR(cpl, tid);
 	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_GET_TCB, tid));
 	cpl->reply_ctrl = htobe16(V_REPLY_CHAN(0) |
 	    V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id));
 	/* NOTE(review): presumably lets the reply handler recognize these requests -- confirm. */
 	cpl->cookie = 0xff;
 	commit_wrq_wr(&sc->sge.ctrlq[0], cpl, &cookie);
 
 	return (0);
 }
 
 /*
  * Allocates a zeroed TCB history entry for 'tid' and initializes its mutex
  * and the callout tied to that mutex.  'flags' must be M_NOWAIT or M_WAITOK.
  * Returns NULL if the allocation fails.
  */
 static struct tcb_histent *
 alloc_tcb_histent(struct adapter *sc, u_int tid, int flags)
 {
 	struct tcb_histent *ent;
 
 	MPASS(flags == M_NOWAIT || flags == M_WAITOK);
 
 	ent = malloc(sizeof(*ent), M_CXGBE, M_ZERO | flags);
 	if (ent != NULL) {
 		mtx_init(&ent->te_lock, "TCB entry", NULL, MTX_DEF);
 		callout_init_mtx(&ent->te_callout, &ent->te_lock, 0);
 		ent->te_adapter = sc;
 		ent->te_tid = tid;
 	}
 
 	return (ent);
 }
 
 /*
  * Tears down the entry's mutex and returns its memory to M_CXGBE.
  */
 static void
 free_tcb_histent(struct tcb_histent *te)
 {
 	struct mtx *lock = &te->te_lock;
 
 	mtx_destroy(lock);
 	free(te, M_CXGBE);
 }
 
 /*
  * Start tracking the tid in the TCB history.
  *
  * Allocates a history entry for the tid and issues the first CPL_GET_TCB for
  * it.  Returns 0 on success, ENXIO if the adapter has no TCB history,
  * EEXIST if the tid is already being tracked, ENOMEM if the entry cannot be
  * allocated, or the error from send_get_tcb().
  */
 int
 add_tid_to_history(struct adapter *sc, u_int tid)
 {
 	struct tcb_histent *te = NULL;
 	struct tom_data *td = sc->tom_softc;
 	int rc;
 
 	MPASS(tid < sc->tids.ntids);
 
 	if (td->tcb_history == NULL)
 		return (ENXIO);
 
 	rw_wlock(&td->tcb_history_lock);
 	if (td->tcb_history[tid] != NULL) {
 		rc = EEXIST;
 		goto done;
 	}
 	te = alloc_tcb_histent(sc, tid, M_NOWAIT);
 	if (te == NULL) {
 		rc = ENOMEM;
 		goto done;
 	}
 	mtx_lock(&te->te_lock);
 	rc = send_get_tcb(sc, tid);
 	if (rc == 0) {
 		te->te_flags |= TE_RPL_PENDING;
 		td->tcb_history[tid] = te;
 		mtx_unlock(&te->te_lock);
 	} else {
 		/*
 		 * The entry was never published.  Drop its lock before it is
 		 * destroyed; the lock must not be touched after the entry's
 		 * memory has been freed.
 		 */
 		mtx_unlock(&te->te_lock);
 		free_tcb_histent(te);
 	}
 done:
 	rw_wunlock(&td->tcb_history_lock);
 	return (rc);
 }
 
 static void
 remove_tcb_histent(struct tcb_histent *te)
 {
 	struct adapter *sc = te->te_adapter;
 	struct tom_data *td = sc->tom_softc;
 
 	rw_assert(&td->tcb_history_lock, RA_WLOCKED);
 	mtx_assert(&te->te_lock, MA_OWNED);
 	MPASS(td->tcb_history[te->te_tid] == te);
 
 	td->tcb_history[te->te_tid] = NULL;
 	free_tcb_histent(te);
 	rw_wunlock(&td->tcb_history_lock);
 }
 
 /*
  * Finds the history entry for 'tid'.  When an entry exists it is returned
  * with both the history lock (write-locked if 'addrem', read-locked
  * otherwise) and the entry's own lock held.  Returns NULL, with no locks
  * held, if there is no history or no entry for the tid.
  */
 static inline struct tcb_histent *
 lookup_tcb_histent(struct adapter *sc, u_int tid, bool addrem)
 {
 	struct tom_data *td = sc->tom_softc;
 	struct tcb_histent *ent;
 
 	MPASS(tid < sc->tids.ntids);
 
 	if (td->tcb_history == NULL)
 		return (NULL);
 
 	if (addrem)
 		rw_wlock(&td->tcb_history_lock);
 	else
 		rw_rlock(&td->tcb_history_lock);
 
 	ent = td->tcb_history[tid];
 	if (ent == NULL) {
 		/* Nothing to hand back; undo the lock taken above. */
 		if (addrem)
 			rw_wunlock(&td->tcb_history_lock);
 		else
 			rw_runlock(&td->tcb_history_lock);
 		return (NULL);
 	}
 
 	mtx_lock(&ent->te_lock);
 	return (ent);	/* with both locks held */
 }
 
 /*
  * Drops the locks taken by a read-mode lookup: first the entry's lock, then
  * the history read lock.
  */
 static inline void
 release_tcb_histent(struct tcb_histent *te)
 {
 	struct tom_data *td = te->te_adapter->tom_softc;
 
 	mtx_assert(&te->te_lock, MA_OWNED);
 	mtx_unlock(&te->te_lock);
 
 	rw_assert(&td->tcb_history_lock, RA_RLOCKED);
 	rw_runlock(&td->tcb_history_lock);
 }
 
 /*
  * Callout handler: asks for a fresh copy of the entry's TCB.  Runs with the
  * entry's lock held.  If the request cannot be sent the callout is
  * rescheduled to try again in hz / 100 ticks.
  */
 static void
 request_tcb(void *arg)
 {
 	struct tcb_histent *te = arg;
 	int rc;
 
 	mtx_assert(&te->te_lock, MA_OWNED);
 
 	/* No one else is supposed to update the histent. */
 	MPASS(!(te->te_flags & TE_RPL_PENDING));
 
 	rc = send_get_tcb(te->te_adapter, te->te_tid);
 	if (rc != 0) {
 		callout_schedule(&te->te_callout, hz / 100);
 		return;
 	}
 	te->te_flags |= TE_RPL_PENDING;
 }
 
 /*
  * Derives a one-byte sample (TS_* bits) from the TCB image, stores it in the
  * next slot of the entry's circular sample buffer, saves a copy of the TCB
  * in the entry, and marks the entry active.
  */
 static void
 update_tcb_histent(struct tcb_histent *te, const uint64_t *tcb)
 {
 	struct tom_data *td = te->te_adapter->tom_softc;
 	const uint64_t tflags = get_tcb_tflags(tcb);
 	const uint32_t snd_max = GET_TCB_FIELD(tcb, SND_MAX_RAW);
 	uint8_t bits = 0;
 
 	/* Retransmit / duplicate ACK state is only looked at if max != una. */
 	if (snd_max != GET_TCB_FIELD(tcb, SND_UNA_RAW)) {
 		const uint32_t dupacks = GET_TCB_FIELD(tcb, T_DUPACKS);
 
 		if (GET_TCB_FIELD(tcb, T_RXTSHIFT) != 0)
 			bits |= TS_RTO;
 		if (dupacks != 0)
 			bits |= TS_DUPACKS;
 		if (dupacks >= td->dupack_threshold)
 			bits |= TS_FASTREXMT;
 	}
 
 	if (snd_max != 0) {
 		uint32_t snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV);
 
 		bits |= TS_SND_BACKLOGGED;	/* for whatever reason. */
 
 		if (tflags & V_TF_RECV_SCALE(1))
 			snd_wnd <<= GET_TCB_FIELD(tcb, RCV_SCALE);
 		if (GET_TCB_FIELD(tcb, SND_CWND) < snd_wnd)
 			bits |= TS_CWND_LIMITED;	/* maybe due to CWND */
 	}
 
 	if (tflags & V_TF_CCTRL_ECN(1)) {
 		/*
 		 * CE marker on incoming IP hdr, echoing ECE back in the TCP
 		 * hdr.  Indicates congestion somewhere on the way from the peer
 		 * to this node.
 		 */
 		if (tflags & V_TF_CCTRL_ECE(1))
 			bits |= TS_ECN_ECE;
 
 		/*
 		 * ECE seen and CWR sent (or about to be sent).  Might indicate
 		 * congestion on the way to the peer.  This node is reducing its
 		 * congestion window in response.
 		 */
 		if (tflags & (V_TF_CCTRL_CWR(1) | V_TF_CCTRL_RFR(1)))
 			bits |= TS_ECN_CWR;
 	}
 
 	/* Record the sample and advance the producer index, wrapping. */
 	te->te_sample[te->te_pidx] = bits;
 	te->te_pidx++;
 	if (te->te_pidx == nitems(te->te_sample))
 		te->te_pidx = 0;
 
 	memcpy(te->te_tcb, tcb, TCB_SIZE);
 	te->te_flags |= TE_ACTIVE;
 }
 
 /*
  * Handler for CPL_GET_TCB_RPL.  The TCB follows the CPL header in the mbuf.
  * A reply for a tid that is not in the history is just logged.  Otherwise
  * the entry is removed if the TCB shows TCPS_CLOSED, or updated and its
  * callout re-armed (hz / 10) if not.  Always consumes the mbuf and returns 0.
  */
 static int
 do_get_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_get_tcb_rpl *cpl = mtod(m, const void *);
 	const uint64_t *tcb = (const uint64_t *)(const void *)(cpl + 1);
 	const u_int tid = GET_TID(cpl);
 	struct tcb_histent *te;
 	bool closed;
 
 	/* A closed connection's entry is removed, which needs the wlock. */
 	closed = GET_TCB_FIELD(tcb, T_STATE) == TCPS_CLOSED;
 	te = lookup_tcb_histent(sc, tid, closed);
 	if (te != NULL) {
 		MPASS(te->te_flags & TE_RPL_PENDING);
 		te->te_flags &= ~TE_RPL_PENDING;
 		if (!closed) {
 			update_tcb_histent(te, tcb);
 			callout_reset(&te->te_callout, hz / 10, request_tcb,
 			    te);
 			release_tcb_histent(te);
 		} else
 			remove_tcb_histent(te);
 	} else {
 		/* Not in the history.  Who issued the GET_TCB for this? */
 		device_printf(sc->dev, "tcb %u: flags 0x%016jx, state %u, "
 		    "srtt %u, sscale %u, rscale %u, cookie 0x%x\n", tid,
 		    (uintmax_t)get_tcb_tflags(tcb), GET_TCB_FIELD(tcb, T_STATE),
 		    GET_TCB_FIELD(tcb, T_SRTT), GET_TCB_FIELD(tcb, SND_SCALE),
 		    GET_TCB_FIELD(tcb, RCV_SCALE), cpl->cookie);
 	}
 
 	m_freem(m);
 	return (0);
 }
 
 static void
 fill_tcp_info_from_tcb(struct adapter *sc, uint64_t *tcb, struct tcp_info *ti)
 {
 	uint32_t v;
 
 	ti->tcpi_state = GET_TCB_FIELD(tcb, T_STATE);
 
 	v = GET_TCB_FIELD(tcb, T_SRTT);
 	ti->tcpi_rtt = tcp_ticks_to_us(sc, v);
 
 	v = GET_TCB_FIELD(tcb, T_RTTVAR);
 	ti->tcpi_rttvar = tcp_ticks_to_us(sc, v);
 
 	ti->tcpi_snd_ssthresh = GET_TCB_FIELD(tcb, SND_SSTHRESH);
 	ti->tcpi_snd_cwnd = GET_TCB_FIELD(tcb, SND_CWND);
 	ti->tcpi_rcv_nxt = GET_TCB_FIELD(tcb, RCV_NXT);
 
 	v = GET_TCB_FIELD(tcb, TX_MAX);
 	ti->tcpi_snd_nxt = v - GET_TCB_FIELD(tcb, SND_NXT_RAW);
 
 	/* Receive window being advertised by us. */
 	ti->tcpi_rcv_wscale = GET_TCB_FIELD(tcb, SND_SCALE);	/* Yes, SND. */
 	ti->tcpi_rcv_space = GET_TCB_FIELD(tcb, RCV_WND);
 
 	/* Send window */
 	ti->tcpi_snd_wscale = GET_TCB_FIELD(tcb, RCV_SCALE);	/* Yes, RCV. */
 	ti->tcpi_snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV);
 	if (get_tcb_tflags(tcb) & V_TF_RECV_SCALE(1))
 		ti->tcpi_snd_wnd <<= ti->tcpi_snd_wscale;
 	else
 		ti->tcpi_snd_wscale = 0;
 
 }
 
 /*
  * Fills 'ti' from the TCB copy saved in the history entry.
  */
 static void
 fill_tcp_info_from_history(struct adapter *sc, struct tcb_histent *te,
     struct tcp_info *ti)
 {
 	uint64_t *saved_tcb = te->te_tcb;
 
 	fill_tcp_info_from_tcb(sc, saved_tcb, ti);
 }
 
 /*
  * Reads the TCB for the given tid using a memory window and copies it to 'buf'
  * in the same format as CPL_GET_TCB_RPL.
  *
  * 'buf' must have room for TCB_SIZE bytes.  If the read fails the buffer is
  * zeroed so that the caller, which has no way to learn about the failure,
  * never decodes uninitialized memory.
  */
 static void
 read_tcb_using_memwin(struct adapter *sc, u_int tid, uint64_t *buf)
 {
 	int i, j, k, rc;
 	uint32_t addr;
 	u_char *tcb, tmp;
 
 	MPASS(tid < sc->tids.ntids);
 
 	addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE;
 	rc = read_via_memwin(sc, 2, addr, (uint32_t *)buf, TCB_SIZE);
 	if (rc != 0) {
 		bzero(buf, TCB_SIZE);
 		return;
 	}
 
 	/*
 	 * Reverse the order of the 16-byte chunks (first with last, second
 	 * with second to last, ...) without touching the bytes inside a chunk.
 	 */
 	tcb = (u_char *)buf;
 	for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) {
 		for (k = 0; k < 16; k++) {
 			tmp = tcb[i + k];
 			tcb[i + k] = tcb[j + k];
 			tcb[j + k] = tmp;
 		}
 	}
 }
 
 /*
  * Fills 'ti' for the tid.  The TCB copy in the history is used when the tid
  * is being tracked; otherwise the TCB is read through a memory window.
  */
 static void
 fill_tcp_info(struct adapter *sc, u_int tid, struct tcp_info *ti)
 {
 	uint64_t tcb[TCB_SIZE / sizeof(uint64_t)];
 	struct tcb_histent *te;
 
 	ti->tcpi_toe_tid = tid;
 
 	te = lookup_tcb_histent(sc, tid, false);
 	if (te != NULL) {
 		fill_tcp_info_from_history(sc, te, ti);
 		release_tcb_histent(te);
 		return;
 	}
 
 	if (!(sc->debug_flags & DF_DISABLE_TCB_CACHE)) {
 		/* XXX: tell firmware to flush TCB cache. */
 	}
 	read_tcb_using_memwin(sc, tid, tcb);
 	fill_tcp_info_from_tcb(sc, tcb, ti);
 }
 
 /*
  * toedev tcp_info hook.  The kernel calls this so that the TOE driver can
  * "refine" the tcp_info it has filled in for an offloaded connection.  The
  * inpcb must be write-locked.
  */
 static void
 t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = tod->tod_softc;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	MPASS(ti != NULL);
 
 	fill_tcp_info(sc, toep->tid, ti);
 }
 
 #ifdef KERN_TLS
 /*
  * toedev hook for KTLS session setup on an offloaded connection; hands the
  * request to tls_alloc_ktls() and returns its result.  The inpcb must be
  * write-locked.
  */
 static int
 t4_alloc_tls_session(struct toedev *tod, struct tcpcb *tp,
     struct ktls_session *tls, int direction)
 {
 	struct toepcb *toep;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	MPASS(tls != NULL);
 
 	toep = tp->t_toe;
 	return (tls_alloc_ktls(toep, tls, direction));
 }
 #endif
 
 /*
  * The TOE driver will not receive any more CPLs for the tid associated with the
  * toepcb; release the hold on the inpcb.
  *
  * Must be called with the inpcb write-locked and TPF_CPL_PENDING set.  The
  * inpcb lock is no longer held when this returns.
  */
 void
 final_cpl_received(struct toepcb *toep)
 {
 	struct inpcb *inp = toep->inp;
 
 	KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_CPL_PENDING,
 	    ("%s: CPL not pending already?", __func__));
 
 	CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
 	    __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);
 
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 		release_ddp_resources(toep);
 	/* Break the toepcb -> inpcb link; 'inp' is still used locally below. */
 	toep->inp = NULL;
 	toep->flags &= ~TPF_CPL_PENDING;
 	mbufq_drain(&toep->ulp_pdu_reclaimq);
 
 	/*
 	 * Offload resources go away once neither TPF_ATTACHED nor
 	 * TPF_CPL_PENDING is set (cf. the matching test in t4_pcb_detach).
 	 */
 	if (!(toep->flags & TPF_ATTACHED))
 		release_offload_resources(toep);
 
 	/* Unlock explicitly only if in_pcbrele_wlocked() returned 0. */
 	if (!in_pcbrele_wlocked(inp))
 		INP_WUNLOCK(inp);
 }
 
 /*
  * Records 'ctx' as the context for 'tid' in the tid table and adds 'ntids'
  * to the count of tids in use.
  */
 void
 insert_tid(struct adapter *sc, int tid, void *ctx, int ntids)
 {
 	struct tid_info *t = &sc->tids;
 	const int idx = tid - t->tid_base;
 
 	MPASS(tid >= t->tid_base);
 	MPASS(tid - t->tid_base < t->ntids);
 
 	t->tid_tab[idx] = ctx;
 	atomic_add_int(&t->tids_in_use, ntids);
 }
 
 /*
  * Returns the context stored in the tid table for 'tid'.
  */
 void *
 lookup_tid(struct adapter *sc, int tid)
 {
 	struct tid_info *t = &sc->tids;
 	const int idx = tid - t->tid_base;
 
 	return (t->tid_tab[idx]);
 }
 
 /*
  * Replaces the context stored in the tid table for 'tid'.  The in-use count
  * is not changed.
  */
 void
 update_tid(struct adapter *sc, int tid, void *ctx)
 {
 	struct tid_info *t = &sc->tids;
 	const int idx = tid - t->tid_base;
 
 	t->tid_tab[idx] = ctx;
 }
 
 /*
  * Clears the tid table slot for 'tid' and subtracts 'ntids' from the count
  * of tids in use.
  */
 void
 remove_tid(struct adapter *sc, int tid, int ntids)
 {
 	struct tid_info *t = &sc->tids;
 	const int idx = tid - t->tid_base;
 
 	t->tid_tab[idx] = NULL;
 	atomic_subtract_int(&t->tids_in_use, ntids);
 }
 
 /*
  * What mtu_idx to use, given a 4-tuple.  Note that both s->mss and tcp_mssopt
  * have the MSS that we should advertise in our SYN.  Advertised MSS doesn't
  * account for any TCP options so the effective MSS (only payload, no headers or
  * options) could be different.
  *
  * Returns the index of the last entry in the adapter's MTU table that is not
  * larger than MSS + IP header + TCP header (index 0 if every entry after the
  * first is larger).
  */
 static int
 find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc,
     struct offload_settings *s)
 {
 	unsigned short *mtus = &sc->params.mtus[0];
 	size_t hdrlen;
 	int idx, mss, mtu;
 
 	MPASS(inc != NULL);
 
 	/* An MSS from the offload settings wins over the stack's choice. */
 	if (s->mss > 0)
 		mss = s->mss;
 	else
 		mss = tcp_mssopt(inc);
 
 	hdrlen = sizeof(struct tcphdr);
 	if (inc->inc_flags & INC_ISIPV6)
 		hdrlen += sizeof(struct ip6_hdr);
 	else
 		hdrlen += sizeof(struct ip);
 	mtu = mss + hdrlen;
 
 	idx = 0;
 	while (idx < NMTUS - 1 && mtus[idx + 1] <= mtu)
 		idx++;
 
 	return (idx);
 }
 
 /*
  * Determine the receive window size for a socket: the space available in
  * the receive buffer, raised to MIN_RCV_WND if smaller and then capped with
  * min() against MAX_RCV_WND.  The receive sockbuf must be locked.
  */
 u_long
 select_rcv_wnd(struct socket *so)
 {
 	unsigned long space;
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	space = sbspace(&so->so_rcv);
 	space = space < MIN_RCV_WND ? MIN_RCV_WND : space;
 
 	return min(space, MAX_RCV_WND);
 }
 
 /*
  * Picks the receive window scale: the smallest shift, not exceeding
  * TCP_MAX_WINSHIFT, for which TCP_MAXWIN << shift covers sb_max (itself
  * capped at MAX_RCV_WND).
  */
 int
 select_rcv_wscale(void)
 {
 	unsigned long target = sb_max;
 	int shift;
 
 	if (target > MAX_RCV_WND)
 		target = MAX_RCV_WND;
 
 	for (shift = 0; shift < TCP_MAX_WINSHIFT; shift++) {
 		if (!((TCP_MAXWIN << shift) < target))
 			break;
 	}
 
 	return (shift);
 }
 
 /*
  * Builds the 64-bit opt0 value for a connection from its parameters and the
  * VI it uses, and returns it in big-endian byte order.
  */
 __be64
 calc_options0(struct vi_info *vi, struct conn_params *cp)
 {
 	uint64_t opt0;
 
 	/* Range-check everything taken from the connection parameters. */
 	MPASS(cp->wscale >= 0 && cp->wscale <= M_WND_SCALE);
 	MPASS(cp->mtu_idx >= 0 && cp->mtu_idx < NMTUS);
 	MPASS(cp->ulp_mode >= 0 && cp->ulp_mode <= M_ULP_MODE);
 	MPASS(cp->opt0_bufsize >= 0 && cp->opt0_bufsize <= M_RCV_BUFSIZ);
 	MPASS(cp->l2t_idx >= 0 && cp->l2t_idx < vi->adapter->vres.l2t.size);
 	MPASS(cp->keepalive == 0 || cp->keepalive == 1);
 	MPASS(cp->nagle == 0 || cp->nagle == 1);
 
 	opt0 = 0;
 	opt0 |= F_TCAM_BYPASS;
 
 	/* Per-connection settings. */
 	opt0 |= V_WND_SCALE(cp->wscale);
 	opt0 |= V_MSS_IDX(cp->mtu_idx);
 	opt0 |= V_ULP_MODE(cp->ulp_mode);
 	opt0 |= V_RCV_BUFSIZ(cp->opt0_bufsize);
 	opt0 |= V_L2T_IDX(cp->l2t_idx);
 
 	/* Settings that come from the VI / port. */
 	opt0 |= V_SMAC_SEL(vi->smt_idx);
 	opt0 |= V_TX_CHAN(vi->pi->tx_chan);
 
 	opt0 |= V_KEEP_ALIVE(cp->keepalive);
 	opt0 |= V_NAGLE(cp->nagle);
 
 	return (htobe64(opt0));
 }
 
 /*
  * Builds the 32-bit opt2 value for a connection from its parameters and the
  * VI/port it uses, and returns it in big-endian byte order.
  */
 __be32
 calc_options2(struct vi_info *vi, struct conn_params *cp)
 {
 	uint32_t opt2 = 0;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 
 	/*
 	 * rx flow control, rx coalesce, congestion control, and tx pace are all
 	 * explicitly set by the driver.  On T5+ the ISS is also set by the
 	 * driver to the value picked by the kernel.
 	 */
 	if (is_t4(sc)) {
 		opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID;
 		opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID;
 	} else {
 		opt2 |= F_T5_OPT_2_VALID;	/* all 4 valid */
 		opt2 |= F_T5_ISS;		/* ISS provided in CPL */
 	}
 
 	MPASS(cp->sack == 0 || cp->sack == 1);
 	opt2 |= V_SACK_EN(cp->sack);
 
 	MPASS(cp->tstamp == 0 || cp->tstamp == 1);
 	opt2 |= V_TSTAMPS_EN(cp->tstamp);
 
 	/* Window scaling is enabled only for a non-zero scale factor. */
 	if (cp->wscale > 0)
 		opt2 |= F_WND_SCALE_EN;
 
 	MPASS(cp->ecn == 0 || cp->ecn == 1);
 	opt2 |= V_CCTRL_ECN(cp->ecn);
 
 	/* XXX: F_RX_CHANNEL for multiple rx c-chan support goes here. */
 
 	/* Tx modulation queue of the port's channel, and the reply rx queue. */
 	opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
 	opt2 |= V_PACE(0);
 	opt2 |= F_RSS_QUEUE_VALID;
 	opt2 |= V_RSS_QUEUE(sc->sge.ofld_rxq[cp->rxq_idx].iq.abs_id);
 
 	MPASS(cp->cong_algo >= 0 && cp->cong_algo <= M_CONG_CNTRL);
 	opt2 |= V_CONG_CNTRL(cp->cong_algo);
 
 	MPASS(cp->rx_coalesce == 0 || cp->rx_coalesce == 1);
 	if (cp->rx_coalesce == 1)
 		opt2 |= V_RX_COALESCE(M_RX_COALESCE);
 
 	opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0);
 #ifdef USE_DDP_RX_FLOW_CONTROL
 	if (cp->ulp_mode == ULP_MODE_TCPDDP)
 		opt2 |= F_RX_FC_DDP;
 #endif
-	if (cp->ulp_mode == ULP_MODE_TLS)
-		opt2 |= F_RX_FC_DISABLE;
 
 	return (htobe32(opt2));
 }
 
 /*
  * Assembles the compressed filter tuple for a connection that uses the given
  * VI and L2 table entry.  Only fields whose shift in the tuple is
  * non-negative are included.  The result is big-endian: 32 bits on T4, and
  * 64 bits wrapped in V_FILTER_TUPLE on everything else.
  */
 uint64_t
 select_ntuple(struct vi_info *vi, struct l2t_entry *e)
 {
 	struct adapter *sc = vi->adapter;
 	struct tp_params *tp = &sc->params.tp;
 	uint64_t ft = 0;
 
 	/*
 	 * Initialize each of the fields which we care about which are present
 	 * in the Compressed Filter Tuple.
 	 */
 
 	/* VLAN, only if the L2T entry carries one. */
 	if (tp->vlan_shift >= 0 && EVL_VLANOFTAG(e->vlan) != CPL_L2T_VLAN_NONE)
 		ft |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift;
 
 	/* Port. */
 	if (tp->port_shift >= 0)
 		ft |= (uint64_t)e->lport << tp->port_shift;
 
 	/* Protocol is always TCP here. */
 	if (tp->protocol_shift >= 0)
 		ft |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;
 
 	/* VNIC id, only when F_VNIC is set in the ingress config. */
 	if (tp->vnic_shift >= 0 && tp->ingress_config & F_VNIC) {
 		ft |= (uint64_t)(V_FT_VNID_ID_VF(vi->vin) |
 		    V_FT_VNID_ID_PF(sc->pf) | V_FT_VNID_ID_VLD(vi->vfvld)) <<
 		    tp->vnic_shift;
 	}
 
 	if (!is_t4(sc))
 		return (htobe64(V_FILTER_TUPLE(ft)));
 
 	return (htobe32((uint32_t)ft));
 }
 
 /*
  * Returns 1 if the socket's local or foreign port is one of the adapter's
  * configured TLS rx ports, 0 otherwise.  The port list is walked with the
  * adapter lock held.
  */
 static int
 is_tls_sock(struct socket *so, struct adapter *sc)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	int idx, match;
 
 	/* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */
 	match = 0;
 	ADAPTER_LOCK(sc);
 	idx = 0;
 	while (idx < sc->tt.num_tls_rx_ports) {
 		if (inp->inp_lport == htons(sc->tt.tls_rx_ports[idx]) ||
 		    inp->inp_fport == htons(sc->tt.tls_rx_ports[idx])) {
 			match = 1;
 			break;
 		}
 		idx++;
 	}
 	ADAPTER_UNLOCK(sc);
 
 	return (match);
 }
 
 /*
  * Initialize various connection parameters.
  *
  * Fills in 'cp' for a connection that is about to be offloaded.  For most
  * parameters an explicit value in the offload settings 's' wins, then the
  * adapter's TOM tunables, then whatever the host stack / socket is
  * configured with.  'tcpopt' is required (and only used) for passive opens,
  * i.e. when 'so' is a listening socket.
  */
 void
 init_conn_params(struct vi_info *vi , struct offload_settings *s,
     struct in_conninfo *inc, struct socket *so,
     const struct tcp_options *tcpopt, int16_t l2t_idx, struct conn_params *cp)
 {
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct tom_tunables *tt = &sc->tt;
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
 	u_long wnd;
 
 	MPASS(s->offload != 0);
 
 	/* Congestion control algorithm */
 	if (s->cong_algo >= 0)
 		cp->cong_algo = s->cong_algo & M_CONG_CNTRL;
 	else if (sc->tt.cong_algorithm >= 0)
 		cp->cong_algo = tt->cong_algorithm & M_CONG_CNTRL;
 	else {
 		struct cc_algo *cc = CC_ALGO(tp);
 
 		/*
 		 * This has to be a single if / else if chain.  Without the
 		 * "else"s the fallback at the end also runs for "reno" and
 		 * "tahoe" and silently replaces them with newreno.
 		 */
 		if (strcasecmp(cc->name, "reno") == 0)
 			cp->cong_algo = CONG_ALG_RENO;
 		else if (strcasecmp(cc->name, "tahoe") == 0)
 			cp->cong_algo = CONG_ALG_TAHOE;
 		else if (strcasecmp(cc->name, "newreno") == 0)
 			cp->cong_algo = CONG_ALG_NEWRENO;
 		else if (strcasecmp(cc->name, "highspeed") == 0)
 			cp->cong_algo = CONG_ALG_HIGHSPEED;
 		else {
 			/*
 			 * Use newreno in case the algorithm selected by the
 			 * host stack is not supported by the hardware.
 			 */
 			cp->cong_algo = CONG_ALG_NEWRENO;
 		}
 	}
 
 	/* Tx traffic scheduling class. */
 	if (s->sched_class >= 0 &&
 	    s->sched_class < sc->chip_params->nsched_cls) {
 	    cp->tc_idx = s->sched_class;
 	} else
 	    cp->tc_idx = -1;
 
 	/* Nagle's algorithm. */
 	if (s->nagle >= 0)
 		cp->nagle = s->nagle > 0 ? 1 : 0;
 	else
 		cp->nagle = tp->t_flags & TF_NODELAY ? 0 : 1;
 
 	/* TCP Keepalive. */
 	if (V_tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE)
 		cp->keepalive = 1;
 	else
 		cp->keepalive = 0;
 
 	/* Optimization that's specific to T5 @ 40G. */
 	if (tt->tx_align >= 0)
 		cp->tx_align =  tt->tx_align > 0 ? 1 : 0;
 	else if (chip_id(sc) == CHELSIO_T5 &&
 	    (port_top_speed(pi) > 10 || sc->params.nports > 2))
 		cp->tx_align = 1;
 	else
 		cp->tx_align = 0;
 
 	/* ULP mode. */
 	if (can_tls_offload(sc) &&
 	    (s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc))))
 		cp->ulp_mode = ULP_MODE_TLS;
 	else if (s->ddp > 0 ||
 	    (s->ddp < 0 && sc->tt.ddp && (so_options_get(so) & SO_NO_DDP) == 0))
 		cp->ulp_mode = ULP_MODE_TCPDDP;
 	else
 		cp->ulp_mode = ULP_MODE_NONE;
 
 	/* Rx coalescing. */
 	if (s->rx_coalesce >= 0)
 		cp->rx_coalesce = s->rx_coalesce > 0 ? 1 : 0;
 	else if (cp->ulp_mode == ULP_MODE_TLS)
 		cp->rx_coalesce = 0;
 	else if (tt->rx_coalesce >= 0)
 		cp->rx_coalesce = tt->rx_coalesce > 0 ? 1 : 0;
 	else
 		cp->rx_coalesce = 1;	/* default */
 
 	/*
 	 * Index in the PMTU table.  This controls the MSS that we announce in
 	 * our SYN initially, but after ESTABLISHED it controls the MSS that we
 	 * use to send data.
 	 */
 	cp->mtu_idx = find_best_mtu_idx(sc, inc, s);
 
 	/* Tx queue for this connection. */
 	if (s->txq >= 0 && s->txq < vi->nofldtxq)
 		cp->txq_idx = s->txq;
 	else
 		cp->txq_idx = arc4random() % vi->nofldtxq;
 	cp->txq_idx += vi->first_ofld_txq;
 
 	/* Rx queue for this connection. */
 	if (s->rxq >= 0 && s->rxq < vi->nofldrxq)
 		cp->rxq_idx = s->rxq;
 	else
 		cp->rxq_idx = arc4random() % vi->nofldrxq;
 	cp->rxq_idx += vi->first_ofld_rxq;
 
 	if (SOLISTENING(so)) {
 		/* Passive open */
 		MPASS(tcpopt != NULL);
 
 		/* TCP timestamp option */
 		if (tcpopt->tstamp &&
 		    (s->tstamp > 0 || (s->tstamp < 0 && V_tcp_do_rfc1323)))
 			cp->tstamp = 1;
 		else
 			cp->tstamp = 0;
 
 		/* SACK */
 		if (tcpopt->sack &&
 		    (s->sack > 0 || (s->sack < 0 && V_tcp_do_sack)))
 			cp->sack = 1;
 		else
 			cp->sack = 0;
 
 		/* Receive window scaling. */
 		if (tcpopt->wsf > 0 && tcpopt->wsf < 15 && V_tcp_do_rfc1323)
 			cp->wscale = select_rcv_wscale();
 		else
 			cp->wscale = 0;
 
 		/* ECN */
 		if (tcpopt->ecn &&	/* XXX: review. */
 		    (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn)))
 			cp->ecn = 1;
 		else
 			cp->ecn = 0;
 
 		/* Receive buffer size is expressed in units of 1KB in opt0. */
 		wnd = max(so->sol_sbrcv_hiwat, MIN_RCV_WND);
 		cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ);
 
 		if (tt->sndbuf > 0)
 			cp->sndbuf = tt->sndbuf;
 		else if (so->sol_sbsnd_flags & SB_AUTOSIZE &&
 		    V_tcp_do_autosndbuf)
 			cp->sndbuf = 256 * 1024;
 		else
 			cp->sndbuf = so->sol_sbsnd_hiwat;
 	} else {
 		/* Active open */
 
 		/* TCP timestamp option */
 		if (s->tstamp > 0 ||
 		    (s->tstamp < 0 && (tp->t_flags & TF_REQ_TSTMP)))
 			cp->tstamp = 1;
 		else
 			cp->tstamp = 0;
 
 		/* SACK */
 		if (s->sack > 0 ||
 		    (s->sack < 0 && (tp->t_flags & TF_SACK_PERMIT)))
 			cp->sack = 1;
 		else
 			cp->sack = 0;
 
 		/* Receive window scaling */
 		if (tp->t_flags & TF_REQ_SCALE)
 			cp->wscale = select_rcv_wscale();
 		else
 			cp->wscale = 0;
 
 		/* ECN */
 		if (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn == 1))
 			cp->ecn = 1;
 		else
 			cp->ecn = 0;
 
 		/* Receive buffer size is expressed in units of 1KB in opt0. */
 		SOCKBUF_LOCK(&so->so_rcv);
 		wnd = max(select_rcv_wnd(so), MIN_RCV_WND);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ);
 
 		if (tt->sndbuf > 0)
 			cp->sndbuf = tt->sndbuf;
 		else {
 			SOCKBUF_LOCK(&so->so_snd);
 			if (so->so_snd.sb_flags & SB_AUTOSIZE &&
 			    V_tcp_do_autosndbuf)
 				cp->sndbuf = 256 * 1024;
 			else
 				cp->sndbuf = so->so_snd.sb_hiwat;
 			SOCKBUF_UNLOCK(&so->so_snd);
 		}
 	}
 
 	cp->l2t_idx = l2t_idx;
 
 	/* This will be initialized on ESTABLISHED. */
 	cp->emss = 0;
 }
 
 /*
  * Returns 1 if 'status' is one of the three "negative advice" codes
  * (retransmit, persist, keepalive) and 0 for anything else.
  */
 int
 negative_advice(int status)
 {
 
 	if (status == CPL_ERR_RTX_NEG_ADVICE)
 		return (1);
 	if (status == CPL_ERR_PERSIST_NEG_ADVICE)
 		return (1);
 	if (status == CPL_ERR_KEEPALV_NEG_ADVICE)
 		return (1);
 
 	return (0);
 }
 
 /*
  * Allocates the zeroed tid table (ntids entries) and resets the in-use
  * count.  'flags' is passed on to malloc.  Returns 0 or ENOMEM.
  */
 static int
 alloc_tid_tab(struct tid_info *t, int flags)
 {
 	void *tab;
 
 	MPASS(t->ntids > 0);
 	MPASS(t->tid_tab == NULL);
 
 	tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE, M_ZERO | flags);
 	t->tid_tab = tab;
 	if (tab == NULL)
 		return (ENOMEM);
 
 	atomic_store_rel_int(&t->tids_in_use, 0);
 	return (0);
 }
 
 /*
  * Frees the tid table.  No tids may be in use.  Harmless if the table was
  * never allocated.
  */
 static void
 free_tid_tab(struct tid_info *t)
 {
 	void *tab = t->tid_tab;
 
 	KASSERT(t->tids_in_use == 0,
 	    ("%s: %d tids still in use.", __func__, t->tids_in_use));
 
 	t->tid_tab = NULL;
 	free(tab, M_CXGBE);
 }
 
 /*
  * Allocates the zeroed stid table (nstids entries) and sets up the state
  * that goes with it: the stid lock, the in-use count, the stid list, and
  * the count of free stids at the head.  Returns 0 or ENOMEM.
  */
 static int
 alloc_stid_tab(struct tid_info *t, int flags)
 {
 	void *tab;
 
 	MPASS(t->nstids > 0);
 	MPASS(t->stid_tab == NULL);
 
 	tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
 	    M_ZERO | flags);
 	t->stid_tab = tab;
 	if (tab == NULL)
 		return (ENOMEM);
 
 	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
 	t->stids_in_use = 0;
 	TAILQ_INIT(&t->stids);
 	t->nstids_free_head = t->nstids;
 	return (0);
 }
 
 /*
  * Destroys the stid lock (if it was ever initialized) and frees the stid
  * table.  No stids may be in use.
  */
 static void
 free_stid_tab(struct tid_info *t)
 {
 	struct mtx *lock = &t->stid_lock;
 
 	KASSERT(t->stids_in_use == 0,
 	    ("%s: %d tids still in use.", __func__, t->stids_in_use));
 
 	if (mtx_initialized(lock))
 		mtx_destroy(lock);
 
 	free(t->stid_tab, M_CXGBE);
 	t->stid_tab = NULL;
 }
 
 /*
  * Releases both tables: the tid table first, then the stid table.
  */
 static void
 free_tid_tabs(struct tid_info *t)
 {
 	/* Connection tids. */
 	free_tid_tab(t);
 	/* Server (listener) tids. */
 	free_stid_tab(t);
 }
 
 static int
 alloc_tid_tabs(struct tid_info *t)
 {
 	int rc;
 
 	rc = alloc_tid_tab(t, M_NOWAIT);
 	if (rc != 0)
 		goto failed;
 
 	rc = alloc_stid_tab(t, M_NOWAIT);
 	if (rc != 0)
 		goto failed;
 
 	return (0);
 failed:
 	free_tid_tabs(t);
 	return (rc);
 }
 
 static inline void
 alloc_tcb_history(struct adapter *sc, struct tom_data *td)
 {
 
 	if (sc->tids.ntids == 0 || sc->tids.ntids > 1024)
 		return;
 	rw_init(&td->tcb_history_lock, "TCB history");
 	td->tcb_history = malloc(sc->tids.ntids * sizeof(*td->tcb_history),
 	    M_CXGBE, M_ZERO | M_NOWAIT);
 	td->dupack_threshold = G_DUPACKTHRESH(t4_read_reg(sc, A_TP_PARA_REG0));
 }
 
 /*
  * Frees the TCB history table and destroys its lock if the lock was ever
  * initialized.  With INVARIANTS, first verifies that no entry is left in the
  * table.
  */
 static inline void
 free_tcb_history(struct adapter *sc, struct tom_data *td)
 {
 #ifdef INVARIANTS
 	int tid;
 
 	for (tid = 0; td->tcb_history != NULL && tid < sc->tids.ntids; tid++)
 		MPASS(td->tcb_history[tid] == NULL);
 #endif
 
 	free(td->tcb_history, M_CXGBE);
 
 	if (!rw_initialized(&td->tcb_history_lock))
 		return;
 	rw_destroy(&td->tcb_history_lock);
 }
 
 /*
  * Releases everything hanging off the TOM softc and then the softc itself.
  * Each resource is checked before it is released, so this also works on a
  * softc that was only partially set up.  The TOE PCB list and the listen
  * context table must already be empty.
  */
 static void
 free_tom_data(struct adapter *sc, struct tom_data *td)
 {
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	KASSERT(TAILQ_EMPTY(&td->toep_list),
 	    ("%s: TOE PCB list is not empty.", __func__));
 	KASSERT(td->lctx_count == 0,
 	    ("%s: lctx hash table is not empty.", __func__));
 
 	/* Page pod region. */
 	t4_free_ppod_region(&td->pr);
 
 	/* Listen hash; a zero mask means it was never created. */
 	if (td->listen_mask != 0)
 		hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
 
 	/* Locks, skipping any that were never initialized. */
 	if (mtx_initialized(&td->unsent_wr_lock))
 		mtx_destroy(&td->unsent_wr_lock);
 
 	if (mtx_initialized(&td->lctx_hash_lock))
 		mtx_destroy(&td->lctx_hash_lock);
 
 	if (mtx_initialized(&td->toep_list_lock))
 		mtx_destroy(&td->toep_list_lock);
 
 	/* TCB history and the tid tables, then the softc itself. */
 	free_tcb_history(sc, td);
 	free_tid_tabs(&sc->tids);
 	free(td, M_CXGBE);
 }
 
 static char *
 prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen,
     int *buflen)
 {
 	char *pkt;
 	struct tcphdr *th;
 	int ipv6, len;
 	const int maxlen =
 	    max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) +
 	    max(sizeof(struct ip), sizeof(struct ip6_hdr)) +
 	    sizeof(struct tcphdr);
 
 	MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN);
 
 	pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT);
 	if (pkt == NULL)
 		return (NULL);
 
 	ipv6 = inp->inp_vflag & INP_IPV6;
 	len = 0;
 
 	if (EVL_VLANOFTAG(vtag) == 0xfff) {
 		struct ether_header *eh = (void *)pkt;
 
 		if (ipv6)
 			eh->ether_type = htons(ETHERTYPE_IPV6);
 		else
 			eh->ether_type = htons(ETHERTYPE_IP);
 
 		len += sizeof(*eh);
 	} else {
 		struct ether_vlan_header *evh = (void *)pkt;
 
 		evh->evl_encap_proto = htons(ETHERTYPE_VLAN);
 		evh->evl_tag = htons(vtag);
 		if (ipv6)
 			evh->evl_proto = htons(ETHERTYPE_IPV6);
 		else
 			evh->evl_proto = htons(ETHERTYPE_IP);
 
 		len += sizeof(*evh);
 	}
 
 	if (ipv6) {
 		struct ip6_hdr *ip6 = (void *)&pkt[len];
 
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_nxt = IPPROTO_TCP;
 		if (open_type == OPEN_TYPE_ACTIVE) {
 			ip6->ip6_src = inp->in6p_laddr;
 			ip6->ip6_dst = inp->in6p_faddr;
 		} else if (open_type == OPEN_TYPE_LISTEN) {
 			ip6->ip6_src = inp->in6p_laddr;
 			ip6->ip6_dst = ip6->ip6_src;
 		}
 
 		len += sizeof(*ip6);
 	} else {
 		struct ip *ip = (void *)&pkt[len];
 
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(*ip) >> 2;
 		ip->ip_tos = inp->inp_ip_tos;
 		ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
 		ip->ip_ttl = inp->inp_ip_ttl;
 		ip->ip_p = IPPROTO_TCP;
 		if (open_type == OPEN_TYPE_ACTIVE) {
 			ip->ip_src = inp->inp_laddr;
 			ip->ip_dst = inp->inp_faddr;
 		} else if (open_type == OPEN_TYPE_LISTEN) {
 			ip->ip_src = inp->inp_laddr;
 			ip->ip_dst = ip->ip_src;
 		}
 
 		len += sizeof(*ip);
 	}
 
 	th = (void *)&pkt[len];
 	if (open_type == OPEN_TYPE_ACTIVE) {
 		th->th_sport = inp->inp_lport;	/* network byte order already */
 		th->th_dport = inp->inp_fport;	/* ditto */
 	} else if (open_type == OPEN_TYPE_LISTEN) {
 		th->th_sport = inp->inp_lport;	/* network byte order already */
 		th->th_dport = th->th_sport;
 	}
 	len += sizeof(th);
 
 	*pktlen = *buflen = len;
 	return (pkt);
 }
 
 /*
  * Returns the offload settings that apply to a connection.
  *
  * The rules of the adapter's offload policy are tried in order and the
  * settings of the first rule whose open type and BPF program match are
  * returned.  The BPF programs run over a synthesized header for active opens
  * and listeners, and over the packet that follows the CPL_PASS_ACCEPT_REQ in
  * 'm' for passive opens.  If nothing matches, or no packet is available,
  * offload is disallowed.
  *
  * The policy lock must be held.  The returned pointer refers either to a
  * static default or to a rule inside the policy.
  */
 const struct offload_settings *
 lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m,
     uint16_t vtag, struct inpcb *inp)
 {
 	const struct t4_offload_policy *op;
 	char *pkt;
 	struct offload_rule *r;
 	int i, matched, pktlen, buflen;
 	/* Offload allowed, every tunable left at "no preference" (-1). */
 	static const struct offload_settings allow_offloading_settings = {
 		.offload = 1,
 		.rx_coalesce = -1,
 		.cong_algo = -1,
 		.sched_class = -1,
 		.tstamp = -1,
 		.sack = -1,
 		.nagle = -1,
 		.ecn = -1,
 		.ddp = -1,
 		.tls = -1,
 		.txq = -1,
 		.rxq = -1,
 		.mss = -1,
 	};
 	static const struct offload_settings disallow_offloading_settings = {
 		.offload = 0,
 		/* rest is irrelevant when offload is off. */
 	};
 
 	rw_assert(&sc->policy_lock, RA_LOCKED);
 
 	/*
 	 * If there's no Connection Offloading Policy attached to the device
 	 * then we need to return a default static policy.  If
 	 * "cop_managed_offloading" is true, then we need to disallow
 	 * offloading until a COP is attached to the device.  Otherwise we
 	 * allow offloading ...
 	 */
 	op = sc->policy;
 	if (op == NULL) {
 		if (sc->tt.cop_managed_offloading)
 			return (&disallow_offloading_settings);
 		else
 			return (&allow_offloading_settings);
 	}
 
 	/* Get hold of a packet for the BPF programs to look at. */
 	switch (open_type) {
 	case OPEN_TYPE_ACTIVE:
 	case OPEN_TYPE_LISTEN:
 		/* Allocated here, freed at the end of this function. */
 		pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen);
 		break;
 	case OPEN_TYPE_PASSIVE:
 		MPASS(m != NULL);
 		pkt = mtod(m, char *);
 		MPASS(*pkt == CPL_PASS_ACCEPT_REQ);
 		/* Skip over the CPL; what follows is handed to the filter. */
 		pkt += sizeof(struct cpl_pass_accept_req);
 		pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req);
 		buflen = m->m_len - sizeof(struct cpl_pass_accept_req);
 		break;
 	default:
 		MPASS(0);
 		return (&disallow_offloading_settings);
 	}
 
 	if (pkt == NULL || pktlen == 0 || buflen == 0)
 		return (&disallow_offloading_settings);
 
 	/* First matching rule wins; 'r' is left pointing at it. */
 	matched = 0;
 	r = &op->rule[0];
 	for (i = 0; i < op->nrules; i++, r++) {
 		if (r->open_type != open_type &&
 		    r->open_type != OPEN_TYPE_DONTCARE) {
 			continue;
 		}
 		matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen);
 		if (matched)
 			break;
 	}
 
 	/* Only the synthesized packet belongs to us. */
 	if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN)
 		free(pkt, M_CXGBE);
 
 	return (matched ? &r->settings : &disallow_offloading_settings);
 }
 
 /*
  * Task handler that cleans up after work requests on the unsent WR list
  * (see the "L2 resolution failed" list set up in t4_tom_activate).  'arg' is
  * the tom_data; 'count' is unused.
  */
 static void
 reclaim_wr_resources(void *arg, int count)
 {
 	struct tom_data *td = arg;
 	STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list);
 	struct cpl_act_open_req *cpl;
 	u_int opcode, atid, tid;
 	struct wrqe *wr;
 	struct adapter *sc = td_adapter(td);
 
 	/*
 	 * Take the whole list in one go so that the lock is not held while
 	 * the work requests are being processed.
 	 */
 	mtx_lock(&td->unsent_wr_lock);
 	STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe);
 	mtx_unlock(&td->unsent_wr_lock);
 
 	while ((wr = STAILQ_FIRST(&twr_list)) != NULL) {
 		STAILQ_REMOVE_HEAD(&twr_list, link);
 
 		/* Only the opcode is trusted until the switch picks a type. */
 		cpl = wrtod(wr);
 		opcode = GET_OPCODE(cpl);
 
 		switch (opcode) {
 		case CPL_ACT_OPEN_REQ:
 		case CPL_ACT_OPEN_REQ6:
 			/* Active open that never went out: fail it. */
 			atid = G_TID_TID(be32toh(OPCODE_TID(cpl)));
 			CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid);
 			act_open_failure_cleanup(sc, atid, EHOSTUNREACH);
 			free(wr, M_CXGBE);
 			break;
 		case CPL_PASS_ACCEPT_RPL:
 			/* SYN|ACK that never went out. */
 			tid = GET_TID(cpl);
 			CTR2(KTR_CXGBE, "%s: tid %u ", __func__, tid);
 			synack_failure_cleanup(sc, tid);
 			free(wr, M_CXGBE);
 			break;
 		default:
 			log(LOG_ERR, "%s: leaked work request %p, wr_len %d, "
 			    "opcode %x\n", __func__, wr, wr->wr_len, opcode);
 			/* WR not freed here; go look at it with a debugger.  */
 		}
 	}
 }
 
 /*
  * Ground control to Major TOM
  * Commencing countdown, engines on
  */
 static int
 t4_tom_activate(struct adapter *sc)
 {
 	struct tom_data *td;
 	struct toedev *tod;
 	struct vi_info *vi;
 	int i, rc, v;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	/* per-adapter softc for TOM */
 	td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
 	if (td == NULL)
 		return (ENOMEM);
 
 	/* List of TOE PCBs and associated lock */
 	mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
 	TAILQ_INIT(&td->toep_list);
 
 	/* Listen context */
 	mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
 	td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
 	    &td->listen_mask, HASH_NOWAIT);
 
 	/* List of WRs for which L2 resolution failed */
 	mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF);
 	STAILQ_INIT(&td->unsent_wr_list);
 	TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td);
 
 	/* TID tables */
 	rc = alloc_tid_tabs(&sc->tids);
 	if (rc != 0)
 		goto done;
 
 	rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp,
 	    t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods");
 	if (rc != 0)
 		goto done;
 	t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
 	    V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);
 
 	alloc_tcb_history(sc, td);
 
 	/* toedev ops */
 	tod = &td->tod;
 	init_toedev(tod);
 	tod->tod_softc = sc;
 	tod->tod_connect = t4_connect;
 	tod->tod_listen_start = t4_listen_start;
 	tod->tod_listen_stop = t4_listen_stop;
 	tod->tod_rcvd = t4_rcvd;
 	tod->tod_output = t4_tod_output;
 	tod->tod_send_rst = t4_send_rst;
 	tod->tod_send_fin = t4_send_fin;
 	tod->tod_pcb_detach = t4_pcb_detach;
 	tod->tod_l2_update = t4_l2_update;
 	tod->tod_syncache_added = t4_syncache_added;
 	tod->tod_syncache_removed = t4_syncache_removed;
 	tod->tod_syncache_respond = t4_syncache_respond;
 	tod->tod_offload_socket = t4_offload_socket;
 	tod->tod_ctloutput = t4_ctloutput;
 	tod->tod_tcp_info = t4_tcp_info;
 #ifdef KERN_TLS
 	tod->tod_alloc_tls_session = t4_alloc_tls_session;
 #endif
 
 	for_each_port(sc, i) {
 		for_each_vi(sc->port[i], v, vi) {
 			TOEDEV(vi->ifp) = &td->tod;
 		}
 	}
 
 	sc->tom_softc = td;
 	register_toedev(sc->tom_softc);
 
 done:
 	if (rc != 0)
 		free_tom_data(sc, td);
 	return (rc);
 }
 
 /*
  * Release the per-adapter TOM state, provided nothing is using it any more.
  *
  * Returns 0 if there was no TOM state to begin with or if it was torn down,
  * and EBUSY if a port still has offload enabled, iWARP or iSCSI is active,
  * or any TOE PCB, listen context, or unsent work request is still around.
  * Asserts that the caller is inside a synchronized operation on the adapter.
  */
 static int
 t4_tom_deactivate(struct adapter *sc)
 {
 	struct tom_data *td;
 	int error;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	td = sc->tom_softc;
 	if (td == NULL)
 		return (0);	/* XXX. KASSERT? */
 
 	/* at least one port has IFCAP_TOE enabled */
 	if (sc->offload_map != 0)
 		return (EBUSY);
 
 	/* both iWARP and iSCSI rely on the TOE. */
 	if (uld_active(sc, ULD_IWARP))
 		return (EBUSY);
 	if (uld_active(sc, ULD_ISCSI))
 		return (EBUSY);
 
 	/*
 	 * Every remaining check is performed even if an earlier one has
 	 * already found the TOM to be busy.
 	 */
 	error = 0;
 
 	mtx_lock(&td->toep_list_lock);
 	if (!TAILQ_EMPTY(&td->toep_list))
 		error = EBUSY;
 	mtx_unlock(&td->toep_list_lock);
 
 	mtx_lock(&td->lctx_hash_lock);
 	if (td->lctx_count > 0)
 		error = EBUSY;
 	mtx_unlock(&td->lctx_hash_lock);
 
 	/* Wait for the reclaim task before looking at the unsent WR list. */
 	taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources);
 	mtx_lock(&td->unsent_wr_lock);
 	if (!STAILQ_EMPTY(&td->unsent_wr_list))
 		error = EBUSY;
 	mtx_unlock(&td->unsent_wr_lock);
 
 	if (error != 0)
 		return (error);
 
 	unregister_toedev(sc->tom_softc);
 	free_tom_data(sc, td);
 	sc->tom_softc = NULL;
 
 	return (0);
 }
 
 /*
  * AIO queueing entry point installed into the TOE copies of the TCP usrreqs.
  *
  * A socket whose toepcb is in ULP_MODE_TCPDDP is offered to the DDP handler
  * first; if that handler answers EOPNOTSUPP, or the socket is in any other
  * ULP mode, the job goes to the AIO transmit handler.  Returns whatever the
  * handler that took the final decision returned.
  */
 static int
 t4_aio_queue_tom(struct socket *so, struct kaiocb *job)
 {
 	struct tcpcb *tp;
 	struct toepcb *toep;
 	int rc;
 
 	tp = so_sototcpcb(so);
 	toep = tp->t_toe;
 
 	if (ulp_mode(toep) != ULP_MODE_TCPDDP)
 		return (t4_aio_queue_aiotx(so, job));
 
 	rc = t4_aio_queue_ddp(so, job);
 	if (rc == EOPNOTSUPP)
 		rc = t4_aio_queue_aiotx(so, job);
 
 	return (rc);
 }
 
 /*
  * Socket option entry point installed into the TOE copies of the TCP protosw.
  *
  * The four TCP_TLSOM_* options at level IPPROTO_TCP are routed to
  * t4_ctloutput_tls(); every other option, at any level, is passed on to
  * tcp_ctloutput().
  */
 static int
 t4_ctloutput_tom(struct socket *so, struct sockopt *sopt)
 {
 	int tls_opt = 0;
 
 	if (sopt->sopt_level == IPPROTO_TCP) {
 		switch (sopt->sopt_name) {
 		case TCP_TLSOM_SET_TLS_CONTEXT:
 		case TCP_TLSOM_GET_TLS_TOM:
 		case TCP_TLSOM_CLR_TLS_TOM:
 		case TCP_TLSOM_CLR_QUIES:
 			tls_opt = 1;
 			break;
 		default:
 			break;
 		}
 	}
 
 	if (tls_opt)
 		return (t4_ctloutput_tls(so, sopt));
 
 	return (tcp_ctloutput(so, sopt));
 }
 
 static int
 t4_tom_mod_load(void)
 {
 	struct protosw *tcp_protosw, *tcp6_protosw;
 
 	/* CPL handlers */
 	t4_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
 	t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2,
 	    CPL_COOKIE_TOM);
 	t4_init_connect_cpl_handlers();
 	t4_init_listen_cpl_handlers();
 	t4_init_cpl_io_handlers();
 
 	t4_ddp_mod_load();
 	t4_tls_mod_load();
 
 	tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
 	if (tcp_protosw == NULL)
 		return (ENOPROTOOPT);
 	bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw));
 	bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs));
 	toe_usrreqs.pru_aio_queue = t4_aio_queue_tom;
 	toe_protosw.pr_ctloutput = t4_ctloutput_tom;
 	toe_protosw.pr_usrreqs = &toe_usrreqs;
 
 	tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
 	if (tcp6_protosw == NULL)
 		return (ENOPROTOOPT);
 	bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
 	bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs));
 	toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom;
 	toe6_protosw.pr_ctloutput = t4_ctloutput_tom;
 	toe6_protosw.pr_usrreqs = &toe6_usrreqs;
 
 	return (t4_register_uld(&tom_uld_info));
 }
 
 /*
  * Per-adapter callback used while unloading the module: deactivate the TOM
  * ULD on this adapter if it is active.  Does nothing if the synchronized
  * operation cannot be started.
  */
 static void
 tom_uninit(struct adapter *sc, void *arg __unused)
 {
 	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
 	    "t4tomun") == 0) {
 		/*
 		 * Try to free resources (works only if no port has
 		 * IFCAP_TOE).
 		 */
 		if (uld_active(sc, ULD_TOM))
 			t4_deactivate_uld(sc, ULD_TOM);
 
 		end_synchronized_op(sc, 0);
 	}
 }
 
 /*
  * MOD_UNLOAD work: run tom_uninit() on every adapter, unregister the TOM
  * ULD, run the TLS and DDP unload hooks, and remove the CPL handlers.
  *
  * Returns EBUSY, without undoing anything further, if the ULD cannot be
  * unregistered because it is busy; returns 0 otherwise.
  */
 static int
 t4_tom_mod_unload(void)
 {
 	int rc;
 
 	t4_iterate(tom_uninit, NULL);
 
 	rc = t4_unregister_uld(&tom_uld_info);
 	if (rc == EBUSY)
 		return (rc);
 
 	t4_tls_mod_unload();
 	t4_ddp_mod_unload();
 
 	/* Drop the CPL handlers. */
 	t4_uninit_connect_cpl_handlers();
 	t4_uninit_listen_cpl_handlers();
 	t4_uninit_cpl_io_handlers();
 	t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, NULL, CPL_COOKIE_TOM);
 	t4_register_cpl_handler(CPL_GET_TCB_RPL, NULL);
 
 	return (0);
 }
 #endif	/* TCP_OFFLOAD */
 
 /*
  * Module event handler for t4_tom.
  *
  * With TCP_OFFLOAD compiled in, MOD_LOAD and MOD_UNLOAD are dispatched to
  * t4_tom_mod_load() and t4_tom_mod_unload() and every other command yields
  * EINVAL.  Without TCP_OFFLOAD every command prints a notice and yields
  * EOPNOTSUPP.
  */
 static int
 t4_tom_modevent(module_t mod, int cmd, void *arg)
 {
 	int rc;
 
 #ifdef TCP_OFFLOAD
 	if (cmd == MOD_LOAD)
 		rc = t4_tom_mod_load();
 	else if (cmd == MOD_UNLOAD)
 		rc = t4_tom_mod_unload();
 	else
 		rc = EINVAL;
 #else
 	printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
 	rc = EOPNOTSUPP;
 #endif
 
 	return (rc);
 }
 
 static moduledata_t t4_tom_moddata= {
 	"t4_tom",
 	t4_tom_modevent,
 	0
 };
 
 /*
  * Kernel module glue: t4_tom is version 1, depends on the toecore and t4nex
  * modules, and is declared with t4_tom_moddata at SI_SUB_EXEC/SI_ORDER_ANY.
  */
 MODULE_VERSION(t4_tom, 1);
 MODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
 MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
 DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);