diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 130c2468b20b..59d1c367f94c 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -1,2470 +1,2470 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012, 2015 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_kern_tls.h"
 #include "opt_ratelimit.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/aio.h>
 #include <sys/file.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sglist.h>
 #include <sys/taskqueue.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_var.h>
 #include <netinet/toecore.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 
 #include <dev/iscsi/iscsi_proto.h>
 
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "common/t4_tcb.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
 /*
  * Forward declarations for the AIO transmit helpers.  t4_aiotx_queue_toep()
  * is called from t4_push_frames() below; the definitions of both functions
  * are not in this part of the file.
  */
 static void	t4_aiotx_cancel(struct kaiocb *job);
 static void	t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep);
 
 /*
  * Build and transmit the FW_FLOWC_WR for this tid.  The work request carries
  * the PF/VF, channel, port, ingress queue id, send buffer size and MSS, plus
  * the send/receive sequence numbers when a tcpcb is supplied and the optional
  * ULP mode, max tx data payload length and scheduling class parameters.
  *
  * toep: offload state of the connection; must not have had a flowc sent yet.
  * tp:   tcpcb of the connection, or NULL (an MSS of 512 is reported and no
  *       sequence numbers are included in that case).
  *
  * Consumes tx credits and one tx descriptor, and sets TPF_FLOWC_WR_SENT.
  * Panics if the work request cannot be allocated.
  */
 void
 send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
 {
 	struct port_info *pi = toep->vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 	const unsigned int pfvf = sc->pf << S_FW_VIID_PFN;
 	struct fw_flowc_wr *flowc;
 	struct wrqe *wr;
 	unsigned int nparams, flowclen, len16, paramidx;
 
 	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
 	    ("%s: flowc for tid %u sent already", __func__, toep->tid));
 
 	/* Six parameters always; SNDNXT and RCVNXT only with a tcpcb. */
 	nparams = tp != NULL ? 8 : 6;
 	if (ulp_mode(toep) == ULP_MODE_TLS)
 		nparams++;
 	if (toep->tls.fcplenmax != 0)
 		nparams++;
 	if (toep->params.tc_idx != -1) {
 		MPASS(toep->params.tc_idx >= 0 &&
 		    toep->params.tc_idx < sc->params.nsched_cls);
 		nparams++;
 	}
 
 	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
 	len16 = howmany(flowclen, 16);
 
 	wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	flowc = wrtod(wr);
 	memset(flowc, 0, wr->wr_len);
 
 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 	    V_FW_FLOWC_WR_NPARAMS(nparams));
 	flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(len16) |
 	    V_FW_WR_FLOWID(toep->tid));
 
 	/* Append one mnemonic/value pair and advance the write index. */
 #define FLOWC_PARAM(__m, __v) \
 	do { \
 		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
 		flowc->mnemval[paramidx].val = htobe32(__v); \
 		paramidx++; \
 	} while (0)
 
 	paramidx = 0;
 
 	FLOWC_PARAM(PFNVFN, pfvf);
 	FLOWC_PARAM(CH, pi->tx_chan);
 	FLOWC_PARAM(PORT, pi->tx_chan);
 	FLOWC_PARAM(IQID, toep->ofld_rxq->iq.abs_id);
 	FLOWC_PARAM(SNDBUF, toep->params.sndbuf);
 	if (tp == NULL) {
 		FLOWC_PARAM(MSS, 512);
 	} else {
 		FLOWC_PARAM(MSS, toep->params.emss);
 		FLOWC_PARAM(SNDNXT, tp->snd_nxt);
 		FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
 	}
 	CTR6(KTR_CXGBE,
 	    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
 	    __func__, toep->tid, toep->params.emss, toep->params.sndbuf,
 	    tp ? tp->snd_nxt : 0, tp ? tp->rcv_nxt : 0);
 
 	/* Optional parameters, in the same order they were counted above. */
 	if (ulp_mode(toep) == ULP_MODE_TLS) {
 		FLOWC_PARAM(ULP_MODE, ulp_mode(toep));
 	}
 	if (toep->tls.fcplenmax != 0) {
 		FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax);
 	}
 	if (toep->params.tc_idx != -1) {
 		FLOWC_PARAM(SCHEDCLASS, toep->params.tc_idx);
 	}
 #undef FLOWC_PARAM
 
 	KASSERT(paramidx == nparams, ("nparams mismatch"));
 
 	/* Account for the credits and the tx descriptor this WR uses. */
 	txsd->tx_credits = len16;
 	txsd->plen = 0;
 	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
 	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
 	toep->tx_credits -= txsd->tx_credits;
 	toep->txsd_pidx++;
 	if (__predict_false(toep->txsd_pidx == toep->txsd_total))
 		toep->txsd_pidx = 0;
 	toep->txsd_avail--;
 
 	toep->flags |= TPF_FLOWC_WR_SENT;
 	t4_wrq_tx(sc, wr);
 }
 
 #ifdef RATELIMIT
 /*
  * Apply a new pacing rate to an offloaded connection.
  *
  * Bps is in bytes per second (so_max_pacing_rate); the chip works in kilobits
  * per second.  A rate that converts to 0 Kbps unbinds the connection from its
  * scheduling class.  When the class changes, a one-parameter FW_FLOWC_WR
  * (SCHEDCLASS) is sent, which costs tx credits and a tx descriptor.
  *
  * Returns 0 on success, the error from t4_reserve_cl_rl_kbps(), or ENOMEM if
  * there are not enough credits/descriptors or the WR cannot be allocated.
  */
 static int
 update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps)
 {
 	const u_int kbps = (u_int)((uint64_t)Bps * 8ULL / 1000);
 	const int port_id = toep->vi->pi->port_id;
 	int tc_idx = -1;	/* -1: unbind */
 	int rc;
 
 	CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps);
 
 	if (kbps != 0) {
 		rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx);
 		if (rc != 0)
 			return (rc);
 		MPASS(tc_idx >= 0 && tc_idx < sc->params.nsched_cls);
 	}
 
 	if (toep->params.tc_idx != tc_idx) {
 		const int nparams = 1;
 		struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 		struct fw_flowc_wr *flowc;
 		struct wrqe *wr = NULL;
 		int flowclen, flowclen16;
 
 		flowclen = sizeof(*flowc) +
 		    nparams * sizeof(struct fw_flowc_mnemval);
 		flowclen16 = howmany(flowclen, 16);
 
 		/* Only try to allocate if the WR can be accounted for. */
 		if (toep->tx_credits >= flowclen16 && toep->txsd_avail != 0) {
 			wr = alloc_wrqe(roundup2(flowclen, 16),
 			    &toep->ofld_txq->wrq);
 		}
 		if (wr == NULL) {
 			/* Give back the class that was just reserved. */
 			if (tc_idx >= 0)
 				t4_release_cl_rl(sc, port_id, tc_idx);
 			return (ENOMEM);
 		}
 
 		flowc = wrtod(wr);
 		memset(flowc, 0, wr->wr_len);
 
 		flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 		    V_FW_FLOWC_WR_NPARAMS(nparams));
 		flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(flowclen16) |
 		    V_FW_WR_FLOWID(toep->tid));
 
 		/* 0xff is sent in place of a class index when unbinding. */
 		flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
 		flowc->mnemval[0].val = htobe32(tc_idx == -1 ? 0xff : tc_idx);
 
 		txsd->tx_credits = flowclen16;
 		txsd->plen = 0;
 		toep->tx_credits -= txsd->tx_credits;
 		toep->txsd_pidx++;
 		if (__predict_false(toep->txsd_pidx == toep->txsd_total))
 			toep->txsd_pidx = 0;
 		toep->txsd_avail--;
 		t4_wrq_tx(sc, wr);
 	}
 
 	/* Drop the reference on the previous class, then record the new one. */
 	if (toep->params.tc_idx >= 0)
 		t4_release_cl_rl(sc, port_id, toep->params.tc_idx);
 	toep->params.tc_idx = tc_idx;
 
 	return (0);
 }
 #endif
 
 /*
  * Abort an offloaded connection by sending a CPL_ABORT_REQ that asks the
  * hardware to transmit a RST.
  *
  * The inp must be write-locked.  snd_nxt is used only when the inp has been
  * dropped; otherwise the value in the tcpcb is used.  Does nothing (other than
  * tracing) if an abort is already in progress.  Panics if the work request
  * cannot be allocated.
  */
 void
 send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
 {
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);	/* don't use if INP_DROPPED */
 	const int tid = toep->tid;
 	struct cpl_abort_req *req;
 	struct socket *so;
 	struct wrqe *wr;
 
 	INP_WLOCK_ASSERT(inp);
 
 	CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
 	    __func__, toep->tid,
 	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
 	    tcpstates[tp->t_state],
 	    toep->flags, inp->inp_flags,
 	    toep->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 
 	/* Only one abort per connection. */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		return;
 	toep->flags |= TPF_ABORT_SHUTDOWN;
 
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), &toep->ofld_txq->wrq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
 	/* The tcpcb must not be touched once the inp has been dropped. */
 	req->rsvd0 = htobe32((inp->inp_flags & INP_DROPPED) ? snd_nxt :
 	    tp->snd_nxt);
 	req->rsvd1 = (toep->flags & TPF_TX_DATA_SENT) == 0;
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	/*
 	 * XXX: What's the correct way to tell that the inp hasn't been detached
 	 * from its socket?  Should I even be flushing the snd buffer here?
 	 * The NULL check on the socket is there because of that uncertainty.
 	 */
 	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0 &&
 	    (so = inp->inp_socket) != NULL)
 		sbflush(&so->so_snd);
 
 	t4_l2t_send(sc, wr, toep->l2te);
 }
 
 /*
  * Translate the TCP options word reported by the hardware at connection
  * establishment into the stack's tcpcb fields and the toepcb's params.
  *
  * tp:  tcpcb of the connection; its inp must be locked.
  * opt: options word in host byte order.
  */
 static void
 assign_rxopt(struct tcpcb *tp, uint16_t opt)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = td_adapter(toep->td);
 
 	INP_LOCK_ASSERT(inp);
 
 	/* MSS: the MTU from the adapter's table minus IP and TCP headers. */
 	toep->params.mtu_idx = G_TCPOPT_MSS(opt);
 	tp->t_maxseg = sc->params.mtus[toep->params.mtu_idx];
 	tp->t_maxseg -= sizeof(struct tcphdr) +
 	    ((inp->inp_inc.inc_flags & INC_ISIPV6) ?
 	    sizeof(struct ip6_hdr) : sizeof(struct ip));
 	toep->params.emss = tp->t_maxseg;
 
 	/* Timestamps shrink the effective MSS by the option's size. */
 	if (!G_TCPOPT_TSTAMP(opt)) {
 		toep->params.tstamp = 0;
 	} else {
 		toep->params.tstamp = 1;
 		toep->params.emss -= TCPOLEN_TSTAMP_APPA;
 		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
 		tp->ts_recent = 0;		/* hmmm */
 		tp->ts_recent_age = tcp_ts_getticks();
 	}
 
 	if (!G_TCPOPT_SACK(opt)) {
 		toep->params.sack = 0;
 		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */
 	} else {
 		toep->params.sack = 1;
 		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
 	}
 
 	if (G_TCPOPT_WSCALE_OK(opt))
 		tp->t_flags |= TF_RCVD_SCALE;
 
 	/* Window scaling is on only if it was both requested and received. */
 	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) !=
 	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
 		toep->params.wscale = 0;
 	} else {
 		tp->rcv_scale = tp->request_r_scale;
 		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
 	}
 
 	CTR6(KTR_CXGBE,
 	    "assign_rxopt: tid %d, mtu_idx %u, emss %u, ts %u, sack %u, wscale %u",
 	    toep->tid, toep->params.mtu_idx, toep->params.emss,
 	    toep->params.tstamp, toep->params.sack, toep->params.wscale);
 }
 
 /*
  * Completes some final bits of initialization for just established connections
  * and changes their state to TCPS_ESTABLISHED.
  *
  * The ISNs are from the exchange of SYNs.
  *
  * toep: offload state of the connection; its inp must be write-locked and
  *       the tcpcb must be in SYN_SENT or SYN_RECEIVED.
  * iss:  initial send sequence number.
  * irs:  initial receive sequence number.
  * opt:  TCP options word in big-endian order; it is converted to host order
  *       here before being passed to assign_rxopt().
  */
 void
 make_established(struct toepcb *toep, uint32_t iss, uint32_t irs, uint16_t opt)
 {
 	struct inpcb *inp = toep->inp;
 	struct socket *so = inp->inp_socket;
 	struct tcpcb *tp = intotcpcb(inp);
 	uint16_t tcpopt = be16toh(opt);
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(tp->t_state == TCPS_SYN_SENT ||
 	    tp->t_state == TCPS_SYN_RECEIVED,
 	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));
 
 	CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p",
 	    __func__, toep->tid, so, inp, tp, toep);
 
 	tcp_state_change(tp, TCPS_ESTABLISHED);
 	tp->t_starttime = ticks;
 	TCPSTAT_INC(tcps_connects);
 
 	/* Receive side: record the peer's ISN and set up the receive window. */
 	tp->irs = irs;
 	tcp_rcvseqinit(tp);
 	/* opt0_bufsize is scaled by 1024 (presumably kept in KB -- confirm). */
 	tp->rcv_wnd = (u_int)toep->params.opt0_bufsize << 10;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	/* Send side: everything starts one past our ISN. */
 	tp->iss = iss;
 	tcp_sendseqinit(tp);
 	tp->snd_una = iss + 1;
 	tp->snd_nxt = iss + 1;
 	tp->snd_max = iss + 1;
 
 	/*
 	 * assign_rxopt() sets toep->params.emss, and the sequence numbers were
 	 * set above; send_flowc_wr() reports all of them to the firmware.
 	 */
 	assign_rxopt(tp, tcpopt);
 	send_flowc_wr(toep, tp);
 
 	soisconnected(so);
 
 	if (ulp_mode(toep) == ULP_MODE_TLS)
 		tls_establish(toep);
 }
 
 /*
  * Return rx credits to the hardware with a CPL_RX_DATA_ACK sent on the
  * connection's control queue.
  *
  * Returns the number of credits actually returned: 'credits' on success, or 0
  * if the work request could not be allocated.
  */
 int
 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
 {
 	const uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
 	struct cpl_rx_data_ack *ack;
 	struct wrqe *wrq;
 
 	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
 
 	if ((wrq = alloc_wrqe(sizeof(*ack), toep->ctrlq)) == NULL)
 		return (0);
 
 	ack = wrtod(wrq);
 	INIT_TP_WR_MIT_CPL(ack, CPL_RX_DATA_ACK, toep->tid);
 	ack->credit_dack = htobe32(dack | V_RX_CREDITS(credits));
 	t4_wrq_tx(sc, wrq);
 
 	return (credits);
 }
 
 /*
  * Send a CPL_RX_DATA_ACK with only the RX_MODULATE_RX flag set (no credits)
  * on the connection's control queue.  Silently does nothing if the work
  * request cannot be allocated.
  */
 void
 send_rx_modulate(struct adapter *sc, struct toepcb *toep)
 {
 	struct cpl_rx_data_ack *ack;
 	struct wrqe *wrq;
 
 	if ((wrq = alloc_wrqe(sizeof(*ack), toep->ctrlq)) == NULL)
 		return;
 
 	ack = wrtod(wrq);
 	INIT_TP_WR_MIT_CPL(ack, CPL_RX_DATA_ACK, toep->tid);
 	ack->credit_dack = htobe32(F_RX_MODULATE_RX);
 	t4_wrq_tx(sc, wrq);
 }
 
 /*
  * Called with the inp write-locked and the receive sockbuf locked.  Works out
  * how much the receive window could grow given the space in the socket buffer
  * and, if the update is worthwhile, returns that many rx credits to the
  * hardware.  If no credits are returned and TPF_FORCE_CREDITS is set, an rx
  * modulate request is sent instead.
  */
 void
 t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct toepcb *toep = tp->t_toe;
 	struct inpcb *inp = tp->t_inpcb;
 	struct sockbuf *sb = &inp->inp_socket->so_rcv;
 	int rx_credits, send_update;
 
 	INP_WLOCK_ASSERT(inp);
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Credits available: buffer space beyond the advertised window. */
 	rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
 
 	/*
 	 * Return credits when the window is small, when a lot of credits have
 	 * piled up, or when the window can't cover the low watermark.
 	 */
 	send_update = 0;
 	if (rx_credits > 0) {
 		if (tp->rcv_wnd <= 32 * 1024 || rx_credits >= 64 * 1024)
 			send_update = 1;
 		else if (rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024)
 			send_update = 1;
 		else if (sbused(sb) + tp->rcv_wnd < sb->sb_lowat)
 			send_update = 1;
 	}
 
 	if (send_update) {
 		/* send_rx_credits() returns 0 if it couldn't send. */
 		rx_credits = send_rx_credits(sc, toep, rx_credits);
 		tp->rcv_wnd += rx_credits;
 		tp->rcv_adv += rx_credits;
 	} else if (toep->flags & TPF_FORCE_CREDITS) {
 		send_rx_modulate(sc, toep);
 	}
 }
 
 /*
  * Wrapper around t4_rcvd_locked() that takes and releases the receive
  * sockbuf lock around the call.
  */
 void
 t4_rcvd(struct toedev *tod, struct tcpcb *tp)
 {
 	struct sockbuf *rcvbuf = &tp->t_inpcb->inp_socket->so_rcv;
 
 	SOCKBUF_LOCK(rcvbuf);
 	t4_rcvd_locked(tod, tp);
 	SOCKBUF_UNLOCK(rcvbuf);
 }
 
 /*
  * Close a connection by sending a CPL_CLOSE_CON_REQ message.
  *
  * Ignored if a FIN has been sent already.  Otherwise the request is sent,
  * TPF_FIN_SENT is set and TPF_SEND_FIN is cleared.  Always returns 0; panics
  * if the work request cannot be allocated.
  */
 int
 t4_close_conn(struct adapter *sc, struct toepcb *toep)
 {
 	const unsigned int tid = toep->tid;
 	struct cpl_close_con_req *req;
 	struct wrqe *wr;
 
 	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
 	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");
 
 	if (toep->flags & TPF_FIN_SENT)
 		return (0);
 
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), &toep->ofld_txq->wrq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	/* Work request header followed by the CPL itself. */
 	req->wr.wr_hi = htobe32(V_FW_WR_OP(FW_TP_WR) |
 	    V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
 	req->wr.wr_mid = htobe32(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
 	    V_FW_WR_FLOWID(tid));
 	req->wr.wr_lo = cpu_to_be64(0);
 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
 	req->rsvd = 0;
 
 	toep->flags &= ~TPF_SEND_FIN;
 	toep->flags |= TPF_FIN_SENT;
 	t4_l2t_send(sc, wr, toep->l2te);
 
 	return (0);
 }
 
 /*
  * Tx credit accounting for offload work requests.  One credit covers 16 bytes
  * of work request.
  */
 /* Most credits a single work request can use. */
 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
 /* Credits for a fw_ofld_tx_data_wr header plus at least one more byte. */
 #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
 /* Extra credits needed when the WR also carries a cpl_tx_data_iso. */
 #define MIN_ISO_TX_CREDITS  (howmany(sizeof(struct cpl_tx_data_iso), 16))
 /* Fewest credits needed to send anything; 'iso' is non-zero for ISO WRs. */
 #define MIN_TX_CREDITS(iso)						\
 	(MIN_OFLD_TX_CREDITS + ((iso) ? MIN_ISO_TX_CREDITS : 0))
 
 /*
  * Largest immediate-data payload, in bytes, that fits in a work request given
  * the available tx credits.  'iso' is non-zero when the WR also has to carry a
  * cpl_tx_data_iso.  Returns 0 if there aren't enough credits for even the
  * smallest WR.  Immediate data is limited to a single tx descriptor.
  */
 static inline int
 max_imm_payload(int tx_credits, int iso)
 {
 	const int ndesc = 1;	/* Use no more than one desc for imm. data WR */
 	const int iso_cpl_size = iso ? sizeof(struct cpl_tx_data_iso) : 0;
 	int wr_bytes;
 
 	KASSERT(tx_credits >= 0 &&
 		tx_credits <= MAX_OFLD_TX_CREDITS,
 		("%s: %d credits", __func__, tx_credits));
 
 	if (tx_credits < MIN_TX_CREDITS(iso))
 		return (0);
 
 	/* Bytes available for the WR: capped by credits and by ndesc. */
 	if (tx_credits >= (ndesc * EQ_ESIZE) / 16)
 		wr_bytes = ndesc * EQ_ESIZE;
 	else
 		wr_bytes = tx_credits * 16;
 
 	return (wr_bytes - sizeof(struct fw_ofld_tx_data_wr) - iso_cpl_size);
 }
 
 /*
  * Largest number of SGL entries that fit in a work request given the
  * available tx credits.  'iso' is non-zero when the WR also has to carry a
  * cpl_tx_data_iso.  Returns 0 if there aren't enough credits for even the
  * smallest WR.
  */
 static inline int
 max_dsgl_nsegs(int tx_credits, int iso)
 {
 	const int extra_credits = tx_credits - MIN_TX_CREDITS(iso);
 	const int extra_bytes = extra_credits * 16;
 	int nseg;
 
 	KASSERT(tx_credits >= 0 &&
 		tx_credits <= MAX_OFLD_TX_CREDITS,
 		("%s: %d credits", __func__, tx_credits));
 
 	if (tx_credits < MIN_TX_CREDITS(iso))
 		return (0);
 
 	/*
 	 * ulptx_sgl has room for 1 entry; the rest go in ulp_tx_sge_pairs, each
 	 * of which takes 24 bytes and holds 2 entries.  A 16 byte remainder
 	 * is enough for one more entry.
 	 */
 	nseg = 1 + 2 * (extra_bytes / 24);
 	if (extra_bytes % 24 == 16)
 		nseg++;
 
 	return (nseg);
 }
 
 /*
  * Fill in the fw_ofld_tx_data_wr header at dst.
  *
  * fw_wr_opcode: opcode for the work request.
  * immdlen:      length of the immediate data that follows the header.
  * plen:         payload length reported to the firmware.
  * credits:      length of the WR in 16 byte units.
  * shove:        value for the TX_SHOVE flag.
  * ulp_submode:  value for the TX_ULP_SUBMODE field.
  *
  * When the connection has tx_align enabled, small payloads (less than two
  * emss) disable LSO and larger ones request payload alignment, with the
  * align-shove flag added if nagle is on.
  */
 static inline void
 write_tx_wr(void *dst, struct toepcb *toep, int fw_wr_opcode,
     unsigned int immdlen, unsigned int plen, uint8_t credits, int shove,
     int ulp_submode)
 {
 	struct fw_ofld_tx_data_wr *txwr = dst;
 	uint32_t flags;
 
 	/* Collect all the flags in host order and swap them once. */
 	flags = V_TX_ULP_MODE(ulp_mode(toep)) |
 	    V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove);
 	if (toep->params.tx_align > 0) {
 		if (plen < 2 * toep->params.emss) {
 			flags |= F_FW_OFLD_TX_DATA_WR_LSODISABLE;
 		} else {
 			flags |= F_FW_OFLD_TX_DATA_WR_ALIGNPLD;
 			if (toep->params.nagle != 0)
 				flags |= F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE;
 		}
 	}
 
 	txwr->op_to_immdlen = htobe32(V_WR_OP(fw_wr_opcode) |
 	    V_FW_WR_IMMDLEN(immdlen));
 	txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
 	    V_FW_WR_LEN16(credits));
 	txwr->lsodisable_to_flags = htobe32(flags);
 	txwr->plen = htobe32(plen);
 }
 
 /*
  * Generate a DSGL from a starting mbuf.  The total number of segments and the
  * maximum segments in any one mbuf are provided.
  *
  * dst:   where the ulptx_sgl is written.
  * start: first mbuf to include.
  * stop:  mbuf at which to stop (not included); NULL to walk to the end of the
  *        chain.
  * nsegs: total number of segments in [start, stop); must be > 0.
  * n:     maximum number of segments in any single mbuf; sizes the on-stack
  *        segment array.
  *
  * Panics if an mbuf cannot be added to the scatter/gather list.
  */
 static void
 write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
 {
 	struct mbuf *m;
 	struct ulptx_sgl *usgl = dst;
 	int i, j, rc;
 	struct sglist sg;
 	struct sglist_seg segs[n];
 
 	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
 
 	sglist_init(&sg, n, segs);
 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
 	    V_ULPTX_NSGE(nsegs));
 
 	/*
 	 * i is the index of the next entry in the sge pair array.  It starts at
 	 * -1 because the very first segment goes in len0/addr0 instead.
 	 */
 	i = -1;
 	for (m = start; m != stop; m = m->m_next) {
 		/* Break this mbuf up into physical segments. */
 		if (m->m_flags & M_EXTPG)
 			rc = sglist_append_mbuf_epg(&sg, m,
 			    mtod(m, vm_offset_t), m->m_len);
 		else
 			rc = sglist_append(&sg, mtod(m, void *), m->m_len);
 		if (__predict_false(rc != 0))
 			panic("%s: sglist_append %d", __func__, rc);
 
 		for (j = 0; j < sg.sg_nseg; i++, j++) {
 			if (i < 0) {
 				usgl->len0 = htobe32(segs[j].ss_len);
 				usgl->addr0 = htobe64(segs[j].ss_paddr);
 			} else {
 				/* Two entries per pair: i / 2 is the pair. */
 				usgl->sge[i / 2].len[i & 1] =
 				    htobe32(segs[j].ss_len);
 				usgl->sge[i / 2].addr[i & 1] =
 				    htobe64(segs[j].ss_paddr);
 			}
 #ifdef INVARIANTS
 			/* Only tracked to verify the caller's count below. */
 			nsegs--;
 #endif
 		}
 		/* The sglist is reused for the next mbuf. */
 		sglist_reset(&sg);
 	}
 	/* Odd number of pair entries: zero the unused length in the last pair. */
 	if (i & 1)
 		usgl->sge[i / 2].len[1] = htobe32(0);
 	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
 	    __func__, nsegs, start, stop));
 }
 
 /*
  * Max number of SGL entries an offload tx work request can have.  This is 41
  * (1 + 40) for a full 512B work request:
  * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1 entry) +
  * ulptx_sge_pair(480B, 40 entries)
  */
 #define OFLD_SGL_LEN (41)
 
 /*
  * Send data and/or a FIN to the peer.
  *
  * The socket's so_snd buffer consists of a stream of data starting with sb_mb
  * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
  * was transmitted.
  *
  * drop indicates the number of bytes that should be dropped from the head of
  * the send buffer.  It is an optimization that lets do_fw4_ack avoid creating
  * contention on the send buffer lock (before this change it used to do
  * sowwakeup and then t4_push_frames right after that when recovering from tx
  * stalls).  When drop is set this function MUST drop the bytes and wake up any
  * writers.
  *
  * The inp must be write-locked and the flowc WR must have been sent already.
  * Nothing is done if an abort is in progress or tx is suspended.  Tx is
  * suspended (TPF_TX_SUSPENDED) here when credits run too low or a work request
  * cannot be allocated; this function never clears that flag.
  */
 void
 t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct mbuf *sndptr, *m, *sb_sndptr;
 	struct fw_ofld_tx_data_wr *txwr;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_snd;
 	int tx_credits, shove, compl, sowwakeup;
 	struct ofld_tx_sdesc *txsd;
 	bool nomap_mbuf_seen;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 
 	KASSERT(ulp_mode(toep) == ULP_MODE_NONE ||
 	    ulp_mode(toep) == ULP_MODE_TCPDDP ||
 	    ulp_mode(toep) == ULP_MODE_TLS ||
 	    ulp_mode(toep) == ULP_MODE_RDMA,
 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
 
 #ifdef VERBOSE_TRACES
 	CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
 	    __func__, toep->tid, toep->flags, tp->t_flags, drop);
 #endif
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 #ifdef RATELIMIT
 	/* The changed flag stays set (for a later retry) if the update fails. */
 	if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) &&
 	    (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) {
 		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
 	}
 #endif
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	/* Each iteration of this loop builds and sends one work request. */
 	txsd = &toep->txsd[toep->txsd_pidx];
 	do {
 		/* Limits for this WR given the credits available right now. */
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 		max_imm = max_imm_payload(tx_credits, 0);
 		max_nsegs = max_dsgl_nsegs(tx_credits, 0);
 
 		SOCKBUF_LOCK(sb);
 		/* Dropping bytes frees up room, so writers must be woken. */
 		sowwakeup = drop;
 		if (drop) {
 			sbdrop_locked(sb, drop);
 			drop = 0;
 		}
 		/*
 		 * Start right after the last mbuf that was transmitted, or at
 		 * the head of the buffer if nothing has been sent from it.
 		 */
 		sb_sndptr = sb->sb_sndptr;
 		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
 		plen = 0;
 		nsegs = 0;
 		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
 		nomap_mbuf_seen = false;
 		for (m = sndptr; m != NULL; m = m->m_next) {
 			int n;
 
 			if ((m->m_flags & M_NOTAVAIL) != 0)
 				break;
 			if (m->m_flags & M_EXTPG) {
 #ifdef KERN_TLS
 				/*
 				 * An mbuf with a TLS session: hand over to
 				 * t4_push_ktls() if nothing has been gathered
 				 * yet, otherwise send what was gathered first.
 				 */
 				if (m->m_epg_tls != NULL) {
 					toep->flags |= TPF_KTLS;
 					if (plen == 0) {
 						SOCKBUF_UNLOCK(sb);
 						t4_push_ktls(sc, toep, 0);
 						return;
 					}
 					break;
 				}
 #endif
 				n = sglist_count_mbuf_epg(m,
 				    mtod(m, vm_offset_t), m->m_len);
 			} else
 				n = sglist_count(mtod(m, void *), m->m_len);
 
 			nsegs += n;
 			plen += m->m_len;
 
 			/* This mbuf sent us _over_ the nsegs limit, back out */
 			if (plen > max_imm && nsegs > max_nsegs) {
 				nsegs -= n;
 				plen -= m->m_len;
 				if (plen == 0) {
 					/* Too few credits */
 					toep->flags |= TPF_TX_SUSPENDED;
 					if (sowwakeup) {
 						if (!TAILQ_EMPTY(
 						    &toep->aiotx_jobq))
 							t4_aiotx_queue_toep(so,
 							    toep);
 						sowwakeup_locked(so);
 					} else
 						SOCKBUF_UNLOCK(sb);
 					SOCKBUF_UNLOCK_ASSERT(sb);
 					return;
 				}
 				break;
 			}
 
 			if (m->m_flags & M_EXTPG)
 				nomap_mbuf_seen = true;
 			if (max_nsegs_1mbuf < n)
 				max_nsegs_1mbuf = n;
 			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */
 
 			/* This mbuf put us right at the max_nsegs limit */
 			if (plen > max_imm && nsegs == max_nsegs) {
 				m = m->m_next;
 				break;
 			}
 		}
 
 		/*
 		 * Ask for a completion if the send buffer is more than 5/8 full
 		 * and at least a quarter of its high watermark will have been
 		 * sent without one.
 		 */
 		if (sbused(sb) > sb->sb_hiwat * 5 / 8 &&
 		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
 			compl = 1;
 		else
 			compl = 0;
 
 		/* Grow an auto-sized send buffer once it is 7/8 full. */
 		if (sb->sb_flags & SB_AUTOSIZE &&
 		    V_tcp_do_autosndbuf &&
 		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
 		    sbused(sb) >= sb->sb_hiwat * 7 / 8) {
 			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
 			    V_tcp_autosndbuf_max);
 
-			if (!sbreserve_locked(sb, newsize, so, NULL))
+			if (!sbreserve_locked(so, SO_SND, newsize, NULL))
 				sb->sb_flags &= ~SB_AUTOSIZE;
 			else
 				sowwakeup = 1;	/* room available */
 		}
 		/* Either way the sockbuf lock is released here. */
 		if (sowwakeup) {
 			if (!TAILQ_EMPTY(&toep->aiotx_jobq))
 				t4_aiotx_queue_toep(so, toep);
 			sowwakeup_locked(so);
 		} else
 			SOCKBUF_UNLOCK(sb);
 		SOCKBUF_UNLOCK_ASSERT(sb);
 
 		/* nothing to send */
 		if (plen == 0) {
 			KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0,
 			    ("%s: nothing to send, but m != NULL is ready",
 			    __func__));
 			break;
 		}
 
 		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		/* Shove if this WR takes all the data and no more is expected. */
 		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
 		if (plen <= max_imm && !nomap_mbuf_seen) {
 
 			/* Immediate data tx */
 
 			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
 					&toep->ofld_txq->wrq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr->wr_len, 16);
 			write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, plen, plen,
 			    credits, shove, 0);
 			/* The payload is copied in right after the header. */
 			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
 			nsegs = 0;
 		} else {
 			int wr_len;
 
 			/* DSGL tx */
 
 			/*
 			 * Header, ulptx_sgl (holds the first segment), then the
 			 * remaining nsegs - 1 segments in units of 8 bytes.
 			 */
 			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
 			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 			wr = alloc_wrqe(roundup2(wr_len, 16),
 			    &toep->ofld_txq->wrq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr_len, 16);
 			write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, 0, plen,
 			    credits, shove, 0);
 			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
 			    max_nsegs_1mbuf);
 			/* Zero the padding added by roundup2(), if any. */
 			if (wr_len & 0xf) {
 				uint64_t *pad = (uint64_t *)
 				    ((uintptr_t)txwr + wr_len);
 				*pad = 0;
 			}
 		}
 
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		toep->tx_credits -= credits;
 		toep->tx_nocompl += credits;
 		toep->plen_nocompl += plen;
 		/* Also ask for a completion when credits are running low. */
 		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
 		    toep->tx_nocompl >= toep->tx_total / 4)
 			compl = 1;
 
 		if (compl || ulp_mode(toep) == ULP_MODE_RDMA) {
 			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
 			toep->tx_nocompl = 0;
 			toep->plen_nocompl = 0;
 		}
 
 		tp->snd_nxt += plen;
 		tp->snd_max += plen;
 
 		/* Record how far into the send buffer this WR reached. */
 		SOCKBUF_LOCK(sb);
 		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
 		sb->sb_sndptr = sb_sndptr;
 		SOCKBUF_UNLOCK(sb);
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/* Fill in the tx descriptor for this WR and advance the ring. */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	} while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0);
 
 	/* Send a FIN if requested, but only if there's no more data to send */
 	if (m == NULL && toep->flags & TPF_SEND_FIN)
 		t4_close_conn(sc, toep);
 }
 
 /*
  * Dequeue and free packets from the head of q until plen bytes have been
  * accounted for.  plen is expected to cover whole packets that are present in
  * the queue; anything else trips an assertion.
  */
 static inline void
 rqdrop_locked(struct mbufq *q, int plen)
 {
 	struct mbuf *pkt;
 	int remaining;
 
 	for (remaining = plen; remaining > 0; ) {
 		pkt = mbufq_dequeue(q);
 
 		/* Too many credits. */
 		MPASS(pkt != NULL);
 		M_ASSERTPKTHDR(pkt);
 
 		/* Partial credits. */
 		MPASS(remaining >= pkt->m_pkthdr.len);
 
 		remaining -= pkt->m_pkthdr.len;
 		m_freem(pkt);
 	}
 }
 
 /*
  * Not a bit in the TCB, but is a bit in the ulp_submode field of the
  * CPL_TX_DATA flags field in FW_ISCSI_TX_DATA_WR.  It is OR'ed into the
  * ulp_submode passed to write_tx_wr() for ISO work requests.
  */
 #define	ULP_ISO		G_TX_ULP_SUBMODE(F_FW_ISCSI_TX_DATA_WR_ULPSUBMODE_ISO)
 
 /*
  * Fill in the CPL_TX_DATA_ISO at dst.
  *
  * ulp_submode: selects header and/or data CRC (ULP_CRC_HEADER, ULP_CRC_DATA).
  * flags:       CXGBE_ISO_* flags of the PDU.
  * mss:         size of each PDU generated from this one.
  * len:         length of the large PDU, including the template BHS.
  * npdu:        not used here.
  */
 static void
 write_tx_data_iso(void *dst, u_int ulp_submode, uint8_t flags, uint16_t mss,
     int len, int npdu)
 {
 	struct cpl_tx_data_iso *iso_cpl = dst;
 	unsigned int burst_size, last, hdr_crc, data_crc;
 
 	/*
 	 * The firmware will set the 'F' bit on the last PDU when
 	 * either condition is true:
 	 *
 	 * - this large PDU is marked as the "last" slice
 	 *
 	 * - the amount of data payload bytes equals the burst_size
 	 *
 	 * The strategy used here is to always set the burst_size
 	 * artificially high (len includes the size of the template
 	 * BHS) and only set the "last" flag if the original PDU had
 	 * 'F' set.
 	 */
 	burst_size = len;
 	last = (flags & CXGBE_ISO_F) != 0;
 	hdr_crc = (ulp_submode & ULP_CRC_HEADER) != 0;
 	data_crc = (ulp_submode & ULP_CRC_DATA) != 0;
 
 	iso_cpl->op_to_scsi = htonl(V_CPL_TX_DATA_ISO_OP(CPL_TX_DATA_ISO) |
 	    V_CPL_TX_DATA_ISO_FIRST(1) | V_CPL_TX_DATA_ISO_LAST(last) |
 	    V_CPL_TX_DATA_ISO_CPLHDRLEN(0) |
 	    V_CPL_TX_DATA_ISO_HDRCRC(hdr_crc) |
 	    V_CPL_TX_DATA_ISO_PLDCRC(data_crc) |
 	    V_CPL_TX_DATA_ISO_IMMEDIATE(0) |
 	    V_CPL_TX_DATA_ISO_SCSI(CXGBE_ISO_TYPE(flags)));
 
 	/* Lengths; mpdu and burst_size are given in units of 4 bytes. */
 	iso_cpl->ahs_len = 0;
 	iso_cpl->mpdu = htons(DIV_ROUND_UP(mss, 4));
 	iso_cpl->burst_size = htonl(DIV_ROUND_UP(burst_size, 4));
 	iso_cpl->len = htonl(len);
 
 	/* Everything else is zero. */
 	iso_cpl->reserved2_seglen_offset = htonl(0);
 	iso_cpl->datasn_offset = htonl(0);
 	iso_cpl->buffer_offset = htonl(0);
 	iso_cpl->reserved3 = 0;
 }
 
 /*
  * Build the work request for one item on the iSCSI tx queue: either a raw WR
  * that is copied out as-is, or a PDU (optionally an ISO "large PDU") that is
  * sent as immediate data or via a DSGL.
  *
  * sndptr: packet header mbuf at the head of the chain to send.
  *
  * Returns the work request, or NULL if the available tx credits can't
  * accommodate it or the allocation failed.  For PDUs, tp->snd_nxt and
  * tp->snd_max are advanced and the tx_iscsi_* counters are updated here.  This
  * function does not deduct tx credits or touch the tx descriptors.
  */
 static struct wrqe *
 write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 {
 	struct mbuf *m;
 	struct fw_ofld_tx_data_wr *txwr;
 	struct cpl_tx_data_iso *cpl_iso;
 	void *p;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
 	u_int adjusted_plen, imm_data, ulp_submode;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	int tx_credits, shove, npdu, wr_len;
 	uint16_t iso_mss;
 	/* Bytes of digest per PDU, indexed by ulp_submode. */
 	static const u_int ulp_extra_len[] = {0, 4, 4, 8};
 	bool iso, nomap_mbuf_seen;
 
 	M_ASSERTPKTHDR(sndptr);
 
 	tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 	/* A raw WR is already fully formed; just copy it out. */
 	if (mbuf_raw_wr(sndptr)) {
 		plen = sndptr->m_pkthdr.len;
 		KASSERT(plen <= SGE_MAX_WR_LEN,
 		    ("raw WR len %u is greater than max WR len", plen));
 		if (plen > tx_credits * 16)
 			return (NULL);
 
 		wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq);
 		if (__predict_false(wr == NULL))
 			return (NULL);
 
 		m_copydata(sndptr, 0, plen, wrtod(wr));
 		return (wr);
 	}
 
 	iso = mbuf_iscsi_iso(sndptr);
 	max_imm = max_imm_payload(tx_credits, iso);
 	max_nsegs = max_dsgl_nsegs(tx_credits, iso);
 	iso_mss = mbuf_iscsi_iso_mss(sndptr);
 
 	/* Total up the length and the number of SGL segments in the PDU. */
 	plen = 0;
 	nsegs = 0;
 	max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
 	nomap_mbuf_seen = false;
 	for (m = sndptr; m != NULL; m = m->m_next) {
 		int n;
 
 		if (m->m_flags & M_EXTPG)
 			n = sglist_count_mbuf_epg(m, mtod(m, vm_offset_t),
 			    m->m_len);
 		else
 			n = sglist_count(mtod(m, void *), m->m_len);
 
 		nsegs += n;
 		plen += m->m_len;
 
 		/*
 		 * This mbuf would send us _over_ the nsegs limit.
 		 * Suspend tx because the PDU can't be sent out.
 		 */
 		if ((nomap_mbuf_seen || plen > max_imm) && nsegs > max_nsegs)
 			return (NULL);
 
 		if (m->m_flags & M_EXTPG)
 			nomap_mbuf_seen = true;
 		if (max_nsegs_1mbuf < n)
 			max_nsegs_1mbuf = n;
 	}
 
 	if (__predict_false(toep->flags & TPF_FIN_SENT))
 		panic("%s: excess tx.", __func__);
 
 	/*
 	 * We have a PDU to send.  All of it goes out in one WR so 'm'
 	 * is NULL.  A PDU's length is always a multiple of 4.
 	 */
 	MPASS(m == NULL);
 	MPASS((plen & 3) == 0);
 	MPASS(sndptr->m_pkthdr.len == plen);
 
 	shove = !(tp->t_flags & TF_MORETOCOME);
 
 	/*
 	 * plen doesn't include header and data digests, which are
 	 * generated and inserted in the right places by the TOE, but
 	 * they do occupy TCP sequence space and need to be accounted
 	 * for.
 	 */
 	ulp_submode = mbuf_ulp_submode(sndptr);
 	MPASS(ulp_submode < nitems(ulp_extra_len));
 	/* With ISO the payload after the BHS is split into iso_mss pieces. */
 	npdu = iso ? howmany(plen - ISCSI_BHS_SIZE, iso_mss) : 1;
 	adjusted_plen = plen + ulp_extra_len[ulp_submode] * npdu;
 	/* Every PDU after the first also gets its own BHS on the wire. */
 	if (iso)
 		adjusted_plen += ISCSI_BHS_SIZE * (npdu - 1);
 	wr_len = sizeof(*txwr);
 	if (iso)
 		wr_len += sizeof(struct cpl_tx_data_iso);
 	if (plen <= max_imm && !nomap_mbuf_seen) {
 		/* Immediate data tx */
 		imm_data = plen;
 		wr_len += plen;
 		nsegs = 0;
 	} else {
 		/* DSGL tx */
 		imm_data = 0;
 		wr_len += sizeof(struct ulptx_sgl) +
 		    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 	}
 
 	wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq);
 	if (wr == NULL) {
 		/* XXX: how will we recover from this? */
 		return (NULL);
 	}
 	txwr = wrtod(wr);
 	credits = howmany(wr->wr_len, 16);
 
 	/* p ends up pointing to where the payload or the SGL goes. */
 	if (iso) {
 		write_tx_wr(txwr, toep, FW_ISCSI_TX_DATA_WR,
 		    imm_data + sizeof(struct cpl_tx_data_iso),
 		    adjusted_plen, credits, shove, ulp_submode | ULP_ISO);
 		cpl_iso = (struct cpl_tx_data_iso *)(txwr + 1);
 		MPASS(plen == sndptr->m_pkthdr.len);
 		write_tx_data_iso(cpl_iso, ulp_submode,
 		    mbuf_iscsi_iso_flags(sndptr), iso_mss, plen, npdu);
 		p = cpl_iso + 1;
 	} else {
 		write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, imm_data,
 		    adjusted_plen, credits, shove, ulp_submode);
 		p = txwr + 1;
 	}
 
 	if (imm_data != 0) {
 		m_copydata(sndptr, 0, plen, p);
 	} else {
 		write_tx_sgl(p, sndptr, m, nsegs, max_nsegs_1mbuf);
 		/* Zero the padding added by roundup2(), if any. */
 		if (wr_len & 0xf) {
 			uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len);
 			*pad = 0;
 		}
 	}
 
 	KASSERT(toep->tx_credits >= credits,
 	    ("%s: not enough credits: credits %u "
 		"toep->tx_credits %u tx_credits %u nsegs %u "
 		"max_nsegs %u iso %d", __func__, credits,
 		toep->tx_credits, tx_credits, nsegs, max_nsegs, iso));
 
 	/* Sequence space consumed includes the digests (and extra BHSes). */
 	tp->snd_nxt += adjusted_plen;
 	tp->snd_max += adjusted_plen;
 
 	counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, npdu);
 	counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen);
 	if (iso)
 		counter_u64_add(toep->ofld_txq->tx_iscsi_iso_wrs, 1);
 
 	return (wr);
 }
 
 /*
  * Send the iSCSI PDUs queued on toep->ulp_pduq to the hardware, building
  * one work request per PDU.
  *
  * 'drop' is a byte count to release before transmitting.  It is applied to
  * so_snd first (data sent before the tid switched to ISCSI mode may still
  * be sitting there) and whatever is left goes to the PDU reclaim queue.
  *
  * The inp must be write-locked by the caller.  Nothing is sent while the
  * connection is being aborted or while tx is suspended.  Tx is marked
  * suspended when a work request can't be built for the PDU at the head of
  * the queue, or when the remaining credits fall below MIN_OFLD_TX_CREDITS.
  */
 void
 t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct mbuf *sndptr, *m;
 	struct fw_wr_hdr *wrhdr;
 	struct wrqe *wr;
 	u_int plen, credits;
 	struct inpcb *inp = toep->inp;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 	struct mbufq *pduq = &toep->ulp_pduq;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 	KASSERT(ulp_mode(toep) == ULP_MODE_ISCSI,
 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
 
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	if (drop) {
 		struct socket *so = inp->inp_socket;
 		struct sockbuf *sb = &so->so_snd;
 		int sbu;
 
 		/*
 		 * An unlocked read is ok here as the data should only
 		 * transition from a non-zero value to either another
 		 * non-zero value or zero.  Once it is zero it should
 		 * stay zero.
 		 *
 		 * The branch hint wraps the whole comparison so that it
 		 * describes the condition actually being tested.
 		 */
 		if (__predict_false(sbused(sb) > 0)) {
 			SOCKBUF_LOCK(sb);
 			/* Re-read under the lock before acting on it. */
 			sbu = sbused(sb);
 			if (sbu > 0) {
 				/*
 				 * The data transmitted before the
 				 * tid's ULP mode changed to ISCSI is
 				 * still in so_snd.  Incoming credits
 				 * should account for so_snd first.
 				 */
 				sbdrop_locked(sb, min(sbu, drop));
 				drop -= min(sbu, drop);
 			}
 			sowwakeup_locked(so);	/* unlocks so_snd */
 		}
 		/* Whatever so_snd did not absorb belongs to reclaimed PDUs. */
 		rqdrop_locked(&toep->ulp_pdu_reclaimq, drop);
 	}
 
 	while ((sndptr = mbufq_first(pduq)) != NULL) {
 		wr = write_iscsi_mbuf_wr(toep, sndptr);
 		if (wr == NULL) {
 			/* Leave the PDU queued; the caller's caller resumes. */
 			toep->flags |= TPF_TX_SUSPENDED;
 			return;
 		}
 
 		plen = sndptr->m_pkthdr.len;
 		credits = howmany(wr->wr_len, 16);
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		/* The PDU moves from the send queue to the reclaim queue. */
 		m = mbufq_dequeue(pduq);
 		MPASS(m == sndptr);
 		mbufq_enqueue(&toep->ulp_pdu_reclaimq, m);
 
 		toep->tx_credits -= credits;
 		toep->tx_nocompl += credits;
 		toep->plen_nocompl += plen;
 
 		/*
 		 * Ensure there are enough credits for a full-sized WR
 		 * as page pod WRs can be full-sized.
 		 */
 		if (toep->tx_credits <= SGE_MAX_WR_LEN * 5 / 4 &&
 		    toep->tx_nocompl >= toep->tx_total / 4) {
 			/* Request a completion for this work request. */
 			wrhdr = wrtod(wr);
 			wrhdr->hi |= htobe32(F_FW_WR_COMPL);
 			toep->tx_nocompl = 0;
 			toep->plen_nocompl = 0;
 		}
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/* Record what this WR cost so credits can be matched later. */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			/* Wrap around to the start of the descriptor array. */
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	}
 
 	/* Send a FIN if requested, but only if there are no more PDUs to send */
 	if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN)
 		t4_close_conn(sc, toep);
 }
 
 /*
  * Hand the connection to the transmit routine that matches its mode:
  * iSCSI ULP mode first, then kernel TLS, otherwise plain frames.
  */
 static inline void
 t4_push_data(struct adapter *sc, struct toepcb *toep, int drop)
 {
 
 	if (ulp_mode(toep) == ULP_MODE_ISCSI) {
 		t4_push_pdus(sc, toep, drop);
 		return;
 	}
 
 	if ((toep->flags & TPF_KTLS) != 0) {
 		t4_push_ktls(sc, toep, drop);
 		return;
 	}
 
 	t4_push_frames(sc, toep, drop);
 }
 
 /*
  * Push any pending tx data for the offloaded connection behind 'tp'.
  * The inp must be write-locked and not dropped.  Always returns 0.
  */
 int
 t4_tod_output(struct toedev *tod, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = tod->tod_softc;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(inp->inp_flags & INP_DROPPED),
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	/* Nothing is being acknowledged here, so there is nothing to drop. */
 	t4_push_data(sc, toep, 0);
 	return (0);
 }
 
 /*
  * Note that a FIN is wanted on the offloaded connection behind 'tp' and,
  * if the connection has reached ESTABLISHED or later, run the tx path so
  * that the request is acted on.  Always returns 0.
  */
 int
 t4_send_fin(struct toedev *tod, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = tod->tod_softc;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(inp->inp_flags & INP_DROPPED),
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	toep->flags |= TPF_SEND_FIN;
 
 	/* Before ESTABLISHED only the flag is recorded. */
 	if (tp->t_state < TCPS_ESTABLISHED)
 		return (0);
 
 	t4_push_data(sc, toep, 0);
 	return (0);
 }
 
 /*
  * Reset the offloaded connection behind 'tp' via send_reset().  The inp
  * must be write-locked and not dropped.  Always returns 0.
  */
 int
 t4_send_rst(struct toedev *tod, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = tod->tod_softc;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(inp->inp_flags & INP_DROPPED),
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	/* The flowc work request is expected to have gone out already. */
 	KASSERT((toep->flags & TPF_FLOWC_WR_SENT) != 0,
 	    ("%s: flowc for tid %u [%s] not sent already",
 	    __func__, toep->tid, tcpstates[tp->t_state]));
 
 	send_reset(sc, toep, 0);
 
 	return (0);
 }
 
 /*
  * Peer has sent us a FIN.
  *
  * Handler for CPL_PEER_CLOSE.  Marks the socket as unable to receive any
  * more data, sets tp->rcv_nxt from the sequence number carried in the CPL
  * and advances the TCP state machine.  No payload mbuf is expected.
  * Always returns 0.
  */
 static int
 do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = NULL;
 	struct socket *so;
 	struct epoch_tracker et;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PEER_CLOSE,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 		/*
 		 * do_pass_establish must have run before do_peer_close and if
 		 * this is still a synqe instead of a toepcb then the connection
 		 * must be getting aborted.
 		 */
 		MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
 		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
 		    toep, toep->flags);
 		return (0);
 	}
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CURVNET_SET(toep->vnet);
 	NET_EPOCH_ENTER(et);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: tid %u (%s), toep_flags 0x%x, ddp_flags 0x%x, inp %p",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
 	    toep->ddp.flags, inp);
 
 	/* An abort is already in progress; leave the connection alone. */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
 	so = inp->inp_socket;
 	socantrcvmore(so);
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP) {
 		/* Let DDP deal with the close if a DDP buffer is active. */
 		DDP_LOCK(toep);
 		if (__predict_false(toep->ddp.flags &
 		    (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)))
 			handle_ddp_close(toep, tp, cpl->rcv_nxt);
 		DDP_UNLOCK(toep);
 	}
 
 	if (ulp_mode(toep) == ULP_MODE_RDMA ||
 	    (ulp_mode(toep) == ULP_MODE_ISCSI && chip_id(sc) >= CHELSIO_T6)) {
 		/*
 		 * There might be data received via DDP before the FIN
 		 * not reported to the driver.  Just assume the
 		 * sequence number in the CPL is correct as the
 		 * sequence number of the FIN.
 		 */
 	} else {
 		/* Otherwise the FIN must be the very next sequence number. */
 		KASSERT(tp->rcv_nxt + 1 == be32toh(cpl->rcv_nxt),
 		    ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
 		    be32toh(cpl->rcv_nxt)));
 	}
 
 	tp->rcv_nxt = be32toh(cpl->rcv_nxt);
 
 	switch (tp->t_state) {
 	case TCPS_SYN_RECEIVED:
 		tp->t_starttime = ticks;
 		/* FALLTHROUGH */
 
 	case TCPS_ESTABLISHED:
 		tcp_state_change(tp, TCPS_CLOSE_WAIT);
 		break;
 
 	case TCPS_FIN_WAIT_1:
 		tcp_state_change(tp, TCPS_CLOSING);
 		break;
 
 	case TCPS_FIN_WAIT_2:
 		/*
 		 * Both directions are now closed.  This path leaves the
 		 * function on its own rather than through 'done': the inp is
 		 * asserted unlocked after tcp_twstart(), then locked again
 		 * for final_cpl_received().
 		 */
 		restore_so_proto(so, inp->inp_vflag & INP_IPV6);
 		tcp_twstart(tp);
 		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 
 		INP_WLOCK(inp);
 		/* NOTE(review): presumably releases the inp lock -- confirm. */
 		final_cpl_received(toep);
 		return (0);
 
 	default:
 		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
 		    __func__, tid, tp->t_state);
 	}
 done:
 	INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Peer has ACK'd our FIN.
  *
  * Handler for CPL_CLOSE_CON_RPL.  Sets tp->snd_una from the sequence
  * number in the CPL (less one for the FIN itself) and advances the TCP
  * state machine.  No payload mbuf is expected.  Always returns 0.
  */
 static int
 do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
 	struct epoch_tracker et;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_CLOSE_CON_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CURVNET_SET(toep->vnet);
 	NET_EPOCH_ENTER(et);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
 
 	/* An abort is already in progress; leave the connection alone. */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
 	so = inp->inp_socket;
 	tp->snd_una = be32toh(cpl->snd_nxt) - 1;	/* exclude FIN */
 
 	switch (tp->t_state) {
 	case TCPS_CLOSING:	/* see TCPS_FIN_WAIT_2 in do_peer_close too */
 		restore_so_proto(so, inp->inp_vflag & INP_IPV6);
 		tcp_twstart(tp);
 		/*
 		 * Shared exit for the states that end the connection here;
 		 * TCPS_LAST_ACK jumps to this label as well.  The inp must be
 		 * unlocked on arrival.
 		 */
 release:
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the inp */
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);	/* no more CPLs expected */
 
 		return (0);
 	case TCPS_LAST_ACK:
 		/*
 		 * Unlock only when tcp_close() hands a tcpcb back; either way
 		 * the release path expects the inp to be unlocked.
 		 */
 		if (tcp_close(tp))
 			INP_WUNLOCK(inp);
 		goto release;
 
 	case TCPS_FIN_WAIT_1:
 		/* Our FIN is acked; disconnect if the peer's side is done. */
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			soisdisconnected(so);
 		tcp_state_change(tp, TCPS_FIN_WAIT_2);
 		break;
 
 	default:
 		log(LOG_ERR,
 		    "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
 		    __func__, tid, tcpstates[tp->t_state]);
 	}
 done:
 	INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Build a CPL_ABORT_RPL for 'tid' carrying 'rst_status' in its cmd field
  * and transmit it on the given offload tx queue.  Panics if the work
  * request can't be allocated.
  */
 void
 send_abort_rpl(struct adapter *sc, struct sge_ofld_txq *ofld_txq, int tid,
     int rst_status)
 {
 	struct cpl_abort_rpl *rpl;
 	struct wrqe *wrq_entry;
 
 	wrq_entry = alloc_wrqe(sizeof(*rpl), &ofld_txq->wrq);
 	if (wrq_entry == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 
 	rpl = wrtod(wrq_entry);
 	INIT_TP_WR_MIT_CPL(rpl, CPL_ABORT_RPL, tid);
 	rpl->cmd = rst_status;
 
 	t4_wrq_tx(sc, wrq_entry);
 }
 
 /*
  * Translate the abort reason reported by the hardware into an errno.
  * Reset-type reasons give EPIPE when the connection is in CLOSE_WAIT and
  * ECONNRESET otherwise, the timeout reasons give ETIMEDOUT, and anything
  * else gives EIO.
  */
 static int
 abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
 {
 	int error;
 
 	switch (abort_reason) {
 	case CPL_ERR_BAD_SYN:
 	case CPL_ERR_CONN_RESET:
 		if (tp->t_state == TCPS_CLOSE_WAIT)
 			error = EPIPE;
 		else
 			error = ECONNRESET;
 		break;
 
 	case CPL_ERR_XMIT_TIMEDOUT:
 	case CPL_ERR_PERSIST_TIMEDOUT:
 	case CPL_ERR_FINWAIT2_TIMEDOUT:
 	case CPL_ERR_KEEPALIVE_TIMEDOUT:
 		error = ETIMEDOUT;
 		break;
 
 	default:
 		error = EIO;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * TCP RST from the peer, timeout, or some other such critical error.
  *
  * Handler for CPL_ABORT_REQ_RSS.  Synq entries are passed on to
  * do_abort_req_synqe() and negative advice is ignored.  Otherwise the
  * connection is torn down here, unless an abort is already in progress,
  * and a CPL_ABORT_RPL is sent back.  Always returns 0 for the cases
  * handled in this function.
  */
 static int
 do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	/* Saved up front; it is used for the reply after the teardown. */
 	struct sge_ofld_txq *ofld_txq = toep->ofld_txq;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct epoch_tracker et;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (toep->flags & TPF_SYNQE)
 		return (do_abort_req_synqe(iq, rss, m));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	if (negative_advice(cpl->status)) {
 		CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
 		    __func__, cpl->status, tid, toep->flags);
 		return (0);	/* Ignore negative advice */
 	}
 
 	inp = toep->inp;
 	CURVNET_SET(toep->vnet);
 	NET_EPOCH_ENTER(et);	/* for tcp_close */
 	INP_WLOCK(inp);
 
 	tp = intotcpcb(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
 	    inp->inp_flags, cpl->status);
 
 	/*
 	 * If we'd initiated an abort earlier the reply to it is responsible for
 	 * cleaning up resources.  Otherwise we tear everything down right here
 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
 	 */
 	if (toep->flags & TPF_ABORT_SHUTDOWN) {
 		INP_WUNLOCK(inp);
 		goto done;
 	}
 	toep->flags |= TPF_ABORT_SHUTDOWN;
 
 	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
 		struct socket *so = inp->inp_socket;
 
 		/* Report the reason for the abort to the socket, if any. */
 		if (so != NULL)
 			so_error_set(so, abort_status_to_errno(tp,
 			    cpl->status));
 		tp = tcp_close(tp);
 		/* final_cpl_received() below is called with the inp locked. */
 		if (tp == NULL)
 			INP_WLOCK(inp);	/* re-acquire */
 	}
 
 	final_cpl_received(toep);
 done:
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
 
 /*
  * Reply to the CPL_ABORT_REQ (send_reset)
  *
  * Handler for CPL_ABORT_RPL_RSS.  Synq entries are passed on to
  * do_abort_rpl_synqe().  For a regular connection the reply is only
  * expected while an abort is in progress; the inp is write-locked and
  * final_cpl_received() is called.  Returns 0 in that case.
  */
 static int
 do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (toep->flags & TPF_SYNQE)
 		return (do_abort_rpl_synqe(iq, rss, m));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
 	    __func__, tid, toep, inp, cpl->status);
 
 	KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
 	    ("%s: wasn't expecting abort reply", __func__));
 
 	/* NOTE(review): the lock is presumably released by the callee. */
 	INP_WLOCK(inp);
 	final_cpl_received(toep);
 
 	return (0);
 }
 
 /*
  * Handler for CPL_RX_DATA: payload has arrived on an offloaded connection.
  *
  * The CPL header is stripped from 'm', tp->rcv_nxt and tp->rcv_wnd are
  * updated, and the payload is appended to the socket's receive buffer.
  * Rx credits may be returned to the hardware afterwards.  'm' is either
  * freed or handed off on every path.  Always returns 0.
  */
 static int
 do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_rx_data *cpl = mtod(m, const void *);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	struct sockbuf *sb;
 	struct epoch_tracker et;
 	int len, rx_credits;
 	uint32_t ddp_placed = 0;
 
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 		/*
 		 * do_pass_establish must have run before do_rx_data and if this
 		 * is still a synqe instead of a toepcb then the connection must
 		 * be getting aborted.
 		 */
 		MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
 		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
 		    toep, toep->flags);
 		m_freem(m);
 		return (0);
 	}
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	/* strip off CPL header */
 	m_adj(m, sizeof(*cpl));
 	len = m->m_pkthdr.len;
 
 	INP_WLOCK(inp);
 	/* Data for a connection that is already gone is discarded. */
 	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
 		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
 		    __func__, tid, len, inp->inp_flags);
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 	}
 
 	tp = intotcpcb(inp);
 
 	if (__predict_false(ulp_mode(toep) == ULP_MODE_TLS &&
 	   toep->flags & TPF_TLS_RECEIVE)) {
 		/* Received "raw" data on a TLS socket. */
 		CTR3(KTR_CXGBE, "%s: tid %u, raw TLS data (%d bytes)",
 		    __func__, tid, len);
 		/* NOTE(review): the callee presumably unlocks the inp. */
 		do_rx_data_tls(cpl, toep, m);
 		return (0);
 	}
 
 	/*
 	 * A gap between rcv_nxt and the sequence number in the CPL is
 	 * recorded as ddp_placed and passed to insert_ddp_data() below.
 	 */
 	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
 		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;
 
 	tp->rcv_nxt += len;
 	if (tp->rcv_wnd < len) {
 		KASSERT(ulp_mode(toep) == ULP_MODE_RDMA,
 				("%s: negative window size", __func__));
 	}
 
 	tp->rcv_wnd -= len;
 	tp->t_rcvtime = ticks;
 
 	/* Lock order here: inp, then DDP (if applicable), then so_rcv. */
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 		DDP_LOCK(toep);
 	so = inp_inpcbtosocket(inp);
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 
 	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
 		/*
 		 * The socket can't take any more data.  Discard the payload,
 		 * release every lock, then lock the inp again inside the net
 		 * epoch to drop the connection.
 		 */
 		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
 		    __func__, tid, len);
 		m_freem(m);
 		SOCKBUF_UNLOCK(sb);
 		if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 			DDP_UNLOCK(toep);
 		INP_WUNLOCK(inp);
 
 		CURVNET_SET(toep->vnet);
 		NET_EPOCH_ENTER(et);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		/* Unlock only when tcp_drop() hands a tcpcb back. */
 		if (tp)
 			INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 
 		return (0);
 	}
 
 	/* receive buffer autosize */
 	MPASS(toep->vnet == so->so_vnet);
 	CURVNET_SET(toep->vnet);
 	if (sb->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&
 	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
 	    len > (sbspace(sb) / 8 * 7)) {
 		unsigned int hiwat = sb->sb_hiwat;
 		unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
 		/* Stop autosizing if the larger reservation is refused. */
-		if (!sbreserve_locked(sb, newsize, so, NULL))
+		if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
 			sb->sb_flags &= ~SB_AUTOSIZE;
 	}
 
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP) {
 		/* True when the CPL's ddp_off disagrees with our DDP_ON. */
 		int changed = !(toep->ddp.flags & DDP_ON) ^ cpl->ddp_off;
 
 		if (toep->ddp.waiting_count != 0 || toep->ddp.active_count != 0)
 			CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)",
 			    __func__, tid, len);
 
 		if (changed) {
 			/* A requested switch took effect: flip both flags. */
 			if (toep->ddp.flags & DDP_SC_REQ)
 				toep->ddp.flags ^= DDP_ON | DDP_SC_REQ;
 			else {
 				KASSERT(cpl->ddp_off == 1,
 				    ("%s: DDP switched on by itself.",
 				    __func__));
 
 				/* Fell out of DDP mode */
 				toep->ddp.flags &= ~DDP_ON;
 				CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
 				    __func__);
 
 				insert_ddp_data(toep, ddp_placed);
 			}
 		}
 
 		if (toep->ddp.flags & DDP_ON) {
 			/*
 			 * CPL_RX_DATA with DDP on can only be an indicate.
 			 * Start posting queued AIO requests via DDP.  The
 			 * payload that arrived in this indicate is appended
 			 * to the socket buffer as usual.
 			 */
 			handle_ddp_indicate(toep);
 		}
 	}
 
 	sbappendstream_locked(sb, m, 0);
 	/*
 	 * Credits are the buffer space not yet covered by the receive
 	 * window.  They are only returned when buffered data plus the
 	 * window is below the low water mark.
 	 */
 	rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
 	if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) {
 		rx_credits = send_rx_credits(sc, toep, rx_credits);
 		tp->rcv_wnd += rx_credits;
 		tp->rcv_adv += rx_credits;
 	}
 
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 &&
 	    sbavail(sb) != 0) {
 		CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
 		    tid);
 		ddp_queue_toep(toep);
 	}
 	/* The wakeup also releases the so_rcv lock, as asserted next. */
 	sorwakeup_locked(so);
 	SOCKBUF_UNLOCK_ASSERT(sb);
 	if (ulp_mode(toep) == ULP_MODE_TCPDDP)
 		DDP_UNLOCK(toep);
 
 	INP_WUNLOCK(inp);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Handler for CPL_FW4_ACK: tx credits are being returned.
  *
  * Walks the tx descriptors starting at txsd_cidx, giving each one's
  * credits back to the toep and adding up the payload length they covered.
  * That many bytes are then released on the send side (so_snd and/or the
  * iSCSI PDU reclaim queue).  If tx was suspended and at least a quarter of
  * the total credits are available again, tx is resumed instead and the
  * push routine is given the byte count to drop.  Always returns 0.
  */
 static int
 do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
 	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	uint8_t credits = cpl->credits;
 	struct ofld_tx_sdesc *txsd;
 	int plen;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	/*
 	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
 	 * now this comes back carrying the credits for the flowc.
 	 */
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
 		    ("%s: credits for a synq entry %p", __func__, toep));
 		return (0);
 	}
 
 	inp = toep->inp;
 
 	KASSERT(opcode == CPL_FW4_ACK,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	INP_WLOCK(inp);
 
 	/* Credits for a connection that is being aborted are ignored. */
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
 		INP_WUNLOCK(inp);
 		return (0);
 	}
 
 	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
 	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));
 
 	tp = intotcpcb(inp);
 
 	/* The CPL carries a snd_una only when the SEQVAL flag is set. */
 	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
 		tcp_seq snd_una = be32toh(cpl->snd_una);
 
 #ifdef INVARIANTS
 		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
 			log(LOG_ERR,
 			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
 			    __func__, snd_una, toep->tid, tp->snd_una);
 		}
 #endif
 
 		if (tp->snd_una != snd_una) {
 			tp->snd_una = snd_una;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 	}
 
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits);
 #endif
 	so = inp->inp_socket;
 	txsd = &toep->txsd[toep->txsd_cidx];
 	plen = 0;
 	/*
 	 * Retire tx descriptors in order until the returned credits are used
 	 * up.  Credits are expected to cover whole descriptors only.
 	 */
 	while (credits) {
 		KASSERT(credits >= txsd->tx_credits,
 		    ("%s: too many (or partial) credits", __func__));
 		credits -= txsd->tx_credits;
 		toep->tx_credits += txsd->tx_credits;
 		plen += txsd->plen;
 		txsd++;
 		toep->txsd_avail++;
 		KASSERT(toep->txsd_avail <= toep->txsd_total,
 		    ("%s: txsd avail > total", __func__));
 		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
 			/* Wrap around to the start of the descriptor array. */
 			txsd = &toep->txsd[0];
 			toep->txsd_cidx = 0;
 		}
 	}
 
 	/* Everything has been returned; nothing is awaiting completion. */
 	if (toep->tx_credits == toep->tx_total) {
 		toep->tx_nocompl = 0;
 		toep->plen_nocompl = 0;
 	}
 
 	if (toep->flags & TPF_TX_SUSPENDED &&
 	    toep->tx_credits >= toep->tx_total / 4) {
 #ifdef VERBOSE_TRACES
 		/*
 		 * NOTE(review): the trace text names t4_push_frames but the
 		 * call below is t4_push_data.
 		 */
 		CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__,
 		    tid);
 #endif
 		toep->flags &= ~TPF_TX_SUSPENDED;
 		CURVNET_SET(toep->vnet);
 		t4_push_data(sc, toep, plen);
 		CURVNET_RESTORE();
 	} else if (plen > 0) {
 		struct sockbuf *sb = &so->so_snd;
 		int sbu;
 
 		SOCKBUF_LOCK(sb);
 		sbu = sbused(sb);
 		if (ulp_mode(toep) == ULP_MODE_ISCSI) {
 			if (__predict_false(sbu > 0)) {
 				/*
 				 * The data transmitted before the
 				 * tid's ULP mode changed to ISCSI is
 				 * still in so_snd.  Incoming credits
 				 * should account for so_snd first.
 				 */
 				sbdrop_locked(sb, min(sbu, plen));
 				plen -= min(sbu, plen);
 			}
 			sowwakeup_locked(so);	/* unlocks so_snd */
 			/* The rest is released from the PDU reclaim queue. */
 			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
 		} else {
 #ifdef VERBOSE_TRACES
 			CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__,
 			    tid, plen);
 #endif
 			sbdrop_locked(sb, plen);
 			/* Space was freed; let queued AIO tx jobs have a go. */
 			if (!TAILQ_EMPTY(&toep->aiotx_jobq))
 				t4_aiotx_queue_toep(so, toep);
 			sowwakeup_locked(so);	/* unlocks so_snd */
 		}
 		SOCKBUF_UNLOCK_ASSERT(sb);
 	}
 
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 
 /*
  * Build a CPL_SET_TCB_FIELD for the toep's tid and send it on 'wrq'.
  *
  * 'word', 'mask' and 'val' are written into the request as given.  When
  * 'reply' is zero the request is flagged F_NO_REPLY; otherwise 'cookie'
  * must not be CPL_COOKIE_RESERVED.  If 'wrq' is an offload queue the
  * request is also charged against the toep's tx credits and consumes a tx
  * descriptor.  Panics if the work request can't be allocated.
  */
 void
 t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
     uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
 {
 	struct cpl_set_tcb_field *cpl;
 	struct wrqe *wrq_entry;
 
 	MPASS((cookie & ~M_COOKIE) == 0);
 	if (reply) {
 		MPASS(cookie != CPL_COOKIE_RESERVED);
 	}
 
 	wrq_entry = alloc_wrqe(sizeof(*cpl), wrq);
 	if (wrq_entry == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	cpl = wrtod(wrq_entry);
 
 	/* Fill in the request; multi-byte fields are big-endian. */
 	INIT_TP_WR_MIT_CPL(cpl, CPL_SET_TCB_FIELD, toep->tid);
 	cpl->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id));
 	if (!reply)
 		cpl->reply_ctrl |= htobe16(F_NO_REPLY);
 	cpl->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
 	cpl->mask = htobe64(mask);
 	cpl->val = htobe64(val);
 
 	if (wrq->eq.type == EQ_OFLD) {
 		struct ofld_tx_sdesc *sd = &toep->txsd[toep->txsd_pidx];
 
 		/* The request carries no payload, only credits. */
 		sd->tx_credits = howmany(sizeof(*cpl), 16);
 		sd->plen = 0;
 		KASSERT(toep->tx_credits >= sd->tx_credits &&
 		    toep->txsd_avail > 0,
 		    ("%s: not enough credits (%d)", __func__,
 		    toep->tx_credits));
 		toep->tx_credits -= sd->tx_credits;
 
 		/* Advance the producer index, wrapping at the end. */
 		toep->txsd_pidx++;
 		if (__predict_false(toep->txsd_pidx == toep->txsd_total))
 			toep->txsd_pidx = 0;
 		toep->txsd_avail--;
 	}
 
 	t4_wrq_tx(sc, wrq_entry);
 }
 
 /*
  * Register this file's CPL handlers.  CPL_ABORT_RPL_RSS and CPL_FW4_ACK go
  * through the shared-handler interface under CPL_COOKIE_TOM; the rest are
  * registered directly.
  */
 void
 t4_init_cpl_io_handlers(void)
 {
 
 	t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
 	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
 	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
 	t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl,
 	    CPL_COOKIE_TOM);
 	t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
 	t4_register_shared_cpl_handler(CPL_FW4_ACK, do_fw4_ack, CPL_COOKIE_TOM);
 }
 
 /*
  * Undo t4_init_cpl_io_handlers() by registering NULL for the same set of
  * CPL opcodes.
  */
 void
 t4_uninit_cpl_io_handlers(void)
 {
 
 	t4_register_cpl_handler(CPL_PEER_CLOSE, NULL);
 	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL);
 	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL);
 	t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, NULL, CPL_COOKIE_TOM);
 	t4_register_cpl_handler(CPL_RX_DATA, NULL);
 	t4_register_shared_cpl_handler(CPL_FW4_ACK, NULL, CPL_COOKIE_TOM);
 }
 
 /*
  * Use the 'backend1' field in AIO jobs to hold an error that should
  * be reported when the job is completed, the 'backend3' field to
  * store the amount of data sent by the AIO job so far, and the
  * 'backend4' field to hold a reference count on the job.
  *
  * Each unmapped mbuf holds a reference on the job as does the queue
  * so long as the job is queued.
  */
 #define	aio_error	backend1
 #define	aio_sent	backend3
 #define	aio_refs	backend4
 
 /*
  * Resolve an AIO job to the tid of its connection by following
  * job -> file -> socket -> tcpcb -> toepcb.  Referenced from the
  * VERBOSE_TRACES trace points.
  */
 #define	jobtotid(job)							\
 	(((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)
 
 /*
  * Drop one reference on an AIO tx job and, if that was the last one,
  * complete the job.
  *
  * A job that managed to send anything completes successfully with the
  * byte count sent, whatever error was recorded.  Otherwise ECANCELED
  * cancels the job, any other error fails it, and no error completes it
  * with a count of zero.
  */
 static void
 aiotx_free_job(struct kaiocb *job)
 {
 	long sent;
 	int err;
 
 	if (!refcount_release(&job->aio_refs))
 		return;
 
 	sent = job->aio_sent;
 	err = (intptr_t)job->aio_error;
 #ifdef VERBOSE_TRACES
 	CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
 	    jobtotid(job), job, sent, err);
 #endif
 
 	/* Partial progress wins over a recorded error. */
 	if (sent != 0)
 		err = 0;
 
 	if (err == 0) {
 		job->msgsnd = 1;
 		aio_complete(job, sent, 0);
 		return;
 	}
 
 	if (err == ECANCELED)
 		aio_cancel(job);
 	else
 		aio_complete(job, -1, err);
 }
 
 /*
  * Free routine for the unmapped mbufs created for AIO tx.  Unwires every
  * page the mbuf refers to and then drops the mbuf's reference on the job
  * stored in ext_arg1.
  */
 static void
 aiotx_free_pgs(struct mbuf *m)
 {
 	struct kaiocb *owner;
 	int idx;
 
 	M_ASSERTEXTPG(m);
 	owner = m->m_ext.ext_arg1;
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
 	    m->m_len, jobtotid(owner));
 #endif
 
 	idx = 0;
 	while (idx < m->m_epg_npgs) {
 		vm_page_t page = PHYS_TO_VM_PAGE(m->m_epg_pa[idx]);
 
 		vm_page_unwire(page, PQ_ACTIVE);
 		idx++;
 	}
 
 	aiotx_free_job(owner);
 }
 
 /*
  * Allocate a chain of unmapped mbufs describing the next 'len' bytes
  * of an AIO job.
  *
  * The bytes start at aio_buf + aio_sent in the submitting process's
  * address space.  Each mbuf covers up to MBUF_PEXT_MAX_PGS pages and
  * takes its own reference on the job.  Returns the head of the chain.  If
  * holding the pages or allocating an mbuf fails part way through, the
  * chain built so far is returned, which is NULL when the very first step
  * failed.
  */
 static struct mbuf *
 alloc_aiotx_mbuf(struct kaiocb *job, int len)
 {
 	struct vmspace *vm;
 	vm_page_t pgs[MBUF_PEXT_MAX_PGS];
 	struct mbuf *m, *top, *last;
 	vm_map_t map;
 	vm_offset_t start;
 	int i, mlen, npages, pgoff;
 
 	KASSERT(job->aio_sent + len <= job->uaiocb.aio_nbytes,
 	    ("%s(%p, %d): request to send beyond end of buffer", __func__,
 	    job, len));
 
 	/*
 	 * The AIO subsystem will cancel and drain all requests before
 	 * permitting a process to exit or exec, so p_vmspace should
 	 * be stable here.
 	 */
 	vm = job->userproc->p_vmspace;
 	map = &vm->vm_map;
 	start = (uintptr_t)job->uaiocb.aio_buf + job->aio_sent;
 	/* Only the first mbuf can begin part way into a page. */
 	pgoff = start & PAGE_MASK;
 
 	top = NULL;
 	last = NULL;
 	while (len > 0) {
 		/* Take as much as one mbuf's worth of pages can describe. */
 		mlen = imin(len, MBUF_PEXT_MAX_PGS * PAGE_SIZE - pgoff);
 		KASSERT(mlen == len || ((start + mlen) & PAGE_MASK) == 0,
 		    ("%s: next start (%#jx + %#x) is not page aligned",
 		    __func__, (uintmax_t)start, mlen));
 
 		npages = vm_fault_quick_hold_pages(map, start, mlen,
 		    VM_PROT_WRITE, pgs, nitems(pgs));
 		if (npages < 0)
 			break;
 
 		/*
 		 * NOTE(review): an M_WAITOK allocation presumably can't fail,
 		 * which would make this check purely defensive -- confirm.
 		 */
 		m = mb_alloc_ext_pgs(M_WAITOK, aiotx_free_pgs);
 		if (m == NULL) {
 			vm_page_unhold_pages(pgs, npages);
 			break;
 		}
 
 		m->m_epg_1st_off = pgoff;
 		m->m_epg_npgs = npages;
 		if (npages == 1) {
 			KASSERT(mlen + pgoff <= PAGE_SIZE,
 			    ("%s: single page is too large (off %d len %d)",
 			    __func__, pgoff, mlen));
 			m->m_epg_last_len = mlen;
 		} else {
 			/*
 			 * What remains after the partial first page and the
 			 * full pages in the middle lands in the last page.
 			 */
 			m->m_epg_last_len = mlen - (PAGE_SIZE - pgoff) -
 			    (npages - 2) * PAGE_SIZE;
 		}
 		for (i = 0; i < npages; i++)
 			m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]);
 
 		m->m_len = mlen;
 		m->m_ext.ext_size = npages * PAGE_SIZE;
 		/* aiotx_free_pgs() finds the job through ext_arg1. */
 		m->m_ext.ext_arg1 = job;
 		refcount_acquire(&job->aio_refs);
 
 #ifdef VERBOSE_TRACES
 		CTR5(KTR_CXGBE, "%s: tid %d, new mbuf %p for job %p, npages %d",
 		    __func__, jobtotid(job), m, job, npages);
 #endif
 
 		/* Append to the chain. */
 		if (top == NULL)
 			top = m;
 		else
 			last->m_next = m;
 		last = m;
 
 		len -= mlen;
 		start += mlen;
 		pgoff = 0;
 	}
 
 	return (top);
 }
 
 /*
  * Try to send the next piece of an AIO tx job on an offloaded socket.
  *
  * Called with the so_snd sockbuf locked and returns with it locked; the
  * lock is dropped while the job is being processed.  The job is either
  * requeued at the head of toep->aiotx_jobq (too little room in the socket
  * buffer, or a blocking socket with data still to send), or the queue's
  * reference on it is dropped.  Errors are recorded in the job before that
  * reference is dropped.
  */
 static void
 t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
 {
 	struct sockbuf *sb;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct mbuf *m;
 	int error, len;
 	bool moretocome, sendmore;
 
 	sb = &so->so_snd;
 	SOCKBUF_UNLOCK(sb);
 	m = NULL;
 
 #ifdef MAC
 	error = mac_socket_check_send(job->fd_file->f_cred, so);
 	if (error != 0)
 		goto out;
 #endif
 
 	/* Inline sosend_generic(). */
 
 	error = SOCK_IO_SEND_LOCK(so, SBL_WAIT);
 	MPASS(error == 0);
 
 	/* Each pass through here sends at most one sndbuf's worth. */
 sendanother:
 	SOCKBUF_LOCK(sb);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		SOCKBUF_UNLOCK(sb);
 		SOCK_IO_SEND_UNLOCK(so);
 		if ((so->so_options & SO_NOSIGPIPE) == 0) {
 			PROC_LOCK(job->userproc);
 			kern_psignal(job->userproc, SIGPIPE);
 			PROC_UNLOCK(job->userproc);
 		}
 		error = EPIPE;
 		goto out;
 	}
 	if (so->so_error) {
 		/* Report the pending socket error and clear it. */
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(sb);
 		SOCK_IO_SEND_UNLOCK(so);
 		goto out;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		SOCKBUF_UNLOCK(sb);
 		SOCK_IO_SEND_UNLOCK(so);
 		error = ENOTCONN;
 		goto out;
 	}
 	if (sbspace(sb) < sb->sb_lowat) {
 		MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));
 
 		/*
 		 * Don't block if there is too little room in the socket
 		 * buffer.  Instead, requeue the request.
 		 */
 		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
 			SOCKBUF_UNLOCK(sb);
 			SOCK_IO_SEND_UNLOCK(so);
 			error = ECANCELED;
 			goto out;
 		}
 		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
 		SOCKBUF_UNLOCK(sb);
 		SOCK_IO_SEND_UNLOCK(so);
 		goto out;
 	}
 
 	/*
 	 * Write as much data as the socket permits, but no more than
 	 * a single sndbuf at a time.
 	 */
 	len = sbspace(sb);
 	if (len > job->uaiocb.aio_nbytes - job->aio_sent) {
 		/* The rest of the job fits in the available space. */
 		len = job->uaiocb.aio_nbytes - job->aio_sent;
 		moretocome = false;
 	} else
 		moretocome = true;
 	if (len > toep->params.sndbuf) {
 		len = toep->params.sndbuf;
 		sendmore = true;
 	} else
 		sendmore = false;
 
 	/* Other queued jobs also count as more data on the way. */
 	if (!TAILQ_EMPTY(&toep->aiotx_jobq))
 		moretocome = true;
 	SOCKBUF_UNLOCK(sb);
 	MPASS(len != 0);
 
 	m = alloc_aiotx_mbuf(job, len);
 	if (m == NULL) {
 		SOCK_IO_SEND_UNLOCK(so);
 		error = EFAULT;
 		goto out;
 	}
 
 	/* Inlined tcp_usr_send(). */
 
 	inp = toep->inp;
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		/* 'm' is still ours here and is freed at 'out'. */
 		INP_WUNLOCK(inp);
 		SOCK_IO_SEND_UNLOCK(so);
 		error = ECONNRESET;
 		goto out;
 	}
 
 	/* The chain may be shorter than 'len'; count what it holds. */
 	job->aio_sent += m_length(m, NULL);
 
 	sbappendstream(sb, m, 0);
 	/* The socket buffer has the chain now; don't free it at 'out'. */
 	m = NULL;
 
 	if (!(inp->inp_flags & INP_DROPPED)) {
 		tp = intotcpcb(inp);
 		if (moretocome)
 			tp->t_flags |= TF_MORETOCOME;
 		error = tcp_output(tp);
 		if (error < 0) {
 			/* A negative return leaves the inp unlocked. */
 			INP_UNLOCK_ASSERT(inp);
 			SOCK_IO_SEND_UNLOCK(so);
 			error = -error;
 			goto out;
 		}
 		if (moretocome)
 			tp->t_flags &= ~TF_MORETOCOME;
 	}
 
 	INP_WUNLOCK(inp);
 	if (sendmore)
 		goto sendanother;
 	SOCK_IO_SEND_UNLOCK(so);
 
 	if (error)
 		goto out;
 
 	/*
 	 * If this is a blocking socket and the request has not been
 	 * fully completed, requeue it until the socket is ready
 	 * again.
 	 */
 	if (job->aio_sent < job->uaiocb.aio_nbytes &&
 	    !(so->so_state & SS_NBIO)) {
 		SOCKBUF_LOCK(sb);
 		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
 			SOCKBUF_UNLOCK(sb);
 			error = ECANCELED;
 			goto out;
 		}
 		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
 		/* Return with so_snd locked, as the caller expects. */
 		return;
 	}
 
 	/*
 	 * If the request will not be requeued, drop the queue's
 	 * reference to the job.  Any mbufs in flight should still
 	 * hold a reference, but this drops the reference that the
 	 * queue owns while it is waiting to queue mbufs to the
 	 * socket.
 	 */
 	aiotx_free_job(job);
 
 	/* Common exit: so_snd is unlocked on arrival and locked on return. */
 out:
 	if (error) {
 		job->aio_error = (void *)(intptr_t)error;
 		aiotx_free_job(job);
 	}
 	m_freem(m);
 	SOCKBUF_LOCK(sb);
 }
 
 /*
  * Task handler that drains a connection's queue of pending AIO writes.
  *
  * With the send buffer locked, jobs are taken from the head of
  * toep->aiotx_jobq for as long as the queue is non-empty and the socket
  * is writeable.  A job whose cancel function cannot be cleared is
  * removed from the queue without being processed here; every other job
  * is handed to t4_aiotx_process_job().  After the loop aiotx_so is
  * reset and the toepcb and socket references are released.
  */
 static void
 t4_aiotx_task(void *context, int pending)
 {
 	struct epoch_tracker et;
 	struct toepcb *toep = context;
 	struct socket *so = toep->aiotx_so;
 	struct kaiocb *job;
 
 	CURVNET_SET(toep->vnet);
 	NET_EPOCH_ENTER(et);
 	SOCKBUF_LOCK(&so->so_snd);
 	for (;;) {
 		if (TAILQ_EMPTY(&toep->aiotx_jobq) || !sowriteable(so))
 			break;
 
 		job = TAILQ_FIRST(&toep->aiotx_jobq);
 		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
 		if (aio_clear_cancel_function(job))
 			t4_aiotx_process_job(toep, so, job);
 	}
 	toep->aiotx_so = NULL;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	NET_EPOCH_EXIT(et);
 
 	free_toepcb(toep);
 	sorele(so);
 	CURVNET_RESTORE();
 }
 
 /*
  * Schedule the AIO transmit task for a connection unless one is already
  * outstanding (aiotx_so is non-NULL).  Scheduling takes a reference on
  * both the socket and the toepcb before the task is enqueued.  The send
  * buffer lock of the connection's socket must be held.
  */
 static void
 t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep)
 {
 
 	SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd);
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s",
 	    __func__, toep->tid, toep->aiotx_so != NULL ? "true" : "false");
 #endif
 	if (toep->aiotx_so == NULL) {
 		soref(so);
 		toep->aiotx_so = so;
 		hold_toepcb(toep);
 		soaio_enqueue(&toep->aiotx_task);
 	}
 }
 
 /*
  * Cancel routine installed on queued AIO write jobs.  Unless the cancel
  * was already cleared, the job is unlinked from the connection's
  * aiotx_jobq under the send buffer lock.  The job is then marked
  * ECANCELED and the reference on it is dropped via aiotx_free_job().
  */
 static void
 t4_aiotx_cancel(struct kaiocb *job)
 {
 	struct socket *so = job->fd_file->f_data;
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 	struct sockbuf *sb = &so->so_snd;
 
 	MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE);
 
 	SOCKBUF_LOCK(sb);
 	if (!aio_cancel_cleared(job))
 		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
 	SOCKBUF_UNLOCK(sb);
 
 	job->aio_error = (void *)(intptr_t)ECANCELED;
 	aiotx_free_job(job);
 }
 
 /*
  * Queue an AIO write job on an offloaded connection.
  *
  * Returns EOPNOTSUPP if the job is not a write, if tx_zcopy is disabled
  * on the adapter, or if tls_tx_key() reports a TLS transmit key for the
  * connection.  Otherwise the job gets a cancel routine and an initial
  * reference, is appended to the connection's aiotx_jobq, and the
  * transmit task is scheduled if the socket is currently writeable.
  * Returns 0 in that case.
  */
 int
 t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job)
 {
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = td_adapter(toep->td);
 
 	/* Only plain (non-TLS) zero-copy writes are handled here. */
 	if (job->uaiocb.aio_lio_opcode != LIO_WRITE ||
 	    !sc->tt.tx_zcopy ||
 	    tls_tx_key(toep))
 		return (EOPNOTSUPP);
 
 	SOCKBUF_LOCK(&so->so_snd);
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: queueing %p for tid %u", __func__, job, toep->tid);
 #endif
 	if (!aio_set_cancel_function(job, t4_aiotx_cancel))
 		panic("new job was cancelled");
 	refcount_init(&job->aio_refs, 1);
 	TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list);
 	if (sowriteable(so))
 		t4_aiotx_queue_toep(so, toep);
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	return (0);
 }
 
 /*
  * Set up the per-connection AIO transmit state: the task that runs
  * t4_aiotx_task() for this toepcb and the (empty) queue of pending jobs.
  */
 void
 aiotx_init_toep(struct toepcb *toep)
 {
 
 	TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep);
 	TAILQ_INIT(&toep->aiotx_jobq);
 }
 #endif
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index 05bb903a28aa..11fea91b060e 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -1,2262 +1,2262 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/aio.h>
 #include <sys/bio.h>
 #include <sys/file.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/protosw.h>
 #include <sys/proc.h>
 #include <sys/domain.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/tcp_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/toecore.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 
 #include <cam/scsi/scsi_all.h>
 #include <cam/ctl/ctl_io.h>
 
 #ifdef TCP_OFFLOAD
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "common/t4_tcb.h"
 #include "tom/t4_tom.h"
 
 /*
  * Use the 'backend3' field in AIO jobs to store the amount of data
  * received by the AIO job so far.
  */
 #define	aio_received	backend3
 
 static void aio_ddp_requeue_task(void *context, int pending);
 static void ddp_complete_all(struct toepcb *toep, int error);
 static void t4_aio_cancel_active(struct kaiocb *job);
 static void t4_aio_cancel_queued(struct kaiocb *job);
 
 static TAILQ_HEAD(, pageset) ddp_orphan_pagesets;
 static struct mtx ddp_orphan_pagesets_lock;
 static struct task ddp_orphan_task;
 
 #define MAX_DDP_BUFFER_SIZE		(M_TCB_RX_DDP_BUF0_LEN)
 
 /*
  * A page set holds information about a buffer used for DDP.  The page
  * set holds resources such as the VM pages backing the buffer (either
  * held or wired) and the page pods associated with the buffer.
  * Recently used page sets are cached to allow for efficient reuse of
  * buffers (avoiding the need to re-fault in pages, hold them, etc.).
  * Note that cached page sets keep the backing pages wired.  The
  * number of wired pages is capped by only allowing for two wired
  * pagesets per connection.  This is not a perfect cap, but is a
  * trade-off for performance.
  *
  * If an application ping-pongs two buffers for a connection via
  * aio_read(2) then those buffers should remain wired and expensive VM
  * fault lookups should be avoided after each buffer has been used
  * once.  If an application uses more than two buffers then this will
  * fall back to doing expensive VM fault lookups for each operation.
  */
 /*
  * Release the resources held by a page set.  Any reserved page pods
  * are freed and every page is unwired onto the inactive queue.  The
  * page set itself is not freed here: it is appended to the global
  * orphan list and ddp_orphan_task is enqueued on taskqueue_thread.
  */
 static void
 free_pageset(struct tom_data *td, struct pageset *ps)
 {
 	int idx;
 
 	if (ps->prsv.prsv_nppods > 0)
 		t4_free_page_pods(&ps->prsv);
 
 	for (idx = 0; idx < ps->npages; idx++)
 		vm_page_unwire(ps->pages[idx], PQ_INACTIVE);
 
 	mtx_lock(&ddp_orphan_pagesets_lock);
 	TAILQ_INSERT_TAIL(&ddp_orphan_pagesets, ps, link);
 	taskqueue_enqueue(taskqueue_thread, &ddp_orphan_task);
 	mtx_unlock(&ddp_orphan_pagesets_lock);
 }
 
 static void
 ddp_free_orphan_pagesets(void *context, int pending)
 {
 	struct pageset *ps;
 
 	mtx_lock(&ddp_orphan_pagesets_lock);
 	while (!TAILQ_EMPTY(&ddp_orphan_pagesets)) {
 		ps = TAILQ_FIRST(&ddp_orphan_pagesets);
 		TAILQ_REMOVE(&ddp_orphan_pagesets, ps, link);
 		mtx_unlock(&ddp_orphan_pagesets_lock);
 		if (ps->vm)
 			vmspace_free(ps->vm);
 		free(ps, M_CXGBE);
 		mtx_lock(&ddp_orphan_pagesets_lock);
 	}
 	mtx_unlock(&ddp_orphan_pagesets_lock);
 }
 
 /*
  * Dispose of a page set that is no longer attached to a DDP buffer.
  * On a connection flagged DDP_DEAD the page set is freed; otherwise it
  * goes to the head of the connection's cache of page sets.  The DDP
  * lock must be held.
  */
 static void
 recycle_pageset(struct toepcb *toep, struct pageset *ps)
 {
 
 	DDP_ASSERT_LOCKED(toep);
 	if (toep->ddp.flags & DDP_DEAD) {
 		free_pageset(toep->td, ps);
 		return;
 	}
 
 	KASSERT(toep->ddp.cached_count + toep->ddp.active_count <
 	    nitems(toep->ddp.db), ("too many wired pagesets"));
 	TAILQ_INSERT_HEAD(&toep->ddp.cached_pagesets, ps, link);
 	toep->ddp.cached_count++;
 }
 
 static void
 ddp_complete_one(struct kaiocb *job, int error)
 {
 	long copied;
 
 	/*
 	 * If this job had copied data out of the socket buffer before
 	 * it was cancelled, report it as a short read rather than an
 	 * error.
 	 */
 	copied = job->aio_received;
 	if (copied != 0 || error == 0)
 		aio_complete(job, copied, 0);
 	else
 		aio_complete(job, -1, error);
 }
 
 /*
  * Tear down one DDP buffer slot: finish the AIO job attached to it (if
  * any) and free its page set (if any).
  */
 static void
 free_ddp_buffer(struct tom_data *td, struct ddp_buffer *db)
 {
 
 	if (db->job) {
 		/*
 		 * XXX: If we are un-offloading the socket then we
 		 * should requeue these on the socket somehow.  If we
 		 * got a FIN from the remote end, then this completes
 		 * any remaining requests with an EOF read.
 		 *
 		 * Only complete the job when its cancel function was
 		 * successfully cleared.  If clearing fails, a
 		 * cancellation is already in progress and the cancel
 		 * routine completes the job; completing it here as
 		 * well would finish the same job twice.
 		 */
 		if (aio_clear_cancel_function(db->job))
 			ddp_complete_one(db->job, 0);
 	}
 
 	if (db->ps)
 		free_pageset(td, db->ps);
 }
 
 /*
  * Initialize the DDP state of a new toepcb: its lock, the queue of
  * waiting AIO jobs, the requeue task, the initial flags (DDP_OK) and
  * the "no active buffer" marker (active_id == -1).
  */
 void
 ddp_init_toep(struct toepcb *toep)
 {
 
 	mtx_init(&toep->ddp.lock, "t4 ddp", NULL, MTX_DEF);
 	toep->ddp.flags = DDP_OK;
 	toep->ddp.active_id = -1;
 	TAILQ_INIT(&toep->ddp.aiojobq);
 	TASK_INIT(&toep->ddp.requeue_task, 0, aio_ddp_requeue_task, toep);
 }
 
 /*
  * Counterpart of ddp_init_toep(): destroy the toepcb's DDP mutex.
  */
 void
 ddp_uninit_toep(struct toepcb *toep)
 {
 
 	mtx_destroy(&toep->ddp.lock);
 }
 
 /*
  * Release everything DDP holds for a connection.  With the DDP lock
  * held the connection is flagged DDP_DEAD, every DDP buffer slot is
  * torn down, all cached page sets are freed, and ddp_complete_all() is
  * called with an error of 0.
  */
 void
 release_ddp_resources(struct toepcb *toep)
 {
 	struct pageset *ps;
 	int i;
 
 	DDP_LOCK(toep);
 	toep->ddp.flags |= DDP_DEAD;
 
 	for (i = 0; i < nitems(toep->ddp.db); i++)
 		free_ddp_buffer(toep->td, &toep->ddp.db[i]);
 
 	while (!TAILQ_EMPTY(&toep->ddp.cached_pagesets)) {
 		ps = TAILQ_FIRST(&toep->ddp.cached_pagesets);
 		TAILQ_REMOVE(&toep->ddp.cached_pagesets, ps, link);
 		free_pageset(toep->td, ps);
 	}
 
 	ddp_complete_all(toep, 0);
 	DDP_UNLOCK(toep);
 }
 
 #ifdef INVARIANTS
 /*
  * Debug-only (INVARIANTS) check that a toepcb holds no DDP state: the
  * requeue task is not flagged active, no buffer slot has a job or a
  * page set, and both the page set cache and the AIO job queue are
  * empty.
  */
 void
 ddp_assert_empty(struct toepcb *toep)
 {
 	int i;
 
 	MPASS(!(toep->ddp.flags & DDP_TASK_ACTIVE));
 	for (i = 0; i < nitems(toep->ddp.db); i++) {
 		MPASS(toep->ddp.db[i].job == NULL);
 		MPASS(toep->ddp.db[i].ps == NULL);
 	}
 	MPASS(TAILQ_EMPTY(&toep->ddp.cached_pagesets));
 	MPASS(TAILQ_EMPTY(&toep->ddp.aiojobq));
 }
 #endif
 
 /*
  * Retire the DDP buffer slot 'db' (index 'db_idx').
  *
  * Decrements the count of active buffers and updates active_id, then
  * detaches the job and page set from the slot (the page set is handed
  * to recycle_pageset()) and clears the slot's DDP_BUFx_ACTIVE flag.
  * The AIO job itself is not completed here.
  */
 static void
 complete_ddp_buffer(struct toepcb *toep, struct ddp_buffer *db,
     unsigned int db_idx)
 {
 	unsigned int db_flag;
 
 	toep->ddp.active_count--;
 	if (toep->ddp.active_id == db_idx) {
 		if (toep->ddp.active_count == 0) {
 			/* Nothing left active: the other slot must be idle. */
 			KASSERT(toep->ddp.db[db_idx ^ 1].job == NULL,
 			    ("%s: active_count mismatch", __func__));
 			toep->ddp.active_id = -1;
 		} else
 			/* Flip active_id to the other slot (0 <-> 1). */
 			toep->ddp.active_id ^= 1;
 #ifdef VERBOSE_TRACES
 		CTR3(KTR_CXGBE, "%s: tid %u, ddp_active_id = %d", __func__,
 		    toep->tid, toep->ddp.active_id);
 #endif
 	} else {
 		/* Retiring the non-active slot: the active one must remain. */
 		KASSERT(toep->ddp.active_count != 0 &&
 		    toep->ddp.active_id != -1,
 		    ("%s: active count mismatch", __func__));
 	}
 
 	db->cancel_pending = 0;
 	db->job = NULL;
 	recycle_pageset(toep, db->ps);
 	db->ps = NULL;
 
 	db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;
 	KASSERT(toep->ddp.flags & db_flag,
 	    ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x",
 	    __func__, toep, toep->ddp.flags));
 	toep->ddp.flags &= ~db_flag;
 }
 
 /* XXX: handle_ddp_data code duplication */
 /*
  * Account for 'n' bytes that were placed into the active DDP buffers.
  *
  * rcv_nxt is advanced by n (and rcv_wnd reduced by n unless
  * USE_DDP_RX_FLOW_CONTROL is defined).  The active buffers are then
  * walked starting at active_id; each job is credited with as much of
  * the remaining n as fits in its request, and its buffer is retired.
  * The inpcb write lock and the DDP lock must both be held.
  */
 void
 insert_ddp_data(struct toepcb *toep, uint32_t n)
 {
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct ddp_buffer *db;
 	struct kaiocb *job;
 	size_t placed;
 	long copied;
 	unsigned int db_idx;
 #ifdef INVARIANTS
 	unsigned int db_flag;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	DDP_ASSERT_LOCKED(toep);
 
 	tp->rcv_nxt += n;
 #ifndef USE_DDP_RX_FLOW_CONTROL
 	KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__));
 	tp->rcv_wnd -= n;
 #endif
 	CTR2(KTR_CXGBE, "%s: placed %u bytes before falling out of DDP",
 	    __func__, n);
 	while (toep->ddp.active_count > 0) {
 		MPASS(toep->ddp.active_id != -1);
 		db_idx = toep->ddp.active_id;
 #ifdef INVARIANTS
 		db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;
 #endif
 		MPASS((toep->ddp.flags & db_flag) != 0);
 		db = &toep->ddp.db[db_idx];
 		job = db->job;
 		copied = job->aio_received;
 		/* Credit this job with at most the space left in its request. */
 		placed = n;
 		if (placed > job->uaiocb.aio_nbytes - copied)
 			placed = job->uaiocb.aio_nbytes - copied;
 		if (placed > 0)
 			job->msgrcv = 1;
 		if (!aio_clear_cancel_function(job)) {
 			/*
 			 * Update the copied length for when
 			 * t4_aio_cancel_active() completes this
 			 * request.
 			 */
 			job->aio_received += placed;
 		} else if (copied + placed != 0) {
 			CTR4(KTR_CXGBE,
 			    "%s: completing %p (copied %ld, placed %lu)",
 			    __func__, job, copied, placed);
 			/* XXX: This always completes if there is some data. */
 			aio_complete(job, copied + placed, 0);
 		} else if (aio_set_cancel_function(job, t4_aio_cancel_queued)) {
 			/* No data for this job: put it back at the queue head. */
 			TAILQ_INSERT_HEAD(&toep->ddp.aiojobq, job, list);
 			toep->ddp.waiting_count++;
 		} else
 			aio_cancel(job);
 		n -= placed;
 		complete_ddp_buffer(toep, db, db_idx);
 	}
 
 	/* All placed bytes must have been attributed to some job. */
 	MPASS(n == 0);
 }
 
 /* SET_TCB_FIELD sent as a ULP command looks like this */
 #define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \
     sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core))
 
 /* RX_DATA_ACK sent as a ULP command looks like this */
 #define LEN__RX_DATA_ACK_ULP (sizeof(struct ulp_txpkt) + \
     sizeof(struct ulptx_idata) + sizeof(struct cpl_rx_data_ack_core))
 
 /*
  * Write one SET_TCB_FIELD, wrapped as a ULP_TX_PKT command, at 'ulpmc'.
  *
  * The CPL updates TCB word 'word' with 'val' under 'mask' and requests
  * no reply.  If the command's length is not a multiple of 16 a NOOP
  * sub-command is appended.  Returns a pointer just past what was
  * written, i.e. where the next command may be placed.
  */
 static inline void *
 mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep,
     uint64_t word, uint64_t mask, uint64_t val)
 {
 	struct ulptx_idata *idata = (struct ulptx_idata *)(ulpmc + 1);
 	struct cpl_set_tcb_field_core *cpl =
 	    (struct cpl_set_tcb_field_core *)(idata + 1);
 	struct ulptx_idata *tail = (struct ulptx_idata *)(cpl + 1);
 
 	/* ULP_TX_PKT header; length is in units of 16 bytes. */
 	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0));
 	ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16));
 
 	/* Immediate-data sub-command that carries the CPL. */
 	idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
 	idata->len = htobe32(sizeof(*cpl));
 
 	/* The CPL itself. */
 	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid));
 	cpl->reply_ctrl = htobe16(V_NO_REPLY(1) |
 	    V_QUEUENO(toep->ofld_rxq->iq.abs_id));
 	cpl->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
 	cpl->mask = htobe64(mask);
 	cpl->val = htobe64(val);
 
 	if (LEN__SET_TCB_FIELD_ULP % 16 == 0)
 		return (tail);
 
 	/* Pad with a NOOP sub-command. */
 	tail->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
 	tail->len = htobe32(0);
 	return (tail + 1);
 }
 
 /*
  * Write one RX_DATA_ACK (with F_RX_MODULATE_RX set), wrapped as a
  * ULP_TX_PKT command, at 'ulpmc'.  If the command's length is not a
  * multiple of 16 a NOOP sub-command is appended.  Returns a pointer
  * just past what was written.
  */
 static inline void *
 mk_rx_data_ack_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep)
 {
 	struct ulptx_idata *idata = (struct ulptx_idata *)(ulpmc + 1);
 	struct cpl_rx_data_ack_core *cpl =
 	    (struct cpl_rx_data_ack_core *)(idata + 1);
 	struct ulptx_idata *tail = (struct ulptx_idata *)(cpl + 1);
 
 	/* ULP_TX_PKT header; length is in units of 16 bytes. */
 	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0));
 	ulpmc->len = htobe32(howmany(LEN__RX_DATA_ACK_ULP, 16));
 
 	/* Immediate-data sub-command that carries the CPL. */
 	idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
 	idata->len = htobe32(sizeof(*cpl));
 
 	/* The CPL itself. */
 	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tid));
 	cpl->credit_dack = htobe32(F_RX_MODULATE_RX);
 
 	if (LEN__RX_DATA_ACK_ULP % 16 == 0)
 		return (tail);
 
 	/* Pad with a NOOP sub-command. */
 	tail->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
 	tail->len = htobe32(0);
 	return (tail + 1);
 }
 
 /*
  * Build the work request that programs DDP buffer 'db_idx' in the TCB.
  *
  * The request bundles three SET_TCB_FIELDs (the buffer's tag, its
  * offset and length, and the DDP flags under 'ddp_flags_mask') with an
  * RX_DATA_ACK.  Returns the allocated work request, or NULL if
  * alloc_wrqe() fails.
  */
 static struct wrqe *
 mk_update_tcb_for_ddp(struct adapter *sc, struct toepcb *toep, int db_idx,
     struct pageset *ps, int offset, uint64_t ddp_flags, uint64_t ddp_flags_mask)
 {
 	struct work_request_hdr *wrh;
 	struct ulp_txpkt *ulpmc;
 	struct wrqe *wr;
 	uint64_t buf_word, buf_mask, buf_val;
 	int len;
 
 	KASSERT(db_idx == 0 || db_idx == 1,
 	    ("%s: bad DDP buffer index %d", __func__, db_idx));
 
 	/*
 	 * We'll send a compound work request that has 3 SET_TCB_FIELDs and an
 	 * RX_DATA_ACK (with RX_MODULATE to speed up delivery).
 	 *
 	 * The work request header is 16B and always ends at a 16B boundary.
 	 * The ULPTX master commands that follow must all end at 16B boundaries
 	 * too so we round up the size to 16.
 	 */
 	len = sizeof(*wrh) + 3 * roundup2(LEN__SET_TCB_FIELD_ULP, 16) +
 	    roundup2(LEN__RX_DATA_ACK_ULP, 16);
 
 	wr = alloc_wrqe(len, toep->ctrlq);
 	if (wr == NULL)
 		return (NULL);
 	wrh = wrtod(wr);
 	INIT_ULPTX_WRH(wrh, len, 1, 0);	/* atomic */
 	ulpmc = (struct ulp_txpkt *)(wrh + 1);
 
 	/* Write the buffer's tag */
 	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
 	    W_TCB_RX_DDP_BUF0_TAG + db_idx,
 	    V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
 	    V_TCB_RX_DDP_BUF0_TAG(ps->prsv.prsv_tag));
 
 	/* Update the current offset in the DDP buffer and its total length */
 	if (db_idx == 0) {
 		buf_word = W_TCB_RX_DDP_BUF0_OFFSET;
 		buf_mask = V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
 		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN);
 		buf_val = V_TCB_RX_DDP_BUF0_OFFSET(offset) |
 		    V_TCB_RX_DDP_BUF0_LEN(ps->len);
 	} else {
 		buf_word = W_TCB_RX_DDP_BUF1_OFFSET;
 		buf_mask = V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
 		    V_TCB_RX_DDP_BUF1_LEN((u64)M_TCB_RX_DDP_BUF1_LEN << 32);
 		buf_val = V_TCB_RX_DDP_BUF1_OFFSET(offset) |
 		    V_TCB_RX_DDP_BUF1_LEN((u64)ps->len << 32);
 	}
 	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, buf_word, buf_mask, buf_val);
 
 	/* Update DDP flags */
 	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_FLAGS,
 	    ddp_flags_mask, ddp_flags);
 
 	/* Gratuitous RX_DATA_ACK with RX_MODULATE set to speed up delivery. */
 	ulpmc = mk_rx_data_ack_ulp(ulpmc, toep);
 
 	return (wr);
 }
 
 /*
  * Process a report that data was placed in a DDP buffer.
  *
  * 'ddp_report' and 'rcv_nxt' are big-endian values taken from the CPL;
  * 'len' is the length reported by the CPL (0 for RX_DDP_COMPLETE).  The
  * number of bytes placed is derived from rcv_nxt, the TCP state is
  * advanced, the receive buffer may be autosized, and the job attached
  * to the reported buffer is completed, left for its cancel routine, or
  * deferred to the TCB_RPL callback.  Takes and releases the inpcb
  * write lock and the DDP lock.  Always returns 0.
  */
 static int
 handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
 {
 	uint32_t report = be32toh(ddp_report);
 	unsigned int db_idx;
 	struct inpcb *inp = toep->inp;
 	struct ddp_buffer *db;
 	struct tcpcb *tp;
 	struct socket *so;
 	struct sockbuf *sb;
 	struct kaiocb *job;
 	long copied;
 
 	db_idx = report & F_DDP_BUF_IDX ? 1 : 0;
 
 	if (__predict_false(!(report & F_DDP_INV)))
 		CXGBE_UNIMPLEMENTED("DDP buffer still valid");
 
 	INP_WLOCK(inp);
 	so = inp_inpcbtosocket(inp);
 	sb = &so->so_rcv;
 	DDP_LOCK(toep);
 
 	KASSERT(toep->ddp.active_id == db_idx,
 	    ("completed DDP buffer (%d) != active_id (%d) for tid %d", db_idx,
 	    toep->ddp.active_id, toep->tid));
 	db = &toep->ddp.db[db_idx];
 	job = db->job;
 
 	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
 		/*
 		 * This can happen due to an administrative tcpdrop(8).
 		 * Just fail the request with ECONNRESET.
 		 */
 		CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x",
 		    __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags);
 		if (aio_clear_cancel_function(job))
 			ddp_complete_one(job, ECONNRESET);
 		goto completed;
 	}
 
 	tp = intotcpcb(inp);
 
 	/*
 	 * For RX_DDP_COMPLETE, len will be zero and rcv_nxt is the
 	 * sequence number of the next byte to receive.  The length of
 	 * the data received for this message must be computed by
 	 * comparing the new and old values of rcv_nxt.
 	 *
 	 * For RX_DATA_DDP, len might be non-zero, but it is only the
 	 * length of the most recent DMA.  It does not include the
 	 * total length of the data received since the previous update
 	 * for this DDP buffer.  rcv_nxt is the sequence number of the
 	 * first received byte from the most recent DMA.
 	 */
 	len += be32toh(rcv_nxt) - tp->rcv_nxt;
 	tp->rcv_nxt += len;
 	tp->t_rcvtime = ticks;
 #ifndef USE_DDP_RX_FLOW_CONTROL
 	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
 	tp->rcv_wnd -= len;
 #endif
 #ifdef VERBOSE_TRACES
 	CTR5(KTR_CXGBE, "%s: tid %u, DDP[%d] placed %d bytes (%#x)", __func__,
 	    toep->tid, db_idx, len, report);
 #endif
 
 	/* receive buffer autosize */
 	MPASS(toep->vnet == so->so_vnet);
 	CURVNET_SET(toep->vnet);
 	SOCKBUF_LOCK(sb);
 	if (sb->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&
 	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
 	    len > (sbspace(sb) / 8 * 7)) {
 		struct adapter *sc = td_adapter(toep->td);
 		unsigned int hiwat = sb->sb_hiwat;
 		unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
 		/* Stop autosizing if the larger reservation is refused. */
-		if (!sbreserve_locked(sb, newsize, so, NULL))
+		if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
 			sb->sb_flags &= ~SB_AUTOSIZE;
 	}
 	SOCKBUF_UNLOCK(sb);
 	CURVNET_RESTORE();
 
 	job->msgrcv = 1;
 	if (db->cancel_pending) {
 		/*
 		 * Update the job's length but defer completion to the
 		 * TCB_RPL callback.
 		 */
 		job->aio_received += len;
 		goto out;
 	} else if (!aio_clear_cancel_function(job)) {
 		/*
 		 * Update the copied length for when
 		 * t4_aio_cancel_active() completes this request.
 		 */
 		job->aio_received += len;
 	} else {
 		copied = job->aio_received;
 #ifdef VERBOSE_TRACES
 		CTR5(KTR_CXGBE,
 		    "%s: tid %u, completing %p (copied %ld, placed %d)",
 		    __func__, toep->tid, job, copied, len);
 #endif
 		aio_complete(job, copied + len, 0);
 		t4_rcvd(&toep->td->tod, tp);
 	}
 
 completed:
 	/* Retire the buffer and kick the queue if more jobs are waiting. */
 	complete_ddp_buffer(toep, db, db_idx);
 	if (toep->ddp.waiting_count > 0)
 		ddp_queue_toep(toep);
 out:
 	DDP_UNLOCK(toep);
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 
 /*
  * Handle a DDP indicate for a connection that has no active DDP
  * buffers.  If AIO jobs are waiting, the connection is queued so they
  * can be processed.  The DDP lock must be held.
  */
 void
 handle_ddp_indicate(struct toepcb *toep)
 {
 
 	DDP_ASSERT_LOCKED(toep);
 	MPASS(toep->ddp.active_count == 0);
 	MPASS((toep->ddp.flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)) == 0);
 
 	/*
 	 * With nothing waiting, the pending requests that triggered the
 	 * request for an indicate were cancelled.  Those cancels should
 	 * have already disabled DDP.  Just ignore this as the data is
 	 * going into the socket buffer anyway.
 	 */
 	if (toep->ddp.waiting_count != 0) {
 		CTR3(KTR_CXGBE, "%s: tid %d indicated (%d waiting)", __func__,
 		    toep->tid, toep->ddp.waiting_count);
 		ddp_queue_toep(toep);
 	}
 }
 
 CTASSERT(CPL_COOKIE_DDP0 + 1 == CPL_COOKIE_DDP1);
 
 /*
  * CPL handler for SET_TCB_FIELD replies that belong to DDP.
  *
  * Only replies for W_TCB_RX_DDP_FLAGS carrying the DDP0 or DDP1 cookie
  * are accepted; a non-zero status or any other word/cookie panics.
  * The job on the indicated buffer, which must have a cancel pending,
  * is cancelled if it received no data and completed with the received
  * byte count otherwise.  The buffer is then retired.  Always returns 0.
  */
 static int
 do_ddp_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	unsigned int db_idx;
 	struct toepcb *toep;
 	struct inpcb *inp;
 	struct ddp_buffer *db;
 	struct kaiocb *job;
 	long copied;
 
 	if (cpl->status != CPL_ERR_NONE)
 		panic("XXX: tcp_rpl failed: %d", cpl->status);
 
 	toep = lookup_tid(sc, tid);
 	inp = toep->inp;
 	switch (cpl->cookie) {
 	case V_WORD(W_TCB_RX_DDP_FLAGS) | V_COOKIE(CPL_COOKIE_DDP0):
 	case V_WORD(W_TCB_RX_DDP_FLAGS) | V_COOKIE(CPL_COOKIE_DDP1):
 		/*
 		 * XXX: This duplicates a lot of code with handle_ddp_data().
 		 */
 		/* The cookie encodes which of the two buffers this is for. */
 		db_idx = G_COOKIE(cpl->cookie) - CPL_COOKIE_DDP0;
 		MPASS(db_idx < nitems(toep->ddp.db));
 		INP_WLOCK(inp);
 		DDP_LOCK(toep);
 		db = &toep->ddp.db[db_idx];
 
 		/*
 		 * handle_ddp_data() should leave the job around until
 		 * this callback runs once a cancel is pending.
 		 */
 		MPASS(db != NULL);
 		MPASS(db->job != NULL);
 		MPASS(db->cancel_pending);
 
 		/*
 		 * XXX: It's not clear what happens if there is data
 		 * placed when the buffer is invalidated.  I suspect we
 		 * need to read the TCB to see how much data was placed.
 		 *
 		 * For now this just pretends like nothing was placed.
 		 *
 		 * XXX: Note that if we did check the PCB we would need to
 		 * also take care of updating the tp, etc.
 		 */
 		job = db->job;
 		copied = job->aio_received;
 		if (copied == 0) {
 			CTR2(KTR_CXGBE, "%s: cancelling %p", __func__, job);
 			aio_cancel(job);
 		} else {
 			CTR3(KTR_CXGBE, "%s: completing %p (copied %ld)",
 			    __func__, job, copied);
 			aio_complete(job, copied, 0);
 			t4_rcvd(&toep->td->tod, intotcpcb(inp));
 		}
 
 		complete_ddp_buffer(toep, db, db_idx);
 		if (toep->ddp.waiting_count > 0)
 			ddp_queue_toep(toep);
 		DDP_UNLOCK(toep);
 		INP_WUNLOCK(inp);
 		break;
 	default:
 		panic("XXX: unknown tcb_rpl offset %#x, cookie %#x",
 		    G_WORD(cpl->cookie), G_COOKIE(cpl->cookie));
 	}
 
 	return (0);
 }
 
 /*
  * Finish DDP processing when the connection is closed by the peer.
  *
  * 'rcv_nxt' is the big-endian sequence number from the hardware.  The
  * bytes between tp->rcv_nxt and it, less one for the FIN, are credited
  * to the active DDP buffers in order.  Each buffer's job is completed
  * (or left to its cancel routine) and the buffer is retired.  Finally
  * ddp_complete_all() is called with an error of 0.  The inpcb write
  * lock and the DDP lock must both be held.
  */
 void
 handle_ddp_close(struct toepcb *toep, struct tcpcb *tp, __be32 rcv_nxt)
 {
 	struct ddp_buffer *db;
 	struct kaiocb *job;
 	long copied;
 	unsigned int db_idx;
 #ifdef INVARIANTS
 	unsigned int db_flag;
 #endif
 	int len, placed;
 
 	INP_WLOCK_ASSERT(toep->inp);
 	DDP_ASSERT_LOCKED(toep);
 
 	/* - 1 is to ignore the byte for FIN */
 	len = be32toh(rcv_nxt) - tp->rcv_nxt - 1;
 	tp->rcv_nxt += len;
 
 	while (toep->ddp.active_count > 0) {
 		MPASS(toep->ddp.active_id != -1);
 		db_idx = toep->ddp.active_id;
 #ifdef INVARIANTS
 		db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;
 #endif
 		MPASS((toep->ddp.flags & db_flag) != 0);
 		db = &toep->ddp.db[db_idx];
 		job = db->job;
 		copied = job->aio_received;
 		/* Credit this job with at most the space left in its request. */
 		placed = len;
 		if (placed > job->uaiocb.aio_nbytes - copied)
 			placed = job->uaiocb.aio_nbytes - copied;
 		if (placed > 0)
 			job->msgrcv = 1;
 		if (!aio_clear_cancel_function(job)) {
 			/*
 			 * Update the copied length for when
 			 * t4_aio_cancel_active() completes this
 			 * request.
 			 */
 			job->aio_received += placed;
 		} else {
 			CTR4(KTR_CXGBE, "%s: tid %d completed buf %d len %d",
 			    __func__, toep->tid, db_idx, placed);
 			aio_complete(job, copied + placed, 0);
 		}
 		len -= placed;
 		complete_ddp_buffer(toep, db, db_idx);
 	}
 
 	/* All placed bytes must have been attributed to some job. */
 	MPASS(len == 0);
 	ddp_complete_all(toep, 0);
 }
 
 #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
 	 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
 	 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
 	 F_DDP_INVALID_PPOD | F_DDP_HDRCRC_ERR | F_DDP_DATACRC_ERR)
 
 extern cpl_handler_t t4_cpl_handler[];
 
 /*
  * CPL handler for RX_DATA_DDP.  Panics if the CPL reports any of the
  * DDP_ERR conditions.  Connections in iSCSI ULP mode are passed to the
  * CPL_RX_ISCSI_DDP handler; all others go to handle_ddp_data().
  * Always returns 0.
  */
 static int
 do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	uint32_t vld;
 
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
 	KASSERT(!(toep->flags & TPF_SYNQE),
 	    ("%s: toep %p claims to be a synq entry", __func__, toep));
 
 	vld = be32toh(cpl->ddpvld);
 	if (__predict_false(vld & DDP_ERR)) {
 		panic("%s: DDP error 0x%x (tid %d, toep %p)",
 		    __func__, vld, tid, toep);
 	}
 
 	if (ulp_mode(toep) == ULP_MODE_ISCSI)
 		t4_cpl_handler[CPL_RX_ISCSI_DDP](iq, rss, m);
 	else
 		handle_ddp_data(toep, cpl->u.ddp_report, cpl->seq,
 		    be16toh(cpl->len));
 
 	return (0);
 }
 
 /*
  * CPL handler for RX_DDP_COMPLETE.  Looks up the connection by tid and
  * passes the report and rcv_nxt from the CPL to handle_ddp_data() with
  * a length of 0.  Always returns 0.
  */
 static int
 do_rx_ddp_complete(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_rx_ddp_complete *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
 	KASSERT(!(toep->flags & TPF_SYNQE),
 	    ("%s: toep %p claims to be a synq entry", __func__, toep));
 
 	handle_ddp_data(toep, cpl->ddp_report, cpl->rcv_nxt, 0);
 
 	return (0);
 }
 
 /*
  * Request that DDP be turned on for a connection.
  *
  * The connection must have DDP_OK set and neither DDP_ON nor DDP_SC_REQ.
  * DDP_SC_REQ is set and two t4_set_tcb_field() requests are issued on
  * the control queue: one for W_TCB_RX_DDP_FLAGS and one for
  * W_TCB_T_FLAGS.  The DDP lock must be held.
  *
  * NOTE(review): the two bit-set arguments of each request are presumably
  * (mask, value) in that order -- confirm against t4_set_tcb_field().
  */
 static void
 enable_ddp(struct adapter *sc, struct toepcb *toep)
 {
 
 	KASSERT((toep->ddp.flags & (DDP_ON | DDP_OK | DDP_SC_REQ)) == DDP_OK,
 	    ("%s: toep %p has bad ddp_flags 0x%x",
 	    __func__, toep, toep->ddp.flags));
 
 	CTR3(KTR_CXGBE, "%s: tid %u (time %u)",
 	    __func__, toep->tid, time_uptime);
 
 	DDP_ASSERT_LOCKED(toep);
 	toep->ddp.flags |= DDP_SC_REQ;
 	t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_RX_DDP_FLAGS,
 	    V_TF_DDP_OFF(1) | V_TF_DDP_INDICATE_OUT(1) |
 	    V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1) |
 	    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1),
 	    V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1), 0, 0);
 	t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS,
 	    V_TF_RCV_COALESCE_ENABLE(1), 0, 0, 0);
 }
 
 /*
  * Return the highest common factor of n1 and n2, computed with the
  * Euclidean algorithm.  If one argument is 0 the other is returned.
  */
 static int
 calculate_hcf(int n1, int n2)
 {
 	int lo = (n1 <= n2) ? n1 : n2;
 	int hi = (n1 <= n2) ? n2 : n1;
 
 	while (lo != 0) {
 		int rem = hi % lo;
 
 		hi = lo;
 		lo = rem;
 	}
 
 	return (hi);
 }
 
 /*
  * Number of page pods needed for 'npages' VM pages when the DDP page
  * size is 1 << ddp_page_shift.  The VM page count is first scaled down
  * to DDP pages, then rounded up to whole pods of PPOD_PAGES each.
  */
 static inline int
 pages_to_nppods(int npages, int ddp_page_shift)
 {
 	int ddp_pages;
 
 	MPASS(ddp_page_shift >= PAGE_SHIFT);
 
 	ddp_pages = npages >> (ddp_page_shift - PAGE_SHIFT);
 	return (howmany(ddp_pages, PPOD_PAGES));
 }
 
 /*
  * Reserve 'nppods' page pods from the region's arena without sleeping.
  * On success the reservation records the region, the pod count, and a
  * tag made of the page size index and the allocated address, and 0 is
  * returned.  Returns ENOMEM if the arena allocation fails.
  */
 static int
 alloc_page_pods(struct ppod_region *pr, u_int nppods, u_int pgsz_idx,
     struct ppod_reservation *prsv)
 {
 	vmem_addr_t addr;       /* relative to start of region */
 	int rc;
 
 	rc = vmem_alloc(pr->pr_arena, PPOD_SZ(nppods), M_NOWAIT | M_FIRSTFIT,
 	    &addr);
 	if (rc != 0)
 		return (ENOMEM);
 
 #ifdef VERBOSE_TRACES
 	CTR5(KTR_CXGBE, "%-17s arena %p, addr 0x%08x, nppods %d, pgsz %d",
 	    __func__, pr->pr_arena, (uint32_t)addr & pr->pr_tag_mask,
 	    nppods, 1 << pr->pr_page_shift[pgsz_idx]);
 #endif
 
 	/*
 	 * The hardware tagmask includes an extra invalid bit but the arena was
 	 * seeded with valid values only.  An allocation out of this arena will
 	 * fit inside the tagmask but won't have the invalid bit set.
 	 */
 	MPASS((addr & pr->pr_tag_mask) == addr);
 	MPASS((addr & pr->pr_invalid_bit) == 0);
 
 	prsv->prsv_nppods = nppods;
 	prsv->prsv_tag = V_PPOD_PGSZ(pgsz_idx) | addr;
 	prsv->prsv_pr = pr;
 
 	return (0);
 }
 
 /*
  * Reserve page pods for an array of 'npages' VM pages.
  *
  * The DDP page size index is chosen from the HCF of the lengths of the
  * physically contiguous runs in 'pages'.  Returns 0 on success with
  * the reservation filled in, or ENOMEM if alloc_page_pods() fails.
  */
 static int
 t4_alloc_page_pods_for_vmpages(struct ppod_region *pr, vm_page_t *pages,
     int npages, struct ppod_reservation *prsv)
 {
 	int i, hcf, seglen, idx, nppods;
 
 	/*
 	 * The DDP page size is unrelated to the VM page size.  We combine
 	 * contiguous physical pages into larger segments to get the best DDP
 	 * page size possible.  This is the largest of the four sizes in
 	 * A_ULP_RX_TDDP_PSZ that evenly divides the HCF of the segment sizes in
 	 * the page list.
 	 */
 	hcf = 0;
 	for (i = 0; i < npages; i++) {
 		/* Extend the segment across physically adjacent pages. */
 		seglen = PAGE_SIZE;
 		while (i < npages - 1 &&
 		    VM_PAGE_TO_PHYS(pages[i]) + PAGE_SIZE ==
 		    VM_PAGE_TO_PHYS(pages[i + 1])) {
 			seglen += PAGE_SIZE;
 			i++;
 		}
 
 		hcf = calculate_hcf(hcf, seglen);
 		if (hcf < (1 << pr->pr_page_shift[1])) {
 			idx = 0;
 			goto have_pgsz;	/* give up, short circuit */
 		}
 	}
 
 #define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
 	MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
 	/* Pick the highest page size index whose size divides the HCF. */
 	for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) {
 		if ((hcf & PR_PAGE_MASK(idx)) == 0)
 			break;
 	}
 #undef PR_PAGE_MASK
 
 have_pgsz:
 	MPASS(idx <= M_PPOD_PGSZ);
 
 	nppods = pages_to_nppods(npages, pr->pr_page_shift[idx]);
 	if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
 		return (ENOMEM);
 	MPASS(prsv->prsv_nppods > 0);
 
 	return (0);
 }
 
 /*
  * Reserve page pods for a page set, storing the reservation in the
  * page set itself.  The page set must not already hold page pods.
  * Returns whatever t4_alloc_page_pods_for_vmpages() returns.
  */
 int
 t4_alloc_page_pods_for_ps(struct ppod_region *pr, struct pageset *ps)
 {
 
 	KASSERT(ps->prsv.prsv_nppods == 0,
 	    ("%s: page pods already allocated", __func__));
 
 	return (t4_alloc_page_pods_for_vmpages(pr, ps->pages, ps->npages,
 	    &ps->prsv));
 }
 
 /*
  * Reserve page pods for the page array (bio_ma, bio_ma_n) of a bio.
  * Returns whatever t4_alloc_page_pods_for_vmpages() returns.
  */
 int
 t4_alloc_page_pods_for_bio(struct ppod_region *pr, struct bio *bp,
     struct ppod_reservation *prsv)
 {
 	int rc;
 
 	/* The page array is only used for unmapped bios. */
 	MPASS(bp->bio_flags & BIO_UNMAPPED);
 
 	rc = t4_alloc_page_pods_for_vmpages(pr, bp->bio_ma, bp->bio_ma_n,
 	    prsv);
 	return (rc);
 }
 
 /*
  * Reserve page pods for a virtually contiguous buffer of 'len' bytes at
  * 'buf'.  Physical addresses are looked up with pmap_kextract().  Returns
  * 0 on success and ENOMEM if the pods could not be allocated.
  */
 int
 t4_alloc_page_pods_for_buf(struct ppod_region *pr, vm_offset_t buf, int len,
     struct ppod_reservation *prsv)
 {
 	int gcd, run_bytes, pgsz_idx, ddp_pages, nppods;
 	uintptr_t first_pva, last_pva, cur, run_pa;
 
 	MPASS(buf > 0);
 	MPASS(len > 0);
 
 	/*
 	 * DDP pages need not be the same size as VM pages.  Physically
 	 * contiguous VM pages are merged into runs, and the DDP page size
 	 * used is the largest of the four sizes in A_ULP_RX_ISCSI_PSZ that
 	 * evenly divides the HCF of all the run lengths.
 	 */
 	first_pva = trunc_page(buf);
 	last_pva = trunc_page(buf + len - 1);
 	gcd = 0;
 	pgsz_idx = -1;		/* not decided yet */
 	for (cur = first_pva; cur <= last_pva; ) {
 		/* Measure the physically contiguous run starting at cur. */
 		run_pa = pmap_kextract(cur);
 		run_bytes = PAGE_SIZE;
 		for (cur += PAGE_SIZE; cur <= last_pva; cur += PAGE_SIZE) {
 			if (run_pa + run_bytes != pmap_kextract(cur))
 				break;
 			run_bytes += PAGE_SIZE;
 		}
 
 		gcd = calculate_hcf(gcd, run_bytes);
 		if (gcd < (1 << pr->pr_page_shift[1])) {
 			/* Only the smallest size can work; stop scanning. */
 			pgsz_idx = 0;
 			break;
 		}
 	}
 
 	if (pgsz_idx < 0) {
 		/* PAGE_SIZE is >= 4K everywhere */
 		MPASS((gcd & ((1 << pr->pr_page_shift[0]) - 1)) == 0);
 
 		/* Walk down from the largest size to the first that fits. */
 		pgsz_idx = nitems(pr->pr_page_shift) - 1;
 		while (pgsz_idx > 0 &&
 		    (gcd & ((1 << pr->pr_page_shift[pgsz_idx]) - 1)) != 0)
 			pgsz_idx--;
 	}
 	MPASS(pgsz_idx <= M_PPOD_PGSZ);
 
 	/* Number of DDP pages spanned by the buffer at the chosen size. */
 	ddp_pages = 1;
 	ddp_pages += (last_pva - first_pva) >> pr->pr_page_shift[pgsz_idx];
 	nppods = howmany(ddp_pages, PPOD_PAGES);
 	if (alloc_page_pods(pr, nppods, pgsz_idx, prsv) != 0)
 		return (ENOMEM);
 	MPASS(prsv->prsv_nppods > 0);
 
 	return (0);
 }
 
 int
 t4_alloc_page_pods_for_sgl(struct ppod_region *pr, struct ctl_sg_entry *sgl,
     int entries, struct ppod_reservation *prsv)
 {
 	int hcf, seglen, idx = 0, npages, nppods, i, len;
 	uintptr_t start_pva, end_pva, pva, p1 ;
 	vm_offset_t buf;
 	struct ctl_sg_entry *sge;
 
 	MPASS(entries > 0);
 	MPASS(sgl);
 
 	/*
 	 * The DDP page size is unrelated to the VM page size.	We combine
 	 * contiguous physical pages into larger segments to get the best DDP
 	 * page size possible.	This is the largest of the four sizes in
 	 * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes
 	 * in the page list.
 	 */
 	hcf = 0;
 	for (i = entries - 1; i >= 0; i--) {
 		sge = sgl + i;
 		buf = (vm_offset_t)sge->addr;
 		len = sge->len;
 		start_pva = trunc_page(buf);
 		end_pva = trunc_page(buf + len - 1);
 		pva = start_pva;
 		while (pva <= end_pva) {
 			seglen = PAGE_SIZE;
 			p1 = pmap_kextract(pva);
 			pva += PAGE_SIZE;
 			while (pva <= end_pva && p1 + seglen ==
 			    pmap_kextract(pva)) {
 				seglen += PAGE_SIZE;
 				pva += PAGE_SIZE;
 			}
 
 			hcf = calculate_hcf(hcf, seglen);
 			if (hcf < (1 << pr->pr_page_shift[1])) {
 				idx = 0;
 				goto have_pgsz; /* give up, short circuit */
 			}
 		}
 	}
 #define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
 	MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
 	for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) {
 		if ((hcf & PR_PAGE_MASK(idx)) == 0)
 			break;
 	}
 #undef PR_PAGE_MASK
 
 have_pgsz:
 	MPASS(idx <= M_PPOD_PGSZ);
 
 	npages = 0;
 	while (entries--) {
 		npages++;
 		start_pva = trunc_page((vm_offset_t)sgl->addr);
 		end_pva = trunc_page((vm_offset_t)sgl->addr + sgl->len - 1);
 		npages += (end_pva - start_pva) >> pr->pr_page_shift[idx];
 		sgl = sgl + 1;
 	}
 	nppods = howmany(npages, PPOD_PAGES);
 	if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
 		return (ENOMEM);
 	MPASS(prsv->prsv_nppods > 0);
 	return (0);
 }
 
 /*
  * Return the page pods held by a reservation to the vmem arena of the
  * region they came from and mark the reservation as empty by clearing
  * prsv_nppods.  The reservation must be non-NULL and must currently hold
  * page pods.
  */
 void
 t4_free_page_pods(struct ppod_reservation *prsv)
 {
 	struct ppod_region *pr;
 	vmem_addr_t addr;
 
 	/*
 	 * Check the pointer before it is dereferenced; reading prsv_pr in
 	 * the declaration would fault ahead of the assertion.
 	 */
 	MPASS(prsv != NULL);
 	MPASS(prsv->prsv_nppods != 0);
 
 	pr = prsv->prsv_pr;
 
 	/* The arena offset is the part of the tag covered by the tag mask. */
 	addr = prsv->prsv_tag & pr->pr_tag_mask;
 	MPASS((addr & pr->pr_invalid_bit) == 0);
 
 #ifdef VERBOSE_TRACES
 	CTR4(KTR_CXGBE, "%-17s arena %p, addr 0x%08x, nppods %d", __func__,
 	    pr->pr_arena, addr, prsv->prsv_nppods);
 #endif
 
 	vmem_free(pr->pr_arena, addr, PPOD_SZ(prsv->prsv_nppods));
 	prsv->prsv_nppods = 0;
 }
 
 /*
  * Cap on the number of page pods written by one work request: as many pods
  * of PPOD_SIZE bytes as fit in 256 bytes.
  */
 #define NUM_ULP_TX_SC_IMM_PPODS (256 / PPOD_SIZE)
 
 int
 t4_write_page_pods_for_ps(struct adapter *sc, struct sge_wrq *wrq, int tid,
     struct pageset *ps)
 {
 	struct wrqe *wr;
 	struct ulp_mem_io *ulpmc;
 	struct ulptx_idata *ulpsc;
 	struct pagepod *ppod;
 	int i, j, k, n, chunk, len, ddp_pgsz, idx;
 	u_int ppod_addr;
 	uint32_t cmd;
 	struct ppod_reservation *prsv = &ps->prsv;
 	struct ppod_region *pr = prsv->prsv_pr;
 	vm_paddr_t pa;
 
 	KASSERT(!(ps->flags & PS_PPODS_WRITTEN),
 	    ("%s: page pods already written", __func__));
 	MPASS(prsv->prsv_nppods > 0);
 
 	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
 	if (is_t4(sc))
 		cmd |= htobe32(F_ULP_MEMIO_ORDER);
 	else
 		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
 	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
 	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
 	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
 
 		/* How many page pods are we writing in this cycle */
 		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
 		chunk = PPOD_SZ(n);
 		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
 
 		wr = alloc_wrqe(len, wrq);
 		if (wr == NULL)
 			return (ENOMEM);	/* ok to just bail out */
 		ulpmc = wrtod(wr);
 
 		INIT_ULPTX_WR(ulpmc, len, 0, 0);
 		ulpmc->cmd = cmd;
 		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
 
 		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
 		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
 		ulpsc->len = htobe32(chunk);
 
 		ppod = (struct pagepod *)(ulpsc + 1);
 		for (j = 0; j < n; i++, j++, ppod++) {
 			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
 			    V_PPOD_TID(tid) | prsv->prsv_tag);
 			ppod->len_offset = htobe64(V_PPOD_LEN(ps->len) |
 			    V_PPOD_OFST(ps->offset));
 			ppod->rsvd = 0;
 			idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE);
 			for (k = 0; k < nitems(ppod->addr); k++) {
 				if (idx < ps->npages) {
 					pa = VM_PAGE_TO_PHYS(ps->pages[idx]);
 					ppod->addr[k] = htobe64(pa);
 					idx += ddp_pgsz / PAGE_SIZE;
 				} else
 					ppod->addr[k] = 0;
 #if 0
 				CTR5(KTR_CXGBE,
 				    "%s: tid %d ppod[%d]->addr[%d] = %p",
 				    __func__, tid, i, k,
 				    be64toh(ppod->addr[k]));
 #endif
 			}
 
 		}
 
 		t4_wrq_tx(sc, wr);
 	}
 	ps->flags |= PS_PPODS_WRITTEN;
 
 	return (0);
 }
 
 /*
  * Allocate a single mbuf that can hold a raw work request of 'len' bytes
  * and mark it as a raw WR.  A plain header mbuf is used when the request
  * fits in MHLEN, a cluster when it fits in MCLBYTES.  Returns NULL if the
  * request is larger than that or the allocation fails.
  */
 static struct mbuf *
 alloc_raw_wr_mbuf(int len)
 {
 	struct mbuf *m;
 
 	/* Too big for either kind of mbuf. */
 	if (len > MHLEN && len > MCLBYTES)
 		return (NULL);
 
 	m = (len <= MHLEN) ? m_gethdr(M_NOWAIT, MT_DATA) :
 	    m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (NULL);
 
 	m->m_len = len;
 	m->m_pkthdr.len = len;
 	set_mbuf_raw_wr(m, true);
 	return (m);
 }
 
 int
 t4_write_page_pods_for_bio(struct adapter *sc, struct toepcb *toep,
     struct ppod_reservation *prsv, struct bio *bp, struct mbufq *wrq)
 {
 	struct ulp_mem_io *ulpmc;
 	struct ulptx_idata *ulpsc;
 	struct pagepod *ppod;
 	int i, j, k, n, chunk, len, ddp_pgsz, idx;
 	u_int ppod_addr;
 	uint32_t cmd;
 	struct ppod_region *pr = prsv->prsv_pr;
 	vm_paddr_t pa;
 	struct mbuf *m;
 
 	MPASS(bp->bio_flags & BIO_UNMAPPED);
 
 	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
 	if (is_t4(sc))
 		cmd |= htobe32(F_ULP_MEMIO_ORDER);
 	else
 		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
 	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
 	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
 	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
 
 		/* How many page pods are we writing in this cycle */
 		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
 		MPASS(n > 0);
 		chunk = PPOD_SZ(n);
 		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
 
 		m = alloc_raw_wr_mbuf(len);
 		if (m == NULL)
 			return (ENOMEM);
 
 		ulpmc = mtod(m, struct ulp_mem_io *);
 		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
 		ulpmc->cmd = cmd;
 		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
 
 		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
 		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
 		ulpsc->len = htobe32(chunk);
 
 		ppod = (struct pagepod *)(ulpsc + 1);
 		for (j = 0; j < n; i++, j++, ppod++) {
 			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
 			    V_PPOD_TID(toep->tid) |
 			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
 			ppod->len_offset = htobe64(V_PPOD_LEN(bp->bio_bcount) |
 			    V_PPOD_OFST(bp->bio_ma_offset));
 			ppod->rsvd = 0;
 			idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE);
 			for (k = 0; k < nitems(ppod->addr); k++) {
 				if (idx < bp->bio_ma_n) {
 					pa = VM_PAGE_TO_PHYS(bp->bio_ma[idx]);
 					ppod->addr[k] = htobe64(pa);
 					idx += ddp_pgsz / PAGE_SIZE;
 				} else
 					ppod->addr[k] = 0;
 #if 0
 				CTR5(KTR_CXGBE,
 				    "%s: tid %d ppod[%d]->addr[%d] = %p",
 				    __func__, toep->tid, i, k,
 				    be64toh(ppod->addr[k]));
 #endif
 			}
 		}
 
 		mbufq_enqueue(wrq, m);
 	}
 
 	return (0);
 }
 
 /*
  * Build the work requests that write the page pods reserved for a
  * virtually contiguous buffer.  Each request is placed in its own raw-WR
  * mbuf and appended to 'wrq'; nothing is transmitted here.  Returns 0 on
  * success and ENOMEM if an mbuf cannot be allocated, in which case any
  * mbufs already appended are left on 'wrq'.
  */
 int
 t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep,
     struct ppod_reservation *prsv, vm_offset_t buf, int buflen,
     struct mbufq *wrq)
 {
 	struct ppod_region *pr = prsv->prsv_pr;
 	struct ulp_mem_io *ulpmc;
 	struct ulptx_idata *ulpsc;
 	struct pagepod *ppod;
 	struct mbuf *m;
 	int pod, in_wr, slot, batch, chunk, len, ddp_pgsz;
 	u_int ppod_addr, offset;
 	uint32_t cmd;
 	uintptr_t last_pva, cur;
 	vm_paddr_t pa;
 
 	/* T4 sets the ORDER flag, every other chip the T5 IMM flag. */
 	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
 	cmd |= is_t4(sc) ? htobe32(F_ULP_MEMIO_ORDER) :
 	    htobe32(F_T5_ULP_MEMIO_IMM);
 
 	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
 	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
 	offset = buf & PAGE_MASK;
 	cur = trunc_page(buf);
 	last_pva = trunc_page(buf + buflen - 1);
 
 	pod = 0;
 	while (pod < prsv->prsv_nppods) {
 		/* Number of page pods carried by this work request. */
 		batch = min(prsv->prsv_nppods - pod, NUM_ULP_TX_SC_IMM_PPODS);
 		MPASS(batch > 0);
 		chunk = PPOD_SZ(batch);
 		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
 
 		m = alloc_raw_wr_mbuf(len);
 		if (m == NULL)
 			return (ENOMEM);
 
 		/* Layout: ulp_mem_io header, ulptx_idata, then the pods. */
 		ulpmc = mtod(m, struct ulp_mem_io *);
 		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
 		ppod = (struct pagepod *)(ulpsc + 1);
 
 		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
 		ulpmc->cmd = cmd;
 		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
 
 		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
 		ulpsc->len = htobe32(chunk);
 
 		for (in_wr = 0; in_wr < batch; in_wr++, pod++, ppod++) {
 			/* The page size field of the tag is left out here. */
 			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
 			    V_PPOD_TID(toep->tid) |
 			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
 			ppod->len_offset = htobe64(V_PPOD_LEN(buflen) |
 			    V_PPOD_OFST(offset));
 			ppod->rsvd = 0;
 
 			for (slot = 0; slot < nitems(ppod->addr); slot++) {
 				if (cur <= last_pva) {
 					pa = pmap_kextract(cur);
 					ppod->addr[slot] = htobe64(pa);
 					cur += ddp_pgsz;
 				} else {
 					/* Past the end of the buffer. */
 					ppod->addr[slot] = 0;
 				}
 #if 0
 				CTR5(KTR_CXGBE,
 				    "%s: tid %d ppod[%d]->addr[%d] = %p",
 				    __func__, toep->tid, pod, slot,
 				    be64toh(ppod->addr[slot]));
 #endif
 			}
 
 			/*
 			 * Step back one DDP page: the next pod starts with
 			 * the address this pod ended with.
 			 */
 			cur -= ddp_pgsz;
 		}
 
 		mbufq_enqueue(wrq, m);
 		ppod_addr += chunk;
 	}
 
 	MPASS(cur <= last_pva);
 
 	return (0);
 }
 
 /*
  * Build the work requests that write the page pods reserved for a
  * scatter/gather list.  Each request holds at most NUM_ULP_TX_SC_IMM_PPODS
  * page pods, is placed in its own raw-WR mbuf and appended to 'wrq';
  * nothing is transmitted here.
  *
  * Every pod carries 'xferlen' as its length and the page offset of the
  * first sgl entry as its offset.  Pod addresses are obtained by walking the
  * sgl entries in steps of the DDP page size and translating each position
  * with pmap_kextract(); once the list is exhausted the remaining slots are
  * written as zero.
  *
  * Returns 0 on success and ENOMEM if an mbuf cannot be allocated, in which
  * case any mbufs already appended are left on 'wrq'.
  *
  * NOTE(review): the walk only advances to the next sgl entry when sg_offset
  * lands exactly on sgl->len, so entry lengths are presumably expected to be
  * multiples of the DDP page size -- confirm against the callers.
  */
 int
 t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep,
     struct ppod_reservation *prsv, struct ctl_sg_entry *sgl, int entries,
     int xferlen, struct mbufq *wrq)
 {
 	struct ulp_mem_io *ulpmc;
 	struct ulptx_idata *ulpsc;
 	struct pagepod *ppod;
 	int i, j, k, n, chunk, len, ddp_pgsz;
 	u_int ppod_addr, offset, sg_offset = 0;
 	uint32_t cmd;
 	struct ppod_region *pr = prsv->prsv_pr;
 	uintptr_t pva;
 	vm_paddr_t pa;
 	struct mbuf *m;
 
 	MPASS(sgl != NULL);
 	MPASS(entries > 0);
 	/* T4 sets the ORDER flag, every other chip the T5 IMM flag. */
 	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
 	if (is_t4(sc))
 		cmd |= htobe32(F_ULP_MEMIO_ORDER);
 	else
 		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
 	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
 	offset = (vm_offset_t)sgl->addr & PAGE_MASK;
 	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
 	pva = trunc_page((vm_offset_t)sgl->addr);
 	/* i counts pods written so far; it is advanced by the inner loop. */
 	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
 
 		/* How many page pods are we writing in this cycle */
 		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
 		MPASS(n > 0);
 		chunk = PPOD_SZ(n);
 		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
 
 		m = alloc_raw_wr_mbuf(len);
 		if (m == NULL)
 			return (ENOMEM);
 		ulpmc = mtod(m, struct ulp_mem_io *);
 
 		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
 		ulpmc->cmd = cmd;
 		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
 		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
 		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
 
 		/* The immediate-data header follows the ulp_mem_io header. */
 		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
 		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
 		ulpsc->len = htobe32(chunk);
 
 		/* The page pods themselves follow the immediate-data header. */
 		ppod = (struct pagepod *)(ulpsc + 1);
 		for (j = 0; j < n; i++, j++, ppod++) {
 			/* The page size field of the tag is left out here. */
 			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
 			    V_PPOD_TID(toep->tid) |
 			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
 			ppod->len_offset = htobe64(V_PPOD_LEN(xferlen) |
 			    V_PPOD_OFST(offset));
 			ppod->rsvd = 0;
 
 			for (k = 0; k < nitems(ppod->addr); k++) {
 				/* entries reaches 0 once the sgl is used up. */
 				if (entries != 0) {
 					pa = pmap_kextract(pva + sg_offset);
 					ppod->addr[k] = htobe64(pa);
 				} else
 					ppod->addr[k] = 0;
 
 #if 0
 				CTR5(KTR_CXGBE,
 				    "%s: tid %d ppod[%d]->addr[%d] = %p",
 				    __func__, toep->tid, i, k,
 				    be64toh(ppod->addr[k]));
 #endif
 
 				/*
 				 * If this is the last entry in a pod,
 				 * reuse the same entry for first address
 				 * in the next pod.
 				 */
 				if (k + 1 == nitems(ppod->addr))
 					break;
 
 				/*
 				 * Don't move to the next DDP page if the
 				 * sgl is already finished.
 				 */
 				if (entries == 0)
 					continue;
 
 				sg_offset += ddp_pgsz;
 				if (sg_offset == sgl->len) {
 					/*
 					 * This sgl entry is done.  Go
 					 * to the next.
 					 */
 					entries--;
 					sgl++;
 					sg_offset = 0;
 					if (entries != 0)
 						pva = trunc_page(
 						    (vm_offset_t)sgl->addr);
 				}
 			}
 		}
 
 		mbufq_enqueue(wrq, m);
 	}
 
 	return (0);
 }
 
 /*
  * Get a pageset ready for DDP: reserve page pods for it if it has none and
  * write them if they have not been written yet.  Returns 1 when the
  * pageset is ready and 0 if either step failed.
  */
 static int
 prep_pageset(struct adapter *sc, struct toepcb *toep, struct pageset *ps)
 {
 	struct tom_data *td;
 
 	td = sc->tom_softc;
 
 	if (ps->prsv.prsv_nppods == 0) {
 		if (t4_alloc_page_pods_for_ps(&td->pr, ps) != 0)
 			return (0);
 	}
 
 	if ((ps->flags & PS_PPODS_WRITTEN) == 0) {
 		if (t4_write_page_pods_for_ps(sc, toep->ctrlq, toep->tid,
 		    ps) != 0)
 			return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Initialize a page pod region covering the range 'r'.  The four page
  * shifts come from the HPZ fields of 'psz', the tag and alias masks are
  * derived from the size of the range, and a vmem arena named 'name' is
  * created to hand out page pods.  Returns 0 on success, ENXIO if the page
  * sizes are not strictly increasing or the masks come out empty, and
  * ENOMEM if the arena cannot be created.
  */
 int
 t4_init_ppod_region(struct ppod_region *pr, struct t4_range *r, u_int psz,
     const char *name)
 {
 	int n;
 
 	MPASS(pr != NULL);
 	MPASS(r->size > 0);
 
 	pr->pr_start = r->start;
 	pr->pr_len = r->size;
 
 	/* Each page shift is 12 plus the matching HPZ field of psz. */
 	pr->pr_page_shift[0] = 12 + G_HPZ0(psz);
 	pr->pr_page_shift[1] = 12 + G_HPZ1(psz);
 	pr->pr_page_shift[2] = 12 + G_HPZ2(psz);
 	pr->pr_page_shift[3] = 12 + G_HPZ3(psz);
 
 	/* The SGL -> page pod algorithm requires the sizes to be in order. */
 	n = 1;
 	while (n < nitems(pr->pr_page_shift)) {
 		if (pr->pr_page_shift[n - 1] >= pr->pr_page_shift[n])
 			return (ENXIO);
 		n++;
 	}
 
 	pr->pr_tag_mask = ((1 << fls(r->size)) - 1) & V_PPOD_TAG(M_PPOD_TAG);
 	pr->pr_alias_mask = V_PPOD_TAG(M_PPOD_TAG) & ~pr->pr_tag_mask;
 	if (pr->pr_tag_mask == 0 || pr->pr_alias_mask == 0)
 		return (ENXIO);
 
 	/* The invalid bit is the highest bit of the tag mask. */
 	pr->pr_alias_shift = fls(pr->pr_tag_mask);
 	pr->pr_invalid_bit = 1 << (pr->pr_alias_shift - 1);
 
 	pr->pr_arena = vmem_create(name, 0, pr->pr_len, PPOD_SIZE, 0,
 	    M_FIRSTFIT | M_NOWAIT);
 
 	return (pr->pr_arena != NULL ? 0 : ENOMEM);
 }
 
 /*
  * Tear down a page pod region: destroy its vmem arena, if one was created,
  * and zero the whole structure.
  */
 void
 t4_free_ppod_region(struct ppod_region *pr)
 {
 
 	MPASS(pr != NULL);
 
 	if (pr->pr_arena != NULL) {
 		vmem_destroy(pr->pr_arena);
 	}
 	bzero(pr, sizeof(*pr));
 }
 
 /*
  * Compare a pageset against a buffer description.  Returns 0 when the
  * pageset covers the same start, page count, page offset and length in the
  * same vmspace and the map timestamp has not changed since the pageset was
  * created; returns 1 otherwise.
  */
 static int
 pscmp(struct pageset *ps, struct vmspace *vm, vm_offset_t start, int npages,
     int pgoff, int len)
 {
 
 	if (ps->start == start && ps->npages == npages &&
 	    ps->offset == pgoff && ps->len == len &&
 	    ps->vm == vm && ps->vm_timestamp == vm->vm_map.timestamp)
 		return (0);
 
 	return (1);
 }
 
 /*
  * Obtain a pageset describing the user buffer of an AIO job.
  *
  * A cached pageset that matches the buffer (see pscmp()) is reused when
  * there is one.  Otherwise a new pageset is allocated and the backing
  * pages are held with vm_fault_quick_hold_pages(); if the connection
  * already has as many active plus cached pagesets as it has DDP buffers,
  * the last cached pageset is freed first to make room.
  *
  * Buffers that span more than MAX_DDP_BUFFER_SIZE are truncated:
  * job->uaiocb.aio_nbytes is reduced so that the request completes as a
  * short read.
  *
  * Must be called with the DDP lock held.  The lock is dropped while the
  * new pageset is allocated and its pages are faulted in, and is held again
  * on return.
  *
  * Returns 0 with *pps set on success, or EFAULT if the pages could not be
  * held.
  */
 static int
 hold_aio(struct toepcb *toep, struct kaiocb *job, struct pageset **pps)
 {
 	struct vmspace *vm;
 	vm_map_t map;
 	vm_offset_t start, end, pgoff;
 	struct pageset *ps;
 	int n;
 
 	DDP_ASSERT_LOCKED(toep);
 
 	/*
 	 * The AIO subsystem will cancel and drain all requests before
 	 * permitting a process to exit or exec, so p_vmspace should
 	 * be stable here.
 	 */
 	vm = job->userproc->p_vmspace;
 	map = &vm->vm_map;
 	start = (uintptr_t)job->uaiocb.aio_buf;
 	pgoff = start & PAGE_MASK;
 	end = round_page(start + job->uaiocb.aio_nbytes);
 	start = trunc_page(start);
 
 	if (end - start > MAX_DDP_BUFFER_SIZE) {
 		/*
 		 * Truncate the request to a short read.
 		 * Alternatively, we could DDP in chunks to the larger
 		 * buffer, but that would be quite a bit more work.
 		 *
 		 * When truncating, round the request down to avoid
 		 * crossing a cache line on the final transaction.
 		 */
 		end = rounddown2(start + MAX_DDP_BUFFER_SIZE, CACHE_LINE_SIZE);
 #ifdef VERBOSE_TRACES
 		CTR4(KTR_CXGBE, "%s: tid %d, truncating size from %lu to %lu",
 		    __func__, toep->tid, (unsigned long)job->uaiocb.aio_nbytes,
 		    (unsigned long)(end - (start + pgoff)));
 #endif
 		/*
 		 * The length must shrink whether or not tracing is compiled
 		 * in; otherwise the pageset length recorded below would
 		 * describe more bytes than the pages that are held.
 		 */
 		job->uaiocb.aio_nbytes = end - (start + pgoff);
 		end = round_page(end);
 	}
 
 	n = atop(end - start);
 
 	/*
 	 * Try to reuse a cached pageset.
 	 */
 	TAILQ_FOREACH(ps, &toep->ddp.cached_pagesets, link) {
 		if (pscmp(ps, vm, start, n, pgoff,
 		    job->uaiocb.aio_nbytes) == 0) {
 			TAILQ_REMOVE(&toep->ddp.cached_pagesets, ps, link);
 			toep->ddp.cached_count--;
 			*pps = ps;
 			return (0);
 		}
 	}
 
 	/*
 	 * If there are too many cached pagesets to create a new one,
 	 * free a pageset before creating a new one.
 	 */
 	KASSERT(toep->ddp.active_count + toep->ddp.cached_count <=
 	    nitems(toep->ddp.db), ("%s: too many wired pagesets", __func__));
 	if (toep->ddp.active_count + toep->ddp.cached_count ==
 	    nitems(toep->ddp.db)) {
 		KASSERT(toep->ddp.cached_count > 0,
 		    ("no cached pageset to free"));
 		ps = TAILQ_LAST(&toep->ddp.cached_pagesets, pagesetq);
 		TAILQ_REMOVE(&toep->ddp.cached_pagesets, ps, link);
 		toep->ddp.cached_count--;
 		free_pageset(toep->td, ps);
 	}
 	DDP_UNLOCK(toep);
 
 	/* Create a new pageset; the page array lives right after it. */
 	ps = malloc(sizeof(*ps) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK |
 	    M_ZERO);
 	ps->pages = (vm_page_t *)(ps + 1);
 	ps->vm_timestamp = map->timestamp;
 	ps->npages = vm_fault_quick_hold_pages(map, start, end - start,
 	    VM_PROT_WRITE, ps->pages, n);
 
 	DDP_LOCK(toep);
 	if (ps->npages < 0) {
 		free(ps, M_CXGBE);
 		return (EFAULT);
 	}
 
 	KASSERT(ps->npages == n, ("hold_aio: page count mismatch: %d vs %d",
 	    ps->npages, n));
 
 	ps->offset = pgoff;
 	ps->len = job->uaiocb.aio_nbytes;
 	/* The pageset keeps its own reference on the vmspace. */
 	refcount_acquire(&vm->vm_refcnt);
 	ps->vm = vm;
 	ps->start = start;
 
 	CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d",
 	    __func__, toep->tid, ps, job, ps->npages);
 	*pps = ps;
 	return (0);
 }
 
 static void
 ddp_complete_all(struct toepcb *toep, int error)
 {
 	struct kaiocb *job;
 
 	DDP_ASSERT_LOCKED(toep);
 	while (!TAILQ_EMPTY(&toep->ddp.aiojobq)) {
 		job = TAILQ_FIRST(&toep->ddp.aiojobq);
 		TAILQ_REMOVE(&toep->ddp.aiojobq, job, list);
 		toep->ddp.waiting_count--;
 		if (aio_clear_cancel_function(job))
 			ddp_complete_one(job, error);
 	}
 }
 
 /*
  * Finish a job that is being cancelled.  A job that already received data
  * before the cancel is completed as a short read of that many bytes;
  * otherwise it is handed to aio_cancel().
  */
 static void
 aio_ddp_cancel_one(struct kaiocb *job)
 {
 	long received;
 
 	received = job->aio_received;
 	if (received == 0) {
 		aio_cancel(job);
 		return;
 	}
 
 	/* Report what was copied rather than an error. */
 	aio_complete(job, received, 0);
 }
 
 /*
  * Put a job back at the head of the waiting queue so the main loop can
  * retry it later.  If the connection is marked DDP_DEAD, or the job was
  * cancelled while it was being examined (the cancel function can no longer
  * be set), the job is finished through aio_ddp_cancel_one() instead.
  * The DDP lock must be held.
  */
 static void
 aio_ddp_requeue_one(struct toepcb *toep, struct kaiocb *job)
 {
 
 	DDP_ASSERT_LOCKED(toep);
 
 	if ((toep->ddp.flags & DDP_DEAD) != 0 ||
 	    !aio_set_cancel_function(job, t4_aio_cancel_queued)) {
 		aio_ddp_cancel_one(job);
 		return;
 	}
 
 	TAILQ_INSERT_HEAD(&toep->ddp.aiojobq, job, list);
 	toep->ddp.waiting_count++;
 }
 
 /*
  * Main loop that services the queue of waiting AIO jobs of a connection.
  *
  * Each pass looks at the job at the head of the queue and does one of the
  * following before starting over at 'restart':
  *  - completes it with an error or as a short read if the socket has a
  *    pending error and no buffered data;
  *  - completes all waiting jobs as EOF once the receive side is shut down,
  *    no data is buffered and no DDP buffer is active;
  *  - asks for DDP to be enabled if it is off and no data is buffered;
  *  - copies data that is already in the receive socket buffer into the
  *    job's pages;
  *  - programs a free DDP buffer slot with the job's pageset and hands it to
  *    the hardware.
  *
  * The loop ends when the connection is DDP_DEAD, no jobs are waiting, all
  * DDP buffer slots are active, another thread is already queueing a
  * buffer, or a resource shortage forces the job to be requeued.
  *
  * Must be called with the DDP lock held.  The lock is dropped and retaken
  * inside hold_aio() and while waiting for the INP lock.
  */
 static void
 aio_ddp_requeue(struct toepcb *toep)
 {
 	struct adapter *sc = td_adapter(toep->td);
 	struct socket *so;
 	struct sockbuf *sb;
 	struct inpcb *inp;
 	struct kaiocb *job;
 	struct ddp_buffer *db;
 	size_t copied, offset, resid;
 	struct pageset *ps;
 	struct mbuf *m;
 	uint64_t ddp_flags, ddp_flags_mask;
 	struct wrqe *wr;
 	int buf_flag, db_idx, error;
 
 	DDP_ASSERT_LOCKED(toep);
 
 restart:
 	if (toep->ddp.flags & DDP_DEAD) {
 		MPASS(toep->ddp.waiting_count == 0);
 		MPASS(toep->ddp.active_count == 0);
 		return;
 	}
 
 	/* Nothing to do without waiting jobs or a free DDP buffer slot. */
 	if (toep->ddp.waiting_count == 0 ||
 	    toep->ddp.active_count == nitems(toep->ddp.db)) {
 		return;
 	}
 
 	/* Only peek at the head job; it stays queued until it is claimed. */
 	job = TAILQ_FIRST(&toep->ddp.aiojobq);
 	so = job->fd_file->f_data;
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 
 	/* We will never get anything unless we are or were connected. */
 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
 		SOCKBUF_UNLOCK(sb);
 		ddp_complete_all(toep, ENOTCONN);
 		return;
 	}
 
 	KASSERT(toep->ddp.active_count == 0 || sbavail(sb) == 0,
 	    ("%s: pending sockbuf data and DDP is active", __func__));
 
 	/* Abort if socket has reported problems. */
 	/* XXX: Wait for any queued DDP's to finish and/or flush them? */
 	if (so->so_error && sbavail(sb) == 0) {
 		toep->ddp.waiting_count--;
 		TAILQ_REMOVE(&toep->ddp.aiojobq, job, list);
 		/* The job is left alone if its cancel function can't be cleared. */
 		if (!aio_clear_cancel_function(job)) {
 			SOCKBUF_UNLOCK(sb);
 			goto restart;
 		}
 
 		/*
 		 * If this job has previously copied some data, report
 		 * a short read and leave the error to be reported by
 		 * a future request.
 		 */
 		copied = job->aio_received;
 		if (copied != 0) {
 			SOCKBUF_UNLOCK(sb);
 			aio_complete(job, copied, 0);
 			goto restart;
 		}
 		/* Consume the socket error and report it through this job. */
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(sb);
 		aio_complete(job, -1, error);
 		goto restart;
 	}
 
 	/*
 	 * Door is closed.  If there is pending data in the socket buffer,
 	 * deliver it.  If there are pending DDP requests, wait for those
 	 * to complete.  Once they have completed, return EOF reads.
 	 */
 	if (sb->sb_state & SBS_CANTRCVMORE && sbavail(sb) == 0) {
 		SOCKBUF_UNLOCK(sb);
 		if (toep->ddp.active_count != 0)
 			return;
 		ddp_complete_all(toep, 0);
 		return;
 	}
 
 	/*
 	 * If DDP is not enabled and there is no pending socket buffer
 	 * data, try to enable DDP.
 	 */
 	if (sbavail(sb) == 0 && (toep->ddp.flags & DDP_ON) == 0) {
 		SOCKBUF_UNLOCK(sb);
 
 		/*
 		 * Wait for the card to ACK that DDP is enabled before
 		 * queueing any buffers.  Currently this waits for an
 		 * indicate to arrive.  This could use a TCB_SET_FIELD_RPL
 		 * message to know that DDP was enabled instead of waiting
 		 * for the indicate which would avoid copying the indicate
 		 * if no data is pending.
 		 *
 		 * XXX: Might want to limit the indicate size to the size
 		 * of the first queued request.
 		 */
 		if ((toep->ddp.flags & DDP_SC_REQ) == 0)
 			enable_ddp(sc, toep);
 		return;
 	}
 	SOCKBUF_UNLOCK(sb);
 
 	/*
 	 * If another thread is queueing a buffer for DDP, let it
 	 * drain any work and return.
 	 */
 	if (toep->ddp.queueing != NULL)
 		return;
 
 	/* Take the next job to prep it for DDP. */
 	toep->ddp.waiting_count--;
 	TAILQ_REMOVE(&toep->ddp.aiojobq, job, list);
 	if (!aio_clear_cancel_function(job))
 		goto restart;
 	toep->ddp.queueing = job;
 
 	/* NB: This drops DDP_LOCK while it holds the backing VM pages. */
 	error = hold_aio(toep, job, &ps);
 	if (error != 0) {
 		ddp_complete_one(job, error);
 		toep->ddp.queueing = NULL;
 		goto restart;
 	}
 
 	/*
 	 * The socket state is checked again here: the DDP lock was released
 	 * inside hold_aio(), so the checks made above may be stale.
 	 */
 	SOCKBUF_LOCK(sb);
 	if (so->so_error && sbavail(sb) == 0) {
 		copied = job->aio_received;
 		if (copied != 0) {
 			SOCKBUF_UNLOCK(sb);
 			recycle_pageset(toep, ps);
 			aio_complete(job, copied, 0);
 			toep->ddp.queueing = NULL;
 			goto restart;
 		}
 
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(sb);
 		recycle_pageset(toep, ps);
 		aio_complete(job, -1, error);
 		toep->ddp.queueing = NULL;
 		goto restart;
 	}
 
 	if (sb->sb_state & SBS_CANTRCVMORE && sbavail(sb) == 0) {
 		SOCKBUF_UNLOCK(sb);
 		recycle_pageset(toep, ps);
 		if (toep->ddp.active_count != 0) {
 			/*
 			 * The door is closed, but there are still pending
 			 * DDP buffers.  Requeue.  These jobs will all be
 			 * completed once those buffers drain.
 			 */
 			aio_ddp_requeue_one(toep, job);
 			toep->ddp.queueing = NULL;
 			return;
 		}
 		ddp_complete_one(job, 0);
 		ddp_complete_all(toep, 0);
 		toep->ddp.queueing = NULL;
 		return;
 	}
 
 sbcopy:
 	/*
 	 * If the toep is dead, there shouldn't be any data in the socket
 	 * buffer, so the above case should have handled this.
 	 */
 	MPASS(!(toep->ddp.flags & DDP_DEAD));
 
 	/*
 	 * If there is pending data in the socket buffer (either
 	 * from before the requests were queued or a DDP indicate),
 	 * copy those mbufs out directly.
 	 */
 	copied = 0;
 	offset = ps->offset + job->aio_received;
 	MPASS(job->aio_received <= job->uaiocb.aio_nbytes);
 	resid = job->uaiocb.aio_nbytes - job->aio_received;
 	m = sb->sb_mb;
 	KASSERT(m == NULL || toep->ddp.active_count == 0,
 	    ("%s: sockbuf data with active DDP", __func__));
 	/* One uiomove per mbuf, capped at what the job can still take. */
 	while (m != NULL && resid > 0) {
 		struct iovec iov[1];
 		struct uio uio;
 #ifdef INVARIANTS
 		/* Shadows the outer 'error'; only used by the MPASS below. */
 		int error;
 #endif
 
 		iov[0].iov_base = mtod(m, void *);
 		iov[0].iov_len = m->m_len;
 		if (iov[0].iov_len > resid)
 			iov[0].iov_len = resid;
 		uio.uio_iov = iov;
 		uio.uio_iovcnt = 1;
 		uio.uio_offset = 0;
 		uio.uio_resid = iov[0].iov_len;
 		uio.uio_segflg = UIO_SYSSPACE;
 		uio.uio_rw = UIO_WRITE;
 #ifdef INVARIANTS
 		error = uiomove_fromphys(ps->pages, offset + copied,
 		    uio.uio_resid, &uio);
 #else
 		uiomove_fromphys(ps->pages, offset + copied, uio.uio_resid, &uio);
 #endif
 		MPASS(error == 0 && uio.uio_resid == 0);
 		copied += uio.uio_offset;
 		resid -= uio.uio_offset;
 		m = m->m_next;
 	}
 	if (copied != 0) {
 		sbdrop_locked(sb, copied);
 		job->aio_received += copied;
 		job->msgrcv = 1;
 		/* From here on 'copied' is the job's running total. */
 		copied = job->aio_received;
 		inp = sotoinpcb(so);
 		if (!INP_TRY_WLOCK(inp)) {
 			/*
 			 * The reference on the socket file descriptor in
 			 * the AIO job should keep 'sb' and 'inp' stable.
 			 * Our caller has a reference on the 'toep' that
 			 * keeps it stable.
 			 */
 			SOCKBUF_UNLOCK(sb);
 			DDP_UNLOCK(toep);
 			INP_WLOCK(inp);
 			DDP_LOCK(toep);
 			SOCKBUF_LOCK(sb);
 
 			/*
 			 * If the socket has been closed, we should detect
 			 * that and complete this request if needed on
 			 * the next trip around the loop.
 			 */
 		}
 		t4_rcvd_locked(&toep->td->tod, intotcpcb(inp));
 		INP_WUNLOCK(inp);
 		if (resid == 0 || toep->ddp.flags & DDP_DEAD) {
 			/*
 			 * We filled the entire buffer with socket
 			 * data, DDP is not being used, or the socket
 			 * is being shut down, so complete the
 			 * request.
 			 */
 			SOCKBUF_UNLOCK(sb);
 			recycle_pageset(toep, ps);
 			aio_complete(job, copied, 0);
 			toep->ddp.queueing = NULL;
 			goto restart;
 		}
 
 		/*
 		 * If DDP is not enabled, requeue this request and restart.
 		 * This will either enable DDP or wait for more data to
 		 * arrive on the socket buffer.
 		 */
 		if ((toep->ddp.flags & (DDP_ON | DDP_SC_REQ)) != DDP_ON) {
 			SOCKBUF_UNLOCK(sb);
 			recycle_pageset(toep, ps);
 			aio_ddp_requeue_one(toep, job);
 			toep->ddp.queueing = NULL;
 			goto restart;
 		}
 
 		/*
 		 * An indicate might have arrived and been added to
 		 * the socket buffer while it was unlocked after the
 		 * copy to lock the INP.  If so, restart the copy.
 		 */
 		if (sbavail(sb) != 0)
 			goto sbcopy;
 	}
 	SOCKBUF_UNLOCK(sb);
 
 	/* Page pods must be reserved and written before the buffer is used. */
 	if (prep_pageset(sc, toep, ps) == 0) {
 		recycle_pageset(toep, ps);
 		aio_ddp_requeue_one(toep, job);
 		toep->ddp.queueing = NULL;
 
 		/*
 		 * XXX: Need to retry this later.  Mostly need a trigger
 		 * when page pods are freed up.
 		 */
 		printf("%s: prep_pageset failed\n", __func__);
 		return;
 	}
 
 	/* Determine which DDP buffer to use. */
 	if (toep->ddp.db[0].job == NULL) {
 		db_idx = 0;
 	} else {
 		MPASS(toep->ddp.db[1].job == NULL);
 		db_idx = 1;
 	}
 
 	/* Build the flag values and the mask for the chosen buffer. */
 	ddp_flags = 0;
 	ddp_flags_mask = 0;
 	if (db_idx == 0) {
 		ddp_flags |= V_TF_DDP_BUF0_VALID(1);
 		if (so->so_state & SS_NBIO)
 			ddp_flags |= V_TF_DDP_BUF0_FLUSH(1);
 		ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE0(1) |
 		    V_TF_DDP_PUSH_DISABLE_0(1) | V_TF_DDP_PSHF_ENABLE_0(1) |
 		    V_TF_DDP_BUF0_FLUSH(1) | V_TF_DDP_BUF0_VALID(1);
 		buf_flag = DDP_BUF0_ACTIVE;
 	} else {
 		ddp_flags |= V_TF_DDP_BUF1_VALID(1);
 		if (so->so_state & SS_NBIO)
 			ddp_flags |= V_TF_DDP_BUF1_FLUSH(1);
 		ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE1(1) |
 		    V_TF_DDP_PUSH_DISABLE_1(1) | V_TF_DDP_PSHF_ENABLE_1(1) |
 		    V_TF_DDP_BUF1_FLUSH(1) | V_TF_DDP_BUF1_VALID(1);
 		buf_flag = DDP_BUF1_ACTIVE;
 	}
 	MPASS((toep->ddp.flags & buf_flag) == 0);
 	/* With no buffer active, the active-buffer bit is included in the mask. */
 	if ((toep->ddp.flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)) == 0) {
 		MPASS(db_idx == 0);
 		MPASS(toep->ddp.active_id == -1);
 		MPASS(toep->ddp.active_count == 0);
 		ddp_flags_mask |= V_TF_DDP_ACTIVE_BUF(1);
 	}
 
 	/*
 	 * The TID for this connection should still be valid.  If DDP_DEAD
 	 * is set, SBS_CANTRCVMORE should be set, so we shouldn't be
 	 * this far anyway.  Even if the socket is closing on the other
 	 * end, the AIO job holds a reference on this end of the socket
 	 * which will keep it open and keep the TCP PCB attached until
 	 * after the job is completed.
 	 */
 	wr = mk_update_tcb_for_ddp(sc, toep, db_idx, ps, job->aio_received,
 	    ddp_flags, ddp_flags_mask);
 	if (wr == NULL) {
 		recycle_pageset(toep, ps);
 		aio_ddp_requeue_one(toep, job);
 		toep->ddp.queueing = NULL;
 
 		/*
 		 * XXX: Need a way to kick a retry here.
 		 *
 		 * XXX: We know the fixed size needed and could
 		 * preallocate this using a blocking request at the
 		 * start of the task to avoid having to handle this
 		 * edge case.
 		 */
 		printf("%s: mk_update_tcb_for_ddp failed\n", __func__);
 		return;
 	}
 
 	/* If the cancel function can't be set, undo everything for this job. */
 	if (!aio_set_cancel_function(job, t4_aio_cancel_active)) {
 		free_wrqe(wr);
 		recycle_pageset(toep, ps);
 		aio_ddp_cancel_one(job);
 		toep->ddp.queueing = NULL;
 		goto restart;
 	}
 
 #ifdef VERBOSE_TRACES
 	CTR6(KTR_CXGBE,
 	    "%s: tid %u, scheduling %p for DDP[%d] (flags %#lx/%#lx)", __func__,
 	    toep->tid, job, db_idx, ddp_flags, ddp_flags_mask);
 #endif
 	/* Give the chip the go-ahead. */
 	t4_wrq_tx(sc, wr);
 	/* Record the job and pageset in the slot that was just programmed. */
 	db = &toep->ddp.db[db_idx];
 	db->cancel_pending = 0;
 	db->job = job;
 	db->ps = ps;
 	toep->ddp.queueing = NULL;
 	toep->ddp.flags |= buf_flag;
 	toep->ddp.active_count++;
 	if (toep->ddp.active_count == 1) {
 		MPASS(toep->ddp.active_id == -1);
 		toep->ddp.active_id = db_idx;
 		CTR2(KTR_CXGBE, "%s: ddp_active_id = %d", __func__,
 		    toep->ddp.active_id);
 	}
 	goto restart;
 }
 
 /*
  * Schedule the requeue task for a connection unless it is already
  * scheduled.  A reference on the toepcb is taken before the task is
  * enqueued.  The DDP lock must be held.
  */
 void
 ddp_queue_toep(struct toepcb *toep)
 {
 
 	DDP_ASSERT_LOCKED(toep);
 
 	if ((toep->ddp.flags & DDP_TASK_ACTIVE) == 0) {
 		toep->ddp.flags |= DDP_TASK_ACTIVE;
 		hold_toepcb(toep);
 		soaio_enqueue(&toep->ddp.requeue_task);
 	}
 }
 
 /*
  * Task handler taking the toepcb as 'context'; 'pending' is not used.
  * NOTE(review): presumably the handler behind toep->ddp.requeue_task that
  * ddp_queue_toep() enqueues -- the TASK_INIT is not visible here.
  */
 static void
 aio_ddp_requeue_task(void *context, int pending)
 {
 	struct toepcb *toep = context;
 
 	/* Run the requeue pass, then clear the flag set by ddp_queue_toep(). */
 	DDP_LOCK(toep);
 	aio_ddp_requeue(toep);
 	toep->ddp.flags &= ~DDP_TASK_ACTIVE;
 	DDP_UNLOCK(toep);
 
 	/* Counterpart of the hold_toepcb() call made in ddp_queue_toep(). */
 	free_toepcb(toep);
 }
 
 /*
  * AIO cancel routine for a job that has been assigned to one of the
  * toepcb's DDP buffers (toep->ddp.db[]).
  *
  * If aio_cancel_cleared() reports true the job is finished off right away
  * with aio_ddp_cancel_one().  Otherwise the buffer holding the job is
  * looked up, a TCB update is issued for it, and the slot is flagged
  * cancel_pending; the job itself is not completed here.  If no slot holds
  * the job, nothing is done.
  */
 static void
 t4_aio_cancel_active(struct kaiocb *job)
 {
 	struct socket *so = job->fd_file->f_data;
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = td_adapter(toep->td);
 	uint64_t valid_flag;
 	int i;
 
 	DDP_LOCK(toep);
 	if (aio_cancel_cleared(job)) {
 		/* Drop the DDP lock before completing the job. */
 		DDP_UNLOCK(toep);
 		aio_ddp_cancel_one(job);
 		return;
 	}
 
 	for (i = 0; i < nitems(toep->ddp.db); i++) {
 		if (toep->ddp.db[i].job == job) {
 			/* Should only ever get one cancel request for a job. */
 			MPASS(toep->ddp.db[i].cancel_pending == 0);
 
 			/*
 			 * Invalidate this buffer.  It will be
 			 * cancelled or partially completed once the
 			 * card ACKs the invalidate.
 			 *
 			 * The cookie is CPL_COOKIE_DDP0 or CPL_COOKIE_DDP1
 			 * depending on the buffer index; t4_ddp_mod_load()
 			 * registers do_ddp_tcb_rpl() for both.
 			 * NOTE(review): valid_flag/0 look like the
 			 * mask/value pair for W_TCB_RX_DDP_FLAGS -- confirm
 			 * against t4_set_tcb_field().
 			 */
 			valid_flag = i == 0 ? V_TF_DDP_BUF0_VALID(1) :
 			    V_TF_DDP_BUF1_VALID(1);
 			t4_set_tcb_field(sc, toep->ctrlq, toep,
 			    W_TCB_RX_DDP_FLAGS, valid_flag, 0, 1,
 			    CPL_COOKIE_DDP0 + i);
 			toep->ddp.db[i].cancel_pending = 1;
 			CTR2(KTR_CXGBE, "%s: request %p marked pending",
 			    __func__, job);
 			break;
 		}
 	}
 	DDP_UNLOCK(toep);
 }
 
 /*
  * AIO cancel routine for a job sitting on toep->ddp.aiojobq (it is
  * installed by t4_aio_queue_ddp()).  Unless aio_cancel_cleared() reports
  * true, the job is unlinked from the queue and the waiting count is
  * dropped; when the count reaches zero the requeue task is scheduled.
  * The job is always completed with aio_ddp_cancel_one() after the DDP
  * lock has been released.
  */
 static void
 t4_aio_cancel_queued(struct kaiocb *job)
 {
 	struct socket *sock = job->fd_file->f_data;
 	struct tcpcb *tcb = so_sototcpcb(sock);
 	struct toepcb *toep = tcb->t_toe;
 
 	DDP_LOCK(toep);
 	if (!aio_cancel_cleared(job)) {
 		TAILQ_REMOVE(&toep->ddp.aiojobq, job, list);
 		if (--toep->ddp.waiting_count == 0)
 			ddp_queue_toep(toep);
 	}
 	CTR2(KTR_CXGBE, "%s: request %p cancelled", __func__, job);
 	DDP_UNLOCK(toep);
 
 	aio_ddp_cancel_one(job);
 }
 
 /*
  * Queue an AIO read job on the socket's toepcb for DDP.
  *
  * Returns EOPNOTSUPP for anything other than LIO_READ; otherwise the job
  * is appended to toep->ddp.aiojobq, DDP_OK is set, aio_ddp_requeue() is
  * run once, and 0 is returned.
  */
 int
 t4_aio_queue_ddp(struct socket *so, struct kaiocb *job)
 {
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 
 
 	/* Ignore writes. */
 	if (job->uaiocb.aio_lio_opcode != LIO_READ)
 		return (EOPNOTSUPP);
 
 	DDP_LOCK(toep);
 
 	/*
 	 * XXX: Think about possibly returning errors for ENOTCONN,
 	 * etc.  Perhaps the caller would only queue the request
 	 * if it failed with EOPNOTSUPP?
 	 */
 
 #ifdef VERBOSE_TRACES
 	CTR3(KTR_CXGBE, "%s: queueing %p for tid %u", __func__, job, toep->tid);
 #endif
 	/*
 	 * The cancel routine is installed before the job is linked onto
 	 * the queue; failure to install it on a brand new job is fatal.
 	 */
 	if (!aio_set_cancel_function(job, t4_aio_cancel_queued))
 		panic("new job was cancelled");
 	TAILQ_INSERT_TAIL(&toep->ddp.aiojobq, job, list);
 	toep->ddp.waiting_count++;
 	toep->ddp.flags |= DDP_OK;
 
 	/*
 	 * Try to handle this request synchronously.  If this has
 	 * to block because the task is running, it will just bail
 	 * and let the task handle it instead.
 	 */
 	aio_ddp_requeue(toep);
 	DDP_UNLOCK(toep);
 	return (0);
 }
 
 /*
  * Module-load setup for DDP: registers the CPL handlers used by this
  * file and initializes the orphan-pageset list, its mutex and its task.
  */
 void
 t4_ddp_mod_load(void)
 {
 
 	/* One shared CPL_SET_TCB_RPL handler per DDP cookie. */
 	t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, do_ddp_tcb_rpl,
 	    CPL_COOKIE_DDP0);
 	t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, do_ddp_tcb_rpl,
 	    CPL_COOKIE_DDP1);
 	t4_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
 	t4_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
 	TAILQ_INIT(&ddp_orphan_pagesets);
 	mtx_init(&ddp_orphan_pagesets_lock, "ddp orphans", NULL, MTX_DEF);
 	TASK_INIT(&ddp_orphan_task, 0, ddp_free_orphan_pagesets, NULL);
 }
 
 /*
  * Module-unload teardown for DDP: the reverse of t4_ddp_mod_load().
  */
 void
 t4_ddp_mod_unload(void)
 {
 
 	/*
 	 * Drain the orphan task first; the list is asserted to be empty
 	 * before its mutex is destroyed.
 	 */
 	taskqueue_drain(taskqueue_thread, &ddp_orphan_task);
 	MPASS(TAILQ_EMPTY(&ddp_orphan_pagesets));
 	mtx_destroy(&ddp_orphan_pagesets_lock);
 	/* Registering NULL replaces each handler installed at load time. */
 	t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_DDP0);
 	t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_DDP1);
 	t4_register_cpl_handler(CPL_RX_DATA_DDP, NULL);
 	t4_register_cpl_handler(CPL_RX_DDP_COMPLETE, NULL);
 }
 #endif
 #endif
diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c
index f0238c233181..8c327a22e6fd 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.c
+++ b/sys/dev/hyperv/hvsock/hv_sock.c
@@ -1,1758 +1,1758 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2020 Microsoft Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/domain.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/sockbuf.h>
 #include <sys/sx.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <dev/hyperv/vmbus/vmbus_reg.h>
 
 #include "hv_sock.h"
 
 #define HVSOCK_DBG_NONE			0x0
 #define HVSOCK_DBG_INFO			0x1
 #define HVSOCK_DBG_ERR			0x2
 #define HVSOCK_DBG_VERBOSE		0x3
 
 
 SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
 
 static int hvs_dbg_level;
 SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
     0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
 
 
 /*
  * Print a printf-style message when hvs_dbg_level is at least 'level'.
  * The format arguments sit inside the 'if', so they are only evaluated
  * when the message is actually printed.
  */
 #define HVSOCK_DBG(level, ...) do {					\
 	if (hvs_dbg_level >= (level))					\
 		printf(__VA_ARGS__);					\
 	} while (0)
 
 MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
 
 static int hvs_dom_probe(void);
 
 /* The MTU is 16KB per host side's design */
 #define HVSOCK_MTU_SIZE		(1024 * 16)
 /* Upper bound on the payload of one send: a page minus the vmpipe header. */
 #define HVSOCK_SEND_BUF_SZ	(PAGE_SIZE - sizeof(struct vmpipe_proto_header))
 
 #define HVSOCK_HEADER_LEN	(sizeof(struct hvs_pkt_header))
 
 /*
  * Size of a packet carrying 'payload_len' bytes: the packet header, the
  * payload rounded up to a multiple of 8, plus one trailing uint64_t.
  * NOTE(review): the trailing 8 bytes are presumably ring-buffer
  * bookkeeping added by vmbus -- confirm.
  */
 #define HVSOCK_PKT_LEN(payload_len)	(HVSOCK_HEADER_LEN + \
 					 roundup2(payload_len, 8) + \
 					 sizeof(uint64_t))
 
 
 static struct domain		hv_socket_domain;
 
 /*
  * HyperV Transport sockets
  *
  * User-request dispatch table; it is referenced from hv_socket_protosw
  * below.  Send and receive use the hvs_trans_sosend()/hvs_trans_soreceive()
  * implementations from this file.
  */
 static struct pr_usrreqs	hvs_trans_usrreqs = {
 	.pru_attach =		hvs_trans_attach,
 	.pru_bind =		hvs_trans_bind,
 	.pru_listen =		hvs_trans_listen,
 	.pru_accept =		hvs_trans_accept,
 	.pru_connect =		hvs_trans_connect,
 	.pru_peeraddr =		hvs_trans_peeraddr,
 	.pru_sockaddr =		hvs_trans_sockaddr,
 	.pru_soreceive =	hvs_trans_soreceive,
 	.pru_sosend =		hvs_trans_sosend,
 	.pru_disconnect =	hvs_trans_disconnect,
 	.pru_close =		hvs_trans_close,
 	.pru_detach =		hvs_trans_detach,
 	.pru_shutdown =		hvs_trans_shutdown,
 	.pru_abort =		hvs_trans_abort,
 };
 
 /*
  * Definitions of protocols supported in HyperV socket domain
  *
  * There is a single entry: a connection-oriented (PR_CONNREQUIRED)
  * SOCK_STREAM protocol, HYPERV_SOCK_PROTO_TRANS.
  */
 static struct protosw		hv_socket_protosw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&hv_socket_domain,
 	.pr_protocol =		HYPERV_SOCK_PROTO_TRANS,
 	.pr_flags =		PR_CONNREQUIRED,
 	.pr_usrreqs =		&hvs_trans_usrreqs,
 },
 };
 
 /*
  * The AF_HYPERV domain.  hvs_dom_probe() is its probe routine and the
  * protocol switch range covers every entry of hv_socket_protosw[].
  */
 static struct domain		hv_socket_domain = {
 	.dom_family =		AF_HYPERV,
 	.dom_name =		"hyperv",
 	.dom_probe =		hvs_dom_probe,
 	.dom_protosw =		hv_socket_protosw,
 	.dom_protoswNPROTOSW =	&hv_socket_protosw[nitems(hv_socket_protosw)]
 };
 
 DOMAIN_SET(hv_socket_);
 
 /* Bounds used by the automatic local port search in hvs_trans_connect(). */
 #define MAX_PORT			((uint32_t)0xFFFFFFFF)
 #define MIN_PORT			((uint32_t)0x0)
 
 /*
  * 00000000-facb-11e6-bd58-64006a7986d3
  *
  * Template service GUID.  The leading bytes are zero here and are
  * overwritten with a port number by set_port_by_srv_id(); the remaining
  * bytes are what is_valid_srv_id() compares against.
  */
 static const struct hyperv_guid srv_id_template = {
 	.hv_guid = {
 	    0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
 	    0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
 };
 
 static int		hvsock_br_callback(void *, int, void *);
 static uint32_t		hvsock_canread_check(struct hvs_pcb *);
 static uint32_t		hvsock_canwrite_check(struct hvs_pcb *);
 static int		hvsock_send_data(struct vmbus_channel *chan,
     struct uio *uio, uint32_t to_write, struct sockbuf *sb);
 
 
 
 /* Globals */
 /* Taken exclusively by hvs_trans_lock(), released by hvs_trans_unlock(). */
 static struct sx		hvs_trans_socks_sx;
 /* Held around lookups, inserts and removals on the two lists below. */
 static struct mtx		hvs_trans_socks_mtx;
 static LIST_HEAD(, hvs_pcb)	hvs_trans_bound_socks;
 static LIST_HEAD(, hvs_pcb)	hvs_trans_connected_socks;
 /* Last local port picked by hvs_trans_connect(); starts at MAX_PORT. */
 static uint32_t			previous_auto_bound_port;
 
 /*
  * Emit a GUID at HVSOCK_DBG_INFO level as eleven hex fields: one
  * unsigned int read from offset 0, an unsigned short from offset 4,
  * another from offset 6, and then the bytes at offsets 8 through 15
  * one by one.
  */
 static void
 hvsock_print_guid(struct hyperv_guid *guid)
 {
 	unsigned char *bytes = (unsigned char *)guid;
 
 	HVSOCK_DBG(HVSOCK_DBG_INFO,
 	    "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
 	    *(unsigned int *)bytes,
 	    *(unsigned short *)(bytes + 4),
 	    *(unsigned short *)(bytes + 6),
 	    bytes[8], bytes[9], bytes[10], bytes[11],
 	    bytes[12], bytes[13], bytes[14], bytes[15]);
 }
 
 /*
  * A service id is valid when every byte after the first four matches
  * srv_id_template; the first four bytes are not compared.
  */
 static bool
 is_valid_srv_id(const struct hyperv_guid *id)
 {
 	const void *tail = &id->hv_guid[4];
 	const void *expected = &srv_id_template.hv_guid[4];
 	size_t tail_len = sizeof(struct hyperv_guid) - 4;
 
 	return (memcmp(tail, expected, tail_len) == 0);
 }
 
 /*
  * Return the port encoded in a service GUID: the leading unsigned int of
  * the GUID, read in host byte order through a pointer cast.
  */
 static unsigned int
 get_port_by_srv_id(const struct hyperv_guid *srv_id)
 {
 	return *((const unsigned int *)srv_id);
 }
 
 /*
  * Store 'port' into the leading unsigned int of a service GUID; the rest
  * of the GUID is left untouched.  Inverse of get_port_by_srv_id().
  */
 static void
 set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
 {
 	*((unsigned int *)srv_id) = port;
 }
 
 
 static void
 __hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
 {
 	struct hvs_pcb *p = NULL;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
 
 	if (!pcb)
 		return;
 
 	if (list & HVS_LIST_BOUND) {
 		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
 			if  (p == pcb)
 				LIST_REMOVE(p, bound_next);
 	}
 
 	if (list & HVS_LIST_CONNECTED) {
 		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
 			if (p == pcb)
 				LIST_REMOVE(pcb, connected_next);
 	}
 }
 
 /*
  * Socket-based front end for __hvs_remove_pcb_from_list(): looks up the
  * socket's pcb with so2hvspcb() and removes that from the selected lists.
  * No locking is done here.
  */
 static void
 __hvs_remove_socket_from_list(struct socket *so, unsigned char list)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
 
 	__hvs_remove_pcb_from_list(pcb, list);
 }
 
 /*
  * Put the socket's pcb at the head of the bound and/or connected list,
  * according to the HVS_LIST_* bits in 'list'.  No locking is done here.
  */
 static void
 __hvs_insert_socket_on_list(struct socket *so, unsigned char list)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	if ((list & HVS_LIST_BOUND) != 0) {
 		LIST_INSERT_HEAD(&hvs_trans_bound_socks, pcb, bound_next);
 	}
 
 	if ((list & HVS_LIST_CONNECTED) != 0) {
 		LIST_INSERT_HEAD(&hvs_trans_connected_socks, pcb,
 		    connected_next);
 	}
 }
 
 void
 hvs_remove_socket_from_list(struct socket *so, unsigned char list)
 {
 	if (!so || !so->so_pcb) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: socket or so_pcb is null\n", __func__);
 		return;
 	}
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	__hvs_remove_socket_from_list(so, list);
 	mtx_unlock(&hvs_trans_socks_mtx);
 }
 
 static void
 hvs_insert_socket_on_list(struct socket *so, unsigned char list)
 {
 	if (!so || !so->so_pcb) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: socket or so_pcb is null\n", __func__);
 		return;
 	}
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	__hvs_insert_socket_on_list(so, list);
 	mtx_unlock(&hvs_trans_socks_mtx);
 }
 
 static struct socket *
 __hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
 {
 	struct hvs_pcb *p = NULL;
 
 	if (list & HVS_LIST_BOUND)
 		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
 			if (p->so != NULL &&
 			    addr->hvs_port == p->local_addr.hvs_port)
 				return p->so;
 
 	if (list & HVS_LIST_CONNECTED)
 		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
 			if (p->so != NULL &&
 			    addr->hvs_port == p->local_addr.hvs_port)
 				return p->so;
 
 	return NULL;
 }
 
 /*
  * Locked variant of __hvs_find_socket_on_list().  The list mutex is
  * dropped again before the result is returned.
  */
 static struct socket *
 hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
 {
 	struct socket *found;
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	found = __hvs_find_socket_on_list(addr, list);
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	return found;
 }
 
 /*
  * Reset *addr to an all-zero AF_HYPERV address and then fill in the
  * length, family and the given port.
  */
 static inline void
 hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
 {
 	memset(addr, 0, sizeof(*addr));
 	addr->sa_len = sizeof(*addr);
 	addr->sa_family = AF_HYPERV;
 	addr->hvs_port = port;
 }
 
 /*
  * Initialize *addr as an AF_HYPERV address whose port is the one encoded
  * in the service GUID 'svr_id' (see get_port_by_srv_id()).
  */
 void
 hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
 {
 	hvs_addr_set(addr, get_port_by_srv_id(svr_id));
 }
 
 /*
  * Take hvs_trans_socks_sx exclusively.  Always returns 0; callers in this
  * file discard the result.
  */
 int
 hvs_trans_lock(void)
 {
 	sx_xlock(&hvs_trans_socks_sx);
 	return (0);
 }
 
 /* Release the exclusive hold on hvs_trans_socks_sx taken by hvs_trans_lock(). */
 void
 hvs_trans_unlock(void)
 {
 	sx_xunlock(&hvs_trans_socks_sx);
 }
 
 /*
  * Domain probe routine: the domain is only usable when running as a
  * Hyper-V guest.  Returns 0 in that case and ENXIO otherwise.
  */
 static int
 hvs_dom_probe(void)
 {
 
 	/* Don't even give us a chance to attach on non-HyperV. */
 	return (vm_guest != VM_GUEST_HV ? ENXIO : 0);
 }
 
 /*
  * One-time initialization of the file-scope state: the auto-bind port
  * cursor, the sx lock, the list mutex and the two socket lists.  It is
  * run through the SYSINIT below.
  */
 static void
 hvs_trans_init(void *arg __unused)
 {
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_init called\n", __func__);
 
 	/* Initialize Globals */
 	previous_auto_bound_port = MAX_PORT;
 	sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
 	mtx_init(&hvs_trans_socks_mtx,
 	    "hvs_trans_socks_mtx", NULL, MTX_DEF);
 	LIST_INIT(&hvs_trans_bound_socks);
 	LIST_INIT(&hvs_trans_connected_socks);
 }
 SYSINIT(hvs_trans_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
     hvs_trans_init, NULL);
 
 /*
  * Called in two cases:
  * 1) When the user calls socket();
  * 2) When we accept a new incoming connection and call sonewconn().
  *
  * Allocates a zeroed pcb and links it with the socket in both directions.
  * Returns ESOCKTNOSUPPORT for a non-stream socket, EPROTONOSUPPORT for an
  * unknown protocol, EISCONN if the socket already has a pcb, ENOMEM if
  * the allocation fails, and 0 on success.
  */
 int
 hvs_trans_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_attach called\n", __func__);
 
 	if (so->so_type != SOCK_STREAM)
 		return (ESOCKTNOSUPPORT);
 	if (!(proto == 0 || proto == HYPERV_SOCK_PROTO_TRANS))
 		return (EPROTONOSUPPORT);
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Non-sleeping allocation; the caller sees ENOMEM on failure. */
 	pcb = malloc(sizeof(*pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
 	if (pcb == NULL)
 		return (ENOMEM);
 
 	so->so_pcb = (void *)pcb;
 	pcb->so = so;
 
 	return (0);
 }
 
 /*
  * pru_detach handler: disconnect the pcb from the socket.  Runs under the
  * transport sx lock and does nothing if the socket has no pcb.
  */
 void
 hvs_trans_detach(struct socket *so)
 {
 	struct hvs_pcb *pcb;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_detach called\n", __func__);
 
 	(void) hvs_trans_lock();
 	pcb = so2hvspcb(so);
 	if (pcb == NULL) {
 		hvs_trans_unlock();
 		return;
 	}
 
 	/*
 	 * The pcb is zeroed and freed here only for a listening socket.
 	 * NOTE(review): for any other socket the pcb is presumably freed
 	 * elsewhere (e.g. when its channel goes away) -- confirm.
 	 */
 	if (SOLISTENING(so)) {
 		bzero(pcb, sizeof(*pcb));
 		free(pcb, M_HVSOCK);
 	}
 
 	so->so_pcb = NULL;
 
 	hvs_trans_unlock();
 }
 
 /*
  * pru_bind handler: bind 'so' to the local port given in 'addr' (a
  * struct sockaddr_hvs).
  *
  * Returns EINVAL for a missing address, a missing pcb or a wrong sa_len,
  * EAFNOSUPPORT for a family other than AF_HYPERV, EADDRINUSE when the
  * port is already on the bound or connected list, and 0 on success.
  */
 int
 hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
 	int error = 0;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_bind called\n", __func__);
 
 	if (sa == NULL || pcb == NULL)
 		return (EINVAL);
 
 	if (sa->sa_family != AF_HYPERV) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: Not supported, sa_family is %u\n",
 		    __func__, sa->sa_family);
 		return (EAFNOSUPPORT);
 	}
 	if (sa->sa_len != sizeof(*sa)) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: Not supported, sa_len is %u\n",
 		    __func__, sa->sa_len);
 		return (EINVAL);
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
 
 	/* Lookup and insert happen under one hold of the list mutex. */
 	mtx_lock(&hvs_trans_socks_mtx);
 	if (__hvs_find_socket_on_list(sa,
 	    HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
 		/*
 		 * Nobody else owns the port: record the addresses and
 		 * put the socket on the bound list.
 		 */
 		hvs_addr_set(&pcb->local_addr, sa->hvs_port);
 		hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
 		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
 	} else {
 		error = EADDRINUSE;
 	}
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	return (error);
 }
 
 /*
  * pru_listen handler.  The socket must already be the one found on the
  * bound list for its local port; otherwise EADDRNOTAVAIL is returned.
  * Returns EINVAL without a pcb, else the result of
  * solisten_proto_check().
  */
 int
 hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct socket *owner;
 	int error;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_listen called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* The local port has to be bound, and bound by this very socket. */
 	owner = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
 	if (owner == NULL || owner != so) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: Address not bound or not by us.\n", __func__);
 		return (EADDRNOTAVAIL);
 	}
 
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error == 0) {
 		solisten_proto(so, backlog);
 	}
 	SOCK_UNLOCK(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket listen error = %d\n", __func__, error);
 	return (error);
 }
 
 /*
  * pru_accept handler: hand back a copy of the pcb's remote address in
  * *nam.  Returns EINVAL without a pcb and ENOMEM if the copy cannot be
  * allocated (in which case *nam is NULL).
  */
 int
 hvs_trans_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_accept called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	*nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr,
 	    M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * pru_connect handler: start connecting 'so' to the host service port
  * named by 'nam' (a struct sockaddr_hvs).
  *
  * A local port is picked automatically by walking downwards from the
  * previously auto-bound port, wrapping from MIN_PORT to MAX_PORT, until a
  * port is found that is on neither the bound nor the connected list.  The
  * socket then goes on the bound list, the VM and host service GUIDs are
  * built from srv_id_template plus the local and remote port, the socket
  * is marked connecting, and the request is issued with
  * vmbus_req_tl_connect() once hvs_trans_socks_mtx has been dropped.
  *
  * Returns 0 on success, EINVAL for a missing pcb or a missing/malformed
  * address, EAFNOSUPPORT for a non-AF_HYPERV address, EINPROGRESS if the
  * socket is already connected, connecting or disconnecting, and
  * EADDRINUSE if no local port is free.
  */
 int
 hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
 	bool found_auto_bound_port = false;
 	uint32_t i;
 	int error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Verify the remote address */
 	if (raddr == NULL)
 		return (EINVAL);
 	if (raddr->sa_family != AF_HYPERV)
 		return (EAFNOSUPPORT);
 	if (raddr->sa_len != sizeof(*raddr))
 		return (EINVAL);
 
 	/*
 	 * Only log the remote port once the address is known to be present
 	 * and of the right size, as hvs_trans_bind() does.
 	 */
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
 	    __func__, raddr->hvs_port);
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	if (so->so_state &
 	    (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
 			HVSOCK_DBG(HVSOCK_DBG_ERR,
 			    "%s: socket connect in progress\n",
 			    __func__);
 			error = EINPROGRESS;
 			goto out;
 	}
 
 	/*
 	 * Find an available port for us to auto bind the local
 	 * address.
 	 */
 	hvs_addr_set(&pcb->local_addr, 0);
 
 	/*
 	 * The counter is unsigned, like the port it stands for, so that
 	 * decrementing across the whole 32-bit range is well defined.
 	 */
 	for (i = previous_auto_bound_port - 1;
 	    i != previous_auto_bound_port; i--) {
 		/* MIN_PORT itself is never handed out; wrap to the top. */
 		if (i == MIN_PORT)
 			i = MAX_PORT;
 
 		pcb->local_addr.hvs_port = i;
 
 		if (__hvs_find_socket_on_list(&pcb->local_addr,
 		    HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
 			found_auto_bound_port = true;
 			previous_auto_bound_port = i;
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: found local bound port is %x\n",
 			    __func__, pcb->local_addr.hvs_port);
 			break;
 		}
 	}
 
 	if (found_auto_bound_port) {
 		/* Found available port for auto bound, put on list */
 		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
 		/* Set VM service ID */
 		pcb->vm_srv_id = srv_id_template;
 		set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
 		/* Set host service ID and remote port */
 		pcb->host_srv_id = srv_id_template;
 		set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
 		hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
 
 		/* Change the socket state to SS_ISCONNECTING */
 		soisconnecting(so);
 	} else {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: No local port available for auto bound\n",
 		    __func__);
 		error = EADDRINUSE;
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
 	hvsock_print_guid(&pcb->vm_srv_id);
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
 	hvsock_print_guid(&pcb->host_srv_id);
 
 out:
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	/* The connect request is sent without the list mutex held. */
 	if (found_auto_bound_port)
 		 vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
 
 	return (error);
 }
 
 /*
  * pru_disconnect handler.  Under the transport sx lock, mark the socket
  * as disconnecting unless SS_ISDISCONNECTED is already set.  Returns
  * EINVAL when the socket has no pcb and 0 otherwise.
  */
 int
 hvs_trans_disconnect(struct socket *so)
 {
 	struct hvs_pcb *pcb;
 	int error;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
 
 	(void) hvs_trans_lock();
 	pcb = so2hvspcb(so);
 	if (pcb != NULL) {
 		/* Nothing to do for a socket that is already disconnected. */
 		if ((so->so_state & SS_ISDISCONNECTED) == 0)
 			soisdisconnecting(so);
 		error = 0;
 	} else {
 		error = EINVAL;
 	}
 	hvs_trans_unlock();
 
 	return (error);
 }
 
 /*
  * Argument bundle that hvs_trans_soreceive() hands to hvsock_br_callback()
  * through vmbus_chan_recv_peek_call().
  */
 struct hvs_callback_arg {
 	struct uio *uio;	/* the caller's uio */
 	struct sockbuf *sb;	/* the socket's receive buffer (so_rcv) */
 };
 
 /*
  * pru_soreceive handler: copy stream data that is available on the pcb's
  * channel into 'uio'.
  *
  * paddr, mp0 and controlp are not used.  *flagsp is only read (MSG_EOR is
  * masked off); MSG_PEEK is rejected with EOPNOTSUPP.
  *
  * The function holds the socket's receive I/O lock and the so_rcv sockbuf
  * lock while it works.  It keeps reading while data is available, and
  * otherwise either returns (data already read without MSG_WAITALL, buffer
  * full, receive side shut, non-blocking socket, socket error) or sleeps
  * in sbwait() for more data.
  *
  * Returns 0 or: EINVAL (not SOCK_STREAM, no pcb, empty uio or uio not
  * UIO_READ), EOPNOTSUPP (MSG_PEEK), EPIPE (receive side shut and socket
  * disconnected), EAGAIN (non-blocking and nothing read), so_error when it
  * is set to something other than ESHUTDOWN, or an error from the I/O
  * lock, vmbus_chan_recv_peek_call() or sbwait().
  */
 int
 hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockbuf *sb;
 	ssize_t orig_resid;
 	uint32_t canread, to_read;
 	int flags, error = 0;
 	struct hvs_callback_arg cbarg;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
 
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (pcb == NULL)
 		return (EINVAL);
 
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 
 	if (flags & MSG_PEEK)
 		return (EOPNOTSUPP);
 
 	/* If no space to copy out anything */
 	if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
 		return (EINVAL);
 
 	/* Remembered so that "have we read anything yet?" can be answered. */
 	orig_resid = uio->uio_resid;
 
 	/* Prevent other readers from entering the socket. */
 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
 	if (error) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: soiolock returned error = %d\n", __func__, error);
 		return (error);
 	}
 
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 
 	cbarg.uio = uio;
 	cbarg.sb = sb;
 	/*
 	 * If the socket is closing, there might still be some data
 	 * in rx br to read. However we need to make sure
 	 * the channel is still open.
 	 */
 	if ((sb->sb_state & SBS_CANTRCVMORE) &&
 	    (so->so_state & SS_ISDISCONNECTED)) {
 		/* Other thread already closed the channel */
 		error = EPIPE;
 		goto out;
 	}
 
 	while (true) {
 		/* Drain whatever is readable right now, up to uio_resid. */
 		while (uio->uio_resid > 0 &&
 		    (canread = hvsock_canread_check(pcb)) > 0) {
 			to_read = MIN(canread, uio->uio_resid);
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: to_read = %u, skip = %u\n", __func__, to_read,
 			    (unsigned int)(sizeof(struct hvs_pkt_header) +
 			    pcb->recv_data_off));
 
 			/*
 			 * The packet header and the part of the payload
 			 * that was consumed earlier are skipped.
 			 */
 			error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
 			    sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
 			    hvsock_br_callback, (void *)&cbarg);
 			/*
 			 * It is possible socket is disconnected because
 			 * we released lock in hvsock_br_callback. So we
 			 * need to check the state to make sure it is not
 			 * disconnected.
 			 */
 			if (error || so->so_state & SS_ISDISCONNECTED) {
 				break;
 			}
 
 			pcb->recv_data_len -= to_read;
 			pcb->recv_data_off += to_read;
 		}
 
 		if (error)
 			break;
 
 		/* Abort if socket has reported problems. */
 		if (so->so_error) {
 			if (so->so_error == ESHUTDOWN &&
 			    orig_resid > uio->uio_resid) {
 				/*
 				 * Although we got a FIN, we also received
 				 * some data in this round. Deliver it
 				 * to the user.
 				 */
 				error = 0;
 			} else {
 				if (so->so_error != ESHUTDOWN)
 					error = so->so_error;
 			}
 
 			break;
 		}
 
 		/* Cannot receive more. */
 		if (sb->sb_state & SBS_CANTRCVMORE)
 			break;
 
 		/* We are done if buffer has been filled */
 		if (uio->uio_resid == 0)
 			break;
 
 		/* Without MSG_WAITALL a partial read is good enough. */
 		if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
 			break;
 
 		/* Buffer ring is empty and we shall not block */
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			if (orig_resid == uio->uio_resid) {
 				/* We have not read anything */
 				error = EAGAIN;
 			}
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: non blocked read return, error %d.\n",
 			    __func__, error);
 			break;
 		}
 
 		/*
 		 * Wait and block until (more) data comes in.
 		 * Note: Drops the sockbuf lock during wait.
 		 */
-		error = sbwait(sb);
+		error = sbwait(so, SO_RCV);
 
 		if (error)
 			break;
 
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: wake up from sbwait, read available is %u\n",
 		    __func__, vmbus_chan_read_available(pcb->chan));
 	}
 
 out:
 	SOCKBUF_UNLOCK(sb);
 	SOCK_IO_RECV_UNLOCK(so);
 
 	/* We received a FIN in this call */
 	if (so->so_error == ESHUTDOWN) {
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			/* Send has already closed */
 			soisdisconnecting(so);
 		} else {
 			/* Just close the receive side */
 			socantrcvmore(so);
 		}
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: returning error = %d, so_error = %d\n",
 	    __func__, error, so->so_error);
 
 	return (error);
 }
 
 /*
  * pru_sosend handler: write the data described by 'uio' to the pcb's
  * channel.
  *
  * addr, top, controlp and td are not used.  The function holds the
  * socket's send I/O lock and the so_snd sockbuf lock while it works.
  * Data is sent in pieces no larger than HVSOCK_SEND_BUF_SZ.  When the
  * channel has no room, the call returns if some data has already been
  * sent, fails with EWOULDBLOCK on a non-blocking socket, and otherwise
  * sleeps in sbwait().
  *
  * Returns 0 or: EINVAL (not SOCK_STREAM, no pcb, empty uio or uio not
  * UIO_WRITE), EPIPE (send side shut, or so_error is ESHUTDOWN),
  * EWOULDBLOCK, or an error from the I/O lock, sbwait() or
  * hvsock_send_data().
  */
 int
 hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockbuf *sb;
 	ssize_t orig_resid;
 	uint32_t canwrite, to_write;
 	int error = 0;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
 	    __func__, uio->uio_resid);
 
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* If nothing to send */
 	if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
 		return (EINVAL);
 
 	/* Remembered so that "have we sent anything yet?" can be answered. */
 	orig_resid = uio->uio_resid;
 
 	/* Prevent other writers from entering the socket. */
 	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
 	if (error) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: soiolocak returned error = %d\n", __func__, error);
 		return (error);
 	}
 
 	sb = &so->so_snd;
 	SOCKBUF_LOCK(sb);
 
 	if ((sb->sb_state & SBS_CANTSENDMORE) ||
 	    so->so_error == ESHUTDOWN) {
 		error = EPIPE;
 		goto out;
 	}
 
 	while (uio->uio_resid > 0) {
 		canwrite = hvsock_canwrite_check(pcb);
 		if (canwrite == 0) {
 			/* We have sent some data */
 			if (orig_resid > uio->uio_resid)
 				break;
 			/*
 			 * We have not sent any data and it is
 			 * non-blocked io
 			 */
 			if (so->so_state & SS_NBIO ||
 			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
 				error = EWOULDBLOCK;
 				break;
 			} else {
 				/*
 				 * We are here because there is no space on
 				 * send buffer ring. Signal the other side
 				 * to read and free more space.
 				 * Sleep wait until space available to send
 				 * Note: Drops the sockbuf lock during wait.
 				 */
-				error = sbwait(sb);
+				error = sbwait(so, SO_SND);
 
 				if (error)
 					break;
 
 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 				    "%s: wake up from sbwait, space avail on "
 				    "tx ring is %u\n",
 				    __func__,
 				    vmbus_chan_write_available(pcb->chan));
 
 				/* Re-check the available space from the top. */
 				continue;
 			}
 		}
 		/* Bounded by ring space, remaining data and the send cap. */
 		to_write = MIN(canwrite, uio->uio_resid);
 		to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
 
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: canwrite is %u, to_write = %u\n", __func__,
 		    canwrite, to_write);
 		error = hvsock_send_data(pcb->chan, uio, to_write, sb);
 
 		if (error)
 			break;
 	}
 
 out:
 	SOCKBUF_UNLOCK(sb);
 	SOCK_IO_SEND_UNLOCK(so);
 
 	return (error);
 }
 
 /*
  * pru_peeraddr handler: hand back a copy of the pcb's remote address in
  * *nam.  Returns EINVAL without a pcb and ENOMEM if the copy cannot be
  * allocated (in which case *nam is NULL).
  */
 int
 hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	*nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * pru_sockaddr handler: hand back a copy of the pcb's local address in
  * *nam.  Returns EINVAL without a pcb and ENOMEM if the copy cannot be
  * allocated (in which case *nam is NULL).
  */
 int
 hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	*nam = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * pru_close handler.  Under the transport sx lock: send a FIN to the
  * peer if the socket is connected, mark the socket disconnected if it
  * was connected/connecting/disconnecting, clear the pcb's channel and
  * socket pointers, and take a listening socket off the bound list.
  * The pcb itself is not freed here.
  */
 void
 hvs_trans_close(struct socket *so)
 {
 	struct hvs_pcb *pcb;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_close called\n", __func__);
 
 	(void) hvs_trans_lock();
 	pcb = so2hvspcb(so);
 	if (!pcb) {
 		hvs_trans_unlock();
 		return;
 	}
 
 	if (so->so_state & SS_ISCONNECTED) {
 		/* Send a FIN to peer */
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: hvs_trans_close sending a FIN to host\n", __func__);
 		/* A zero-length send with no uio; the result is ignored. */
 		(void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
 	}
 
 	if (so->so_state &
 	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
 		soisdisconnected(so);
 
 	/* hvsock_chan_cb() checks both of these before touching the socket. */
 	pcb->chan = NULL;
 	pcb->so = NULL;
 
 	if (SOLISTENING(so)) {
 		mtx_lock(&hvs_trans_socks_mtx);
 		/* Remove from bound list */
 		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
 		mtx_unlock(&hvs_trans_socks_mtx);
 	}
 
 	hvs_trans_unlock();
 
 	return;
 }
 
 /*
  * pru_abort handler.  Under the transport sx lock: take a listening
  * socket off the bound list and call sodisconnect() on a connected one.
  * Does nothing when the socket has no pcb.  Note that the pcb pointer is
  * fetched before the lock is taken.
  */
 void
 hvs_trans_abort(struct socket *so)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_abort called\n", __func__);
 
 	(void) hvs_trans_lock();
 	if (pcb == NULL) {
 		hvs_trans_unlock();
 		return;
 	}
 
 	if (SOLISTENING(so)) {
 		mtx_lock(&hvs_trans_socks_mtx);
 		/* Remove from bound list */
 		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
 		mtx_unlock(&hvs_trans_socks_mtx);
 	}
 
 	if (so->so_state & SS_ISCONNECTED) {
 		/* The result of the disconnect is deliberately ignored. */
 		(void) sodisconnect(so);
 	}
 	hvs_trans_unlock();
 
 	return;
 }
 
 /*
  * pru_shutdown handler.  Distinguishes the two cases by looking at the
  * receive buffer state: if the receive side is still open only the send
  * side is shut; otherwise, for a connected socket, a FIN is sent and the
  * socket is marked disconnecting.  Returns EINVAL when the socket has no
  * pcb and 0 otherwise.
  */
 int
 hvs_trans_shutdown(struct socket *so)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockbuf *sb;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/*
 	 * Only get called with the shutdown method is SHUT_WR or
 	 * SHUT_RDWR.
 	 * When the method is SHUT_RD or SHUT_RDWR, the caller
 	 * already set the SBS_CANTRCVMORE on receive side socket
 	 * buffer.
 	 */
 	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
 		/*
 		 * SHUT_WR only case.
 		 * Receive side is still open. Just close
 		 * the send side.
 		 */
 		socantsendmore(so);
 	} else {
 		/* SHUT_RDWR case */
 		if (so->so_state & SS_ISCONNECTED) {
 			/* Send a FIN to peer */
 			sb = &so->so_snd;
 			/* The zero-length send is made with so_snd locked. */
 			SOCKBUF_LOCK(sb);
 			(void) hvsock_send_data(pcb->chan, NULL, 0, sb);
 			SOCKBUF_UNLOCK(sb);
 
 			soisdisconnecting(so);
 		}
 	}
 
 	return (0);
 }
 
 /* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is
  * <port> (see struct sockaddr_hvs).
  *
  * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
  * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
  * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
  * the below sockaddr:
  *
  * struct SOCKADDR_HV
  * {
  *    ADDRESS_FAMILY Family;
  *    USHORT Reserved;
  *    GUID VmId;
  *    GUID ServiceId;
  * };
  * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via
  * VMBus, because here it's obvious the host and the VM can easily identify
  * each other. Though the VmID is useful on the host, especially in the case
  * of Windows container, FreeBSD VM doesn't need it at all.
  *
  * To be compatible with similar infrastructure in Linux VMs, we have
  * to limit the available GUID space of SOCKADDR_HV so that we can create
  * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID.
  * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
  *
  ****************************************************************************
  * The only valid Service GUIDs, from the perspectives of both the host and *
  * FreeBSD VM, that can be connected by the other end, must conform to this *
  * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
  ****************************************************************************
  *
  * When we write apps on the host to connect(), the GUID ServiceID is used.
  * When we write apps in FreeBSD VM to connect(), we only need to specify the
  * port and the driver will form the GUID and use that to request the host.
  *
  * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the
  * auto-generated remote port for a connect request initiated by the host's
  * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
  * FreeBSD guest.
  */
 
 /*
  * Older Hyper-V hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
  * restrict the Hyper-V socket ring buffer size to six 4K pages. Newer
  * Hyper-V hosts don't have this limit.
  *
  * HVS_RINGBUF_RCV_SIZE and HVS_RINGBUF_SND_SIZE are the sizes used on the
  * older hosts and the lower bounds on newer ones; HVS_RINGBUF_MAX_SIZE is
  * the upper bound applied on newer hosts (see hvsock_open_channel()).
  */
 #define HVS_RINGBUF_RCV_SIZE	(PAGE_SIZE * 6)
 #define HVS_RINGBUF_SND_SIZE	(PAGE_SIZE * 6)
 #define HVS_RINGBUF_MAX_SIZE	(PAGE_SIZE * 64)
 
 /*
  * Per-device softc of the hv_sock driver; its size is registered with the
  * bus through hvsock_driver.
  */
 struct hvsock_sc {
 	/* NOTE(review): not referenced in the visible code; presumably the owning device. */
 	device_t		dev;
 	/*
 	 * pcb of the connection served by this device.  Set when a
 	 * connection is opened in hvsock_open_connection() or
 	 * hvsock_open_conn_passive(); freed and reset to NULL in
 	 * hvsock_detach().
 	 */
 	struct hvs_pcb		*pcb;
 	/* NOTE(review): not referenced in the visible code; presumably the VMBus channel. */
 	struct vmbus_channel	*channel;
 };
 
 static bool
 hvsock_chan_readable(struct vmbus_channel *chan)
 {
 	uint32_t readable = vmbus_chan_read_available(chan);
 
 	return (readable >= HVSOCK_PKT_LEN(0));
 }
 
 /*
  * Channel callback installed by hvsock_open_channel(); 'context' is the
  * hvs_pcb that was passed to vmbus_chan_open().  Wakes up the socket's
  * reader when the receive ring is readable and its writer when
  * hvsock_canwrite_check() reports free space.
  */
 static void
 hvsock_chan_cb(struct vmbus_channel *chan, void *context)
 {
 	struct hvs_pcb *pcb = context;
 	struct socket *so;
 	uint32_t wspace;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: host send us a wakeup on rb data, pcb = %p\n",
 	    __func__, pcb);
 
 	/*
 	 * The channel is known to be open at this point, but the socket
 	 * may already have been closed or freed.  Nothing to do unless
 	 * both the pcb's channel and its socket are still around.
 	 */
 	(void) hvs_trans_lock();
 	so = hsvpcb2so(pcb);
 	if (pcb->chan == NULL || so == NULL) {
 		hvs_trans_unlock();
 		return;
 	}
 
 	/*
 	 * Receive side: sorwakeup_locked() consumes the sockbuf lock,
 	 * otherwise it has to be dropped by hand.
 	 */
 	SOCKBUF_LOCK(&(so)->so_rcv);
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: read available = %u\n", __func__,
 	    vmbus_chan_read_available(pcb->chan));
 	if (!hvsock_chan_readable(pcb->chan)) {
 		SOCKBUF_UNLOCK(&(so)->so_rcv);
 	} else {
 		sorwakeup_locked(so);
 	}
 
 	/*
 	 * Send side: same locking pattern, keyed on available write
 	 * space.
 	 */
 	SOCKBUF_LOCK(&(so)->so_snd);
 	wspace = hvsock_canwrite_check(pcb);
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: canwrite = %u\n", __func__, wspace);
 	if (wspace == 0) {
 		SOCKBUF_UNLOCK(&(so)->so_snd);
 	} else {
 		sowwakeup_locked(so);
 	}
 
 	hvs_trans_unlock();
 }
 
 /*
  * Buffer ring copy callback handed to vmbus_chan_iov_send() by
  * hvsock_send_data().
  *
  * datap - location in the buffer ring to copy to/from
  * cplen - number of bytes to move
  * cbarg - struct hvs_callback_arg carrying the uio and, optionally, the
  *         socket buffer whose lock is dropped around the copy
  *
  * Returns EINVAL if cbarg or datap is NULL, otherwise the result of
  * uiomove().
  */
 static int
 hvsock_br_callback(void *datap, int cplen, void *cbarg)
 {
 	struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg;
 	struct uio *uio;
 	struct sockbuf *sb;
 	int error = 0;
 
 	/* Validate the arguments before touching anything behind them. */
 	if (arg == NULL || datap == NULL)
 		return (EINVAL);
 
 	uio = arg->uio;
 	sb = arg->sb;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: called, uio_rw = %s, uio_resid = %zd, cplen = %u, "
 	    "datap = %p\n",
 	    __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br",
 	    uio->uio_resid, cplen, datap);
 
 	/* The sockbuf lock is not held across uiomove(). */
 	if (sb)
 		SOCKBUF_UNLOCK(sb);
 
 	error = uiomove(datap, cplen, uio);
 
 	if (sb)
 		SOCKBUF_LOCK(sb);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: after uiomove, uio_resid = %zd, error = %d\n",
 	    __func__, uio->uio_resid, error);
 
 	return (error);
 }
 
 /*
  * Build one hvsock packet and queue it on the channel.
  *
  * With a uio and to_write > 0 the packet carries to_write bytes of
  * payload, copied by hvsock_br_callback().  With to_write == 0 a
  * header-only packet is sent.  Any other combination sends nothing and
  * returns 0.
  *
  * Returns ENOTCONN if chan is NULL, otherwise the result of
  * vmbus_chan_iov_send() (0 when nothing was sent).
  */
 static int
 hvsock_send_data(struct vmbus_channel *chan, struct uio *uio,
     uint32_t to_write, struct sockbuf *sb)
 {
 	struct hvs_callback_arg cbarg;
 	struct hvs_pkt_header hvs_pkt;
 	struct iovec iov[3];
 	uint64_t pad = 0;
 	int chdr_len, pkthdr_len, pkt_len, padded_len;
 	int error = 0;
 
 	if (chan == NULL)
 		return (ENOTCONN);
 
 	/* Lengths: channel packet header, hvsock header, payload, padding. */
 	chdr_len = sizeof(struct vmbus_chanpkt_hdr);
 	pkthdr_len = sizeof(struct hvs_pkt_header);
 	pkt_len = pkthdr_len + to_write;
 	padded_len = VMBUS_CHANPKT_TOTLEN(pkt_len);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, "
 	    "pad_pktlen = %u, data_len = %u\n",
 	    __func__, chdr_len, pkthdr_len, pkt_len, padded_len, to_write);
 
 	/* VMBus channel packet header. */
 	hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND;
 	hvs_pkt.chan_pkt_hdr.cph_flags = 0;
 	VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, chdr_len);
 	VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, padded_len);
 	hvs_pkt.chan_pkt_hdr.cph_xactid = 0;
 
 	/* vmpipe header describing the payload. */
 	hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1;
 	hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write;
 
 	cbarg.sb = sb;
 	cbarg.uio = uio;
 
 	/* Every packet starts with the header. */
 	iov[0].iov_base = &hvs_pkt;
 	iov[0].iov_len = pkthdr_len;
 
 	if (uio != NULL && to_write > 0) {
 		/*
 		 * The payload element has no base: its bytes are moved
 		 * from the uio by the callback.
 		 */
 		iov[1].iov_base = NULL;
 		iov[1].iov_len = to_write;
 		iov[2].iov_base = &pad;
 		iov[2].iov_len = padded_len - pkt_len;
 
 		error = vmbus_chan_iov_send(chan, iov, 3,
 		    hvsock_br_callback, &cbarg);
 	} else if (to_write == 0) {
 		/* Header plus padding only. */
 		iov[1].iov_base = &pad;
 		iov[1].iov_len = padded_len - pkt_len;
 
 		error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL);
 	}
 
 	if (error != 0) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: error = %d\n", __func__, error);
 	}
 
 	return (error);
 }
 
 /*
  * Check whether there is data to read in the current ring buffer packet.
  * If the current packet is exhausted, advance the ring buffer read index
  * past it and peek at the next packet header, updating recv_data_len and
  * recv_data_off in the pcb.
  *
  * Returns the number of payload bytes that can be read, or 0 if nothing
  * is available.  On a missing channel or a malformed packet header the
  * socket's so_error is set to EIO; on a zero-length packet (FIN from the
  * peer) it is set to ESHUTDOWN.  A NULL pcb simply yields 0, since there
  * is no socket to report an error on.
  */
 static uint32_t
 hvsock_canread_check(struct hvs_pcb *pcb)
 {
 	uint32_t advance;
 	uint32_t tlen, hlen, dlen;
 	uint32_t bytes_canread = 0;
 	int error;
 
 	/* Never dereference a NULL pcb just to record an error. */
 	if (pcb == NULL)
 		return (0);
 
 	if (pcb->chan == NULL) {
 		pcb->so->so_error = EIO;
 		return (0);
 	}
 
 	/* Still have data not read yet on current packet */
 	if (pcb->recv_data_len > 0)
 		return (pcb->recv_data_len);
 
 	/*
 	 * Once a header has been peeked (rb_init), the packet it describes
 	 * still occupies the ring and has to be skipped.
 	 */
 	if (pcb->rb_init)
 		advance =
 		    VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
 	else
 		advance = 0;
 
 	bytes_canread = vmbus_chan_read_available(pcb->chan);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: bytes_canread on br = %u, advance = %u\n",
 	    __func__, bytes_canread, advance);
 
 	if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) {
 		/*
 		 * Nothing to read. Need to advance the rindex before
 		 * calling sbwait, so host knows to wake us up when data
 		 * is available to read on rb.
 		 */
 		error = vmbus_chan_recv_idxadv(pcb->chan, advance);
 		if (error) {
 			HVSOCK_DBG(HVSOCK_DBG_ERR,
 			    "%s: after calling vmbus_chan_recv_idxadv, "
 			    "got error = %d\n",  __func__, error);
 			return (0);
 		} else {
 			pcb->rb_init = false;
 			pcb->recv_data_len = 0;
 			pcb->recv_data_off = 0;
 			bytes_canread = vmbus_chan_read_available(pcb->chan);
 
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: advanced %u bytes, "
 			    " bytes_canread on br now = %u\n",
 			    __func__, advance, bytes_canread);
 
 			if (bytes_canread == 0)
 				return (0);
 			else
 				advance = 0;
 		}
 	}
 
 	/* Not enough bytes for a complete header behind the old packet. */
 	if (bytes_canread <
 	    advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t)))
 		return (0);
 
 	error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt,
 	    sizeof(struct hvs_pkt_header), advance);
 
 	/* Don't have anything to read */
 	if (error) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: after calling vmbus_chan_recv_peek, got error = %d\n",
 		    __func__, error);
 		return (0);
 	}
 
 	/*
 	 * We just read in a new packet header. Do some sanity checks.
 	 * The payload bound is computed in 64 bits so that a huge dlen
 	 * cannot wrap the sum where size_t is 32 bits wide.
 	 */
 	tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
 	hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen);
 	dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size;
 	if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) ||
 	    __predict_false(hlen > tlen) ||
 	    __predict_false(tlen <
 	    (uint64_t)dlen + sizeof(struct hvs_pkt_header))) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "invalid tlen(%u), hlen(%u) or dlen(%u)\n",
 		    tlen, hlen, dlen);
 		pcb->so->so_error = EIO;
 		return (0);
 	}
 	if (pcb->rb_init == false)
 		pcb->rb_init = true;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n",
 	    tlen, hlen, dlen);
 
 	/* The other side has sent a close FIN */
 	if (dlen == 0) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: Received FIN from other side\n", __func__);
 		/* Inform the caller by setting so_error to ESHUTDOWN. */
 		pcb->so->so_error = ESHUTDOWN;
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: canread on receive ring is %u \n", __func__, dlen);
 
 	pcb->recv_data_len = dlen;
 	pcb->recv_data_off = 0;
 
 	return (pcb->recv_data_len);
 }
 
 /*
  * Compute how many payload bytes can currently be written to the
  * channel's send ring.
  *
  * Returns 0 if pcb or its channel is NULL, or if the ring cannot hold a
  * one-byte packet plus a zero-length packet.  Otherwise returns the free
  * space minus two zero-length packet lengths, rounded down to a multiple
  * of 8.
  */
 static uint32_t
 hvsock_canwrite_check(struct hvs_pcb *pcb)
 {
 	uint32_t avail, usable;
 
 	if (pcb == NULL || pcb->chan == NULL)
 		return (0);
 
 	avail = vmbus_chan_write_available(pcb->chan);
 
 	/*
 	 * Room for a 0-length-payload packet is always kept aside for
 	 * the FIN.
 	 */
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: writeable is %u, should be greater than %ju\n",
 	    __func__, avail,
 	    (uintmax_t)(HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)));
 
 	/* Below this threshold the Tx ring is treated as full. */
 	if (avail < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))
 		return (0);
 
 	usable = avail - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0);
 	usable = rounddown2(usable, 8);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: available size is %u\n", __func__, usable);
 
 	return (usable);
 }
 
 /*
  * Set the channel's pending send size to the length of a packet carrying
  * HVSOCK_SEND_BUF_SZ bytes of payload.  Called from hvsock_open_channel()
  * once the channel has been opened.
  */
 static void
 hvsock_set_chan_pending_send_size(struct vmbus_channel *chan)
 {
 	vmbus_chan_set_pending_send_size(chan,
 	    HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ));
 }
 
 /*
  * Open the VMBus channel for socket 'so' and install hvsock_chan_cb()
  * as its callback, with the socket's pcb as callback context.
  *
  * Ring sizes: on hosts older than VMBUS_VERSION_WIN10_V5 the fixed
  * HVS_RINGBUF_SND_SIZE/HVS_RINGBUF_RCV_SIZE are used.  On newer hosts
  * the socket buffer high-water marks are used, clamped to
  * [HVS_RINGBUF_*_SIZE, HVS_RINGBUF_MAX_SIZE] and rounded down to a
  * multiple of PAGE_SIZE.
  *
  * Returns the result of vmbus_chan_open() (0 on success).
  */
 static int
 hvsock_open_channel(struct vmbus_channel *chan, struct socket *so)
 {
 	unsigned int rcvbuf, sndbuf;
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	int ret;
 
 	if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) {
 		sndbuf = HVS_RINGBUF_SND_SIZE;
 		rcvbuf = HVS_RINGBUF_RCV_SIZE;
 	} else {
 		sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE);
 		sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE);
 		sndbuf = rounddown2(sndbuf, PAGE_SIZE);
 		rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE);
 		rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE);
 		rcvbuf = rounddown2(rcvbuf, PAGE_SIZE);
 	}
 
 	/*
 	 * Can only read whatever user provided size of data
 	 * from ring buffer. Turn off batched reading.
 	 */
 	vmbus_chan_set_readbatch(chan, false);
 
 	ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0,
 	    hvsock_chan_cb, pcb);
 
 	if (ret != 0) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: failed to open hvsock channel, sndbuf = %u, "
 		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
 	} else {
 		/* The message used to contain a stray 'i' before "rcvbuf". */
 		HVSOCK_DBG(HVSOCK_DBG_INFO,
 		    "%s: hvsock channel opened, sndbuf = %u, "
 		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
 		/*
 		 * Set the pending send size so that the host sends us a
 		 * wakeup signal when there is enough space on the buffer
 		 * ring to write.
 		 */
 		hvsock_set_chan_pending_send_size(chan);
 	}
 
 	return (ret);
 }
 
 /*
  * Guest is listening passively on the socket. Open channel and
  * create a new socket for the conneciton.
  */
 static void
 hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so,
     struct hvsock_sc *sc)
 {
 	struct socket *new_so;
 	struct hvs_pcb *new_pcb, *pcb;
 	int error;
 
 	/* Do nothing if socket is not listening */
 	if (!SOLISTENING(so)) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: socket is not a listening one\n", __func__);
 		return;
 	}
 
 	/*
 	 * Create a new socket. This will call pru_attach to complete
 	 * the socket initialization and put the new socket onto
 	 * listening socket's sol_incomp list, waiting to be promoted
 	 * to sol_comp list.
 	 * The new socket created has ref count 0. There is no other
 	 * thread that changes the state of this new one at the
 	 * moment, so we don't need to hold its lock while opening
 	 * channel and filling out its pcb information.
 	 */
 	new_so = sonewconn(so, 0);
 	if (!new_so)
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: creating new socket failed\n", __func__);
 
 	/*
 	 * Now open the vmbus channel. If it fails, the socket will be
 	 * on the listening socket's sol_incomp queue until it is
 	 * replaced and aborted.
 	 */
 	error = hvsock_open_channel(chan, new_so);
 	if (error) {
 		new_so->so_error = error;
 		return;
 	}
 
 	pcb = so->so_pcb;
 	new_pcb = new_so->so_pcb;
 
 	hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port);
 	/* Remote port is unknown to guest in this type of conneciton */
 	hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN);
 	new_pcb->chan = chan;
 	new_pcb->recv_data_len = 0;
 	new_pcb->recv_data_off = 0;
 	new_pcb->rb_init = false;
 
 	new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan);
 	new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan);
 
 	hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED);
 
 	sc->pcb = new_pcb;
 
 	/*
 	 * Change the socket state to SS_ISCONNECTED. This will promote
 	 * the socket to sol_comp queue and wake up the thread which
 	 * is accepting connection.
 	 */
 	soisconnected(new_so);
 }
 
 
 /*
  * Complete a connection that the guest initiated: open the channel for
  * 'so', reset the pcb's receive state, move the socket from the bound
  * list to the connected list and mark it connected.  If the channel
  * cannot be opened the error is stored in so_error and nothing else
  * changes.
  */
 static void
 hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so)
 {
 	struct hvs_pcb *hpcb;
 	int rc;
 
 	rc = hvsock_open_channel(chan, so);
 	if (rc != 0) {
 		so->so_error = rc;
 		return;
 	}
 
 	/* Attach the channel and start with an empty receive state. */
 	hpcb = so->so_pcb;
 	hpcb->chan = chan;
 	hpcb->rb_init = false;
 	hpcb->recv_data_off = 0;
 	hpcb->recv_data_len = 0;
 
 	/* Bound -> connected, done atomically under the list mutex. */
 	mtx_lock(&hvs_trans_socks_mtx);
 	__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
 	__hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED);
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	/*
 	 * Switching the socket to SS_ISCONNECTED wakes up the thread
 	 * sleeping in the connect call.
 	 */
 	soisconnected(so);
 }
 
 static void
 hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc)
 {
 	struct hyperv_guid *inst_guid, *type_guid;
 	bool conn_from_host;
 	struct sockaddr_hvs addr;
 	struct socket *so;
 	struct hvs_pcb *pcb;
 
 	type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan);
 	inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan);
 	conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan);
 
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is ");
 	hvsock_print_guid(type_guid);
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is ");
 	hvsock_print_guid(inst_guid);
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n",
 	    (conn_from_host == true ) ? "from" : "to");
 
 	/*
 	 * The listening port should be in [0, MAX_LISTEN_PORT]
 	 */
 	if (!is_valid_srv_id(type_guid))
 		return;
 
 	/*
 	 * There should be a bound socket already created no matter
 	 * it is a passive or active connection.
 	 * For host initiated connection (passive on guest side),
 	 * the  type_guid contains the port which guest is bound and
 	 * listening.
 	 * For the guest initiated connection (active on guest side),
 	 * the inst_guid contains the port that guest has auto bound
 	 * to.
 	 */
 	hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid);
 	so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND);
 	if (!so) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: no bound socket found for port %u\n",
 		    __func__, addr.hvs_port);
 		return;
 	}
 
 	if (conn_from_host) {
 		hvsock_open_conn_passive(chan, so, sc);
 	} else {
 		(void) hvs_trans_lock();
 		pcb = so->so_pcb;
 		if (pcb && pcb->so) {
 			sc->pcb = so2hvspcb(so);
 			hvsock_open_conn_active(chan, so);
 		} else {
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: channel detached before open\n", __func__);
 		}
 		hvs_trans_unlock();
 	}
 
 }
 
 /*
  * Device probe method: claim the device only if it has a VMBus channel
  * and that channel is an hvsock channel.
  *
  * Returns BUS_PROBE_DEFAULT for an hvsock channel, ENXIO otherwise.
  */
 static int
 hvsock_probe(device_t dev)
 {
 	struct vmbus_channel *channel = vmbus_get_channel(dev);
 
 	/*
 	 * Handle the missing channel separately: the debug message
 	 * below looks up the channel id and must not be handed NULL.
 	 */
 	if (channel == NULL) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "hvsock_probe called without a channel\n");
 
 		return (ENXIO);
 	}
 
 	if (!vmbus_chan_is_hvs(channel)) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "hvsock_probe called but not a hvsock channel id %u\n",
 		    vmbus_chan_id(channel));
 
 		return (ENXIO);
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "hvsock_probe got a hvsock channel id %u\n",
 	    vmbus_chan_id(channel));
 
 	return (BUS_PROBE_DEFAULT);
 }
 
 /*
  * Device attach method: try to open the connection for the device's
  * channel.  Always returns 0.
  */
 static int
 hvsock_attach(device_t dev)
 {
 	struct vmbus_channel *chan;
 	struct hvsock_sc *softc;
 
 	chan = vmbus_get_channel(dev);
 	softc = device_get_softc(dev);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n");
 
 	hvsock_open_connection(chan, softc);
 
 	/*
 	 * Success is reported unconditionally.  Should opening the
 	 * connection have failed, the host rescinds the device in 30
 	 * seconds and cleanup happens at that point in
 	 * vmbus_chan_msgproc_chrescind().
 	 */
 	return (0);
 }
 
 /*
  * Device detach method: tear down the connection served by this device.
  *
  * If a pcb is attached to the softc, the socket (when still present) is
  * marked disconnected, the pcb is taken off the bound/connected lists,
  * both socket I/O locks are acquired so that no reader or sender is
  * using the buffer rings, and the pcb is zeroed and freed.  The channel
  * is closed in every case.  Always returns 0.
  */
 static int
 hvsock_detach(device_t dev)
 {
 	struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
 	struct socket *so;
 	int retry;
 
 	if (bootverbose)
 		device_printf(dev, "hvsock_detach called.\n");
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n");
 
 	if (sc->pcb != NULL) {
 		(void) hvs_trans_lock();
 
 		so = hsvpcb2so(sc->pcb);
 		if (so) {
 			/* Close the connection */
 			if (so->so_state &
 			    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
 				soisdisconnected(so);
 		}
 
 		/* Remove the pcb from whichever list it is on. */
 		mtx_lock(&hvs_trans_socks_mtx);
 		__hvs_remove_pcb_from_list(sc->pcb,
 		    HVS_LIST_BOUND | HVS_LIST_CONNECTED);
 		mtx_unlock(&hvs_trans_socks_mtx);
 
 		/*
 		 * Close channel while no reader and sender are working
 		 * on the buffer rings.
 		 */
 		if (so) {
 			/*
 			 * Poll for each I/O lock without sleeping (flags
 			 * 0); while it is busy, re-signal the disconnect
 			 * and spin for 500 microseconds before retrying.
 			 */
 			retry = 0;
 			while (SOCK_IO_RECV_LOCK(so, 0) == EWOULDBLOCK) {
 				/*
 				 * Someone is reading, rx br is busy
 				 */
 				soisdisconnected(so);
 				DELAY(500);
 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 				    "waiting for rx reader to exit, "
 				    "retry = %d\n", retry++);
 			}
 			retry = 0;
 			while (SOCK_IO_SEND_LOCK(so, 0) == EWOULDBLOCK) {
 				/*
 				 * Someone is sending, tx br is busy
 				 */
 				soisdisconnected(so);
 				DELAY(500);
 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 				    "waiting for tx sender to exit, "
 				    "retry = %d\n", retry++);
 			}
 		}
 
 
 		/* Scrub and release the pcb; the softc no longer owns one. */
 		bzero(sc->pcb, sizeof(struct hvs_pcb));
 		free(sc->pcb, M_HVSOCK);
 		sc->pcb = NULL;
 
 		if (so) {
 			/* Drop the I/O locks and detach the freed pcb from the socket. */
 			SOCK_IO_RECV_UNLOCK(so);
 			SOCK_IO_SEND_UNLOCK(so);
 			so->so_pcb = NULL;
 		}
 
 		hvs_trans_unlock();
 	}
 
 	vmbus_chan_close(vmbus_get_channel(dev));
 
 	return (0);
 }
 
 /* Device methods implemented by the hv_sock driver. */
 static device_method_t hvsock_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, hvsock_probe),
 	DEVMETHOD(device_attach, hvsock_attach),
 	DEVMETHOD(device_detach, hvsock_detach),
 	DEVMETHOD_END
 };
 
 /* Driver description: name, method table and softc size. */
 static driver_t hvsock_driver = {
 	"hv_sock",
 	hvsock_methods,
 	sizeof(struct hvsock_sc)
 };
 
 /* Register the driver on the vmbus bus; the module depends on vmbus. */
 DRIVER_MODULE(hvsock, vmbus, hvsock_driver, NULL, NULL);
 MODULE_VERSION(hvsock, 1);
 MODULE_DEPEND(hvsock, vmbus, 1, 1, 1);
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 30383490ca41..2de015254ab9 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -1,1370 +1,1370 @@
 /*-
  * Copyright (c) 2013-2015 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 1998, David Greenman. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kern_tls.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/ktls.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/sf_buf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 
 #include <net/vnet.h>
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 
 /* malloc(9) type used for struct sf_io and struct sendfile_sync. */
 static MALLOC_DEFINE(M_SENDFILE, "sendfile", "sendfile dynamic memory");
 
 /*
  * sendfile-private mbuf external storage flags, as consumed by
  * sendfile_free_mext() and sendfile_free_mext_pg():
  *  EXT_FLAG_SYNC       - signal the attached struct sendfile_sync on free
  *  EXT_FLAG_NOCACHE    - release the page(s) with VPR_TRYFREE
  *  EXT_FLAG_CACHE_LAST - release the last page of an M_EXTPG mbuf with
  *                        no release flags, regardless of NOCACHE
  */
 #define	EXT_FLAG_SYNC		EXT_FLAG_VENDOR1
 #define	EXT_FLAG_NOCACHE	EXT_FLAG_VENDOR2
 #define	EXT_FLAG_CACHE_LAST	EXT_FLAG_VENDOR3
 
 /*
  * Structure describing a single sendfile(2) I/O, which may consist of
  * several underlying pager I/Os.
  *
  * The syscall context allocates the structure and initializes 'nios'
  * to 1.  As sendfile_swapin() runs through pages and starts asynchronous
  * paging operations, it increments 'nios'.
  *
  * Every I/O completion calls sendfile_iodone(), which decrements the 'nios',
  * and the syscall also calls sendfile_iodone() after allocating all mbufs,
  * linking them and sending to socket.  Whoever reaches zero 'nios' is
  * responsible for calling pru_ready on the socket, to notify it of the
  * readiness of the data.
  */
 struct sf_io {
 	volatile u_int	nios;		/* reference count of outstanding I/Os */
 	u_int		error;		/* last non-zero error seen by sendfile_iodone() */
 	int		npages;		/* number of entries in pa[] */
 	struct socket	*so;		/* socket the data is sent on */
 	struct mbuf	*m;		/* mbuf handed to pru_ready; NULL if nothing was sent */
 	vm_object_t	obj;		/* VM object the pages belong to */
 	vm_pindex_t	pindex0;	/* page index of pa[0], set by sendfile_swapin() */
 #ifdef KERN_TLS
 	struct ktls_session *tls;	/* TLS session of the mbufs, if any */
 #endif
 	vm_page_t	pa[];		/* the pages of this I/O */
 };
 
 /*
  * Structure used to track requests with SF_SYNC flag.
  */
 struct sendfile_sync {
 	struct mtx	mtx;		/* protects count and waiting */
 	struct cv	cv;		/* signalled when count drops to zero */
 	unsigned	count;		/* outstanding references; see sendfile_sync_signal() */
 	bool		waiting;	/* false once the sendfile() waiter has given up */
 };
 
 /*
  * Destroy and free a sendfile_sync.  Its count must already be zero.
  */
 static void
 sendfile_sync_destroy(struct sendfile_sync *sfs)
 {
 	KASSERT(sfs->count == 0, ("sendfile sync %p still busy", sfs));
 
 	cv_destroy(&sfs->cv);
 	mtx_destroy(&sfs->mtx);
 	free(sfs, M_SENDFILE);
 }
 
 /*
  * Drop one reference on a sendfile_sync.  When the last reference goes
  * away, either wake up the sendfile() waiter or, if nobody is waiting
  * any more, destroy the structure.
  */
 static void
 sendfile_sync_signal(struct sendfile_sync *sfs)
 {
 	mtx_lock(&sfs->mtx);
 	KASSERT(sfs->count > 0, ("sendfile sync %p not busy", sfs));
 
 	sfs->count--;
 	if (sfs->count != 0) {
 		mtx_unlock(&sfs->mtx);
 		return;
 	}
 
 	if (sfs->waiting) {
 		cv_signal(&sfs->cv);
 		mtx_unlock(&sfs->mtx);
 		return;
 	}
 
 	/*
 	 * The sendfile() waiter was interrupted by a signal, so the
 	 * structure is ours to tear down.  The mutex is still held here
 	 * and is destroyed together with the structure.
 	 */
 	sendfile_sync_destroy(sfs);
 }
 
 /* One counter per uint64_t-sized slot of struct sfstat. */
 counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
 
 /*
  * Allocate the sendfile statistics counters; registered below to run
  * at SI_SUB_MBUF.
  */
 static void
 sfstat_init(const void *unused)
 {
 
 	COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t),
 	    M_WAITOK);
 }
 SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
 
 static int
 sfstat_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct sfstat s;
 
 	COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
 	if (req->newptr)
 		COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
 	return (SYSCTL_OUT(req, &s, sizeof(s)));
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat,
     CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
     sfstat_sysctl, "I",
     "sendfile statistics");
 
 /*
  * Free routine for EXT_SFBUF mbufs created by sendfile: free the sf_buf,
  * release its page and, for EXT_FLAG_SYNC mbufs, signal the
  * sendfile_sync stored in ext_arg2.
  */
 static void
 sendfile_free_mext(struct mbuf *m)
 {
 	struct sendfile_sync *sfs;
 	struct sf_buf *sf;
 	vm_page_t pg;
 	int relflags;
 
 	KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF,
 	    ("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m));
 
 	/* NOCACHE mbufs ask for the page to be freed if possible. */
 	relflags = 0;
 	if ((m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0)
 		relflags = VPR_TRYFREE;
 
 	/* Look up the page before the sf_buf mapping it goes away. */
 	sf = m->m_ext.ext_arg1;
 	pg = sf_buf_page(sf);
 	sf_buf_free(sf);
 	vm_page_release(pg, relflags);
 
 	if ((m->m_ext.ext_flags & EXT_FLAG_SYNC) == 0)
 		return;
 
 	sfs = m->m_ext.ext_arg2;
 	sendfile_sync_signal(sfs);
 }
 
 /*
  * Free routine for M_EXTPG mbufs created by sendfile: release every
  * page of the mbuf and, for EXT_FLAG_SYNC mbufs, signal the
  * sendfile_sync stored in ext_arg1.
  */
 static void
 sendfile_free_mext_pg(struct mbuf *m)
 {
 	struct sendfile_sync *sfs;
 	int idx, last, relflags;
 	bool keep_last;
 
 	M_ASSERTEXTPG(m);
 
 	keep_last = (m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST) != 0;
 	relflags = 0;
 	if ((m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0)
 		relflags = VPR_TRYFREE;
 
 	/*
 	 * With CACHE_LAST the final page is released with no flags even
 	 * if the others are released with VPR_TRYFREE.
 	 */
 	last = m->m_epg_npgs - 1;
 	for (idx = 0; idx < m->m_epg_npgs; idx++) {
 		vm_page_release(PHYS_TO_VM_PAGE(m->m_epg_pa[idx]),
 		    (keep_last && idx == last) ? 0 : relflags);
 	}
 
 	if ((m->m_ext.ext_flags & EXT_FLAG_SYNC) == 0)
 		return;
 
 	sfs = m->m_ext.ext_arg1;
 	sendfile_sync_signal(sfs);
 }
 
 /*
  * Compute how many bytes of a transfer of 'len' bytes starting at file
  * offset 'off' land in page i of n.  The first page is limited by the
  * offset within the page and by len, the last page by where the
  * transfer ends; every other page is full.
  */
 static inline off_t
 xfsize(int i, int n, off_t off, off_t len)
 {
 	off_t tail;
 
 	/* First page: from the in-page offset to the page end, at most len. */
 	if (i == 0)
 		return (omin(PAGE_SIZE - (off & PAGE_MASK), len));
 
 	/* Pages in the middle are always full. */
 	if (i != n - 1)
 		return (PAGE_SIZE);
 
 	/* Last page: partial unless the transfer ends on a page boundary. */
 	tail = (off + len) & PAGE_MASK;
 	if (tail > 0)
 		return (tail);
 
 	return (PAGE_SIZE);
 }
 
 /*
  * Return the offset within the object at which page i of a transfer
  * starting at 'off' begins: 'off' itself for the first page, the
  * page-truncated offset for any later page.
  */
 static inline vm_ooffset_t
 vmoff(int i, off_t off)
 {
 
 	if (i != 0)
 		return (trunc_page(off + i * PAGE_SIZE));
 
 	return ((vm_ooffset_t)off);
 }
 
 /*
  * Used when allocating a page or an sf_buf failed and the request
  * shrinks from 'old' pages to 'new' pages.  Acts as if there had been
  * less space to begin with: the xfsize() of every dropped page is
  * subtracted from *space.
  */
 static inline void
 fixspace(int old, int new, off_t off, int *space)
 {
 	int last;
 
 	KASSERT(old > new, ("%s: old %d new %d", __func__, old, new));
 
 	/* The last page of the original run goes first. */
 	last = old - 1;
 	*space -= xfsize(last, old, off, *space);
 
 	/* Only a single page was dropped. */
 	if (new == last)
 		return;
 
 	/* Nothing survives, so the first page is dropped as well. */
 	if (new == 0) {
 		*space -= xfsize(0, last, off, *space);
 		new = 1;
 	}
 
 	/* Whatever remains in between is made of full pages. */
 	*space -= (last - new) * PAGE_SIZE;
 
 	KASSERT(*space >= 0, ("%s: space went backwards", __func__));
 }
 
 /*
  * Block until every in-flight I/O of 'sfio' has completed, i.e. until
  * 'nios' is back to 1.  Pages must not be unwired while I/O on them is
  * still running.
  */
 static void
 sendfile_iowait(struct sf_io *sfio, const char *wmesg)
 {
 
 	for (;;) {
 		if (atomic_load_int(&sfio->nios) == 1)
 			break;
 		pause(wmesg, 1);
 	}
 }
 
 /*
  * I/O completion callback.
  *
  * arg   - the struct sf_io this I/O belongs to
  * pa    - pages of the completed run
  * count - number of entries in pa
  * error - non-zero if the I/O failed; recorded in sfio->error
  *
  * Restores bogus page slots, unbusies the other pages and drops one
  * reference on sfio->nios.  The caller that drops the last reference
  * finishes the request: it either frees the sfio (nothing was sent),
  * aborts the socket (I/O error), hands the mbufs to software TLS, or
  * calls pru_ready.
  */
 static void
 sendfile_iodone(void *arg, vm_page_t *pa, int count, int error)
 {
 	struct sf_io *sfio = arg;
 	struct socket *so;
 	int i;
 
 	if (error != 0)
 		sfio->error = error;
 
 	/*
 	 * Restore the valid page pointers.  They are already
 	 * unbusied, but still wired.
 	 *
 	 * XXXKIB since pages are only wired, and we do not
 	 * own the object lock, other users might have
 	 * invalidated them in meantime.  Similarly, after we
 	 * unbusied the swapped-in pages, they can become
 	 * invalid under us.
 	 */
 	MPASS(count == 0 || pa[0] != bogus_page);
 	for (i = 0; i < count; i++) {
 		if (pa[i] == bogus_page) {
 			/* Slot in sfio->pa[] is relative to pindex0. */
 			sfio->pa[(pa[0]->pindex - sfio->pindex0) + i] =
 			    pa[i] = vm_page_relookup(sfio->obj,
 			    pa[0]->pindex + i);
 			KASSERT(pa[i] != NULL,
 			    ("%s: page %p[%d] disappeared",
 			    __func__, pa, i));
 		} else {
 			vm_page_xunbusy_unchecked(pa[i]);
 		}
 	}
 
 	/* Only the caller dropping the last reference goes on. */
 	if (!refcount_release(&sfio->nios))
 		return;
 
 #ifdef INVARIANTS
 	/*
 	 * NOTE(review): the loop starts at 1, so the "i == 0" test below
 	 * never fires and page 0 is not checked for being wired.
 	 */
 	for (i = 1; i < sfio->npages; i++) {
 		if (sfio->pa[i] == NULL)
 			break;
 		KASSERT(vm_page_wired(sfio->pa[i]),
 		    ("sfio %p page %d %p not wired", sfio, i, sfio->pa[i]));
 		if (i == 0)
 			continue;
 		KASSERT(sfio->pa[0]->object == sfio->pa[i]->object,
 		    ("sfio %p page %d %p wrong owner %p %p", sfio, i,
 		    sfio->pa[i], sfio->pa[0]->object, sfio->pa[i]->object));
 		KASSERT(sfio->pa[0]->pindex + i == sfio->pa[i]->pindex,
 		    ("sfio %p page %d %p wrong index %jx %jx", sfio, i,
 		    sfio->pa[i], (uintmax_t)sfio->pa[0]->pindex,
 		    (uintmax_t)sfio->pa[i]->pindex));
 	}
 #endif
 
 	vm_object_pip_wakeup(sfio->obj);
 
 	if (sfio->m == NULL) {
 		/*
 		 * Either I/O operation failed, or we failed to allocate
 		 * buffers, or we bailed out on first busy page, or we
 		 * succeeded filling the request without any I/Os. Anyway,
 		 * pru_send hadn't been executed - nothing had been sent
 		 * to the socket yet.
 		 */
 		MPASS((curthread->td_pflags & TDP_KTHREAD) == 0);
 		free(sfio, M_SENDFILE);
 		return;
 	}
 
 #if defined(KERN_TLS) && defined(INVARIANTS)
 	if ((sfio->m->m_flags & M_EXTPG) != 0)
 		KASSERT(sfio->tls == sfio->m->m_epg_tls,
 		    ("TLS session mismatch"));
 	else
 		KASSERT(sfio->tls == NULL,
 		    ("non-ext_pgs mbuf with TLS session"));
 #endif
 	so = sfio->so;
 	CURVNET_SET(so->so_vnet);
 	if (__predict_false(sfio->error)) {
 		/*
 		 * I/O operation failed.  The state of data in the socket
 		 * is now inconsistent, and all what we can do is to tear
 		 * it down. Protocol abort method would tear down protocol
 		 * state, free all ready mbufs and detach not ready ones.
 		 * We will free the mbufs corresponding to this I/O manually.
 		 *
 		 * The socket would be marked with EIO and made available
 		 * for read, so that application receives EIO on next
 		 * syscall and eventually closes the socket.
 		 */
 		so->so_proto->pr_usrreqs->pru_abort(so);
 		so->so_error = EIO;
 
 		mb_free_notready(sfio->m, sfio->npages);
 #ifdef KERN_TLS
 	} else if (sfio->tls != NULL && sfio->tls->mode == TCP_TLS_MODE_SW) {
 		/*
 		 * I/O operation is complete, but we still need to
 		 * encrypt.  We cannot do this in the interrupt thread
 		 * of the disk controller, so forward the mbufs to a
 		 * different thread.
 		 *
 		 * Donate the socket reference from sfio to rather
 		 * than explicitly invoking soref().
 		 */
 		ktls_enqueue(sfio->m, so, sfio->npages);
 		goto out_with_ref;
 #endif
 	} else
 		(void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
 		    sfio->npages);
 
 	/* Drop the socket reference; the software TLS path skips this. */
 	sorele(so);
 #ifdef KERN_TLS
 out_with_ref:
 #endif
 	CURVNET_RESTORE();
 	free(sfio, M_SENDFILE);
 }
 
 /*
  * Iterate through pages vector and request paging for non-valid pages.
  *
  * obj     - VM object backing the file
  * sfio    - I/O descriptor; its pa[] and npages describe the pages to
  *           grab, and its pindex0 is set here
  * nios    - out: number of asynchronous pager requests started
  * off/len - byte range of the transfer
  * rhpages - readahead hint, passed to the pager only for the run that
  *           reaches the last page
  * flags   - sendfile flags; only SF_NODISKIO is examined
  *
  * Returns 0 on success, or EIO if the pager rejected a request, in
  * which case all grabbed pages have been unwired and pa[] cleared.
  */
 static int
 sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
     off_t len, int rhpages, int flags)
 {
 	vm_page_t *pa;
 	int a, count, count1, grabbed, i, j, npages, rv;
 
 	pa = sfio->pa;
 	npages = sfio->npages;
 	*nios = 0;
 	/* From here on 'flags' holds page allocation flags. */
 	flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
 	sfio->pindex0 = OFF_TO_IDX(off);
 
 	/*
 	 * First grab all the pages and wire them.  Note that we grab
 	 * only required pages.  Readahead pages are dealt with later.
 	 */
 	grabbed = vm_page_grab_pages_unlocked(obj, OFF_TO_IDX(off),
 	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | flags, pa, npages);
 	if (grabbed < npages) {
 		/* NOTE(review): this 'i' shadows the function-scope 'i'. */
 		for (int i = grabbed; i < npages; i++)
 			pa[i] = NULL;
 		npages = grabbed;
 		rhpages = 0;
 	}
 
 	for (i = 0; i < npages;) {
 		/* Skip valid pages. */
 		if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK,
 		    xfsize(i, npages, off, len))) {
 			vm_page_xunbusy(pa[i]);
 			SFSTAT_INC(sf_pages_valid);
 			i++;
 			continue;
 		}
 
 		/*
 		 * Next page is invalid.  Check if it belongs to pager.  It
 		 * may not be there, which is a regular situation for shmem
 		 * pager.  For vnode pager this happens only in case of
 		 * a sparse file.
 		 *
 		 * Important feature of vm_pager_has_page() is the hint
 		 * stored in 'a', about how many pages we can pagein after
 		 * this page in a single I/O.
 		 */
 		VM_OBJECT_RLOCK(obj);
 		if (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), NULL,
 		    &a)) {
 			/* Not backed by the pager: supply a zeroed, valid page. */
 			VM_OBJECT_RUNLOCK(obj);
 			pmap_zero_page(pa[i]);
 			vm_page_valid(pa[i]);
 			MPASS(pa[i]->dirty == 0);
 			vm_page_xunbusy(pa[i]);
 			i++;
 			continue;
 		}
 		VM_OBJECT_RUNLOCK(obj);
 
 		/*
 		 * We want to pagein as many pages as possible, limited only
 		 * by the 'a' hint and actual request.
 		 */
 		count = min(a + 1, npages - i);
 
 		/*
 		 * We should not pagein into a valid page because
 		 * there might be still unfinished write tracked by
 		 * e.g. a buffer, thus we substitute any valid pages
 		 * with the bogus one.
 		 *
 		 * We must not leave around xbusy pages which are not
 		 * part of the run passed to vm_pager_getpages(),
 		 * otherwise pager might deadlock waiting for the busy
 		 * status of the page, e.g. if it constitutes the
 		 * buffer needed to validate other page.
 		 *
 		 * First trim the end of the run consisting of the
 		 * valid pages, then replace the rest of the valid
 		 * with bogus.
 		 */
 		/* count1 keeps the untrimmed run length for advancing 'i'. */
 		count1 = count;
 		for (j = i + count - 1; j > i; j--) {
 			if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
 			    xfsize(j, npages, off, len))) {
 				vm_page_xunbusy(pa[j]);
 				SFSTAT_INC(sf_pages_valid);
 				count--;
 			} else {
 				break;
 			}
 		}
 
 		/*
 		 * The last page in the run pa[i + count - 1] is
 		 * guaranteed to be invalid by the trim above, so it
 		 * is not replaced with bogus, thus -1 in the loop end
 		 * condition.
 		 */
 		MPASS(pa[i + count - 1]->valid != VM_PAGE_BITS_ALL);
 		for (j = i + 1; j < i + count - 1; j++) {
 			if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
 			    xfsize(j, npages, off, len))) {
 				vm_page_xunbusy(pa[j]);
 				SFSTAT_INC(sf_pages_valid);
 				SFSTAT_INC(sf_pages_bogus);
 				pa[j] = bogus_page;
 			}
 		}
 
 		/* One reference per pager request; sendfile_iodone() drops it. */
 		refcount_acquire(&sfio->nios);
 		rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
 		    i + count == npages ? &rhpages : NULL,
 		    &sendfile_iodone, sfio);
 		if (__predict_false(rv != VM_PAGER_OK)) {
 			/* Let the requests started earlier finish first. */
 			sendfile_iowait(sfio, "sferrio");
 
 			/*
 			 * Do remaining pages recovery before returning EIO.
 			 * Pages from 0 to npages are wired.
 			 * Pages from (i + count1) to npages are busied.
 			 */
 			for (j = 0; j < npages; j++) {
 				if (j >= i + count1)
 					vm_page_xunbusy(pa[j]);
 				KASSERT(pa[j] != NULL && pa[j] != bogus_page,
 				    ("%s: page %p[%d] I/O recovery failure",
 				    __func__, pa, j));
 				vm_page_unwire(pa[j], PQ_INACTIVE);
 				pa[j] = NULL;
 			}
 			return (EIO);
 		}
 
 		SFSTAT_INC(sf_iocnt);
 		SFSTAT_ADD(sf_pages_read, count);
 		if (i + count == npages)
 			SFSTAT_ADD(sf_rhpages_read, rhpages);
 
 		/* Skip the whole run, including the trimmed valid tail. */
 		i += count1;
 		(*nios)++;
 	}
 
 	if (*nios == 0 && npages != 0)
 		SFSTAT_INC(sf_noiocnt);
 
 	return (0);
 }
 
 /*
  * Look up the VM object that backs the file 'fp' and take a reference
  * on it for the caller.
  *
  * Vnode-backed descriptors must refer to a regular file (VREG) that has
  * a VM object; the vnode is locked shared while it is examined and is
  * unlocked again before returning.  POSIX shared memory descriptors
  * use the object and size recorded in their shmfd.  Any other
  * descriptor type is rejected.
  *
  * On success *obj_res holds the referenced object, *vp_res the vnode
  * (or NULL), *shmfd_res the shmfd (or NULL), *obj_size the object size
  * and *bsize the mount's f_iosize (left at 0 for shared memory).
  *
  * Returns 0 on success, EINVAL for an unsupported descriptor, EBADF if
  * the object is marked OBJ_DEAD, or the error from VOP_GETATTR().
  */
 static int
 sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
     struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size,
     int *bsize)
 {
 	struct vattr attrs;
 	struct shmfd *shm;
 	struct vnode *vn;
 	vm_object_t object;
 	int rc;
 
 	/* Start from a clean slate so error paths return NULL results. */
 	*vp_res = NULL;
 	*shmfd_res = NULL;
 	*bsize = 0;
 	vn = NULL;
 	shm = NULL;
 	object = NULL;
 	rc = 0;
 
 	switch (fp->f_type) {
 	case DTYPE_VNODE:
 		vn = fp->f_vnode;
 		vn_lock(vn, LK_SHARED | LK_RETRY);
 		if (vn->v_type != VREG) {
 			rc = EINVAL;
 			goto unlock;
 		}
 		*bsize = vn->v_mount->mnt_stat.f_iosize;
 		object = vn->v_object;
 		if (object == NULL) {
 			rc = EINVAL;
 			goto unlock;
 		}
 
 		/*
 		 * Prefer the size kept by the vnode pager; filesystems
 		 * would otherwise have to update the pager size and the
 		 * file size atomically.  Fall back to the file attributes
 		 * for any other object type.
 		 */
 		if (object->type != OBJT_VNODE) {
 			rc = VOP_GETATTR(vn, &attrs, td->td_ucred);
 			if (rc != 0)
 				goto unlock;
 			*obj_size = attrs.va_size;
 			VM_OBJECT_RLOCK(object);
 		} else {
 			VM_OBJECT_RLOCK(object);
 			*obj_size = object->un_pager.vnp.vnp_size;
 		}
 		break;
 
 	case DTYPE_SHM:
 		shm = fp->f_data;
 		object = shm->shm_object;
 		VM_OBJECT_RLOCK(object);
 		*obj_size = shm->shm_size;
 		break;
 
 	default:
 		rc = EINVAL;
 		goto unlock;
 	}
 
 	/* The object read lock is held from here on. */
 	if ((object->flags & OBJ_DEAD) != 0) {
 		VM_OBJECT_RUNLOCK(object);
 		rc = EBADF;
 		goto unlock;
 	}
 
 	/*
 	 * Hold an extra reference on the object so that a forced
 	 * reclamation of its vnode does not destroy it right away.
 	 */
 	vm_object_reference_locked(object);
 	VM_OBJECT_RUNLOCK(object);
 
 	*obj_res = object;
 	*vp_res = vn;
 	*shmfd_res = shm;
 
 unlock:
 	if (vn != NULL)
 		VOP_UNLOCK(vn);
 	return (rc);
 }
 
 /*
  * Translate the descriptor 's' into its file and socket for sendfile(2).
  *
  * The descriptor is looked up with the send capability rights.  Only
  * SOCK_STREAM sockets are accepted, and SCTP sockets are refused since
  * one-to-one style SCTP does not currently work with sendfile().
  *
  * *sock_fp and *so are NULL if the lookup itself fails.  Once the
  * lookup has succeeded both are filled in, even when EINVAL is
  * returned, so the caller still has a file reference to drop.
  */
 static int
 sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
     struct socket **so)
 {
 	struct socket *sock;
 	int rc;
 
 	*sock_fp = NULL;
 	*so = NULL;
 
 	rc = getsock_cap(td, s, &cap_send_rights, sock_fp, NULL, NULL);
 	if (rc != 0)
 		return (rc);
 
 	sock = (*sock_fp)->f_data;
 	*so = sock;
 
 	/* Stream sockets only, and not SCTP. */
 	if (sock->so_type != SOCK_STREAM ||
 	    sock->so_proto->pr_protocol == IPPROTO_SCTP)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Send the contents of the file 'fp' to the socket descriptor 'sockfd'.
  *
  * fp       source file; must resolve to a VM object via sendfile_getobj().
  * sockfd   destination socket descriptor, checked by sendfile_getsock().
  * hdr_uio  optional header data sent ahead of the file data (may be NULL).
  * trl_uio  optional trailer data, sent with kern_writev() after the file
  *          data (may be NULL).
  * offset   starting offset in the file.
  * nbytes   number of file bytes to send; 0 means up to the object size.
  * sent     if not NULL, receives the number of bytes accounted as sent.
  * flags    SF_* flags; SF_SYNC, SF_NOCACHE, SF_USER_READAHEAD and the
  *          SF_READAHEAD() count are consulted here.
  * td       calling thread.
  *
  * Returns 0 or an errno value.  ERESTART is converted to EINTR before
  * returning.
  */
 int
 vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
     struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
     struct thread *td)
 {
 	struct file *sock_fp;
 	struct vnode *vp;
 	struct vm_object *obj;
 	vm_page_t pga;
 	struct socket *so;
 #ifdef KERN_TLS
 	struct ktls_session *tls;
 #endif
 	struct mbuf *m, *mh, *mhtail;
 	struct sf_buf *sf;
 	struct shmfd *shmfd;
 	struct sendfile_sync *sfs;
 	struct vattr va;
 	off_t off, sbytes, rem, obj_size, nobj_size;
 	int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
 #ifdef KERN_TLS
 	int tls_enq_cnt;
 #endif
 	bool use_ext_pgs;
 
 	obj = NULL;
 	so = NULL;
 	m = mh = NULL;
 	sfs = NULL;
 #ifdef KERN_TLS
 	tls = NULL;
 #endif
 	hdrlen = sbytes = 0;
 	softerr = 0;
 	use_ext_pgs = false;
 
 	error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
 	if (error != 0)
 		return (error);
 
 	error = sendfile_getsock(td, sockfd, &sock_fp, &so);
 	if (error != 0)
 		goto out;
 
 #ifdef MAC
 	error = mac_socket_check_send(td->td_ucred, so);
 	if (error != 0)
 		goto out;
 #endif
 
 	SFSTAT_INC(sf_syscalls);
 	SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags));
 
 	/*
 	 * SF_SYNC: allocate the tracking structure that every mbuf built
 	 * below is counted against (sfs->count); it is waited on at the
 	 * end of this function.
 	 */
 	if (flags & SF_SYNC) {
 		sfs = malloc(sizeof(*sfs), M_SENDFILE, M_WAITOK | M_ZERO);
 		mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 		cv_init(&sfs->cv, "sendfile");
 		sfs->waiting = true;
 	}
 
 	/*
 	 * File bytes left to send: the remainder of the object past
 	 * 'offset', capped by 'nbytes' when the caller gave a count.
 	 */
 	rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset;
 
 	/*
 	 * Protect against multiple writers to the socket.
 	 *
 	 * XXXRW: Historically this has assumed non-interruptibility, so now
 	 * we implement that, but possibly shouldn't.
 	 */
 	error = SOCK_IO_SEND_LOCK(so, SBL_WAIT | SBL_NOINTR);
 	if (error != 0)
 		goto out;
 #ifdef KERN_TLS
 	tls = ktls_hold(so->so_snd.sb_tls_info);
 #endif
 
 	/*
 	 * Loop through the pages of the file, starting with the requested
 	 * offset. Get a file page (do I/O if necessary), map the file page
 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 	 * it on the socket.
 	 * This is done in two loops.  The inner loop turns as many pages
 	 * as it can, up to available socket buffer space, without blocking
 	 * into mbufs to have it bulk delivered into the socket send buffer.
 	 * The outer loop checks the state and available space of the socket
 	 * and takes care of the overall progress.
 	 */
 	for (off = offset; rem > 0; ) {
 		struct sf_io *sfio;
 		vm_page_t *pa;
 		struct mbuf *m0, *mtail;
 		int nios, space, npages, rhpages;
 
 		mtail = NULL;
 		/*
 		 * Check the socket state for ongoing connection,
 		 * no errors and space in socket buffer.
 		 * If space is low allow for the remainder of the
 		 * file to be processed if it fits the socket buffer.
 		 * Otherwise block in waiting for sufficient space
 		 * to proceed, or if the socket is nonblocking, return
 		 * to userland with EAGAIN while reporting how far
 		 * we've come.
 		 * We wait until the socket buffer has significant free
 		 * space to do bulk sends.  This makes good use of file
 		 * system read ahead and allows packet segmentation
 		 * offloading hardware to take over lots of work.  If
 		 * we were not careful here we would send off only one
 		 * sfbuf at a time.
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 retry_space:
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			error = EPIPE;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto done;
 		} else if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto done;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = ENOTCONN;
 			goto done;
 		}
 
 		space = sbspace(&so->so_snd);
 		if (space < rem &&
 		    (space <= 0 ||
 		     space < so->so_snd.sb_lowat)) {
 			if (so->so_state & SS_NBIO) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EAGAIN;
 				goto done;
 			}
 			/*
 			 * sbwait drops the lock while sleeping.
 			 * When we loop back to retry_space the
 			 * state may have changed and we retest
 			 * for it.
 			 */
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			/*
 			 * An error from sbwait usually indicates that we've
 			 * been interrupted by a signal. If we've sent anything
 			 * then return bytes sent, otherwise return the error.
 			 */
 			if (error != 0) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				goto done;
 			}
 			goto retry_space;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 
 		/*
 		 * At the beginning of the first loop check if any headers
 		 * are specified and copy them into mbufs.  Reduce space in
 		 * the socket buffer by the size of the header mbuf chain.
 		 * Clear hdr_uio here and hdrlen at the end of the first loop.
 		 */
 		if (hdr_uio != NULL && hdr_uio->uio_resid > 0) {
 			hdr_uio->uio_td = td;
 			hdr_uio->uio_rw = UIO_WRITE;
 #ifdef KERN_TLS
 			if (tls != NULL)
 				mh = m_uiotombuf(hdr_uio, M_WAITOK, space,
 				    tls->params.max_frame_len, M_EXTPG);
 			else
 #endif
 				mh = m_uiotombuf(hdr_uio, M_WAITOK,
 				    space, 0, 0);
 			hdrlen = m_length(mh, &mhtail);
 			space -= hdrlen;
 			/*
 			 * If header consumed all the socket buffer space,
 			 * don't waste CPU cycles and jump to the end.
 			 */
 			if (space == 0) {
 				/* No file pages this round: no sfio, no I/O. */
 				sfio = NULL;
 				nios = 0;
 				goto prepend_header;
 			}
 			hdr_uio = NULL;
 		}
 
 		if (vp != NULL) {
 			error = vn_lock(vp, LK_SHARED);
 			if (error != 0)
 				goto done;
 
 			/*
 			 * Check to see if the file size has changed.
 			 */
 			if (obj->type == OBJT_VNODE) {
 				VM_OBJECT_RLOCK(obj);
 				nobj_size = obj->un_pager.vnp.vnp_size;
 				VM_OBJECT_RUNLOCK(obj);
 			} else {
 				error = VOP_GETATTR(vp, &va, td->td_ucred);
 				if (error != 0) {
 					VOP_UNLOCK(vp);
 					goto done;
 				}
 				nobj_size = va.va_size;
 			}
 			if (off >= nobj_size) {
 				VOP_UNLOCK(vp);
 				goto done;
 			}
 			if (nobj_size != obj_size) {
 				obj_size = nobj_size;
 				rem = nbytes ? omin(nbytes + offset, obj_size) :
 				    obj_size;
 				rem -= off;
 			}
 		}
 
 		if (space > rem)
 			space = rem;
 		else if (space > PAGE_SIZE) {
 			/*
 			 * Use page boundaries when possible for large
 			 * requests.
 			 */
 			if (off & PAGE_MASK)
 				space -= (PAGE_SIZE - (off & PAGE_MASK));
 			space = trunc_page(space);
 			if (off & PAGE_MASK)
 				space += (PAGE_SIZE - (off & PAGE_MASK));
 		}
 
 		npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE);
 
 		/*
 		 * Calculate maximum allowed number of pages for readahead
 		 * at this iteration.  If SF_USER_READAHEAD was set, we don't
 		 * do any heuristics and use exactly the value supplied by
 		 * application.  Otherwise, we allow readahead up to "rem".
 		 * If application wants more, let it be, but there is no
 		 * reason to go above maxphys.  Also check against "obj_size",
 		 * since vm_pager_has_page() can hint beyond EOF.
 		 */
 		if (flags & SF_USER_READAHEAD) {
 			rhpages = SF_READAHEAD(flags);
 		} else {
 			rhpages = howmany(rem + (off & PAGE_MASK), PAGE_SIZE) -
 			    npages;
 			rhpages += SF_READAHEAD(flags);
 		}
 		rhpages = min(howmany(maxphys, PAGE_SIZE), rhpages);
 		rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) -
 		    npages, rhpages);
 
 		/* The page pointer array is allocated inline after the sf_io. */
 		sfio = malloc(sizeof(struct sf_io) +
 		    npages * sizeof(vm_page_t), M_SENDFILE, M_WAITOK);
 		refcount_init(&sfio->nios, 1);
 		sfio->obj = obj;
 		sfio->error = 0;
 		sfio->m = NULL;
 		sfio->npages = npages;
 #ifdef KERN_TLS
 		/*
 		 * This doesn't use ktls_hold() because sfio->m will
 		 * also have a reference on 'tls' that will be valid
 		 * for all of sfio's lifetime.
 		 */
 		sfio->tls = tls;
 #endif
 		vm_object_pip_add(obj, 1);
 		error = sendfile_swapin(obj, sfio, &nios, off, space, rhpages,
 		    flags);
 		if (error != 0) {
 			if (vp != NULL)
 				VOP_UNLOCK(vp);
 			sendfile_iodone(sfio, NULL, 0, error);
 			goto done;
 		}
 
 		/*
 		 * Loop and construct maximum sized mbuf chain to be bulk
 		 * dumped into socket buffer.
 		 */
 		pa = sfio->pa;
 
 		/*
 		 * Use unmapped mbufs if enabled for TCP.  Unmapped
 		 * bufs are restricted to TCP as that is what has been
 		 * tested.  In particular, unmapped mbufs have not
 		 * been tested with UNIX-domain sockets.
 		 *
 		 * TLS frames always require unmapped mbufs.
 		 */
 		if ((mb_use_ext_pgs &&
 		    so->so_proto->pr_protocol == IPPROTO_TCP)
 #ifdef KERN_TLS
 		    || tls != NULL
 #endif
 		    ) {
 			use_ext_pgs = true;
 #ifdef KERN_TLS
 			if (tls != NULL)
 				max_pgs = num_pages(tls->params.max_frame_len);
 			else
 #endif
 				max_pgs = MBUF_PEXT_MAX_PGS;
 
 			/* Start at last index, to wrap on first use. */
 			ext_pgs_idx = max_pgs - 1;
 		}
 
 		for (int i = 0; i < npages; i++) {
 			/*
 			 * If a page wasn't grabbed successfully, then
 			 * trim the array. Can happen only with SF_NODISKIO.
 			 */
 			if (pa[i] == NULL) {
 				SFSTAT_INC(sf_busy);
 				fixspace(npages, i, off, &space);
 				sfio->npages = i;
 				softerr = EBUSY;
 				break;
 			}
 			pga = pa[i];
 			if (pga == bogus_page)
 				pga = vm_page_relookup(obj, sfio->pindex0 + i);
 
 			if (use_ext_pgs) {
 				off_t xfs;
 
 				/* Begin a new unmapped mbuf once the current one is full. */
 				ext_pgs_idx++;
 				if (ext_pgs_idx == max_pgs) {
 					m0 = mb_alloc_ext_pgs(M_WAITOK,
 					    sendfile_free_mext_pg);
 
 					if (flags & SF_NOCACHE) {
 						m0->m_ext.ext_flags |=
 						    EXT_FLAG_NOCACHE;
 
 						/*
 						 * See comment below regarding
 						 * ignoring SF_NOCACHE for the
 						 * last page.
 						 */
 						if ((npages - i <= max_pgs) &&
 						    ((off + space) & PAGE_MASK) &&
 						    (rem > space || rhpages > 0))
 							m0->m_ext.ext_flags |=
 							    EXT_FLAG_CACHE_LAST;
 					}
 					if (sfs != NULL) {
 						m0->m_ext.ext_flags |=
 						    EXT_FLAG_SYNC;
 						m0->m_ext.ext_arg1 = sfs;
 						mtx_lock(&sfs->mtx);
 						sfs->count++;
 						mtx_unlock(&sfs->mtx);
 					}
 					ext_pgs_idx = 0;
 
 					/* Append to mbuf chain. */
 					if (mtail != NULL)
 						mtail->m_next = m0;
 					else
 						m = m0;
 					mtail = m0;
 					m0->m_epg_1st_off =
 					    vmoff(i, off) & PAGE_MASK;
 				}
 				if (nios) {
 					mtail->m_flags |= M_NOTREADY;
 					m0->m_epg_nrdy++;
 				}
 
 				m0->m_epg_pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pga);
 				m0->m_epg_npgs++;
 				xfs = xfsize(i, npages, off, space);
 				m0->m_epg_last_len = xfs;
 				MBUF_EXT_PGS_ASSERT_SANITY(m0);
 				mtail->m_len += xfs;
 				mtail->m_ext.ext_size += PAGE_SIZE;
 				continue;
 			}
 
 			/*
 			 * Get a sendfile buf.  When allocating the
 			 * first buffer for mbuf chain, we usually
 			 * wait as long as necessary, but this wait
 			 * can be interrupted.  For consequent
 			 * buffers, do not sleep, since several
 			 * threads might exhaust the buffers and then
 			 * deadlock.
 			 */
 			sf = sf_buf_alloc(pga,
 			    m != NULL ? SFB_NOWAIT : SFB_CATCH);
 			if (sf == NULL) {
 				SFSTAT_INC(sf_allocfail);
 				sendfile_iowait(sfio, "sfnosf");
 				/* Unwire and drop the pages that will not be sent. */
 				for (int j = i; j < npages; j++) {
 					vm_page_unwire(pa[j], PQ_INACTIVE);
 					pa[j] = NULL;
 				}
 				if (m == NULL)
 					softerr = ENOBUFS;
 				fixspace(npages, i, off, &space);
 				sfio->npages = i;
 				break;
 			}
 
 			m0 = m_get(M_WAITOK, MT_DATA);
 			m0->m_ext.ext_buf = (char *)sf_buf_kva(sf);
 			m0->m_ext.ext_size = PAGE_SIZE;
 			m0->m_ext.ext_arg1 = sf;
 			m0->m_ext.ext_type = EXT_SFBUF;
 			m0->m_ext.ext_flags = EXT_FLAG_EMBREF;
 			m0->m_ext.ext_free = sendfile_free_mext;
 			/*
 			 * SF_NOCACHE sets the page as being freed upon send.
 			 * However, we ignore it for the last page in 'space',
 			 * if the page is truncated, and we got more data to
 			 * send (rem > space), or if we have readahead
 			 * configured (rhpages > 0).
 			 */
 			if ((flags & SF_NOCACHE) &&
 			    (i != npages - 1 ||
 			    !((off + space) & PAGE_MASK) ||
 			    !(rem > space || rhpages > 0)))
 				m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE;
 			if (sfs != NULL) {
 				m0->m_ext.ext_flags |= EXT_FLAG_SYNC;
 				m0->m_ext.ext_arg2 = sfs;
 				mtx_lock(&sfs->mtx);
 				sfs->count++;
 				mtx_unlock(&sfs->mtx);
 			}
 			m0->m_ext.ext_count = 1;
 			m0->m_flags |= (M_EXT | M_RDONLY);
 			if (nios)
 				m0->m_flags |= M_NOTREADY;
 			m0->m_data = (char *)sf_buf_kva(sf) +
 			    (vmoff(i, off) & PAGE_MASK);
 			m0->m_len = xfsize(i, npages, off, space);
 
 			/* Append to mbuf chain. */
 			if (mtail != NULL)
 				mtail->m_next = m0;
 			else
 				m = m0;
 			mtail = m0;
 		}
 
 		if (vp != NULL)
 			VOP_UNLOCK(vp);
 
 		/* Keep track of bytes processed. */
 		off += space;
 		rem -= space;
 
 		/*
 		 * Prepend header, if any.  Save pointer to first mbuf
 		 * with a page.
 		 */
 		if (hdrlen) {
 prepend_header:
 			m0 = mhtail->m_next = m;
 			m = mh;
 			mh = NULL;
 		} else
 			m0 = m;
 
 		if (m == NULL) {
 			KASSERT(softerr, ("%s: m NULL, no error", __func__));
 			error = softerr;
 			sendfile_iodone(sfio, NULL, 0, 0);
 			goto done;
 		}
 
 		/* Add the buffer chain to the socket buffer. */
 		KASSERT(m_length(m, NULL) == space + hdrlen,
 		    ("%s: mlen %u space %d hdrlen %d",
 		    __func__, m_length(m, NULL), space, hdrlen));
 
 		CURVNET_SET(so->so_vnet);
 #ifdef KERN_TLS
 		if (tls != NULL)
 			ktls_frame(m, tls, &tls_enq_cnt, TLS_RLTYPE_APP);
 #endif
 		if (nios == 0) {
 			/*
 			 * If sendfile_swapin() didn't initiate any I/Os,
 			 * which happens if all data is cached in VM, or if
 			 * the header consumed all socket buffer space and
 			 * sfio is NULL, then we can send data right now
 			 * without the PRUS_NOTREADY flag.
 			 */
 			if (sfio != NULL)
 				sendfile_iodone(sfio, NULL, 0, 0);
 #ifdef KERN_TLS
 			if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) {
 				error = (*so->so_proto->pr_usrreqs->pru_send)
 				    (so, PRUS_NOTREADY, m, NULL, NULL, td);
 				if (error != 0) {
 					m_freem(m);
 				} else {
 					soref(so);
 					ktls_enqueue(m, so, tls_enq_cnt);
 				}
 			} else
 #endif
 				error = (*so->so_proto->pr_usrreqs->pru_send)
 				    (so, 0, m, NULL, NULL, td);
 		} else {
 			/*
 			 * I/O is still outstanding: queue the chain as not
 			 * ready and record the socket and first page mbuf
 			 * in sfio for sendfile_iodone().
 			 */
 			sfio->so = so;
 			sfio->m = m0;
 			soref(so);
 			error = (*so->so_proto->pr_usrreqs->pru_send)
 			    (so, PRUS_NOTREADY, m, NULL, NULL, td);
 			sendfile_iodone(sfio, NULL, 0, error);
 		}
 		CURVNET_RESTORE();
 
 		/* The chain was handed to pru_send; do not free it at 'out'. */
 		m = NULL;
 		if (error)
 			goto done;
 		sbytes += space + hdrlen;
 		if (hdrlen)
 			hdrlen = 0;
 		if (softerr) {
 			error = softerr;
 			goto done;
 		}
 	}
 
 	/*
 	 * Send trailers. Wimp out and use writev(2).
 	 */
 	if (trl_uio != NULL) {
 		SOCK_IO_SEND_UNLOCK(so);
 		error = kern_writev(td, sockfd, trl_uio);
 		if (error == 0)
 			sbytes += td->td_retval[0];
 		goto out;
 	}
 
 done:
 	SOCK_IO_SEND_UNLOCK(so);
 out:
 	/*
 	 * If there was no error we have to clear td->td_retval[0]
 	 * because it may have been set by writev.
 	 */
 	if (error == 0) {
 		td->td_retval[0] = 0;
 	}
 	if (sent != NULL) {
 		(*sent) = sbytes;
 	}
 	if (obj != NULL)
 		vm_object_deallocate(obj);
 	if (so)
 		fdrop(sock_fp, td);
 	if (m)
 		m_freem(m);
 	if (mh)
 		m_freem(mh);
 
 	/*
 	 * SF_SYNC: sleep on sfs->cv while mbufs still count against sfs.
 	 * If the count is zero afterwards, destroy sfs here; otherwise
 	 * clear 'waiting' and leave it allocated.
 	 * NOTE(review): presumably the last mbuf release then destroys
 	 * sfs -- confirm against the ext_free routines.
 	 */
 	if (sfs != NULL) {
 		mtx_lock(&sfs->mtx);
 		if (sfs->count != 0)
 			error = cv_wait_sig(&sfs->cv, &sfs->mtx);
 		if (sfs->count == 0) {
 			sendfile_sync_destroy(sfs);
 		} else {
 			sfs->waiting = false;
 			mtx_unlock(&sfs->mtx);
 		}
 	}
 #ifdef KERN_TLS
 	if (tls != NULL)
 		ktls_free(tls);
 #endif
 
 	if (error == ERESTART)
 		error = EINTR;
 
 	return (error);
 }
 
 static int
 sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 {
 	struct sf_hdtr hdtr;
 	struct uio *hdr_uio, *trl_uio;
 	struct file *fp;
 	off_t sbytes;
 	int error;
 
 	/*
 	 * File offset must be positive.  If it goes beyond EOF
 	 * we send only the header/trailer and no payload data.
 	 */
 	if (uap->offset < 0)
 		return (EINVAL);
 
 	sbytes = 0;
 	hdr_uio = trl_uio = NULL;
 
 	if (uap->hdtr != NULL) {
 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 		if (error != 0)
 			goto out;
 		if (hdtr.headers != NULL) {
 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt,
 			    &hdr_uio);
 			if (error != 0)
 				goto out;
 #ifdef COMPAT_FREEBSD4
 			/*
 			 * In FreeBSD < 5.0 the nbytes to send also included
 			 * the header.  If compat is specified subtract the
 			 * header size from nbytes.
 			 */
 			if (compat) {
 				if (uap->nbytes > hdr_uio->uio_resid)
 					uap->nbytes -= hdr_uio->uio_resid;
 				else
 					uap->nbytes = 0;
 			}
 #endif
 		}
 		if (hdtr.trailers != NULL) {
 			error = copyinuio(hdtr.trailers, hdtr.trl_cnt,
 			    &trl_uio);
 			if (error != 0)
 				goto out;
 		}
 	}
 
 	AUDIT_ARG_FD(uap->fd);
 
 	/*
 	 * sendfile(2) can start at any offset within a file so we require
 	 * CAP_READ+CAP_SEEK = CAP_PREAD.
 	 */
 	if ((error = fget_read(td, uap->fd, &cap_pread_rights, &fp)) != 0)
 		goto out;
 
 	error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
 	    uap->nbytes, &sbytes, uap->flags, td);
 	fdrop(fp, td);
 
 	if (uap->sbytes != NULL)
 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
 
 out:
 	free(hdr_uio, M_IOV);
 	free(trl_uio, M_IOV);
 	return (error);
 }
 
 /*
  * sendfile(2) system call.
  *
  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
  *
  * Transmit the file referred to by 'fd', beginning at 'offset', over
  * the socket 's'.  'nbytes' limits the amount of file data; a value of
  * 0 means send until EOF.  A header and/or trailer may be supplied
  * through 'hdtr', and when 'sbytes' is given the total number of bytes
  * sent is stored there.
  */
 int
 sys_sendfile(struct thread *td, struct sendfile_args *uap)
 {
 	int error;
 
 	/* Native entry point: no FreeBSD 4.x nbytes adjustment. */
 	error = sendfile(td, uap, 0);
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4.x flavour of sendfile(2).  Repackages the old argument
  * structure as a struct sendfile_args and calls the common code with
  * the compat flag set.
  */
 int
 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 {
 	struct sendfile_args nargs = {
 		.fd = uap->fd,
 		.s = uap->s,
 		.offset = uap->offset,
 		.nbytes = uap->nbytes,
 		.hdtr = uap->hdtr,
 		.sbytes = uap->sbytes,
 		.flags = uap->flags,
 	};
 
 	return (sendfile(td, &nargs, 1));
 }
 #endif /* COMPAT_FREEBSD4 */
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 774b317c6ecb..b1b47d3d3c26 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -1,842 +1,847 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)sys_socket.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/aio.h>
 #include <sys/domain.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/filio.h>			/* XXX */
 #include <sys/sockio.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 #include <sys/ucred.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/user.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 /* kern.ipc.aio: read-only statistics for socket AIO. */
 static SYSCTL_NODE(_kern_ipc, OID_AUTO, aio, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
     "socket AIO stats");
 
 /* Exported as kern.ipc.aio.empty_results. */
 static int empty_results;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_results, CTLFLAG_RD, &empty_results,
     0, "socket operation returned EAGAIN");
 
 /* Exported as kern.ipc.aio.empty_retries. */
 static int empty_retries;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_retries, CTLFLAG_RD, &empty_retries,
     0, "socket operation retries");
 
 /* File operation handlers for socket descriptors. */
 static fo_rdwr_t soo_read;
 static fo_rdwr_t soo_write;
 static fo_ioctl_t soo_ioctl;
 static fo_poll_t soo_poll;
 extern fo_kqfilter_t soo_kqfilter;
 static fo_stat_t soo_stat;
 static fo_close_t soo_close;
 static fo_fill_kinfo_t soo_fill_kinfo;
 static fo_aio_queue_t soo_aio_queue;
 
 static void	soo_aio_cancel(struct kaiocb *job);
 
 /*
  * File operations vector for sockets.  truncate, chmod, chown and
  * sendfile are routed to the invfo_* handlers; descriptors are marked
  * DFLAG_PASSABLE.
  */
 struct fileops	socketops = {
 	.fo_read = soo_read,
 	.fo_write = soo_write,
 	.fo_truncate = invfo_truncate,
 	.fo_ioctl = soo_ioctl,
 	.fo_poll = soo_poll,
 	.fo_kqfilter = soo_kqfilter,
 	.fo_stat = soo_stat,
 	.fo_close = soo_close,
 	.fo_chmod = invfo_chmod,
 	.fo_chown = invfo_chown,
 	.fo_sendfile = invfo_sendfile,
 	.fo_fill_kinfo = soo_fill_kinfo,
 	.fo_aio_queue = soo_aio_queue,
 	.fo_flags = DFLAG_PASSABLE
 };
 
 /*
  * fo_read handler for sockets: receive into 'uio' with soreceive(),
  * after the MAC receive check when MAC is configured.  The 'flags' and
  * 'td' arguments are not used.
  */
 static int
 soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *sock;
 #ifdef MAC
 	int rc;
 #endif
 
 	sock = fp->f_data;
 #ifdef MAC
 	rc = mac_socket_check_receive(active_cred, sock);
 	if (rc)
 		return (rc);
 #endif
 	return (soreceive(sock, 0, uio, 0, 0, 0));
 }
 
 static int
 soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	int error;
 
 #ifdef MAC
 	error = mac_socket_check_send(active_cred, so);
 	if (error)
 		return (error);
 #endif
 	error = sosend(so, 0, uio, 0, 0, 0, uio->uio_td);
 	if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
 		PROC_LOCK(uio->uio_td->td_proc);
 		tdsignal(uio->uio_td, SIGPIPE);
 		PROC_UNLOCK(uio->uio_td->td_proc);
 	}
 	return (error);
 }
 
 /*
  * fo_ioctl handler for sockets.
  *
  * Handles the generic file and socket ioctls directly and forwards
  * everything else to the interface, routing or protocol ioctl code
  * depending on the command group.  'data' points to the ioctl
  * argument; the commands handled here treat it as an int.  Returns 0
  * or an errno value.
  */
 static int
 soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	int error = 0;
 
 	switch (cmd) {
 	case FIONBIO:
 		/* Set or clear non-blocking mode. */
 		SOCK_LOCK(so);
 		if (*(int *)data)
 			so->so_state |= SS_NBIO;
 		else
 			so->so_state &= ~SS_NBIO;
 		SOCK_UNLOCK(so);
 		break;
 
 	case FIOASYNC:
 		/*
 		 * Set or clear SS_ASYNC on the socket and SB_ASYNC on both
 		 * socket buffers; a listening socket keeps the buffer flags
 		 * in sol_sbrcv_flags/sol_sbsnd_flags instead.
 		 */
 		if (*(int *)data) {
 			SOCK_LOCK(so);
 			so->so_state |= SS_ASYNC;
 			if (SOLISTENING(so)) {
 				so->sol_sbrcv_flags |= SB_ASYNC;
 				so->sol_sbsnd_flags |= SB_ASYNC;
 			} else {
-				SOCKBUF_LOCK(&so->so_rcv);
+				SOCK_RECVBUF_LOCK(so);
 				so->so_rcv.sb_flags |= SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_rcv);
-				SOCKBUF_LOCK(&so->so_snd);
+				SOCK_RECVBUF_UNLOCK(so);
+				SOCK_SENDBUF_LOCK(so);
 				so->so_snd.sb_flags |= SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_snd);
+				SOCK_SENDBUF_UNLOCK(so);
 			}
 			SOCK_UNLOCK(so);
 		} else {
 			SOCK_LOCK(so);
 			so->so_state &= ~SS_ASYNC;
 			if (SOLISTENING(so)) {
 				so->sol_sbrcv_flags &= ~SB_ASYNC;
 				so->sol_sbsnd_flags &= ~SB_ASYNC;
 			} else {
-				SOCKBUF_LOCK(&so->so_rcv);
+				SOCK_RECVBUF_LOCK(so);
 				so->so_rcv.sb_flags &= ~SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_rcv);
-				SOCKBUF_LOCK(&so->so_snd);
+				SOCK_RECVBUF_UNLOCK(so);
+				SOCK_SENDBUF_LOCK(so);
 				so->so_snd.sb_flags &= ~SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_snd);
+				SOCK_SENDBUF_UNLOCK(so);
 			}
 			SOCK_UNLOCK(so);
 		}
 		break;
 
 	case FIONREAD:
 		/*
 		 * Receive buffer bytes available, less sb_ctl; EINVAL on a
 		 * listening socket.
 		 */
 		SOCK_RECVBUF_LOCK(so);
 		if (SOLISTENING(so)) {
 			error = EINVAL;
 		} else {
 			*(int *)data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
 		}
 		SOCK_RECVBUF_UNLOCK(so);
 		break;
 
 	case FIONWRITE:
 		/* Unlocked read. */
 		if (SOLISTENING(so)) {
 			error = EINVAL;
 		} else {
 			*(int *)data = sbavail(&so->so_snd);
 		}
 		break;
 
 	case FIONSPACE:
 		/* Unlocked read. */
 		if (SOLISTENING(so)) {
 			error = EINVAL;
 		} else {
 			/* Report 0 when either send buffer limit is exceeded. */
 			if ((so->so_snd.sb_hiwat < sbused(&so->so_snd)) ||
 			    (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt)) {
 				*(int *)data = 0;
 			} else {
 				*(int *)data = sbspace(&so->so_snd);
 			}
 		}
 		break;
 
 	case FIOSETOWN:
 		error = fsetown(*(int *)data, &so->so_sigio);
 		break;
 
 	case FIOGETOWN:
 		*(int *)data = fgetown(&so->so_sigio);
 		break;
 
 	case SIOCSPGRP:
 		/* Same as FIOSETOWN, with the sign of the id inverted. */
 		error = fsetown(-(*(int *)data), &so->so_sigio);
 		break;
 
 	case SIOCGPGRP:
 		/* Same as FIOGETOWN, with the sign of the id inverted. */
 		*(int *)data = -fgetown(&so->so_sigio);
 		break;
 
 	case SIOCATMARK:
 		/* Unlocked read. */
 		if (SOLISTENING(so)) {
 			error = EINVAL;
 		} else {
 			*(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0;
 		}
 		break;
 	default:
 		/*
 		 * Interface/routing/protocol specific ioctls: interface and
 		 * routing ioctls should have a different entry since a
 		 * socket is unnecessary.
 		 */
 		if (IOCGROUP(cmd) == 'i')
 			error = ifioctl(so, cmd, data, td);
 		else if (IOCGROUP(cmd) == 'r') {
 			CURVNET_SET(so->so_vnet);
 			error = rtioctl_fib(cmd, data, so->so_fibnum);
 			CURVNET_RESTORE();
 		} else {
 			CURVNET_SET(so->so_vnet);
 			error = ((*so->so_proto->pr_usrreqs->pru_control)
 			    (so, cmd, data, 0, td));
 			CURVNET_RESTORE();
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * fo_poll handler for sockets: delegate to sopoll(), after the MAC
  * poll check when MAC is configured.  sopoll() is given the
  * credential stored in the file (fp->f_cred), not 'active_cred'.
  */
 static int
 soo_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *sock;
 #ifdef MAC
 	int rc;
 #endif
 
 	sock = fp->f_data;
 #ifdef MAC
 	rc = mac_socket_check_poll(active_cred, sock);
 	if (rc)
 		return (rc);
 #endif
 	return (sopoll(sock, events, fp->f_cred, td));
 }
 
 static int
 soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred)
 {
 	struct socket *so = fp->f_data;
 	int error;
 
 	bzero((caddr_t)ub, sizeof (*ub));
 	ub->st_mode = S_IFSOCK;
 #ifdef MAC
 	error = mac_socket_check_stat(active_cred, so);
 	if (error)
 		return (error);
 #endif
 	SOCK_LOCK(so);
 	if (!SOLISTENING(so)) {
 		struct sockbuf *sb;
 
 		/*
 		 * If SBS_CANTRCVMORE is set, but there's still data left
 		 * in the receive buffer, the socket is still readable.
 		 */
 		sb = &so->so_rcv;
 		SOCKBUF_LOCK(sb);
 		if ((sb->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(sb))
 			ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
 		ub->st_size = sbavail(sb) - sb->sb_ctl;
 		SOCKBUF_UNLOCK(sb);
 
 		sb = &so->so_snd;
 		SOCKBUF_LOCK(sb);
 		if ((sb->sb_state & SBS_CANTSENDMORE) == 0)
 			ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
 		SOCKBUF_UNLOCK(sb);
 	}
 	ub->st_uid = so->so_cred->cr_uid;
 	ub->st_gid = so->so_cred->cr_gid;
 	error = so->so_proto->pr_usrreqs->pru_sense(so, ub);
 	SOCK_UNLOCK(so);
 	return (error);
 }
 
 /*
  * fo_close handler for sockets.  The file is first detached from the
  * socket (its ops are switched to badfileops and f_data is cleared),
  * then soclose() is called to close the socket, which includes
  * initiating any closing protocol.  soclose() will sorele() the file
  * reference, but the socket itself is not freed until its reference
  * count reaches 0.
  *
  * Returns 0 if the file had no socket attached, else soclose()'s result.
  */
 static int
 soo_close(struct file *fp, struct thread *td)
 {
 	struct socket *sock;
 
 	sock = fp->f_data;
 	fp->f_ops = &badfileops;
 	fp->f_data = NULL;
 
 	if (sock == NULL)
 		return (0);
 	return (soclose(sock));
 }
 
 /*
  * Fill in the socket-specific part of a kinfo_file record.
  *
  * Records the domain, type, protocol and pcb pointer, adds per-domain
  * details (TCP and UNIX sockets), copies the local and peer addresses when
  * the protocol can supply them and they fit, and stores the domain name in
  * kf_path.  Address lookup failures are not reported; the function always
  * returns 0.
  */
 static int
 soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 	struct sockaddr *sa;
 	struct inpcb *inpcb;
 	struct unpcb *unpcb;
 	struct socket *so;
 	int error;
 
 	kif->kf_type = KF_TYPE_SOCKET;
 	so = fp->f_data;
 	CURVNET_SET(so->so_vnet);
 	kif->kf_un.kf_sock.kf_sock_domain0 =
 	    so->so_proto->pr_domain->dom_family;
 	kif->kf_un.kf_sock.kf_sock_type0 = so->so_type;
 	kif->kf_un.kf_sock.kf_sock_protocol0 = so->so_proto->pr_protocol;
 	kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb;
 	switch (kif->kf_un.kf_sock.kf_sock_domain0) {
 	case AF_INET:
 	case AF_INET6:
 		if (kif->kf_un.kf_sock.kf_sock_protocol0 == IPPROTO_TCP) {
 			if (so->so_pcb != NULL) {
 				inpcb = (struct inpcb *)(so->so_pcb);
 				kif->kf_un.kf_sock.kf_sock_inpcb =
 				    (uintptr_t)inpcb->inp_ppcb;
 				kif->kf_un.kf_sock.kf_sock_sendq =
 				    sbused(&so->so_snd);
 				kif->kf_un.kf_sock.kf_sock_recvq =
 				    sbused(&so->so_rcv);
 			}
 		}
 		break;
 	case AF_UNIX:
 		if (so->so_pcb != NULL) {
 			unpcb = (struct unpcb *)(so->so_pcb);
 			if (unpcb->unp_conn) {
 				kif->kf_un.kf_sock.kf_sock_unpconn =
 				    (uintptr_t)unpcb->unp_conn;
 				kif->kf_un.kf_sock.kf_sock_rcv_sb_state =
 				    so->so_rcv.sb_state;
 				kif->kf_un.kf_sock.kf_sock_snd_sb_state =
 				    so->so_snd.sb_state;
 				kif->kf_un.kf_sock.kf_sock_sendq =
 				    sbused(&so->so_snd);
 				kif->kf_un.kf_sock.kf_sock_recvq =
 				    sbused(&so->so_rcv);
 			}
 		}
 		break;
 	}
 	/*
 	 * On success the protocol hands back an M_SONAME allocation that
 	 * must be freed whether or not it fits the destination; an
 	 * oversized address is simply not copied.
 	 */
 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
 	if (error == 0) {
 		if (sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_local))
 			bcopy(sa, &kif->kf_un.kf_sock.kf_sa_local,
 			    sa->sa_len);
 		free(sa, M_SONAME);
 	}
 	error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
 	if (error == 0) {
 		if (sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_peer))
 			bcopy(sa, &kif->kf_un.kf_sock.kf_sa_peer,
 			    sa->sa_len);
 		free(sa, M_SONAME);
 	}
 	strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name,
 	    sizeof(kif->kf_path));
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Use the 'backend3' field in AIO jobs to store the amount of data
  * completed by the AIO job so far.
  */
 #define	aio_done	backend3
 
 static STAILQ_HEAD(, task) soaio_jobs;
 static struct mtx soaio_jobs_lock;
 static struct task soaio_kproc_task;
 static int soaio_starting, soaio_idle, soaio_queued;
 static struct unrhdr *soaio_kproc_unr;
 
 static int soaio_max_procs = MAX_AIO_PROCS;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, max_procs, CTLFLAG_RW, &soaio_max_procs, 0,
     "Maximum number of kernel processes to use for async socket IO");
 
 static int soaio_num_procs;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, num_procs, CTLFLAG_RD, &soaio_num_procs, 0,
     "Number of active kernel processes for async socket IO");
 
 static int soaio_target_procs = TARGET_AIO_PROCS;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, target_procs, CTLFLAG_RD,
     &soaio_target_procs, 0,
     "Preferred number of ready kernel processes for async socket IO");
 
 static int soaio_lifetime;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, lifetime, CTLFLAG_RW, &soaio_lifetime, 0,
     "Maximum lifetime for idle aiod");
 
 /*
  * Body of a socket AIO worker kernel process.
  *
  * Repeatedly drains soaio_jobs, invoking each task's handler with
  * soaio_jobs_lock dropped, then sleeps on &soaio_idle with a timeout of
  * soaio_lifetime.  The process exits when the sleep times out, no work is
  * queued, and there are more workers than soaio_target_procs.  'arg'
  * carries the unit number allocated for this worker, which is released
  * on exit.
  */
 static void
 soaio_kproc_loop(void *arg)
 {
 	struct proc *p;
 	struct vmspace *myvm;
 	struct task *task;
 	int error, id, pending;
 
 	id = (intptr_t)arg;
 
 	/*
 	 * Grab an extra reference on the daemon's vmspace so that it
 	 * doesn't get freed by jobs that switch to a different
 	 * vmspace.
 	 */
 	p = curproc;
 	myvm = vmspace_acquire_ref(p);
 
 	mtx_lock(&soaio_jobs_lock);
 	/* This worker is now running, so it no longer counts as starting. */
 	MPASS(soaio_starting > 0);
 	soaio_starting--;
 	for (;;) {
 		while (!STAILQ_EMPTY(&soaio_jobs)) {
 			task = STAILQ_FIRST(&soaio_jobs);
 			STAILQ_REMOVE_HEAD(&soaio_jobs, ta_link);
 			soaio_queued--;
 			/* Snapshot and reset the pending count under the lock. */
 			pending = task->ta_pending;
 			task->ta_pending = 0;
 			mtx_unlock(&soaio_jobs_lock);
 
 			/* Run the handler without holding the jobs lock. */
 			task->ta_func(task->ta_context, pending);
 
 			mtx_lock(&soaio_jobs_lock);
 		}
 		MPASS(soaio_queued == 0);
 
 		/*
 		 * If the process is no longer on its own vmspace, switch
 		 * back before idling, then re-check the queue since the
 		 * lock was dropped.
 		 */
 		if (p->p_vmspace != myvm) {
 			mtx_unlock(&soaio_jobs_lock);
 			vmspace_switch_aio(myvm);
 			mtx_lock(&soaio_jobs_lock);
 			continue;
 		}
 
 		soaio_idle++;
 		error = mtx_sleep(&soaio_idle, &soaio_jobs_lock, 0, "-",
 		    soaio_lifetime);
 		soaio_idle--;
 		/* Retire only on timeout with no work and surplus workers. */
 		if (error == EWOULDBLOCK && STAILQ_EMPTY(&soaio_jobs) &&
 		    soaio_num_procs > soaio_target_procs)
 			break;
 	}
 	soaio_num_procs--;
 	mtx_unlock(&soaio_jobs_lock);
 	free_unr(soaio_kproc_unr, id);
 	kproc_exit(0);
 }
 
 /*
  * Task handler that spawns socket AIO worker processes.
  *
  * Keeps creating workers while the pool is below soaio_target_procs, or
  * while it is below soaio_max_procs and more jobs are queued than there
  * are idle or starting workers to take them.  Stops at the first
  * kproc_create() failure.
  */
 static void
 soaio_kproc_create(void *context, int pending)
 {
 	struct proc *p;
 	int error, id;
 
 	mtx_lock(&soaio_jobs_lock);
 	while (soaio_num_procs < soaio_target_procs ||
 	    (soaio_num_procs < soaio_max_procs &&
 	    soaio_queued > soaio_idle + soaio_starting)) {
 		/* Account for the new worker before dropping the lock. */
 		soaio_starting++;
 		mtx_unlock(&soaio_jobs_lock);
 
 		id = alloc_unr(soaio_kproc_unr);
 		error = kproc_create(soaio_kproc_loop, (void *)(intptr_t)id,
 		    &p, 0, 0, "soaiod%d", id);
 		if (error != 0)
 			free_unr(soaio_kproc_unr, id);
 
 		mtx_lock(&soaio_jobs_lock);
 		if (error != 0) {
 			/* Creation failed: undo the accounting and give up. */
 			soaio_starting--;
 			break;
 		}
 		soaio_num_procs++;
 	}
 	mtx_unlock(&soaio_jobs_lock);
 }
 
 /*
  * Queue a task for the socket AIO workers.  Wakes an idle worker when
  * enough of them are idle to cover the queue; otherwise, if the pool is
  * below soaio_max_procs, schedules the worker-creation task.
  */
 void
 soaio_enqueue(struct task *task)
 {
 
 	mtx_lock(&soaio_jobs_lock);
 	MPASS(task->ta_pending == 0);
 	task->ta_pending += 1;
 	soaio_queued += 1;
 	STAILQ_INSERT_TAIL(&soaio_jobs, task, ta_link);
 	if (soaio_idle >= soaio_queued) {
 		wakeup_one(&soaio_idle);
 	} else if (soaio_max_procs > soaio_num_procs) {
 		taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task);
 	}
 	mtx_unlock(&soaio_jobs_lock);
 }
 
 /*
  * One-time setup of the socket AIO worker state: default idle lifetime,
  * the job queue and its mutex, the unit-number allocator used to name
  * workers, and the task that creates workers.
  */
 static void
 soaio_init(void)
 {
 
 	soaio_lifetime = AIOD_LIFETIME_DEFAULT;
 	STAILQ_INIT(&soaio_jobs);
 	mtx_init(&soaio_jobs_lock, "soaio jobs", NULL, MTX_DEF);
 	/* Worker ids start at 1. */
 	soaio_kproc_unr = new_unrhdr(1, INT_MAX, NULL);
 	TASK_INIT(&soaio_kproc_task, 0, soaio_kproc_create, NULL);
 }
 SYSINIT(soaio, SI_SUB_VFS, SI_ORDER_ANY, soaio_init, NULL);
 
 /*
  * Report whether 'sb' is ready for AIO: readability for the socket's
  * receive buffer, writability for anything else.
  */
 static __inline int
 soaio_ready(struct socket *so, struct sockbuf *sb)
 {
 	if (sb == &so->so_rcv)
 		return (soreadable(so));
 	return (sowriteable(so));
 }
 
 /*
  * Run one AIO job against a socket buffer.
  *
  * Entered with 'sb' locked and returns with 'sb' locked; the lock is
  * dropped while the I/O is performed.  The job is executed with the
  * submitter's credentials via a non-blocking soreceive() (receive buffer)
  * or sosend() (send buffer).  Progress accumulates in job->aio_done so a
  * job can be completed across several attempts.  On EWOULDBLOCK the job is
  * retried immediately if the socket is ready again, requeued at the head
  * of sb_aiojobq, or completed/cancelled if it could not be made
  * cancellable again.
  */
 static void
 soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job)
 {
 	struct ucred *td_savedcred;
 	struct thread *td;
 #ifdef MAC
 	struct file *fp = job->fd_file;
 #endif
 	size_t cnt, done, job_total_nbytes __diagused;
 	long ru_before;
 	int error, flags;
 
 	SOCKBUF_UNLOCK(sb);
 	aio_switch_vmspace(job);
 	td = curthread;
 retry:
 	/* Temporarily assume the job's credentials for the I/O. */
 	td_savedcred = td->td_ucred;
 	td->td_ucred = job->cred;
 
 	job_total_nbytes = job->uiop->uio_resid + job->aio_done;
 	done = job->aio_done;
 	cnt = job->uiop->uio_resid;
 	job->uiop->uio_offset = 0;
 	job->uiop->uio_td = td;
 	flags = MSG_NBIO;
 
 	/*
 	 * For resource usage accounting, only count a completed request
 	 * as a single message to avoid counting multiple calls to
 	 * sosend/soreceive on a blocking socket.
 	 */
 
 	if (sb == &so->so_rcv) {
 		ru_before = td->td_ru.ru_msgrcv;
 #ifdef MAC
 		error = mac_socket_check_receive(fp->f_cred, so);
 		if (error == 0)
 
 #endif
 			error = soreceive(so, NULL, job->uiop, NULL, NULL,
 			    &flags);
 		if (td->td_ru.ru_msgrcv != ru_before)
 			job->msgrcv = 1;
 	} else {
 		/* Hint that more data follows while other jobs are queued. */
 		if (!TAILQ_EMPTY(&sb->sb_aiojobq))
 			flags |= MSG_MORETOCOME;
 		ru_before = td->td_ru.ru_msgsnd;
 #ifdef MAC
 		error = mac_socket_check_send(fp->f_cred, so);
 		if (error == 0)
 #endif
 			error = sosend(so, NULL, job->uiop, NULL, NULL, flags,
 			    td);
 		if (td->td_ru.ru_msgsnd != ru_before)
 			job->msgsnd = 1;
 		/* Deliver SIGPIPE to the submitting process unless suppressed. */
 		if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
 			PROC_LOCK(job->userproc);
 			kern_psignal(job->userproc, SIGPIPE);
 			PROC_UNLOCK(job->userproc);
 		}
 	}
 
 	/* Fold the bytes moved by this attempt into the running total. */
 	done += cnt - job->uiop->uio_resid;
 	job->aio_done = done;
 	td->td_ucred = td_savedcred;
 
 	if (error == EWOULDBLOCK) {
 		/*
 		 * The request was either partially completed or not
 		 * completed at all due to racing with a read() or
 		 * write() on the socket.  If the socket is
 		 * non-blocking, return with any partial completion.
 		 * If the socket is blocking or if no progress has
 		 * been made, requeue this request at the head of the
 		 * queue to try again when the socket is ready.
 		 */
 		MPASS(done != job_total_nbytes);
 		SOCKBUF_LOCK(sb);
 		if (done == 0 || !(so->so_state & SS_NBIO)) {
 			empty_results++;
 			if (soaio_ready(so, sb)) {
 				empty_retries++;
 				SOCKBUF_UNLOCK(sb);
 				goto retry;
 			}
 			
 			/*
 			 * If the cancel function cannot be re-armed, finish
 			 * the job now instead of requeueing it.
 			 */
 			if (!aio_set_cancel_function(job, soo_aio_cancel)) {
 				SOCKBUF_UNLOCK(sb);
 				if (done != 0)
 					aio_complete(job, done, 0);
 				else
 					aio_cancel(job);
 				SOCKBUF_LOCK(sb);
 			} else {
 				TAILQ_INSERT_HEAD(&sb->sb_aiojobq, job, list);
 			}
 			return;
 		}
 		SOCKBUF_UNLOCK(sb);
 	}		
 	/* Partial progress hides transient errors from the caller. */
 	if (done != 0 && (error == ERESTART || error == EINTR ||
 	    error == EWOULDBLOCK))
 		error = 0;
 	if (error)
 		aio_complete(job, -1, error);
 	else
 		aio_complete(job, done, 0);
 	SOCKBUF_LOCK(sb);
 }
 
 /*
  * Service the AIO job queue of one socket buffer for as long as jobs are
  * queued and the socket is ready, then clear SB_AIO_RUNNING and release
  * a socket reference with sorele().
  */
 static void
 soaio_process_sb(struct socket *so, struct sockbuf *sb)
 {
 	struct kaiocb *next;
 
 	CURVNET_SET(so->so_vnet);
 	SOCKBUF_LOCK(sb);
 	for (;;) {
 		if (TAILQ_EMPTY(&sb->sb_aiojobq) || !soaio_ready(so, sb))
 			break;
 		next = TAILQ_FIRST(&sb->sb_aiojobq);
 		TAILQ_REMOVE(&sb->sb_aiojobq, next, list);
 		/* Only run the job if its cancel function could be cleared. */
 		if (aio_clear_cancel_function(next))
 			soaio_process_job(so, sb, next);
 	}
 
 	/*
 	 * Jobs left on the queue mean the socket was not ready; ask for a
 	 * wakeup via SB_AIO when that changes.
 	 */
 	if (!TAILQ_EMPTY(&sb->sb_aiojobq))
 		sb->sb_flags |= SB_AIO;
 	sb->sb_flags &= ~SB_AIO_RUNNING;
 	SOCKBUF_UNLOCK(sb);
 
 	sorele(so);
 	CURVNET_RESTORE();
 }
 
 /* Task handler: process queued AIO jobs on the socket's receive buffer. */
 void
 soaio_rcv(void *context, int pending)
 {
 	struct socket *sock = context;
 
 	soaio_process_sb(sock, &sock->so_rcv);
 }
 
 /* Task handler: process queued AIO jobs on the socket's send buffer. */
 void
 soaio_snd(void *context, int pending)
 {
 	struct socket *sock = context;
 
 	soaio_process_sb(sock, &sock->so_snd);
 }
 
 void
-sowakeup_aio(struct socket *so, struct sockbuf *sb)
+sowakeup_aio(struct socket *so, sb_which which)
 {
+	struct sockbuf *sb = sobuf(so, which);
+
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
-	SOCKBUF_LOCK_ASSERT(sb);
 	/* The pending wakeup request is being serviced now. */
 	sb->sb_flags &= ~SB_AIO;
 	/* A processing pass is already scheduled or running. */
 	if (sb->sb_flags & SB_AIO_RUNNING)
 		return;
 	sb->sb_flags |= SB_AIO_RUNNING;
 	/* Take a socket reference before handing the task to the workers. */
 	soref(so);
 	soaio_enqueue(&sb->sb_aiotask);
 }
 
 /*
  * Cancel routine for queued socket AIO jobs.  Takes the job off its
  * socket buffer queue unless the cancel function had already been
  * cleared, then completes it with any bytes already transferred or
  * cancels it if none were.
  */
 static void
 soo_aio_cancel(struct kaiocb *job)
 {
 	struct socket *sock;
 	struct sockbuf *buf;
 	long nbytes;
 	int op;
 
 	sock = job->fd_file->f_data;
 	op = job->uaiocb.aio_lio_opcode;
 	if ((op & LIO_READ) == 0) {
 		MPASS(op & LIO_WRITE);
 		buf = &sock->so_snd;
 	} else {
 		buf = &sock->so_rcv;
 	}
 
 	SOCKBUF_LOCK(buf);
 	if (!aio_cancel_cleared(job))
 		TAILQ_REMOVE(&buf->sb_aiojobq, job, list);
 	/* No jobs left means no wakeup is needed any more. */
 	if (TAILQ_EMPTY(&buf->sb_aiojobq))
 		buf->sb_flags &= ~SB_AIO;
 	SOCKBUF_UNLOCK(buf);
 
 	nbytes = job->aio_done;
 	if (nbytes == 0)
 		aio_cancel(job);
 	else
 		aio_complete(job, nbytes, 0);
 }
 
 /*
  * fileops aio_queue handler for sockets.  Offers the job to the protocol
  * first; if the protocol does not take it (non-zero return), the job is
  * appended to the generic per-sockbuf AIO queue.  Returns EINVAL for
  * opcodes that are neither a read nor a write and for listening sockets.
  */
 static int
 soo_aio_queue(struct file *fp, struct kaiocb *job)
 {
 	struct socket *so;
 	struct sockbuf *sb;
+	sb_which which;
 	int error;
 
 	so = fp->f_data;
 	error = (*so->so_proto->pr_usrreqs->pru_aio_queue)(so, job);
 	if (error == 0)
 		return (0);
 
 	/* Lock through the socket, since this may be a listening socket. */
 	switch (job->uaiocb.aio_lio_opcode & (LIO_WRITE | LIO_READ)) {
 	case LIO_READ:
-		sb = &so->so_rcv;
 		SOCK_RECVBUF_LOCK(so);
+		sb = &so->so_rcv;
+		which = SO_RCV;
 		break;
 	case LIO_WRITE:
-		sb = &so->so_snd;
 		SOCK_SENDBUF_LOCK(so);
+		sb = &so->so_snd;
+		which = SO_SND;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	if (SOLISTENING(so)) {
 		if (sb == &so->so_rcv)
 			SOCK_RECVBUF_UNLOCK(so);
 		else
 			SOCK_SENDBUF_UNLOCK(so);
 		return (EINVAL);
 	}
 
 	if (!aio_set_cancel_function(job, soo_aio_cancel))
 		panic("new job was cancelled");
 	TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list);
 	/*
 	 * If no processing pass is in flight, start one right away when the
 	 * socket is ready, otherwise request a wakeup via SB_AIO.
 	 */
 	if (!(sb->sb_flags & SB_AIO_RUNNING)) {
 		if (soaio_ready(so, sb))
-			sowakeup_aio(so, sb);
+			sowakeup_aio(so, which);
 		else
 			sb->sb_flags |= SB_AIO;
 	}
 	SOCKBUF_UNLOCK(sb);
 	return (0);
 }
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 5ac6c79a928f..421fa5da37d9 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -1,1792 +1,1832 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kern_tls.h"
 #include "opt_param.h"
 
 #include <sys/param.h>
 #include <sys/aio.h> /* for aio_swake proto */
 #include <sys/kernel.h>
 #include <sys/ktls.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 /*
  * Function pointer set by the AIO routines so that the socket buffer code
  * can call back into the AIO module if it is loaded.
  */
 void	(*aio_swake)(struct socket *, struct sockbuf *);
 
 /*
  * Primitive routines for operating on socket buffers
  */
 
 u_long	sb_max = SB_MAX;
 u_long sb_max_adj =
        (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
 
 static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
 
 #ifdef KERN_TLS
 static void	sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m,
     struct mbuf *n);
 #endif
 static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
 static void	sbflush_internal(struct sockbuf *sb);
 
 /*
  * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
  */
 static void
 sbm_clrprotoflags(struct mbuf *m, int flags)
 {
 	int mask;
 
 	mask = ~M_PROTOFLAGS;
 	if (flags & PRUS_NOTREADY)
 		mask |= M_NOTREADY;
 	while (m) {
 		m->m_flags &= mask;
 		m = m->m_next;
 	}
 }
 
 /*
  * Compress M_NOTREADY mbufs after they have been readied by sbready().
  *
  * sbcompress() skips M_NOTREADY mbufs since the data is not available to
  * be copied at the time of sbcompress().  This function combines small
  * mbufs similar to sbcompress() once mbufs are ready.  'm0' is the first
  * mbuf sbready() marked ready, and 'end' is the first mbuf still not
  * ready.
  */
 static void
 sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
 {
 	struct mbuf *m, *n;
 	int ext_size;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Nothing to do when coalescing is disabled for this buffer. */
 	if ((sb->sb_flags & SB_NOCOALESCE) != 0)
 		return;
 
 	for (m = m0; m != end; m = m->m_next) {
 		MPASS((m->m_flags & M_NOTREADY) == 0);
 		/*
 		 * NB: In sbcompress(), 'n' is the last mbuf in the
 		 * socket buffer and 'm' is the new mbuf being copied
 		 * into the trailing space of 'n'.  Here, the roles
 		 * are reversed and 'n' is the next mbuf after 'm'
 		 * that is being copied into the trailing space of
 		 * 'm'.
 		 */
 		n = m->m_next;
 #ifdef KERN_TLS
 		/* Try to coalesce adjacent ktls mbuf hdr/trailers. */
 		if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
 		    (m->m_flags & M_EXTPG) &&
 		    (n->m_flags & M_EXTPG) &&
 		    !mbuf_has_tls_session(m) &&
 		    !mbuf_has_tls_session(n)) {
 			int hdr_len, trail_len;
 
 			hdr_len = n->m_epg_hdrlen;
 			trail_len = m->m_epg_trllen;
 			/* Only merge when the combined bytes fit in m's trailer. */
 			if (trail_len != 0 && hdr_len != 0 &&
 			    trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) {
 				/* copy n's header to m's trailer */
 				memcpy(&m->m_epg_trail[trail_len],
 				    n->m_epg_hdr, hdr_len);
 				m->m_epg_trllen += hdr_len;
 				m->m_len += hdr_len;
 				n->m_epg_hdrlen = 0;
 				n->m_len -= hdr_len;
 			}
 		}
 #endif
 
 		/* Compress small unmapped mbufs into plain mbufs. */
 		if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN &&
 		    !mbuf_has_tls_session(m)) {
 			/* Read the size first; it is un-accounted on success. */
 			ext_size = m->m_ext.ext_size;
 			if (mb_unmapped_compress(m) == 0) {
 				sb->sb_mbcnt -= ext_size;
 				sb->sb_ccnt -= 1;
 			}
 		}
 
 		/*
 		 * Fold following small mbufs of the same type into m's
 		 * trailing space, stopping at a record boundary (M_EOR) or
 		 * at 'end'.
 		 */
 		while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
 		    M_WRITABLE(m) &&
 		    (m->m_flags & M_EXTPG) == 0 &&
 		    !mbuf_has_tls_session(n) &&
 		    !mbuf_has_tls_session(m) &&
 		    n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    n->m_len <= M_TRAILINGSPACE(m) &&
 		    m->m_type == n->m_type) {
 			KASSERT(sb->sb_lastrecord != n,
 		    ("%s: merging start of record (%p) into previous mbuf (%p)",
 			    __func__, n, m));
 			m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
 			m->m_len += n->m_len;
 			m->m_next = n->m_next;
 			m->m_flags |= n->m_flags & M_EOR;
 			/* Keep the tail pointer valid once n is gone. */
 			if (sb->sb_mbtail == n)
 				sb->sb_mbtail = m;
 
 			/* Drop n from the mbuf and cluster accounting. */
 			sb->sb_mbcnt -= MSIZE;
 			sb->sb_mcnt -= 1;
 			if (n->m_flags & M_EXT) {
 				sb->sb_mbcnt -= n->m_ext.ext_size;
 				sb->sb_ccnt -= 1;
 			}
 			m_free(n);
 			n = m->m_next;
 		}
 	}
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 }
 
 /*
  * Mark ready "count" units of I/O starting with "m".  Most mbufs
  * count as a single unit of I/O except for M_EXTPG mbufs which
  * are backed by multiple pages.
  */
 /*
  * Returns 0 when m0 was the buffer's first not-ready mbuf and at least one
  * mbuf was fully readied; returns EINPROGRESS otherwise (nothing fully
  * readied, or m0 was not the mbuf blocking the queue).
  */
 int
 sbready(struct sockbuf *sb, struct mbuf *m0, int count)
 {
 	struct mbuf *m;
 	u_int blocker;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
 	KASSERT(count > 0, ("%s: invalid count %d", __func__, count));
 
 	m = m0;
 	/*
 	 * When m0 is the first not-ready mbuf, M_BLOCKED is cleared along
 	 * with M_NOTREADY and the readied bytes are added to sb_acc.
 	 */
 	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
 
 	while (count > 0) {
 		KASSERT(m->m_flags & M_NOTREADY,
 		    ("%s: m %p !M_NOTREADY", __func__, m));
 		if ((m->m_flags & M_EXTPG) != 0 && m->m_epg_npgs != 0) {
 			/* Not enough units to finish this mbuf: stop here. */
 			if (count < m->m_epg_nrdy) {
 				m->m_epg_nrdy -= count;
 				count = 0;
 				break;
 			}
 			count -= m->m_epg_nrdy;
 			m->m_epg_nrdy = 0;
 		} else
 			count--;
 
 		m->m_flags &= ~(M_NOTREADY | blocker);
 		if (blocker)
 			sb->sb_acc += m->m_len;
 		m = m->m_next;
 	}
 
 	/*
 	 * If the first mbuf is still not fully ready because only
 	 * some of its backing pages were readied, no further progress
 	 * can be made.
 	 */
 	if (m0 == m) {
 		MPASS(m->m_flags & M_NOTREADY);
 		return (EINPROGRESS);
 	}
 
 	if (!blocker) {
 		sbready_compress(sb, m0, m);
 		return (EINPROGRESS);
 	}
 
 	/* This one was blocking all the queue. */
 	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
 		KASSERT(m->m_flags & M_BLOCKED,
 		    ("%s: m %p !M_BLOCKED", __func__, m));
 		m->m_flags &= ~M_BLOCKED;
 		sb->sb_acc += m->m_len;
 	}
 
 	/* m is now the next not-ready mbuf, or NULL if none remain. */
 	sb->sb_fnrdy = m;
 	sbready_compress(sb, m0, m);
 
 	return (0);
 }
 
 /*
  * Adjust sockbuf state reflecting allocation of m.
  */
 void
 sballoc(struct sockbuf *sb, struct mbuf *m)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Storage accounting: the mbuf itself plus any external storage. */
 	sb->sb_mcnt += 1;
 	sb->sb_mbcnt += MSIZE;
 	if ((m->m_flags & M_EXT) != 0) {
 		sb->sb_ccnt += 1;
 		sb->sb_mbcnt += m->m_ext.ext_size;
 	}
 
 	/* Byte accounting: total bytes, and control bytes for non-data. */
 	sb->sb_ccc += m->m_len;
 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 		sb->sb_ctl += m->m_len;
 
 	/*
 	 * Anything queued behind a not-ready mbuf is blocked.  Otherwise a
 	 * not-ready mbuf becomes the first blocker, and a ready one counts
 	 * as available right away.
 	 */
 	if (sb->sb_fnrdy != NULL)
 		m->m_flags |= M_BLOCKED;
 	else if ((m->m_flags & M_NOTREADY) != 0)
 		sb->sb_fnrdy = m;
 	else
 		sb->sb_acc += m->m_len;
 }
 
 /*
  * Adjust sockbuf state reflecting freeing of m.
  */
 void
 sbfree(struct sockbuf *sb, struct mbuf *m)
 {
 
 #if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 
 	sb->sb_ccc -= m->m_len;
 
 	/* Only mbufs not flagged M_NOTAVAIL were counted in sb_acc. */
 	if (!(m->m_flags & M_NOTAVAIL))
 		sb->sb_acc -= m->m_len;
 
 	/*
 	 * Freeing the first not-ready mbuf unblocks the ready mbufs that
 	 * follow it, up to the next not-ready one, which becomes the new
 	 * sb_fnrdy (or NULL).
 	 */
 	if (m == sb->sb_fnrdy) {
 		struct mbuf *n;
 
 		KASSERT(m->m_flags & M_NOTREADY,
 		    ("%s: m %p !M_NOTREADY", __func__, m));
 
 		n = m->m_next;
 		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
 			n->m_flags &= ~M_BLOCKED;
 			sb->sb_acc += n->m_len;
 			n = n->m_next;
 		}
 		sb->sb_fnrdy = n;
 	}
 
 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 		sb->sb_ctl -= m->m_len;
 
 	sb->sb_mbcnt -= MSIZE;
 	sb->sb_mcnt -= 1;
 	if (m->m_flags & M_EXT) {
 		sb->sb_mbcnt -= m->m_ext.ext_size;
 		sb->sb_ccnt -= 1;
 	}
 
 	/*
 	 * Reset the cached send pointer if it referenced m; otherwise,
 	 * shift a non-zero cached offset back by the freed length.
 	 */
 	if (sb->sb_sndptr == m) {
 		sb->sb_sndptr = NULL;
 		sb->sb_sndptroff = 0;
 	}
 	if (sb->sb_sndptroff != 0)
 		sb->sb_sndptroff -= m->m_len;
 }
 
 #ifdef KERN_TLS
 /*
  * Similar to sballoc/sbfree but does not adjust state associated with
  * the sb_mb chain such as sb_fnrdy or sb_sndptr*.  Also assumes mbufs
  * are not ready.
  */
 void
 sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Storage accounting: the mbuf itself plus any external storage. */
 	sb->sb_mcnt += 1;
 	sb->sb_mbcnt += MSIZE;
 	if ((m->m_flags & M_EXT) != 0) {
 		sb->sb_ccnt += 1;
 		sb->sb_mbcnt += m->m_ext.ext_size;
 	}
 
 	/* Byte accounting: total bytes and the TLS byte count. */
 	sb->sb_tlscc += m->m_len;
 	sb->sb_ccc += m->m_len;
 }
 
 void
 sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
 {
 
 #if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 
 	/* Storage accounting: the mbuf itself plus any external storage. */
 	sb->sb_mcnt -= 1;
 	sb->sb_mbcnt -= MSIZE;
 	if ((m->m_flags & M_EXT) != 0) {
 		sb->sb_ccnt -= 1;
 		sb->sb_mbcnt -= m->m_ext.ext_size;
 	}
 
 	/* Byte accounting: total bytes and the TLS byte count. */
 	sb->sb_tlscc -= m->m_len;
 	sb->sb_ccc -= m->m_len;
 }
 #endif
 
 /*
  * Socantsendmore indicates that no more data will be sent on the socket; it
  * would normally be applied to a socket when the user informs the system
  * that no more data is to be sent, by the protocol code (in case
  * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
  * received, and will normally be applied to the socket by a protocol when it
  * detects that the peer will send no more data.  Data queued for reading in
  * the socket may yet be read.
  */
 void
 socantsendmore_locked(struct socket *so)
 {
 
-	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+	SOCK_SENDBUF_LOCK_ASSERT(so);
 
 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
 	/* The wakeup routine is expected to drop the send buffer lock. */
 	sowwakeup_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+	SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantsendmore(struct socket *so)
 {
 
 	/* Unlocked entry point: take the send buffer lock for the callee. */
-	SOCKBUF_LOCK(&so->so_snd);
+	SOCK_SENDBUF_LOCK(so);
 	socantsendmore_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+	SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore_locked(struct socket *so)
 {
 
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+	SOCK_RECVBUF_LOCK_ASSERT(so);
 
 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 #ifdef KERN_TLS
 	/* Receive-side KTLS gets a chance to react before waiters wake. */
 	if (so->so_rcv.sb_flags & SB_TLS_RX)
 		ktls_check_rx(&so->so_rcv);
 #endif
 	/* The wakeup routine is expected to drop the receive buffer lock. */
 	sorwakeup_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore(struct socket *so)
 {
 
 	/* Unlocked entry point: take the receive buffer lock for the callee. */
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_RECVBUF_LOCK(so);
 	socantrcvmore_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow_locked(struct socket *so)
 {
 
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+	SOCK_RECVBUF_LOCK_ASSERT(so);
 
 	/*
 	 * With SO_RERROR set, record ENOBUFS in so_rerror and wake readers;
 	 * otherwise just drop the lock.  Either way the receive buffer lock
 	 * is released before returning.
 	 */
 	if (so->so_options & SO_RERROR) {
 		so->so_rerror = ENOBUFS;
 		sorwakeup_locked(so);
 	} else
-		SOCKBUF_UNLOCK(&so->so_rcv);
+		SOCK_RECVBUF_UNLOCK(so);
 
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow(struct socket *so)
 {
 
 	/* Unlocked entry point: take the receive buffer lock for the callee. */
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_RECVBUF_LOCK(so);
 	soroverflow_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 /*
  * Wait for data to arrive at/drain from a socket buffer.
  */
 int
-sbwait(struct sockbuf *sb)
+sbwait(struct socket *so, sb_which which)
 {
+	struct sockbuf *sb;
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
+	sb = sobuf(so, which);
 	/* Mark the buffer so the wakeup path knows a sleeper exists. */
 	sb->sb_flags |= SB_WAIT;
 	/*
 	 * Sleep on &sb->sb_acc for up to sb_timeo; the sleep is made
 	 * interruptible (PCATCH) unless SB_NOINTR is set.  Returns the
 	 * msleep_sbt() result.
 	 */
-	return (msleep_sbt(&sb->sb_acc, SOCKBUF_MTX(sb),
+	return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which),
 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
 	    sb->sb_timeo, 0, 0));
 }
 
 /*
  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
  * via SIGIO if the socket has the SS_ASYNC flag set.
  *
  * Called with the socket buffer lock held; will release the lock by the end
  * of the function.  This allows the caller to acquire the socket buffer lock
  * while testing for the need for various sorts of wakeup and hold it through
  * to the point where it's no longer required.  We currently hold the lock
  * through calls out to other subsystems (with the exception of kqueue), and
  * then release it to avoid lock order issues.  It's not clear that's
  * correct.
  */
-void
-sowakeup(struct socket *so, struct sockbuf *sb)
+static __always_inline void
+sowakeup(struct socket *so, const sb_which which)
 {
+	struct sockbuf *sb;
 	int ret;
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
+	sb = sobuf(so, which);
 	/* Notify select/poll waiters; clear SB_SEL once none remain. */
 	selwakeuppri(sb->sb_sel, PSOCK);
 	if (!SEL_WAITING(sb->sb_sel))
 		sb->sb_flags &= ~SB_SEL;
 	/* Wake threads sleeping on &sb->sb_acc (see SB_WAIT). */
 	if (sb->sb_flags & SB_WAIT) {
 		sb->sb_flags &= ~SB_WAIT;
 		wakeup(&sb->sb_acc);
 	}
 	KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
 	if (sb->sb_upcall != NULL) {
 		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
 		/* SU_ISCONNECTED is only valid from the receive upcall. */
 		if (ret == SU_ISCONNECTED) {
 			KASSERT(sb == &so->so_rcv,
 			    ("SO_SND upcall returned SU_ISCONNECTED"));
 			soupcall_clear(so, SO_RCV);
 		}
 	} else
 		ret = SU_OK;
 	if (sb->sb_flags & SB_AIO)
-		sowakeup_aio(so, sb);
-	SOCKBUF_UNLOCK(sb);
+		sowakeup_aio(so, which);
+	SOCK_BUF_UNLOCK(so, which);
 	/* The remaining notifications run without the buffer lock. */
 	if (ret == SU_ISCONNECTED)
 		soisconnected(so);
 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGIO, 0);
-	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
+	SOCK_BUF_UNLOCK_ASSERT(so, which);
+}
+
+/*
+ * Do we need to notify the other side when I/O is possible?
+ */
+static __always_inline bool
+sb_notify(const struct sockbuf *sb)
+{
 	/* True if any flag that requests a notification is set on sb. */
+	return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC |
+	    SB_UPCALL | SB_AIO | SB_KNOTE)) != 0);
+}
+
+void
+sorwakeup_locked(struct socket *so)
+{
 	/*
 	 * Called with the receive buffer lock held.  Either branch releases
 	 * it: sowakeup() unlocks at its end, and the else arm unlocks
 	 * directly when nobody needs a notification.
 	 */
+	SOCK_RECVBUF_LOCK_ASSERT(so);
+	if (sb_notify(&so->so_rcv))
+		sowakeup(so, SO_RCV);
+	else
+		SOCK_RECVBUF_UNLOCK(so);
+}
+
+void
+sowwakeup_locked(struct socket *so)
+{
 	/*
 	 * Called with the send buffer lock held.  Either branch releases
 	 * it: sowakeup() unlocks at its end, and the else arm unlocks
 	 * directly when nobody needs a notification.
 	 */
+	SOCK_SENDBUF_LOCK_ASSERT(so);
+	if (sb_notify(&so->so_snd))
+		sowakeup(so, SO_SND);
+	else
+		SOCK_SENDBUF_UNLOCK(so);
 }
 
 /*
  * Socket buffer (struct sockbuf) utility routines.
  *
  * Each socket contains two socket buffers: one for sending data and one for
  * receiving data.  Each buffer contains a queue of mbufs, information about
  * the number of mbufs and amount of data in the queue, and other fields
  * allowing select() statements and notification on data availability to be
  * implemented.
  *
  * Data stored in a socket buffer is maintained as a list of records.  Each
  * record is a list of mbufs chained together with the m_next field.  Records
  * are chained together with the m_nextpkt field. The upper level routine
  * soreceive() expects the following conventions to be observed when placing
  * information in the receive buffer:
  *
  * 1. If the protocol requires each message be preceded by the sender's name,
  *    then a record containing that name must be present before any
  *    associated data (mbuf's must be of type MT_SONAME).
  * 2. If the protocol supports the exchange of ``access rights'' (really just
  *    additional data associated with the message), and there are ``rights''
  *    to be received, then a record containing this data should be present
  *    (mbuf's must be of type MT_RIGHTS).
  * 3. If a name or rights record exists, then it must be followed by a data
  *    record, perhaps of zero length.
  *
  * Before using a new socket structure it is first necessary to reserve
  * buffer space to the socket, by calling sbreserve().  This should commit
  * some of the available buffer space in the system buffer pool for the
  * socket (currently, it does nothing but enforce limits).  The space should
  * be released by calling sbrelease() when the socket is destroyed.
  */
 int
 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
 	struct thread *td = curthread;
 
 	/* Both buffers are locked for the duration: send first, then receive. */
-	SOCKBUF_LOCK(&so->so_snd);
-	SOCKBUF_LOCK(&so->so_rcv);
-	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
+	SOCK_SENDBUF_LOCK(so);
+	SOCK_RECVBUF_LOCK(so);
+	if (sbreserve_locked(so, SO_SND, sndcc, td) == 0)
 		goto bad;
-	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
+	if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0)
 		goto bad2;
 	/* Apply default low-water marks where none were set. */
 	if (so->so_rcv.sb_lowat == 0)
 		so->so_rcv.sb_lowat = 1;
 	if (so->so_snd.sb_lowat == 0)
 		so->so_snd.sb_lowat = MCLBYTES;
 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
-	SOCKBUF_UNLOCK(&so->so_rcv);
-	SOCKBUF_UNLOCK(&so->so_snd);
+	SOCK_RECVBUF_UNLOCK(so);
+	SOCK_SENDBUF_UNLOCK(so);
 	return (0);
 bad2:
 	/* The receive reservation failed: undo the send reservation. */
-	sbrelease_locked(&so->so_snd, so);
+	sbrelease_locked(so, SO_SND);
 bad:
-	SOCKBUF_UNLOCK(&so->so_rcv);
-	SOCKBUF_UNLOCK(&so->so_snd);
+	SOCK_RECVBUF_UNLOCK(so);
+	SOCK_SENDBUF_UNLOCK(so);
 	return (ENOBUFS);
 }
 
 static int
 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
 {
 	int error = 0;
 	u_long tmp_sb_max = sb_max;
 
 	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
 	if (error || !req->newptr)
 		return (error);
 	if (tmp_sb_max < MSIZE + MCLBYTES)
 		return (EINVAL);
 	sb_max = tmp_sb_max;
 	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
 	return (0);
 }
 
 /*
  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  * become limiting if buffering efficiency is near the normal case.
  */
-int
-sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool
+sbreserve_locked(struct socket *so, sb_which which, u_long cc,
     struct thread *td)
 {
+	struct sockbuf *sb = sobuf(so, which);
 	rlim_t sbsize_limit;
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
 	/*
 	 * When a thread is passed, we take into account the thread's socket
 	 * buffer size limit.  The caller will generally pass curthread, but
 	 * in the TCP input path, NULL will be passed to indicate that no
 	 * appropriate thread resource limits are available.  In that case,
 	 * we don't apply a process limit.
 	 */
 	/* Requests above the adjusted global maximum are refused outright. */
 	if (cc > sb_max_adj)
-		return (0);
+		return (false);
 	if (td != NULL) {
 		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
 	} else
 		sbsize_limit = RLIM_INFINITY;
 	/* chgsbsize() updates sb_hiwat; a false result fails the reservation. */
 	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
 	    sbsize_limit))
-		return (0);
+		return (false);
+		return (false);
 	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
 	/* Keep the low-water mark at or below the new high-water mark. */
 	if (sb->sb_lowat > sb->sb_hiwat)
 		sb->sb_lowat = sb->sb_hiwat;
-	return (1);
+	return (true);
 }
 
 /*
  * Apply the value cc for one of SO_SNDBUF, SO_RCVBUF, SO_SNDLOWAT or
  * SO_RCVLOWAT to socket so.  For a listening socket the value is stored in
  * the sol_sb* fields of the socket; otherwise it is applied to the send or
  * receive sockbuf while holding that buffer's lock.  Returns 0 on success
  * or ENOBUFS when a buffer size cannot be set.
  *
  * NOTE(review): neither switch below has a default case, so for any other
  * cmd the pointers (and wh) stay unset -- presumably callers only pass the
  * four options above; verify.
  */
 int
 sbsetopt(struct socket *so, int cmd, u_long cc)
 {
 	struct sockbuf *sb;
+	sb_which wh;
 	short *flags;
 	u_int *hiwat, *lowat;
 	int error;
 
 	sb = NULL;
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
 		switch (cmd) {
 			case SO_SNDLOWAT:
 			case SO_SNDBUF:
 				lowat = &so->sol_sbsnd_lowat;
 				hiwat = &so->sol_sbsnd_hiwat;
 				flags = &so->sol_sbsnd_flags;
 				break;
 			case SO_RCVLOWAT:
 			case SO_RCVBUF:
 				lowat = &so->sol_sbrcv_lowat;
 				hiwat = &so->sol_sbrcv_hiwat;
 				flags = &so->sol_sbrcv_flags;
 				break;
 		}
 	} else {
 		switch (cmd) {
 			case SO_SNDLOWAT:
 			case SO_SNDBUF:
 				sb = &so->so_snd;
+				wh = SO_SND;
 				break;
 			case SO_RCVLOWAT:
 			case SO_RCVBUF:
 				sb = &so->so_rcv;
+				wh = SO_RCV;
 				break;
 		}
 		flags = &sb->sb_flags;
 		hiwat = &sb->sb_hiwat;
 		lowat = &sb->sb_lowat;
-		SOCKBUF_LOCK(sb);
+		SOCK_BUF_LOCK(so, wh);
 	}
 
 	error = 0;
 	switch (cmd) {
 	case SO_SNDBUF:
 	case SO_RCVBUF:
 		if (SOLISTENING(so)) {
 			if (cc > sb_max_adj) {
 				error = ENOBUFS;
 				break;
 			}
 			*hiwat = cc;
 			if (*lowat > *hiwat)
 				*lowat = *hiwat;
 		} else {
-			if (!sbreserve_locked(sb, cc, so, curthread))
+			if (!sbreserve_locked(so, wh, cc, curthread))
 				error = ENOBUFS;
 		}
 		/* An explicitly set size turns automatic sizing off. */
 		if (error == 0)
 			*flags &= ~SB_AUTOSIZE;
 		break;
 	case SO_SNDLOWAT:
 	case SO_RCVLOWAT:
 		/*
 		 * Make sure the low-water is never greater than the
 		 * high-water.
 		 */
 		*lowat = (cc > *hiwat) ? *hiwat : cc;
 		break;
 	}
 
 	/* The sockbuf lock was only taken for a non-listening socket. */
 	if (!SOLISTENING(so))
-		SOCKBUF_UNLOCK(sb);
+		SOCK_BUF_UNLOCK(so, wh);
 	SOCK_UNLOCK(so);
 	return (error);
 }
 
 /*
  * Free mbufs held by a socket, and reserved mbuf space.
  *
  * No lock assertion is made here; sbrelease_locked() asserts the buffer
  * lock before calling in, while sbdestroy() calls in directly.
  */
 static void
-sbrelease_internal(struct sockbuf *sb, struct socket *so)
+sbrelease_internal(struct socket *so, sb_which which)
 {
+	struct sockbuf *sb = sobuf(so, which);
 
 	sbflush_internal(sb);
 	/* Request a high-water mark of 0 through chgsbsize(); result ignored. */
 	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
 	    RLIM_INFINITY);
 	sb->sb_mbmax = 0;
 }
 
 /*
  * Release the contents and reservation of one socket buffer; the lock of
  * that buffer must already be held.
  */
 void
-sbrelease_locked(struct sockbuf *sb, struct socket *so)
+sbrelease_locked(struct socket *so, sb_which which)
 {
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
-	sbrelease_internal(sb, so);
+	sbrelease_internal(so, which);
 }
 
 /*
  * Locking wrapper around sbrelease_locked(): takes and drops the lock of
  * the selected socket buffer around the call.
  */
 void
-sbrelease(struct sockbuf *sb, struct socket *so)
+sbrelease(struct socket *so, sb_which which)
 {
 
-	SOCKBUF_LOCK(sb);
-	sbrelease_locked(sb, so);
-	SOCKBUF_UNLOCK(sb);
+	SOCK_BUF_LOCK(so, which);
+	sbrelease_locked(so, which);
+	SOCK_BUF_UNLOCK(so, which);
 }
 
 /*
  * Tear down one socket buffer: with KERN_TLS, free any attached TLS session
  * info first, then release the buffer contents and reservation.  No sockbuf
  * lock is taken or asserted here.
  */
 void
-sbdestroy(struct sockbuf *sb, struct socket *so)
+sbdestroy(struct socket *so, sb_which which)
 {
-
-	sbrelease_internal(sb, so);
 #ifdef KERN_TLS
+	struct sockbuf *sb = sobuf(so, which);
+
 	if (sb->sb_tls_info != NULL)
 		ktls_free(sb->sb_tls_info);
 	sb->sb_tls_info = NULL;
 #endif
+	sbrelease_internal(so, which);
 }
 
 /*
  * Routines to add and remove data from an mbuf queue.
  *
  * The routines sbappend() or sbappendrecord() are normally called to append
  * new mbufs to a socket buffer, after checking that adequate space is
  * available, comparing the function sbspace() with the amount of data to be
  * added.  sbappendrecord() differs from sbappend() in that data supplied is
  * treated as the beginning of a new record.  To place a sender's address,
  * optional access rights, and data in a socket receive buffer,
  * sbappendaddr() should be used.  To place access rights and data in a
  * socket receive buffer, sbappendrights() should be used.  In either case,
  * the new data begins a new record.  Note that unlike sbappend() and
  * sbappendrecord(), these routines check for the caller that there will be
  * enough space to store the data.  Each fails if there is not enough space,
  * or if it cannot find mbufs to store additional information in.
  *
  * Reliable protocols may use the socket send buffer to hold data awaiting
  * acknowledgement.  Data is normally copied from a socket send buffer in a
  * protocol with m_copy for output to a peer, and then removing the data from
  * the socket buffer with sbdrop() or sbdroprecord() when the data is
  * acknowledged by the peer.
  */
 #ifdef SOCKBUF_DEBUG
 /*
  * Debug check: verify that sb_lastrecord points at the last record of the
  * buffer.  On mismatch the record chain is printed and the kernel panics,
  * reporting the file and line of the caller.
  */
 void
 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *last, *rec;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Find the head of the final record (NULL for an empty buffer). */
 	for (last = sb->sb_mb; last != NULL && last->m_nextpkt != NULL;
 	    last = last->m_nextpkt)
 		continue;
 
 	if (last == sb->sb_lastrecord)
 		return;
 
 	printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
 		__func__, sb->sb_mb, sb->sb_lastrecord, last);
 	printf("packet chain:\n");
 	for (rec = sb->sb_mb; rec != NULL; rec = rec->m_nextpkt)
 		printf("\t%p\n", rec);
 	panic("%s from %s:%u", __func__, file, line);
 }
 
 /*
  * Debug check: verify that sb_mbtail points at the last mbuf of the last
  * record and, with KERN_TLS, that sb_mtlstail points at the last mbuf of
  * the sb_mtls chain.  On mismatch the chains are printed and the kernel
  * panics, reporting the file and line of the caller.
  */
 void
 sblastmbufchk(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *tail, *rec, *seg;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Head of the last record, then the last mbuf within that record. */
 	for (tail = sb->sb_mb; tail != NULL && tail->m_nextpkt != NULL;
 	    tail = tail->m_nextpkt)
 		continue;
 	for (; tail != NULL && tail->m_next != NULL; tail = tail->m_next)
 		continue;
 
 	if (tail != sb->sb_mbtail) {
 		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
 			__func__, sb->sb_mb, sb->sb_mbtail, tail);
 		printf("packet tree:\n");
 		for (rec = sb->sb_mb; rec != NULL; rec = rec->m_nextpkt) {
 			printf("\t");
 			for (seg = rec; seg != NULL; seg = seg->m_next)
 				printf("%p ", seg);
 			printf("\n");
 		}
 		panic("%s from %s:%u", __func__, file, line);
 	}
 
 #ifdef KERN_TLS
 	/* Same check for the single chain of pending TLS mbufs. */
 	for (tail = sb->sb_mtls; tail != NULL && tail->m_next != NULL;
 	    tail = tail->m_next)
 		continue;
 
 	if (tail != sb->sb_mtlstail) {
 		printf("%s: sb_mtls %p sb_mtlstail %p last %p\n",
 			__func__, sb->sb_mtls, sb->sb_mtlstail, tail);
 		printf("TLS packet tree:\n");
 		printf("\t");
 		for (seg = sb->sb_mtls; seg != NULL; seg = seg->m_next)
 			printf("%p ", seg);
 		printf("\n");
 		panic("%s from %s:%u", __func__, file, line);
 	}
 #endif
 }
 #endif /* SOCKBUF_DEBUG */
 
 /*
  * Link mbuf chain m0 into sb as a new last record: it is hung off the
  * m_nextpkt of the current last record, or becomes sb_mb when there is no
  * last record, and sb_lastrecord is updated to point at it.  The sockbuf
  * lock must be held.  Both arguments are evaluated more than once.
  */
 #define SBLINKRECORD(sb, m0) do {					\
 	SOCKBUF_LOCK_ASSERT(sb);					\
 	if ((sb)->sb_lastrecord != NULL)				\
 		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
 	else								\
 		(sb)->sb_mb = (m0);					\
 	(sb)->sb_lastrecord = (m0);					\
 } while (/*CONSTCOND*/0)
 
 /*
  * Append mbuf chain m to the last record in the socket buffer sb.  The
  * additional space associated with the mbuf chain is recorded in sb.  Empty
  * mbufs are discarded and mbufs are compacted where possible.
  *
  * The sockbuf lock must be held.  A NULL m is ignored.  If the last record
  * already contains an mbuf marked M_EOR, m is appended as a new record
  * instead.
  */
 void
 sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	struct mbuf *n;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (m == NULL)
 		return;
 	sbm_clrprotoflags(m, flags);
 	SBLASTRECORDCHK(sb);
 	n = sb->sb_mb;
 	if (n) {
 		/* Advance to the last record ... */
 		while (n->m_nextpkt)
 			n = n->m_nextpkt;
 		/*
 		 * ... and walk it to its last mbuf, diverting to a new
 		 * record as soon as an end-of-record marker is seen.
 		 */
 		do {
 			if (n->m_flags & M_EOR) {
 				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 				return;
 			}
 		} while (n->m_next && (n = n->m_next));
 	} else {
 		/*
 		 * XXX Would like to simply use sb_mbtail here, but
 		 * XXX I need to verify that I won't miss an EOR that
 		 * XXX way.
 		 */
 		if ((n = sb->sb_lastrecord) != NULL) {
 			do {
 				if (n->m_flags & M_EOR) {
 					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 					return;
 				}
 			} while (n->m_next && (n = n->m_next));
 		} else {
 			/*
 			 * If this is the first record in the socket buffer,
 			 * it's also the last record.
 			 */
 			sb->sb_lastrecord = m;
 		}
 	}
 	/* n is the mbuf to append after, or NULL when the buffer is empty. */
 	sbcompress(sb, m, n);
 	SBLASTRECORDCHK(sb);
 }
 
 /*
  * Locking wrapper around sbappend_locked(): append mbuf chain m to the last
  * record of sb while holding the sockbuf lock for the duration of the call.
  */
 void
 sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	SOCKBUF_LOCK(sb);
 	sbappend_locked(sb, m, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 #ifdef KERN_TLS
 /*
  * Queue encrypted TLS data on the receive side.  Every mbuf of the chain is
  * flagged M_NOTREADY, the chain is added after sb_mtlstail and
  * ktls_check_rx() is then called on the buffer.
  */
 static void
 sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
 {
 	struct mbuf *cur;
 
 	SBLASTMBUFCHK(sb);
 
 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
 	m_demote(m, 1, 0);
 
 	cur = m;
 	while (cur != NULL) {
 		cur->m_flags |= M_NOTREADY;
 		cur = cur->m_next;
 	}
 	sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
 	ktls_check_rx(sb);
 }
 #endif
 
 /*
  * This version of sbappend() should only be used when the caller absolutely
  * knows that there will never be more than one record in the socket buffer,
  * that is, a stream protocol (such as TCP).
  *
  * The sockbuf lock must be held and m must not be linked to another packet
  * through m_nextpkt.  When PRUS_NOTREADY is set in flags, M_NOTREADY is
  * passed on to m_demote().
  */
 void
 sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
 
 #ifdef KERN_TLS
 	/*
 	 * Decrypted TLS records are appended as records via
 	 * sbappendrecord().  TCP passes encrypted TLS records to this
 	 * function which must be scheduled for decryption.
 	 */
 	if (sb->sb_flags & SB_TLS_RX) {
 		sbappend_ktls_rx(sb, m);
 		return;
 	}
 #endif
 
 	/* A stream buffer holds a single record: first == last. */
 	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
 
 	SBLASTMBUFCHK(sb);
 
 #ifdef KERN_TLS
 	if (sb->sb_tls_info != NULL)
 		ktls_seq(sb, m);
 #endif
 
 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
 	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
 
 	/* Append directly after the current tail mbuf. */
 	sbcompress(sb, m, sb->sb_mbtail);
 
 	/* Restore the single-record invariant after the append. */
 	sb->sb_lastrecord = sb->sb_mb;
 	SBLASTRECORDCHK(sb);
 }
 
 /*
  * Locking wrapper around sbappendstream_locked(); see that function for the
  * single-record (stream protocol) restriction that applies to callers.
  */
 void
 sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	SOCKBUF_LOCK(sb);
 	sbappendstream_locked(sb, m, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 #ifdef SOCKBUF_DEBUG
 /*
  * Debug consistency check of the sockbuf accounting.  The available byte
  * count (acc), total byte count (ccc) and mbuf storage count (mbcnt) are
  * recomputed by walking the mbuf chains and compared with the fields in
  * sb; with KERN_TLS the same is done for tlscc.  The kernel panics,
  * reporting the caller's file and line, on any mismatch or when an mbuf
  * breaks one of the per-mbuf checks below.  The sockbuf lock must be held.
  */
 void
 sbcheck(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *m, *n, *fnrdy;
 	u_long acc, ccc, mbcnt;
 #ifdef KERN_TLS
 	u_long tlscc;
 #endif
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	acc = ccc = mbcnt = 0;
 	fnrdy = NULL;
 
 	/* Outer loop: records (m_nextpkt); inner loop: mbufs of a record. */
 	for (m = sb->sb_mb; m; m = n) {
 	    n = m->m_nextpkt;
 	    for (; m; m = m->m_next) {
 		if (m->m_len == 0) {
 			printf("sb %p empty mbuf %p\n", sb, m);
 			goto fail;
 		}
 		/* The first M_NOTREADY mbuf found must be sb_fnrdy. */
 		if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
 			if (m != sb->sb_fnrdy) {
 				printf("sb %p: fnrdy %p != m %p\n",
 				    sb, sb->sb_fnrdy, m);
 				goto fail;
 			}
 			fnrdy = m;
 		}
 		/*
 		 * From the first not-ready mbuf on, every mbuf must carry
 		 * M_NOTAVAIL and is excluded from the available count.
 		 */
 		if (fnrdy) {
 			if (!(m->m_flags & M_NOTAVAIL)) {
 				printf("sb %p: fnrdy %p, m %p is avail\n",
 				    sb, sb->sb_fnrdy, m);
 				goto fail;
 			}
 		} else
 			acc += m->m_len;
 		ccc += m->m_len;
 		mbcnt += MSIZE;
 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 			mbcnt += m->m_ext.ext_size;
 	    }
 	}
 #ifdef KERN_TLS
 	/*
 	 * Account for mbufs "detached" by ktls_detach_record() while
 	 * they are decrypted by ktls_decrypt().  tlsdcc gives a count
 	 * of the detached bytes that are included in ccc.  The mbufs
 	 * and clusters are not included in the socket buffer
 	 * accounting.
 	 */
 	ccc += sb->sb_tlsdcc;
 
 	/* The sb_mtls chain must be a single chain of not-ready mbufs. */
 	tlscc = 0;
 	for (m = sb->sb_mtls; m; m = m->m_next) {
 		if (m->m_nextpkt != NULL) {
 			printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
 			goto fail;
 		}
 		if ((m->m_flags & M_NOTREADY) == 0) {
 			printf("sb %p TLS mbuf %p ready\n", sb, m);
 			goto fail;
 		}
 		tlscc += m->m_len;
 		ccc += m->m_len;
 		mbcnt += MSIZE;
 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 			mbcnt += m->m_ext.ext_size;
 	}
 
 	if (sb->sb_tlscc != tlscc) {
 		printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
 		    sb->sb_tlsdcc);
 		goto fail;
 	}
 #endif
 	/* Compare the recomputed totals with the cached counters. */
 	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
 		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
 		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
 #ifdef KERN_TLS
 		printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
 		    sb->sb_tlsdcc);
 #endif
 		goto fail;
 	}
 	return;
 fail:
 	panic("%s from %s:%u", __func__, file, line);
 }
 #endif
 
 /*
  * Append mbuf chain m0 to the socket buffer sb as the beginning of a new
  * record.  The sockbuf lock must be held; a NULL m0 is ignored.
  *
  * The first mbuf is accounted and linked as the record head unconditionally,
  * the remainder of the chain is handed to sbcompress().
  */
 void
 sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
 {
 	struct mbuf *m;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (m0 == NULL)
 		return;
 	m_clrprotoflags(m0);
 	/*
 	 * Put the first mbuf on the queue.  Note this permits zero length
 	 * records.
 	 */
 	sballoc(sb, m0);
 	SBLASTRECORDCHK(sb);
 	SBLINKRECORD(sb, m0);
 	sb->sb_mbtail = m0;
 	/* Detach the rest of the chain; it is re-added through sbcompress(). */
 	m = m0->m_next;
 	m0->m_next = 0;
 	/* Move an end-of-record mark from the head to the following mbuf. */
 	if (m && (m0->m_flags & M_EOR)) {
 		m0->m_flags &= ~M_EOR;
 		m->m_flags |= M_EOR;
 	}
 	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
 	sbcompress(sb, m, m0);
 }
 
 /*
  * Locking wrapper around sbappendrecord_locked(): append mbuf chain m0 to sb
  * as a new record while holding the sockbuf lock.
  */
 void
 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
 {
 	SOCKBUF_LOCK(sb);
 	sbappendrecord_locked(sb, m0);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Helper routine that appends data, control, and address to a sockbuf.
  *
  * A new record is built as: address mbuf (a copy of asa), then the control
  * chain, then the data chain m0.  ctrl_last must be the last mbuf of
  * control, or NULL when there is no control chain.  Returns 1 on success
  * and 0 when the address does not fit in an mbuf (checked only if
  * MSIZE <= 256) or when no mbuf could be allocated for it.  No space check
  * is made against the sockbuf here.
  */
 static int
 sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
 {
 	struct mbuf *m, *n, *nlast;
 #if MSIZE <= 256
 	if (asa->sa_len > MLEN)
 		return (0);
 #endif
 	m = m_get(M_NOWAIT, MT_SONAME);
 	if (m == NULL)
 		return (0);
 	m->m_len = asa->sa_len;
 	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
 	if (m0) {
 		M_ASSERT_NO_SND_TAG(m0);
 		m_clrprotoflags(m0);
 		m_tag_delete_chain(m0, NULL);
 		/*
 		 * Clear some persistent info from pkthdr.
 		 * We don't use m_demote(), because some netgraph consumers
 		 * expect M_PKTHDR presence.
 		 */
 		m0->m_pkthdr.rcvif = NULL;
 		m0->m_pkthdr.flowid = 0;
 		m0->m_pkthdr.csum_flags = 0;
 		m0->m_pkthdr.fibnum = 0;
 		m0->m_pkthdr.rsstype = 0;
 	}
 	if (ctrl_last)
 		ctrl_last->m_next = m0;	/* concatenate data to control */
 	else
 		control = m0;
 	m->m_next = control;
 	/* Account for every mbuf of the record; n stops on the last one. */
 	for (n = m; n->m_next != NULL; n = n->m_next)
 		sballoc(sb, n);
 	sballoc(sb, n);
 	nlast = n;
 	SBLINKRECORD(sb, m);
 
 	sb->sb_mbtail = nlast;
 	SBLASTMBUFCHK(sb);
 
 	SBLASTRECORDCHK(sb);
 	return (1);
 }
 
 /*
  * Append address and data, and optionally, control (ancillary) data to the
  * receive queue of a socket.  If present, m0 must include a packet header
  * with total length.  Returns 0 if no space in sockbuf or insufficient
  * mbufs.
  */
 int
 sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	struct mbuf *ctrl_last;
 	int space = asa->sa_len;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 		panic("sbappendaddr_locked");
 	if (m0)
 		space += m0->m_pkthdr.len;
 	space += m_length(control, &ctrl_last);
 
 	if (space > sbspace(sb))
 		return (0);
 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
 }
 
 /*
  * Variant of sbappendaddr_locked() that does not validate the space left in
  * the receiving sockbuf.  The sockbuf lock must be held.  Returns 0 if mbufs
  * are insufficient, non-zero otherwise.
  */
 int
 sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	struct mbuf *ctrl_last;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (control != NULL)
 		ctrl_last = m_last(control);
 	else
 		ctrl_last = NULL;
 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
 }
 
 /*
  * Locking wrapper around sbappendaddr_locked(): the sockbuf lock is held
  * for the duration of the call and the result is passed through unchanged
  * (0 if there is no space in the sockbuf or mbufs are insufficient).
  */
 int
 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	int appended;
 
 	SOCKBUF_LOCK(sb);
 	appended = sbappendaddr_locked(sb, asa, m0, control);
 	SOCKBUF_UNLOCK(sb);
 
 	return (appended);
 }
 
 void
 sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
     struct mbuf *control, int flags)
 {
 	struct mbuf *m, *mlast;
 
 	sbm_clrprotoflags(m0, flags);
 	m_last(control)->m_next = m0;
 
 	SBLASTRECORDCHK(sb);
 
 	for (m = control; m->m_next; m = m->m_next)
 		sballoc(sb, m);
 	sballoc(sb, m);
 	mlast = m;
 	SBLINKRECORD(sb, control);
 
 	sb->sb_mbtail = mlast;
 	SBLASTMBUFCHK(sb);
 
 	SBLASTRECORDCHK(sb);
 }
 
 /*
  * Locking wrapper around sbappendcontrol_locked(): the sockbuf lock is held
  * for the duration of the call.
  */
 void
 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
     int flags)
 {
 	SOCKBUF_LOCK(sb);
 	sbappendcontrol_locked(sb, m0, control, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
  * (n).  If (n) is NULL, the buffer is presumed empty.
  *
  * When the data is compressed, mbufs in the chain may be handled in one of
  * three ways:
  *
  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
  *     record boundary, and no change in data type).
  *
  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
  *     an mbuf already in the socket buffer.  This can occur if an
  *     appropriate mbuf exists, there is room, both mbufs are not marked as
  *     not ready, and no merging of data types will occur.
  *
  * (3) The mbuf may be appended to the end of the existing mbuf chain.
  *
  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
  * end-of-record.
  *
  * The sockbuf lock must be held.
  */
 void
 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 {
 	int eor = 0;
 	struct mbuf *o;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	while (m) {
 		/* Remember any end-of-record mark seen so far in the chain. */
 		eor |= m->m_flags & M_EOR;
 		/*
 		 * Case (1): an empty mbuf is dropped unless an EOR is
 		 * pending and no neighbour (next mbuf, else n) of the same
 		 * type exists.
 		 */
 		if (m->m_len == 0 &&
 		    (eor == 0 ||
 		     (((o = m->m_next) || (o = n)) &&
 		      o->m_type == m->m_type))) {
 			if (sb->sb_lastrecord == m)
 				sb->sb_lastrecord = m->m_next;
 			m = m_free(m);
 			continue;
 		}
 		/* Case (2): copy the data into the tail mbuf n and free m. */
 		if (n && (n->m_flags & M_EOR) == 0 &&
 		    M_WRITABLE(n) &&
 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
 		    !(m->m_flags & M_NOTREADY) &&
 		    !(n->m_flags & (M_NOTREADY | M_EXTPG)) &&
 		    !mbuf_has_tls_session(m) &&
 		    !mbuf_has_tls_session(n) &&
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n) &&
 		    n->m_type == m->m_type) {
 			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
 			n->m_len += m->m_len;
 			sb->sb_ccc += m->m_len;
 			/* Bytes count as available only if nothing is pending. */
 			if (sb->sb_fnrdy == NULL)
 				sb->sb_acc += m->m_len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 				/* XXX: Probably don't need.*/
 				sb->sb_ctl += m->m_len;
 			m = m_free(m);
 			continue;
 		}
 		/* Case (3): link m itself behind n (or as the first mbuf). */
 		if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) &&
 		    (m->m_flags & M_NOTREADY) == 0 &&
 		    !mbuf_has_tls_session(m))
 			(void)mb_unmapped_compress(m);
 		if (n)
 			n->m_next = m;
 		else
 			sb->sb_mb = m;
 		sb->sb_mbtail = m;
 		sballoc(sb, m);
 		n = m;
 		/* EOR is re-applied to the final mbuf after the loop. */
 		m->m_flags &= ~M_EOR;
 		m = m->m_next;
 		n->m_next = 0;
 	}
 	if (eor) {
 		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
 		n->m_flags |= eor;
 	}
 	SBLASTMBUFCHK(sb);
 }
 
 #ifdef KERN_TLS
 /*
  * A version of sbcompress() for encrypted TLS RX mbufs.  These mbufs
  * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also
  * a bit simpler (no EOR markers, always MT_DATA, etc.).
  *
  * 'n' is the mbuf to append after, or NULL when the chain is empty.  The
  * sockbuf lock must be held.
  */
 static void
 sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	while (m) {
 		KASSERT((m->m_flags & M_EOR) == 0,
 		    ("TLS RX mbuf %p with EOR", m));
 		KASSERT(m->m_type == MT_DATA,
 		    ("TLS RX mbuf %p is not MT_DATA", m));
 		KASSERT((m->m_flags & M_NOTREADY) != 0,
 		    ("TLS RX mbuf %p ready", m));
 		KASSERT((m->m_flags & M_EXTPG) == 0,
 		    ("TLS RX mbuf %p unmapped", m));
 
 		/* Empty mbufs contribute nothing and are dropped. */
 		if (m->m_len == 0) {
 			m = m_free(m);
 			continue;
 		}
 
 		/*
 		 * Even though both 'n' and 'm' are NOTREADY, it's ok
 		 * to coalesce the data.
 		 */
 		if (n &&
 		    M_WRITABLE(n) &&
 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
 		    !(n->m_flags & (M_EXTPG)) &&
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n)) {
 			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
 			n->m_len += m->m_len;
 			sb->sb_ccc += m->m_len;
 			sb->sb_tlscc += m->m_len;
 			m = m_free(m);
 			continue;
 		}
 		/* Otherwise link m itself at the tail of the sb_mtls chain. */
 		if (n)
 			n->m_next = m;
 		else
 			sb->sb_mtls = m;
 		sb->sb_mtlstail = m;
 		sballoc_ktls_rx(sb, m);
 		n = m;
 		m = m->m_next;
 		n->m_next = NULL;
 	}
 	SBLASTMBUFCHK(sb);
 }
 #endif
 
 /*
  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
  *
  * No lock assertion is made here; sbflush_locked() asserts the sockbuf lock
  * before calling in.
  */
 static void
 sbflush_internal(struct sockbuf *sb)
 {
 
 	/* Repeat until no mbuf storage and no detached TLS bytes remain. */
 	while (sb->sb_mbcnt || sb->sb_tlsdcc) {
 		/*
 		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
 		 * we would loop forever. Panic instead.
 		 */
 		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
 			break;
 		/* Cut everything that is accounted and free the result. */
 		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
 	}
 	/* Left-over byte or mbuf accounting is reported by this assertion. */
 	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
 	    ("%s: ccc %u mb %p mbcnt %u", __func__,
 	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
 }
 
 /*
  * Free all mbufs in a sockbuf whose lock is already held by the caller.
  */
 void
 sbflush_locked(struct sockbuf *sb)
 {
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	sbflush_internal(sb);
 }
 
 /*
  * Locking wrapper around sbflush_locked(): free all mbufs in a sockbuf
  * while holding its lock.
  */
 void
 sbflush(struct sockbuf *sb)
 {
 	SOCKBUF_LOCK(sb);
 	sbflush_locked(sb);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Cut data from (the front of) a sockbuf.
  *
  * Removes 'len' bytes (0 <= len <= sb_ccc) and returns the mbufs that were
  * taken off the buffer as a chain linked through m_next, for the caller to
  * free or use.  An mbuf that is only partially consumed stays in the buffer
  * with its data pointer and length adjusted.  No lock assertion is made
  * here; the locked wrappers assert the sockbuf lock.
  */
 static struct mbuf *
 sbcut_internal(struct sockbuf *sb, int len)
 {
 	struct mbuf *m, *next, *mfree;
 	bool is_tls;
 
 	KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
 	    __func__, len));
 	KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
 	    __func__, len, sb->sb_ccc));
 
 	/* m walks the current record, next is the head of the following one. */
 	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 	is_tls = false;
 	mfree = NULL;
 
 	while (len > 0) {
 		if (m == NULL) {
 #ifdef KERN_TLS
 			/*
 			 * All regular records are consumed: account for the
 			 * detached TLS bytes first, then continue with the
 			 * chain of pending TLS mbufs.
 			 */
 			if (next == NULL && !is_tls) {
 				if (sb->sb_tlsdcc != 0) {
 					MPASS(len >= sb->sb_tlsdcc);
 					len -= sb->sb_tlsdcc;
 					sb->sb_ccc -= sb->sb_tlsdcc;
 					sb->sb_tlsdcc = 0;
 					if (len == 0)
 						break;
 				}
 				next = sb->sb_mtls;
 				is_tls = true;
 			}
 #endif
 			KASSERT(next, ("%s: no next, len %d", __func__, len));
 			m = next;
 			next = m->m_nextpkt;
 		}
 		/* Partial cut: trim the front of this mbuf and stop. */
 		if (m->m_len > len) {
 			KASSERT(!(m->m_flags & M_NOTAVAIL),
 			    ("%s: m %p M_NOTAVAIL", __func__, m));
 			m->m_len -= len;
 			m->m_data += len;
 			sb->sb_ccc -= len;
 			sb->sb_acc -= len;
 			if (sb->sb_sndptroff != 0)
 				sb->sb_sndptroff -= len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 				sb->sb_ctl -= len;
 			break;
 		}
 		/* The whole mbuf is consumed: unaccount it and unlink it. */
 		len -= m->m_len;
 #ifdef KERN_TLS
 		if (is_tls)
 			sbfree_ktls_rx(sb, m);
 		else
 #endif
 			sbfree(sb, m);
 		/*
 		 * Do not put M_NOTREADY buffers to the free list, they
 		 * are referenced from outside.
 		 */
 		if (m->m_flags & M_NOTREADY && !is_tls)
 			m = m->m_next;
 		else {
 			struct mbuf *n;
 
 			n = m->m_next;
 			m->m_next = mfree;
 			mfree = m;
 			m = n;
 		}
 	}
 	/*
 	 * Free any zero-length mbufs from the buffer.
 	 * For SOCK_DGRAM sockets such mbufs represent empty records.
 	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
 	 * when sosend_generic() needs to send only control data.
 	 */
 	while (m && m->m_len == 0) {
 		struct mbuf *n;
 
 		sbfree(sb, m);
 		n = m->m_next;
 		m->m_next = mfree;
 		mfree = m;
 		m = n;
 	}
 	/* Re-attach what is left as the new front of the buffer. */
 #ifdef KERN_TLS
 	if (is_tls) {
 		sb->sb_mb = NULL;
 		sb->sb_mtls = m;
 		if (m == NULL)
 			sb->sb_mtlstail = NULL;
 	} else
 #endif
 	if (m) {
 		sb->sb_mb = m;
 		m->m_nextpkt = next;
 	} else
 		sb->sb_mb = next;
 	/*
 	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
 	 * sb_lastrecord is up-to-date if we dropped part of the last record.
 	 */
 	m = sb->sb_mb;
 	if (m == NULL) {
 		sb->sb_mbtail = NULL;
 		sb->sb_lastrecord = NULL;
 	} else if (m->m_nextpkt == NULL) {
 		sb->sb_lastrecord = m;
 	}
 
 	return (mfree);
 }
 
 /*
  * Remove len bytes from the front of a sockbuf and free the removed mbufs.
  * The sockbuf lock must be held by the caller.
  */
 void
 sbdrop_locked(struct sockbuf *sb, int len)
 {
 	struct mbuf *cut;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	cut = sbcut_internal(sb, len);
 	m_freem(cut);
 }
 
 /*
  * Remove len bytes from the front of a sockbuf and hand the removed mbufs
  * back to the caller instead of freeing them.  The sockbuf lock must be
  * held by the caller.
  */
 struct mbuf *
 sbcut_locked(struct sockbuf *sb, int len)
 {
 	struct mbuf *cut;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	cut = sbcut_internal(sb, len);
 	return (cut);
 }
 
 /*
  * Remove len bytes from the front of a sockbuf.  The data is cut while the
  * sockbuf lock is held; the removed mbufs are freed after the lock has been
  * dropped.
  */
 void
 sbdrop(struct sockbuf *sb, int len)
 {
 	struct mbuf *cut;
 
 	SOCKBUF_LOCK(sb);
 	cut = sbcut_internal(sb, len);
 	SOCKBUF_UNLOCK(sb);
 
 	m_freem(cut);
 }
 
 /*
  * Return the mbuf to start from for send offset 'off' and store the offset
  * relative to that mbuf chain position in *moff, without advancing the
  * cached send pointer.  When no send pointer is cached yet it is
  * initialized to the head of the buffer; when the cached offset lies beyond
  * 'off' the head of the buffer is returned with 'off' unchanged.
  */
 struct mbuf *
 sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
 {
 
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 
 	if (sb->sb_sndptr == NULL) {
 		/* First use: start the cache at the head of the buffer. */
 		*moff = off;
 		sb->sb_sndptr = sb->sb_mb;
 		sb->sb_sndptroff = 0;
 		return (sb->sb_mb);
 	}
 	if (sb->sb_sndptroff > off) {
 		/* The cache is past 'off': fall back to the head. */
 		*moff = off;
 		return (sb->sb_mb);
 	}
 
 	off -= sb->sb_sndptroff;
 	*moff = off;
 	return (sb->sb_sndptr);
 }
 
 /*
  * A small copy of 'len' bytes starting at mbuf 'mb' was done: advance
  * sb_sndptr and sb_sndptroff past every mbuf the copy covered completely.
  * Nothing is done unless 'mb' is the current sb_sndptr, and the pointer is
  * never advanced beyond the last mbuf of the chain.
  */
 void
 sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
 {
 	struct mbuf *cur;
 
 	/* Did not copyout at the same mbuf */
 	if (mb != sb->sb_sndptr)
 		return;
 
 	for (cur = mb; cur != NULL && len > 0; cur = cur->m_next) {
 		/* The copy ended inside this mbuf: nothing more to skip. */
 		if (len < cur->m_len)
 			break;
 		len -= cur->m_len;
 		if (cur->m_next != NULL) {
 			sb->sb_sndptroff += cur->m_len;
 			sb->sb_sndptr = cur->m_next;
 		}
 	}
 }
 
 /*
  * Look up the mbuf containing send offset 'off' and store the offset within
  * that mbuf in *moff.  Neither sb_sndptr nor sb_sndptroff is modified.  The
  * result is NULL when the chain ends before the offset is reached.
  */
 struct mbuf *
 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
 {
 	struct mbuf *cur;
 
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 
 	/*
 	 * Start from the cached send pointer when it is usable; offsets
 	 * below the cached one (retransmits) start from "sb_mb" instead.
 	 */
 	if (sb->sb_sndptr != NULL && sb->sb_sndptroff <= off) {
 		cur = sb->sb_sndptr;
 		off -= sb->sb_sndptroff;
 	} else {
 		cur = sb->sb_mb;
 	}
 
 	/* Skip every mbuf that lies completely before the offset. */
 	while (off > 0 && cur != NULL && off >= cur->m_len) {
 		off -= cur->m_len;
 		cur = cur->m_next;
 	}
 	*moff = off;
 	return (cur);
 }
 
 /*
  * Drop a record off the front of a sockbuf and move the next record to the
  * front.
  */
 void
 sbdroprecord_locked(struct sockbuf *sb)
 {
 	struct mbuf *m;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	m = sb->sb_mb;
 	if (m) {
 		sb->sb_mb = m->m_nextpkt;
 		do {
 			sbfree(sb, m);
 			m = m_free(m);
 		} while (m);
 	}
 	SB_EMPTY_FIXUP(sb);
 }
 
 /*
  * Locking wrapper around sbdroprecord_locked(): remove the first record of
  * a sockbuf while holding its lock.
  */
 void
 sbdroprecord(struct sockbuf *sb)
 {
 	SOCKBUF_LOCK(sb);
 	sbdroprecord_locked(sb);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Build an MT_CONTROL mbuf holding a single control message of the given
  * level and type.  When p is not NULL, size bytes are copied from it into
  * the message data; the whole message area is zeroed first.  'wait' is
  * passed to the mbuf allocator.  Returns NULL when the message would not
  * fit into a cluster or when no mbuf could be allocated.
  */
 struct mbuf *
 sbcreatecontrol_how(void *p, int size, int type, int level, int wait)
 {
 	struct cmsghdr *hdr;
 	struct mbuf *ctl;
 
 	MBUF_CHECKSLEEP(wait);
 	if (CMSG_SPACE((u_int)size) > MCLBYTES)
 		return (NULL);
 
 	/* A cluster is only needed when a plain mbuf is too small. */
 	ctl = (CMSG_SPACE((u_int)size) > MLEN) ?
 	    m_getcl(wait, MT_CONTROL, 0) : m_get(wait, MT_CONTROL);
 	if (ctl == NULL)
 		return (NULL);
 
 	ctl->m_len = 0;
 	hdr = mtod(ctl, struct cmsghdr *);
 	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(ctl),
 	    ("sbcreatecontrol: short mbuf"));
 	/*
 	 * Zero the complete message so that neither the padding between
 	 * header and data nor the padding after the data is left
 	 * uninitialized.
 	 */
 	bzero(hdr, CMSG_SPACE((u_int)size));
 	if (p != NULL)
 		(void)memcpy(CMSG_DATA(hdr), p, size);
 
 	hdr->cmsg_level = level;
 	hdr->cmsg_type = type;
 	hdr->cmsg_len = CMSG_LEN(size);
 	ctl->m_len = CMSG_SPACE(size);
 	return (ctl);
 }
 
 /*
  * Convenience form of sbcreatecontrol_how() that always allocates with
  * M_NOWAIT.
  */
 struct mbuf *
 sbcreatecontrol(caddr_t p, int size, int type, int level)
 {
 	struct mbuf *ctl;
 
 	ctl = sbcreatecontrol_how(p, size, type, level, M_NOWAIT);
 	return (ctl);
 }
 
 /*
  * This does the same for socket buffers that sotoxsocket does for sockets:
  * generate a user-format data structure describing the socket buffer.  Note
  * that the xsockbuf structure, since it is always embedded in a socket, does
  * not include a self pointer nor a length.  We make this entry point public
  * in case some other mechanism needs it.
  */
 void
 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 {
 	/* Byte counts and their limits. */
 	xsb->sb_cc = sb->sb_ccc;
 	xsb->sb_hiwat = sb->sb_hiwat;
 	xsb->sb_lowat = sb->sb_lowat;
 
 	/* mbuf and cluster accounting. */
 	xsb->sb_mbcnt = sb->sb_mbcnt;
 	xsb->sb_mbmax = sb->sb_mbmax;
 	xsb->sb_mcnt = sb->sb_mcnt;
 	xsb->sb_ccnt = sb->sb_ccnt;
 
 	xsb->sb_flags = sb->sb_flags;
 	xsb->sb_timeo = sb->sb_timeo;
 }
 
 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 static int dummy;
 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, "");
 /*
  * maxsockbuf under _kern_ipc: backed by sb_max, with accesses routed
  * through sysctl_handle_sb_max() so that new values are validated and
  * sb_max_adj is kept in step.
  */
 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
     CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &sb_max, 0,
     sysctl_handle_sb_max, "LU",
     "Maximum socket buffer size");
 /* sockbuf_waste_factor under _kern_ipc: read/write access to sb_efficiency. */
 SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
     &sb_efficiency, 0, "Socket buffer size waste factor");
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 628730171715..49a2b5773cc6 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4516 +1,4516 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2004 The FreeBSD Foundation
  * Copyright (c) 2004-2008 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
  */
 
 /*
  * Comments on the socket life cycle:
  *
  * soalloc() sets up socket layer state for a socket, called only by
  * socreate() and sonewconn().  Socket layer private.
  *
  * sodealloc() tears down socket layer state for a socket, called only by
  * sofree() and sonewconn().  Socket layer private.
  *
  * pru_attach() associates protocol layer state with an allocated socket;
  * called only once, may fail, aborting socket allocation.  This is called
  * from socreate() and sonewconn().  Socket layer private.
  *
  * pru_detach() disassociates protocol layer state from an attached socket,
  * and will be called exactly once for sockets in which pru_attach() has
  * been successfully called.  If pru_attach() returned an error,
  * pru_detach() will not be called.  Socket layer private.
  *
  * pru_abort() and pru_close() notify the protocol layer that the last
  * consumer of a socket is starting to tear down the socket, and that the
  * protocol should terminate the connection.  Historically, pru_abort() also
  * detached protocol state from the socket state, but this is no longer the
  * case.
  *
  * socreate() creates a socket and attaches protocol state.  This is a public
  * interface that may be used by socket layer consumers to create new
  * sockets.
  *
  * sonewconn() creates a socket and attaches protocol state.  This is a
  * public interface  that may be used by protocols to create new sockets when
  * a new connection is received and will be available for accept() on a
  * listen socket.
  *
  * soclose() destroys a socket after possibly waiting for it to disconnect.
  * This is a public interface that socket consumers should use to close and
  * release a socket when done with it.
  *
  * soabort() destroys a socket without waiting for it to disconnect (used
  * only for incoming connections that are already partially or fully
  * connected).  This is used internally by the socket layer when clearing
  * listen socket queues (due to overflow or close on the listen socket), but
  * is also a public interface protocols may use to abort connections in
  * their incomplete listen queues should they no longer be required.  Sockets
  * placed in completed connection listen queues should not be aborted for
  * reasons described in the comment above the soclose() implementation.  This
  * is not a general purpose close routine, and except in the specific
  * circumstances described here, should not be used.
  *
  * sofree() will free a socket and its protocol state if all references on
  * the socket have been released, and is the public interface to attempt to
  * free a socket when a reference is removed.  This is a socket layer private
  * interface.
  *
  * NOTE: In addition to socreate() and soclose(), which provide a single
  * socket reference to the consumer to be managed as required, there are two
  * calls to explicitly manage socket references, soref(), and sorele().
  * Currently, these are generally required only when transitioning a socket
  * from a listen queue to a file descriptor, in order to prevent garbage
  * collection of the socket at an untimely moment.  For a number of reasons,
  * these interfaces are not preferred, and should be avoided.
  *
  * NOTE: With regard to VNETs the general rule is that callers do not set
  * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
  * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
  * and sorflush(), which are usually called from a pre-set VNET context.
  * sopoll() currently does not need a VNET context to be set.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_kern_tls.h"
 #include "opt_sctp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/domain.h>
 #include <sys/file.h>			/* for struct knote */
 #include <sys/hhook.h>
 #include <sys/kernel.h>
 #include <sys/khelp.h>
 #include <sys/ktls.h>
 #include <sys/event.h>
 #include <sys/eventhandler.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/resourcevar.h>
 #include <net/route.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/jail.h>
 #include <sys/syslog.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp.h>
 
 #include <net/vnet.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <sys/sysent.h>
 #include <compat/freebsd32/freebsd32.h>
 #endif
 
 static int	soreceive_rcvoob(struct socket *so, struct uio *uio,
 		    int flags);
 static void	so_rdknl_lock(void *);
 static void	so_rdknl_unlock(void *);
 static void	so_rdknl_assert_lock(void *, int);
 static void	so_wrknl_lock(void *);
 static void	so_wrknl_unlock(void *);
 static void	so_wrknl_assert_lock(void *, int);
 
 static void	filt_sordetach(struct knote *kn);
 static int	filt_soread(struct knote *kn, long hint);
 static void	filt_sowdetach(struct knote *kn);
 static int	filt_sowrite(struct knote *kn, long hint);
 static int	filt_soempty(struct knote *kn, long hint);
 static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
 fo_kqfilter_t	soo_kqfilter;
 
 /*
  * Filter operation tables for socket knotes.  All three are marked as
  * descriptor-based (f_isfd = 1).  The write and empty tables share
  * filt_sowdetach as their detach routine and differ only in the event
  * routine they install.
  */
 static struct filterops soread_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sordetach,
 	.f_event = filt_soread,
 };
 static struct filterops sowrite_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sowdetach,
 	.f_event = filt_sowrite,
 };
 static struct filterops soempty_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sowdetach,
 	.f_event = filt_soempty,
 };
 
 so_gen_t	so_gencnt;	/* generation count for sockets */
 
 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
 
 #define	VNET_SO_ASSERT(so)						\
 	VNET_ASSERT(curvnet != NULL,					\
 	    ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
 
 VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]);
 #define	V_socket_hhh		VNET(socket_hhh)
 
 /*
  * Limit on the number of connections in the listen queue waiting
  * for accept(2).
  * NB: The original sysctl somaxconn is still available but hidden
  * to prevent confusion about the actual purpose of this number.
  */
 static u_int somaxconn = SOMAXCONN;
 
 static int
 sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int val;
 
 	val = somaxconn;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 
 	/*
 	 * The purpose of the UINT_MAX / 3 limit, is so that the formula
 	 *   3 * so_qlimit / 2
 	 * below, will not overflow.
          */
 
 	if (val < 1 || val > UINT_MAX / 3)
 		return (EINVAL);
 
 	somaxconn = val;
 	return (0);
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int),
     sysctl_somaxconn, "I",
     "Maximum listen socket pending connection accept queue size");
 SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, 0,
     sizeof(int), sysctl_somaxconn, "I",
     "Maximum listen socket pending connection accept queue size (compat)");
 
 static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
 
 /*
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
  */
 struct mtx accept_mtx;
 MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);
 
 /*
  * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
  * so_gencnt field.
  */
 static struct mtx so_global_mtx;
 MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);
 
 /*
  * General IPC sysctl name space, used by sockets and a variety of other IPC
  * types.
  */
 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IPC");
 
 /*
  * Initialize the socket subsystem and set up the socket
  * memory allocator.
  */
 static uma_zone_t socket_zone;
 int	maxsockets;
 
 /*
  * maxsockets_change event handler: push the current maxsockets value into
  * the socket zone and record the limit the zone reports back.
  */
 static void
 socket_zone_change(void *tag)
 {
 	int applied;
 
 	applied = uma_zone_set_max(socket_zone, maxsockets);
 	maxsockets = applied;
 }
 
 /*
  * Register the socket hook head for one subtype, storing it in the
  * matching V_socket_hhh[] slot.  Failure is reported on the console only.
  */
 static void
 socket_hhook_register(int subtype)
 {
 	int rc;
 
 	rc = hhook_head_register(HHOOK_TYPE_SOCKET, subtype,
 	    &V_socket_hhh[subtype], HHOOK_NOWAIT | HHOOK_HEADISINVNET);
 	if (rc == 0)
 		return;
 	printf("%s: WARNING: unable to register hook\n", __func__);
 }
 
 /*
  * Deregister the socket hook head stored in V_socket_hhh[subtype].
  * Failure is reported on the console only.
  */
 static void
 socket_hhook_deregister(int subtype)
 {
 	int rc;
 
 	rc = hhook_head_deregister(V_socket_hhh[subtype]);
 	if (rc == 0)
 		return;
 	printf("%s: WARNING: unable to deregister hook\n", __func__);
 }
 
 static void
 socket_init(void *tag)
 {
 	/* Create the zone that struct socket allocations are drawn from. */
 	socket_zone = uma_zcreate("socket", sizeof(struct socket),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 
 	/* Apply the configured cap and keep the value the zone reports. */
 	maxsockets = uma_zone_set_max(socket_zone, maxsockets);
 	uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached");
 
 	/* Have socket_zone_change() run on every maxsockets_change event. */
 	EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
 	    EVENTHANDLER_PRI_FIRST);
 }
 SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
 
 /*
  * Per-vnet setup: register a hook head for every socket hook subtype.
  */
 static void
 socket_vnet_init(const void *unused __unused)
 {
 	int subtype;
 
 	/* Subtypes are expected to form the contiguous range 0 .. LAST. */
 	subtype = 0;
 	while (subtype <= HHOOK_SOCKET_LAST) {
 		socket_hhook_register(subtype);
 		subtype++;
 	}
 }
 VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
     socket_vnet_init, NULL);
 
 /*
  * Per-vnet teardown: deregister the hook head of every socket hook subtype,
  * walking the same 0 .. HHOOK_SOCKET_LAST range used at setup.
  */
 static void
 socket_vnet_uninit(const void *unused __unused)
 {
 	int subtype;
 
 	subtype = 0;
 	while (subtype <= HHOOK_SOCKET_LAST) {
 		socket_hhook_deregister(subtype);
 		subtype++;
 	}
 }
 VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
     socket_vnet_uninit, NULL);
 
 /*
  * Initialise maxsockets.  This SYSINIT must be run after
  * tunable_mbinit().
  */
 static void
 init_maxsockets(void *ignored)
 {
 	/* Pick up a loader tunable override, if one was supplied. */
 	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
 
 	/* Never start out below maxfiles. */
 	if (maxsockets < maxfiles)
 		maxsockets = maxfiles;
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
 
 /*
  * Sysctl to get and set the maximum global sockets limit.  Notify protocols
  * of the change so that they can update their dependent limits as required.
  */
 static int
 sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
 {
 	int error, newmaxsockets;
 
 	newmaxsockets = maxsockets;
 	error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
 	if (error == 0 && req->newptr && newmaxsockets != maxsockets) {
 		if (newmaxsockets > maxsockets &&
 		    newmaxsockets <= maxfiles) {
 			maxsockets = newmaxsockets;
 			EVENTHANDLER_INVOKE(maxsockets_change);
 		} else
 			error = EINVAL;
 	}
 	return (error);
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &maxsockets, 0,
     sysctl_maxsockets, "IU",
     "Maximum number of sockets available");
 
 /*
  * Socket operation routines.  These routines are called by the routines in
  * sys_socket.c or from a system process, and implement the semantics of
  * socket operations by switching out to the protocol specific routines.
  */
 
 /*
  * Get a socket structure from our zone, and initialize it.  Note that it
  * would probably be better to allocate socket and PCB at the same time, but
  * I'm not convinced that all the protocols can be easily modified to do
  * this.
  *
  * soalloc() returns a socket with a ref count of 0.
  */
 static struct socket *
 soalloc(struct vnet *vnet)
 {
 	struct socket *so;
 
 	/* Zero-filled M_NOWAIT allocation; failure is reported as NULL. */
 	so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO);
 	if (so == NULL)
 		return (NULL);
 #ifdef MAC
 	if (mac_socket_init(so, M_NOWAIT) != 0) {
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 #endif
 	/*
 	 * NOTE(review): this failure path only returns the item to the zone;
 	 * no mac_socket_destroy() call is visible here for the MAC case --
 	 * confirm whether that is intentional.
 	 */
 	if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) {
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 
 	/*
 	 * The socket locking protocol allows to lock 2 sockets at a time,
 	 * however, the first one must be a listening socket.  WITNESS lacks
 	 * a feature to change class of an existing lock, so we use DUPOK.
 	 */
 	mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
 	/* Point each sockbuf at the mutex embedded in the socket itself. */
 	so->so_snd.sb_mtx = &so->so_snd_mtx;
 	so->so_rcv.sb_mtx = &so->so_rcv_mtx;
-	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
-	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+	mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF);
+	mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF);
 	so->so_rcv.sb_sel = &so->so_rdsel;
 	so->so_snd.sb_sel = &so->so_wrsel;
 	sx_init(&so->so_snd_sx, "so_snd_sx");
 	sx_init(&so->so_rcv_sx, "so_rcv_sx");
 	/* Per-direction AIO job queues and the tasks that service them. */
 	TAILQ_INIT(&so->so_snd.sb_aiojobq);
 	TAILQ_INIT(&so->so_rcv.sb_aiojobq);
 	TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
 	TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so);
 #ifdef VIMAGE
 	VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
 	    __func__, __LINE__, so));
 	so->so_vnet = vnet;
 #endif
 	/* We shouldn't need the so_global_mtx */
 	/*
 	 * NOTE(review): on hook failure only uma_zfree() is called; the
 	 * mutexes, sx locks, OSD and MAC state set up above are not visibly
 	 * torn down on this path -- confirm.
 	 */
 	if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) {
 		/* Do we need more comprehensive error returns? */
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 	/* Global generation count and socket counters change under the lock. */
 	mtx_lock(&so_global_mtx);
 	so->so_gencnt = ++so_gencnt;
 	++numopensockets;
 #ifdef VIMAGE
 	vnet->vnet_sockcnt++;
 #endif
 	mtx_unlock(&so_global_mtx);
 
 	return (so);
 }
 
 /*
  * Free the storage associated with a socket at the socket layer, tear down
  * locks, labels, etc.  All protocol state is assumed already to have been
  * torn down (and possibly never set up) by the caller.
  */
 static void
 sodealloc(struct socket *so)
 {
 
 	/* Callers must have dropped every reference and detached the pcb. */
 	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
 	KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL"));
 
 	/* Mirror of soalloc(): bump the generation, drop the counters. */
 	mtx_lock(&so_global_mtx);
 	so->so_gencnt = ++so_gencnt;
 	--numopensockets;	/* Could be below, but faster here. */
 #ifdef VIMAGE
 	VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p",
 	    __func__, __LINE__, so));
 	so->so_vnet->vnet_sockcnt--;
 #endif
 	mtx_unlock(&so_global_mtx);
 #ifdef MAC
 	mac_socket_destroy(so);
 #endif
 	hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE);
 
 	khelp_destroy_osd(&so->osd);
 	if (SOLISTENING(so)) {
 		/* Listening sockets: only the accept filter, if set, is cleared. */
 		if (so->sol_accept_filter != NULL)
 			accept_filt_setopt(so, NULL);
 	} else {
 		/*
 		 * Non-listening sockets: set any non-zero sockbuf hiwat back
 		 * to 0 through chgsbsize(), then destroy the per-sockbuf sx
 		 * locks and mutexes.
 		 */
 		if (so->so_rcv.sb_hiwat)
 			(void)chgsbsize(so->so_cred->cr_uidinfo,
 			    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
 		if (so->so_snd.sb_hiwat)
 			(void)chgsbsize(so->so_cred->cr_uidinfo,
 			    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
 		sx_destroy(&so->so_snd_sx);
 		sx_destroy(&so->so_rcv_sx);
-		SOCKBUF_LOCK_DESTROY(&so->so_snd);
-		SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+		mtx_destroy(&so->so_snd_mtx);
+		mtx_destroy(&so->so_rcv_mtx);
 	}
 	/* The credential is still needed above, so release it last. */
 	crfree(so->so_cred);
 	mtx_destroy(&so->so_lock);
 	uma_zfree(socket_zone, so);
 }
 
 /*
  * socreate returns a socket with a ref count of 1.  The socket should be
  * closed with soclose().
  */
 int
 socreate(int dom, struct socket **aso, int type, int proto,
     struct ucred *cred, struct thread *td)
 {
 	struct protosw *prp;
 	struct socket *so;
 	int error;
 
 	if (proto)
 		prp = pffindproto(dom, proto, type);
 	else
 		prp = pffindtype(dom, type);
 
 	if (prp == NULL) {
 		/* No support for domain. */
 		if (pffinddomain(dom) == NULL)
 			return (EAFNOSUPPORT);
 		/* No support for socket type. */
 		if (proto == 0 && type != 0)
 			return (EPROTOTYPE);
 		return (EPROTONOSUPPORT);
 	}
 	if (prp->pr_usrreqs->pru_attach == NULL ||
 	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
 		return (EPROTONOSUPPORT);
 
 	if (IN_CAPABILITY_MODE(td) && (prp->pr_flags & PR_CAPATTACH) == 0)
 		return (ECAPMODE);
 
 	if (prison_check_af(cred, prp->pr_domain->dom_family) != 0)
 		return (EPROTONOSUPPORT);
 
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
 	so = soalloc(CRED_TO_VNET(cred));
 	if (so == NULL)
 		return (ENOBUFS);
 
 	so->so_type = type;
 	so->so_cred = crhold(cred);
 	if ((prp->pr_domain->dom_family == PF_INET) ||
 	    (prp->pr_domain->dom_family == PF_INET6) ||
 	    (prp->pr_domain->dom_family == PF_ROUTE))
 		so->so_fibnum = td->td_proc->p_fibnum;
 	else
 		so->so_fibnum = 0;
 	so->so_proto = prp;
 #ifdef MAC
 	mac_socket_create(cred, so);
 #endif
 	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
 	    so_rdknl_assert_lock);
 	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
 	    so_wrknl_assert_lock);
 	/*
 	 * Auto-sizing of socket buffers is managed by the protocols and
 	 * the appropriate flags must be set in the pru_attach function.
 	 */
 	CURVNET_SET(so->so_vnet);
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	CURVNET_RESTORE();
 	if (error) {
 		sodealloc(so);
 		return (error);
 	}
 	soref(so);
 	*aso = so;
 	return (0);
 }
 
 #ifdef REGRESSION
 static int regression_sonewconn_earlytest = 1;
 SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
     &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
 #endif
 
 static struct timeval overinterval = { 60, 0 };
 SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, sooverinterval, CTLFLAG_RW,
     &overinterval,
     "Delay in seconds between warnings for listen socket overflows");
 
 /*
  * When an attempt at a new connection is noted on a socket which accepts
  * connections, sonewconn is called.  If the connection is possible (subject
  * to space constraints, etc.) then we allocate a new structure, properly
  * linked into the data structure of the original socket, and return this.
  * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
  *
  * Note: the ref count on the socket is 0 on return.
  */
 struct socket *
 sonewconn(struct socket *head, int connstatus)
 {
 	struct sbuf descrsb;
 	struct socket *so;
 	int len, overcount;
 	u_int qlen;
 	const char localprefix[] = "local:";
 	char descrbuf[SUNPATHLEN + sizeof(localprefix)];
 #if defined(INET6)
 	char addrbuf[INET6_ADDRSTRLEN];
 #elif defined(INET)
 	char addrbuf[INET_ADDRSTRLEN];
 #endif
 	bool dolog, over;
 
 	SOLISTEN_LOCK(head);
 	/* Overflow: completed queue is longer than 1.5 times the limit. */
 	over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
 #ifdef REGRESSION
 	if (regression_sonewconn_earlytest && over) {
 #else
 	if (over) {
 #endif
 		head->sol_overcount++;
 		/* Rate-limit the log message using overinterval. */
 		dolog = !!ratecheck(&head->sol_lastover, &overinterval);
 
 		/*
 		 * If we're going to log, copy the overflow count and queue
 		 * length from the listen socket before dropping the lock.
 		 * Also, reset the overflow count.
 		 */
 		if (dolog) {
 			overcount = head->sol_overcount;
 			head->sol_overcount = 0;
 			qlen = head->sol_qlen;
 		}
 		SOLISTEN_UNLOCK(head);
 
 		if (dolog) {
 			/*
 			 * Try to print something descriptive about the
 			 * socket for the error message.
 			 */
 			sbuf_new(&descrsb, descrbuf, sizeof(descrbuf),
 			    SBUF_FIXEDLEN);
 			switch (head->so_proto->pr_domain->dom_family) {
 #if defined(INET) || defined(INET6)
 #ifdef INET
 			case AF_INET:
 #endif
 #ifdef INET6
 			case AF_INET6:
 				if (head->so_proto->pr_domain->dom_family ==
 				    AF_INET6 ||
 				    (sotoinpcb(head)->inp_inc.inc_flags &
 				    INC_ISIPV6)) {
 					ip6_sprintf(addrbuf,
 					    &sotoinpcb(head)->inp_inc.inc6_laddr);
 					sbuf_printf(&descrsb, "[%s]", addrbuf);
 				} else
 #endif
 				{
 #ifdef INET
 					inet_ntoa_r(
 					    sotoinpcb(head)->inp_inc.inc_laddr,
 					    addrbuf);
 					sbuf_cat(&descrsb, addrbuf);
 #endif
 				}
 				sbuf_printf(&descrsb, ":%hu (proto %u)",
 				    ntohs(sotoinpcb(head)->inp_inc.inc_lport),
 				    head->so_proto->pr_protocol);
 				break;
 #endif /* INET || INET6 */
 			case AF_UNIX:
 				sbuf_cat(&descrsb, localprefix);
 				/* Path length is sun_len minus the header part. */
 				if (sotounpcb(head)->unp_addr != NULL)
 					len =
 					    sotounpcb(head)->unp_addr->sun_len -
 					    offsetof(struct sockaddr_un,
 					    sun_path);
 				else
 					len = 0;
 				if (len > 0)
 					sbuf_bcat(&descrsb,
 					    sotounpcb(head)->unp_addr->sun_path,
 					    len);
 				else
 					sbuf_cat(&descrsb, "(unknown)");
 				break;
 			}
 
 			/*
 			 * If we can't print something more specific, at least
 			 * print the domain name.
 			 */
 			if (sbuf_finish(&descrsb) != 0 ||
 			    sbuf_len(&descrsb) <= 0) {
 				sbuf_clear(&descrsb);
 				sbuf_cat(&descrsb,
 				    head->so_proto->pr_domain->dom_name ?:
 				    "unknown");
 				sbuf_finish(&descrsb);
 			}
 			KASSERT(sbuf_len(&descrsb) > 0,
 			    ("%s: sbuf creation failed", __func__));
 			/* Credential details are logged only when one is set. */
 			if (head->so_cred == 0) {
 				log(LOG_DEBUG,
 			    	"%s: pcb %p (%s): Listen queue overflow: "
 			    	"%i already in queue awaiting acceptance "
 			    	"(%d occurrences)\n",
 			    	__func__, head->so_pcb, sbuf_data(&descrsb),
 			    	qlen, overcount);
 			} else {
 				log(LOG_DEBUG, "%s: pcb %p (%s): Listen queue overflow: "
 				    "%i already in queue awaiting acceptance "
 				    "(%d occurrences), euid %d, rgid %d, jail %s\n",
 				    __func__, head->so_pcb, sbuf_data(&descrsb),
 				    qlen, overcount,
 				    head->so_cred->cr_uid, head->so_cred->cr_rgid,
 				    head->so_cred->cr_prison ?
 					head->so_cred->cr_prison->pr_name :
 					"not_jailed");
 			}
 			sbuf_delete(&descrsb);
 
 			overcount = 0;
 		}
 
 		return (NULL);
 	}
 	/* Not over the limit: build the new socket without the listen lock. */
 	SOLISTEN_UNLOCK(head);
 	VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
 	    __func__, head));
 	so = soalloc(head->so_vnet);
 	if (so == NULL) {
 		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
 		    "limit reached or out of memory\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	/* Inherit settings from the listen socket, minus SO_ACCEPTCONN. */
 	so->so_listen = head;
 	so->so_type = head->so_type;
 	so->so_options = head->so_options & ~SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
 	so->so_state = head->so_state | SS_NOFDREF;
 	so->so_fibnum = head->so_fibnum;
 	so->so_proto = head->so_proto;
 	so->so_cred = crhold(head->so_cred);
 #ifdef MAC
 	mac_socket_newconn(head, so);
 #endif
 	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
 	    so_rdknl_assert_lock);
 	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
 	    so_wrknl_assert_lock);
 	VNET_SO_ASSERT(head);
 	if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	/* Sockbuf parameters come from the values saved on the listener. */
 	so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
 	so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
 	so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
 	so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
 	so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
 	so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;
 
 	SOLISTEN_LOCK(head);
 	/*
 	 * With an accept filter installed the caller's connstatus is
 	 * discarded, so the socket always goes onto the incomplete queue.
 	 */
 	if (head->sol_accept_filter != NULL)
 		connstatus = 0;
 	so->so_state |= connstatus;
 	soref(head); /* A socket on (in)complete queue refs head. */
 	if (connstatus) {
 		TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
 		so->so_qstate = SQ_COMP;
 		head->sol_qlen++;
 		solisten_wakeup(head);	/* unlocks */
 	} else {
 		/*
 		 * Keep removing sockets from the head until there's room for
 		 * us to insert on the tail.  In pre-locking revisions, this
 		 * was a simple if(), but as we could be racing with other
 		 * threads and soabort() requires dropping locks, we must
 		 * loop waiting for the condition to be true.
 		 */
 		while (head->sol_incqlen > head->sol_qlimit) {
 			struct socket *sp;
 
 			sp = TAILQ_FIRST(&head->sol_incomp);
 			TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
 			head->sol_incqlen--;
 			SOCK_LOCK(sp);
 			sp->so_qstate = SQ_NONE;
 			sp->so_listen = NULL;
 			SOCK_UNLOCK(sp);
 			sorele_locked(head);	/* does SOLISTEN_UNLOCK, head stays */
 			soabort(sp);
 			SOLISTEN_LOCK(head);
 		}
 		TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
 		so->so_qstate = SQ_INCOMP;
 		head->sol_incqlen++;
 		SOLISTEN_UNLOCK(head);
 	}
 	return (so);
 }
 
 #if defined(SCTP) || defined(SCTP_SUPPORT)
 /*
  * Socket part of sctp_peeloff().  Detach a new socket from an
  * association.  The new socket is returned with a reference.
  */
 struct socket *
 sopeeloff(struct socket *head)
 {
 	struct socket *so;
 
 	VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
 	    __func__, __LINE__, head));
 	so = soalloc(head->so_vnet);
 	if (so == NULL) {
 		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
 		    "limit reached or out of memory\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 
 	/* Copy protocol, type, routing and option settings from head. */
 	so->so_proto = head->so_proto;
 	so->so_type = head->so_type;
 	so->so_fibnum = head->so_fibnum;
 	so->so_linger = head->so_linger;
 	so->so_options = head->so_options;
 	so->so_cred = crhold(head->so_cred);
 	/* Only SS_NBIO carries over; the new socket starts SS_ISCONNECTED. */
 	so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
 #ifdef MAC
 	mac_socket_newconn(head, so);
 #endif
 	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
 	    so_rdknl_assert_lock);
 	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
 	    so_wrknl_assert_lock);
 	VNET_SO_ASSERT(head);
 
 	/* Reserve buffer space matching head, then attach the protocol. */
 	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	if (so->so_proto->pr_usrreqs->pru_attach(so, 0, NULL) != 0) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 
 	/* Receive-side buffer parameters. */
 	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
 	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
 	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
 	/* Send-side buffer parameters. */
 	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
 	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
 	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
 
 	/* The caller receives the socket with one reference held. */
 	soref(so);
 
 	return (so);
 }
 #endif	/* SCTP */
 
 /*
  * Invoke the protocol's pru_bind method with curvnet set to the socket's
  * vnet, and return whatever the protocol returns.
  */
 int
 sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int rc;
 
 	CURVNET_SET(so->so_vnet);
 	rc = so->so_proto->pr_usrreqs->pru_bind(so, nam, td);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 /*
  * Like sobind(), but dispatches to the protocol's pru_bindat method and
  * passes the descriptor fd through as its first argument.
  */
 int
 sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int rc;
 
 	CURVNET_SET(so->so_vnet);
 	rc = so->so_proto->pr_usrreqs->pru_bindat(fd, so, nam, td);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 /*
  * solisten() transitions a socket from a non-listening state to a listening
  * state, but can also be used to update the listen queue depth on an
  * existing listen socket.  The protocol will call back into the sockets
  * layer using solisten_proto_check() and solisten_proto() to check and set
  * socket-layer listen state.  Call backs are used so that the protocol can
  * acquire both protocol and socket layer locks in whatever order is required
  * by the protocol.
  *
  * Protocol implementors are advised to hold the socket lock across the
  * socket-layer test and set to avoid races at the socket layer.
  */
 int
 solisten(struct socket *so, int backlog, struct thread *td)
 {
 	int rc;
 
 	/* The protocol's pru_listen runs with curvnet set to so's vnet. */
 	CURVNET_SET(so->so_vnet);
 	rc = so->so_proto->pr_usrreqs->pru_listen(so, backlog, td);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 /*
  * Prepare for a call to solisten_proto().  Acquire all socket buffer locks in
  * order to interlock with socket I/O.
  */
 int
 solisten_proto_check(struct socket *so)
 {
 	const int connstates = SS_ISCONNECTED | SS_ISCONNECTING |
 	    SS_ISDISCONNECTING;
 	int aio_busy;
 
 	SOCK_LOCK_ASSERT(so);
 
 	/* Sockets that are connected or in transition cannot listen. */
 	if ((so->so_state & connstates) != 0)
 		return (EINVAL);
 
 	/*
 	 * We may not sleep here, so only try-lock the I/O sx locks and fail
 	 * with EAGAIN if userspace is sending or receiving on the socket at
 	 * this moment.  Such a transient failure is not ideal, but it should
 	 * only be seen when userspace misuses the socket interfaces.
 	 */
 	if (!sx_try_xlock(&so->so_snd_sx))
 		return (EAGAIN);
 	if (!sx_try_xlock(&so->so_rcv_sx)) {
 		sx_xunlock(&so->so_snd_sx);
 		return (EAGAIN);
 	}
 	mtx_lock(&so->so_snd_mtx);
 	mtx_lock(&so->so_rcv_mtx);
 
 	/* Interlock with soo_aio_queue(). */
 	aio_busy = ((so->so_snd.sb_flags | so->so_rcv.sb_flags) &
 	    (SB_AIO | SB_AIO_RUNNING)) != 0;
 	if (!aio_busy)
 		return (0);
 
 	/* AIO is active on a buffer: drop everything taken above. */
 	solisten_proto_abort(so);
 	return (EINVAL);
 }
 
 /*
  * Undo the setup done by solisten_proto_check().
  */
 void
 solisten_proto_abort(struct socket *so)
 {
 	/* Drop both sockbuf mutexes first, then both I/O sx locks. */
 	mtx_unlock(&so->so_snd_mtx);
 	mtx_unlock(&so->so_rcv_mtx);
 	sx_xunlock(&so->so_snd_sx);
 	sx_xunlock(&so->so_rcv_sx);
 }
 
 /*
  * Put the socket into listening state (or, if it is already listening,
  * just update its queue limit), then release the sockbuf mutexes and I/O
  * sx locks.  The socket lock must be held by the caller.
  */
 void
 solisten_proto(struct socket *so, int backlog)
 {
 	int sbrcv_lowat, sbsnd_lowat;
 	u_int sbrcv_hiwat, sbsnd_hiwat;
 	short sbrcv_flags, sbsnd_flags;
 	sbintime_t sbrcv_timeo, sbsnd_timeo;
 
 	SOCK_LOCK_ASSERT(so);
 	KASSERT((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
 	    SS_ISDISCONNECTING)) == 0,
 	    ("%s: bad socket state %p", __func__, so));
 
 	/* Already listening: only the backlog needs refreshing. */
 	if (SOLISTENING(so))
 		goto listening;
 
 	/*
 	 * Change this socket to listening state.
 	 */
 	/* Save the sockbuf parameters before the sockbufs are destroyed. */
 	sbrcv_lowat = so->so_rcv.sb_lowat;
 	sbsnd_lowat = so->so_snd.sb_lowat;
 	sbrcv_hiwat = so->so_rcv.sb_hiwat;
 	sbsnd_hiwat = so->so_snd.sb_hiwat;
 	sbrcv_flags = so->so_rcv.sb_flags;
 	sbsnd_flags = so->so_snd.sb_flags;
 	sbrcv_timeo = so->so_rcv.sb_timeo;
 	sbsnd_timeo = so->so_snd.sb_timeo;
 
-	sbdestroy(&so->so_snd, so);
-	sbdestroy(&so->so_rcv, so);
+	sbdestroy(so, SO_SND);
+	sbdestroy(so, SO_RCV);
 
 #ifdef INVARIANTS
 	/* Debug kernels zero everything from so_rcv to the end of the socket. */
 	bzero(&so->so_rcv,
 	    sizeof(struct socket) - offsetof(struct socket, so_rcv));
 #endif
 
 	/* Stash the saved parameters in the listen-socket (sol_*) fields. */
 	so->sol_sbrcv_lowat = sbrcv_lowat;
 	so->sol_sbsnd_lowat = sbsnd_lowat;
 	so->sol_sbrcv_hiwat = sbrcv_hiwat;
 	so->sol_sbsnd_hiwat = sbsnd_hiwat;
 	so->sol_sbrcv_flags = sbrcv_flags;
 	so->sol_sbsnd_flags = sbsnd_flags;
 	so->sol_sbrcv_timeo = sbrcv_timeo;
 	so->sol_sbsnd_timeo = sbsnd_timeo;
 
 	/* Start with empty incomplete and complete connection queues. */
 	so->sol_qlen = so->sol_incqlen = 0;
 	TAILQ_INIT(&so->sol_incomp);
 	TAILQ_INIT(&so->sol_comp);
 
 	so->sol_accept_filter = NULL;
 	so->sol_accept_filter_arg = NULL;
 	so->sol_accept_filter_str = NULL;
 
 	so->sol_upcall = NULL;
 	so->sol_upcallarg = NULL;
 
 	so->so_options |= SO_ACCEPTCONN;
 
 listening:
 	/* Negative or too-large backlog values are replaced by somaxconn. */
 	if (backlog < 0 || backlog > somaxconn)
 		backlog = somaxconn;
 	so->sol_qlimit = backlog;
 
 	/* Same release sequence as solisten_proto_abort(). */
 	mtx_unlock(&so->so_snd_mtx);
 	mtx_unlock(&so->so_rcv_mtx);
 	sx_xunlock(&so->so_snd_sx);
 	sx_xunlock(&so->so_rcv_sx);
 }
 
 /*
  * Wakeup listeners/subsystems once we have a complete connection.
  * Enters with lock, returns unlocked.
  */
 void
 solisten_wakeup(struct socket *sol)
 {
 
 	if (sol->sol_upcall == NULL) {
 		/* No upcall installed: wake selectors and post the knote. */
 		selwakeuppri(&sol->so_rdsel, PSOCK);
 		KNOTE_LOCKED(&sol->so_rdsel.si_note, 0);
 	} else {
 		/* The upcall's return value is deliberately ignored. */
 		(void)sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT);
 	}
 	SOLISTEN_UNLOCK(sol);
 
 	/* Wake one thread sleeping on the completed-connection queue. */
 	wakeup_one(&sol->sol_comp);
 
 	/* Deliver SIGIO when async notification is enabled and owned. */
 	if ((sol->so_state & SS_ASYNC) != 0 && sol->so_sigio != NULL)
 		pgsigio(&sol->so_sigio, SIGIO, 0);
 }
 
 /*
  * Return a single connection off a listening socket's queue.  The main
  * consumer of this function is kern_accept4().  Some modules that do their
  * own accept management also use it.
  *
  * The listening socket must be locked on entry and is unlocked on
  * return.
  * The flags argument is a set of accept4(2) flags and ACCEPT4_INHERIT.
  */
 int
 solisten_dequeue(struct socket *head, struct socket **ret, int flags)
 {
 	struct socket *child;
 	int error;
 
 	SOLISTEN_LOCK_ASSERT(head);
 
 	/*
 	 * A blocking listener sleeps until a completed connection is queued
 	 * or an error is posted on it.  An interrupted or failed sleep
 	 * aborts the dequeue.
 	 */
 	for (;;) {
 		if ((head->so_state & SS_NBIO) != 0 ||
 		    !TAILQ_EMPTY(&head->sol_comp) || head->so_error != 0)
 			break;
 		error = msleep(&head->sol_comp, SOCK_MTX(head), PSOCK | PCATCH,
 		    "accept", 0);
 		if (error != 0)
 			goto fail;
 	}
 
 	/* A pending listener error is reported once and then cleared. */
 	if (head->so_error != 0) {
 		error = head->so_error;
 		head->so_error = 0;
 		goto fail;
 	}
 	/* Non-blocking listener with nothing queued. */
 	if ((head->so_state & SS_NBIO) != 0 && TAILQ_EMPTY(&head->sol_comp)) {
 		error = EWOULDBLOCK;
 		goto fail;
 	}
 
 	/* Detach the first completed connection from the listener. */
 	child = TAILQ_FIRST(&head->sol_comp);
 	SOCK_LOCK(child);
 	KASSERT(child->so_qstate == SQ_COMP,
 	    ("%s: so %p not SQ_COMP", __func__, child));
 	soref(child);
 	head->sol_qlen--;
 	child->so_qstate = SQ_NONE;
 	child->so_listen = NULL;
 	TAILQ_REMOVE(&head->sol_comp, child, so_list);
 
 	/*
 	 * Non-blocking mode is either inherited from the listener or taken
 	 * from the SOCK_NONBLOCK flag supplied by the caller.
 	 */
 	if ((flags & ACCEPT4_INHERIT) != 0)
 		child->so_state |= (head->so_state & SS_NBIO);
 	else if ((flags & SOCK_NONBLOCK) != 0)
 		child->so_state |= SS_NBIO;
 	SOCK_UNLOCK(child);
 
 	/* Releases one reference on the listener and unlocks it. */
 	sorele_locked(head);
 
 	*ret = child;
 	return (0);
 
 fail:
 	SOLISTEN_UNLOCK(head);
 	return (error);
 }
 
 /*
  * Evaluate the reference count and named references on a socket; if no
  * references remain, free it.  This should be called whenever a reference is
  * released, such as in sorele(), but also when named reference flags are
  * cleared in socket or protocol code.
  *
  * sofree() will free the socket if:
  *
  * - There are no outstanding file descriptor references or related consumers
  *   (so_count == 0).
  *
  * - The socket has been closed by user space, if ever open (SS_NOFDREF).
  *
  * - The protocol does not have an outstanding strong reference on the socket
  *   (SS_PROTOREF).
  *
  * - The socket is not in a completed connection queue, so a process has been
  *   notified that it is present.  If it is removed, the user process may
  *   block in accept() despite select() saying the socket was ready.
  */
 void
 sofree(struct socket *so)
 {
 	struct protosw *pr = so->so_proto;
 	bool last __diagused;
 
 	SOCK_LOCK_ASSERT(so);
 
 	/*
 	 * Not eligible for freeing yet: a file descriptor or protocol
 	 * reference remains, the reference count is non-zero, or the socket
 	 * sits on a completed connection queue.  Just drop the lock.
 	 */
 	if ((so->so_state & (SS_NOFDREF | SS_PROTOREF)) != SS_NOFDREF ||
 	    refcount_load(&so->so_count) != 0 || so->so_qstate == SQ_COMP) {
 		SOCK_UNLOCK(so);
 		return;
 	}
 
 	if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
 		struct socket *sol;
 
 		sol = so->so_listen;
 		KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));
 
 		/*
 		 * To solve race between close of a listening socket and
 		 * a socket on its incomplete queue, we need to lock both.
 		 * The order is first listening socket, then regular.
 		 * Since we don't have SS_NOFDREF neither SS_PROTOREF, this
 		 * function and the listening socket are the only pointers
 		 * to so.  To preserve so and sol, we reference both and then
 		 * relock.
 		 * After relock the socket may not move to so_comp since it
 		 * doesn't have PCB already, but it may be removed from
 		 * so_incomp. If that happens, we share responsibility on
 		 * freeing the socket, but soclose() has already removed
 		 * it from queue.
 		 */
 		soref(sol);
 		soref(so);
 		SOCK_UNLOCK(so);
 		SOLISTEN_LOCK(sol);
 		SOCK_LOCK(so);
 		if (so->so_qstate == SQ_INCOMP) {
 			KASSERT(so->so_listen == sol,
 			    ("%s: so %p migrated out of sol %p",
 			    __func__, so, sol));
 			TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
 			sol->sol_incqlen--;
 			/*
 			 * Drop the listener reference that went with the
 			 * queue entry; the soref(sol) above guarantees this
 			 * cannot be the last one.
 			 */
 			last = refcount_release(&sol->so_count);
 			KASSERT(!last, ("%s: released last reference for %p",
 			    __func__, sol));
 			so->so_qstate = SQ_NONE;
 			so->so_listen = NULL;
 		} else
 			KASSERT(so->so_listen == NULL,
 			    ("%s: so %p not on (in)comp with so_listen",
 			    __func__, so));
 		/* Releases the temporary listener reference and unlocks sol. */
 		sorele_locked(sol);
 		KASSERT(refcount_load(&so->so_count) == 1,
 		    ("%s: so %p count %u", __func__, so, so->so_count));
 		/*
 		 * Undo the temporary soref(so) by resetting the count
 		 * directly instead of going through a release.
 		 */
 		so->so_count = 0;
 	}
 	/*
 	 * NOTE(review): presumably set so that threads sleeping in
 	 * solisten_dequeue(), which checks so_error, stop waiting -- confirm.
 	 */
 	if (SOLISTENING(so))
 		so->so_error = ECONNABORTED;
 	SOCK_UNLOCK(so);
 
 	if (so->so_dtor != NULL)
 		so->so_dtor(so);
 
 	VNET_SO_ASSERT(so);
 	/* dom_dispose is only invoked for PR_RIGHTS, non-listening sockets. */
 	if ((pr->pr_flags & PR_RIGHTS) && !SOLISTENING(so)) {
 		MPASS(pr->pr_domain->dom_dispose != NULL);
 		(*pr->pr_domain->dom_dispose)(so);
 	}
 	if (pr->pr_usrreqs->pru_detach != NULL)
 		(*pr->pr_usrreqs->pru_detach)(so);
 
 	/*
 	 * From this point on, we assume that no other references to this
 	 * socket exist anywhere else in the stack.  Therefore, no locks need
 	 * to be acquired or held.
 	 */
 	/* Listening sockets had their buffers destroyed in solisten_proto(). */
 	if (!SOLISTENING(so)) {
-		sbdestroy(&so->so_snd, so);
-		sbdestroy(&so->so_rcv, so);
+		sbdestroy(so, SO_SND);
+		sbdestroy(so, SO_RCV);
 	}
 	seldrain(&so->so_rdsel);
 	seldrain(&so->so_wrsel);
 	knlist_destroy(&so->so_rdsel.si_note);
 	knlist_destroy(&so->so_wrsel.si_note);
 	sodealloc(so);
 }
 
 /*
  * Release a reference on a socket while holding the socket lock.
  * Unlocks the socket lock before returning.
  */
 void
 sorele_locked(struct socket *so)
 {
 	SOCK_LOCK_ASSERT(so);
 
 	if (!refcount_release(&so->so_count)) {
 		/* Other references remain; only the lock is dropped. */
 		SOCK_UNLOCK(so);
 		return;
 	}
 
 	/* Last reference gone: sofree() takes over and unlocks the socket. */
 	sofree(so);
 }
 
 /*
  * Close a socket on last file table reference removal.  Initiate disconnect
  * if connected.  Free socket when disconnect complete.
  *
  * This function will sorele() the socket.  Note that soclose() may be called
  * prior to the ref count reaching zero.  The actual socket structure will
  * not be freed until the ref count reaches zero.
  */
 int
 soclose(struct socket *so)
 {
 	struct accept_queue lqueue;
 	int error = 0;
 	bool listening, last __diagused;
 
 	KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
 
 	CURVNET_SET(so->so_vnet);
 	funsetown(&so->so_sigio);
 	if (so->so_state & SS_ISCONNECTED) {
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
 			error = sodisconnect(so);
 			if (error) {
 				/* Not being connected is not a close failure. */
 				if (error == ENOTCONN)
 					error = 0;
 				goto drop;
 			}
 		}
 
 		if ((so->so_options & SO_LINGER) != 0 && so->so_linger != 0) {
 			/* Non-blocking sockets do not wait out the linger. */
 			if ((so->so_state & SS_ISDISCONNECTING) &&
 			    (so->so_state & SS_NBIO))
 				goto drop;
 			/*
 			 * Sleep on so_timeo, so_linger * hz ticks at a time,
 			 * until the socket is no longer connected or the
 			 * sleep returns an error; that error becomes the
 			 * return value.
 			 */
 			while (so->so_state & SS_ISCONNECTED) {
 				error = tsleep(&so->so_timeo,
 				    PSOCK | PCATCH, "soclos",
 				    so->so_linger * hz);
 				if (error)
 					break;
 			}
 		}
 	}
 
 drop:
 	if (so->so_proto->pr_usrreqs->pru_close != NULL)
 		(*so->so_proto->pr_usrreqs->pru_close)(so);
 
 	SOCK_LOCK(so);
 	if ((listening = SOLISTENING(so))) {
 		struct socket *sp;
 
 		/*
 		 * Move every queued connection, incomplete first and then
 		 * complete, onto the private list 'lqueue' so they can be
 		 * torn down after the listener lock has been released.
 		 */
 		TAILQ_INIT(&lqueue);
 		TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
 		TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);
 
 		so->sol_qlen = so->sol_incqlen = 0;
 
 		/*
 		 * Detach each queued socket from the listener and drop one
 		 * listener reference per socket; the caller's own reference
 		 * keeps this from being the last release.
 		 */
 		TAILQ_FOREACH(sp, &lqueue, so_list) {
 			SOCK_LOCK(sp);
 			sp->so_qstate = SQ_NONE;
 			sp->so_listen = NULL;
 			SOCK_UNLOCK(sp);
 			last = refcount_release(&so->so_count);
 			KASSERT(!last, ("%s: released last reference for %p",
 			    __func__, so));
 		}
 	}
 	KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
 	so->so_state |= SS_NOFDREF;
 	/* Drops the caller's reference and the socket lock. */
 	sorele_locked(so);
 	if (listening) {
 		struct socket *sp, *tsp;
 
 		/*
 		 * Abort the detached sockets that nobody references.  The
 		 * _SAFE iterator is used because soabort() ends in sofree().
 		 */
 		TAILQ_FOREACH_SAFE(sp, &lqueue, so_list, tsp) {
 			SOCK_LOCK(sp);
 			if (refcount_load(&sp->so_count) == 0) {
 				SOCK_UNLOCK(sp);
 				soabort(sp);
 			} else {
 				/* See the handling of queued sockets
 				   in sofree(). */
 				SOCK_UNLOCK(sp);
 			}
 		}
 	}
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * soabort() is used to abruptly tear down a connection, such as when a
  * resource limit is reached (listen queue depth exceeded), or if a listen
  * socket is closed while there are sockets waiting to be accepted.
  *
  * This interface is tricky, because it is called on an unreferenced socket,
  * and must be called only by a thread that has actually removed the socket
  * from the listen queue it was on, or races with other threads are risked.
  *
  * This interface will call into the protocol code, so must not be called
  * with any socket locks held.  Protocols do call it while holding their own
  * recursible protocol mutexes, but this is something that should be subject
  * to review in the future.
  */
 void
 soabort(struct socket *so)
 {
 
 	/*
 	 * In as much as is possible, assert that no references to this
 	 * socket are held.  This is not quite the same as asserting that the
 	 * current thread is responsible for arranging for no references, but
 	 * is as close as we can get for now.
 	 */
 	KASSERT(so->so_count == 0, ("soabort: so_count"));
 	KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
 	KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
 	VNET_SO_ASSERT(so);
 
 	/* The protocol abort hook is optional. */
 	if (so->so_proto->pr_usrreqs->pru_abort != NULL)
 		(*so->so_proto->pr_usrreqs->pru_abort)(so);
 	/*
 	 * sofree() asserts that the socket lock is held on entry and
 	 * releases it itself, so no unlock follows here.
 	 */
 	SOCK_LOCK(so);
 	sofree(so);
 }
 
 /*
  * Finish accepting 'so': clear SS_NOFDREF under the socket lock (asserting
  * that it was set) and then call the protocol's pru_accept method, whose
  * result is returned.
  */
 int
 soaccept(struct socket *so, struct sockaddr **nam)
 {
 	int rv;
 
 	SOCK_LOCK(so);
 	KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
 	so->so_state &= ~SS_NOFDREF;
 	SOCK_UNLOCK(so);
 
 	/* The protocol call runs in the socket's vnet context. */
 	CURVNET_SET(so->so_vnet);
 	rv = so->so_proto->pr_usrreqs->pru_accept(so, nam);
 	CURVNET_RESTORE();
 
 	return (rv);
 }
 
 /*
  * Connect 'so' to 'nam'.  Shorthand for soconnectat() with AT_FDCWD as the
  * descriptor argument; the result of soconnectat() is returned unchanged.
  */
 int
 soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (soconnectat(AT_FDCWD, so, nam, td));
 }
 
 int
 soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
 	CURVNET_SET(so->so_vnet);
 	/*
 	 * If protocol is connection-based, can only connect once.
 	 * Otherwise, if connected, try to disconnect first.  This allows
 	 * user to disconnect by connecting to, e.g., a null address.
 	 */
 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
 	    (error = sodisconnect(so)))) {
 		error = EISCONN;
 	} else {
 		/*
 		 * Prevent accumulated error from previous connection from
 		 * biting us.
 		 */
 		so->so_error = 0;
 		if (fd == AT_FDCWD) {
 			error = (*so->so_proto->pr_usrreqs->pru_connect)(so,
 			    nam, td);
 		} else {
 			error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd,
 			    so, nam, td);
 		}
 	}
 	CURVNET_RESTORE();
 
 	return (error);
 }
 
 /*
  * Hand both sockets to so1's protocol pru_connect2 method, inside so1's
  * vnet context, and return its result.
  */
 int
 soconnect2(struct socket *so1, struct socket *so2)
 {
 	int rv;
 
 	CURVNET_SET(so1->so_vnet);
 	rv = so1->so_proto->pr_usrreqs->pru_connect2(so1, so2);
 	CURVNET_RESTORE();
 
 	return (rv);
 }
 
 /*
  * Start a disconnect through the protocol's pru_disconnect method.
  *
  * Returns ENOTCONN if the socket is not connected, EALREADY if a
  * disconnect is already in progress, otherwise the protocol's result.
  */
 int
 sodisconnect(struct socket *so)
 {
 
 	if (!(so->so_state & SS_ISCONNECTED))
 		return (ENOTCONN);
 	if ((so->so_state & SS_ISDISCONNECTING) != 0)
 		return (EALREADY);
 
 	VNET_SO_ASSERT(so);
 	return (so->so_proto->pr_usrreqs->pru_disconnect(so));
 }
 
 /*
  * Send a single datagram on a SOCK_DGRAM, PR_ATOMIC socket.
  *
  * The data comes either from 'uio' or, when 'uio' is NULL, from the mbuf
  * chain 'top'.  Unlike sosend_generic(), this path does not take the send
  * I/O lock and does not wait for buffer space: a message that does not fit
  * in the currently available space fails with EMSGSIZE.
  *
  * 'top' and 'control' are freed here on every error that occurs before the
  * protocol is called; once pru_send has been invoked the local pointers are
  * cleared and nothing further is freed.
  *
  * Returns 0 or an errno: EINVAL (negative residual), EPIPE, a pending
  * so_error, ENOTCONN, EDESTADDRREQ, EMSGSIZE, EFAULT, or whatever pru_send
  * returns.
  */
 int
 sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
 
 	KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
 	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
 	    ("sosend_dgram: !PR_ATOMIC"));
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 */
 	if (resid < 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * MSG_DONTROUTE only needs the temporary SO_DONTROUTE toggle below
 	 * when the option is not already set on the socket.
 	 */
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	SOCKBUF_LOCK(&so->so_snd);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		SOCKBUF_UNLOCK(&so->so_snd);
 		error = EPIPE;
 		goto out;
 	}
 	/* A pending socket error is returned once and then cleared. */
 	if (so->so_error) {
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		goto out;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		/*
 		 * `sendto' and `sendmsg' is allowed on a connection-based
 		 * socket if it supports implied connect.  Return ENOTCONN if
 		 * not connected and no address is supplied.
 		 */
 		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 			    !(resid == 0 && clen != 0)) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = ENOTCONN;
 				goto out;
 			}
 		} else if (addr == NULL) {
 			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 				error = ENOTCONN;
 			else
 				error = EDESTADDRREQ;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto out;
 		}
 	}
 
 	/*
 	 * Do we need MSG_OOB support in SOCK_DGRAM?  Signs here may be a
 	 * problem and need fixing.
 	 */
 	space = sbspace(&so->so_snd);
 	if (flags & MSG_OOB)
 		space += 1024;
 	space -= clen;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	/* The whole datagram must fit; there is no waiting for space here. */
 	if (resid > space) {
 		error = EMSGSIZE;
 		goto out;
 	}
 	if (uio == NULL) {
 		resid = 0;
 		if (flags & MSG_EOR)
 			top->m_flags |= M_EOR;
 	} else {
 		/*
 		 * Copy the data from userland into a mbuf chain.
 		 * If no data is to be copied in, a single empty mbuf
 		 * is returned.
 		 */
 		top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
 		    (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
 		if (top == NULL) {
 			error = EFAULT;	/* only possible error */
 			goto out;
 		}
 		space -= resid - uio->uio_resid;
 		resid = uio->uio_resid;
 	}
 	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
 	/*
 	 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
 	 * than with.
 	 */
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options |= SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	/*
 	 * XXX all the SBS_CANTSENDMORE checks previously done could be out
 	 * of date.  We could have received a reset packet in an interrupt or
 	 * maybe we slept while doing page faults in uiomove() etc.  We could
 	 * probably recheck again inside the locking protection here, but
 	 * there are probably other places that this also happens.  We must
 	 * rethink this.
 	 */
 	VNET_SO_ASSERT(so);
 	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 	    (flags & MSG_OOB) ? PRUS_OOB :
 	/*
 	 * If the user set MSG_EOF, the protocol understands this flag and
 	 * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND.
 	 */
 	    ((flags & MSG_EOF) &&
 	     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 	     (resid <= 0)) ?
 		PRUS_EOF :
 		/* If there is more to send set PRUS_MORETOCOME */
 		(flags & MSG_MORETOCOME) ||
 		(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 		top, addr, control, td);
 	/* Undo the temporary SO_DONTROUTE set above. */
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options &= ~SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	/*
 	 * The chains were handed to pru_send; clear the local pointers so
 	 * the cleanup at 'out' does not free them.
 	 */
 	clen = 0;
 	control = NULL;
 	top = NULL;
 out:
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 /*
  * Send on a socket.  If send must go all at once and message is larger than
  * send buffering, then hard error.  Lock against other senders.  If must go
  * all at once and not enough room now, then inform user that this would
  * block and do nothing.  Otherwise, if nonblocking, send as much as
  * possible.  The data to be sent is described by "uio" if nonzero, otherwise
  * by the mbuf chain "top" (which must be null if uio is not).  Data provided
  * in mbuf chain must be small enough to send all at once.
  *
  * Returns nonzero on error, timeout or signal; callers must check for short
  * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
  * on return.
  */
 int
 sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
 	/* A caller-supplied mbuf chain is always sent all at once. */
 	int atomic = sosendallatonce(so) || top;
 	int pru_flag;
 #ifdef KERN_TLS
 	struct ktls_session *tls;
 	int tls_enq_cnt, tls_pruflag;
 	uint8_t tls_rtype;
 
 	tls = NULL;
 	tls_rtype = TLS_RLTYPE_APP;
 #endif
 	/* Total payload length: from the uio, the packet header, or by walking the chain. */
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else if ((top->m_flags & M_PKTHDR) != 0)
 		resid = top->m_pkthdr.len;
 	else
 		resid = m_length(top, NULL);
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 *
 	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
 	 * type sockets since that's an error.
 	 */
 	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * The temporary SO_DONTROUTE toggle is only used for PR_ATOMIC
 	 * protocols and only when the option is not already set.
 	 */
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
 	    (so->so_proto->pr_flags & PR_ATOMIC);
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	/* Serialize against other senders; failure skips the unlock at 'release'. */
 	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 
 #ifdef KERN_TLS
 	tls_pruflag = 0;
 	tls = ktls_hold(so->so_snd.sb_tls_info);
 	if (tls != NULL) {
 		if (tls->mode == TCP_TLS_MODE_SW)
 			tls_pruflag = PRUS_NOTREADY;
 
 		/*
 		 * A TLS_SET_RECORD_TYPE control message selects the record
 		 * type passed to the next ktls_frame() call.  It is consumed
 		 * (freed) here and the send is then treated as atomic.
 		 */
 		if (control != NULL) {
 			struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 
 			if (clen >= sizeof(*cm) &&
 			    cm->cmsg_type == TLS_SET_RECORD_TYPE) {
 				tls_rtype = *((uint8_t *)CMSG_DATA(cm));
 				clen = 0;
 				m_freem(control);
 				control = NULL;
 				atomic = 1;
 			}
 		}
 
 		if (resid == 0 && !ktls_permit_empty_frames(tls)) {
 			error = EINVAL;
 			goto release;
 		}
 	}
 #endif
 
 restart:
 	do {
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EPIPE;
 			goto release;
 		}
 		/* A pending socket error is returned once and then cleared. */
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			/*
 			 * `sendto' and `sendmsg' is allowed on a connection-
 			 * based socket if it supports implied connect.
 			 * Return ENOTCONN if not connected and no address is
 			 * supplied.
 			 */
 			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 				    !(resid == 0 && clen != 0)) {
 					SOCKBUF_UNLOCK(&so->so_snd);
 					error = ENOTCONN;
 					goto release;
 				}
 			} else if (addr == NULL) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 					error = ENOTCONN;
 				else
 					error = EDESTADDRREQ;
 				goto release;
 			}
 		}
 		space = sbspace(&so->so_snd);
 		if (flags & MSG_OOB)
 			space += 1024;
 		/*
 		 * An atomic message larger than the high-water mark, or
 		 * control data larger than it, can never be sent.
 		 */
 		if ((atomic && resid > so->so_snd.sb_hiwat) ||
 		    clen > so->so_snd.sb_hiwat) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EMSGSIZE;
 			goto release;
 		}
 		/*
 		 * Not everything fits right now, and either the send is
 		 * atomic, the free space is below the low-water mark, or it
 		 * cannot even hold the control data: non-blocking callers
 		 * get EWOULDBLOCK, everyone else waits and starts over.
 		 */
 		if (space < resid + clen &&
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
 			if ((so->so_state & SS_NBIO) ||
 			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EWOULDBLOCK;
 				goto release;
 			}
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
 			goto restart;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 		space -= clen;
 		/*
 		 * Inner loop: keep handing chunks to the protocol while data
 		 * remains and the space computed above is not used up.  The
 		 * outer loop then re-checks the socket state and space.
 		 */
 		do {
 			if (uio == NULL) {
 				resid = 0;
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 #ifdef KERN_TLS
 				if (tls != NULL) {
 					ktls_frame(top, tls, &tls_enq_cnt,
 					    tls_rtype);
 					/* Later frames revert to the default record type. */
 					tls_rtype = TLS_RLTYPE_APP;
 				}
 #endif
 			} else {
 				/*
 				 * Copy the data from userland into a mbuf
 				 * chain.  If resid is 0, which can happen
 				 * only if we have control to send, then
 				 * a single empty mbuf is returned.  This
 				 * is a workaround to prevent protocol send
 				 * methods to panic.
 				 */
 #ifdef KERN_TLS
 				if (tls != NULL) {
 					top = m_uiotombuf(uio, M_WAITOK, space,
 					    tls->params.max_frame_len,
 					    M_EXTPG |
 					    ((flags & MSG_EOR) ? M_EOR : 0));
 					if (top != NULL) {
 						ktls_frame(top, tls,
 						    &tls_enq_cnt, tls_rtype);
 					}
 					tls_rtype = TLS_RLTYPE_APP;
 				} else
 #endif
 					top = m_uiotombuf(uio, M_WAITOK, space,
 					    (atomic ? max_hdr : 0),
 					    (atomic ? M_PKTHDR : 0) |
 					    ((flags & MSG_EOR) ? M_EOR : 0));
 				if (top == NULL) {
 					error = EFAULT; /* only possible error */
 					goto release;
 				}
 				/* Charge the bytes just copied against the space budget. */
 				space -= resid - uio->uio_resid;
 				resid = uio->uio_resid;
 			}
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options |= SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 			/*
 			 * XXX all the SBS_CANTSENDMORE checks previously
 			 * done could be out of date.  We could have received
 			 * a reset packet in an interrupt or maybe we slept
 			 * while doing page faults in uiomove() etc.  We
 			 * could probably recheck again inside the locking
 			 * protection here, but there are probably other
 			 * places that this also happens.  We must rethink
 			 * this.
 			 */
 			VNET_SO_ASSERT(so);
 
 			pru_flag = (flags & MSG_OOB) ? PRUS_OOB :
 			/*
 			 * If the user set MSG_EOF, the protocol understands
 			 * this flag and nothing left to send then use
 			 * PRU_SEND_EOF instead of PRU_SEND.
 			 */
 			    ((flags & MSG_EOF) &&
 			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 			     (resid <= 0)) ?
 				PRUS_EOF :
 			/* If there is more to send set PRUS_MORETOCOME. */
 			    (flags & MSG_MORETOCOME) ||
 			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
 
 #ifdef KERN_TLS
 			pru_flag |= tls_pruflag;
 #endif
 
 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 			    pru_flag, top, addr, control, td);
 
 			/* Undo the temporary SO_DONTROUTE set above. */
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options &= ~SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 
 #ifdef KERN_TLS
 			/*
 			 * Software TLS (sent with PRUS_NOTREADY): on failure
 			 * the chain is freed here; on success it is passed to
 			 * ktls_enqueue() together with an extra socket
 			 * reference.
 			 */
 			if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) {
 				if (error != 0) {
 					m_freem(top);
 					top = NULL;
 				} else {
 					soref(so);
 					ktls_enqueue(top, so, tls_enq_cnt);
 				}
 			}
 #endif
 			/*
 			 * Clear the local pointers so the cleanup code at
 			 * the end of the function does not free the chains.
 			 */
 			clen = 0;
 			control = NULL;
 			top = NULL;
 			if (error)
 				goto release;
 		} while (resid && space > 0);
 	} while (resid);
 
 release:
 	SOCK_IO_SEND_UNLOCK(so);
 out:
 #ifdef KERN_TLS
 	/* Drop the session reference taken by ktls_hold(). */
 	if (tls != NULL)
 		ktls_free(tls);
 #endif
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 /*
  * Protocol-independent send entry point: forwards every argument to the
  * protocol's pru_sosend method inside the socket's vnet context and returns
  * that method's result.
  */
 int
 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	int rv;
 
 	CURVNET_SET(so->so_vnet);
 	rv = (*so->so_proto->pr_usrreqs->pru_sosend)(so, addr, uio, top,
 	    control, flags, td);
 	CURVNET_RESTORE();
 
 	return (rv);
 }
 
 /*
  * The part of soreceive() that implements reading non-inline out-of-band
  * data from a socket.  For more complete comments, see soreceive(), from
  * which this code originated.
  *
  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
  * unable to return an mbuf chain to the caller.
  */
 static int
 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
 {
 	struct protosw *pr = so->so_proto;
 	struct mbuf *m;
 	int error;
 
 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
 	VNET_SO_ASSERT(so);
 
 	m = m_get(M_WAITOK, MT_DATA);
 	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
 	if (error)
 		goto bad;
 	do {
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
 	} while (uio->uio_resid && error == 0 && m);
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Following replacement or removal of the first mbuf on the first mbuf chain
  * of a socket buffer, push necessary state changes back into the socket
  * buffer so that other consumers see the values consistently.  'nextrecord'
  * is the caller's locally stored copy of the original value of
  * sb->sb_mb->m_nextpkt, which must be restored when the lead mbuf changes.
  * NOTE: 'nextrecord' may be NULL.
  */
 static __inline void
 sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/*
 	 * Re-attach the saved next-record pointer: hang it off the current
 	 * lead mbuf if there is one, otherwise promote it to be the first
 	 * record of the buffer.
 	 */
 	if (sb->sb_mb == NULL)
 		sb->sb_mb = nextrecord;
 	else
 		sb->sb_mb->m_nextpkt = nextrecord;
 
 	/*
 	 * Bring the dependent fields back in line.  This mirrors
 	 * SB_EMPTY_FIXUP() for the empty case and additionally handles the
 	 * case where sb_mb changed but is still the last record.
 	 */
 	if (sb->sb_mb == NULL) {
 		sb->sb_mbtail = NULL;
 		sb->sb_lastrecord = NULL;
 		return;
 	}
 	if (sb->sb_mb->m_nextpkt == NULL)
 		sb->sb_lastrecord = sb->sb_mb;
 }
 
 /*
  * Implement receive operations on a socket.  We depend on the way that
  * records are added to the sockbuf by sbappend.  In particular, each record
  * (mbufs linked through m_next) must begin with an address if the protocol
  * so specifies, followed by an optional mbuf or mbufs containing ancillary
  * data, and then zero or more mbufs of data.  In order to allow parallelism
  * between network receive and copying to user space, as well as avoid
  * sleeping with a mutex held, we release the socket buffer mutex during the
  * user space copy.  Although the sockbuf is locked, new data may still be
  * appended, and thus we must maintain consistency of the sockbuf during that
  * time.
  *
  * The caller may receive the data as a single mbuf chain by supplying an
  * mbuf **mp0 for use in returning the chain.  The uio is then used only for
  * the count in uio_resid.
  */
 int
 soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct mbuf *m, **mp;
 	int flags, error, offset;
 	ssize_t len;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 	int moff, type = 0;
 	ssize_t orig_resid = uio->uio_resid;
 
 	mp = mp0;
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp != NULL)
 		*mp = NULL;
 	if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
 	    && uio->uio_resid) {
 		VNET_SO_ASSERT(so);
 		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
 	}
 
 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 
 restart:
 	SOCKBUF_LOCK(&so->so_rcv);
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more (subject
 	 * to any timeout) if:
 	 *   1. the current count is less than the low water mark, or
 	 *   2. MSG_DONTWAIT is not set
 	 */
 	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
 	    sbavail(&so->so_rcv) < uio->uio_resid) &&
 	    sbavail(&so->so_rcv) < so->so_rcv.sb_lowat &&
 	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
 		KASSERT(m != NULL || !sbavail(&so->so_rcv),
 		    ("receive: m == %p sbavail == %u",
 		    m, sbavail(&so->so_rcv)));
 		if (so->so_error || so->so_rerror) {
 			if (m != NULL)
 				goto dontblock;
 			if (so->so_error)
 				error = so->so_error;
 			else
 				error = so->so_rerror;
 			if ((flags & MSG_PEEK) == 0) {
 				if (so->so_error)
 					so->so_error = 0;
 				else
 					so->so_rerror = 0;
 			}
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			if (m != NULL)
 				goto dontblock;
 #ifdef KERN_TLS
 			else if (so->so_rcv.sb_tlsdcc == 0 &&
 			    so->so_rcv.sb_tlscc == 0) {
 #else
 			else {
 #endif
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			}
 		}
 		for (; m != NULL; m = m->m_next)
 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
 				m = so->so_rcv.sb_mb;
 				goto dontblock;
 			}
 		if ((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED |
 		    SS_ISDISCONNECTING | SS_ISDISCONNECTED)) == 0 &&
 		    (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = ENOTCONN;
 			goto release;
 		}
 		if (uio->uio_resid == 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = EWOULDBLOCK;
 			goto release;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		error = sbwait(&so->so_rcv);
+		error = sbwait(so, SO_RCV);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		if (error)
 			goto release;
 		goto restart;
 	}
 dontblock:
 	/*
 	 * From this point onward, we maintain 'nextrecord' as a cache of the
 	 * pointer to the next record in the socket buffer.  We must keep the
 	 * various socket buffer pointers and local stack versions of the
 	 * pointers in sync, pushing out modifications before dropping the
 	 * socket buffer mutex, and re-reading them when picking it up.
 	 *
 	 * Otherwise, we will race with the network stack appending new data
 	 * or records onto the socket buffer by using inconsistent/stale
 	 * versions of the field, possibly resulting in socket buffer
 	 * corruption.
 	 *
 	 * By holding the high-level sblock(), we prevent simultaneous
 	 * readers from pulling off the front of the socket buffer.
 	 */
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	nextrecord = m->m_nextpkt;
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
 		orig_resid = 0;
 		if (psa != NULL)
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 			    M_NOWAIT);
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
 			sbfree(&so->so_rcv, m);
 			so->so_rcv.sb_mb = m_free(m);
 			m = so->so_rcv.sb_mb;
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		}
 	}
 
 	/*
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
 	 * just copy the data; if !MSG_PEEK, we call into the protocol to
 	 * perform externalization (or freeing if controlp == NULL).
 	 */
 	if (m != NULL && m->m_type == MT_CONTROL) {
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 #ifdef KERN_TLS
 		struct cmsghdr *cmsg;
 		struct tls_get_record tgr;
 
 		/*
 		 * For MSG_TLSAPPDATA, check for a non-application data
 		 * record.  If found, return ENXIO without removing
 		 * it from the receive queue.  This allows a subsequent
 		 * call without MSG_TLSAPPDATA to receive it.
 		 * Note that, for TLS, there should only be a single
 		 * control mbuf with the TLS_GET_RECORD message in it.
 		 */
 		if (flags & MSG_TLSAPPDATA) {
 			cmsg = mtod(m, struct cmsghdr *);
 			if (cmsg->cmsg_type == TLS_GET_RECORD &&
 			    cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) {
 				memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
 				/* This will need to change for TLS 1.3. */
 				if (tgr.tls_type != TLS_RLTYPE_APP) {
 					SOCKBUF_UNLOCK(&so->so_rcv);
 					error = ENXIO;
 					goto release;
 				}
 			}
 		}
 #endif
 
 		do {
 			if (flags & MSG_PEEK) {
 				if (controlp != NULL) {
 					*controlp = m_copym(m, 0, m->m_len,
 					    M_NOWAIT);
 					controlp = &(*controlp)->m_next;
 				}
 				m = m->m_next;
 			} else {
 				sbfree(&so->so_rcv, m);
 				so->so_rcv.sb_mb = m->m_next;
 				m->m_next = NULL;
 				*cme = m;
 				cme = &(*cme)->m_next;
 				m = so->so_rcv.sb_mb;
 			}
 		} while (m != NULL && m->m_type == MT_CONTROL);
 		if ((flags & MSG_PEEK) == 0)
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		while (cm != NULL) {
 			cmn = cm->m_next;
 			cm->m_next = NULL;
 			if (pr->pr_domain->dom_externalize != NULL) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				VNET_SO_ASSERT(so);
 				error = (*pr->pr_domain->dom_externalize)
 				    (cm, controlp, flags);
 				SOCKBUF_LOCK(&so->so_rcv);
 			} else if (controlp != NULL)
 				*controlp = cm;
 			else
 				m_freem(cm);
 			if (controlp != NULL) {
 				while (*controlp != NULL)
 					controlp = &(*controlp)->m_next;
 			}
 			cm = cmn;
 		}
 		if (m != NULL)
 			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
 		else
 			nextrecord = so->so_rcv.sb_mb;
 		orig_resid = 0;
 	}
 	if (m != NULL) {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(m->m_nextpkt == nextrecord,
 			    ("soreceive: post-control, nextrecord !sync"));
 			if (nextrecord == NULL) {
 				KASSERT(so->so_rcv.sb_mb == m,
 				    ("soreceive: post-control, sb_mb!=m"));
 				KASSERT(so->so_rcv.sb_lastrecord == m,
 				    ("soreceive: post-control, lastrecord!=m"));
 			}
 		}
 		type = m->m_type;
 		if (type == MT_OOBDATA)
 			flags |= MSG_OOB;
 	} else {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(so->so_rcv.sb_mb == nextrecord,
 			    ("soreceive: sb_mb != nextrecord"));
 			if (so->so_rcv.sb_mb == NULL) {
 				KASSERT(so->so_rcv.sb_lastrecord == NULL,
 				    ("soreceive: sb_lastercord != NULL"));
 			}
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 
 	/*
 	 * Now continue to read any data mbufs off of the head of the socket
 	 * buffer until the read request is satisfied.  Note that 'type' is
 	 * used to store the type of any mbuf reads that have happened so far
 	 * such that soreceive() can stop reading if the type changes, which
 	 * causes soreceive() to return only one of regular data and inline
 	 * out-of-band data in a single socket receive operation.
 	 */
 	moff = 0;
 	offset = 0;
 	while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0
 	    && error == 0) {
 		/*
 		 * If the type of mbuf has changed since the last mbuf
 		 * examined ('type'), end the receive operation.
 		 */
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
 			if (type != m->m_type)
 				break;
 		} else if (type == MT_OOBDATA)
 			break;
 		else
 		    KASSERT(m->m_type == MT_DATA,
 			("m->m_type == %d", m->m_type));
 		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
 		len = uio->uio_resid;
 		if (so->so_oobmark && len > so->so_oobmark - offset)
 			len = so->so_oobmark - offset;
 		if (len > m->m_len - moff)
 			len = m->m_len - moff;
 		/*
 		 * If mp is set, just pass back the mbufs.  Otherwise copy
 		 * them out via the uio, then free.  Sockbuf must be
 		 * consistent here (points to current mbuf, it points to next
 		 * record) when we drop priority; we must note any additions
 		 * to the sockbuf when we block interrupts again.
 		 */
 		if (mp == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			if ((m->m_flags & M_EXTPG) != 0)
 				error = m_unmapped_uiomove(m, moff, uio,
 				    (int)len);
 			else
 				error = uiomove(mtod(m, char *) + moff,
 				    (int)len, uio);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (error) {
 				/*
 				 * The MT_SONAME mbuf has already been removed
 				 * from the record, so it is necessary to
 				 * remove the data mbufs, if any, to preserve
 				 * the invariant in the case of PR_ADDR that
 				 * requires MT_SONAME mbufs at the head of
 				 * each record.
 				 */
 				if (pr->pr_flags & PR_ATOMIC &&
 				    ((flags & MSG_PEEK) == 0))
 					(void)sbdroprecord_locked(&so->so_rcv);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			}
 		} else
 			uio->uio_resid -= len;
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (len == m->m_len - moff) {
 			if (m->m_flags & M_EOR)
 				flags |= MSG_EOR;
 			if (flags & MSG_PEEK) {
 				m = m->m_next;
 				moff = 0;
 			} else {
 				nextrecord = m->m_nextpkt;
 				sbfree(&so->so_rcv, m);
 				if (mp != NULL) {
 					m->m_nextpkt = NULL;
 					*mp = m;
 					mp = &m->m_next;
 					so->so_rcv.sb_mb = m = m->m_next;
 					*mp = NULL;
 				} else {
 					so->so_rcv.sb_mb = m_free(m);
 					m = so->so_rcv.sb_mb;
 				}
 				sockbuf_pushsync(&so->so_rcv, nextrecord);
 				SBLASTRECORDCHK(&so->so_rcv);
 				SBLASTMBUFCHK(&so->so_rcv);
 			}
 		} else {
 			if (flags & MSG_PEEK)
 				moff += len;
 			else {
 				if (mp != NULL) {
 					if (flags & MSG_DONTWAIT) {
 						*mp = m_copym(m, 0, len,
 						    M_NOWAIT);
 						if (*mp == NULL) {
 							/*
 							 * m_copym() couldn't
 							 * allocate an mbuf.
 							 * Adjust uio_resid back
 							 * (it was adjusted
 							 * down by len bytes,
 							 * which we didn't end
 							 * up "copying" over).
 							 */
 							uio->uio_resid += len;
 							break;
 						}
 					} else {
 						SOCKBUF_UNLOCK(&so->so_rcv);
 						*mp = m_copym(m, 0, len,
 						    M_WAITOK);
 						SOCKBUF_LOCK(&so->so_rcv);
 					}
 				}
 				sbcut_locked(&so->so_rcv, len);
 			}
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_oobmark) {
 			if ((flags & MSG_PEEK) == 0) {
 				so->so_oobmark -= len;
 				if (so->so_oobmark == 0) {
 					so->so_rcv.sb_state |= SBS_RCVATMARK;
 					break;
 				}
 			} else {
 				offset += len;
 				if (offset == so->so_oobmark)
 					break;
 			}
 		}
 		if (flags & MSG_EOR)
 			break;
 		/*
 		 * If the MSG_WAITALL flag is set (for non-atomic socket), we
 		 * must not quit until "uio->uio_resid == 0" or an error
 		 * termination.  If a signal/timeout occurs, return with a
 		 * short count but without error.  Keep sockbuf locked
 		 * against other readers.
 		 */
 		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
 		    !sosendallatonce(so) && nextrecord == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			if (so->so_error || so->so_rerror ||
 			    so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				break;
 			/*
 			 * Notify the protocol that some data has been
 			 * drained before blocking.
 			 */
 			if (pr->pr_flags & PR_WANTRCVD) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				VNET_SO_ASSERT(so);
 				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 				SOCKBUF_LOCK(&so->so_rcv);
 			}
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			/*
 			 * We could have received some data while we were
 			 * notifying the protocol.  Skip blocking in this case.
 			 */
 			if (so->so_rcv.sb_mb == NULL) {
-				error = sbwait(&so->so_rcv);
+				error = sbwait(so, SO_RCV);
 				if (error) {
 					SOCKBUF_UNLOCK(&so->so_rcv);
 					goto release;
 				}
 			}
 			m = so->so_rcv.sb_mb;
 			if (m != NULL)
 				nextrecord = m->m_nextpkt;
 		}
 	}
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
 		flags |= MSG_TRUNC;
 		if ((flags & MSG_PEEK) == 0)
 			(void) sbdroprecord_locked(&so->so_rcv);
 	}
 	if ((flags & MSG_PEEK) == 0) {
 		if (m == NULL) {
 			/*
 			 * First part is an inline SB_EMPTY_FIXUP().  Second
 			 * part makes sure sb_lastrecord is up-to-date if
 			 * there is still data in the socket buffer.
 			 */
 			so->so_rcv.sb_mb = nextrecord;
 			if (so->so_rcv.sb_mb == NULL) {
 				so->so_rcv.sb_mbtail = NULL;
 				so->so_rcv.sb_lastrecord = NULL;
 			} else if (nextrecord->m_nextpkt == NULL)
 				so->so_rcv.sb_lastrecord = nextrecord;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		/*
 		 * If soreceive() is being done from the socket callback,
 		 * then don't need to generate ACK to peer to update window,
 		 * since ACK will be generated on return to TCP.
 		 */
 		if (!(flags & MSG_SOCALLBCK) &&
 		    (pr->pr_flags & PR_WANTRCVD)) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			VNET_SO_ASSERT(so);
 			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 			SOCKBUF_LOCK(&so->so_rcv);
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (orig_resid == uio->uio_resid && orig_resid &&
 	    (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		goto restart;
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	if (flagsp != NULL)
 		*flagsp |= flags;
 release:
 	SOCK_IO_RECV_UNLOCK(so);
 	return (error);
 }
 
 /*
  * Optimized version of soreceive() for stream (TCP) sockets.
  *
  * Data is taken from the front of so->so_rcv and either copied into 'uio'
  * with m_mbuftouio() (mp0 == NULL) or handed back as an mbuf chain through
  * '*mp0' (mp0 != NULL); in the latter case uio->uio_resid is only used as
  * the byte budget.
  *
  * 'psa' and 'controlp', when non-NULL, are only reset to NULL here.
  * 'flagsp', when non-NULL, supplies the MSG_* flags (MSG_EOR is masked
  * off); this function does not store anything back through it.
  *
  * MSG_OOB requests are passed to soreceive_rcvoob().  With KERN_TLS, a
  * receive buffer that has sb_tls_info set is passed to soreceive_generic().
  *
  * Returns 0 on success, otherwise an errno value: EINVAL (not a
  * SOCK_STREAM socket, or uio_resid == 0), ENOTCONN, EAGAIN, ENOBUFS, the
  * pending so_error, or whatever SOCK_IO_RECV_LOCK(), sbwait() or
  * m_mbuftouio() reported.
  */
 int
 soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int len = 0, error = 0, flags, oresid;
 	struct sockbuf *sb;
 	struct mbuf *m, *n = NULL;
 
 	/* We only do stream sockets. */
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (psa != NULL)
 		*psa = NULL;
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (controlp != NULL)
 		*controlp = NULL;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp0 != NULL)
 		*mp0 = NULL;
 
 	sb = &so->so_rcv;
 
 #ifdef KERN_TLS
 	/*
 	 * KTLS stores TLS records as records with a control message to
 	 * describe the framing.
 	 *
 	 * We check once here before acquiring locks to optimize the
 	 * common case.
 	 */
 	if (sb->sb_tls_info != NULL)
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 #endif
 
 	/* Prevent other readers from entering the socket. */
 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 	SOCKBUF_LOCK(sb);
 
 #ifdef KERN_TLS
 	/*
 	 * Re-check now that both locks are held; if sb_tls_info is set,
 	 * drop the locks and let soreceive_generic() handle the request.
 	 */
 	if (sb->sb_tls_info != NULL) {
 		SOCKBUF_UNLOCK(sb);
 		SOCK_IO_RECV_UNLOCK(so);
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 	}
 #endif
 
 	/* Easy one, no space to copyout anything. */
 	if (uio->uio_resid == 0) {
 		error = EINVAL;
 		goto out;
 	}
 	/* Remember the request size so later passes can tell if data moved. */
 	oresid = uio->uio_resid;
 
 	/* We will never ever get anything unless we are or were connected. */
 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
 		error = ENOTCONN;
 		goto out;
 	}
 
 restart:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	/* Abort if socket has reported problems. */
 	if (so->so_error) {
 		if (sbavail(sb) > 0)
 			goto deliver;
 		/*
 		 * Some data was already delivered by this call: return
 		 * without touching so_error so it stays pending.
 		 */
 		if (oresid > uio->uio_resid)
 			goto out;
 		error = so->so_error;
 		if (!(flags & MSG_PEEK))
 			so->so_error = 0;
 		goto out;
 	}
 
 	/* Door is closed.  Deliver what is left, if any. */
 	if (sb->sb_state & SBS_CANTRCVMORE) {
 		if (sbavail(sb) > 0)
 			goto deliver;
 		else
 			goto out;
 	}
 
 	/* Socket buffer is empty and we shall not block. */
 	if (sbavail(sb) == 0 &&
 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
 		error = EAGAIN;
 		goto out;
 	}
 
 	/* Socket buffer got some data that we shall deliver now. */
 	if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) &&
 	    ((so->so_state & SS_NBIO) ||
 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
 	     sbavail(sb) >= sb->sb_lowat ||
 	     sbavail(sb) >= uio->uio_resid ||
 	     sbavail(sb) >= sb->sb_hiwat) ) {
 		goto deliver;
 	}
 
 	/* On MSG_WAITALL we must wait until all data or error arrives. */
 	if ((flags & MSG_WAITALL) &&
 	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat))
 		goto deliver;
 
 	/*
 	 * Wait and block until (more) data comes in.
 	 * NB: Drops the sockbuf lock during wait.
 	 */
-	error = sbwait(sb);
+	error = sbwait(so, SO_RCV);
 	if (error)
 		goto out;
 	goto restart;
 
 deliver:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__));
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
 
 	/* Statistics. */
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 
 	/* Fill uio until full or current end of socket buffer is reached. */
 	len = min(uio->uio_resid, sbavail(sb));
 	if (mp0 != NULL) {
 		/* Dequeue as many mbufs as possible. */
 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
 			if (*mp0 == NULL)
 				*mp0 = sb->sb_mb;
 			else
 				m_cat(*mp0, sb->sb_mb);
 			/*
 			 * Walk the leading mbufs that fit entirely within
 			 * 'len', uncharging each from the sockbuf; 'n'
 			 * tracks the last one taken.
 			 */
 			for (m = sb->sb_mb;
 			     m != NULL && m->m_len <= len;
 			     m = m->m_next) {
 				KASSERT(!(m->m_flags & M_NOTAVAIL),
 				    ("%s: m %p not available", __func__, m));
 				len -= m->m_len;
 				uio->uio_resid -= m->m_len;
 				sbfree(sb, m);
 				n = m;
 			}
 			/* Cut the taken prefix loose; 'm' is the new head. */
 			n->m_next = NULL;
 			sb->sb_mb = m;
 			sb->sb_lastrecord = sb->sb_mb;
 			if (sb->sb_mb == NULL)
 				SB_EMPTY_FIXUP(sb);
 		}
 		/* Copy the remainder. */
 		if (len > 0) {
 			KASSERT(sb->sb_mb != NULL,
 			    ("%s: len > 0 && sb->sb_mb empty", __func__));
 
 			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
 			if (m == NULL)
 				len = 0;	/* Don't flush data from sockbuf. */
 			else
 				uio->uio_resid -= len;
 			if (*mp0 != NULL)
 				m_cat(*mp0, m);
 			else
 				*mp0 = m;
 			/* Nothing dequeued above and the copy failed. */
 			if (*mp0 == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 		}
 	} else {
 		/* NB: Must unlock socket buffer as uiomove may sleep. */
 		SOCKBUF_UNLOCK(sb);
 		error = m_mbuftouio(uio, sb->sb_mb, len);
 		SOCKBUF_LOCK(sb);
 		if (error)
 			goto out;
 	}
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 
 	/*
 	 * Remove the delivered data from the socket buffer unless we
 	 * were only peeking.
 	 */
 	if (!(flags & MSG_PEEK)) {
 		if (len > 0)
 			sbdrop_locked(sb, len);
 
 		/* Notify protocol that we drained some data. */
 		if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
 		    (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
 		     !(flags & MSG_SOCALLBCK))) {
 			/* The protocol hook is called without the sockbuf lock. */
 			SOCKBUF_UNLOCK(sb);
 			VNET_SO_ASSERT(so);
 			(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
 			SOCKBUF_LOCK(sb);
 		}
 	}
 
 	/*
 	 * For MSG_WAITALL we may have to loop again and wait for
 	 * more data to come in.
 	 */
 	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
 		goto restart;
 out:
 	/* Common exit: both the sockbuf lock and the receive I/O lock are held. */
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 	SOCKBUF_UNLOCK(sb);
 	SOCK_IO_RECV_UNLOCK(so);
 	return (error);
 }
 
 /*
  * Optimized version of soreceive() for simple datagram cases from userspace.
  * Unlike in the stream case, we're able to drop a datagram if copyout()
  * fails, and because we handle datagrams atomically, we don't need to use a
  * sleep lock to prevent I/O interlacing.
  *
  * Requests with mp0 != NULL, MSG_PEEK or MSG_OOB are passed on to
  * soreceive_generic().  Otherwise the first record is unlinked from
  * so->so_rcv under the sockbuf lock and then processed with the lock
  * released: for PR_ADDR protocols the leading MT_SONAME mbuf is duplicated
  * into '*psa' (if psa != NULL), MT_CONTROL mbufs are externalized, chained
  * onto '*controlp' or freed, and data mbufs are copied out with uiomove().
  * Data that does not fit in 'uio' is freed and MSG_TRUNC is set.
  *
  * On the final success path the resulting flags are OR-ed into '*flagsp'
  * when flagsp != NULL.
  *
  * Returns 0 on success (including when SBS_CANTRCVMORE is set or
  * uio_resid == 0 with an empty buffer), the pending so_error (which is
  * cleared), EWOULDBLOCK for a non-blocking request on an empty buffer, or
  * the error from sbwait() or uiomove().
  */
 int
 soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct mbuf *m, *m2;
 	int flags, error;
 	ssize_t len;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 
 	/*
 	 * For any complicated cases, fall back to the full
 	 * soreceive_generic().
 	 */
 	if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB))
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 
 	/*
 	 * Enforce restrictions on use.
 	 */
 	KASSERT((pr->pr_flags & PR_WANTRCVD) == 0,
 	    ("soreceive_dgram: wantrcvd"));
 	KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic"));
 	KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0,
 	    ("soreceive_dgram: SBS_RCVATMARK"));
 	KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0,
 	    ("soreceive_dgram: P_CONNREQUIRED"));
 
 	/*
 	 * Loop blocking while waiting for a datagram.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	while ((m = so->so_rcv.sb_mb) == NULL) {
 		KASSERT(sbavail(&so->so_rcv) == 0,
 		    ("soreceive_dgram: sb_mb NULL but sbavail %u",
 		    sbavail(&so->so_rcv)));
 		/* A pending socket error is returned once and then cleared. */
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (error);
 		}
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
 		    uio->uio_resid == 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (0);
 		}
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (EWOULDBLOCK);
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		error = sbwait(&so->so_rcv);
+		error = sbwait(so, SO_RCV);
 		if (error) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (error);
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	nextrecord = m->m_nextpkt;
 	if (nextrecord == NULL) {
 		KASSERT(so->so_rcv.sb_lastrecord == m,
 		    ("soreceive_dgram: lastrecord != m"));
 	}
 
 	KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord,
 	    ("soreceive_dgram: m_nextpkt != nextrecord"));
 
 	/*
 	 * Pull 'm' and its chain off the front of the packet queue.
 	 */
 	so->so_rcv.sb_mb = NULL;
 	sockbuf_pushsync(&so->so_rcv, nextrecord);
 
 	/*
 	 * Walk 'm's chain and free that many bytes from the socket buffer.
 	 */
 	for (m2 = m; m2 != NULL; m2 = m2->m_next)
 		sbfree(&so->so_rcv, m2);
 
 	/*
 	 * Do a few last checks before we let go of the lock.
 	 */
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/*
 	 * From here on the record is private to this thread; the sockbuf
 	 * lock is not taken again in this function.
 	 */
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
 		if (psa != NULL)
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 			    M_NOWAIT);
 		/* Drop the address mbuf and continue with its successor. */
 		m = m_free(m);
 	}
 	if (m == NULL) {
 		/* XXXRW: Can this happen? */
 		return (0);
 	}
 
 	/*
 	 * Packet to copyout() is now in 'm' and it is disconnected from the
 	 * queue.
 	 *
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  We call into the
 	 * protocol to perform externalization (or freeing if controlp ==
 	 * NULL). In some cases there can be only MT_CONTROL mbufs without
 	 * MT_DATA mbufs.
 	 */
 	if (m->m_type == MT_CONTROL) {
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 
 		/* Move the leading run of control mbufs onto the 'cm' list. */
 		do {
 			m2 = m->m_next;
 			m->m_next = NULL;
 			*cme = m;
 			cme = &(*cme)->m_next;
 			m = m2;
 		} while (m != NULL && m->m_type == MT_CONTROL);
 		/*
 		 * Hand each control mbuf over individually.
 		 * NOTE(review): the value stored in 'error' by
 		 * dom_externalize is not examined; it is later overwritten
 		 * by uiomove() or ignored by the final return (0) -- confirm
 		 * this is intended.
 		 */
 		while (cm != NULL) {
 			cmn = cm->m_next;
 			cm->m_next = NULL;
 			if (pr->pr_domain->dom_externalize != NULL) {
 				error = (*pr->pr_domain->dom_externalize)
 				    (cm, controlp, flags);
 			} else if (controlp != NULL)
 				*controlp = cm;
 			else
 				m_freem(cm);
 			/* Advance controlp to the tail of what was appended. */
 			if (controlp != NULL) {
 				while (*controlp != NULL)
 					controlp = &(*controlp)->m_next;
 			}
 			cm = cmn;
 		}
 	}
 	KASSERT(m == NULL || m->m_type == MT_DATA,
 	    ("soreceive_dgram: !data"));
 	while (m != NULL && uio->uio_resid > 0) {
 		len = uio->uio_resid;
 		if (len > m->m_len)
 			len = m->m_len;
 		error = uiomove(mtod(m, char *), (int)len, uio);
 		if (error) {
 			/* The rest of the datagram is discarded on failure. */
 			m_freem(m);
 			return (error);
 		}
 		if (len == m->m_len)
 			m = m_free(m);
 		else {
 			/* Partially consumed: trim the copied bytes off the front. */
 			m->m_data += len;
 			m->m_len -= len;
 		}
 	}
 	/* Anything left over did not fit in the caller's buffer. */
 	if (m != NULL) {
 		flags |= MSG_TRUNC;
 		m_freem(m);
 	}
 	if (flagsp != NULL)
 		*flagsp |= flags;
 	return (0);
 }
 
 /*
  * Receive entry point: with so->so_vnet set as the current vnet, forward
  * all arguments unchanged to the protocol's pru_soreceive method and
  * return its result.
  */
 int
 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int rv;
 
 	CURVNET_SET(so->so_vnet);
 	rv = (*so->so_proto->pr_usrreqs->pru_soreceive)(so, psa, uio, mp0,
 	    controlp, flagsp);
 	CURVNET_RESTORE();
 
 	return (rv);
 }
 
 /*
  * Shut down the receive side, the send side, or both sides of a socket.
  *
  * 'how' must be SHUT_RD, SHUT_WR or SHUT_RDWR, otherwise EINVAL is
  * returned.  For a listening socket, anything other than SHUT_WR sets
  * so_error to ECONNABORTED and calls solisten_wakeup().  For other
  * sockets the protocol's pru_flush (if provided) is called, sorflush()
  * is run unless how == SHUT_WR, and pru_shutdown is called unless
  * how == SHUT_RD; sleepers on &so->so_timeo are woken in both cases.
  *
  * Returns 0, EINVAL, ENOTCONN (see the comment below), or the error from
  * pru_shutdown.
  */
 int
 soshutdown(struct socket *so, int how)
 {
 	struct protosw *pr;
 	int error, soerror_enotconn;
 
 	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
 		return (EINVAL);
 
 	soerror_enotconn = 0;
 	SOCK_LOCK(so);
 	if ((so->so_state &
 	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
 		/*
 		 * POSIX mandates us to return ENOTCONN when shutdown(2) is
 		 * invoked on a datagram sockets, however historically we would
 		 * actually tear socket down. This is known to be leveraged by
 		 * some applications to unblock process waiting in recvXXX(2)
 		 * by other process that it shares that socket with. Try to meet
 		 * both backward-compatibility and POSIX requirements by forcing
 		 * ENOTCONN but still asking protocol to perform pru_shutdown().
 		 */
 		if (so->so_type != SOCK_DGRAM && !SOLISTENING(so)) {
 			SOCK_UNLOCK(so);
 			return (ENOTCONN);
 		}
 		/* Carry on, but make the final return value ENOTCONN. */
 		soerror_enotconn = 1;
 	}
 
 	if (SOLISTENING(so)) {
 		if (how != SHUT_WR) {
 			so->so_error = ECONNABORTED;
 			solisten_wakeup(so);	/* unlocks so */
 		} else {
 			SOCK_UNLOCK(so);
 		}
 		goto done;
 	}
 	SOCK_UNLOCK(so);
 
 	CURVNET_SET(so->so_vnet);
 	pr = so->so_proto;
 	if (pr->pr_usrreqs->pru_flush != NULL)
 		(*pr->pr_usrreqs->pru_flush)(so, how);
 	if (how != SHUT_WR)
 		sorflush(so);
 	if (how != SHUT_RD) {
 		error = (*pr->pr_usrreqs->pru_shutdown)(so);
 		wakeup(&so->so_timeo);
 		CURVNET_RESTORE();
 		/* A protocol error takes precedence over the forced ENOTCONN. */
 		return ((error == 0 && soerror_enotconn) ? ENOTCONN : error);
 	}
 	wakeup(&so->so_timeo);
 	CURVNET_RESTORE();
 
 done:
 	return (soerror_enotconn ? ENOTCONN : 0);
 }
 
 /*
  * Flush the receive side of a socket.
  *
  * Marks the socket as unable to receive more data (socantrcvmore()),
  * takes the receive I/O lock, and then either calls the domain's
  * dom_dispose method (protocols with PR_RIGHTS) or releases the receive
  * buffer with sbrelease().  If the I/O lock cannot be taken the function
  * returns without doing anything further; the KASSERT expects that to
  * happen only for listening sockets.
  */
 void
 sorflush(struct socket *so)
 {
 	struct protosw *pr;
 	int error;
 
 	VNET_SO_ASSERT(so);
 
 	/*
 	 * Dislodge threads currently blocked in receive and wait to acquire
 	 * a lock against other simultaneous readers before clearing the
 	 * socket buffer.  Don't let our acquire be interrupted by a signal
 	 * despite any existing socket disposition on interruptable waiting.
 	 */
 	socantrcvmore(so);
 
 	error = SOCK_IO_RECV_LOCK(so, SBL_WAIT | SBL_NOINTR);
 	if (error != 0) {
 		KASSERT(SOLISTENING(so),
 		    ("%s: soiolock(%p) failed", __func__, so));
 		return;
 	}
 
 	pr = so->so_proto;
 	if (pr->pr_flags & PR_RIGHTS) {
 		MPASS(pr->pr_domain->dom_dispose != NULL);
 		/*
 		 * NOTE(review): the receive I/O lock is not released on
 		 * this branch; presumably dom_dispose drops it -- confirm
 		 * against the domain's implementation.
 		 */
 		(*pr->pr_domain->dom_dispose)(so);
 	} else {
-		sbrelease(&so->so_rcv, so);
+		sbrelease(so, SO_RCV);
 		SOCK_IO_RECV_UNLOCK(so);
 	}
 
 }
 
 /*
  * Wrapper for Socket established helper hook.
  * Parameters: socket, context of the hook point, hook id.
  */
 static int inline
 hhook_run_socket(struct socket *so, void *hctx, int32_t h_id)
 {
 	struct socket_hhook_data hhook_data = {
 		.so = so,
 		.hctx = hctx,
 		.m = NULL,
 		.status = 0
 	};
 
 	CURVNET_SET(so->so_vnet);
 	HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd);
 	CURVNET_RESTORE();
 
 	/* Ugly but needed, since hhooks return void for now */
 	return (hhook_data.status);
 }
 
 /*
  * Copy a socket option value from sopt->sopt_val into 'buf'.
  *
  * At most 'len' bytes are copied; a supplied value longer than that is
  * silently truncated and sopt->sopt_valsize is updated to the number of
  * bytes actually taken.  A supplied value shorter than 'minlen' is
  * rejected with EINVAL before anything is copied.
  *
  * When sopt->sopt_td is non-NULL the value is fetched with copyin();
  * otherwise it is fetched with bcopy().
  *
  * Returns 0 on success, EINVAL, or the error from copyin().
  *
  * Perhaps this routine, and sooptcopyout(), ought to come in an additional
  * variant for options whose value is some kind of integer but not of one
  * specific size.  Besides their use in this file, both are also called by
  * the protocol-level pr_ctloutput() routines.
  */
 int
 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
 {
 	size_t nbytes;
 
 	nbytes = sopt->sopt_valsize;
 	if (nbytes < minlen)
 		return (EINVAL);
 
 	/* Ignore anything beyond what the caller asked for. */
 	if (nbytes > len) {
 		nbytes = len;
 		sopt->sopt_valsize = nbytes;
 	}
 
 	if (sopt->sopt_td == NULL) {
 		bcopy(sopt->sopt_val, buf, nbytes);
 		return (0);
 	}
 	return (copyin(sopt->sopt_val, buf, nbytes));
 }
 
 /*
  * Kernel version of setsockopt(2).
  *
  * XXX: optlen is size_t, not socklen_t
  */
 int
 so_setsockopt(struct socket *so, int level, int optname, void *optval,
     size_t optlen)
 {
 	struct sockopt sopt;
 
 	sopt.sopt_level = level;
 	sopt.sopt_name = optname;
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_val = optval;
 	sopt.sopt_valsize = optlen;
 	sopt.sopt_td = NULL;
 	return (sosetopt(so, &sopt));
 }
 
 /*
  * Set a socket option described by 'sopt' on 'so'.
  *
  * Options at a level other than SOL_SOCKET are passed straight to the
  * protocol's pr_ctloutput routine (ENOPROTOOPT if it has none).
  * SOL_SOCKET options are handled by the switch below; names it does not
  * recognize go to the HHOOK_SOCKET_OPT helper hooks when at least one is
  * registered, and yield ENOPROTOOPT otherwise.  When a SOL_SOCKET option
  * was handled without error, pr_ctloutput (if present) is also called and
  * its return value is discarded.
  *
  * Returns 0 on success or an errno value (EINVAL, EDOM, ENOPROTOOPT,
  * EOPNOTSUPP, or whatever a helper such as sooptcopyin() or sbsetopt()
  * returned).
  */
 int
 sosetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 	sbintime_t val;
 	uint32_t val32;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	CURVNET_SET(so->so_vnet);
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto->pr_ctloutput != NULL)
 			error = (*so->so_proto->pr_ctloutput)(so, sopt);
 		else
 			error = ENOPROTOOPT;
 	} else {
 		switch (sopt->sopt_name) {
 		case SO_ACCEPTFILTER:
 			error = accept_filt_setopt(so, sopt);
 			if (error)
 				goto bad;
 			break;
 
 		case SO_LINGER:
 			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
 			if (error)
 				goto bad;
 			/*
 			 * Reject out-of-range linger times with EDOM.
 			 * NOTE(review): the INT_MAX / hz bound presumably
 			 * guards a later l_linger * hz computation against
 			 * overflow -- confirm at the point of use.
 			 */
 			if (l.l_linger < 0 ||
 			    l.l_linger > USHRT_MAX ||
 			    l.l_linger > (INT_MAX / hz)) {
 				error = EDOM;
 				goto bad;
 			}
 			SOCK_LOCK(so);
 			so->so_linger = l.l_linger;
 			if (l.l_onoff)
 				so->so_options |= SO_LINGER;
 			else
 				so->so_options &= ~SO_LINGER;
 			SOCK_UNLOCK(so);
 			break;
 
 		/*
 		 * Boolean options: the option name itself is used as the
 		 * bit to set or clear in so_options.
 		 */
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_DONTROUTE:
 		case SO_USELOOPBACK:
 		case SO_BROADCAST:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_REUSEPORT_LB:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 		case SO_NO_DDP:
 		case SO_NO_OFFLOAD:
 		case SO_RERROR:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 			SOCK_LOCK(so);
 			if (optval)
 				so->so_options |= sopt->sopt_name;
 			else
 				so->so_options &= ~sopt->sopt_name;
 			SOCK_UNLOCK(so);
 			break;
 
 		case SO_SETFIB:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 
 			if (optval < 0 || optval >= rt_numfibs) {
 				error = EINVAL;
 				goto bad;
 			}
 			/*
 			 * Only PF_INET, PF_INET6 and PF_ROUTE sockets keep
 			 * the requested FIB; any other family is reset to 0.
 			 */
 			if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
 			   (so->so_proto->pr_domain->dom_family == PF_INET6) ||
 			   (so->so_proto->pr_domain->dom_family == PF_ROUTE)))
 				so->so_fibnum = optval;
 			else
 				so->so_fibnum = 0;
 			break;
 
 		case SO_USER_COOKIE:
 			error = sooptcopyin(sopt, &val32, sizeof val32,
 			    sizeof val32);
 			if (error)
 				goto bad;
 			so->so_user_cookie = val32;
 			break;
 
 		case SO_SNDBUF:
 		case SO_RCVBUF:
 		case SO_SNDLOWAT:
 		case SO_RCVLOWAT:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 
 			/*
 			 * Values < 1 make no sense for any of these options,
 			 * so disallow them.
 			 */
 			if (optval < 1) {
 				error = EINVAL;
 				goto bad;
 			}
 
 			error = sbsetopt(so, sopt->sopt_name, optval);
 			break;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 #ifdef COMPAT_FREEBSD32
 			if (SV_CURPROC_FLAG(SV_ILP32)) {
 				struct timeval32 tv32;
 
 				/*
 				 * NOTE(review): tv32 is copied into tv
 				 * before 'error' is tested (the test follows
 				 * the #endif), so on failure tv may be
 				 * filled from an unset tv32; tv is not used
 				 * on that path.
 				 */
 				error = sooptcopyin(sopt, &tv32, sizeof tv32,
 				    sizeof tv32);
 				CP(tv32, tv, tv_sec);
 				CP(tv32, tv, tv_usec);
 			} else
 #endif
 				error = sooptcopyin(sopt, &tv, sizeof tv,
 				    sizeof tv);
 			if (error)
 				goto bad;
 			if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
 			    tv.tv_usec >= 1000000) {
 				error = EDOM;
 				goto bad;
 			}
 			/* Very large timeouts are clamped to SBT_MAX. */
 			if (tv.tv_sec > INT32_MAX)
 				val = SBT_MAX;
 			else
 				val = tvtosbt(tv);
 			switch (sopt->sopt_name) {
 			case SO_SNDTIMEO:
 				so->so_snd.sb_timeo = val;
 				break;
 			case SO_RCVTIMEO:
 				so->so_rcv.sb_timeo = val;
 				break;
 			}
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			error = sooptcopyin(sopt, &extmac, sizeof extmac,
 			    sizeof extmac);
 			if (error)
 				goto bad;
 			/*
 			 * NOTE(review): sopt_td is dereferenced here without
 			 * a NULL check, while so_setsockopt() passes
 			 * sopt_td == NULL -- confirm in-kernel callers never
 			 * use SO_LABEL.
 			 */
 			error = mac_setsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_TS_CLOCK:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 			if (optval < 0 || optval > SO_TS_CLOCK_MAX) {
 				error = EINVAL;
 				goto bad;
 			}
 			so->so_ts_clock = optval;
 			break;
 
 		case SO_MAX_PACING_RATE:
 			error = sooptcopyin(sopt, &val32, sizeof(val32),
 			    sizeof(val32));
 			if (error)
 				goto bad;
 			so->so_max_pacing_rate = val32;
 			break;
 
 		default:
 			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
 				error = hhook_run_socket(so, sopt,
 				    HHOOK_SOCKET_OPT);
 			else
 				error = ENOPROTOOPT;
 			break;
 		}
 		/*
 		 * Let the protocol see successfully handled SOL_SOCKET
 		 * options too; its return value is deliberately dropped.
 		 */
 		if (error == 0 && so->so_proto->pr_ctloutput != NULL)
 			(void)(*so->so_proto->pr_ctloutput)(so, sopt);
 	}
 bad:
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Helper routine for getsockopt: copy an option value from 'buf' out to
  * sopt->sopt_val.
  *
  * The documented get behavior is that a value is always returned,
  * truncated if necessary to fit the caller's buffer, and traditionally
  * the caller is told exactly how much was copied rather than how much was
  * available.  Accordingly the copy length is min(len, sopt->sopt_valsize)
  * and sopt->sopt_valsize is updated to that length even when sopt_val is
  * NULL and nothing is copied.  The interface is not idempotent; the whole
  * answer must be generated ahead of time.
  *
  * When sopt->sopt_td is non-NULL the value is written with copyout();
  * otherwise it is written with bcopy().
  *
  * Returns 0, or the error from copyout().
  */
 int
 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
 {
 	size_t nbytes;
 
 	nbytes = min(len, sopt->sopt_valsize);
 	sopt->sopt_valsize = nbytes;
 
 	/* No destination buffer: only the size is reported. */
 	if (sopt->sopt_val == NULL)
 		return (0);
 
 	if (sopt->sopt_td != NULL)
 		return (copyout(buf, sopt->sopt_val, nbytes));
 
 	bcopy(buf, sopt->sopt_val, nbytes);
 	return (0);
 }
 
 /*
  * Retrieve a socket option described by 'sopt' from 'so'.
  *
  * Options at a level other than SOL_SOCKET are passed straight to the
  * protocol's pr_ctloutput routine (ENOPROTOOPT if it has none).
  * SOL_SOCKET options are handled by the switch below; most produce an int
  * in 'optval' and jump to the shared 'integer' label, which copies it out
  * with sooptcopyout().  Names the switch does not recognize go to the
  * HHOOK_SOCKET_OPT helper hooks when at least one is registered, and
  * yield ENOPROTOOPT otherwise.
  *
  * Returns 0 on success or an errno value.
  */
 int
 sogetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	CURVNET_SET(so->so_vnet);
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto->pr_ctloutput != NULL)
 			error = (*so->so_proto->pr_ctloutput)(so, sopt);
 		else
 			error = ENOPROTOOPT;
 		CURVNET_RESTORE();
 		return (error);
 	} else {
 		switch (sopt->sopt_name) {
 		case SO_ACCEPTFILTER:
 			error = accept_filt_getopt(so, sopt);
 			break;
 
 		case SO_LINGER:
 			SOCK_LOCK(so);
 			l.l_onoff = so->so_options & SO_LINGER;
 			l.l_linger = so->so_linger;
 			SOCK_UNLOCK(so);
 			error = sooptcopyout(sopt, &l, sizeof l);
 			break;
 
 		/*
 		 * Flag-style options: the value reported is the matching
 		 * bit of so_options (zero or the option's bit value).
 		 */
 		case SO_USELOOPBACK:
 		case SO_DONTROUTE:
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_REUSEPORT_LB:
 		case SO_BROADCAST:
 		case SO_OOBINLINE:
 		case SO_ACCEPTCONN:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 		case SO_NO_DDP:
 		case SO_NO_OFFLOAD:
 		case SO_RERROR:
 			optval = so->so_options & sopt->sopt_name;
 			/* Shared tail for every option returned as an int. */
 integer:
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case SO_DOMAIN:
 			optval = so->so_proto->pr_domain->dom_family;
 			goto integer;
 
 		case SO_TYPE:
 			optval = so->so_type;
 			goto integer;
 
 		case SO_PROTOCOL:
 			optval = so->so_proto->pr_protocol;
 			goto integer;
 
 		case SO_ERROR:
 			/*
 			 * Reading the error consumes it: so_error is
 			 * reported and cleared if set, otherwise so_rerror
 			 * is reported and cleared.
 			 */
 			SOCK_LOCK(so);
 			if (so->so_error) {
 				optval = so->so_error;
 				so->so_error = 0;
 			} else {
 				optval = so->so_rerror;
 				so->so_rerror = 0;
 			}
 			SOCK_UNLOCK(so);
 			goto integer;
 
 		/*
 		 * Buffer sizes and low-water marks: listening sockets
 		 * report the sol_sb* fields, other sockets the values in
 		 * so_snd / so_rcv.
 		 */
 		case SO_SNDBUF:
 			optval = SOLISTENING(so) ? so->sol_sbsnd_hiwat :
 			    so->so_snd.sb_hiwat;
 			goto integer;
 
 		case SO_RCVBUF:
 			optval = SOLISTENING(so) ? so->sol_sbrcv_hiwat :
 			    so->so_rcv.sb_hiwat;
 			goto integer;
 
 		case SO_SNDLOWAT:
 			optval = SOLISTENING(so) ? so->sol_sbsnd_lowat :
 			    so->so_snd.sb_lowat;
 			goto integer;
 
 		case SO_RCVLOWAT:
 			optval = SOLISTENING(so) ? so->sol_sbrcv_lowat :
 			    so->so_rcv.sb_lowat;
 			goto integer;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 			tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ?
 			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
 #ifdef COMPAT_FREEBSD32
 			if (SV_CURPROC_FLAG(SV_ILP32)) {
 				struct timeval32 tv32;
 
 				CP(tv, tv32, tv_sec);
 				CP(tv, tv32, tv_usec);
 				error = sooptcopyout(sopt, &tv32, sizeof tv32);
 			} else
 #endif
 				error = sooptcopyout(sopt, &tv, sizeof tv);
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			/*
 			 * The caller's struct mac is copied in first, passed
 			 * to mac_getsockopt_label(), and then copied back
 			 * out.
 			 */
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				goto bad;
 			error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 			if (error)
 				goto bad;
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_PEERLABEL:
 #ifdef MAC
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				goto bad;
 			error = mac_getsockopt_peerlabel(
 			    sopt->sopt_td->td_ucred, so, &extmac);
 			if (error)
 				goto bad;
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		/* Listen-queue statistics read as 0 on non-listening sockets. */
 		case SO_LISTENQLIMIT:
 			optval = SOLISTENING(so) ? so->sol_qlimit : 0;
 			goto integer;
 
 		case SO_LISTENQLEN:
 			optval = SOLISTENING(so) ? so->sol_qlen : 0;
 			goto integer;
 
 		case SO_LISTENINCQLEN:
 			optval = SOLISTENING(so) ? so->sol_incqlen : 0;
 			goto integer;
 
 		case SO_TS_CLOCK:
 			optval = so->so_ts_clock;
 			goto integer;
 
 		case SO_MAX_PACING_RATE:
 			optval = so->so_max_pacing_rate;
 			goto integer;
 
 		default:
 			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
 				error = hhook_run_socket(so, sopt,
 				    HHOOK_SOCKET_OPT);
 			else
 				error = ENOPROTOOPT;
 			break;
 		}
 	}
 	/* The label only exists under MAC because only MAC code jumps to it. */
 #ifdef MAC
 bad:
 #endif
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Allocate an mbuf chain able to hold sopt->sopt_valsize bytes and return
  * its head through *mp.  Every link starts as a plain mbuf and gets a
  * cluster attached while more than MLEN bytes are still outstanding; m_len
  * of each link is preset to the number of bytes it is meant to carry.
  * Allocations use M_WAITOK when the request has a thread attached
  * (sopt_td != NULL) and M_NOWAIT otherwise.
  *
  * Returns 0 on success, or ENOBUFS after freeing everything allocated so far.
  */
 int
 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 {
 	const int how = (sopt->sopt_td != NULL) ? M_WAITOK : M_NOWAIT;
 	struct mbuf *cur, *tail = NULL;
 	int remaining = sopt->sopt_valsize;
 
 	/* One link is always allocated, even for a zero-length value. */
 	do {
 		MGET(cur, how, MT_DATA);
 		if (cur == NULL) {
 			if (tail != NULL)
 				m_freem(*mp);
 			return (ENOBUFS);
 		}
 		if (remaining > MLEN) {
 			MCLGET(cur, how);
 			if ((cur->m_flags & M_EXT) == 0) {
 				/* No cluster: drop this link and the chain. */
 				m_free(cur);
 				if (tail != NULL)
 					m_freem(*mp);
 				return (ENOBUFS);
 			}
 			cur->m_len = min(MCLBYTES, remaining);
 		} else {
 			cur->m_len = min(MLEN, remaining);
 		}
 		remaining -= cur->m_len;
 
 		/* Append; the very first link becomes the chain head. */
 		if (tail == NULL)
 			*mp = cur;
 		else
 			tail->m_next = cur;
 		tail = cur;
 	} while (remaining);
 	return (0);
 }
 
 /*
  * Fill the mbuf chain m from the option value described by sopt.  Each
  * link receives m_len bytes, via copyin() when the request has a thread
  * attached (sopt_td != NULL) and via bcopy() otherwise; sopt_val and
  * sopt_valsize are advanced past the bytes consumed.  A NULL sopt_val makes
  * this a no-op.
  *
  * Returns 0, or the copyin() error after freeing the whole chain.  Panics
  * when a link wants more bytes than the option value still holds.
  */
 int
 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *const head = m;
 	int error;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	for (; m != NULL; m = m->m_next) {
 		/* the chain should have been sized to fit beforehand */
 		if (sopt->sopt_valsize < m->m_len)
 			panic("ip6_sooptmcopyin");
 		if (sopt->sopt_td == NULL) {
 			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
 		} else {
 			error = copyin(sopt->sopt_val, mtod(m, char *),
 			    m->m_len);
 			if (error != 0) {
 				m_freem(head);
 				return (error);
 			}
 		}
 		sopt->sopt_valsize -= m->m_len;
 		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
 	}
 	return (0);
 }
 
 /*
  * Copy the contents of the mbuf chain m out to the option buffer described
  * by sopt, via copyout() when the request has a thread attached
  * (sopt_td != NULL) and via bcopy() otherwise.  A NULL sopt_val makes this
  * a no-op that leaves the chain untouched.
  *
  * On success returns 0 with sopt_valsize set to the number of bytes
  * written.  Returns the copyout() error, or EINVAL when the destination is
  * too small for the chain; in both failure cases the chain is freed.
  */
 int
 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *const head = m;
 	size_t copied = 0;
 	int error;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	for (; m != NULL; m = m->m_next) {
 		if (sopt->sopt_valsize < m->m_len) {
 			/* the caller must supply a large enough buffer */
 			m_freem(head);
 			return (EINVAL);
 		}
 		if (sopt->sopt_td == NULL) {
 			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
 		} else {
 			error = copyout(mtod(m, char *), sopt->sopt_val,
 			    m->m_len);
 			if (error != 0) {
 				m_freem(head);
 				return (error);
 			}
 		}
 		sopt->sopt_valsize -= m->m_len;
 		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
 		copied += m->m_len;
 	}
 	sopt->sopt_valsize = copied;
 	return (0);
 }
 
 /*
  * sohasoutofband(): called by a protocol when new out-of-band data has
  * arrived.  Posts SIGURG through so_sigio when one is registered, then wakes
  * anything waiting on the socket's read selinfo.
  */
 void
 sohasoutofband(struct socket *so)
 {
 	if (so->so_sigio != NULL) {
 		pgsigio(&so->so_sigio, SIGURG, 0);
 	}
 	selwakeuppri(&so->so_rdsel, PSOCK);
 }
 
 /*
  * Dispatch a poll request to the protocol's pru_sopoll method and return
  * its result unchanged.
  */
 int
 sopoll(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	/*
 	 * curvnet is neither set nor asserted here; that is fine as long as
 	 * every protocol uses sopoll_generic().
 	 */
 	return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
 	    td));
 }
 
 /*
  * Generic poll implementation: work out which of the requested events are
  * currently true for so and return them as a revents mask.
  *
  * A listening socket is examined under the socket lock only.  It reports
  * the requested POLLIN/POLLRDNORM bits when the completed-connection queue
  * (sol_comp) is non-empty, and those bits plus POLLHUP when so_error is set
  * and POLLINIGNEOF was not requested.
  *
  * Any other socket is examined with the send and receive buffer locks held
  * in addition to the socket lock.
  *
  * When nothing is ready the thread is recorded with selrecord() on the
  * relevant selinfo and 0 is returned.  active_cred is not used here.
  */
 int
 sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	int revents;
 
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
 		/* Only read-type events are meaningful on a listener. */
 		if (!(events & (POLLIN | POLLRDNORM)))
 			revents = 0;
 		else if (!TAILQ_EMPTY(&so->sol_comp))
 			revents = events & (POLLIN | POLLRDNORM);
 		else if ((events & POLLINIGNEOF) == 0 && so->so_error)
 			revents = (events & (POLLIN | POLLRDNORM)) | POLLHUP;
 		else {
 			selrecord(td, &so->so_rdsel);
 			revents = 0;
 		}
 	} else {
 		revents = 0;
 		/* Send buffer lock first, then receive; released in reverse. */
-		SOCKBUF_LOCK(&so->so_snd);
-		SOCKBUF_LOCK(&so->so_rcv);
+		SOCK_SENDBUF_LOCK(so);
+		SOCK_RECVBUF_LOCK(so);
 		if (events & (POLLIN | POLLRDNORM))
 			if (soreadabledata(so))
 				revents |= events & (POLLIN | POLLRDNORM);
 		if (events & (POLLOUT | POLLWRNORM))
 			if (sowriteable(so))
 				revents |= events & (POLLOUT | POLLWRNORM);
 		if (events & (POLLPRI | POLLRDBAND))
 			if (so->so_oobmark ||
 			    (so->so_rcv.sb_state & SBS_RCVATMARK))
 				revents |= events & (POLLPRI | POLLRDBAND);
 		/*
 		 * Unless the caller asked to ignore EOF, a receive side that
 		 * can take no more data counts as readable, and as a hangup
 		 * when the send side is shut as well.
 		 */
 		if ((events & POLLINIGNEOF) == 0) {
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 				revents |= events & (POLLIN | POLLRDNORM);
 				if (so->so_snd.sb_state & SBS_CANTSENDMORE)
 					revents |= POLLHUP;
 			}
 		}
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			revents |= events & POLLRDHUP;
 		/* Nothing ready: register on the read and/or write selinfo. */
 		if (revents == 0) {
 			if (events &
 			    (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND | POLLRDHUP)) {
 				selrecord(td, &so->so_rdsel);
 				so->so_rcv.sb_flags |= SB_SEL;
 			}
 			if (events & (POLLOUT | POLLWRNORM)) {
 				selrecord(td, &so->so_wrsel);
 				so->so_snd.sb_flags |= SB_SEL;
 			}
 		}
-		SOCKBUF_UNLOCK(&so->so_rcv);
-		SOCKBUF_UNLOCK(&so->so_snd);
+		SOCK_RECVBUF_UNLOCK(so);
+		SOCK_SENDBUF_UNLOCK(so);
 	}
 	SOCK_UNLOCK(so);
 	return (revents);
 }
 
 /*
  * Attach knote kn to a socket.  EVFILT_READ goes on the read selinfo's
  * note list, EVFILT_WRITE and EVFILT_EMPTY on the write one; any other
  * filter yields EINVAL.  For a non-listening socket the matching socket
  * buffer is locked around the insertion and flagged SB_KNOTE.  fp itself is
  * not consulted; the socket is taken from kn->kn_fp.
  */
 int
 soo_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct socket *so;
 	struct sockbuf *sb;
 	struct knlist *knl;
 
 	so = kn->kn_fp->f_data;
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		kn->kn_fop = &soread_filtops;
 		sb = &so->so_rcv;
 		knl = &so->so_rdsel.si_note;
 		break;
 	case EVFILT_WRITE:
 		kn->kn_fop = &sowrite_filtops;
 		sb = &so->so_snd;
 		knl = &so->so_wrsel.si_note;
 		break;
 	case EVFILT_EMPTY:
 		kn->kn_fop = &soempty_filtops;
 		sb = &so->so_snd;
 		knl = &so->so_wrsel.si_note;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	SOCK_LOCK(so);
 	if (!SOLISTENING(so)) {
 		SOCKBUF_LOCK(sb);
 		knlist_add(knl, kn, 1);
 		sb->sb_flags |= SB_KNOTE;
 		SOCKBUF_UNLOCK(sb);
 	} else {
 		/* Listening sockets have no socket buffers to lock or flag. */
 		knlist_add(knl, kn, 1);
 	}
 	SOCK_UNLOCK(so);
 	return (0);
 }
 
 /*
  * Some routines that return EOPNOTSUPP for entry points that are not
  * supported by a protocol.  Fill in as needed.
  */
 /* Placeholder accept entry point: always fails with EOPNOTSUPP. */
 int
 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder aio_queue entry point: always fails with EOPNOTSUPP. */
 int
 pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder attach entry point: always fails with EOPNOTSUPP. */
 int
 pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder bind entry point: always fails with EOPNOTSUPP. */
 int
 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder bindat entry point: always fails with EOPNOTSUPP. */
 int
 pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder connect entry point: always fails with EOPNOTSUPP. */
 int
 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder connectat entry point: always fails with EOPNOTSUPP. */
 int
 pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder connect2 entry point: always fails with EOPNOTSUPP. */
 int
 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder control entry point: always fails with EOPNOTSUPP. */
 int
 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder disconnect entry point: always fails with EOPNOTSUPP. */
 int
 pru_disconnect_notsupp(struct socket *so)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder listen entry point: always fails with EOPNOTSUPP. */
 int
 pru_listen_notsupp(struct socket *so, int backlog, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder peeraddr entry point: always fails with EOPNOTSUPP. */
 int
 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder rcvd entry point: always fails with EOPNOTSUPP. */
 int
 pru_rcvd_notsupp(struct socket *so, int flags)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder rcvoob entry point: always fails with EOPNOTSUPP. */
 int
 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Placeholder send entry point.  Always fails with EOPNOTSUPP, but first
  * frees the mbufs it was handed: control when non-NULL, and m unless the
  * caller set PRUS_NOTREADY in flags.
  */
 int
 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *addr, struct mbuf *control, struct thread *td)
 {
 	if (control != NULL) {
 		m_freem(control);
 	}
 	if ((flags & PRUS_NOTREADY) == 0) {
 		m_freem(m);
 	}
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder ready entry point: always fails with EOPNOTSUPP. */
 int
 pru_ready_notsupp(struct socket *so, struct mbuf *m, int count)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default sense operation.  Not truly a ``null'' operation: it reports the
  * send buffer high-water mark as the stat block size, which is harmless.
  * Always returns 0.
  */
 int
 pru_sense_null(struct socket *so, struct stat *sb)
 {
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	return (0);
 }
 
 /* Placeholder shutdown entry point: always fails with EOPNOTSUPP. */
 int
 pru_shutdown_notsupp(struct socket *so)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder sockaddr entry point: always fails with EOPNOTSUPP. */
 int
 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Placeholder sosend entry point: always fails with EOPNOTSUPP.  None of the
  * arguments are touched.
  */
 int
 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Placeholder soreceive entry point: always fails with EOPNOTSUPP.  None of
  * the output arguments are written.
  */
 int
 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder sopoll entry point: always returns EOPNOTSUPP. */
 int
 pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
     struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Detach a read knote: remove it from the socket's read note list under the
  * read knlist lock and, for a non-listening socket whose list is now empty,
  * clear SB_KNOTE on the receive buffer.
  */
 static void
 filt_sordetach(struct knote *kn)
 {
 	struct socket *so;
 	struct knlist *notes;
 
 	so = kn->kn_fp->f_data;
 	notes = &so->so_rdsel.si_note;
 
 	so_rdknl_lock(so);
 	knlist_remove(notes, kn, 1);
 	if (!SOLISTENING(so) && knlist_empty(notes)) {
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
 	}
 	so_rdknl_unlock(so);
 }
 
 /*
  * Read filter: decide whether knote kn on a socket should fire; hint is
  * unused.
  *
  * Listening socket (socket lock asserted): kn_data is the completed queue
  * length; fires with EV_EOF when so_error is set, otherwise when the
  * completed queue is non-empty.
  *
  * Other sockets (receive buffer lock asserted): kn_data is the available
  * byte count minus control data.  Fires with EV_EOF once the receive side is
  * shut, on a pending so_error/so_rerror, or when the low-water mark is met
  * (the knote's own one with NOTE_LOWAT, else the buffer's).  Failing all of
  * that, the HHOOK_FILT_SOREAD hook decides.
  */
 /*ARGSUSED*/
 static int
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
 	if (SOLISTENING(so)) {
 		SOCK_LOCK_ASSERT(so);
 		kn->kn_data = so->sol_qlen;
 		if (!so->so_error)
 			return (!TAILQ_EMPTY(&so->sol_comp));
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	}
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	}
 	if (so->so_error || so->so_rerror)
 		return (1);
 
 	if (kn->kn_sfflags & NOTE_LOWAT) {
 		if (kn->kn_data >= kn->kn_sdata)
 			return (1);
 	} else {
 		if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat)
 			return (1);
 	}
 
 	/* A non-zero return from this hook signals an event, not an error. */
 	return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD));
 }
 
 /*
  * Detach a write knote: remove it from the socket's write note list under
  * the write knlist lock and, for a non-listening socket whose list is now
  * empty, clear SB_KNOTE on the send buffer.
  */
 static void
 filt_sowdetach(struct knote *kn)
 {
 	struct socket *so;
 	struct knlist *notes;
 
 	so = kn->kn_fp->f_data;
 	notes = &so->so_wrsel.si_note;
 
 	so_wrknl_lock(so);
 	knlist_remove(notes, kn, 1);
 	if (!SOLISTENING(so) && knlist_empty(notes)) {
 		so->so_snd.sb_flags &= ~SB_KNOTE;
 	}
 	so_wrknl_unlock(so);
 }
 
 /*ARGSUSED*/
 static int
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so;
 
 	so = kn->kn_fp->f_data;
 
 	if (SOLISTENING(so))
 		return (0);
 
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	kn->kn_data = sbspace(&so->so_snd);
 
 	hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE);
 
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	} else if (so->so_error)	/* temporary udp error */
 		return (1);
 	else if (((so->so_state & SS_ISCONNECTED) == 0) &&
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
 		return (0);
 	else if (kn->kn_sfflags & NOTE_LOWAT)
 		return (kn->kn_data >= kn->kn_sdata);
 	else
 		return (kn->kn_data >= so->so_snd.sb_lowat);
 }
 
 /*
  * Empty filter: fires when the send buffer holds no data; hint is unused.
  * A listening socket always fires.  Otherwise, with the send buffer lock
  * asserted, kn_data is set to the bytes in use and the result is whether
  * that count is zero.
  */
 static int
 filt_soempty(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
 	if (SOLISTENING(so))
 		return (1);
 
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	kn->kn_data = sbused(&so->so_snd);
 
 	return (kn->kn_data == 0);
 }
 
 /*
  * Check that the socket's credential carries user id uid.  Returns 0 on a
  * match and EPERM otherwise, including when so is NULL.
  */
 int
 socheckuid(struct socket *so, uid_t uid)
 {
 	if (so == NULL || so->so_cred->cr_uid != uid)
 		return (EPERM);
 	return (0);
 }
 
 /*
  * These functions are used by protocols to notify the socket layer (and its
  * consumers) of state changes in the sockets driven by protocol-side events.
  */
 
 /*
  * Procedures to manipulate state flags of socket and do appropriate wakeups.
  *
  * Normal sequence from the active (originating) side is that
  * soisconnecting() is called during processing of connect() call, resulting
  * in an eventual call to soisconnected() if/when the connection is
  * established.  When the connection is torn down soisdisconnecting() is
  * called during processing of disconnect() call, and soisdisconnected() is
  * called when the connection to the peer is totally severed.  The semantics
  * of these routines are such that connectionless protocols can call
  * soisconnected() and soisdisconnected() only, bypassing the in-progress
  * calls when setting up a ``connection'' takes no time.
  *
  * From the passive side, a socket is created with two queues of sockets:
  * so_incomp for connections in progress and so_comp for connections already
  * made and awaiting user acceptance.  As a protocol is preparing incoming
  * connections, it creates a socket structure queued on so_incomp by calling
  * sonewconn().  When the connection is established, soisconnected() is
  * called, and transfers the socket structure to so_comp, making it available
  * to accept().
  *
  * If a socket is closed with sockets on either so_incomp or so_comp, these
  * sockets are dropped.
  *
  * If higher-level protocols are implemented in the kernel, the wakeups done
  * here will sometimes cause software-interrupt process scheduling.
  */
 void
 soisconnecting(struct socket *so)
 {
 	/*
 	 * Under the socket lock: drop the connected and disconnecting flags,
 	 * then mark the socket as connecting.
 	 */
 	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
 	so->so_state |= SS_ISCONNECTING;
 	SOCK_UNLOCK(so);
 }
 
 /*
  * Mark so as connected.
  *
  * If so sits on a listener's incomplete queue (so_qstate == SQ_INCOMP) it
  * is either moved to the listener's completed queue, followed by
  * solisten_wakeup(), or, when SO_ACCEPTFILTER is set, the listener's accept
  * filter callback is installed as the receive upcall and invoked once.  If
  * the filter reports SU_ISCONNECTED the socket is promoted immediately.
  *
  * For any other socket, sleepers on so_timeo and both the read and write
  * sides are woken.
  */
 void
 soisconnected(struct socket *so)
 {
 	/* Only consumed by the KASSERT below. */
 	bool last __diagused;
 
 	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
 
 	if (so->so_qstate == SQ_INCOMP) {
 		struct socket *head = so->so_listen;
 		int ret;
 
 		KASSERT(head, ("%s: so %p on incomp of NULL", __func__, so));
 		/*
 		 * Promoting a socket from incomplete queue to complete, we
 		 * need to go through reverse order of locking.  We first do
 		 * trylock, and if that doesn't succeed, we go the hard way
 		 * leaving a reference and rechecking consistency after proper
 		 * locking.
 		 */
 		if (__predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
 			/* Hold head across the window where so is unlocked. */
 			soref(head);
 			SOCK_UNLOCK(so);
 			SOLISTEN_LOCK(head);
 			SOCK_LOCK(so);
 			if (__predict_false(head != so->so_listen)) {
 				/*
 				 * The socket went off the listen queue,
 				 * should be lost race to close(2) of sol.
 				 * The socket is about to soabort().
 				 */
 				SOCK_UNLOCK(so);
 				sorele_locked(head);
 				return;
 			}
 			/* Drop the temporary reference taken above. */
 			last = refcount_release(&head->so_count);
 			KASSERT(!last, ("%s: released last reference for %p",
 			    __func__, head));
 		}
 again:
 		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
 			/* No filter (or filter satisfied): incomplete -> complete. */
 			TAILQ_REMOVE(&head->sol_incomp, so, so_list);
 			head->sol_incqlen--;
 			TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
 			head->sol_qlen++;
 			so->so_qstate = SQ_COMP;
 			SOCK_UNLOCK(so);
 			solisten_wakeup(head);	/* unlocks */
 		} else {
 			SOCKBUF_LOCK(&so->so_rcv);
 			soupcall_set(so, SO_RCV,
 			    head->sol_accept_filter->accf_callback,
 			    head->sol_accept_filter_arg);
 			/* Cleared so that a "goto again" takes the branch above. */
 			so->so_options &= ~SO_ACCEPTFILTER;
 			ret = head->sol_accept_filter->accf_callback(so,
 			    head->sol_accept_filter_arg, M_NOWAIT);
 			if (ret == SU_ISCONNECTED) {
 				soupcall_clear(so, SO_RCV);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto again;
 			}
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			SOCK_UNLOCK(so);
 			SOLISTEN_UNLOCK(head);
 		}
 		return;
 	}
 	SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 }
 
 /*
  * Mark so as disconnecting: under the socket lock, clear SS_ISCONNECTING
  * and set SS_ISDISCONNECTING.  For a non-listening socket both directions
  * are then shut via socantrcvmore_locked() and socantsendmore_locked().
  * Finally sleepers on so_timeo are woken.
  */
 void
 soisdisconnecting(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTING;
 	so->so_state |= SS_ISDISCONNECTING;
 
 	if (!SOLISTENING(so)) {
 		/*
 		 * NOTE(review): neither buffer lock is released in this
 		 * function; presumably the *_locked helpers drop the lock
 		 * they are entered with -- confirm against their definitions.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		socantrcvmore_locked(so);
 		SOCKBUF_LOCK(&so->so_snd);
 		socantsendmore_locked(so);
 	}
 	SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 }
 
 /*
  * Mark so as fully disconnected: set SS_ISDISCONNECTED and clear the
  * connecting, connected and disconnecting flags.  For a non-listening
  * socket the receive side is shut, everything queued in the send buffer is
  * dropped and the send side is shut.  Finally sleepers on so_timeo are
  * woken.
  */
 void
 soisdisconnected(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 
 	/*
 	 * There is at least one reader of so_state that does not
 	 * acquire socket lock, namely soreceive_generic().  Ensure
 	 * that it never sees all flags that track connection status
 	 * cleared, by ordering the update with a barrier semantic of
 	 * our release thread fence.
 	 */
 	so->so_state |= SS_ISDISCONNECTED;
 	atomic_thread_fence_rel();
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 
 	if (!SOLISTENING(so)) {
 		/* The socket lock is dropped before the buffer locks are taken. */
 		SOCK_UNLOCK(so);
 		/*
 		 * NOTE(review): neither buffer lock is released in this
 		 * function; presumably the socant*more_locked() helpers drop
 		 * the lock they are entered with -- confirm.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		socantrcvmore_locked(so);
 		SOCKBUF_LOCK(&so->so_snd);
 		sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
 		socantsendmore_locked(so);
 	} else
 		SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 }
 
 /*
  * Take the I/O sx lock sx exclusively on behalf of socket so.
  *
  * flags must be a subset of SBL_VALID.  Without SBL_WAIT only a try-lock is
  * attempted and EWOULDBLOCK is returned on contention.  With SBL_WAIT the
  * lock is taken with sx_xlock() when SBL_NOINTR is also set, otherwise with
  * sx_xlock_sig(), whose error is returned on failure.
  *
  * If the socket turns out to be listening once the lock is held, the lock
  * is released again and ENOTCONN is returned.  Returns 0 with sx held
  * otherwise.
  */
 int
 soiolock(struct socket *so, struct sx *sx, int flags)
 {
 	int error;
 
 	KASSERT((flags & SBL_VALID) == flags,
 	    ("soiolock: invalid flags %#x", flags));
 
 	if ((flags & SBL_WAIT) == 0) {
 		if (!sx_try_xlock(sx))
 			return (EWOULDBLOCK);
 	} else if ((flags & SBL_NOINTR) != 0) {
 		sx_xlock(sx);
 	} else {
 		error = sx_xlock_sig(sx);
 		if (error != 0)
 			return (error);
 	}
 
 	if (__predict_false(SOLISTENING(so))) {
 		sx_xunlock(sx);
 		return (ENOTCONN);
 	}
 	return (0);
 }
 
 /* Release an I/O sx lock that is held exclusively. */
 void
 soiounlock(struct sx *sx)
 {
 
 	sx_xunlock(sx);
 }
 
 /*
  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
  */
 struct sockaddr *
 sodupsockaddr(const struct sockaddr *sa, int mflags)
 {
 	struct sockaddr *sa2;
 
 	sa2 = malloc(sa->sa_len, M_SONAME, mflags);
 	if (sa2)
 		bcopy(sa, sa2, sa->sa_len);
 	return sa2;
 }
 
 /*
  * Install func as the socket's destructor (so_dtor).  The socket lock must
  * be held by the caller; this is asserted.
  */
 void
 sodtor_set(struct socket *so, so_dtor_t *func)
 {
 	SOCK_LOCK_ASSERT(so);
 	so->so_dtor = func;
 }
 
 /*
  * Install an upcall on one of a socket's buffers.
  *
  * which picks the buffer (SO_RCV or SO_SND; anything else panics).  func and
  * arg are stored as the buffer's upcall and upcall argument and SB_UPCALL is
  * set.  The socket must not be listening and the chosen buffer's lock must
  * be held; both are asserted.
  */
 void
 soupcall_set(struct socket *so, sb_which which, so_upcall_t func, void *arg)
 {
 	struct sockbuf *sb;
 
 	KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
 
 	if (which == SO_RCV)
 		sb = &so->so_rcv;
 	else if (which == SO_SND)
 		sb = &so->so_snd;
 	else
 		panic("soupcall_set: bad which");
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	sb->sb_flags |= SB_UPCALL;
 	sb->sb_upcall = func;
 	sb->sb_upcallarg = arg;
 }
 
 /*
  * Remove the upcall installed on one of a socket's buffers.
  *
  * which picks the buffer: SO_RCV for the receive buffer, SO_SND for the send
  * buffer.  Any other value panics, as in soupcall_set().  The upcall
  * pointer and its argument are reset to NULL and SB_UPCALL is cleared.
  *
  * The socket must not be listening, the chosen buffer's lock must be held
  * and an upcall must currently be installed; all three are asserted.
  */
 void
 soupcall_clear(struct socket *so, sb_which which)
 {
 	struct sockbuf *sb;
 
 	KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
 
 	switch (which) {
 	case SO_RCV:
 		sb = &so->so_rcv;
 		break;
 	case SO_SND:
 		sb = &so->so_snd;
 		break;
 	default:
 		/* Otherwise sb would be dereferenced uninitialized below. */
 		panic("soupcall_clear: bad which");
 	}
 	SOCKBUF_LOCK_ASSERT(sb);
 	KASSERT(sb->sb_upcall != NULL,
 	    ("%s: so %p no upcall to clear", __func__, so));
 	sb->sb_upcall = NULL;
 	sb->sb_upcallarg = NULL;
 	sb->sb_flags &= ~SB_UPCALL;
 }
 
 /*
  * Install func/arg as the upcall of a listening socket (sol_upcall and
  * sol_upcallarg).  The listen lock must be held; this is asserted.
  */
 void
 solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg)
 {
 	SOLISTEN_LOCK_ASSERT(so);
 	so->sol_upcallarg = arg;
 	so->sol_upcall = func;
 }
 
 /*
  * Lock routine for the read-side knote list.  arg is the socket.  A
  * listening socket is protected by the socket lock, any other socket by its
  * receive buffer lock.
  */
 static void
 so_rdknl_lock(void *arg)
 {
 	struct socket *so;
 
 	so = arg;
 	if (!SOLISTENING(so)) {
 		SOCKBUF_LOCK(&so->so_rcv);
 	} else {
 		SOCK_LOCK(so);
 	}
 }
 
 /*
  * Unlock routine for the read-side knote list.  arg is the socket.
  * Releases the socket lock for a listening socket and the receive buffer
  * lock otherwise.
  */
 static void
 so_rdknl_unlock(void *arg)
 {
 	struct socket *so;
 
 	so = arg;
 	if (!SOLISTENING(so)) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 	} else {
 		SOCK_UNLOCK(so);
 	}
 }
 
 /*
  * Lock assertion routine for the read-side knote list.  arg is the socket.
  * With what == LA_LOCKED it asserts that the protecting lock is held; with
  * any other value it asserts that the lock is not held.  The lock checked
  * is the socket lock for a listening socket and the receive buffer lock
  * otherwise.
  */
 static void
 so_rdknl_assert_lock(void *arg, int what)
 {
 	struct socket *so = arg;
 
 	if (what == LA_LOCKED) {
 		if (SOLISTENING(so))
 			SOCK_LOCK_ASSERT(so);
 		else
-			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+			SOCK_RECVBUF_LOCK_ASSERT(so);
 	} else {
 		if (SOLISTENING(so))
 			SOCK_UNLOCK_ASSERT(so);
 		else
-			SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+			SOCK_RECVBUF_UNLOCK_ASSERT(so);
 	}
 }
 
 /*
  * Lock routine for the write-side knote list.  arg is the socket.  A
  * listening socket is protected by the socket lock, any other socket by its
  * send buffer lock.
  */
 static void
 so_wrknl_lock(void *arg)
 {
 	struct socket *so = arg;
 
 	if (SOLISTENING(so))
 		SOCK_LOCK(so);
 	else
-		SOCKBUF_LOCK(&so->so_snd);
+		SOCK_SENDBUF_LOCK(so);
 }
 
 /*
  * Unlock routine for the write-side knote list.  arg is the socket.
  * Releases the socket lock for a listening socket and the send buffer lock
  * otherwise.
  */
 static void
 so_wrknl_unlock(void *arg)
 {
 	struct socket *so = arg;
 
 	if (SOLISTENING(so))
 		SOCK_UNLOCK(so);
 	else
-		SOCKBUF_UNLOCK(&so->so_snd);
+		SOCK_SENDBUF_UNLOCK(so);
 }
 
 /*
  * Lock assertion routine for the write-side knote list.  arg is the socket.
  * With what == LA_LOCKED it asserts that the protecting lock is held; with
  * any other value it asserts that the lock is not held.  The lock checked
  * is the socket lock for a listening socket and the send buffer lock
  * otherwise.
  */
 static void
 so_wrknl_assert_lock(void *arg, int what)
 {
 	struct socket *so = arg;
 
 	if (what == LA_LOCKED) {
 		if (SOLISTENING(so))
 			SOCK_LOCK_ASSERT(so);
 		else
-			SOCKBUF_LOCK_ASSERT(&so->so_snd);
+			SOCK_SENDBUF_LOCK_ASSERT(so);
 	} else {
 		if (SOLISTENING(so))
 			SOCK_UNLOCK_ASSERT(so);
 		else
-			SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+			SOCK_SENDBUF_UNLOCK_ASSERT(so);
 	}
 }
 
 /*
  * Fill in the external-format (``xsocket'') structure xso from the kernel
  * socket so.  The external form exists to keep irrelevant detail out of the
  * interface, to shield user code from changes to the kernel structure, and
  * to leave room for hiding information from users later.
  *
  * xso is zeroed first.  Listening sockets export their queue lengths and
  * limit with a zero OOB mark; other sockets export zero queue fields, fold
  * so_qstate into the exported state, and export both socket buffers.
  */
 void
 sotoxsocket(struct socket *so, struct xsocket *xso)
 {
 	bzero(xso, sizeof(*xso));
 	xso->xso_len = sizeof(*xso);
 	xso->xso_so = (uintptr_t)so;
 	xso->xso_protocol = so->so_proto->pr_protocol;
 	xso->xso_family = so->so_proto->pr_domain->dom_family;
 
 	xso->so_type = so->so_type;
 	xso->so_options = so->so_options;
 	xso->so_linger = so->so_linger;
 	xso->so_state = so->so_state;
 	xso->so_pcb = (uintptr_t)so->so_pcb;
 	xso->so_timeo = so->so_timeo;
 	xso->so_error = so->so_error;
 	xso->so_uid = so->so_cred->cr_uid;
 	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
 
 	if (!SOLISTENING(so)) {
 		xso->so_state |= so->so_qstate;
 		xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0;
 		xso->so_oobmark = so->so_oobmark;
 		sbtoxsockbuf(&so->so_snd, &xso->so_snd);
 		sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
 	} else {
 		xso->so_qlen = so->sol_qlen;
 		xso->so_incqlen = so->sol_incqlen;
 		xso->so_qlimit = so->sol_qlimit;
 		xso->so_oobmark = 0;
 	}
 }
 
 /* Accessor: address of the socket's receive buffer. */
 struct sockbuf *
 so_sockbuf_rcv(struct socket *so)
 {
 	return (&so->so_rcv);
 }
 
 /* Accessor: address of the socket's send buffer. */
 struct sockbuf *
 so_sockbuf_snd(struct socket *so)
 {
 	return (&so->so_snd);
 }
 
 /* Accessor: current so_state flags.  No locking is done here. */
 int
 so_state_get(const struct socket *so)
 {
 	return (so->so_state);
 }
 
 /* Mutator: overwrite so_state with val.  No locking is done here. */
 void
 so_state_set(struct socket *so, int val)
 {
 	so->so_state = val;
 }
 
 /* Accessor: current so_options flags.  No locking is done here. */
 int
 so_options_get(const struct socket *so)
 {
 	return (so->so_options);
 }
 
 /* Mutator: overwrite so_options with val.  No locking is done here. */
 void
 so_options_set(struct socket *so, int val)
 {
 	so->so_options = val;
 }
 
 /* Accessor: pending so_error value.  The error is read, not cleared. */
 int
 so_error_get(const struct socket *so)
 {
 	return (so->so_error);
 }
 
 /* Mutator: overwrite so_error with val.  No locking is done here. */
 void
 so_error_set(struct socket *so, int val)
 {
 	so->so_error = val;
 }
 
 /* Accessor: current so_linger value. */
 int
 so_linger_get(const struct socket *so)
 {
 	return (so->so_linger);
 }
 
 /*
  * Mutator: store val in so_linger.  val is asserted to lie in
  * [0, USHRT_MAX] and not to exceed INT_MAX / hz.
  */
 void
 so_linger_set(struct socket *so, int val)
 {
 	KASSERT(val >= 0 && val <= USHRT_MAX && val <= (INT_MAX / hz),
 	    ("%s: val %d out of range", __func__, val));
 	so->so_linger = val;
 }
 
 /* Accessor: the socket's protocol switch entry (so_proto). */
 struct protosw *
 so_protosw_get(const struct socket *so)
 {
 	return (so->so_proto);
 }
 
 /* Mutator: point so_proto at val.  No locking is done here. */
 void
 so_protosw_set(struct socket *so, struct protosw *val)
 {
 	so->so_proto = val;
 }
 
 /* Function wrapper around sorwakeup(). */
 void
 so_sorwakeup(struct socket *so)
 {
 	sorwakeup(so);
 }
 
 /* Function wrapper around sowwakeup(). */
 void
 so_sowwakeup(struct socket *so)
 {
 	sowwakeup(so);
 }
 
 /* Function wrapper around sorwakeup_locked(). */
 void
 so_sorwakeup_locked(struct socket *so)
 {
 	sorwakeup_locked(so);
 }
 
 /* Function wrapper around sowwakeup_locked(). */
 void
 so_sowwakeup_locked(struct socket *so)
 {
 	sowwakeup_locked(so);
 }
 
 /* Function wrapper around SOCK_LOCK(): acquire the socket lock. */
 void
 so_lock(struct socket *so)
 {
 	SOCK_LOCK(so);
 }
 
 /* Function wrapper around SOCK_UNLOCK(): release the socket lock. */
 void
 so_unlock(struct socket *so)
 {
 	SOCK_UNLOCK(so);
 }
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index efa586d346c5..b326dbd825a6 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1,2974 +1,2974 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California. All Rights Reserved.
  * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved.
  * Copyright (c) 2018 Matthew Macy
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
  */
 
 /*
  * UNIX Domain (Local) Sockets
  *
  * This is an implementation of UNIX (local) domain sockets.  Each socket has
  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
  * may be connected to 0 or 1 other socket.  Datagram sockets may be
  * connected to 0, 1, or many other sockets.  Sockets may be created and
  * connected in pairs (socketpair(2)), or bound/connected to using the file
  * system name space.  For most purposes, only the receive socket buffer is
  * used, as sending on one socket delivers directly to the receive socket
  * buffer of a second socket.
  *
  * The implementation is substantially complicated by the fact that
  * "ancillary data", such as file descriptors or credentials, may be passed
  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
  * over other UNIX domain sockets requires the implementation of a simple
  * garbage collector to find and tear down cycles of disconnected sockets.
  *
  * TODO:
  *	RDM
  *	rethink name space problems
  *	need a proper out-of-band
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/vnode.h>
 
 #include <net/vnet.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 MALLOC_DECLARE(M_FILECAPS);
 
 /*
  * See unpcb.h for the locking key.
  */
 
 static uma_zone_t	unp_zone;
 static unp_gen_t	unp_gencnt;	/* (l) */
 static u_int		unp_count;	/* (l) Count of local sockets. */
 static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
 static int		unp_rights;	/* (g) File descriptors in flight. */
 static struct unp_head	unp_shead;	/* (l) List of stream sockets. */
 static struct unp_head	unp_dhead;	/* (l) List of datagram sockets. */
 static struct unp_head	unp_sphead;	/* (l) List of seqpacket sockets. */
 
 /*
  * One entry on the list of files whose close has been deferred to the
  * taskqueue (see unp_defer_task).  unp_defers_count is exported read-only
  * as the net.local.deferred sysctl.
  */
 struct unp_defer {
 	SLIST_ENTRY(unp_defer) ud_link;	/* Linkage on unp_defers. */
 	struct file *ud_fp;		/* File awaiting deferred close. */
 };
 static SLIST_HEAD(, unp_defer) unp_defers;
 static int unp_defers_count;
 
 static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
 
 /*
  * Garbage collection of cyclic file descriptor/socket references occurs
  * asynchronously in a taskqueue context in order to avoid recursion and
  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  * code.  See unp_gc() for a full description.
  */
 static struct timeout_task unp_gc_task;
 
 /*
  * The close of unix domain sockets attached as SCM_RIGHTS is
  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  * The attached sockets may themselves have other sockets attached.
  */
 static struct task	unp_defer_task;
 
 /*
  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  * stream sockets, although the total for sender and receiver is actually
  * only PIPSIZ.
  *
  * Datagram sockets really use the sendspace as the maximum datagram size,
  * and don't really want to reserve the sendspace.  Their recvspace should be
  * large enough for at least one max-size datagram plus address.
  */
 #ifndef PIPSIZ
 #define	PIPSIZ	8192
 #endif
 static u_long	unpst_sendspace = PIPSIZ;
 static u_long	unpst_recvspace = PIPSIZ;
 static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
 static u_long	unpdg_recvspace = 16*1024;	/* support 8KB syslog msgs */
 static u_long	unpsp_sendspace = PIPSIZ;	/* really max datagram size */
 static u_long	unpsp_recvspace = PIPSIZ;
 
 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Local domain");
 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_STREAM");
 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_DGRAM");
 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_SEQPACKET");
 
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
 	   &unpst_sendspace, 0, "Default stream send space.");
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpst_recvspace, 0, "Default stream receive space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
 	   &unpdg_sendspace, 0, "Default datagram send space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpdg_recvspace, 0, "Default datagram receive space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
 	   &unpsp_sendspace, 0, "Default seqpacket send space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpsp_recvspace, 0, "Default seqpacket receive space.");
 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
     "File descriptors in flight.");
 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
     &unp_defers_count, 0,
     "File descriptors deferred to taskqueue for close.");
 
 /*
  * Locking and synchronization:
  *
  * Several types of locks exist in the local domain socket implementation:
  * - a global linkage lock
  * - a global connection list lock
  * - the mtxpool lock
  * - per-unpcb mutexes
  *
  * The linkage lock protects the global socket lists, the generation number
  * counter and garbage collector state.
  *
  * The connection list lock protects the list of referring sockets in a datagram
  * socket PCB.  This lock is also overloaded to protect a global list of
  * sockets whose buffers contain socket references in the form of SCM_RIGHTS
  * messages.  To avoid recursion, such references are released by a dedicated
  * thread.
  *
  * The mtxpool lock protects the vnode from being modified while referenced.
  * Lock ordering rules require that it be acquired before any PCB locks.
  *
  * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the
  * unpcb.  This includes the unp_conn field, which either links two connected
  * PCBs together (for connected socket types) or points at the destination
  * socket (for connectionless socket types).  The operations of creating or
  * destroying a connection therefore involve locking multiple PCBs.  To avoid
  * lock order reversals, in some cases this involves dropping a PCB lock and
  * using a reference counter to maintain liveness.
  *
  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  * allocated in pru_attach() and freed in pru_detach().  The validity of that
  * pointer is an invariant, so no lock is required to dereference the so_pcb
  * pointer if a valid socket reference is held by the caller.  In practice,
  * this is always true during operations performed on a socket.  Each unpcb
  * has a back-pointer to its socket, unp_socket, which will be stable under
  * the same circumstances.
  *
  * This pointer may only be safely dereferenced as long as a valid reference
  * to the unpcb is held.  Typically, this reference will be from the socket,
  * or from another unpcb when the referring unpcb's lock is held (in order
  * that the reference not be invalidated during use).  For example, to follow
  * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
  * that detach is not run clearing unp_socket.
  *
  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  * protocols, bind() is a non-atomic operation, and connect() requires
  * potential sleeping in the protocol, due to potentially waiting on local or
  * distributed file systems.  We try to separate "lookup" operations, which
  * may sleep, and the IPC operations themselves, which typically can occur
  * with relative atomicity as locks can be held over the entire operation.
  *
  * Another tricky issue is simultaneous multi-threaded or multi-process
  * access to a single UNIX domain socket.  These are handled by the flags
  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  * binding, both of which involve dropping UNIX domain socket locks in order
  * to perform namei() and other file system operations.
  */
 static struct rwlock	unp_link_rwlock;
 static struct mtx	unp_defers_lock;
 
 #define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
 					    "unp_link_rwlock")
 
 #define	UNP_LINK_LOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_LOCKED)
 #define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_UNLOCKED)
 
 #define	UNP_LINK_RLOCK()		rw_rlock(&unp_link_rwlock)
 #define	UNP_LINK_RUNLOCK()		rw_runlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK()		rw_wlock(&unp_link_rwlock)
 #define	UNP_LINK_WUNLOCK()		rw_wunlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_WLOCKED)
 #define	UNP_LINK_WOWNED()		rw_wowned(&unp_link_rwlock)
 
 #define	UNP_DEFERRED_LOCK_INIT()	mtx_init(&unp_defers_lock, \
 					    "unp_defer", NULL, MTX_DEF)
 #define	UNP_DEFERRED_LOCK()		mtx_lock(&unp_defers_lock)
 #define	UNP_DEFERRED_UNLOCK()		mtx_unlock(&unp_defers_lock)
 
 /*
  * The datagram reference lists share the deferred-close mutex.  The
  * expansions deliberately carry no trailing semicolon so that the macros
  * behave like ordinary function calls (e.g. inside an unbraced if/else);
  * the caller supplies the terminating ';'.
  */
 #define UNP_REF_LIST_LOCK()		UNP_DEFERRED_LOCK()
 #define UNP_REF_LIST_UNLOCK()		UNP_DEFERRED_UNLOCK()
 
 #define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
 					    "unp", "unp",	\
 					    MTX_DUPOK|MTX_DEF)
 #define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCKPTR(unp)		(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
 #define	UNP_PCB_TRYLOCK(unp)		mtx_trylock(&(unp)->unp_mtx)
 #define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
 #define	UNP_PCB_OWNED(unp)		mtx_owned(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
 #define	UNP_PCB_UNLOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
 
 static int	uipc_connect2(struct socket *, struct socket *);
 static int	uipc_ctloutput(struct socket *, struct sockopt *);
 static int	unp_connect(struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connectat(int, struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connect2(struct socket *so, struct socket *so2, int);
 static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
 static void	unp_dispose(struct socket *so);
 static void	unp_dispose_mbuf(struct mbuf *);
 static void	unp_shutdown(struct unpcb *);
 static void	unp_drop(struct unpcb *);
 static void	unp_gc(__unused void *, int);
 static void	unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
 static void	unp_discard(struct file *);
 static void	unp_freerights(struct filedescent **, int);
 static int	unp_internalize(struct mbuf **, struct thread *);
 static void	unp_internalize_fp(struct file *);
 static int	unp_externalize(struct mbuf *, struct mbuf **, int);
 static int	unp_externalize_fp(struct file *);
 static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *, int);
 static void	unp_process_defers(void * __unused, int);
 
 /*
  * Take an additional reference on a unpcb.  The caller must already own a
  * reference: the KASSERT checks that the count returned by
  * refcount_acquire() (the value before the increment) was non-zero.
  */
 static void
 unp_pcb_hold(struct unpcb *unp)
 {
 	u_int old __unused;
 
 	old = refcount_acquire(&unp->unp_refcount);
 	KASSERT(old > 0, ("%s: unpcb %p has no references", __func__, unp));
 }
 
 /*
  * Drop one reference on a locked unpcb.
  *
  * Returns true when this was the final reference; in that case the PCB has
  * been unlocked, its mutex destroyed and its memory returned to unp_zone,
  * so the caller must not touch it again.  Returns false otherwise, leaving
  * the PCB locked.
  */
 static __result_use_check bool
 unp_pcb_rele(struct unpcb *unp)
 {
 
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	if (!refcount_release(&unp->unp_refcount))
 		return (false);
 
 	UNP_PCB_UNLOCK(unp);
 	UNP_PCB_LOCK_DESTROY(unp);
 	uma_zfree(unp_zone, unp);
 	return (true);
 }
 
 /*
  * Drop a reference that the caller knows is not the last one.  Unlike
  * unp_pcb_rele() this neither asserts the PCB lock nor frees the PCB; the
  * KASSERT fires if the release unexpectedly dropped the final reference.
  */
 static void
 unp_pcb_rele_notlast(struct unpcb *unp)
 {
 	bool ret __unused;
 
 	ret = refcount_release(&unp->unp_refcount);
 	KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp));
 }
 
 /*
  * Lock two PCBs, neither of which may already be held by the caller.  The
  * PCB at the lower address is always locked first so that concurrent
  * callers agree on the order; if both arguments name the same PCB it is
  * locked exactly once.
  */
 static void
 unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2)
 {
 	struct unpcb *first, *second;
 
 	UNP_PCB_UNLOCK_ASSERT(unp);
 	UNP_PCB_UNLOCK_ASSERT(unp2);
 
 	if (unp == unp2) {
 		UNP_PCB_LOCK(unp);
 		return;
 	}
 
 	if ((uintptr_t)unp2 > (uintptr_t)unp) {
 		first = unp;
 		second = unp2;
 	} else {
 		first = unp2;
 		second = unp;
 	}
 	UNP_PCB_LOCK(first);
 	UNP_PCB_LOCK(second);
 }
 
 /*
  * Undo unp_pcb_lock_pair(): unlock unp, then unp2 unless both arguments
  * refer to the same PCB.
  */
 static void
 unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2)
 {
 
 	UNP_PCB_UNLOCK(unp);
 	if (unp == unp2)
 		return;
 	UNP_PCB_UNLOCK(unp2);
 }
 
 /*
  * Try to lock the connected peer of an already locked socket.  In some cases
  * this requires that we unlock the current socket.  The pairbusy counter is
  * used to block concurrent connection attempts while the lock is dropped.  The
  * caller must be careful to revalidate PCB state.
  *
  * Returns the locked peer, or unp itself when the socket is connected to
  * itself (no second lock is taken in that case).  Returns NULL, with only
  * unp locked, when there is no peer or the peer went away while unp's lock
  * was dropped.
  */
 static struct unpcb *
 unp_pcb_lock_peer(struct unpcb *unp)
 {
 	struct unpcb *unp2;
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL)
 		return (NULL);
 	/* Connected to itself: the lock already held covers the peer. */
 	if (__predict_false(unp == unp2))
 		return (unp);
 
 	UNP_PCB_UNLOCK_ASSERT(unp2);
 
 	/* Fast path: the peer's lock is available without blocking. */
 	if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
 		return (unp2);
 	/*
 	 * The peer sorts after unp in the address order used by
 	 * unp_pcb_lock_pair(), so blocking on it while holding unp respects
 	 * that order.
 	 */
 	if ((uintptr_t)unp2 > (uintptr_t)unp) {
 		UNP_PCB_LOCK(unp2);
 		return (unp2);
 	}
 	/*
 	 * The peer must be locked first.  Mark unp busy, take a reference on
 	 * the peer so it stays allocated, and drop unp's lock before
 	 * reacquiring both in address order.
 	 */
 	unp->unp_pairbusy++;
 	unp_pcb_hold(unp2);
 	UNP_PCB_UNLOCK(unp);
 
 	UNP_PCB_LOCK(unp2);
 	UNP_PCB_LOCK(unp);
 	KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
 	    ("%s: socket %p was reconnected", __func__, unp));
 	/* Last busy holder wakes any thread that flagged UNP_WAITING. */
 	if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
 		unp->unp_flags &= ~UNP_WAITING;
 		wakeup(unp);
 	}
 	/* Drop the temporary reference; the peer may be freed by this. */
 	if (unp_pcb_rele(unp2)) {
 		/* unp2 is unlocked. */
 		return (NULL);
 	}
 	/* Disconnected while unp's lock was dropped. */
 	if (unp->unp_conn == NULL) {
 		UNP_PCB_UNLOCK(unp2);
 		return (NULL);
 	}
 	return (unp2);
 }
 
 /*
  * Definitions of protocols supported in the LOCAL domain.
  */
 static struct domain localdomain;
 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
 static struct pr_usrreqs uipc_usrreqs_seqpacket;
 /*
  * Protocol switch table for the local domain: one entry each for stream,
  * datagram and seqpacket sockets.  All three share uipc_ctloutput() and
  * differ in their flags and pr_usrreqs tables.
  */
 static struct protosw localsw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|
 				    PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_stream
 },
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS|PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_dgram
 },
 {
 	.pr_type =		SOCK_SEQPACKET,
 	.pr_domain =		&localdomain,
 
 	/*
 	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
 	 * due to our use of sbappendaddr.  A new sbappend variant is needed
 	 * that supports both atomic record writes and control data.
 	 */
 	.pr_flags =		PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|
 				    PR_WANTRCVD|PR_RIGHTS|PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_seqpacket,
 },
 };
 
 /*
  * Domain descriptor for AF_LOCAL.  It names the hooks used to externalize
  * and dispose of in-flight rights (unp_externalize, unp_dispose) and the
  * bounds of the localsw table; DOMAIN_SET() registers it.
  */
 static struct domain localdomain = {
 	.dom_family =		AF_LOCAL,
 	.dom_name =		"local",
 	.dom_externalize =	unp_externalize,
 	.dom_dispose =		unp_dispose,
 	.dom_protosw =		localsw,
 	/* One past the last entry of localsw. */
 	.dom_protoswNPROTOSW =	&localsw[nitems(localsw)]
 };
 DOMAIN_SET(local);
 
 /*
  * Abort handler: if the socket has a peer, take a reference on the peer
  * while our own PCB is locked, then drop the peer with unp_drop() after
  * releasing our lock.  Nothing is done for an unconnected socket.
  */
 static void
 uipc_abort(struct socket *so)
 {
 	struct unpcb *unp, *peer;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
 	UNP_PCB_UNLOCK_ASSERT(unp);
 
 	UNP_PCB_LOCK(unp);
 	peer = unp->unp_conn;
 	if (peer != NULL)
 		unp_pcb_hold(peer);
 	UNP_PCB_UNLOCK(unp);
 
 	if (peer != NULL)
 		unp_drop(peer);
 }
 
 /*
  * Report the name of the connected peer in a freshly allocated sockaddr.
  * If there is no peer any more (it may already have closed) or the peer
  * was never bound, the anonymous address sun_noname is returned instead.
  * Always returns 0.
  */
 static int
 uipc_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *peer;
 	const struct sockaddr *sa;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
 
 	/* Allocate before taking any mutex; M_WAITOK may sleep. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 
 	UNP_PCB_LOCK(unp);
 	peer = unp_pcb_lock_peer(unp);
 	sa = (peer != NULL && peer->unp_addr != NULL) ?
 	    (struct sockaddr *)peer->unp_addr : &sun_noname;
 	bcopy(sa, *nam, sa->sa_len);
 	if (peer == NULL)
 		UNP_PCB_UNLOCK(unp);
 	else
 		unp_pcb_unlock_pair(unp, peer);
 	return (0);
 }
 
 static int
 uipc_attach(struct socket *so, int proto, struct thread *td)
 {
 	u_long sendspace, recvspace;
 	struct unpcb *unp;
 	int error;
 	bool locked;
 
 	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			sendspace = unpst_sendspace;
 			recvspace = unpst_recvspace;
 			break;
 
 		case SOCK_DGRAM:
 			sendspace = unpdg_sendspace;
 			recvspace = unpdg_recvspace;
 			break;
 
 		case SOCK_SEQPACKET:
 			sendspace = unpsp_sendspace;
 			recvspace = unpsp_recvspace;
 			break;
 
 		default:
 			panic("uipc_attach");
 		}
 		error = soreserve(so, sendspace, recvspace);
 		if (error)
 			return (error);
 	}
 	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
 	if (unp == NULL)
 		return (ENOBUFS);
 	LIST_INIT(&unp->unp_refs);
 	UNP_PCB_LOCK_INIT(unp);
 	unp->unp_socket = so;
 	so->so_pcb = unp;
 	refcount_init(&unp->unp_refcount, 1);
 
 	if ((locked = UNP_LINK_WOWNED()) == false)
 		UNP_LINK_WLOCK();
 
 	unp->unp_gencnt = ++unp_gencnt;
 	unp->unp_ino = ++unp_ino;
 	unp_count++;
 	switch (so->so_type) {
 	case SOCK_STREAM:
 		LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
 		break;
 
 	case SOCK_DGRAM:
 		LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
 		break;
 
 	case SOCK_SEQPACKET:
 		LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
 		break;
 
 	default:
 		panic("uipc_attach");
 	}
 
 	if (locked == false)
 		UNP_LINK_WUNLOCK();
 
 	return (0);
 }
 
 /*
  * Bind a local socket to a path name, looked up relative to the directory
  * descriptor fd: create a VSOCK vnode at that path and record the vnode and
  * a private copy of the address in the unpcb.
  *
  * Returns 0 on success.  Errors produced directly here are EAFNOSUPPORT for
  * a non-AF_UNIX address, EINVAL for a bad address length or an already
  * bound socket, EALREADY when another bind is in progress on the socket and
  * EADDRINUSE when the path already exists; errors from namei(),
  * vn_start_write(), the MAC check and VOP_CREATE() are passed through.
  */
 static int
 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	struct vattr vattr;
 	int error, namelen;
 	struct nameidata nd;
 	struct unpcb *unp;
 	struct vnode *vp;
 	struct mount *mp;
 	cap_rights_t rights;
 	char *buf;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
 
 	if (soun->sun_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	/* Length of the path portion; an empty path is rejected. */
 	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 	if (namelen <= 0)
 		return (EINVAL);
 
 	/*
 	 * We don't allow simultaneous bind() calls on a single UNIX domain
 	 * socket, so flag in-progress operations, and return an error if an
 	 * operation is already in progress.
 	 *
 	 * Historically, we have not allowed a socket to be rebound, so this
 	 * also returns an error.  Not allowing re-binding simplifies the
 	 * implementation and avoids a great many possible failure modes.
 	 */
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (EINVAL);
 	}
 	if (unp->unp_flags & UNP_BINDING) {
 		UNP_PCB_UNLOCK(unp);
 		return (EALREADY);
 	}
 	unp->unp_flags |= UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * The path is copied by length, so terminate the copy explicitly
 	 * before handing it to namei().
 	 */
 	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
 	bcopy(soun->sun_path, buf, namelen);
 	buf[namelen] = 0;
 
 restart:
 	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT));
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 	error = namei(&nd);
 	if (error)
 		goto error;
 	vp = nd.ni_vp;
 	/*
 	 * Either the name already exists or write access to the filesystem
 	 * could not be started without waiting.  Release the lookup state;
 	 * an existing name is an error, otherwise wait for write access and
 	 * repeat the lookup.
 	 */
 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (vp != NULL) {
 			vrele(vp);
 			error = EADDRINUSE;
 			goto error;
 		}
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error)
 			goto error;
 		goto restart;
 	}
 	/* Create a socket node; mode is ACCESSPERMS masked by pd_cmask. */
 	VATTR_NULL(&vattr);
 	vattr.va_type = VSOCK;
 	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask);
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 #endif
 	/* Without MAC, error is still 0 here from the successful namei(). */
 	if (error == 0)
 		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	NDFREE_PNBUF(&nd);
 	if (error) {
 		VOP_VPUT_PAIR(nd.ni_dvp, NULL, true);
 		vn_finished_write(mp);
 		/* ERELOOKUP: start over from the name lookup. */
 		if (error == ERELOOKUP)
 			goto restart;
 		goto error;
 	}
 	vp = nd.ni_vp;
 	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
 	/* Private copy of the address, owned by the unpcb from here on. */
 	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
 
 	/* Publish the binding and clear the in-progress flag atomically. */
 	UNP_PCB_LOCK(unp);
 	VOP_UNP_BIND(vp, unp);
 	unp->unp_vnode = vp;
 	unp->unp_addr = soun;
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	/*
 	 * NOTE(review): the extra vref() presumably keeps a reference for
 	 * unp_vnode once VOP_VPUT_PAIR() releases the pair -- confirm.
 	 */
 	vref(vp);
 	VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
 	vn_finished_write(mp);
 	free(buf, M_TEMP);
 	return (0);
 
 error:
 	/* Common failure exit: allow a later bind attempt and free the path. */
 	UNP_PCB_LOCK(unp);
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * Bind relative to the current working directory: uipc_bindat() with
  * AT_FDCWD as the directory descriptor.
  */
 static int
 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (uipc_bindat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect entry point: must be called by the current thread; the work is
  * done by unp_connect(), whose result is returned unchanged.
  */
 static int
 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
 	return (unp_connect(so, nam, td));
 }
 
 /*
  * Connect entry point taking a directory descriptor: must be called by the
  * current thread; the work is done by unp_connectat(), whose result is
  * returned unchanged.
  */
 static int
 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 
 	KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
 	return (unp_connectat(fd, so, nam, td));
 }
 
 /*
  * Close handler: detach the socket from the vnode it is bound to, if any,
  * and disconnect it from its peer.  The PCB itself is left attached to the
  * socket.
  */
 static void
 uipc_close(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct vnode *vp = NULL;
 	struct mtx *vplock;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
 
 	/*
 	 * unp_vnode is sampled without the PCB lock so that the vnode's pool
 	 * mutex can be acquired before the PCB lock.
 	 */
 	vplock = NULL;
 	if ((vp = unp->unp_vnode) != NULL) {
 		vplock = mtx_pool_find(mtxpool_sleep, vp);
 		mtx_lock(vplock);
 	}
 	UNP_PCB_LOCK(unp);
 	/* The binding went away before we got the PCB lock; nothing to undo. */
 	if (vp && unp->unp_vnode == NULL) {
 		mtx_unlock(vplock);
 		vp = NULL;
 	}
 	if (vp != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	/* unp_disconnect() is entered with both PCBs locked. */
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 	/* Release the pool mutex and the vnode reference held via unp_vnode. */
 	if (vp) {
 		mtx_unlock(vplock);
 		vrele(vp);
 	}
 }
 
 /*
  * Connect two sockets directly to each other.  Both PCBs are locked as a
  * pair around the call to unp_connect2(), whose result is returned.
  */
 static int
 uipc_connect2(struct socket *so1, struct socket *so2)
 {
 	struct unpcb *pcb1, *pcb2;
 	int rv;
 
 	pcb1 = so1->so_pcb;
 	KASSERT(pcb1 != NULL, ("uipc_connect2: unp == NULL"));
 	pcb2 = so2->so_pcb;
 	KASSERT(pcb2 != NULL, ("uipc_connect2: unp2 == NULL"));
 
 	unp_pcb_lock_pair(pcb1, pcb2);
 	rv = unp_connect2(so1, so2, PRU_CONNECT2);
 	unp_pcb_unlock_pair(pcb1, pcb2);
 
 	return (rv);
 }
 
 /*
  * Tear down the unpcb of a socket that is going away.
  *
  * The PCB is removed from the global lists, detached from its bound vnode
  * (if any), disconnected from its peer, and every datagram socket still
  * referring to it is dropped.  Finally the socket and PCB are unlinked from
  * each other, the bound address is freed and the socket's reference on the
  * PCB is released.  If any rights were in flight, the garbage collector is
  * scheduled.
  */
 static void
 uipc_detach(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct mtx *vplock;
 	struct vnode *vp;
 	int local_unp_rights;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
 
 	UNP_LINK_WLOCK();
 	LIST_REMOVE(unp, unp_link);
 	if (unp->unp_gcflag & UNPGC_DEAD)
 		LIST_REMOVE(unp, unp_dead);
 	unp->unp_gencnt = ++unp_gencnt;
 	--unp_count;
 	UNP_LINK_WUNLOCK();
 
 	UNP_PCB_UNLOCK_ASSERT(unp);
  restart:
 	/*
 	 * unp_vnode is sampled without the PCB lock so that the vnode's pool
 	 * mutex can be acquired before the PCB lock.  vplock is reset on
 	 * every pass so that a value left over from an earlier pass is never
 	 * unlocked without being held.
 	 */
 	vplock = NULL;
 	if ((vp = unp->unp_vnode) != NULL) {
 		vplock = mtx_pool_find(mtxpool_sleep, vp);
 		mtx_lock(vplock);
 	}
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != vp && unp->unp_vnode != NULL) {
 		/* Bound to a different vnode in the meantime; try again. */
 		if (vplock != NULL)
 			mtx_unlock(vplock);
 		UNP_PCB_UNLOCK(unp);
 		goto restart;
 	}
 	if (vp != NULL && unp->unp_vnode == NULL) {
 		/*
 		 * The binding went away before we got the PCB lock.  Release
 		 * the pool mutex here: with vp cleared, the cleanup at the
 		 * end of the function will not do it.
 		 */
 		mtx_unlock(vplock);
 		vp = NULL;
 	}
 	if (vp != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	/* unp_disconnect() is entered with both PCBs locked. */
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * Drop every socket that still refers to this one.  The list lock is
 	 * released around unp_drop(), so the head is re-read each iteration.
 	 */
 	UNP_REF_LIST_LOCK();
 	while (!LIST_EMPTY(&unp->unp_refs)) {
 		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
 
 		unp_pcb_hold(ref);
 		UNP_REF_LIST_UNLOCK();
 
 		MPASS(ref != unp);
 		UNP_PCB_UNLOCK_ASSERT(ref);
 		unp_drop(ref);
 		UNP_REF_LIST_LOCK();
 	}
 	UNP_REF_LIST_UNLOCK();
 
 	UNP_PCB_LOCK(unp);
 	local_unp_rights = unp_rights;
 	unp->unp_socket->so_pcb = NULL;
 	unp->unp_socket = NULL;
 	free(unp->unp_addr, M_SONAME);
 	unp->unp_addr = NULL;
 	/* unp_pcb_rele() unlocks and frees the PCB on the last reference. */
 	if (!unp_pcb_rele(unp))
 		UNP_PCB_UNLOCK(unp);
 	if (vp != NULL) {
 		mtx_unlock(vplock);
 		vrele(vp);
 	}
 	if (local_unp_rights)
 		taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
 }
 
 /*
  * Disconnect the socket from its peer, if it has one.  Always returns 0.
  */
 static int
 uipc_disconnect(struct socket *so)
 {
 	struct unpcb *unp, *peer;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
 
 	UNP_PCB_LOCK(unp);
 	peer = unp_pcb_lock_peer(unp);
 	if (peer == NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (0);
 	}
 	/* unp_disconnect() is entered with both PCBs locked. */
 	unp_disconnect(unp, peer);
 	return (0);
 }
 
 /*
  * Put a stream or seqpacket socket into the listening state.
  *
  * Fails with EINVAL if the socket is connected or a connect is in
  * progress, and with EDESTADDRREQ if it has not been bound.  Otherwise the
  * result of solisten_proto_check() is returned; on success the caller's
  * credentials are recorded in unp_peercred.
  */
 static int
 uipc_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct unpcb *unp;
 	int error;
 
 	MPASS(so->so_type != SOCK_DGRAM);
 
 	/*
 	 * Holding the PCB lock across the checks and the state change
 	 * synchronizes with concurrent connection attempts.
 	 */
 	unp = sotounpcb(so);
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0) {
 		UNP_PCB_UNLOCK(unp);
 		return (EINVAL);
 	}
 	if (unp->unp_vnode == NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (EDESTADDRREQ);
 	}
 
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error == 0) {
 		cru2xt(td, &unp->unp_peercred);
 		solisten_proto(so, backlog);
 	}
 	SOCK_UNLOCK(so);
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Report the address of the connected peer in a freshly allocated sockaddr.
  * If the socket has no peer, or the peer is not bound, the anonymous
  * address sun_noname is returned instead, so callers always receive a
  * PF_LOCAL sockaddr.  Always returns 0.
  *
  * unp_conn is protected by the PCB lock, not by the global link lock, so
  * the peer is looked up and locked with unp_pcb_lock_peer() while our own
  * PCB is locked, exactly as uipc_accept() does.  This keeps the peer from
  * being disconnected and freed while its address is copied.
  */
 static int
 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *unp2;
 	const struct sockaddr *sa;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
 
 	/* Allocate before taking any mutex; M_WAITOK may sleep. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 
 	UNP_PCB_LOCK(unp);
 	unp2 = unp_pcb_lock_peer(unp);
 	if (unp2 != NULL && unp2->unp_addr != NULL)
 		sa = (struct sockaddr *)unp2->unp_addr;
 	else
 		sa = &sun_noname;
 	bcopy(sa, *nam, sa->sa_len);
 	if (unp2 != NULL)
 		unp_pcb_unlock_pair(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 /*
  * Called after data has been removed from the receive buffer of a stream
  * or seqpacket socket: relieve backpressure on the connected sender by
  * clearing SB_STOP on its send buffer when our receive buffer is below the
  * sender's limits, and wake any writers.  Always returns 0.
  */
 static int
 uipc_rcvd(struct socket *so, int flags)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	/*
 	 * Adjust backpressure on sender and wakeup any waiting to write.
 	 *
 	 * The unp lock is acquired to maintain the validity of the unp_conn
 	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
 	 * static as long as we don't permit unp2 to disconnect from unp,
 	 * which is prevented by the lock on unp.  We cache values from
 	 * so_rcv to avoid holding the so_rcv lock over the entire
 	 * transaction on the remote so_snd.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	mbcnt = so->so_rcv.sb_mbcnt;
 	sbcc = sbavail(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	/*
 	 * There is a benign race condition at this point.  If we're planning to
 	 * clear SB_STOP, but uipc_send is called on the connected socket at
 	 * this instant, it might add data to the sockbuf and set SB_STOP.  Then
 	 * we would erroneously clear SB_STOP below, even though the sockbuf is
 	 * full.  The race is benign because the only ill effect is to allow the
 	 * sockbuf to exceed its size limit, and the size limits are not
 	 * strictly guaranteed anyway.
 	 */
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL) {
 		/* No peer: there is no sender to unblock. */
 		UNP_PCB_UNLOCK(unp);
 		return (0);
 	}
 	so2 = unp2->unp_socket;
 	SOCKBUF_LOCK(&so2->so_snd);
 	/* Compare our cached receive usage against the sender's limits. */
 	if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
 		so2->so_snd.sb_flags &= ~SB_STOP;
 	/*
 	 * NOTE(review): there is no explicit SOCKBUF_UNLOCK for so2->so_snd;
 	 * presumably sowwakeup_locked() releases it -- confirm.
 	 */
 	sowwakeup_locked(so2);
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 static int
 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 	int error;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
 	    so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	error = 0;
 	if (flags & PRUS_OOB) {
 		error = EOPNOTSUPP;
 		goto release;
 	}
 	if (control != NULL && (error = unp_internalize(&control, td)))
 		goto release;
 
 	unp2 = NULL;
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 	{
 		const struct sockaddr *from;
 
 		if (nam != NULL) {
 			error = unp_connect(so, nam, td);
 			if (error != 0)
 				break;
 		}
 		UNP_PCB_LOCK(unp);
 
 		/*
 		 * Because connect() and send() are non-atomic in a sendto()
 		 * with a target address, it's possible that the socket will
 		 * have disconnected before the send() can run.  In that case
 		 * return the slightly counter-intuitive but otherwise
 		 * correct error that the socket is not connected.
 		 */
 		unp2 = unp_pcb_lock_peer(unp);
 		if (unp2 == NULL) {
 			UNP_PCB_UNLOCK(unp);
 			error = ENOTCONN;
 			break;
 		}
 
 		if (unp2->unp_flags & UNP_WANTCRED_MASK)
 			control = unp_addsockcred(td, control,
 			    unp2->unp_flags);
 		if (unp->unp_addr != NULL)
 			from = (struct sockaddr *)unp->unp_addr;
 		else
 			from = &sun_noname;
 		so2 = unp2->unp_socket;
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (sbappendaddr_locked(&so2->so_rcv, from, m,
 		    control)) {
 			sorwakeup_locked(so2);
 			m = NULL;
 			control = NULL;
 		} else {
 			soroverflow_locked(so2);
 			error = (so->so_state & SS_NBIO) ? EAGAIN : ENOBUFS;
 		}
 		if (nam != NULL)
 			unp_disconnect(unp, unp2);
 		else
 			unp_pcb_unlock_pair(unp, unp2);
 		break;
 	}
 
 	case SOCK_SEQPACKET:
 	case SOCK_STREAM:
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			if (nam != NULL) {
 				error = unp_connect(so, nam, td);
 				if (error != 0)
 					break;
 			} else {
 				error = ENOTCONN;
 				break;
 			}
 		}
 
 		UNP_PCB_LOCK(unp);
 		if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
 			UNP_PCB_UNLOCK(unp);
 			error = ENOTCONN;
 			break;
 		} else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			unp_pcb_unlock_pair(unp, unp2);
 			error = EPIPE;
 			break;
 		}
 		UNP_PCB_UNLOCK(unp);
 		if ((so2 = unp2->unp_socket) == NULL) {
 			UNP_PCB_UNLOCK(unp2);
 			error = ENOTCONN;
 			break;
 		}
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (unp2->unp_flags & UNP_WANTCRED_MASK) {
 			/*
 			 * Credentials are passed only once on SOCK_STREAM and
 			 * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
 			 * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
 			 */
 			control = unp_addsockcred(td, control, unp2->unp_flags);
 			unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
 		}
 
 		/*
 		 * Send to paired receive port and wake up readers.  Don't
 		 * check for space available in the receive buffer if we're
 		 * attaching ancillary data; Unix domain sockets only check
 		 * for space in the sending sockbuf, and that check is
 		 * performed one level up the stack.  At that level we cannot
 		 * precisely account for the amount of buffer space used
 		 * (e.g., because control messages are not yet internalized).
 		 */
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			if (control != NULL) {
 				sbappendcontrol_locked(&so2->so_rcv, m,
 				    control, flags);
 				control = NULL;
 			} else
 				sbappend_locked(&so2->so_rcv, m, flags);
 			break;
 
 		case SOCK_SEQPACKET:
 			if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 			    &sun_noname, m, control))
 				control = NULL;
 			break;
 		}
 
 		mbcnt = so2->so_rcv.sb_mbcnt;
 		sbcc = sbavail(&so2->so_rcv);
 		if (sbcc)
 			sorwakeup_locked(so2);
 		else
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 
 		/*
 		 * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 		 * it would be possible for uipc_rcvd to be called at this
 		 * point, drain the receiving sockbuf, clear SB_STOP, and then
 		 * we would set SB_STOP below.  That could lead to an empty
 		 * sockbuf having SB_STOP set
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 			so->so_snd.sb_flags |= SB_STOP;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		UNP_PCB_UNLOCK(unp2);
 		m = NULL;
 		break;
 	}
 
 	/*
 	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
 	 */
 	if (flags & PRUS_EOF) {
 		UNP_PCB_LOCK(unp);
 		socantsendmore(so);
 		unp_shutdown(unp);
 		UNP_PCB_UNLOCK(unp);
 	}
 	if (control != NULL && error != 0)
 		unp_dispose_mbuf(control);
 
 release:
 	if (control != NULL)
 		m_freem(control);
 	/*
 	 * In case of PRUS_NOTREADY, uipc_ready() is responsible
 	 * for freeing memory.
 	 */   
 	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 		m_freem(m);
 	return (error);
 }
 
 static bool
 uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp)
 {
 	struct mbuf *mb, *n;
 	struct sockbuf *sb;
 
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
 		SOCK_UNLOCK(so);
 		return (false);
 	}
 	mb = NULL;
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 	if (sb->sb_fnrdy != NULL) {
 		for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) {
 			if (mb == m) {
 				*errorp = sbready(sb, m, count);
 				break;
 			}
 			mb = mb->m_next;
 			if (mb == NULL) {
 				mb = n;
 				if (mb != NULL)
 					n = mb->m_nextpkt;
 			}
 		}
 	}
 	SOCKBUF_UNLOCK(sb);
 	SOCK_UNLOCK(so);
 	return (mb != NULL);
 }
 
 static int
 uipc_ready(struct socket *so, struct mbuf *m, int count)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	int error, i;
 
 	unp = sotounpcb(so);
 
 	KASSERT(so->so_type == SOCK_STREAM,
 	    ("%s: unexpected socket type for %p", __func__, so));
 
 	UNP_PCB_LOCK(unp);
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		so2 = unp2->unp_socket;
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 			sorwakeup_locked(so2);
 		else
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 		UNP_PCB_UNLOCK(unp2);
 		return (error);
 	}
 	UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * The receiving socket has been disconnected, but may still be valid.
 	 * In this case, the now-ready mbufs are still present in its socket
 	 * buffer, so perform an exhaustive search before giving up and freeing
 	 * the mbufs.
 	 */
 	UNP_LINK_RLOCK();
 	LIST_FOREACH(unp, &unp_shead, unp_link) {
 		if (uipc_ready_scan(unp->unp_socket, m, count, &error))
 			break;
 	}
 	UNP_LINK_RUNLOCK();
 
 	if (unp == NULL) {
 		for (i = 0; i < count; i++)
 			m = m_free(m);
 		error = ECONNRESET;
 	}
 	return (error);
 }
 
 /*
  * pru_sense handler: fill in the stat fields a local socket can supply.
  * The block size is the send buffer high-water mark, the device is NODEV
  * and the inode number comes from the PCB.  Always returns 0.
  */
 static int
 uipc_sense(struct socket *so, struct stat *sb)
 {
 	struct unpcb *pcb = sotounpcb(so);
 
 	KASSERT(pcb != NULL, ("uipc_sense: unp == NULL"));
 
 	sb->st_ino = pcb->unp_ino;
 	sb->st_dev = NODEV;
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	return (0);
 }
 
 /*
  * pru_shutdown handler: with the PCB locked, mark the socket as unable to
  * send any more and let unp_shutdown() propagate that to the peer.
  * Always returns 0.
  */
 static int
 uipc_shutdown(struct socket *so)
 {
 	struct unpcb *pcb = sotounpcb(so);
 
 	KASSERT(pcb != NULL, ("uipc_shutdown: unp == NULL"));
 
 	UNP_PCB_LOCK(pcb);
 	socantsendmore(so);
 	unp_shutdown(pcb);
 	UNP_PCB_UNLOCK(pcb);
 
 	return (0);
 }
 
 /*
  * pru_sockaddr handler: return a freshly allocated copy of the socket's
  * bound address in *nam, or of sun_noname when the socket is unbound.
  * The allocation uses M_WAITOK and is made before the PCB lock is taken.
  * Always returns 0.
  */
 static int
 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *pcb;
 	struct sockaddr *dst;
 	const struct sockaddr *src;
 
 	pcb = sotounpcb(so);
 	KASSERT(pcb != NULL, ("uipc_sockaddr: unp == NULL"));
 
 	dst = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	*nam = dst;
 
 	UNP_PCB_LOCK(pcb);
 	src = (pcb->unp_addr != NULL) ?
 	    (const struct sockaddr *)pcb->unp_addr : &sun_noname;
 	/* Only sa_len bytes of the source are copied. */
 	bcopy(src, dst, src->sa_len);
 	UNP_PCB_UNLOCK(pcb);
 
 	return (0);
 }
 
 /*
  * User-request switch for SOCK_DGRAM local sockets.  Entries are kept in
  * alphabetical order.  This table has no pru_listen, pru_rcvd or pru_ready
  * entry and receives through soreceive_dgram.
  */
 static struct pr_usrreqs uipc_usrreqs_dgram = {
 	.pru_abort =		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_close =		uipc_close,
 	.pru_connect =		uipc_connect,
 	.pru_connect2 =		uipc_connect2,
 	.pru_connectat =	uipc_connectat,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_dgram,
 };
 
 static struct pr_usrreqs uipc_usrreqs_seqpacket = {
 	.pru_abort =		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,	/* XXX: or...? */
 	.pru_close =		uipc_close,
 };
 
 static struct pr_usrreqs uipc_usrreqs_stream = {
 	.pru_abort = 		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_ready =		uipc_ready,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,
 	.pru_close =		uipc_close,
 };
 
 static int
 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct unpcb *unp;
 	struct xucred xu;
 	int error, optval;
 
 	if (sopt->sopt_level != SOL_LOCAL)
 		return (EINVAL);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case LOCAL_PEERCRED:
 			UNP_PCB_LOCK(unp);
 			if (unp->unp_flags & UNP_HAVEPC)
 				xu = unp->unp_peercred;
 			else {
 				if (so->so_type == SOCK_STREAM)
 					error = ENOTCONN;
 				else
 					error = EINVAL;
 			}
 			UNP_PCB_UNLOCK(unp);
 			if (error == 0)
 				error = sooptcopyout(sopt, &xu, sizeof(xu));
 			break;
 
 		case LOCAL_CREDS:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		case LOCAL_CREDS_PERSISTENT:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		case LOCAL_CONNWAIT:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		default:
 			error = EOPNOTSUPP;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case LOCAL_CREDS:
 		case LOCAL_CREDS_PERSISTENT:
 		case LOCAL_CONNWAIT:
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 					    sizeof(optval));
 			if (error)
 				break;
 
 #define	OPTSET(bit, exclusive) do {					\
 	UNP_PCB_LOCK(unp);						\
 	if (optval) {							\
 		if ((unp->unp_flags & (exclusive)) != 0) {		\
 			UNP_PCB_UNLOCK(unp);				\
 			error = EINVAL;					\
 			break;						\
 		}							\
 		unp->unp_flags |= (bit);				\
 	} else								\
 		unp->unp_flags &= ~(bit);				\
 	UNP_PCB_UNLOCK(unp);						\
 } while (0)
 
 			switch (sopt->sopt_name) {
 			case LOCAL_CREDS:
 				OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS);
 				break;
 
 			case LOCAL_CREDS_PERSISTENT:
 				OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT);
 				break;
 
 			case LOCAL_CONNWAIT:
 				OPTSET(UNP_CONNWAIT, 0);
 				break;
 
 			default:
 				break;
 			}
 			break;
 #undef	OPTSET
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 	return (error);
 }
 
 static int
 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (unp_connectat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect local socket 'so' to the socket bound at the path carried in
  * 'nam', looking the path up relative to descriptor 'fd'.
  *
  * Outline:
  *  1. Validate 'nam' and copy its path into a NUL-terminated local buffer.
  *  2. Under the PCB lock, wait until no other thread is busy with the peer
  *     pair, then mark the PCB UNP_CONNECTING.
  *  3. Resolve the path, check it is a socket vnode and that the caller may
  *     write to it.
  *  4. Under the vnode's pool mutex, fetch the bound PCB.  For protocols with
  *     PR_CONNREQUIRED a new socket is created from the listener with
  *     sonewconn() and used as the peer; otherwise the bound socket itself is
  *     the peer.
  *  5. Join the two with unp_connect2().
  *  6. On every path that got past step 2, clear UNP_CONNECTING.
  *
  * Errors returned directly by this function: EAFNOSUPPORT (family is not
  * AF_UNIX), EINVAL (bad address length), EOPNOTSUPP ('so' is listening),
  * EISCONN, EALREADY (another connect is in progress), ENOTSOCK,
  * ECONNREFUSED (nothing bound, not listening, or sonewconn() failed) and
  * EPROTOTYPE (socket types differ).  Errors from namei(), the MAC check,
  * VOP_ACCESS() and unp_connect2() are passed through.
  */
 static int
 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	struct mtx *vplock;
 	struct sockaddr_un *soun;
 	struct vnode *vp;
 	struct socket *so2;
 	struct unpcb *unp, *unp2, *unp3;
 	struct nameidata nd;
 	char buf[SOCK_MAXADDRLEN];
 	struct sockaddr *sa;
 	cap_rights_t rights;
 	int error, len;
 	bool connreq;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	/* Length of the path portion only; must be at least one byte. */
 	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 	if (len <= 0)
 		return (EINVAL);
 	soun = (struct sockaddr_un *)nam;
 	/* Work on a private, NUL-terminated copy of the path. */
 	bcopy(soun->sun_path, buf, len);
 	buf[len] = 0;
 
 	error = 0;
 	unp = sotounpcb(so);
 	UNP_PCB_LOCK(unp);
 	for (;;) {
 		/*
 		 * Wait for connection state to stabilize.  If a connection
 		 * already exists, give up.  For datagram sockets, which permit
 		 * multiple consecutive connect(2) calls, upper layers are
 		 * responsible for disconnecting in advance of a subsequent
 		 * connect(2), but this is not synchronized with PCB connection
 		 * state.
 		 *
 		 * Also make sure that no threads are currently attempting to
 		 * lock the peer socket, to ensure that unp_conn cannot
 		 * transition between two valid sockets while locks are dropped.
 		 */
 		if (SOLISTENING(so))
 			error = EOPNOTSUPP;
 		else if (unp->unp_conn != NULL)
 			error = EISCONN;
 		else if ((unp->unp_flags & UNP_CONNECTING) != 0) {
 			error = EALREADY;
 		}
 		if (error != 0) {
 			UNP_PCB_UNLOCK(unp);
 			return (error);
 		}
 		if (unp->unp_pairbusy > 0) {
 			/* Sleep on the PCB, then re-run all checks above. */
 			unp->unp_flags |= UNP_WAITING;
 			mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
 			continue;
 		}
 		break;
 	}
 	/* From here on every exit goes through "bad:", which clears this. */
 	unp->unp_flags |= UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 
 	connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0;
 	/*
 	 * Pre-allocate (M_WAITOK) the address buffer the new server-side
 	 * socket may need, before any mutex is taken.
 	 */
 	if (connreq)
 		sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	else
 		sa = NULL;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT));
 	error = namei(&nd);
 	if (error)
 		vp = NULL;
 	else
 		vp = nd.ni_vp;
 	ASSERT_VOP_LOCKED(vp, "unp_connect");
 	NDFREE_NOTHING(&nd);
 	if (error)
 		goto bad;
 
 	if (vp->v_type != VSOCK) {
 		error = ENOTSOCK;
 		goto bad;
 	}
 #ifdef MAC
 	error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 	if (error)
 		goto bad;
 #endif
 	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 	if (error)
 		goto bad;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 
 	/* The pool mutex keyed on the vnode is held until "bad2:". */
 	vplock = mtx_pool_find(mtxpool_sleep, vp);
 	mtx_lock(vplock);
 	VOP_UNP_CONNECT(vp, &unp2);
 	if (unp2 == NULL) {
 		error = ECONNREFUSED;
 		goto bad2;
 	}
 	so2 = unp2->unp_socket;
 	if (so->so_type != so2->so_type) {
 		error = EPROTOTYPE;
 		goto bad2;
 	}
 	if (connreq) {
 		/*
 		 * The bound socket must be listening; the connection is made
 		 * to a new socket created from it, not to the listener.
 		 */
 		if (SOLISTENING(so2)) {
 			CURVNET_SET(so2->so_vnet);
 			so2 = sonewconn(so2, 0);
 			CURVNET_RESTORE();
 		} else
 			so2 = NULL;
 		if (so2 == NULL) {
 			error = ECONNREFUSED;
 			goto bad2;
 		}
 		unp3 = sotounpcb(so2);
 		unp_pcb_lock_pair(unp2, unp3);
 		if (unp2->unp_addr != NULL) {
 			/*
 			 * Give the new socket a copy of the listener's bound
 			 * address; ownership of 'sa' moves to unp3.
 			 */
 			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 			unp3->unp_addr = (struct sockaddr_un *) sa;
 			sa = NULL;
 		}
 
 		unp_copy_peercred(td, unp3, unp, unp2);
 
 		/* Done with the listener; the new socket is the peer now. */
 		UNP_PCB_UNLOCK(unp2);
 		unp2 = unp3;
 
 		/*
 		 * It is safe to block on the PCB lock here since unp2 is
 		 * nascent and cannot be connected to any other sockets.
 		 */
 		UNP_PCB_LOCK(unp);
 #ifdef MAC
 		mac_socketpeer_set_from_socket(so, so2);
 		mac_socketpeer_set_from_socket(so2, so);
 #endif
 	} else {
 		unp_pcb_lock_pair(unp, unp2);
 	}
 	KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
 	    sotounpcb(so2) == unp2,
 	    ("%s: unp2 %p so2 %p", __func__, unp2, so2));
 	error = unp_connect2(so, so2, PRU_CONNECT);
 	unp_pcb_unlock_pair(unp, unp2);
 bad2:
 	mtx_unlock(vplock);
 bad:
 	if (vp != NULL) {
 		vput(vp);
 	}
 	/* 'sa' is NULL here if it was never allocated or was handed to unp3. */
 	free(sa, M_SONAME);
 	UNP_PCB_LOCK(unp);
 	KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 	    ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 	unp->unp_flags &= ~UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Record peer credentials on both ends of a new connection.
  *
  * client_unp receives credentials derived from thread 'td' via cru2xt().
  * server_unp receives a copy of the credentials stored on listen_unp (the
  * original comment notes that uipc_listen cached the listening process's
  * credentials there).  Both PCBs are flagged UNP_HAVEPC, and client_unp
  * additionally inherits listen_unp's UNP_WANTCRED_MASK flag bits.
  */
 void
 unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
     struct unpcb *server_unp, struct unpcb *listen_unp)
 {
 	struct xucred *from_td, *to_server;
 
 	/* Credentials taken from the calling thread. */
 	from_td = &client_unp->unp_peercred;
 	cru2xt(td, from_td);
 	client_unp->unp_flags |= UNP_HAVEPC;
 
 	/* Credentials cached on the listening PCB. */
 	to_server = &server_unp->unp_peercred;
 	memcpy(to_server, &listen_unp->unp_peercred,
 	    sizeof(server_unp->unp_peercred));
 	server_unp->unp_flags |= UNP_HAVEPC;
 
 	/* Carry over the listener's "wants credentials" settings. */
 	client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
 }
 
 static int
 unp_connect2(struct socket *so, struct socket *so2, int req)
 {
 	struct unpcb *unp;
 	struct unpcb *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 	unp2 = sotounpcb(so2);
 	KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 	KASSERT(unp->unp_conn == NULL,
 	    ("%s: socket %p is already connected", __func__, unp));
 
 	if (so2->so_type != so->so_type)
 		return (EPROTOTYPE);
 	unp->unp_conn = unp2;
 	unp_pcb_hold(unp2);
 	unp_pcb_hold(unp);
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		UNP_REF_LIST_LOCK();
 		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 		UNP_REF_LIST_UNLOCK();
 		soisconnected(so);
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		KASSERT(unp2->unp_conn == NULL,
 		    ("%s: socket %p is already connected", __func__, unp2));
 		unp2->unp_conn = unp;
 		if (req == PRU_CONNECT &&
 		    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 			soisconnecting(so);
 		else
 			soisconnected(so);
 		soisconnected(so2);
 		break;
 
 	default:
 		panic("unp_connect2");
 	}
 	return (0);
 }
 
 /*
  * Break the connection from 'unp' to 'unp2'.
  *
  * Both PCB locks must be held on entry and 'unp' must currently be connected
  * to 'unp2'.  On return neither lock is held by the caller any more: for
  * each PCB, unp_pcb_rele() is called and the lock is explicitly dropped only
  * when that call returns false.
  *
  * For SOCK_DGRAM only 'unp' is unlinked (and removed from the peer's
  * unp_refs list); for SOCK_STREAM and SOCK_SEQPACKET both directions are
  * cleared and both sockets, if still attached, are marked disconnected.
  */
 static void
 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 {
 	struct socket *so, *so2;
 #ifdef INVARIANTS
 	struct unpcb *unptmp;
 #endif
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 	KASSERT(unp->unp_conn == unp2,
 	    ("%s: unpcb %p is not connected to %p", __func__, unp, unp2));
 
 	unp->unp_conn = NULL;
 	so = unp->unp_socket;
 	so2 = unp2->unp_socket;
 	switch (unp->unp_socket->so_type) {
 	case SOCK_DGRAM:
 		UNP_REF_LIST_LOCK();
 #ifdef INVARIANTS
 		/* Debug builds: verify unp really is on the peer's list. */
 		LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) {
 			if (unptmp == unp)
 				break;
 		}
 		KASSERT(unptmp != NULL,
 		    ("%s: %p not found in reflist of %p", __func__, unp, unp2));
 #endif
 		LIST_REMOVE(unp, unp_reflink);
 		UNP_REF_LIST_UNLOCK();
 		if (so) {
 			/* Only the state bit is cleared for datagram sockets. */
 			SOCK_LOCK(so);
 			so->so_state &= ~SS_ISCONNECTED;
 			SOCK_UNLOCK(so);
 		}
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		if (so)
 			soisdisconnected(so);
 		MPASS(unp2->unp_conn == unp);
 		unp2->unp_conn = NULL;
 		if (so2)
 			soisdisconnected(so2);
 		break;
 	}
 
 	/*
 	 * Drop the two references taken at connect time.  When a PCB was
 	 * connected to itself both references are on the same object and only
 	 * one lock is held, so the first release uses the "not last" variant
 	 * and the lock is dropped once.
 	 */
 	if (unp == unp2) {
 		unp_pcb_rele_notlast(unp);
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 	} else {
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 		if (!unp_pcb_rele(unp2))
 			UNP_PCB_UNLOCK(unp2);
 	}
 }
 
 /*
  * unp_pcblist() walks the global list of struct unpcb's to generate a
  * pointer list, bumping the refcount on each unpcb.  It then copies them out
  * sequentially, validating the generation number on each to see if it has
  * been detached.  All of this is necessary because copyout() may sleep on
  * disk I/O.
  *
  * arg1 selects which list is reported (SOCK_STREAM, SOCK_DGRAM or
  * SOCK_SEQPACKET); any other value panics.
  *
  * Output layout: one struct xunpgen header, one struct xunpcb per visible
  * PCB, and a trailing struct xunpgen with refreshed counters.
  *
  * A request without an output buffer only receives a size estimate.  A
  * request that supplies new data is rejected with EPERM.
  *
  * Error handling: the first copy-out failure is the error returned.  After
  * a failure no further records and no trailer are copied out, but the loop
  * still runs to completion so that every reference taken on a PCB is
  * released.
  */
 static int
 unp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct unpcb *unp, **unp_list;
 	unp_gen_t gencnt;
 	struct xunpgen *xug;
 	struct unp_head *head;
 	struct xunpcb *xu;
 	u_int i;
 	int error, n;
 
 	switch ((intptr_t)arg1) {
 	case SOCK_STREAM:
 		head = &unp_shead;
 		break;
 
 	case SOCK_DGRAM:
 		head = &unp_dhead;
 		break;
 
 	case SOCK_SEQPACKET:
 		head = &unp_sphead;
 		break;
 
 	default:
 		panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 	}
 
 	/*
 	 * The process of preparing the PCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		/* Size probe: estimate with 1/8 slack for sockets in flight. */
 		n = unp_count;
 		req->oldidx = 2 * (sizeof *xug)
 			+ (n + n/8) * sizeof(struct xunpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
 	UNP_LINK_RLOCK();
 	gencnt = unp_gencnt;
 	n = unp_count;
 	UNP_LINK_RUNLOCK();
 
 	xug->xug_len = sizeof *xug;
 	xug->xug_count = n;
 	xug->xug_gen = gencnt;
 	xug->xug_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, xug, sizeof *xug);
 	if (error) {
 		free(xug, M_TEMP);
 		return (error);
 	}
 
 	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 
 	/*
 	 * Pass 1: under the link lock, collect and hold up to n PCBs that
 	 * existed at snapshot time and that the caller is allowed to see.
 	 */
 	UNP_LINK_RLOCK();
 	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 	     unp = LIST_NEXT(unp, unp_link)) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_gencnt <= gencnt) {
 			if (cr_cansee(req->td->td_ucred,
 			    unp->unp_socket->so_cred)) {
 				UNP_PCB_UNLOCK(unp);
 				continue;
 			}
 			unp_list[i++] = unp;
 			unp_pcb_hold(unp);
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
 	UNP_LINK_RUNLOCK();
 	n = i;			/* In case we lost some during malloc. */
 
 	/*
 	 * Pass 2: without the link lock, release each held PCB and copy out
 	 * those that are still alive.
 	 */
 	error = 0;
 	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 	for (i = 0; i < n; i++) {
 		unp = unp_list[i];
 		UNP_PCB_LOCK(unp);
 		/* True means the PCB went away with our reference. */
 		if (unp_pcb_rele(unp))
 			continue;
 
 		/*
 		 * Once a copy-out has failed, only keep dropping references;
 		 * a later successful copy must not hide the earlier error.
 		 */
 		if (error == 0 && unp->unp_gencnt <= gencnt) {
 			xu->xu_len = sizeof *xu;
 			xu->xu_unpp = (uintptr_t)unp;
 			/*
 			 * XXX - need more locking here to protect against
 			 * connect/disconnect races for SMP.
 			 */
 			if (unp->unp_addr != NULL)
 				bcopy(unp->unp_addr, &xu->xu_addr,
 				      unp->unp_addr->sun_len);
 			else
 				bzero(&xu->xu_addr, sizeof(xu->xu_addr));
 			if (unp->unp_conn != NULL &&
 			    unp->unp_conn->unp_addr != NULL)
 				bcopy(unp->unp_conn->unp_addr,
 				      &xu->xu_caddr,
 				      unp->unp_conn->unp_addr->sun_len);
 			else
 				bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
 			xu->unp_vnode = (uintptr_t)unp->unp_vnode;
 			xu->unp_conn = (uintptr_t)unp->unp_conn;
 			xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
 			xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
 			xu->unp_gencnt = unp->unp_gencnt;
 			sotoxsocket(unp->unp_socket, &xu->xu_socket);
 			/* Drop the PCB lock before the copy-out. */
 			UNP_PCB_UNLOCK(unp);
 			error = SYSCTL_OUT(req, xu, sizeof *xu);
 		} else {
 			UNP_PCB_UNLOCK(unp);
 		}
 	}
 	free(xu, M_TEMP);
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xug->xug_gen = unp_gencnt;
 		xug->xug_sogen = so_gencnt;
 		xug->xug_count = unp_count;
 		error = SYSCTL_OUT(req, xug, sizeof *xug);
 	}
 	free(unp_list, M_TEMP);
 	free(xug, M_TEMP);
 	return (error);
 }
 
 /*
  * Read-only, MPSAFE, opaque "pcblist" sysctl nodes, one under each of
  * _net_local_dgram, _net_local_stream and _net_local_seqpacket.  All three
  * share the unp_pcblist handler; the socket type is passed to it as arg1
  * and the data is described as "S,xunpcb".
  */
 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local datagram sockets");
 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local stream sockets");
 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
     "List of active local seqpacket sockets");
 
 /*
  * Propagate a send-side shutdown to the peer: for SOCK_STREAM and
  * SOCK_SEQPACKET sockets that have a connected peer with a socket still
  * attached, mark that socket as unable to receive more.  Other cases are a
  * no-op.  The caller must hold the PCB lock.
  */
 static void
 unp_shutdown(struct unpcb *unp)
 {
 	struct unpcb *peer;
 	struct socket *peer_so;
 	int type;
 
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	peer = unp->unp_conn;
 	type = unp->unp_socket->so_type;
 	if (type != SOCK_STREAM && type != SOCK_SEQPACKET)
 		return;
 	if (peer == NULL)
 		return;
 
 	peer_so = peer->unp_socket;
 	if (peer_so != NULL)
 		socantrcvmore(peer_so);
 }
 
 /*
  * Drop 'unp': flag its socket (if any) with ECONNRESET, disconnect it from
  * its peer if it has one, and release one PCB reference.
  *
  * The PCB lock is taken here and is not held on return.
  */
 static void
 unp_drop(struct unpcb *unp)
 {
 	struct unpcb *peer;
 	struct socket *so;
 
 	/*
 	 * Regardless of whether the socket's peer dropped the connection
 	 * with this socket by aborting or disconnecting, POSIX requires
 	 * that ECONNRESET is returned.
 	 */
 
 	UNP_PCB_LOCK(unp);
 	so = unp->unp_socket;
 	if (so != NULL)
 		so->so_error = ECONNRESET;
 
 	peer = unp_pcb_lock_peer(unp);
 	if (peer == NULL) {
 		/* No peer: just release, unlocking unless rele reports true. */
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 		return;
 	}
 
 	/* Last reference dropped in unp_disconnect(). */
 	unp_pcb_rele_notlast(unp);
 	unp_disconnect(unp, peer);
 }
 
 /*
  * Release 'fdcount' in-flight descriptors referenced by the pointer array
  * 'fdep': free each entry's capability data and discard its file.  The
  * entries are then freed with a single free() of fdep[0].  fdcount must be
  * positive.
  */
 static void
 unp_freerights(struct filedescent **fdep, int fdcount)
 {
 	struct filedescent *entry;
 	struct file *fp;
 	int idx;
 
 	KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 
 	for (idx = 0; idx < fdcount; idx++) {
 		entry = fdep[idx];
 		fp = entry->fde_file;
 		filecaps_free(&entry->fde_caps);
 		unp_discard(fp);
 	}
 	free(fdep[0], M_FILECAPS);
 }
 
 /*
  * Convert the control messages in mbuf 'control' into the form handed to
  * the receiving process and append the results to the chain at *controlp.
  *
  * SOL_SOCKET/SCM_RIGHTS messages carry an array of struct filedescent
  * pointers; these are installed into the current thread's descriptor table
  * and replaced by the resulting descriptor numbers.  Every other message is
  * copied unchanged.
  *
  * If 'controlp' is NULL, or once an error has occurred, nothing more is
  * emitted: remaining SCM_RIGHTS payloads are released with unp_freerights()
  * and other messages are skipped.
  *
  * 'flags' is only examined for MSG_CMSG_CLOEXEC, which makes the installed
  * descriptors close-on-exec.  'control' is always freed before returning.
  *
  * Returns 0 or the last error recorded: EINVAL (malformed header), E2BIG
  * (no mbuf for the descriptor list), EMSGSIZE (descriptor allocation
  * failed) or ENOBUFS (no mbuf for a copied message).
  */
 static int
 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 {
 	struct thread *td = curthread;		/* XXX */
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	int i;
 	int *fdp;
 	struct filedesc *fdesc = td->td_proc->p_fd;
 	struct filedescent **fdep;
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error, newfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	error = 0;
 	if (controlp != NULL) /* controlp == NULL => free control messages */
 		*controlp = NULL;
 	while (cm != NULL) {
 		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 			error = EINVAL;
 			break;
 		}
 		data = CMSG_DATA(cm);
 		/* Payload length: message length minus header and padding. */
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 		if (cm->cmsg_level == SOL_SOCKET
 		    && cm->cmsg_type == SCM_RIGHTS) {
 			newfds = datalen / sizeof(*fdep);
 			if (newfds == 0)
 				goto next;
 			fdep = data;
 
 			/* If we're not outputting the descriptors free them. */
 			if (error || controlp == NULL) {
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 			FILEDESC_XLOCK(fdesc);
 
 			/*
 			 * Now change each pointer to an fd in the global
 			 * table to an integer that is the index to the local
 			 * fd table entry that we set up to point to the
 			 * global one we are transferring.
 			 */
 			newlen = newfds * sizeof(int);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = E2BIG;
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 
 			fdp = (int *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			/* Reserve all descriptor numbers in one call. */
 			if (fdallocn(td, 0, fdp, newfds) != 0) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = EMSGSIZE;
 				unp_freerights(fdep, newfds);
 				m_freem(*controlp);
 				*controlp = NULL;
 				goto next;
 			}
 			for (i = 0; i < newfds; i++, fdp++) {
 				_finstall(fdesc, fdep[i]->fde_file, *fdp,
 				    (flags & MSG_CMSG_CLOEXEC) != 0 ? O_CLOEXEC : 0,
 				    &fdep[i]->fde_caps);
 				unp_externalize_fp(fdep[i]->fde_file);
 			}
 
 			/*
 			 * The new type indicates that the mbuf data refers to
 			 * kernel resources that may need to be released before
 			 * the mbuf is freed.
 			 */
 			m_chtype(*controlp, MT_EXTCONTROL);
 			FILEDESC_XUNLOCK(fdesc);
 			/* The entries themselves are no longer needed. */
 			free(fdep[0], M_FILECAPS);
 		} else {
 			/* We can just copy anything else across. */
 			if (error || controlp == NULL)
 				goto next;
 			*controlp = sbcreatecontrol(NULL, datalen,
 			    cm->cmsg_type, cm->cmsg_level);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto next;
 			}
 			bcopy(data,
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 			    datalen);
 		}
 		/* Advance the output tail; only reached after a successful emit. */
 		controlp = &(*controlp)->m_next;
 
 next:
 		/* Step to the next message, or stop when the buffer is used up. */
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 	m_freem(control);
 	return (error);
 }
 
 static void
 unp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(unp_zone, maxsockets);
 }
 
 #ifdef INVARIANTS
 /*
  * Zone destructor, compiled only with INVARIANTS: assert that a PCB being
  * returned to the zone has no remaining references, socket, vnode, peer or
  * address attached.
  */
 static void
 unp_zdtor(void *mem, int size __unused, void *arg __unused)
 {
 	struct unpcb *pcb = mem;
 
 	KASSERT(LIST_EMPTY(&pcb->unp_refs),
 	    ("%s: unpcb %p has lingering refs", __func__, pcb));
 	KASSERT(pcb->unp_socket == NULL,
 	    ("%s: unpcb %p has socket backpointer", __func__, pcb));
 	KASSERT(pcb->unp_vnode == NULL,
 	    ("%s: unpcb %p has vnode references", __func__, pcb));
 	KASSERT(pcb->unp_conn == NULL,
 	    ("%s: unpcb %p is still connected", __func__, pcb));
 	KASSERT(pcb->unp_addr == NULL,
 	    ("%s: unpcb %p has leaked addr", __func__, pcb));
 }
 #endif
 
 /*
  * One-time initialization of the local-domain socket layer, run via SYSINIT:
  * create and size the unpcb zone, hook maxsockets changes, and set up the
  * PCB lists, the deferred-close list, the GC and deferral tasks and the
  * global locks.
  */
 static void
 unp_init(void *arg __unused)
 {
 	/* The checking destructor only exists in INVARIANTS kernels. */
 #ifdef INVARIANTS
 	uma_dtor zone_dtor = unp_zdtor;
 #else
 	uma_dtor zone_dtor = NULL;
 #endif
 
 	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, zone_dtor,
 	    NULL, NULL, UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(unp_zone, maxsockets);
 	uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 
 	LIST_INIT(&unp_dhead);
 	LIST_INIT(&unp_shead);
 	LIST_INIT(&unp_sphead);
 	SLIST_INIT(&unp_defers);
 
 	TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 	TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 
 	UNP_LINK_LOCK_INIT();
 	UNP_DEFERRED_LOCK_INIT();
 }
 SYSINIT(unp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, unp_init, NULL);
 
 static void
 unp_internalize_cleanup_rights(struct mbuf *control)
 {
 	struct cmsghdr *cp;
 	struct mbuf *m;
 	void *data;
 	socklen_t datalen;
 
 	for (m = control; m != NULL; m = m->m_next) {
 		cp = mtod(m, struct cmsghdr *);
 		if (cp->cmsg_level != SOL_SOCKET ||
 		    cp->cmsg_type != SCM_RIGHTS)
 			continue;
 		data = CMSG_DATA(cp);
 		datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
 		unp_freerights(data, datalen / sizeof(struct filedesc *));
 	}
 }
 
 static int
 unp_internalize(struct mbuf **controlp, struct thread *td)
 {
 	struct mbuf *control, **initial_controlp;
 	struct proc *p;
 	struct filedesc *fdesc;
 	struct bintime *bt;
 	struct cmsghdr *cm;
 	struct cmsgcred *cmcred;
 	struct filedescent *fde, **fdep, *fdev;
 	struct file *fp;
 	struct timeval *tv;
 	struct timespec *ts;
 	void *data;
 	socklen_t clen, datalen;
 	int i, j, error, *fdp, oldfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	p = td->td_proc;
 	fdesc = p->p_fd;
 	error = 0;
 	control = *controlp;
 	clen = control->m_len;
 	*controlp = NULL;
 	initial_controlp = controlp;
 	for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
 		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 		    || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
 			error = EINVAL;
 			goto out;
 		}
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 
 		switch (cm->cmsg_type) {
 		/*
 		 * Fill in credential information.
 		 */
 		case SCM_CREDS:
 			*controlp = sbcreatecontrol_how(NULL, sizeof(*cmcred),
 			    SCM_CREDS, SOL_SOCKET, M_WAITOK);
 			cmcred = (struct cmsgcred *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			cmcred->cmcred_pid = p->p_pid;
 			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 			cmcred->cmcred_euid = td->td_ucred->cr_uid;
 			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 			    CMGROUP_MAX);
 			for (i = 0; i < cmcred->cmcred_ngroups; i++)
 				cmcred->cmcred_groups[i] =
 				    td->td_ucred->cr_groups[i];
 			break;
 
 		case SCM_RIGHTS:
 			oldfds = datalen / sizeof (int);
 			if (oldfds == 0)
 				break;
 			/*
 			 * Check that all the FDs passed in refer to legal
 			 * files.  If not, reject the entire operation.
 			 */
 			fdp = data;
 			FILEDESC_SLOCK(fdesc);
 			for (i = 0; i < oldfds; i++, fdp++) {
 				fp = fget_noref(fdesc, *fdp);
 				if (fp == NULL) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EBADF;
 					goto out;
 				}
 				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EOPNOTSUPP;
 					goto out;
 				}
 			}
 
 			/*
 			 * Now replace the integer FDs with pointers to the
 			 * file structure and capability rights.
 			 */
 			newlen = oldfds * sizeof(fdep[0]);
 			*controlp = sbcreatecontrol_how(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET, M_WAITOK);
 			fdp = data;
 			for (i = 0; i < oldfds; i++, fdp++) {
 				if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
 					fdp = data;
 					for (j = 0; j < i; j++, fdp++) {
 						fdrop(fdesc->fd_ofiles[*fdp].
 						    fde_file, td);
 					}
 					FILEDESC_SUNLOCK(fdesc);
 					error = EBADF;
 					goto out;
 				}
 			}
 			fdp = data;
 			fdep = (struct filedescent **)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 			    M_WAITOK);
 			for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 				fde = &fdesc->fd_ofiles[*fdp];
 				fdep[i] = fdev;
 				fdep[i]->fde_file = fde->fde_file;
 				filecaps_copy(&fde->fde_caps,
 				    &fdep[i]->fde_caps, true);
 				unp_internalize_fp(fdep[i]->fde_file);
 			}
 			FILEDESC_SUNLOCK(fdesc);
 			break;
 
 		case SCM_TIMESTAMP:
 			*controlp = sbcreatecontrol_how(NULL, sizeof(*tv),
 			    SCM_TIMESTAMP, SOL_SOCKET, M_WAITOK);
 			tv = (struct timeval *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			microtime(tv);
 			break;
 
 		case SCM_BINTIME:
 			*controlp = sbcreatecontrol_how(NULL, sizeof(*bt),
 			    SCM_BINTIME, SOL_SOCKET, M_WAITOK);
 			bt = (struct bintime *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			bintime(bt);
 			break;
 
 		case SCM_REALTIME:
 			*controlp = sbcreatecontrol_how(NULL, sizeof(*ts),
 			    SCM_REALTIME, SOL_SOCKET, M_WAITOK);
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanotime(ts);
 			break;
 
 		case SCM_MONOTONIC:
 			*controlp = sbcreatecontrol_how(NULL, sizeof(*ts),
 			    SCM_MONOTONIC, SOL_SOCKET, M_WAITOK);
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanouptime(ts);
 			break;
 
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		if (*controlp != NULL)
 			controlp = &(*controlp)->m_next;
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 out:
 	if (error != 0 && initial_controlp != NULL)
 		unp_internalize_cleanup_rights(*initial_controlp);
 	m_freem(control);
 	return (error);
 }
 
 static struct mbuf *
 unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
 {
 	struct mbuf *m, *n, *n_prev;
 	const struct cmsghdr *cm;
 	int ngroups, i, cmsgtype;
 	size_t ctrlsz;
 
 	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 	if (mode & UNP_WANTCRED_ALWAYS) {
 		ctrlsz = SOCKCRED2SIZE(ngroups);
 		cmsgtype = SCM_CREDS2;
 	} else {
 		ctrlsz = SOCKCREDSIZE(ngroups);
 		cmsgtype = SCM_CREDS;
 	}
 
 	m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET);
 	if (m == NULL)
 		return (control);
 
 	if (mode & UNP_WANTCRED_ALWAYS) {
 		struct sockcred2 *sc;
 
 		sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 		sc->sc_version = 0;
 		sc->sc_pid = td->td_proc->p_pid;
 		sc->sc_uid = td->td_ucred->cr_ruid;
 		sc->sc_euid = td->td_ucred->cr_uid;
 		sc->sc_gid = td->td_ucred->cr_rgid;
 		sc->sc_egid = td->td_ucred->cr_gid;
 		sc->sc_ngroups = ngroups;
 		for (i = 0; i < sc->sc_ngroups; i++)
 			sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 	} else {
 		struct sockcred *sc;
 
 		sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 		sc->sc_uid = td->td_ucred->cr_ruid;
 		sc->sc_euid = td->td_ucred->cr_uid;
 		sc->sc_gid = td->td_ucred->cr_rgid;
 		sc->sc_egid = td->td_ucred->cr_gid;
 		sc->sc_ngroups = ngroups;
 		for (i = 0; i < sc->sc_ngroups; i++)
 			sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 	}
 
 	/*
 	 * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 	 * created SCM_CREDS control message (struct sockcred) has another
 	 * format.
 	 */
 	if (control != NULL && cmsgtype == SCM_CREDS)
 		for (n = control, n_prev = NULL; n != NULL;) {
 			cm = mtod(n, struct cmsghdr *);
     			if (cm->cmsg_level == SOL_SOCKET &&
 			    cm->cmsg_type == SCM_CREDS) {
     				if (n_prev == NULL)
 					control = n->m_next;
 				else
 					n_prev->m_next = n->m_next;
 				n = m_free(n);
 			} else {
 				n_prev = n;
 				n = n->m_next;
 			}
 		}
 
 	/* Prepend it to the head. */
 	m->m_next = control;
 	return (m);
 }
 
 /*
  * Map a file to its unpcb.  Returns NULL unless the file is a socket with
  * attached socket data whose protocol belongs to the local domain.
  */
 static struct unpcb *
 fptounp(struct file *fp)
 {
 	struct socket *sock;
 
 	if (fp->f_type == DTYPE_SOCKET) {
 		sock = fp->f_data;
 		if (sock != NULL &&
 		    sock->so_proto->pr_domain == &localdomain)
 			return (sotounpcb(sock));
 	}
 	return (NULL);
 }
 
 /*
  * Drop one in-flight reference to fp.  If unp_externalize_fp() reports that
  * fp is a local-domain socket, the close is queued on unp_defers and handed
  * to unp_defer_task; any other file is closed right here.
  */
 static void
 unp_discard(struct file *fp)
 {
 	struct unp_defer *entry;
 
 	if (!unp_externalize_fp(fp)) {
 		closef_nothread(fp);
 		return;
 	}
 
 	entry = malloc(sizeof(*entry), M_TEMP, M_WAITOK);
 	entry->ud_fp = fp;
 	UNP_DEFERRED_LOCK();
 	SLIST_INSERT_HEAD(&unp_defers, entry, ud_link);
 	UNP_DEFERRED_UNLOCK();
 	atomic_add_int(&unp_defers_count, 1);
 	taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 }
 
 /*
  * Task handler that closes the files queued by unp_discard().  The shared
  * unp_defers list is swapped into a private list under the deferred lock and
  * drained with the lock released; this repeats until the shared list is
  * found empty.
  */
 static void
 unp_process_defers(void *arg __unused, int pending)
 {
 	SLIST_HEAD(, unp_defer) batch;
 	struct unp_defer *entry;
 	int closed;
 
 	SLIST_INIT(&batch);
 	UNP_DEFERRED_LOCK();
 	while (!SLIST_EMPTY(&unp_defers)) {
 		/* batch is empty here, so the swap empties unp_defers. */
 		SLIST_SWAP(&unp_defers, &batch, unp_defer);
 		UNP_DEFERRED_UNLOCK();
 
 		for (closed = 0; !SLIST_EMPTY(&batch); closed++) {
 			entry = SLIST_FIRST(&batch);
 			SLIST_REMOVE_HEAD(&batch, ud_link);
 			closef_nothread(entry->ud_fp);
 			free(entry, M_TEMP);
 		}
 		atomic_add_int(&unp_defers_count, -closed);
 
 		UNP_DEFERRED_LOCK();
 	}
 	UNP_DEFERRED_UNLOCK();
 }
 
 /*
  * Account for fp being placed in flight: bump the global unp_rights count
  * and, when fp is a local-domain socket, record fp in its unpcb and bump
  * that unpcb's unp_msgcount.  All updates happen under the link write lock.
  */
 static void
 unp_internalize_fp(struct file *fp)
 {
 	struct unpcb *pcb;
 
 	UNP_LINK_WLOCK();
 	pcb = fptounp(fp);
 	if (pcb != NULL) {
 		pcb->unp_file = fp;
 		pcb->unp_msgcount++;
 	}
 	unp_rights++;
 	UNP_LINK_WUNLOCK();
 }
 
 /*
  * Undo the accounting done by unp_internalize_fp(): decrement unp_rights
  * and, for a local-domain socket, its unpcb's unp_msgcount.
  *
  * Returns 1 if fp is a local-domain socket and 0 otherwise.
  */
 static int
 unp_externalize_fp(struct file *fp)
 {
 	struct unpcb *pcb;
 	int is_unp;
 
 	UNP_LINK_WLOCK();
 	pcb = fptounp(fp);
 	is_unp = (pcb != NULL) ? 1 : 0;
 	if (is_unp)
 		pcb->unp_msgcount--;
 	unp_rights--;
 	UNP_LINK_WUNLOCK();
 	return (is_unp);
 }
 
 /*
  * unp_marked indicates whether additional work has been deferred for a future
  * pass through unp_gc().  It is only modified from the gc task and does not
  * require explicit synchronization.
  */
 static int	unp_marked;
 
 /*
  * unp_scan() callback: for every file in the array that is a local-domain
  * socket currently flagged UNPGC_DEAD, subtract one from its unp_gcrefs.
  */
 static void
 unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *pcb;
 	int idx;
 
 	/* Only the gc task may call this. */
 	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 	    ("%s: not on gc callout", __func__));
 	UNP_LINK_LOCK_ASSERT();
 
 	for (idx = 0; idx < fdcount; idx++) {
 		pcb = fptounp(fdep[idx]->fde_file);
 		if (pcb != NULL && (pcb->unp_gcflag & UNPGC_DEAD) != 0)
 			pcb->unp_gcrefs--;
 	}
 }
 
 /*
  * unp_scan() callback: for every file in the array that is a local-domain
  * socket currently flagged UNPGC_DEAD, add one back to its unp_gcrefs and
  * bump unp_marked so that unp_gc() performs another pass.
  */
 static void
 unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *pcb;
 	int idx;
 
 	/* Only the gc task may call this. */
 	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 	    ("%s: not on gc callout", __func__));
 	UNP_LINK_LOCK_ASSERT();
 
 	for (idx = 0; idx < fdcount; idx++) {
 		pcb = fptounp(fdep[idx]->fde_file);
 		if (pcb != NULL && (pcb->unp_gcflag & UNPGC_DEAD) != 0) {
 			pcb->unp_gcrefs++;
 			unp_marked++;
 		}
 	}
 }
 
 /*
  * Apply op to the SCM_RIGHTS payloads reachable from unp's socket.  For a
  * listening socket the receive buffers of the sockets on its sol_comp queue
  * are scanned; otherwise the socket's own receive buffer is scanned.
  * Sockets flagged UNPGC_IGNORE_RIGHTS are left alone.
  */
 static void
 unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int))
 {
 	struct socket *so, *child;
 
 	so = unp->unp_socket;
 	SOCK_LOCK(so);
 	if (!SOLISTENING(so)) {
 		/* Ordinary socket: visit the rights queued on it. */
 		if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 			SOCKBUF_LOCK(&so->so_rcv);
 			unp_scan(so->so_rcv.sb_mb, op);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 		}
 		SOCK_UNLOCK(so);
 		return;
 	}
 
 	/* Listening socket: visit every socket in the accept queue. */
 	TAILQ_FOREACH(child, &so->sol_comp, so_list) {
 		if ((sotounpcb(child)->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 			SOCKBUF_LOCK(&child->so_rcv);
 			unp_scan(child->so_rcv.sb_mb, op);
 			SOCKBUF_UNLOCK(&child->so_rcv);
 		}
 	}
 	SOCK_UNLOCK(so);
 }
 
 /*
  * Read-only statistics exported under net.local.
  */
 
 /* Grows by the number of files released at the end of each unp_gc() run. */
 static int unp_recycled;
 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
     "Number of unreachable sockets claimed by the garbage collector.");
 
 /* Incremented once at the start of every unp_gc() invocation. */
 static int unp_taskcount;
 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
     "Number of times the garbage collector has run.");
 
 /* unp_count is defined elsewhere in this file. */
 SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, 
     "Number of active local sockets.");
 
 /*
  * Garbage collector task for local-domain sockets whose only file
  * references come from SCM_RIGHTS messages sitting in receive buffers.
  *
  * The work proceeds in phases:
  *  1. Under the link read lock, collect every socket whose file reference
  *     count equals its in-flight message count onto a local "dead" list.
  *  2. Subtract the references that dead sockets hold on each other.
  *  3. Repeatedly rescue sockets that still have references left, restoring
  *     the references they hold, until a pass rescues nothing new.
  *  4. Take a file reference on each remaining socket, flush its receive
  *     buffer with sorflush(), and finally drop the reference.
  *
  * Both arguments are unused.
  */
 static void
 unp_gc(__unused void *arg, int pending)
 {
 	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 				    NULL };
 	struct unp_head **head;
 	struct unp_head unp_deadhead;	/* List of potentially-dead sockets. */
 	struct file *f, **unref;
 	struct unpcb *unp, *unptmp;
 	int i, total, unp_unreachable;
 
 	LIST_INIT(&unp_deadhead);
 	unp_taskcount++;
 	UNP_LINK_RLOCK();
 	/*
 	 * First determine which sockets may be in cycles.
 	 */
 	unp_unreachable = 0;
 
 	for (head = heads; *head != NULL; head++)
 		LIST_FOREACH(unp, *head, unp_link) {
 			KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0,
 			    ("%s: unp %p has unexpected gc flags 0x%x",
 			    __func__, unp, (unsigned int)unp->unp_gcflag));
 
 			f = unp->unp_file;
 
 			/*
 			 * Check for an unreachable socket potentially in a
 			 * cycle.  It must be in a queue as indicated by
 			 * msgcount, and this must equal the file reference
 			 * count.  Note that when msgcount is 0 the file is
 			 * NULL.
 			 */
 			if (f != NULL && unp->unp_msgcount != 0 &&
 			    refcount_load(&f->f_count) == unp->unp_msgcount) {
 				LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead);
 				unp->unp_gcflag |= UNPGC_DEAD;
 				/* Start from the number of in-flight refs. */
 				unp->unp_gcrefs = unp->unp_msgcount;
 				unp_unreachable++;
 			}
 		}
 
 	/*
 	 * Scan all sockets previously marked as potentially being in a cycle
 	 * and remove the references each socket holds on any UNPGC_DEAD
 	 * sockets in its queue.  After this step, all remaining references on
 	 * sockets marked UNPGC_DEAD should not be part of any cycle.
 	 */
 	LIST_FOREACH(unp, &unp_deadhead, unp_dead)
 		unp_gc_scan(unp, unp_remove_dead_ref);
 
 	/*
 	 * If a socket still has a positive refcount, it cannot be in a
 	 * cycle.  In this case increment refcount of all children iteratively.
 	 * Stop the scan once we do a complete loop without discovering
 	 * a new reachable socket.
 	 */
 	do {
 		/* unp_restore_undead_ref() bumps this when it restores a ref. */
 		unp_marked = 0;
 		LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp)
 			if (unp->unp_gcrefs > 0) {
 				unp->unp_gcflag &= ~UNPGC_DEAD;
 				LIST_REMOVE(unp, unp_dead);
 				KASSERT(unp_unreachable > 0,
 				    ("%s: unp_unreachable underflow.",
 				    __func__));
 				unp_unreachable--;
 				unp_gc_scan(unp, unp_restore_undead_ref);
 			}
 	} while (unp_marked);
 
 	UNP_LINK_RUNLOCK();
 
 	/* Nothing left on the dead list: no garbage this round. */
 	if (unp_unreachable == 0)
 		return;
 
 	/*
 	 * Allocate space for a local array of dead unpcbs.
 	 * TODO: can this path be simplified by instead using the local
 	 * dead list at unp_deadhead, after taking out references
 	 * on the file object and/or unpcb and dropping the link lock?
 	 */
 	unref = malloc(unp_unreachable * sizeof(struct file *),
 	    M_TEMP, M_WAITOK);
 
 	/*
 	 * Iterate looking for sockets which have been specifically marked
 	 * as unreachable and store them locally.
 	 */
 	UNP_LINK_RLOCK();
 	total = 0;
 	LIST_FOREACH(unp, &unp_deadhead, unp_dead) {
 		KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0,
 		    ("%s: unp %p not marked UNPGC_DEAD", __func__, unp));
 		unp->unp_gcflag &= ~UNPGC_DEAD;
 		f = unp->unp_file;
 		/*
 		 * The link lock was dropped around malloc(), so re-check the
 		 * candidate condition; skip the socket if it no longer holds
 		 * or if a file reference cannot be taken.
 		 */
 		if (unp->unp_msgcount == 0 || f == NULL ||
 		    refcount_load(&f->f_count) != unp->unp_msgcount ||
 		    !fhold(f))
 			continue;
 		unref[total++] = f;
 		KASSERT(total <= unp_unreachable,
 		    ("%s: incorrect unreachable count.", __func__));
 	}
 	UNP_LINK_RUNLOCK();
 
 	/*
 	 * Now flush all sockets, free'ing rights.  This will free the
 	 * struct files associated with these sockets but leave each socket
 	 * with one remaining ref.
 	 */
 	for (i = 0; i < total; i++) {
 		struct socket *so;
 
 		so = unref[i]->f_data;
 		CURVNET_SET(so->so_vnet);
 		sorflush(so);
 		CURVNET_RESTORE();
 	}
 
 	/*
 	 * And finally release the sockets so they can be reclaimed.
 	 */
 	for (i = 0; i < total; i++)
 		fdrop(unref[i], NULL);
 	unp_recycled += total;
 	free(unref, M_TEMP);
 }
 
 static void
 unp_dispose_mbuf(struct mbuf *m)
 {
 
 	if (m)
 		unp_scan(m, unp_freerights);
 }
 
 /*
  * Dispose of everything queued on a (non-listening) socket's receive
  * buffer, including any in-flight rights it carries.
  *
  * Synchronize against unp_gc, which can trip over data as we are freeing it:
  * UNPGC_IGNORE_RIGHTS is set under the link write lock before the buffer is
  * touched, which makes unp_gc_scan() skip this socket.
  */
 static void
 unp_dispose(struct socket *so)
 {
 	struct sockbuf *sb = &so->so_rcv;
 	struct unpcb *unp;
 	struct mbuf *m;
 
 	MPASS(!SOLISTENING(so));
 
 	unp = sotounpcb(so);
 	UNP_LINK_WLOCK();
 	unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 	UNP_LINK_WUNLOCK();
 
 	/*
 	 * Grab our special mbufs before calling sbrelease().
 	 */
 	SOCK_RECVBUF_LOCK(so);
 	m = sbcut_locked(sb, sb->sb_ccc);
 	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
 	    ("%s: ccc %u mb %p mbcnt %u", __func__,
 	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
-	sbrelease_locked(sb, so);
+	sbrelease_locked(so, SO_RCV);
 	SOCK_RECVBUF_UNLOCK(so);
 	if (SOCK_IO_RECV_OWNED(so))
 		SOCK_IO_RECV_UNLOCK(so);
 
 	/* Free the rights in the detached chain outside the buffer lock. */
 	unp_dispose_mbuf(m);
 }
 
 /*
  * Walk a list of packets (linked through m_nextpkt) and, within each packet,
  * every MT_CONTROL mbuf.  For each SOL_SOCKET/SCM_RIGHTS control message
  * found, call op with the message payload and the number of
  * struct filedescent pointers it holds.
  *
  * Parsing of a control mbuf stops at the first header that does not fit in
  * the remaining length.
  */
 static void
 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 {
 	struct mbuf *pkt, *m;
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen;
 
 	for (pkt = m0; pkt != NULL; pkt = pkt->m_nextpkt) {
 		for (m = pkt; m != NULL; m = m->m_next) {
 			if (m->m_type != MT_CONTROL)
 				continue;
 
 			cm = mtod(m, struct cmsghdr *);
 			clen = m->m_len;
 
 			while (cm != NULL && sizeof(*cm) <= clen &&
 			    cm->cmsg_len <= clen) {
 				data = CMSG_DATA(cm);
 				datalen = (caddr_t)cm + cm->cmsg_len
 				    - (caddr_t)data;
 
 				if (cm->cmsg_level == SOL_SOCKET &&
 				    cm->cmsg_type == SCM_RIGHTS)
 					(*op)(data, datalen /
 					    sizeof(struct filedescent *));
 
 				/* Last message in this mbuf: move on. */
 				if (CMSG_SPACE(datalen) >= clen)
 					break;
 				clen -= CMSG_SPACE(datalen);
 				cm = (struct cmsghdr *)
 				    ((caddr_t)cm + CMSG_SPACE(datalen));
 			}
 		}
 	}
 }
 
 /*
  * Helper invoked by VFS before a socket-type vnode is reclaimed.  If the
  * vnode is still attached to a unpcb, detach it, clear the unpcb's
  * unp_vnode pointer and release the vnode reference with vunref().
  */
 void
 vfs_unp_reclaim(struct vnode *vp)
 {
 	struct unpcb *unp;
 	struct mtx *vplock;
 	int detached;
 
 	ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 	KASSERT(vp->v_type == VSOCK,
 	    ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 
 	detached = 0;
 	vplock = mtx_pool_find(mtxpool_sleep, vp);
 	mtx_lock(vplock);
 	VOP_UNP_CONNECT(vp, &unp);
 	if (unp != NULL) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_vnode == vp) {
 			VOP_UNP_DETACH(vp);
 			unp->unp_vnode = NULL;
 			detached = 1;
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
 	mtx_unlock(vplock);
 
 	/* Drop the reference only after the pool mutex is released. */
 	if (detached != 0)
 		vunref(vp);
 }
 
 #ifdef DDB
 /* Emit indent spaces on the debugger console (nothing if indent <= 0). */
 static void
 db_print_indent(int indent)
 {
 	int remaining;
 
 	for (remaining = indent; remaining > 0; remaining--)
 		db_printf(" ");
 }
 
 /*
  * Print the names of the flags set in unp_flags as a comma-separated list.
  * sep is empty before the first name and ", " afterwards.
  */
 static void
 db_print_unpflags(int unp_flags)
 {
 	const char *sep;
 
 	sep = "";
 	if ((unp_flags & UNP_HAVEPC) != 0) {
 		db_printf("%sUNP_HAVEPC", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_WANTCRED_ALWAYS) != 0) {
 		db_printf("%sUNP_WANTCRED_ALWAYS", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_WANTCRED_ONESHOT) != 0) {
 		db_printf("%sUNP_WANTCRED_ONESHOT", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_CONNWAIT) != 0) {
 		db_printf("%sUNP_CONNWAIT", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_CONNECTING) != 0) {
 		db_printf("%sUNP_CONNECTING", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_BINDING) != 0) {
 		db_printf("%sUNP_BINDING", sep);
 		sep = ", ";
 	}
 }
 
 /*
  * Print the contents of an xucred on two indented lines: the scalar fields
  * first, then the comma-separated group list.
  */
 static void
 db_print_xucred(int indent, struct xucred *xu)
 {
 	const char *sep;
 	int g;
 
 	db_print_indent(indent);
 	db_printf("cr_version: %u   cr_uid: %u   cr_pid: %d   cr_ngroups: %d\n",
 	    xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
 
 	db_print_indent(indent);
 	db_printf("cr_groups: ");
 	sep = "";
 	for (g = 0; g < xu->cr_ngroups; g++) {
 		db_printf("%s%u", sep, xu->cr_groups[g]);
 		sep = ", ";
 	}
 	db_printf("\n");
 }
 
 /*
  * Print the addresses of the unpcbs linked on uh through unp_reflink, four
  * per line, each line starting with indent spaces.
  */
 static void
 db_print_unprefs(int indent, struct unp_head *uh)
 {
 	struct unpcb *unp;
 	int printed, column;
 
 	printed = 0;
 	LIST_FOREACH(unp, uh, unp_reflink) {
 		column = printed % 4;
 		if (column == 0)
 			db_print_indent(indent);
 		db_printf("%p  ", unp);
 		if (column == 3)
 			db_printf("\n");
 		printed++;
 	}
 	/* Terminate a partially filled last line. */
 	if (printed % 4 != 0)
 		db_printf("\n");
 }
 
 /*
  * DDB "show unpcb <addr>" command: treat addr as a pointer to a struct unpcb
  * and dump its fields.  Prints a usage message when no address is given.
  */
 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 {
 	struct unpcb *unp;
 
         if (!have_addr) {
                 db_printf("usage: show unpcb <addr>\n");
                 return;
         }
         unp = (struct unpcb *)addr;
 
 	db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 	    unp->unp_vnode);
 
 	db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 	    unp->unp_conn);
 
 	/* Addresses of the unpcbs linked on unp_refs, four per line. */
 	db_printf("unp_refs:\n");
 	db_print_unprefs(2, &unp->unp_refs);
 
 	/* XXXRW: Would be nice to print the full address, if any. */
 	db_printf("unp_addr: %p\n", unp->unp_addr);
 
 	db_printf("unp_gencnt: %llu\n",
 	    (unsigned long long)unp->unp_gencnt);
 
 	/* Raw hex value followed by the decoded flag names. */
 	db_printf("unp_flags: %x (", unp->unp_flags);
 	db_print_unpflags(unp->unp_flags);
 	db_printf(")\n");
 
 	db_printf("unp_peercred:\n");
 	db_print_xucred(2, &unp->unp_peercred);
 
 	db_printf("unp_refcount: %u\n", unp->unp_refcount);
 }
 #endif
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 9a8927160441..9e2e70313be3 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -1,13809 +1,13809 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <sys/proc.h>
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_uio.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_indata.h>
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_input.h>
 #include <netinet/sctp_crc32.h>
 #include <netinet/sctp_kdtrace.h>
 #if defined(INET) || defined(INET6)
 #include <netinet/udp.h>
 #endif
 #include <netinet/udp_var.h>
 #include <machine/in_cksum.h>
 
 /* Capacity of the gaps[] array in each struct sack_track entry. */
 #define SCTP_MAX_GAPS_INARRAY 4
 /*
  * One entry of the 256-element sack_array lookup table.
  *
  * NOTE(review): judging by the sack_array initializers, each entry appears
  * to describe the runs of set bits in the byte value used as its index:
  * num_entries is the number of runs, gaps[] holds the first and last bit
  * position of each run, right_edge is set when bit 0 is set and left_edge
  * when bit 7 is set (e.g. 0x05 -> right_edge 1, two runs {0,0} and {2,2}).
  * Confirm against the code that consumes the table.
  */
 struct sack_track {
 	uint8_t right_edge;	/* mergeable on the right edge */
 	uint8_t left_edge;	/* mergeable on the left edge */
 	uint8_t num_entries;	/* number of gaps[] slots in use */
 	uint8_t spare;		/* always 0 in the visible initializers */
 	struct sctp_gap_ack_block gaps[SCTP_MAX_GAPS_INARRAY];
 };
 
 const struct sack_track sack_array[256] = {
 	{0, 0, 0, 0,		/* 0x00 */
 		{{0, 0},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x01 */
 		{{0, 0},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x02 */
 		{{1, 1},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x03 */
 		{{0, 1},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x04 */
 		{{2, 2},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x05 */
 		{{0, 0},
 		{2, 2},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x06 */
 		{{1, 2},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x07 */
 		{{0, 2},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x08 */
 		{{3, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x09 */
 		{{0, 0},
 		{3, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x0a */
 		{{1, 1},
 		{3, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x0b */
 		{{0, 1},
 		{3, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x0c */
 		{{2, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x0d */
 		{{0, 0},
 		{2, 3},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x0e */
 		{{1, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x0f */
 		{{0, 3},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x10 */
 		{{4, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x11 */
 		{{0, 0},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x12 */
 		{{1, 1},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x13 */
 		{{0, 1},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x14 */
 		{{2, 2},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x15 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x16 */
 		{{1, 2},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x17 */
 		{{0, 2},
 		{4, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x18 */
 		{{3, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x19 */
 		{{0, 0},
 		{3, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x1a */
 		{{1, 1},
 		{3, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x1b */
 		{{0, 1},
 		{3, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x1c */
 		{{2, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x1d */
 		{{0, 0},
 		{2, 4},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x1e */
 		{{1, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x1f */
 		{{0, 4},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x20 */
 		{{5, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x21 */
 		{{0, 0},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x22 */
 		{{1, 1},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x23 */
 		{{0, 1},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x24 */
 		{{2, 2},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x25 */
 		{{0, 0},
 		{2, 2},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x26 */
 		{{1, 2},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x27 */
 		{{0, 2},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x28 */
 		{{3, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x29 */
 		{{0, 0},
 		{3, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x2a */
 		{{1, 1},
 		{3, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x2b */
 		{{0, 1},
 		{3, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x2c */
 		{{2, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x2d */
 		{{0, 0},
 		{2, 3},
 		{5, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x2e */
 		{{1, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x2f */
 		{{0, 3},
 		{5, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x30 */
 		{{4, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x31 */
 		{{0, 0},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x32 */
 		{{1, 1},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x33 */
 		{{0, 1},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x34 */
 		{{2, 2},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x35 */
 		{{0, 0},
 		{2, 2},
 		{4, 5},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x36 */
 		{{1, 2},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x37 */
 		{{0, 2},
 		{4, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x38 */
 		{{3, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x39 */
 		{{0, 0},
 		{3, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x3a */
 		{{1, 1},
 		{3, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x3b */
 		{{0, 1},
 		{3, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x3c */
 		{{2, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x3d */
 		{{0, 0},
 		{2, 5},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x3e */
 		{{1, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x3f */
 		{{0, 5},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x40 */
 		{{6, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x41 */
 		{{0, 0},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x42 */
 		{{1, 1},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x43 */
 		{{0, 1},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x44 */
 		{{2, 2},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x45 */
 		{{0, 0},
 		{2, 2},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x46 */
 		{{1, 2},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x47 */
 		{{0, 2},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x48 */
 		{{3, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x49 */
 		{{0, 0},
 		{3, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x4a */
 		{{1, 1},
 		{3, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x4b */
 		{{0, 1},
 		{3, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x4c */
 		{{2, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x4d */
 		{{0, 0},
 		{2, 3},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x4e */
 		{{1, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x4f */
 		{{0, 3},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x50 */
 		{{4, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x51 */
 		{{0, 0},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x52 */
 		{{1, 1},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x53 */
 		{{0, 1},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x54 */
 		{{2, 2},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 4, 0,		/* 0x55 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{6, 6}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x56 */
 		{{1, 2},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x57 */
 		{{0, 2},
 		{4, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x58 */
 		{{3, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x59 */
 		{{0, 0},
 		{3, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x5a */
 		{{1, 1},
 		{3, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x5b */
 		{{0, 1},
 		{3, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x5c */
 		{{2, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x5d */
 		{{0, 0},
 		{2, 4},
 		{6, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x5e */
 		{{1, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x5f */
 		{{0, 4},
 		{6, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x60 */
 		{{5, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x61 */
 		{{0, 0},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x62 */
 		{{1, 1},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x63 */
 		{{0, 1},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x64 */
 		{{2, 2},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x65 */
 		{{0, 0},
 		{2, 2},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x66 */
 		{{1, 2},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x67 */
 		{{0, 2},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x68 */
 		{{3, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x69 */
 		{{0, 0},
 		{3, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 3, 0,		/* 0x6a */
 		{{1, 1},
 		{3, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x6b */
 		{{0, 1},
 		{3, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x6c */
 		{{2, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x6d */
 		{{0, 0},
 		{2, 3},
 		{5, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x6e */
 		{{1, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x6f */
 		{{0, 3},
 		{5, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x70 */
 		{{4, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x71 */
 		{{0, 0},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x72 */
 		{{1, 1},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x73 */
 		{{0, 1},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x74 */
 		{{2, 2},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 3, 0,		/* 0x75 */
 		{{0, 0},
 		{2, 2},
 		{4, 6},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x76 */
 		{{1, 2},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x77 */
 		{{0, 2},
 		{4, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x78 */
 		{{3, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x79 */
 		{{0, 0},
 		{3, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 2, 0,		/* 0x7a */
 		{{1, 1},
 		{3, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x7b */
 		{{0, 1},
 		{3, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x7c */
 		{{2, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 2, 0,		/* 0x7d */
 		{{0, 0},
 		{2, 6},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 0, 1, 0,		/* 0x7e */
 		{{1, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 0, 1, 0,		/* 0x7f */
 		{{0, 6},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0x80 */
 		{{7, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x81 */
 		{{0, 0},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x82 */
 		{{1, 1},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x83 */
 		{{0, 1},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x84 */
 		{{2, 2},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x85 */
 		{{0, 0},
 		{2, 2},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x86 */
 		{{1, 2},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x87 */
 		{{0, 2},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x88 */
 		{{3, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x89 */
 		{{0, 0},
 		{3, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x8a */
 		{{1, 1},
 		{3, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x8b */
 		{{0, 1},
 		{3, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x8c */
 		{{2, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x8d */
 		{{0, 0},
 		{2, 3},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x8e */
 		{{1, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x8f */
 		{{0, 3},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x90 */
 		{{4, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x91 */
 		{{0, 0},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x92 */
 		{{1, 1},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x93 */
 		{{0, 1},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x94 */
 		{{2, 2},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0x95 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x96 */
 		{{1, 2},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x97 */
 		{{0, 2},
 		{4, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x98 */
 		{{3, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x99 */
 		{{0, 0},
 		{3, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0x9a */
 		{{1, 1},
 		{3, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x9b */
 		{{0, 1},
 		{3, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x9c */
 		{{2, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0x9d */
 		{{0, 0},
 		{2, 4},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0x9e */
 		{{1, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0x9f */
 		{{0, 4},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xa0 */
 		{{5, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xa1 */
 		{{0, 0},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa2 */
 		{{1, 1},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xa3 */
 		{{0, 1},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa4 */
 		{{2, 2},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xa5 */
 		{{0, 0},
 		{2, 2},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa6 */
 		{{1, 2},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xa7 */
 		{{0, 2},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xa8 */
 		{{3, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xa9 */
 		{{0, 0},
 		{3, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 4, 0,		/* 0xaa */
 		{{1, 1},
 		{3, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xab */
 		{{0, 1},
 		{3, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xac */
 		{{2, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xad */
 		{{0, 0},
 		{2, 3},
 		{5, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xae */
 		{{1, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xaf */
 		{{0, 3},
 		{5, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xb0 */
 		{{4, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb1 */
 		{{0, 0},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xb2 */
 		{{1, 1},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb3 */
 		{{0, 1},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xb4 */
 		{{2, 2},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xb5 */
 		{{0, 0},
 		{2, 2},
 		{4, 5},
 		{7, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xb6 */
 		{{1, 2},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb7 */
 		{{0, 2},
 		{4, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xb8 */
 		{{3, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xb9 */
 		{{0, 0},
 		{3, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xba */
 		{{1, 1},
 		{3, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xbb */
 		{{0, 1},
 		{3, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xbc */
 		{{2, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xbd */
 		{{0, 0},
 		{2, 5},
 		{7, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xbe */
 		{{1, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xbf */
 		{{0, 5},
 		{7, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xc0 */
 		{{6, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xc1 */
 		{{0, 0},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc2 */
 		{{1, 1},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xc3 */
 		{{0, 1},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc4 */
 		{{2, 2},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xc5 */
 		{{0, 0},
 		{2, 2},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc6 */
 		{{1, 2},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xc7 */
 		{{0, 2},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xc8 */
 		{{3, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xc9 */
 		{{0, 0},
 		{3, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xca */
 		{{1, 1},
 		{3, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xcb */
 		{{0, 1},
 		{3, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xcc */
 		{{2, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xcd */
 		{{0, 0},
 		{2, 3},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xce */
 		{{1, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xcf */
 		{{0, 3},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xd0 */
 		{{4, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd1 */
 		{{0, 0},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xd2 */
 		{{1, 1},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd3 */
 		{{0, 1},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xd4 */
 		{{2, 2},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 4, 0,		/* 0xd5 */
 		{{0, 0},
 		{2, 2},
 		{4, 4},
 		{6, 7}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xd6 */
 		{{1, 2},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd7 */
 		{{0, 2},
 		{4, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xd8 */
 		{{3, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xd9 */
 		{{0, 0},
 		{3, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xda */
 		{{1, 1},
 		{3, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xdb */
 		{{0, 1},
 		{3, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xdc */
 		{{2, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xdd */
 		{{0, 0},
 		{2, 4},
 		{6, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xde */
 		{{1, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xdf */
 		{{0, 4},
 		{6, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xe0 */
 		{{5, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xe1 */
 		{{0, 0},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe2 */
 		{{1, 1},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xe3 */
 		{{0, 1},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe4 */
 		{{2, 2},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xe5 */
 		{{0, 0},
 		{2, 2},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe6 */
 		{{1, 2},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xe7 */
 		{{0, 2},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xe8 */
 		{{3, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xe9 */
 		{{0, 0},
 		{3, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 3, 0,		/* 0xea */
 		{{1, 1},
 		{3, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xeb */
 		{{0, 1},
 		{3, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xec */
 		{{2, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xed */
 		{{0, 0},
 		{2, 3},
 		{5, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xee */
 		{{1, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xef */
 		{{0, 3},
 		{5, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xf0 */
 		{{4, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf1 */
 		{{0, 0},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xf2 */
 		{{1, 1},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf3 */
 		{{0, 1},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xf4 */
 		{{2, 2},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 3, 0,		/* 0xf5 */
 		{{0, 0},
 		{2, 2},
 		{4, 7},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xf6 */
 		{{1, 2},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf7 */
 		{{0, 2},
 		{4, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xf8 */
 		{{3, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xf9 */
 		{{0, 0},
 		{3, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 2, 0,		/* 0xfa */
 		{{1, 1},
 		{3, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xfb */
 		{{0, 1},
 		{3, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xfc */
 		{{2, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 2, 0,		/* 0xfd */
 		{{0, 0},
 		{2, 7},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{0, 1, 1, 0,		/* 0xfe */
 		{{1, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	},
 	{1, 1, 1, 0,		/* 0xff */
 		{{0, 7},
 		{0, 0},
 		{0, 0},
 		{0, 0}
 		}
 	}
 };
 
 int
 sctp_is_address_in_scope(struct sctp_ifa *ifa,
     struct sctp_scoping *scope,
     int do_update)
 {
 	if ((scope->loopback_scope == 0) &&
 	    (ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) {
 		/*
 		 * skip loopback if not in scope *
 		 */
 		return (0);
 	}
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		if (scope->ipv4_addr_legal) {
 			struct sockaddr_in *sin;
 
 			sin = &ifa->address.sin;
 			if (sin->sin_addr.s_addr == 0) {
 				/* not in scope , unspecified */
 				return (0);
 			}
 			if ((scope->ipv4_local_scope == 0) &&
 			    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 				/* private address not in scope */
 				return (0);
 			}
 		} else {
 			return (0);
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if (scope->ipv6_addr_legal) {
 			struct sockaddr_in6 *sin6;
 
 			/*
 			 * Must update the flags,  bummer, which means any
 			 * IFA locks must now be applied HERE <->
 			 */
 			if (do_update) {
 				sctp_gather_internal_ifa_flags(ifa);
 			}
 			if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 				return (0);
 			}
 			/* ok to use deprecated addresses? */
 			sin6 = &ifa->address.sin6;
 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 				/* skip unspecified addresses */
 				return (0);
 			}
 			if (	/* (local_scope == 0) && */
 			    (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
 				return (0);
 			}
 			if ((scope->site_scope == 0) &&
 			    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
 				return (0);
 			}
 		} else {
 			return (0);
 		}
 		break;
 #endif
 	default:
 		return (0);
 	}
 	return (1);
 }
 
 /*
  * Append an IPv4 or IPv6 address parameter describing ifa to an mbuf chain.
  *
  * m   - mbuf to append to.  If it has enough trailing space the parameter
  *       is written there; otherwise a new mbuf is allocated and linked
  *       after the last mbuf of the chain that starts at m.
  * ifa - address to encode.
  * len - optional running length; when non-NULL it is increased by the size
  *       of the parameter that was added.
  *
  * Returns the mbuf the parameter was written into.  m is returned
  * unchanged when the address family is not supported or no mbuf could be
  * allocated.
  */
 static struct mbuf *
 sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t *len)
 {
 #if defined(INET) || defined(INET6)
 	struct sctp_paramhdr *hdr;
 	struct mbuf *dst;
 	uint16_t param_len;
 #endif
 
 	/* The parameter size depends on the address family. */
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		param_len = (uint16_t)sizeof(struct sctp_ipv4addr_param);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		param_len = (uint16_t)sizeof(struct sctp_ipv6addr_param);
 		break;
 #endif
 	default:
 		return (m);
 	}
 #if defined(INET) || defined(INET6)
 	if (M_TRAILINGSPACE(m) < param_len) {
 		struct mbuf *fresh;
 
 		/* No room in m: hang a new mbuf off the end of the chain. */
 		for (dst = m; SCTP_BUF_NEXT(dst) != NULL; dst = SCTP_BUF_NEXT(dst)) {
 			continue;
 		}
 		fresh = sctp_get_mbuf_for_msg(param_len, 0, M_NOWAIT, 1, MT_DATA);
 		SCTP_BUF_NEXT(dst) = fresh;
 		if (fresh == NULL) {
 			/* Out of mbufs, no further addresses can be added. */
 			return (m);
 		}
 		dst = fresh;
 		hdr = mtod(dst, struct sctp_paramhdr *);
 	} else {
 		/* Simple case: the parameter fits right behind m's data. */
 		dst = m;
 		hdr = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m)));
 	}
 	/* Fill in the family specific part of the parameter. */
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		{
 			struct sctp_ipv4addr_param *p4;
 
 			p4 = (struct sctp_ipv4addr_param *)hdr;
 			hdr->param_type = htons(SCTP_IPV4_ADDRESS);
 			p4->addr = ifa->address.sin.sin_addr.s_addr;
 			break;
 		}
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			struct sctp_ipv6addr_param *p6;
 
 			p6 = (struct sctp_ipv6addr_param *)hdr;
 			hdr->param_type = htons(SCTP_IPV6_ADDRESS);
 			memcpy(p6->addr, &ifa->address.sin6.sin6_addr,
 			    sizeof(p6->addr));
 			/* the embedded scope must not go out on the wire */
 			in6_clearscope((struct in6_addr *)p6->addr);
 			break;
 		}
 #endif
 	default:
 		return (m);
 	}
 	/* Common tail: parameter length, mbuf length and caller's total. */
 	hdr->param_length = htons(param_len);
 	SCTP_BUF_LEN(dst) += param_len;
 	if (len != NULL) {
 		*len += param_len;
 	}
 	return (dst);
 #endif
 }
 
 /*
  * Append the endpoint's usable local addresses, as address parameters, to
  * the mbuf chain at m_at.
  *
  * inp          - endpoint; selects the VRF (inp->def_vrf_id) and, when not
  *                bound to all addresses, supplies the bound address list.
  * stcb         - association passed to sctp_is_addr_restricted() in the
  *                bound-all case.
  * scope        - scoping rules handed to sctp_is_address_in_scope().
  * m_at         - mbuf to append to; replaced by whatever
  *                sctp_add_addr_to_mbuf() returns after each address.
  * cnt_inits_to - initial value of the address counter.
  * padding_len  - optional pending padding; if both this and chunk_len are
  *                non-NULL and *padding_len > 0, the padding is zero-filled
  *                into m_at at offset *chunk_len before the next address is
  *                added, and *padding_len is reset to 0.
  * chunk_len    - optional running chunk length, updated as padding and
  *                parameters are added.
  *
  * Addresses are only listed when the counter ends up greater than one.
  * The whole walk runs under SCTP_IPI_ADDR_RLOCK().
  *
  * Returns the (possibly updated) m_at; m_at is returned untouched when the
  * VRF cannot be found.
  */
 struct mbuf *
 sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_scoping *scope,
     struct mbuf *m_at, int cnt_inits_to,
     uint16_t *padding_len, uint16_t *chunk_len)
 {
 	struct sctp_vrf *vrf = NULL;
 	int cnt, limit_out = 0, total_count;
 	uint32_t vrf_id;
 
 	vrf_id = inp->def_vrf_id;
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		SCTP_IPI_ADDR_RUNLOCK();
 		return (m_at);
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		struct sctp_ifa *sctp_ifap;
 		struct sctp_ifn *sctp_ifnp;
 
 		cnt = cnt_inits_to;
 		if (vrf->total_ifa_count > SCTP_COUNT_LIMIT) {
 			/*
 			 * Too many addresses in the VRF: skip the counting
 			 * pass and emit in limited mode instead.
 			 */
 			limit_out = 1;
 			cnt = SCTP_ADDRESS_LIMIT;
 			goto skip_count;
 		}
 		/* Pass 1: count the eligible addresses. */
 		LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
 			if ((scope->loopback_scope == 0) &&
 			    SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
 				/*
 				 * Skip loopback devices if loopback_scope
 				 * not set
 				 */
 				continue;
 			}
 			LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
 #ifdef INET
 				if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
 				    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 				    &sctp_ifap->address.sin.sin_addr) != 0)) {
 					continue;
 				}
 #endif
 #ifdef INET6
 				if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
 				    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 				    &sctp_ifap->address.sin6.sin6_addr) != 0)) {
 					continue;
 				}
 #endif
 				if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
 					continue;
 				}
 				if (sctp_is_address_in_scope(sctp_ifap, scope, 1) == 0) {
 					continue;
 				}
 				cnt++;
 				if (cnt > SCTP_ADDRESS_LIMIT) {
 					break;
 				}
 			}
 			if (cnt > SCTP_ADDRESS_LIMIT) {
 				break;
 			}
 		}
 skip_count:
 		if (cnt > 1) {
 			/*
 			 * Pass 2: emit the addresses.  From here on cnt is
 			 * reused as a per-interface counter.
 			 */
 			total_count = 0;
 			LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
 				cnt = 0;
 				if ((scope->loopback_scope == 0) &&
 				    SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
 					/*
 					 * Skip loopback devices if
 					 * loopback_scope not set
 					 */
 					continue;
 				}
 				LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
 #ifdef INET
 					if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
 					    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifap->address.sin.sin_addr) != 0)) {
 						continue;
 					}
 #endif
 #ifdef INET6
 					if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
 					    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifap->address.sin6.sin6_addr) != 0)) {
 						continue;
 					}
 #endif
 					if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
 						continue;
 					}
 					if (sctp_is_address_in_scope(sctp_ifap,
 					    scope, 0) == 0) {
 						continue;
 					}
 					/* Flush pending padding before the next parameter. */
 					if ((chunk_len != NULL) &&
 					    (padding_len != NULL) &&
 					    (*padding_len > 0)) {
 						memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
 						SCTP_BUF_LEN(m_at) += *padding_len;
 						*chunk_len += *padding_len;
 						*padding_len = 0;
 					}
 					m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap, chunk_len);
 					if (limit_out) {
 						cnt++;
 						total_count++;
 						if (cnt >= 2) {
 							/*
 							 * at most two addresses
 							 * from each interface
 							 */
 							break;
 						}
 						/*
 						 * NOTE(review): this break only
 						 * leaves the inner loop; the outer
 						 * interface loop keeps going.
 						 * Confirm that is intended.
 						 */
 						if (total_count > SCTP_ADDRESS_LIMIT) {
 							/* No more addresses */
 							break;
 						}
 					}
 				}
 			}
 		}
 	} else {
 		struct sctp_laddr *laddr;
 
 		cnt = cnt_inits_to;
 		/* First, how many ? */
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			if (laddr->ifa == NULL) {
 				continue;
 			}
 			if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
 				/*
 				 * Address is being deleted by the system,
 				 * don't list it.
 				 */
 				continue;
 			if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 				/*
 				 * Address is being deleted on this endpoint,
 				 * don't list it.
 				 */
 				continue;
 			}
 			if (sctp_is_address_in_scope(laddr->ifa,
 			    scope, 1) == 0) {
 				continue;
 			}
 			cnt++;
 		}
 		/*
 		 * To get through a NAT we only list addresses if we have
 		 * more than one. That way if you just bind a single address
 		 * we let the source of the init dictate our address.
 		 */
 		if (cnt > 1) {
 			cnt = cnt_inits_to;
 			/*
 			 * NOTE(review): unlike the counting loop above, this
 			 * loop does not test laddr->action against
 			 * SCTP_DEL_IP_ADDRESS. Confirm that is intended.
 			 */
 			LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 				if (laddr->ifa == NULL) {
 					continue;
 				}
 				if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
 					continue;
 				}
 				if (sctp_is_address_in_scope(laddr->ifa,
 				    scope, 0) == 0) {
 					continue;
 				}
 				/* Flush pending padding before the next parameter. */
 				if ((chunk_len != NULL) &&
 				    (padding_len != NULL) &&
 				    (*padding_len > 0)) {
 					memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
 					SCTP_BUF_LEN(m_at) += *padding_len;
 					*chunk_len += *padding_len;
 					*padding_len = 0;
 				}
 				m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa, chunk_len);
 				cnt++;
 				if (cnt >= SCTP_ADDRESS_LIMIT) {
 					break;
 				}
 			}
 		}
 	}
 	SCTP_IPI_ADDR_RUNLOCK();
 	return (m_at);
 }
 
 /*
  * Decide whether ifa is a "preferred" source address for a destination of
  * the given kind, i.e. whether source and destination scopes match.
  *
  * ifa          - candidate source address.
  * dest_is_loop - non-zero if the destination is a loopback address.
  * dest_is_priv - non-zero if the destination is a private address.
  * fam          - address family wanted; any other family is rejected.
  *
  * The destination is treated as global when it is neither loopback nor
  * private.  With L = loopback, P = private, G = global:
  *
  *     src | dest | preferred
  *     ----+------+----------------
  *      L  |  L   | yes
  *      P  |  L   | yes-v4, no-v6
  *      G  |  L   | yes-v4, no-v6
  *      L  |  P   | no
  *      P  |  P   | yes
  *      G  |  P   | no
  *      L  |  G   | no
  *      P  |  G   | no
  *      G  |  G   | yes
  *
  * IPv6 addresses flagged SCTP_ADDR_IFA_UNUSEABLE are never preferred.
  *
  * Returns ifa when it is preferred, NULL otherwise.
  */
 static struct sctp_ifa *
 sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     sa_family_t fam)
 {
 	uint8_t dest_is_global;
 
 	/* A family mismatch rules the address out immediately. */
 	if (ifa->address.sa.sa_family != fam) {
 		return (NULL);
 	}
 	dest_is_global = ((dest_is_priv == 0) && (dest_is_loop == 0)) ? 1 : 0;
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Is destination preferred:");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ifa->address.sa);
 #ifdef INET6
 	if (fam == AF_INET6) {
 		/* unusable (e.g. deprecated) addresses are not offered */
 		if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:1\n");
 			return (NULL);
 		}
 		/* v6 only: private (non-loopback) source to loopback dest */
 		if (dest_is_loop && ifa->src_is_priv && !ifa->src_is_loop) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:2\n");
 			return (NULL);
 		}
 		/* v6 only: global source to loopback dest */
 		if (dest_is_loop && ifa->src_is_glob) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:3\n");
 			return (NULL);
 		}
 	}
 #endif
 	/*
 	 * The remainder of the table, spelled out one row at a time so it
 	 * is easy to check against the header comment.
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "src_loop:%d src_priv:%d src_glob:%d\n",
 	    ifa->src_is_loop, ifa->src_is_priv, ifa->src_is_glob);
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "dest_loop:%d dest_priv:%d dest_glob:%d\n",
 	    dest_is_loop, dest_is_priv, dest_is_global);
 
 	if (dest_is_priv) {
 		/* private destination: loopback and global sources lose */
 		if (ifa->src_is_loop) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:4\n");
 			return (NULL);
 		}
 		if (ifa->src_is_glob) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:5\n");
 			return (NULL);
 		}
 	}
 	if (dest_is_global) {
 		/* global destination: loopback and private sources lose */
 		if (ifa->src_is_loop) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:6\n");
 			return (NULL);
 		}
 		if (ifa->src_is_priv) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:7\n");
 			return (NULL);
 		}
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "YES\n");
 	return (ifa);
 }
 
 /*
  * Decide whether ifa is an "acceptable" source address for a destination
  * of the given kind.  This is a looser test than the preferred one: to
  * allow for NAT, a private source towards a global destination (and the
  * reverse) is let through.
  *
  * ifa          - candidate source address.
  * dest_is_loop - non-zero if the destination is a loopback address.
  * dest_is_priv - non-zero if the destination is a private address.
  * fam          - address family wanted; any other family is rejected.
  *
  * With L = loopback, P = private, G = global:
  *
  *     src | dest | acceptable
  *     ----+------+---------------------
  *      L  |  L   | yes
  *      P  |  L   | yes-v4, no-v6
  *      G  |  L   | yes
  *      L  |  P   | no
  *      P  |  P   | yes
  *      G  |  P   | yes - may not work
  *      L  |  G   | no
  *      P  |  G   | yes - may not work
  *      G  |  G   | yes
  *
  * IPv6 addresses flagged SCTP_ADDR_IFA_UNUSEABLE are never acceptable.
  *
  * Returns ifa when it is acceptable, NULL otherwise.
  */
 static struct sctp_ifa *
 sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     sa_family_t fam)
 {
 	uint8_t dest_is_global;
 
 	if (ifa->address.sa.sa_family != fam) {
 		/* wrong family, nothing more to look at */
 		SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa_fam:%d fam:%d\n",
 		    ifa->address.sa.sa_family, fam);
 		return (NULL);
 	}
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, &ifa->address.sa);
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst_is_loop:%d dest_is_priv:%d\n",
 	    dest_is_loop, dest_is_priv);
 	/* Neither loopback nor private means the destination is global. */
 	dest_is_global = ((dest_is_loop == 0) && (dest_is_priv == 0)) ? 1 : 0;
 #ifdef INET6
 	if (fam == AF_INET6) {
 		/*
 		 * Unusable addresses are out, and so is the special case of
 		 * a private (link-local) source towards loopback.
 		 */
 		if ((ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) ||
 		    (ifa->src_is_priv && dest_is_loop)) {
 			return (NULL);
 		}
 	}
 #endif
 	/* A loopback source only works towards a loopback destination. */
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_priv:%d\n",
 	    ifa->src_is_loop,
 	    dest_is_priv);
 	if (dest_is_priv && (ifa->src_is_loop == 1)) {
 		return (NULL);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_glob:%d\n",
 	    ifa->src_is_loop,
 	    dest_is_global);
 	if (dest_is_global && (ifa->src_is_loop == 1)) {
 		return (NULL);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "address is acceptable\n");
 	return (ifa);
 }
 
 /*
  * Check whether ifa is on the association's restricted address list.
  *
  * stcb - association to consult; NULL means there are no restrictions.
  * ifa  - address to look for (compared by pointer).
  *
  * Returns 1 if ifa is on stcb's restricted list, 0 otherwise.
  */
 int
 sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
 {
 	struct sctp_laddr *entry;
 
 	if (stcb != NULL) {
 		LIST_FOREACH(entry, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
 			if (entry->ifa != NULL) {
 				if (entry->ifa == ifa) {
 					/* found it on the list */
 					return (1);
 				}
 			} else {
 				/* entries without an ifa never match */
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
 				    __func__);
 			}
 		}
 	}
 	return (0);
 }
 
 /*
  * Check whether ifa is one of the endpoint's bound addresses.
  *
  * inp - endpoint whose sctp_addr_list is searched.
  * ifa - address to look for (compared by pointer); NULL is never found.
  *
  * Only list entries whose action field is 0 count as a match.
  *
  * Returns 1 if a matching entry exists, 0 otherwise.
  */
 int
 sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
 {
 	struct sctp_laddr *entry;
 
 	if (ifa == NULL) {
 		return (0);
 	}
 	LIST_FOREACH(entry, &inp->sctp_addr_list, sctp_nxt_addr) {
 		if (entry->ifa != NULL) {
 			if ((entry->ifa == ifa) && (entry->action == 0)) {
 				/* same pointer, no action recorded */
 				return (1);
 			}
 		} else {
 			/* entries without an ifa never match */
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
 			    __func__);
 		}
 	}
 	return (0);
 }
 
 /*
  * Pick a source address for an endpoint that is bound to specific
  * addresses, without reference to an association.
  *
  * inp              - endpoint whose bound address list is used.
  * ro               - route; supplies the outgoing interface.
  * vrf_id           - VRF to use; NULL is returned if it cannot be found.
  * non_asoc_addr_ok - when 0, addresses flagged SCTP_ADDR_DEFER_USE on the
  *                    outgoing interface are skipped.
  * dest_is_priv     - non-zero if the destination is a private address.
  * dest_is_loop     - non-zero if the destination is a loopback address.
  * fam              - address family wanted.
  *
  * Search order:
  *   1. a preferred address on the outgoing interface that is also bound to
  *      the endpoint;
  *   2. a preferred address from the endpoint's list, scanning from
  *      inp->next_addr_touse and wrapping to the head of the list once;
  *   3. an acceptable address from the endpoint's list, scanned the same way.
  *
  * inp->next_addr_touse is written during the scans.
  *
  * Returns the chosen address with its refcount incremented, or NULL when
  * nothing suitable is bound.
  */
 static struct sctp_ifa *
 sctp_choose_boundspecific_inp(struct sctp_inpcb *inp,
     sctp_route_t *ro,
     uint32_t vrf_id,
     int non_asoc_addr_ok,
     uint8_t dest_is_priv,
     uint8_t dest_is_loop,
     sa_family_t fam)
 {
 	struct sctp_laddr *laddr, *starting_point;
 	void *ifn;
 	int resettotop = 0;
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa, *sifa;
 	struct sctp_vrf *vrf;
 	uint32_t ifn_index;
 
 	/* The VRF is only looked up to make sure it exists. */
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL)
 		return (NULL);
 
 	ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 	ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
 	sctp_ifn = sctp_find_ifn(ifn, ifn_index);
 	/*
 	 * First question: is the interface we will emit on known to us? If
 	 * so, we want a preferred address from it that is also bound to the
 	 * endpoint.
 	 */
 	if (sctp_ifn) {
 		/* is a preferred one on the interface we route out? */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 			    (non_asoc_addr_ok == 0))
 				continue;
 			sifa = sctp_is_ifa_addr_preferred(sctp_ifa,
 			    dest_is_loop,
 			    dest_is_priv, fam);
 			if (sifa == NULL)
 				continue;
 			if (sctp_is_addr_in_ep(inp, sifa)) {
 				atomic_add_int(&sifa->refcount, 1);
 				return (sifa);
 			}
 		}
 	}
 	/*
 	 * We could not get an address on the emitting interface, so search
 	 * the endpoint's own address list: first for a preferred address,
 	 * then for an acceptable one. If neither exists we return NULL.
 	 */
 	starting_point = inp->next_addr_touse;
 once_again:
 	if (inp->next_addr_touse == NULL) {
 		inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
 		resettotop = 1;
 	}
 	for (laddr = inp->next_addr_touse; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (resettotop == 0) {
 		/* started mid-list: wrap around and scan from the head once */
 		inp->next_addr_touse = NULL;
 		goto once_again;
 	}
 
 	/* Nothing preferred; restore the start and repeat for "acceptable". */
 	inp->next_addr_touse = starting_point;
 	resettotop = 0;
 once_again_too:
 	if (inp->next_addr_touse == NULL) {
 		inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
 		resettotop = 1;
 	}
 
 	/* ok, what about an acceptable address in the inp */
 	for (laddr = inp->next_addr_touse; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (resettotop == 0) {
 		/* started mid-list: wrap around and scan from the head once */
 		inp->next_addr_touse = NULL;
 		goto once_again_too;
 	}
 
 	/*
 	 * No bound address can serve as a source for this destination; we
 	 * are in trouble.
 	 */
 	return (NULL);
 }
 
 /*
  * Pick a source address for an association whose endpoint is bound to
  * specific addresses.
  *
  * inp              - endpoint whose bound address list is used.
  * stcb             - association; supplies the restricted/pending address
  *                    state and asoc.last_used_address. It is dereferenced
  *                    unconditionally once the interface search fails, so
  *                    it must not be NULL.
  * ro               - route; supplies the outgoing interface.
  * vrf_id           - VRF to use; NULL is returned if it cannot be found.
  * dest_is_priv     - non-zero if the destination is a private address.
  * dest_is_loop     - non-zero if the destination is a loopback address.
  * non_asoc_addr_ok - when 0, restricted addresses and addresses flagged
  *                    SCTP_ADDR_DEFER_USE are skipped; when non-zero, a
  *                    restricted address is still usable if
  *                    sctp_is_addr_pending() reports it.
  * fam              - address family wanted.
  *
  * Search order:
  *   1. a preferred, then an acceptable, address on the outgoing interface
  *      that is also bound to the endpoint;
  *   2. a preferred address from the endpoint's list, scanning from
  *      stcb->asoc.last_used_address and wrapping to the head once;
  *   3. an acceptable address from the endpoint's list, scanned the same way.
  *
  * A hit in step 2 or 3 is stored in stcb->asoc.last_used_address.
  *
  * Returns the chosen address with its refcount incremented, or NULL.
  */
 static struct sctp_ifa *
 sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     sctp_route_t *ro,
     uint32_t vrf_id,
     uint8_t dest_is_priv,
     uint8_t dest_is_loop,
     int non_asoc_addr_ok,
     sa_family_t fam)
 {
 	struct sctp_laddr *laddr, *starting_point;
 	void *ifn;
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa, *sifa;
 	uint8_t start_at_beginning = 0;
 	struct sctp_vrf *vrf;
 	uint32_t ifn_index;
 
 	/* The VRF is only looked up to make sure it exists. */
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL)
 		return (NULL);
 
 	ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 	ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
 	sctp_ifn = sctp_find_ifn(ifn, ifn_index);
 
 	/*
 	 * First question: is the interface we will emit on in our list? If
 	 * so, we want an address from it. First we look for a preferred
 	 * one, second for an acceptable one.
 	 */
 	if (sctp_ifn) {
 		/* first try for a preferred address on the ep */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
 				continue;
 			if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
 				sifa = sctp_is_ifa_addr_preferred(sctp_ifa, dest_is_loop, dest_is_priv, fam);
 				if (sifa == NULL)
 					continue;
 				/*
 				 * Skip restricted addresses, unless
 				 * non-association addresses are allowed and
 				 * this one is pending.
 				 */
 				if (((non_asoc_addr_ok == 0) &&
 				    (sctp_is_addr_restricted(stcb, sifa))) ||
 				    (non_asoc_addr_ok &&
 				    (sctp_is_addr_restricted(stcb, sifa)) &&
 				    (!sctp_is_addr_pending(stcb, sifa)))) {
 					/* on the no-no list */
 					continue;
 				}
 				atomic_add_int(&sifa->refcount, 1);
 				return (sifa);
 			}
 		}
 		/* next try for an acceptable address on the ep */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
 				continue;
 			if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
 				sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam);
 				if (sifa == NULL)
 					continue;
 				if (((non_asoc_addr_ok == 0) &&
 				    (sctp_is_addr_restricted(stcb, sifa))) ||
 				    (non_asoc_addr_ok &&
 				    (sctp_is_addr_restricted(stcb, sifa)) &&
 				    (!sctp_is_addr_pending(stcb, sifa)))) {
 					/* on the no-no list */
 					continue;
 				}
 				atomic_add_int(&sifa->refcount, 1);
 				return (sifa);
 			}
 		}
 	}
 	/*
 	 * If we can't find one like that then we must look at all bound
 	 * addresses and pick one: first a preferred, then an acceptable one.
 	 */
 	starting_point = stcb->asoc.last_used_address;
 sctp_from_the_top:
 	if (stcb->asoc.last_used_address == NULL) {
 		start_at_beginning = 1;
 		stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
 	}
 	/* search beginning with the last used address */
 	for (laddr = stcb->asoc.last_used_address; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop, dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		if (((non_asoc_addr_ok == 0) &&
 		    (sctp_is_addr_restricted(stcb, sifa))) ||
 		    (non_asoc_addr_ok &&
 		    (sctp_is_addr_restricted(stcb, sifa)) &&
 		    (!sctp_is_addr_pending(stcb, sifa)))) {
 			/* on the no-no list */
 			continue;
 		}
 		/* remember the list entry that produced this address */
 		stcb->asoc.last_used_address = laddr;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (start_at_beginning == 0) {
 		/* started mid-list: wrap around and scan from the head once */
 		stcb->asoc.last_used_address = NULL;
 		goto sctp_from_the_top;
 	}
 	/* now try for any higher scope than the destination */
 	stcb->asoc.last_used_address = starting_point;
 	start_at_beginning = 0;
 sctp_from_the_top2:
 	if (stcb->asoc.last_used_address == NULL) {
 		start_at_beginning = 1;
 		stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
 	}
 	/* search beginning with the last used address */
 	for (laddr = stcb->asoc.last_used_address; laddr;
 	    laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
 		if (laddr->ifa == NULL) {
 			/* address has been removed */
 			continue;
 		}
 		if (laddr->action == SCTP_DEL_IP_ADDRESS) {
 			/* address is being deleted */
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL)
 			continue;
 		if (((non_asoc_addr_ok == 0) &&
 		    (sctp_is_addr_restricted(stcb, sifa))) ||
 		    (non_asoc_addr_ok &&
 		    (sctp_is_addr_restricted(stcb, sifa)) &&
 		    (!sctp_is_addr_pending(stcb, sifa)))) {
 			/* on the no-no list */
 			continue;
 		}
 		/* remember the list entry that produced this address */
 		stcb->asoc.last_used_address = laddr;
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 	if (start_at_beginning == 0) {
 		/* started mid-list: wrap around and scan from the head once */
 		stcb->asoc.last_used_address = NULL;
 		goto sctp_from_the_top2;
 	}
 	return (NULL);
 }
 
 /*
  * Return the addr_wanted'th (0-based) source address on ifn that is both
  * preferred for the destination and usable by this endpoint/association.
  *
  * Every address on the interface is filtered, in order, by: the prison
  * check against the endpoint's credentials, the deferred-use flag (only
  * honoured when non_asoc_addr_ok is 0), sctp_is_ifa_addr_preferred(), the
  * IPv6 loopback and link-local scope rules, the mobility next-hop match
  * and, when stcb is given, the association scope and restricted list.
  * Addresses that survive are counted; the one found once addr_wanted
  * survivors have been passed over is returned.
  *
  * No reference is taken on the returned address here.  NULL is returned
  * when the list is exhausted before that position is reached.
  */
 static struct sctp_ifa *
 sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn,
     struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int non_asoc_addr_ok,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     int addr_wanted,
     sa_family_t fam,
     sctp_route_t *ro)
 {
 	struct sctp_ifa *cand, *pick;
 	int seen;
 #ifdef INET6
 	struct sockaddr_in6 dst6, src6;
 #endif
 
 	seen = 0;
 #ifdef INET6
 	if (fam == AF_INET6) {
 		/* Work on a private copy of the destination, scope recovered. */
 		memcpy(&dst6, &ro->ro_dst, sizeof(struct sockaddr_in6));
 		(void)sa6_recoverscope(&dst6);
 	}
 #endif				/* INET6 */
 	LIST_FOREACH(cand, &ifn->ifalist, next_ifa) {
 #ifdef INET
 		if (cand->address.sa.sa_family == AF_INET) {
 			if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &cand->address.sin.sin_addr) != 0) {
 				continue;
 			}
 		}
 #endif
 #ifdef INET6
 		if (cand->address.sa.sa_family == AF_INET6) {
 			if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &cand->address.sin6.sin6_addr) != 0) {
 				continue;
 			}
 		}
 #endif
 		/* Deferred addresses are only usable if non-asoc ones are ok. */
 		if ((non_asoc_addr_ok == 0) &&
 		    (cand->localifa_flags & SCTP_ADDR_DEFER_USE)) {
 			continue;
 		}
 		pick = sctp_is_ifa_addr_preferred(cand, dest_is_loop,
 		    dest_is_priv, fam);
 		if (pick == NULL) {
 			continue;
 		}
 #ifdef INET6
 		if (fam == AF_INET6) {
 			if (dest_is_loop &&
 			    pick->src_is_loop && pick->src_is_priv) {
 				/*
 				 * fe80::1 must not be the source towards
 				 * loopback ::1: it is not listed to the peer,
 				 * so using it would draw an abort.
 				 */
 				continue;
 			}
 			if (IN6_IS_ADDR_LINKLOCAL(&pick->address.sin6.sin6_addr) &&
 			    IN6_IS_ADDR_LINKLOCAL(&dst6.sin6_addr)) {
 				/*
 				 * A link-local pair is only usable when both
 				 * ends carry the same scope id.
 				 */
 				memcpy(&src6, &pick->address.sin6, sizeof(struct sockaddr_in6));
 				(void)sa6_recoverscope(&src6);
 				if (src6.sin6_scope_id != dst6.sin6_scope_id) {
 					continue;
 				}
 			}
 			/*
 			 * With mobility an interface may still carry its old
 			 * IPv6 address next to the new one; keep only the one
 			 * that corresponds to the next hop.  (by micchie)
 			 */
 			if ((stcb != NULL) &&
 			    sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE) &&
 			    (sctp_v6src_match_nexthop(&pick->address.sin6, ro) == 0)) {
 				continue;
 			}
 		}
 #endif				/* INET6 */
 #ifdef INET
 		/* Same idea for IPv4: drop topologically incorrect addresses. */
 		if ((stcb != NULL) && (fam == AF_INET) &&
 		    sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE) &&
 		    (sctp_v4src_match_nexthop(pick, ro) == 0)) {
 			continue;
 		}
 #endif
 		if (stcb != NULL) {
 			if (sctp_is_address_in_scope(cand, &stcb->asoc.scope, 0) == 0) {
 				continue;
 			}
 			/*
 			 * A restricted address (probably not yet added) is
 			 * only tolerated when non-asoc addresses are ok and
 			 * the address is pending.
 			 */
 			if (sctp_is_addr_restricted(stcb, pick) &&
 			    ((non_asoc_addr_ok == 0) ||
 			    !sctp_is_addr_pending(stcb, pick))) {
 				continue;
 			}
 		}
 		if (seen >= addr_wanted) {
 			return (pick);
 		}
 		seen++;
 	}
 	return (NULL);
 }
 
 /*
  * Count the addresses on ifn that are preferred source addresses for the
  * destination (as judged by sctp_is_ifa_addr_preferred()) and usable by
  * this endpoint/association.
  *
  * An address is skipped when the prison check against the endpoint's
  * credentials rejects it, when it is flagged SCTP_ADDR_DEFER_USE while
  * non_asoc_addr_ok is 0, when it is not preferred, or, if stcb is given,
  * when it is out of the association's scope or on its restricted list
  * (unless non_asoc_addr_ok is set and the address is pending).
  *
  * stcb may be NULL; inp is dereferenced unconditionally.
  * Returns the number of addresses that passed every check.
  */
 static int
 sctp_count_num_preferred_boundall(struct sctp_ifn *ifn,
     struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int non_asoc_addr_ok,
     uint8_t dest_is_loop,
     uint8_t dest_is_priv,
     sa_family_t fam)
 {
 	struct sctp_ifa *ifa, *sifa;
 	int num_eligible_addr = 0;
 
 	LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
 #ifdef INET
 		if ((ifa->address.sa.sa_family == AF_INET) &&
 		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 		    &ifa->address.sin.sin_addr) != 0)) {
 			continue;
 		}
 #endif
 #ifdef INET6
 		/*
 		 * The prison check only needs inp, so it is applied whether
 		 * or not an association exists, exactly like the IPv4 check
 		 * above.  Otherwise addresses that can never be selected
 		 * would be counted when stcb is NULL.
 		 */
 		if ((ifa->address.sa.sa_family == AF_INET6) &&
 		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 		    &ifa->address.sin6.sin6_addr) != 0)) {
 			continue;
 		}
 #endif
 		if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 		    (non_asoc_addr_ok == 0)) {
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL) {
 			continue;
 		}
 		if (stcb) {
 			if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
 				continue;
 			}
 			if (((non_asoc_addr_ok == 0) &&
 			    (sctp_is_addr_restricted(stcb, sifa))) ||
 			    (non_asoc_addr_ok &&
 			    (sctp_is_addr_restricted(stcb, sifa)) &&
 			    (!sctp_is_addr_pending(stcb, sifa)))) {
 				/*
 				 * It is restricted for some reason..
 				 * probably not yet added.
 				 */
 				continue;
 			}
 		}
 		num_eligible_addr++;
 	}
 	return (num_eligible_addr);
 }
 
 /*
  * Source address selection for an endpoint bound to all addresses.
  *
  * The search falls through four plans:
  *   A: a preferred address on the interface the route emits on, rotating
  *      through the eligible ones via net->indx_of_eligible_next_to_use;
  *   B: a preferred address on any other interface of the VRF;
  *   C: an acceptable address on the emit interface;
  *   D: an acceptable address on any interface of the VRF.
  * With INET, if C and D both fail for an association whose scope has
  * ipv4_local_scope == 0, C and D are retried once with ipv4_local_scope
  * switched on.
  *
  * stcb and net may be NULL.  Returns the chosen address with exactly one
  * reference added to its refcount on every path, or NULL if the VRF is
  * unknown or nothing usable was found.
  */
 static struct sctp_ifa *
 sctp_choose_boundall(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_nets *net,
     sctp_route_t *ro,
     uint32_t vrf_id,
     uint8_t dest_is_priv,
     uint8_t dest_is_loop,
     int non_asoc_addr_ok,
     sa_family_t fam)
 {
 	int cur_addr_num = 0, num_preferred = 0;
 	void *ifn;
 	struct sctp_ifn *sctp_ifn, *looked_at = NULL, *emit_ifn;
 	struct sctp_ifa *sctp_ifa, *sifa;
 	uint32_t ifn_index;
 	struct sctp_vrf *vrf;
 #ifdef INET
 	int retried = 0;
 #endif
 
 	/*-
 	 * For boundall we can use any address in the association.
 	 * If non_asoc_addr_ok is set we can use any address (at least in
 	 * theory). So we look for preferred addresses first. If we find one,
 	 * we use it. Otherwise we next try to get an address on the
 	 * interface, which we should be able to do (unless non_asoc_addr_ok
 	 * is false and we are routed out that way). In these cases where we
 	 * can't use the address of the interface we go through all the
 	 * ifn's looking for an address we can use and fill that in. Punting
 	 * means we send back address 0, which will probably cause problems
 	 * actually since then IP will fill in the address of the route ifn,
 	 * which means we probably already rejected it.. i.e. here comes an
 	 * abort :-<.
 	 */
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL)
 		return (NULL);
 
 	ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 	ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn from route:%p ifn_index:%d\n", ifn, ifn_index);
 	emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(ifn, ifn_index);
 	if (sctp_ifn == NULL) {
 		/* ?? We don't have this guy ?? */
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "No ifn emit interface?\n");
 		goto bound_all_plan_b;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn_index:%d name:%s is emit interface\n",
 	    ifn_index, sctp_ifn->ifn_name);
 
 	if (net) {
 		cur_addr_num = net->indx_of_eligible_next_to_use;
 	}
 	num_preferred = sctp_count_num_preferred_boundall(sctp_ifn,
 	    inp, stcb,
 	    non_asoc_addr_ok,
 	    dest_is_loop,
 	    dest_is_priv, fam);
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Found %d preferred source addresses for intf:%s\n",
 	    num_preferred, sctp_ifn->ifn_name);
 	if (num_preferred == 0) {
 		/*
 		 * no eligible addresses, we must use some other interface
 		 * address if we can find one.
 		 */
 		goto bound_all_plan_b;
 	}
 	/*
 	 * Ok we have num_preferred set with how many we can use, this
 	 * may vary from call to call due to addresses being deprecated
 	 * etc..
 	 */
 	if (cur_addr_num >= num_preferred) {
 		cur_addr_num = 0;
 	}
 	/*
 	 * select the nth address from the list (where cur_addr_num is the
 	 * nth) and 0 is the first one, 1 is the second one etc...
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num);
 
 	sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
 	    dest_is_priv, cur_addr_num, fam, ro);
 
 	/* if sctp_ifa is NULL something changed??, fall to plan b. */
 	if (sctp_ifa) {
 		atomic_add_int(&sctp_ifa->refcount, 1);
 		if (net) {
 			/* save off where the next one we will want */
 			net->indx_of_eligible_next_to_use = cur_addr_num + 1;
 		}
 		return (sctp_ifa);
 	}
 	/*
 	 * plan_b: Look at all interfaces and find a preferred address. If
 	 * no preferred fall through to plan_c.
 	 */
 bound_all_plan_b:
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan B\n");
 	LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "Examine interface %s\n",
 		    sctp_ifn->ifn_name);
 		if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 			/* wrong base scope */
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "skip\n");
 			continue;
 		}
 		if ((sctp_ifn == looked_at) && looked_at) {
 			/* already looked at this guy */
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n");
 			continue;
 		}
 		num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok,
 		    dest_is_loop, dest_is_priv, fam);
 		/* Report the interface being examined, not the route's ifn. */
 		SCTPDBG(SCTP_DEBUG_OUTPUT2,
 		    "Found ifn:%p %d preferred source addresses\n",
 		    (void *)sctp_ifn, num_preferred);
 		if (num_preferred == 0) {
 			/* None on this interface. */
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "No preferred -- skipping to next\n");
 			continue;
 		}
 		SCTPDBG(SCTP_DEBUG_OUTPUT2,
 		    "num preferred:%d on interface:%p cur_addr_num:%d\n",
 		    num_preferred, (void *)sctp_ifn, cur_addr_num);
 
 		/*
 		 * Ok we have num_preferred set with how many we can
 		 * use, this may vary from call to call due to addresses
 		 * being deprecated etc..
 		 */
 		if (cur_addr_num >= num_preferred) {
 			cur_addr_num = 0;
 		}
 		sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
 		    dest_is_priv, cur_addr_num, fam, ro);
 		if (sifa == NULL)
 			continue;
 		if (net) {
 			net->indx_of_eligible_next_to_use = cur_addr_num + 1;
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "we selected %d\n",
 			    cur_addr_num);
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Source:");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Dest:");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &net->ro._l_addr.sa);
 		}
 		atomic_add_int(&sifa->refcount, 1);
 		return (sifa);
 	}
 #ifdef INET
 again_with_private_addresses_allowed:
 #endif
 	/* plan_c: do we have an acceptable address on the emit interface */
 	sifa = NULL;
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan C: find acceptable on interface\n");
 	if (emit_ifn == NULL) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jump to Plan D - no emit_ifn\n");
 		goto plan_d;
 	}
 	LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", (void *)sctp_ifa);
 #ifdef INET
 		if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 		    &sctp_ifa->address.sin.sin_addr) != 0)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
 			continue;
 		}
 #endif
 #ifdef INET6
 		if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 		    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
 			continue;
 		}
 #endif
 		if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 		    (non_asoc_addr_ok == 0)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Defer\n");
 			continue;
 		}
 		sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop,
 		    dest_is_priv, fam);
 		if (sifa == NULL) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "IFA not acceptable\n");
 			continue;
 		}
 		if (stcb) {
 			if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT2, "NOT in scope\n");
 				sifa = NULL;
 				continue;
 			}
 			if (((non_asoc_addr_ok == 0) &&
 			    (sctp_is_addr_restricted(stcb, sifa))) ||
 			    (non_asoc_addr_ok &&
 			    (sctp_is_addr_restricted(stcb, sifa)) &&
 			    (!sctp_is_addr_pending(stcb, sifa)))) {
 				/*
 				 * It is restricted for some reason..
 				 * probably not yet added.
 				 */
 				SCTPDBG(SCTP_DEBUG_OUTPUT2, "Its restricted\n");
 				sifa = NULL;
 				continue;
 			}
 		}
 		/*
 		 * Do not take a reference here: the "out" path below takes
 		 * the one reference handed to the caller.  Taking one in
 		 * both places would leak a reference.
 		 */
 		goto out;
 	}
 plan_d:
 	/*
 	 * plan_d: We are in trouble. No preferred address on the emit
 	 * interface. And not even a preferred address on all interfaces. Go
 	 * out and see if we can find an acceptable address somewhere
 	 * amongst all interfaces.
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", (void *)looked_at);
 	LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 		if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 			/* wrong base scope */
 			continue;
 		}
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 #ifdef INET
 			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin.sin_addr) != 0)) {
 				continue;
 			}
 #endif
 #ifdef INET6
 			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 				continue;
 			}
 #endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 			    (non_asoc_addr_ok == 0))
 				continue;
 			sifa = sctp_is_ifa_addr_acceptable(sctp_ifa,
 			    dest_is_loop,
 			    dest_is_priv, fam);
 			if (sifa == NULL)
 				continue;
 			if (stcb) {
 				if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
 					sifa = NULL;
 					continue;
 				}
 				if (((non_asoc_addr_ok == 0) &&
 				    (sctp_is_addr_restricted(stcb, sifa))) ||
 				    (non_asoc_addr_ok &&
 				    (sctp_is_addr_restricted(stcb, sifa)) &&
 				    (!sctp_is_addr_pending(stcb, sifa)))) {
 					/*
 					 * It is restricted for some
 					 * reason.. probably not yet added.
 					 */
 					sifa = NULL;
 					continue;
 				}
 			}
 			goto out;
 		}
 	}
 #ifdef INET
 	if (stcb) {
 		if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) {
 			/* Retry plans C and D once with private IPv4 in scope. */
 			stcb->asoc.scope.ipv4_local_scope = 1;
 			retried = 1;
 			goto again_with_private_addresses_allowed;
 		} else if (retried == 1) {
 			/* The retry found nothing either: undo the scope change. */
 			stcb->asoc.scope.ipv4_local_scope = 0;
 		}
 	}
 #endif
 out:
 	if (sifa != NULL) {
 #ifdef INET
 		if (retried == 1) {
 			/*
 			 * The address was only found after ipv4_local_scope
 			 * was switched on.  Put every other usable private
 			 * IPv4 address on the association's restricted list.
 			 * retried is only ever set when stcb is non-NULL.
 			 */
 			LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 				if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 					/* wrong base scope */
 					continue;
 				}
 				LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 					struct sctp_ifa *tmp_sifa;
 
 					if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
 					    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifa->address.sin.sin_addr) != 0)) {
 						continue;
 					}
 #ifdef INET6
 					if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
 					    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 					    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
 						continue;
 					}
 #endif
 					if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 					    (non_asoc_addr_ok == 0))
 						continue;
 					tmp_sifa = sctp_is_ifa_addr_acceptable(sctp_ifa,
 					    dest_is_loop,
 					    dest_is_priv, fam);
 					if (tmp_sifa == NULL) {
 						continue;
 					}
 					if (tmp_sifa == sifa) {
 						continue;
 					}
 					if (stcb) {
 						if (sctp_is_address_in_scope(tmp_sifa,
 						    &stcb->asoc.scope, 0) == 0) {
 							continue;
 						}
 						if (((non_asoc_addr_ok == 0) &&
 						    (sctp_is_addr_restricted(stcb, tmp_sifa))) ||
 						    (non_asoc_addr_ok &&
 						    (sctp_is_addr_restricted(stcb, tmp_sifa)) &&
 						    (!sctp_is_addr_pending(stcb, tmp_sifa)))) {
 							/*
 							 * It is restricted
 							 * for some reason..
 							 * probably not yet
 							 * added.
 							 */
 							continue;
 						}
 					}
 					if ((tmp_sifa->address.sin.sin_family == AF_INET) &&
 					    (IN4_ISPRIVATE_ADDRESS(&(tmp_sifa->address.sin.sin_addr)))) {
 						sctp_add_local_addr_restricted(stcb, tmp_sifa);
 					}
 				}
 			}
 		}
 #endif
 		/*
 		 * Plans C and D hand exactly one reference to the caller,
 		 * whether or not INET is compiled in.
 		 */
 		atomic_add_int(&sifa->refcount, 1);
 	}
 	return (sifa);
 }
 
 /*
  * Choose the source address to use towards the destination in ro->ro_dst.
  *
  * A route is allocated into ro if none is cached; without a route NULL is
  * returned.  The destination is classified as loopback and/or private
  * (link-local for IPv6) and the choice is delegated, under the address
  * read lock, to the bound-all or bound-specific selector.
  *
  * stcb and net may be NULL.  When the destination is loopback and net is
  * given, net->addr_is_local is set.
  */
 struct sctp_ifa *
 sctp_source_address_selection(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     sctp_route_t *ro,
     struct sctp_nets *net,
     int non_asoc_addr_ok, uint32_t vrf_id)
 {
 	struct sctp_ifa *chosen;
 	uint8_t dest_is_priv = 0, dest_is_loop = 0;
 	sa_family_t fam;
 #ifdef INET
 	struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *to6 = (struct sockaddr_in6 *)&ro->ro_dst;
 #endif
 
 	/**
 	 * Rules:
 	 * - Find the route if needed and cache it when possible.
 	 * - If the interface address of the route is in the bound list, it
 	 *   is the best source.
 	 * - Otherwise rotate amongst the addresses.
 	 *
 	 * Scope matters: both the source and the destination can be private
 	 * or global.
 	 *
 	 * For V4
 	 * ------------------------------------------
 	 *      source     *      dest  *  result
 	 * ------------------------------------------
 	 * <a>  Private    *    Global  *  NAT
 	 * <b>  Private    *    Private *  No problem
 	 * <c>  Global     *    Private *  Huh, how will this work?
 	 * <d>  Global     *    Global  *  No problem
 	 * ------------------------------------------
 	 *
 	 * For V6
 	 * ------------------------------------------
 	 *      source     *      dest  *  result
 	 * ------------------------------------------
 	 * <a>  Linklocal  *    Global  *
 	 * <b>  Linklocal  * Linklocal  *  No problem
 	 * <c>  Global     * Linklocal  *  Huh, how will this work?
 	 * <d>  Global     *    Global  *  No problem
 	 * ------------------------------------------
 	 *
 	 * On top of that an interface can carry several addresses (the ifa
 	 * list of an ifn), possibly both a private and a global one. Should
 	 * the destination decide which one is best? And since P->G may get
 	 * NAT-translated, should the G on the interface be preferred?
 	 *
 	 * Decisions:
 	 * - Count the addresses on the interface.
 	 * - With exactly one there is no problem except case <c>; for <a>
 	 *   a NAT is assumed to be out there.
 	 * - With more than one, prefer G -> G and P -> P where possible and
 	 *   fall back to mixed types, G -> P being the last-ditch choice.
 	 * - All of this covers bound-all. Bound-specific uses the same idea
 	 *   restricted to the bound addresses; if the bound set is NOT
 	 *   assigned to the interface, rotate amongst the bound addresses.
 	 */
 	if (ro->ro_nh == NULL) {
 		/* Need a route to cache. */
 		SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
 		if (ro->ro_nh == NULL) {
 			return (NULL);
 		}
 	}
 	fam = ro->ro_dst.sa_family;
 	/* Classify the destination by its outbound address. */
 	switch (fam) {
 #ifdef INET
 	case AF_INET:
 		if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) {
 			dest_is_loop = 1;
 		} else if ((IN4_ISPRIVATE_ADDRESS(&to->sin_addr))) {
 			dest_is_priv = 1;
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		/*
 		 * "::1" and "fe80::1%lo0" are both loopback scope. For those
 		 * dest_is_priv (link-local) is deliberately not used.
 		 */
 		if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr) ||
 		    SCTP_ROUTE_IS_REAL_LOOP(ro)) {
 			dest_is_loop = 1;
 		} else if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) {
 			dest_is_priv = 1;
 		}
 		break;
 #endif
 	}
 	if (dest_is_loop && (net != NULL)) {
 		/* mark it as local */
 		net->addr_is_local = 1;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Select source addr for:");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&ro->ro_dst);
 
 	SCTP_IPI_ADDR_RLOCK();
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		/* Bound all case */
 		chosen = sctp_choose_boundall(inp, stcb, net, ro, vrf_id,
 		    dest_is_priv, dest_is_loop,
 		    non_asoc_addr_ok, fam);
 	} else if (stcb != NULL) {
 		/* Subset bound case, association known */
 		chosen = sctp_choose_boundspecific_stcb(inp, stcb, ro,
 		    vrf_id, dest_is_priv,
 		    dest_is_loop,
 		    non_asoc_addr_ok, fam);
 	} else {
 		/* Subset bound case, endpoint only */
 		chosen = sctp_choose_boundspecific_inp(inp, ro, vrf_id,
 		    non_asoc_addr_ok,
 		    dest_is_priv,
 		    dest_is_loop, fam);
 	}
 	SCTP_IPI_ADDR_RUNLOCK();
 	return (chosen);
 }
 
 /*
  * Look through the control data for an IPPROTO_SCTP cmsg of type c_type
  * and copy its payload out.
  *
  * - A cmsg whose type equals c_type has its first cpsize payload bytes
  *   copied to data and the function returns 1 at once.
  * - When c_type is SCTP_SNDRCV, SCTP_SNDINFO, SCTP_PRINFO and
  *   SCTP_AUTHINFO cmsgs are also accepted and folded into the
  *   struct sctp_sndrcvinfo at data (zeroed when the first of them is
  *   seen); scanning then continues.
  *
  * The scan length is SCTP_BUF_LEN(control).  Any truncated or undersized
  * cmsg ends the scan.  Returns 1 if something was stored in data, else 0.
  */
 static int
 sctp_find_cmsg(int c_type, void *data, struct mbuf *control, size_t cpsize)
 {
 	struct cmsghdr hdr;
 	struct sctp_sndinfo snd;
 	struct sctp_prinfo pr;
 	struct sctp_authinfo auth;
 	struct sctp_sndrcvinfo *info;
 	int total, left, payload_len, payload_off, pos;
 	int found = 0;
 
 	info = (struct sctp_sndrcvinfo *)data;
 	total = SCTP_BUF_LEN(control);
 	for (pos = 0; pos < total; pos += CMSG_ALIGN(hdr.cmsg_len)) {
 		left = total - pos;
 		if (left < (int)CMSG_ALIGN(sizeof(hdr))) {
 			/* No room left for another header. */
 			return (found);
 		}
 		m_copydata(control, pos, sizeof(hdr), (caddr_t)&hdr);
 		if (hdr.cmsg_len < CMSG_ALIGN(sizeof(hdr))) {
 			/* The header is not complete. */
 			return (found);
 		}
 		if ((hdr.cmsg_len > INT_MAX) || ((int)hdr.cmsg_len > left)) {
 			/* The cmsg claims more than what is left. */
 			return (found);
 		}
 		if (hdr.cmsg_level != IPPROTO_SCTP) {
 			continue;
 		}
 		payload_len = (int)hdr.cmsg_len - CMSG_ALIGN(sizeof(hdr));
 		payload_off = pos + CMSG_ALIGN(sizeof(hdr));
 
 		if (hdr.cmsg_type == c_type) {
 			/* Exactly the requested type: copy it out and stop. */
 			if ((cpsize > INT_MAX) || (payload_len < (int)cpsize)) {
 				return (found);
 			}
 			m_copydata(control, payload_off, (int)cpsize, (caddr_t)data);
 			return (1);
 		}
 		if (c_type != SCTP_SNDRCV) {
 			continue;
 		}
 		if ((hdr.cmsg_type != SCTP_SNDINFO) &&
 		    (hdr.cmsg_type != SCTP_PRINFO) &&
 		    (hdr.cmsg_type != SCTP_AUTHINFO)) {
 			continue;
 		}
 		/* Compose an sctp_sndrcvinfo from the newer info cmsgs. */
 		if (found == 0) {
 			if (cpsize < sizeof(struct sctp_sndrcvinfo)) {
 				return (found);
 			}
 			memset(info, 0, sizeof(struct sctp_sndrcvinfo));
 		}
 		switch (hdr.cmsg_type) {
 		case SCTP_SNDINFO:
 			if (payload_len < (int)sizeof(struct sctp_sndinfo)) {
 				return (found);
 			}
 			m_copydata(control, payload_off, sizeof(struct sctp_sndinfo), (caddr_t)&snd);
 			info->sinfo_stream = snd.snd_sid;
 			info->sinfo_flags = snd.snd_flags;
 			info->sinfo_ppid = snd.snd_ppid;
 			info->sinfo_context = snd.snd_context;
 			info->sinfo_assoc_id = snd.snd_assoc_id;
 			break;
 		case SCTP_PRINFO:
 			if (payload_len < (int)sizeof(struct sctp_prinfo)) {
 				return (found);
 			}
 			m_copydata(control, payload_off, sizeof(struct sctp_prinfo), (caddr_t)&pr);
 			info->sinfo_timetolive =
 			    (pr.pr_policy != SCTP_PR_SCTP_NONE) ? pr.pr_value : 0;
 			info->sinfo_flags |= pr.pr_policy;
 			break;
 		case SCTP_AUTHINFO:
 			if (payload_len < (int)sizeof(struct sctp_authinfo)) {
 				return (found);
 			}
 			m_copydata(control, payload_off, sizeof(struct sctp_authinfo), (caddr_t)&auth);
 			info->sinfo_keynumber_valid = 1;
 			info->sinfo_keynumber = auth.auth_keynumber;
 			break;
 		default:
 			/* Not reachable after the type filter above. */
 			return (found);
 		}
 		found = 1;
 	}
 	return (found);
 }
 
 /*
  * Apply the IPPROTO_SCTP control messages found in control to the
  * association stcb:
  *  - SCTP_INIT: non-zero fields of the sctp_initmsg override
  *    max_init_times, pre_open_streams, max_inbound_streams and
  *    initial_init_rto_max; if more outgoing streams are requested than
  *    currently exist, the outgoing stream array is replaced and every
  *    stream is (re)initialized.
  *  - SCTP_DSTADDRV4 / SCTP_DSTADDRV6: the address is validated and added
  *    as a confirmed remote address using stcb->rport (a V4-mapped IPv6
  *    address is converted to IPv4 first when INET is compiled in).
  * cmsgs of other levels or types are skipped.
  *
  * Returns 0 on success.  Returns 1 with *error set to EINVAL (malformed
  * cmsg or unusable address) or ENOBUFS (sctp_add_remote_addr() failed).
  * The scan length is SCTP_BUF_LEN(control).
  */
 static int
 sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error)
 {
 	struct cmsghdr cmh;
 	struct sctp_initmsg initmsg;
 #ifdef INET
 	struct sockaddr_in sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 #endif
 	int tot_len, rem_len, cmsg_data_len, cmsg_data_off, off;
 
 	tot_len = SCTP_BUF_LEN(control);
 	/* Step from one aligned cmsg to the next; cmh is refilled each pass. */
 	for (off = 0; off < tot_len; off += CMSG_ALIGN(cmh.cmsg_len)) {
 		rem_len = tot_len - off;
 		if (rem_len < (int)CMSG_ALIGN(sizeof(cmh))) {
 			/* There is not enough room for one more. */
 			*error = EINVAL;
 			return (1);
 		}
 		m_copydata(control, off, sizeof(cmh), (caddr_t)&cmh);
 		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
 			/* We don't have a complete CMSG header. */
 			*error = EINVAL;
 			return (1);
 		}
 		if ((cmh.cmsg_len > INT_MAX) || ((int)cmh.cmsg_len > rem_len)) {
 			/* We don't have the complete CMSG. */
 			*error = EINVAL;
 			return (1);
 		}
 		/* Payload length and offset, i.e. what follows the aligned header. */
 		cmsg_data_len = (int)cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh));
 		cmsg_data_off = off + CMSG_ALIGN(sizeof(cmh));
 		if (cmh.cmsg_level == IPPROTO_SCTP) {
 			switch (cmh.cmsg_type) {
 			case SCTP_INIT:
 				if (cmsg_data_len < (int)sizeof(struct sctp_initmsg)) {
 					*error = EINVAL;
 					return (1);
 				}
 				m_copydata(control, cmsg_data_off, sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
 				/* A zero field means "keep the current value". */
 				if (initmsg.sinit_max_attempts)
 					stcb->asoc.max_init_times = initmsg.sinit_max_attempts;
 				if (initmsg.sinit_num_ostreams)
 					stcb->asoc.pre_open_streams = initmsg.sinit_num_ostreams;
 				if (initmsg.sinit_max_instreams)
 					stcb->asoc.max_inbound_streams = initmsg.sinit_max_instreams;
 				if (initmsg.sinit_max_init_timeo)
 					stcb->asoc.initial_init_rto_max = initmsg.sinit_max_init_timeo;
 				if (stcb->asoc.streamoutcnt < stcb->asoc.pre_open_streams) {
 					struct sctp_stream_out *tmp_str;
 					unsigned int i;
 #if defined(SCTP_DETAILED_STR_STATS)
 					int j;
 #endif
 
 					/* Default is NOT correct */
 					SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n",
 					    stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams);
 					/*
 					 * The TCB lock is dropped around the
 					 * allocation and retaken afterwards.
 					 * NOTE(review): presumably because the
 					 * allocation may sleep -- confirm.
 					 */
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_MALLOC(tmp_str,
 					    struct sctp_stream_out *,
 					    (stcb->asoc.pre_open_streams * sizeof(struct sctp_stream_out)),
 					    SCTP_M_STRMO);
 					SCTP_TCB_LOCK(stcb);
 					if (tmp_str != NULL) {
 						/* Swap in the larger array; the old contents are not copied. */
 						SCTP_FREE(stcb->asoc.strmout, SCTP_M_STRMO);
 						stcb->asoc.strmout = tmp_str;
 						stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt = stcb->asoc.pre_open_streams;
 					} else {
 						/* Allocation failed: stay with the existing stream count. */
 						stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt;
 					}
 					/* (Re)initialize every outgoing stream, in either case. */
 					for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 						TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
 						stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], NULL);
 						stcb->asoc.strmout[i].chunks_on_queues = 0;
 #if defined(SCTP_DETAILED_STR_STATS)
 						for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 							stcb->asoc.strmout[i].abandoned_sent[j] = 0;
 							stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
 						}
 #else
 						stcb->asoc.strmout[i].abandoned_sent[0] = 0;
 						stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
 #endif
 						stcb->asoc.strmout[i].next_mid_ordered = 0;
 						stcb->asoc.strmout[i].next_mid_unordered = 0;
 						stcb->asoc.strmout[i].sid = i;
 						stcb->asoc.strmout[i].last_msg_incomplete = 0;
 						stcb->asoc.strmout[i].state = SCTP_STREAM_OPENING;
 					}
 				}
 				break;
 #ifdef INET
 			case SCTP_DSTADDRV4:
 				if (cmsg_data_len < (int)sizeof(struct in_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
 				/* Build a sockaddr_in around the raw address in the cmsg. */
 				memset(&sin, 0, sizeof(struct sockaddr_in));
 				sin.sin_family = AF_INET;
 				sin.sin_len = sizeof(struct sockaddr_in);
 				sin.sin_port = stcb->rport;
 				m_copydata(control, cmsg_data_off, sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
 				/* Wildcard, broadcast and multicast are not valid peers. */
 				if ((sin.sin_addr.s_addr == INADDR_ANY) ||
 				    (sin.sin_addr.s_addr == INADDR_BROADCAST) ||
 				    IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
 					*error = EINVAL;
 					return (1);
 				}
 				if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, stcb->asoc.port,
 				    SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 					*error = ENOBUFS;
 					return (1);
 				}
 				break;
 #endif
 #ifdef INET6
 			case SCTP_DSTADDRV6:
 				if (cmsg_data_len < (int)sizeof(struct in6_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
 				/* Build a sockaddr_in6 around the raw address in the cmsg. */
 				memset(&sin6, 0, sizeof(struct sockaddr_in6));
 				sin6.sin6_family = AF_INET6;
 				sin6.sin6_len = sizeof(struct sockaddr_in6);
 				sin6.sin6_port = stcb->rport;
 				m_copydata(control, cmsg_data_off, sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
 				if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) ||
 				    IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
 #ifdef INET
 				/* A V4-mapped address is added as a plain IPv4 address. */
 				if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
 					in6_sin6_2_sin(&sin, &sin6);
 					if ((sin.sin_addr.s_addr == INADDR_ANY) ||
 					    (sin.sin_addr.s_addr == INADDR_BROADCAST) ||
 					    IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
 						*error = EINVAL;
 						return (1);
 					}
 					if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, stcb->asoc.port,
 					    SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 						*error = ENOBUFS;
 						return (1);
 					}
 				} else
 #endif
 					/* With INET, this "if" is the body of the "else" above. */
 					if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, NULL, stcb->asoc.port,
 				    SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 					*error = ENOBUFS;
 					return (1);
 				}
 				break;
 #endif
 			default:
 				/* Other SCTP cmsg types are not handled here. */
 				break;
 			}
 		}
 	}
 	return (0);
 }
 
 #if defined(INET) || defined(INET6)
 /*
  * Find an existing association through the destination addresses carried
  * in SCTP_DSTADDRV4 / SCTP_DSTADDRV6 control messages.
  *
  * Each such address is combined with port and handed to
  * sctp_findassociation_ep_addr() together with inp_p and net_p; the first
  * association found is returned.  With INET compiled in, a V4-mapped IPv6
  * address is looked up as an IPv4 address.
  *
  * Returns NULL when no association matches; *error is then left alone.
  * Returns NULL with *error set to EINVAL when a cmsg is malformed.
  * The scan length is SCTP_BUF_LEN(control).
  */
 static struct sctp_tcb *
 sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
     uint16_t port,
     struct mbuf *control,
     struct sctp_nets **net_p,
     int *error)
 {
 	struct cmsghdr hdr;
 	struct sctp_tcb *assoc;
 	struct sockaddr *dst;
 #ifdef INET
 	struct sockaddr_in sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 #endif
 	int total, left, payload_len, payload_off, pos;
 
 	total = SCTP_BUF_LEN(control);
 	for (pos = 0; pos < total; pos += CMSG_ALIGN(hdr.cmsg_len)) {
 		left = total - pos;
 		if (left < (int)CMSG_ALIGN(sizeof(hdr))) {
 			/* No room left for another header. */
 			goto malformed;
 		}
 		m_copydata(control, pos, sizeof(hdr), (caddr_t)&hdr);
 		if (hdr.cmsg_len < CMSG_ALIGN(sizeof(hdr))) {
 			/* The header is not complete. */
 			goto malformed;
 		}
 		if ((hdr.cmsg_len > INT_MAX) || ((int)hdr.cmsg_len > left)) {
 			/* The cmsg claims more than what is left. */
 			goto malformed;
 		}
 		if (hdr.cmsg_level != IPPROTO_SCTP) {
 			continue;
 		}
 		payload_len = (int)hdr.cmsg_len - CMSG_ALIGN(sizeof(hdr));
 		payload_off = pos + CMSG_ALIGN(sizeof(hdr));
 
 		dst = NULL;
 		switch (hdr.cmsg_type) {
 #ifdef INET
 		case SCTP_DSTADDRV4:
 			if (payload_len < (int)sizeof(struct in_addr)) {
 				goto malformed;
 			}
 			memset(&sin, 0, sizeof(struct sockaddr_in));
 			sin.sin_family = AF_INET;
 			sin.sin_len = sizeof(struct sockaddr_in);
 			sin.sin_port = port;
 			m_copydata(control, payload_off, sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
 			dst = (struct sockaddr *)&sin;
 			break;
 #endif
 #ifdef INET6
 		case SCTP_DSTADDRV6:
 			if (payload_len < (int)sizeof(struct in6_addr)) {
 				goto malformed;
 			}
 			memset(&sin6, 0, sizeof(struct sockaddr_in6));
 			sin6.sin6_family = AF_INET6;
 			sin6.sin6_len = sizeof(struct sockaddr_in6);
 			sin6.sin6_port = port;
 			m_copydata(control, payload_off, sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
 			dst = (struct sockaddr *)&sin6;
 #ifdef INET
 			if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
 				/* Look a V4-mapped address up as IPv4. */
 				in6_sin6_2_sin(&sin, &sin6);
 				dst = (struct sockaddr *)&sin;
 			}
 #endif
 			break;
 #endif
 		default:
 			/* Not a destination address cmsg. */
 			break;
 		}
 		if (dst == NULL) {
 			continue;
 		}
 		assoc = sctp_findassociation_ep_addr(inp_p, dst, net_p, NULL, NULL);
 		if (assoc != NULL) {
 			return (assoc);
 		}
 	}
 	return (NULL);
 
 malformed:
 	*error = EINVAL;
 	return (NULL);
 }
 #endif
 
 /*
  * Build the STATE-COOKIE parameter as an mbuf chain laid out as
  *
  *   [param header + state cookie] -> [copy of INIT] -> [copy of INIT-ACK]
  *   -> [signature]
  *
  * The state cookie contents are copied from 'stc_in'.  The INIT and INIT-ACK
  * are copied starting at 'init_offset' / 'initack_offset'.  The signature
  * area is zero-filled and its address is handed back through '*signature'.
  *
  * Returns the head of the chain, or NULL if any mbuf allocation or copy
  * failed (everything allocated so far is freed in that case).
  */
 static struct mbuf *
 sctp_add_cookie(struct mbuf *init, int init_offset,
     struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t **signature)
 {
 	struct mbuf *head, *init_dup, *initack_dup, *tail, *sig_m;
 	struct sctp_paramhdr *ph;
 	uint16_t total;
 
 	head = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
 	    sizeof(struct sctp_paramhdr)), 0,
 	    M_NOWAIT, 1, MT_DATA);
 	if (head == NULL) {
 		return (NULL);
 	}
 	init_dup = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_NOWAIT);
 	if (init_dup == NULL) {
 		sctp_m_freem(head);
 		return (NULL);
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(init_dup, SCTP_MBUF_ICOPY);
 	}
 #endif
 	initack_dup = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
 	    M_NOWAIT);
 	if (initack_dup == NULL) {
 		sctp_m_freem(head);
 		sctp_m_freem(init_dup);
 		return (NULL);
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(initack_dup, SCTP_MBUF_ICOPY);
 	}
 #endif
 	/* Parameter header, immediately followed by the state cookie. */
 	SCTP_BUF_LEN(head) = sizeof(struct sctp_state_cookie) +
 	    sizeof(struct sctp_paramhdr);
 	ph = mtod(head, struct sctp_paramhdr *);
 	ph->param_type = htons(SCTP_STATE_COOKIE);
 	ph->param_length = 0;	/* patched once the total size is known */
 	memcpy(ph + 1, stc_in, sizeof(struct sctp_state_cookie));
 
 	/* Walk to the end of the header part and hang the INIT copy there. */
 	tail = head;
 	total = SCTP_BUF_LEN(tail);
 	while (SCTP_BUF_NEXT(tail) != NULL) {
 		tail = SCTP_BUF_NEXT(tail);
 		total += SCTP_BUF_LEN(tail);
 	}
 	SCTP_BUF_NEXT(tail) = init_dup;
 
 	/* Walk the INIT copy and hang the INIT-ACK copy on its end. */
 	tail = init_dup;
 	total += SCTP_BUF_LEN(tail);
 	while (SCTP_BUF_NEXT(tail) != NULL) {
 		tail = SCTP_BUF_NEXT(tail);
 		total += SCTP_BUF_LEN(tail);
 	}
 	SCTP_BUF_NEXT(tail) = initack_dup;
 
 	/* Walk the INIT-ACK copy; 'tail' ends up on the last mbuf overall. */
 	tail = initack_dup;
 	total += SCTP_BUF_LEN(tail);
 	while (SCTP_BUF_NEXT(tail) != NULL) {
 		tail = SCTP_BUF_NEXT(tail);
 		total += SCTP_BUF_LEN(tail);
 	}
 
 	sig_m = sctp_get_mbuf_for_msg(SCTP_SIGNATURE_SIZE, 0, M_NOWAIT, 1, MT_DATA);
 	if (sig_m == NULL) {
 		/* The copies are already linked in, so this frees them too. */
 		sctp_m_freem(head);
 		return (NULL);
 	}
 	SCTP_BUF_LEN(sig_m) = SCTP_SIGNATURE_SIZE;
 	SCTP_BUF_NEXT(tail) = sig_m;
 	total += SCTP_SIGNATURE_SIZE;
 	ph->param_length = htons(total);
 
 	/* Hand out a zeroed signature area for the caller to fill in. */
 	*signature = (uint8_t *)mtod(sig_m, caddr_t);
 	memset(*signature, 0, SCTP_SIGNATURE_SIZE);
 	return (head);
 }
 
 /*
  * Return the ECT codepoint bits to OR into the TOS / traffic class byte:
  * SCTP_ECT0_BIT when an association is given and its ecn_supported field
  * equals 1, otherwise 0.
  */
 static uint8_t
 sctp_get_ect(struct sctp_tcb *stcb)
 {
 	if ((stcb == NULL) || (stcb->asoc.ecn_supported != 1)) {
 		return (0);
 	}
 	return (SCTP_ECT0_BIT);
 }
 
 #if defined(INET) || defined(INET6)
 /*
  * Bookkeeping for a send attempt that failed because no route or usable
  * source address was available for 'net'.
  *
  * - If the destination is confirmed and reachable and an association is
  *   given, the ULP is notified (SCTP_NOTIFY_INTERFACE_DOWN) and the
  *   REACHABLE and PF bits are cleared.
  * - If 'net' is the association's primary destination and
  *   sctp_find_alternate_net() returns a different net, that net becomes
  *   asoc.alternate (with a reference taken) and the cached source address
  *   of 'net' is dropped.
  *
  * Both 'stcb' and 'net' may be NULL; with net == NULL only the debug
  * message is emitted.
  */
 static void
 sctp_handle_no_route(struct sctp_tcb *stcb,
     struct sctp_nets *net,
     int so_locked)
 {
 	struct sctp_nets *replacement;
 
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "dropped packet - no valid source addr\n");
 
 	if (net == NULL) {
 		return;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination was ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1, &net->ro._l_addr.sa);
 	if ((net->dest_state & SCTP_ADDR_CONFIRMED) &&
 	    (net->dest_state & SCTP_ADDR_REACHABLE) &&
 	    (stcb != NULL)) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", (void *)net);
 		sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
 		    stcb, 0,
 		    (void *)net,
 		    so_locked);
 		net->dest_state &= ~(SCTP_ADDR_REACHABLE | SCTP_ADDR_PF);
 	}
 	if ((stcb == NULL) || (net != stcb->asoc.primary_destination)) {
 		return;
 	}
 	/* The primary is the one that failed: look for a stand-in. */
 	replacement = sctp_find_alternate_net(stcb, net, 0);
 	if (replacement == net) {
 		return;
 	}
 	if (stcb->asoc.alternate) {
 		sctp_free_remote_addr(stcb->asoc.alternate);
 	}
 	stcb->asoc.alternate = replacement;
 	atomic_add_int(&stcb->asoc.alternate->ref_count, 1);
 	/* Force a fresh source address selection on the next send. */
 	if (net->ro._s_addr) {
 		sctp_free_ifa(net->ro._s_addr);
 		net->ro._s_addr = NULL;
 	}
 	net->src_addr_selected = 0;
 }
 #endif
 
 /*
  * Prepend the IP header (plus a UDP header when 'port' is non-zero) and fill
  * in the SCTP common header in front of the chunk chain 'm', then hand the
  * packet to the IPv4 or IPv6 output macro selected by to->sa_family.
  *
  * 'stcb' may be NULL.  'net' may be NULL as well; in that case a route
  * structure on the stack is used and its next hop is released after the
  * output call.  'over_addr', when non-NULL, supplies the source address for
  * the path that does not use the cached per-net source address.
  * 'port' is stored into uh_dport as is, so it is presumably already in
  * network byte order -- confirm against the callers.
  *
  * Every error return in this function frees 'm' before returning.
  *
  * Returns the value produced by the IP output macro, or EFAULT (destination
  * out of scope / unknown address family), ENOMEM (no mbuf for the headers),
  * EHOSTUNREACH (no source address, no route, or UDP tunneling port unset),
  * EINVAL (IPv6 scope embedding failed), or the error reported by
  * sa6_recoverscope().
  */
 static int
 sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,	/* may be NULL */
     struct sctp_nets *net,
     struct sockaddr *to,
     struct mbuf *m,
     uint32_t auth_offset,
     struct sctp_auth_chunk *auth,
     uint16_t auth_keyid,
     int nofragment_flag,
     int ecn_ok,
     int out_of_asoc_ok,
     uint16_t src_port,
     uint16_t dest_port,
     uint32_t v_tag,
     uint16_t port,
     union sctp_sockstore *over_addr,
     uint8_t mflowtype, uint32_t mflowid,
     int so_locked)
 {
 /* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
 	/**
 	 * Given a mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header
 	 * WITH an SCTPHDR but no IP header, endpoint inp and sa structure:
 	 * - fill in the HMAC digest of any AUTH chunk in the packet.
 	 * - calculate and fill in the SCTP checksum.
 	 * - prepend an IP address header.
 	 * - if boundall use INADDR_ANY.
 	 * - if boundspecific do source address selection.
 	 * - set fragmentation option for ipV4.
 	 * - On return from IP output, check/adjust mtu size of output
 	 *   interface and smallest_mtu size as well.
 	 */
 	/* Will need ifdefs around this */
 	struct mbuf *newm;
 	struct sctphdr *sctphdr;
 	int packet_length;
 	int ret;
 #if defined(INET) || defined(INET6)
 	uint32_t vrf_id;
 #endif
 #if defined(INET) || defined(INET6)
 	struct mbuf *o_pak;
 	sctp_route_t *ro = NULL;
 	struct udphdr *udp = NULL;
 #endif
 	uint8_t tos_value;
 
 	if ((net) && (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)) {
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		sctp_m_freem(m);
 		return (EFAULT);
 	}
 #if defined(INET) || defined(INET6)
 	if (stcb) {
 		vrf_id = stcb->asoc.vrf_id;
 	} else {
 		vrf_id = inp->def_vrf_id;
 	}
 #endif
 	/* fill in the HMAC digest for any AUTH chunk in the packet */
 	if ((auth != NULL) && (stcb != NULL)) {
 		sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid);
 	}
 
 	/* DSCP preference: per-destination, then association, then endpoint. */
 	if (net) {
 		tos_value = net->dscp;
 	} else if (stcb) {
 		tos_value = stcb->asoc.default_dscp;
 	} else {
 		tos_value = inp->sctp_ep.default_dscp;
 	}
 
 	switch (to->sa_family) {
 #ifdef INET
 	case AF_INET:
 		{
 			struct ip *ip = NULL;
 			sctp_route_t iproute;
 			int len;
 
 			len = SCTP_MIN_V4_OVERHEAD;
 			if (port) {
 				len += sizeof(struct udphdr);
 			}
 			newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
 			if (newm == NULL) {
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			SCTP_ALIGN_TO_END(newm, len);
 			SCTP_BUF_LEN(newm) = len;
 			SCTP_BUF_NEXT(newm) = m;
 			m = newm;
 			if (net != NULL) {
 				m->m_pkthdr.flowid = net->flowid;
 				M_HASHTYPE_SET(m, net->flowtype);
 			} else {
 				m->m_pkthdr.flowid = mflowid;
 				M_HASHTYPE_SET(m, mflowtype);
 			}
 			packet_length = sctp_calculate_len(m);
 			ip = mtod(m, struct ip *);
 			ip->ip_v = IPVERSION;
 			ip->ip_hl = (sizeof(struct ip) >> 2);
 			if (tos_value == 0) {
 				/*
 				 * This means especially, that it is not set
 				 * at the SCTP layer. So use the value from
 				 * the IP layer.
 				 */
 				tos_value = inp->ip_inp.inp.inp_ip_tos;
 			}
 			tos_value &= 0xfc;
 			if (ecn_ok) {
 				tos_value |= sctp_get_ect(stcb);
 			}
 			/* DF is only requested when not UDP encapsulated. */
 			if ((nofragment_flag) && (port == 0)) {
 				ip->ip_off = htons(IP_DF);
 			} else {
 				ip->ip_off = htons(0);
 			}
 			/* FreeBSD has a function for ip_id's */
 			ip_fillid(ip);
 
 			ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
 			ip->ip_len = htons(packet_length);
 			ip->ip_tos = tos_value;
 			if (port) {
 				ip->ip_p = IPPROTO_UDP;
 			} else {
 				ip->ip_p = IPPROTO_SCTP;
 			}
 			ip->ip_sum = 0;
 			if (net == NULL) {
 				/* No cached route: use a temporary one. */
 				ro = &iproute;
 				memset(&iproute, 0, sizeof(iproute));
 				memcpy(&ro->ro_dst, to, to->sa_len);
 			} else {
 				ro = (sctp_route_t *)&net->ro;
 			}
 			/* Now the address selection part */
 			ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr;
 
 			/* call the routine to select the src address */
 			if (net && out_of_asoc_ok == 0) {
 				if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
 					sctp_free_ifa(net->ro._s_addr);
 					net->ro._s_addr = NULL;
 					net->src_addr_selected = 0;
 					RO_NHFREE(ro);
 				}
 				if (net->src_addr_selected == 0) {
 					/* Cache the source address */
 					net->ro._s_addr = sctp_source_address_selection(inp, stcb,
 					    ro, net, 0,
 					    vrf_id);
 					net->src_addr_selected = 1;
 				}
 				if (net->ro._s_addr == NULL) {
 					/* No route to host */
 					net->src_addr_selected = 0;
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				ip->ip_src = net->ro._s_addr->address.sin.sin_addr;
 			} else {
 				if (over_addr == NULL) {
 					struct sctp_ifa *_lsrc;
 
 					_lsrc = sctp_source_address_selection(inp, stcb, ro,
 					    net,
 					    out_of_asoc_ok,
 					    vrf_id);
 					if (_lsrc == NULL) {
 						sctp_handle_no_route(stcb, net, so_locked);
 						SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 						sctp_m_freem(m);
 						return (EHOSTUNREACH);
 					}
 					ip->ip_src = _lsrc->address.sin.sin_addr;
 					sctp_free_ifa(_lsrc);
 				} else {
 					ip->ip_src = over_addr->sin.sin_addr;
 					SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
 				}
 			}
 			if (port) {
 				if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip));
 				udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
 				udp->uh_dport = port;
 				udp->uh_ulen = htons((uint16_t)(packet_length - sizeof(struct ip)));
 				if (V_udp_cksum) {
 					udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
 				} else {
 					udp->uh_sum = 0;
 				}
 				sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
 			} else {
 				sctphdr = (struct sctphdr *)((caddr_t)ip + sizeof(struct ip));
 			}
 
 			sctphdr->src_port = src_port;
 			sctphdr->dest_port = dest_port;
 			sctphdr->v_tag = v_tag;
 			sctphdr->checksum = 0;
 
 			/*
 			 * If source address selection fails and we find no
 			 * route then the ip_output should fail as well with
 			 * a NO_ROUTE_TO_HOST type error. We probably should
 			 * catch that somewhere and abort the association
 			 * right away (assuming this is an INIT being sent).
 			 */
 			if (ro->ro_nh == NULL) {
 				/*
 				 * src addr selection failed to find a route
 				 * (or valid source addr), so we can't get
 				 * there from here (yet)!
 				 */
 				sctp_handle_no_route(stcb, net, so_locked);
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 				sctp_m_freem(m);
 				return (EHOSTUNREACH);
 			}
 			if (ro != &iproute) {
 				memcpy(&iproute, ro, sizeof(*ro));
 			}
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n",
 			    (uint32_t)(ntohl(ip->ip_src.s_addr)));
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
 			    (uint32_t)(ntohl(ip->ip_dst.s_addr)));
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
 			    (void *)ro->ro_nh);
 
 			if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
 				/* failed to prepend data, give up */
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				sctp_m_freem(m);
 				return (ENOMEM);
 			}
 			SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
 			if (port) {
 				/* UDP encapsulated: CRC32c is computed in software. */
 				sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip) + sizeof(struct udphdr));
 				SCTP_STAT_INCR(sctps_sendswcrc);
 				if (V_udp_cksum) {
 					SCTP_ENABLE_UDP_CSUM(o_pak);
 				}
 			} else {
 				/* Plain SCTP: request the checksum via csum_flags. */
 				m->m_pkthdr.csum_flags = CSUM_SCTP;
 				m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 				SCTP_STAT_INCR(sctps_sendhwcrc);
 			}
 #ifdef SCTP_PACKET_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
 				sctp_packet_log(o_pak);
 #endif
 			/* send it out.  table id is taken from stcb */
 			SCTP_PROBE5(send, NULL, stcb, ip, stcb, sctphdr);
 			SCTP_IP_OUTPUT(ret, o_pak, ro, inp, vrf_id);
 			if (port) {
 				UDPSTAT_INC(udps_opackets);
 			}
 			SCTP_STAT_INCR(sctps_sendpackets);
 			SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
 			if (ret)
 				SCTP_STAT_INCR(sctps_senderrors);
 
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
 			if (net == NULL) {
 				/* free tempy routes */
 				RO_NHFREE(ro);
 			} else {
 				if ((ro->ro_nh != NULL) && (net->ro._s_addr) &&
 				    ((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) {
 					uint32_t mtu;
 
 					mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_nh);
 					if (mtu > 0) {
 						if (net->port) {
 							mtu -= sizeof(struct udphdr);
 						}
 						if (mtu < net->mtu) {
 							net->mtu = mtu;
 							if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) {
 								sctp_pathmtu_adjustment(stcb, mtu, true);
 							}
 						}
 					}
 				} else if (ro->ro_nh == NULL) {
 					/* route was freed */
 					if (net->ro._s_addr &&
 					    net->src_addr_selected) {
 						sctp_free_ifa(net->ro._s_addr);
 						net->ro._s_addr = NULL;
 					}
 					net->src_addr_selected = 0;
 				}
 			}
 			return (ret);
 		}
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			uint32_t flowlabel, flowinfo;
 			struct ip6_hdr *ip6h;
 			struct route_in6 ip6route;
 			struct ifnet *ifp;
 			struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp;
 			int prev_scope = 0;
 			struct sockaddr_in6 lsa6_storage;
 			int error;
 			u_short prev_port = 0;
 			int len;
 
 			/* Flow label preference mirrors the DSCP selection above. */
 			if (net) {
 				flowlabel = net->flowlabel;
 			} else if (stcb) {
 				flowlabel = stcb->asoc.default_flowlabel;
 			} else {
 				flowlabel = inp->sctp_ep.default_flowlabel;
 			}
 			if (flowlabel == 0) {
 				/*
 				 * This means especially, that it is not set
 				 * at the SCTP layer. So use the value from
 				 * the IP layer.
 				 */
 				flowlabel = ntohl(((struct inpcb *)inp)->inp_flow);
 			}
 			flowlabel &= 0x000fffff;
 			len = SCTP_MIN_OVERHEAD;
 			if (port) {
 				len += sizeof(struct udphdr);
 			}
 			newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
 			if (newm == NULL) {
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			SCTP_ALIGN_TO_END(newm, len);
 			SCTP_BUF_LEN(newm) = len;
 			SCTP_BUF_NEXT(newm) = m;
 			m = newm;
 			if (net != NULL) {
 				m->m_pkthdr.flowid = net->flowid;
 				M_HASHTYPE_SET(m, net->flowtype);
 			} else {
 				m->m_pkthdr.flowid = mflowid;
 				M_HASHTYPE_SET(m, mflowtype);
 			}
 			packet_length = sctp_calculate_len(m);
 
 			ip6h = mtod(m, struct ip6_hdr *);
 			/* protect *sin6 from overwrite */
 			sin6 = (struct sockaddr_in6 *)to;
 			tmp = *sin6;
 			sin6 = &tmp;
 
 			/* KAME hack: embed scopeid */
 			if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 				sctp_m_freem(m);
 				return (EINVAL);
 			}
 			if (net == NULL) {
 				/* No cached route: use a temporary one. */
 				memset(&ip6route, 0, sizeof(ip6route));
 				ro = (sctp_route_t *)&ip6route;
 				memcpy(&ro->ro_dst, sin6, sin6->sin6_len);
 			} else {
 				ro = (sctp_route_t *)&net->ro;
 			}
 			/*
 			 * We assume here that inp_flow is in host byte
 			 * order within the TCB!
 			 */
 			if (tos_value == 0) {
 				/*
 				 * This means especially, that it is not set
 				 * at the SCTP layer. So use the value from
 				 * the IP layer.
 				 */
 				tos_value = (ntohl(((struct inpcb *)inp)->inp_flow) >> 20) & 0xff;
 			}
 			tos_value &= 0xfc;
 			if (ecn_ok) {
 				tos_value |= sctp_get_ect(stcb);
 			}
 			/* Assemble version (6) | traffic class | flow label. */
 			flowinfo = 0x06;
 			flowinfo <<= 8;
 			flowinfo |= tos_value;
 			flowinfo <<= 20;
 			flowinfo |= flowlabel;
 			ip6h->ip6_flow = htonl(flowinfo);
 			if (port) {
 				ip6h->ip6_nxt = IPPROTO_UDP;
 			} else {
 				ip6h->ip6_nxt = IPPROTO_SCTP;
 			}
 			ip6h->ip6_plen = htons((uint16_t)(packet_length - sizeof(struct ip6_hdr)));
 			ip6h->ip6_dst = sin6->sin6_addr;
 
 			/*
 			 * Add SRC address selection here: we can only reuse
 			 * to a limited degree the kame src-addr-sel, since
 			 * we can try their selection but it may not be
 			 * bound.
 			 */
 			memset(&lsa6_tmp, 0, sizeof(lsa6_tmp));
 			lsa6_tmp.sin6_family = AF_INET6;
 			lsa6_tmp.sin6_len = sizeof(lsa6_tmp);
 			lsa6 = &lsa6_tmp;
 			if (net && out_of_asoc_ok == 0) {
 				if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
 					sctp_free_ifa(net->ro._s_addr);
 					net->ro._s_addr = NULL;
 					net->src_addr_selected = 0;
 					RO_NHFREE(ro);
 				}
 				if (net->src_addr_selected == 0) {
 					sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 					/* KAME hack: embed scopeid */
 					if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
 						SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 						sctp_m_freem(m);
 						return (EINVAL);
 					}
 					/* Cache the source address */
 					net->ro._s_addr = sctp_source_address_selection(inp,
 					    stcb,
 					    ro,
 					    net,
 					    0,
 					    vrf_id);
 					(void)sa6_recoverscope(sin6);
 					net->src_addr_selected = 1;
 				}
 				if (net->ro._s_addr == NULL) {
 					SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n");
 					net->src_addr_selected = 0;
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr;
 			} else {
 				sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
 				/* KAME hack: embed scopeid */
 				if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 					sctp_m_freem(m);
 					return (EINVAL);
 				}
 				if (over_addr == NULL) {
 					struct sctp_ifa *_lsrc;
 
 					_lsrc = sctp_source_address_selection(inp, stcb, ro,
 					    net,
 					    out_of_asoc_ok,
 					    vrf_id);
 					if (_lsrc == NULL) {
 						sctp_handle_no_route(stcb, net, so_locked);
 						SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 						sctp_m_freem(m);
 						return (EHOSTUNREACH);
 					}
 					lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr;
 					sctp_free_ifa(_lsrc);
 				} else {
 					lsa6->sin6_addr = over_addr->sin6.sin6_addr;
 					SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
 				}
 				(void)sa6_recoverscope(sin6);
 			}
 			lsa6->sin6_port = inp->sctp_lport;
 
 			if (ro->ro_nh == NULL) {
 				/*
 				 * src addr selection failed to find a route
 				 * (or valid source addr), so we can't get
 				 * there from here!
 				 */
 				sctp_handle_no_route(stcb, net, so_locked);
 				SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 				sctp_m_freem(m);
 				return (EHOSTUNREACH);
 			}
 			/*
 			 * XXX: sa6 may not have a valid sin6_scope_id in
 			 * the non-SCOPEDROUTING case.
 			 */
 			memset(&lsa6_storage, 0, sizeof(lsa6_storage));
 			lsa6_storage.sin6_family = AF_INET6;
 			lsa6_storage.sin6_len = sizeof(lsa6_storage);
 			lsa6_storage.sin6_addr = lsa6->sin6_addr;
 			if ((error = sa6_recoverscope(&lsa6_storage)) != 0) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error);
 				sctp_m_freem(m);
 				return (error);
 			}
 			/* XXX */
 			lsa6_storage.sin6_addr = lsa6->sin6_addr;
 			lsa6_storage.sin6_port = inp->sctp_lport;
 			lsa6 = &lsa6_storage;
 			ip6h->ip6_src = lsa6->sin6_addr;
 
 			if (port) {
 				if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
 					sctp_handle_no_route(stcb, net, so_locked);
 					SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
 					sctp_m_freem(m);
 					return (EHOSTUNREACH);
 				}
 				udp = (struct udphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr));
 				udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
 				udp->uh_dport = port;
 				udp->uh_ulen = htons((uint16_t)(packet_length - sizeof(struct ip6_hdr)));
 				udp->uh_sum = 0;
 				sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
 			} else {
 				sctphdr = (struct sctphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr));
 			}
 
 			sctphdr->src_port = src_port;
 			sctphdr->dest_port = dest_port;
 			sctphdr->v_tag = v_tag;
 			sctphdr->checksum = 0;
 
 			/*
 			 * We set the hop limit now since there is a good
 			 * chance that our ro pointer is now filled
 			 */
 			ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro);
 			ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
 
 #ifdef SCTP_DEBUG
 			/* Copy to be sure something bad is not happening */
 			sin6->sin6_addr = ip6h->ip6_dst;
 			lsa6->sin6_addr = ip6h->ip6_src;
 #endif
 
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n");
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: ");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6);
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: ");
 			SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6);
 			if (net) {
 				sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 				/*
 				 * preserve the port and scope for link
 				 * local send
 				 */
 				prev_scope = sin6->sin6_scope_id;
 				prev_port = sin6->sin6_port;
 			}
 
 			if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
 				/* failed to prepend data, give up */
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
 			if (port) {
 				sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
 				SCTP_STAT_INCR(sctps_sendswcrc);
 				/* A computed UDP checksum of 0 is sent as 0xffff. */
 				if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), packet_length - sizeof(struct ip6_hdr))) == 0) {
 					udp->uh_sum = 0xffff;
 				}
 			} else {
 				m->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
 				m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 				SCTP_STAT_INCR(sctps_sendhwcrc);
 			}
 			/* send it out. table id is taken from stcb */
 #ifdef SCTP_PACKET_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
 				sctp_packet_log(o_pak);
 #endif
 			SCTP_PROBE5(send, NULL, stcb, ip6h, stcb, sctphdr);
 			SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, inp, vrf_id);
 			if (net) {
 				/* for link local this must be done */
 				sin6->sin6_scope_id = prev_scope;
 				sin6->sin6_port = prev_port;
 			}
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
 			if (port) {
 				UDPSTAT_INC(udps_opackets);
 			}
 			SCTP_STAT_INCR(sctps_sendpackets);
 			SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
 			if (ret) {
 				SCTP_STAT_INCR(sctps_senderrors);
 			}
 			if (net == NULL) {
 				/* Now if we had a temp route free it */
 				RO_NHFREE(ro);
 			} else {
 				/*
 				 * PMTU check versus smallest asoc MTU goes
 				 * here
 				 */
 				if (ro->ro_nh == NULL) {
 					/* Route was freed */
 					if (net->ro._s_addr &&
 					    net->src_addr_selected) {
 						sctp_free_ifa(net->ro._s_addr);
 						net->ro._s_addr = NULL;
 					}
 					net->src_addr_selected = 0;
 				}
 				if ((ro->ro_nh != NULL) && (net->ro._s_addr) &&
 				    ((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) {
 					uint32_t mtu;
 
 					mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_nh);
 					if (mtu > 0) {
 						if (net->port) {
 							mtu -= sizeof(struct udphdr);
 						}
 						if (mtu < net->mtu) {
 							net->mtu = mtu;
 							if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) {
 								sctp_pathmtu_adjustment(stcb, mtu, false);
 							}
 						}
 					}
 				} else if (ifp != NULL) {
 					/*
 					 * Fall back to the interface link MTU.
 					 * stcb may be NULL, so test it before
 					 * touching the association, just like
 					 * the route based branches do.
 					 */
 					if ((stcb != NULL) &&
 					    (ND_IFINFO(ifp)->linkmtu > 0) &&
 					    (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) {
 						sctp_pathmtu_adjustment(stcb, ND_IFINFO(ifp)->linkmtu, false);
 					}
 				}
 			}
 			return (ret);
 		}
 #endif
 	default:
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
 		    ((struct sockaddr *)to)->sa_family);
 		/* Trace before freeing so 'm' is never referenced after release. */
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		sctp_m_freem(m);
 		return (EFAULT);
 	}
 }
 
 void
 sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked)
 {
 	struct mbuf *m, *m_last;
 	struct sctp_nets *net;
 	struct sctp_init_chunk *init;
 	struct sctp_supported_addr_param *sup_addr;
 	struct sctp_adaptation_layer_indication *ali;
 	struct sctp_supported_chunk_types_param *pr_supported;
 	struct sctp_paramhdr *ph;
 	int cnt_inits_to = 0;
 	int error;
 	uint16_t num_ext, chunk_len, padding_len, parameter_len;
 
 	/* INIT's always go to the primary (and usually ONLY address) */
 	net = stcb->asoc.primary_destination;
 	if (net == NULL) {
 		net = TAILQ_FIRST(&stcb->asoc.nets);
 		if (net == NULL) {
 			/* TSNH */
 			return;
 		}
 		/* we confirm any address we send an INIT to */
 		net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
 		(void)sctp_set_primary_addr(stcb, NULL, net);
 	} else {
 		/* we confirm any address we send an INIT to */
 		net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n");
 #ifdef INET6
 	if (net->ro._l_addr.sa.sa_family == AF_INET6) {
 		/*
 		 * special hook, if we are sending to link local it will not
 		 * show up in our private address count.
 		 */
 		if (IN6_IS_ADDR_LINKLOCAL(&net->ro._l_addr.sin6.sin6_addr))
 			cnt_inits_to = 1;
 	}
 #endif
 	if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 		/* This case should not happen */
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - failed timer?\n");
 		return;
 	}
 	/* start the INIT timer */
 	sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
 
 	m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* No memory, INIT timer will re-attempt. */
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
 		return;
 	}
 	chunk_len = (uint16_t)sizeof(struct sctp_init_chunk);
 	padding_len = 0;
 	/* Now lets put the chunk header in place */
 	init = mtod(m, struct sctp_init_chunk *);
 	/* now the chunk header */
 	init->ch.chunk_type = SCTP_INITIATION;
 	init->ch.chunk_flags = 0;
 	/* fill in later from mbuf we build */
 	init->ch.chunk_length = 0;
 	/* place in my tag */
 	init->init.initiate_tag = htonl(stcb->asoc.my_vtag);
 	/* set up some of the credits. */
 	init->init.a_rwnd = htonl(max(inp->sctp_socket ? SCTP_SB_LIMIT_RCV(inp->sctp_socket) : 0,
 	    SCTP_MINIMAL_RWND));
 	init->init.num_outbound_streams = htons(stcb->asoc.pre_open_streams);
 	init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
 	init->init.initial_tsn = htonl(stcb->asoc.init_seq_number);
 
 	/* Adaptation layer indication parameter */
 	if (inp->sctp_ep.adaptation_layer_indicator_provided) {
 		parameter_len = (uint16_t)sizeof(struct sctp_adaptation_layer_indication);
 		ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
 		ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
 		ali->ph.param_length = htons(parameter_len);
 		ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
 		chunk_len += parameter_len;
 	}
 
 	/* ECN parameter */
 	if (stcb->asoc.ecn_supported == 1) {
 		parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_ECN_CAPABLE);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 
 	/* PR-SCTP supported parameter */
 	if (stcb->asoc.prsctp_supported == 1) {
 		parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 
 	/* Add NAT friendly parameter. */
 	if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
 		parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 
 	/* And now tell the peer which extensions we support */
 	num_ext = 0;
 	pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
 	if (stcb->asoc.prsctp_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
 		if (stcb->asoc.idata_supported) {
 			pr_supported->chunk_types[num_ext++] = SCTP_IFORWARD_CUM_TSN;
 		}
 	}
 	if (stcb->asoc.auth_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
 	}
 	if (stcb->asoc.asconf_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
 	}
 	if (stcb->asoc.reconfig_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
 	}
 	if (stcb->asoc.idata_supported) {
 		pr_supported->chunk_types[num_ext++] = SCTP_IDATA;
 	}
 	if (stcb->asoc.nrsack_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
 	}
 	if (stcb->asoc.pktdrop_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
 	}
 	if (num_ext > 0) {
 		parameter_len = (uint16_t)sizeof(struct sctp_supported_chunk_types_param) + num_ext;
 		pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
 		pr_supported->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 	}
 	/* add authentication parameters */
 	if (stcb->asoc.auth_supported) {
 		/* attach RANDOM parameter, if available */
 		if (stcb->asoc.authinfo.random != NULL) {
 			struct sctp_auth_random *randp;
 
 			if (padding_len > 0) {
 				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 				chunk_len += padding_len;
 				padding_len = 0;
 			}
 			randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
 			parameter_len = (uint16_t)sizeof(struct sctp_auth_random) + stcb->asoc.authinfo.random_len;
 			/* random key already contains the header */
 			memcpy(randp, stcb->asoc.authinfo.random->key, parameter_len);
 			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 			chunk_len += parameter_len;
 		}
 		/* add HMAC_ALGO parameter */
 		if (stcb->asoc.local_hmacs != NULL) {
 			struct sctp_auth_hmac_algo *hmacs;
 
 			if (padding_len > 0) {
 				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 				chunk_len += padding_len;
 				padding_len = 0;
 			}
 			hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
 			parameter_len = (uint16_t)(sizeof(struct sctp_auth_hmac_algo) +
 			    stcb->asoc.local_hmacs->num_algo * sizeof(uint16_t));
 			hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
 			hmacs->ph.param_length = htons(parameter_len);
 			sctp_serialize_hmaclist(stcb->asoc.local_hmacs, (uint8_t *)hmacs->hmac_ids);
 			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 			chunk_len += parameter_len;
 		}
 		/* add CHUNKS parameter */
 		if (stcb->asoc.local_auth_chunks != NULL) {
 			struct sctp_auth_chunk_list *chunks;
 
 			if (padding_len > 0) {
 				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 				chunk_len += padding_len;
 				padding_len = 0;
 			}
 			chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
 			parameter_len = (uint16_t)(sizeof(struct sctp_auth_chunk_list) +
 			    sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks));
 			chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
 			chunks->ph.param_length = htons(parameter_len);
 			sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks, chunks->chunk_types);
 			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 			chunk_len += parameter_len;
 		}
 	}
 
 	/* now any cookie time extensions */
 	if (stcb->asoc.cookie_preserve_req > 0) {
 		struct sctp_cookie_perserve_param *cookie_preserve;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		parameter_len = (uint16_t)sizeof(struct sctp_cookie_perserve_param);
 		cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len);
 		cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
 		cookie_preserve->ph.param_length = htons(parameter_len);
 		cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
 		stcb->asoc.cookie_preserve_req = 0;
 		chunk_len += parameter_len;
 	}
 
 	if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
 		uint8_t i;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
 		if (stcb->asoc.scope.ipv4_addr_legal) {
 			parameter_len += (uint16_t)sizeof(uint16_t);
 		}
 		if (stcb->asoc.scope.ipv6_addr_legal) {
 			parameter_len += (uint16_t)sizeof(uint16_t);
 		}
 		sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len);
 		sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
 		sup_addr->ph.param_length = htons(parameter_len);
 		i = 0;
 		if (stcb->asoc.scope.ipv4_addr_legal) {
 			sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS);
 		}
 		if (stcb->asoc.scope.ipv6_addr_legal) {
 			sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS);
 		}
 		padding_len = 4 - 2 * i;
 		chunk_len += parameter_len;
 	}
 
 	SCTP_BUF_LEN(m) = chunk_len;
 	/* now the addresses */
 	/*
 	 * To optimize this we could put the scoping stuff into a structure
 	 * and remove the individual uint8's from the assoc structure. Then
 	 * we could just sifa in the address within the stcb. But for now
 	 * this is a quick hack to get the address stuff teased apart.
 	 */
 	m_last = sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope,
 	    m, cnt_inits_to,
 	    &padding_len, &chunk_len);
 
 	init->ch.chunk_length = htons(chunk_len);
 	if (padding_len > 0) {
 		if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 			sctp_m_freem(m);
 			return;
 		}
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - calls lowlevel_output\n");
 	if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 	    (struct sockaddr *)&net->ro._l_addr,
 	    m, 0, NULL, 0, 0, 0, 0,
 	    inp->sctp_lport, stcb->rport, htonl(0),
 	    net->port, NULL,
 	    0, 0,
 	    so_locked))) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak send error %d\n", error);
 		if (error == ENOBUFS) {
 			stcb->asoc.ifp_had_enobuf = 1;
 			SCTP_STAT_INCR(sctps_lowlevelerr);
 		}
 	} else {
 		stcb->asoc.ifp_had_enobuf = 0;
 	}
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
 }
 
 /*
  * Walk the parameters of an INIT or INIT-ACK chunk, validate the length of
  * every parameter type known here and collect error causes for the ones
  * that are not recognized.
  *
  * in_initpkt        mbuf chain holding the packet with the chunk.
  * param_offset      offset into in_initpkt of the first parameter.
  * abort_processing  out: cleared on entry; set to 1 when a parameter has an
  *                   invalid size or a hostname address parameter is found.
  * cp                chunk header; its chunk_length bounds the parameters.
  * nat_friendly      out: set to 1 when SCTP_HAS_NAT_SUPPORT is seen. It is
  *                   dereferenced unconditionally, so it must not be NULL,
  *                   and it is never cleared here.
  * cookie_found      optional out (may be NULL): cleared on entry, set to 1
  *                   when a SCTP_STATE_COOKIE parameter is seen.
  *
  * Returns an mbuf chain with the error causes to report, or NULL if there is
  * nothing to report (or the causes could not be allocated). The chain
  * starts with room reserved in front for the IP, SCTP and chunk headers.
  */
 struct mbuf *
 sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
     int param_offset, int *abort_processing,
     struct sctp_chunkhdr *cp,
     int *nat_friendly,
     int *cookie_found)
 {
 	/*
 	 * Given a mbuf containing an INIT or INIT-ACK with the param_offset
 	 * being equal to the beginning of the params i.e. (iphlen +
 	 * sizeof(struct sctp_init_msg) parse through the parameters to the
 	 * end of the mbuf verifying that all parameters are known.
 	 *
 	 * For unknown parameters build and return a mbuf with
 	 * UNRECOGNIZED_PARAMETER errors. If the flags indicate to stop
 	 * processing this chunk stop, and set *abort_processing to 1.
 	 *
 	 * By having param_offset be pre-set to where parameters begin it is
 	 * hoped that this routine may be reused in the future by new
 	 * features.
 	 */
 	struct sctp_paramhdr *phdr, params;
 
 	struct mbuf *mat, *m_tmp, *op_err, *op_err_last;
 	int at, limit, pad_needed;
 	uint16_t ptype, plen, padded_size;
 
 	*abort_processing = 0;
 	if (cookie_found != NULL) {
 		*cookie_found = 0;
 	}
 	mat = in_initpkt;
 	/* Number of parameter bytes covered by the chunk length. */
 	limit = ntohs(cp->chunk_length) - sizeof(struct sctp_init_chunk);
 	at = param_offset;
 	op_err = NULL;
 	op_err_last = NULL;
 	/* Padding still owed to the last parameter copied into op_err. */
 	pad_needed = 0;
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Check for unrecognized param's\n");
 	phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
 	while ((phdr != NULL) && ((size_t)limit >= sizeof(struct sctp_paramhdr))) {
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		if ((plen > limit) || (plen < sizeof(struct sctp_paramhdr))) {
 			/* wacked parameter */
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error %d\n", plen);
 			goto invalid_size;
 		}
 		padded_size = SCTP_SIZE32(plen);
 		/*
 		 * The padding of the last parameter need not be covered by
 		 * the chunk length, so the padded size may exceed what is
 		 * left. Clamp at zero instead of going negative: the loop
 		 * condition casts limit to size_t, and a negative value
 		 * would make us parse whatever follows the chunk.
 		 */
 		if (padded_size > limit) {
 			limit = 0;
 		} else {
 			limit -= padded_size;
 		}
 		/*-
 		 * All parameters for all chunks that we know/understand are
 		 * listed here. We process them other places and make
 		 * appropriate stop actions per the upper bits. However this
 		 * is the generic routine processor's can call to get back
 		 * an operr.. to either incorporate (init-ack) or send.
 		 */
 		switch (ptype) {
 			/* Param's with variable size */
 		case SCTP_HEARTBEAT_INFO:
 		case SCTP_UNRECOG_PARAM:
 		case SCTP_ERROR_CAUSE_IND:
 			/* ok skip fwd */
 			at += padded_size;
 			break;
 		case SCTP_STATE_COOKIE:
 			if (cookie_found != NULL) {
 				*cookie_found = 1;
 			}
 			at += padded_size;
 			break;
 			/* Param's with variable size within a range */
 		case SCTP_CHUNK_LIST:
 		case SCTP_SUPPORTED_CHUNK_EXT:
 			if (padded_size > (sizeof(struct sctp_supported_chunk_types_param) + (sizeof(uint8_t) * SCTP_MAX_SUPPORTED_EXT))) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error chklist %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_SUPPORTED_ADDRTYPE:
 			if (padded_size > SCTP_MAX_ADDR_PARAMS_SIZE) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error supaddrtype %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_RANDOM:
 			if (padded_size > (sizeof(struct sctp_auth_random) + SCTP_RANDOM_MAX_SIZE)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error random %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_SET_PRIM_ADDR:
 		case SCTP_DEL_IP_ADDRESS:
 		case SCTP_ADD_IP_ADDRESS:
 			if ((padded_size != sizeof(struct sctp_asconf_addrv4_param)) &&
 			    (padded_size != sizeof(struct sctp_asconf_addr_param))) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error setprim %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 			/* Param's with a fixed size */
 		case SCTP_IPV4_ADDRESS:
 			if (padded_size != sizeof(struct sctp_ipv4addr_param)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv4 addr %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_IPV6_ADDRESS:
 			if (padded_size != sizeof(struct sctp_ipv6addr_param)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv6 addr %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_COOKIE_PRESERVE:
 			if (padded_size != sizeof(struct sctp_cookie_perserve_param)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error cookie-preserve %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_HAS_NAT_SUPPORT:
 			*nat_friendly = 1;
 			/* fall through */
 		case SCTP_PRSCTP_SUPPORTED:
 			if (padded_size != sizeof(struct sctp_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error prsctp/nat support %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_ECN_CAPABLE:
 			if (padded_size != sizeof(struct sctp_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_ULP_ADAPTATION:
 			if (padded_size != sizeof(struct sctp_adaptation_layer_indication)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error adapatation %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_SUCCESS_REPORT:
 			if (padded_size != sizeof(struct sctp_asconf_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error success %d\n", plen);
 				goto invalid_size;
 			}
 			at += padded_size;
 			break;
 		case SCTP_HOSTNAME_ADDRESS:
 			{
 				/* Hostname parameters are deprecated. */
 				struct sctp_gen_error_cause *cause;
 				int l_len;
 
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Can't handle hostname addresses.. abort processing\n");
 				*abort_processing = 1;
 				/*
 				 * Drop whatever was collected so far; only the
 				 * unresolvable address cause is reported.
 				 */
 				sctp_m_freem(op_err);
 				op_err = NULL;
 				op_err_last = NULL;
 #ifdef INET6
 				l_len = SCTP_MIN_OVERHEAD;
 #else
 				l_len = SCTP_MIN_V4_OVERHEAD;
 #endif
 				l_len += sizeof(struct sctp_chunkhdr);
 				l_len += sizeof(struct sctp_gen_error_cause);
 				op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 				if (op_err != NULL) {
 					/*
 					 * Pre-reserve space for IP, SCTP,
 					 * and chunk header.
 					 */
 #ifdef INET6
 					SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
 #else
 					SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
 #endif
 					SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
 					SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
 					SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
 					cause = mtod(op_err, struct sctp_gen_error_cause *);
 					cause->code = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR);
 					cause->length = htons((uint16_t)(sizeof(struct sctp_gen_error_cause) + plen));
 					/* The offending parameter follows the cause header. */
 					SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(mat, at, plen, M_NOWAIT);
 					if (SCTP_BUF_NEXT(op_err) == NULL) {
 						sctp_m_freem(op_err);
 						op_err = NULL;
 						op_err_last = NULL;
 					}
 				}
 				return (op_err);
 			}
 		default:
 			/*
 			 * we do not recognize the parameter figure out what
 			 * we do.
 			 */
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "Hit default param %x\n", ptype);
 			if ((ptype & 0x4000) == 0x4000) {
 				/* Report bit is set?? */
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "report op err\n");
 				if (op_err == NULL) {
 					int l_len;
 
 					/* Ok need to try to get an mbuf */
 #ifdef INET6
 					l_len = SCTP_MIN_OVERHEAD;
 #else
 					l_len = SCTP_MIN_V4_OVERHEAD;
 #endif
 					l_len += sizeof(struct sctp_chunkhdr);
 					l_len += sizeof(struct sctp_paramhdr);
 					op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 					if (op_err) {
 						SCTP_BUF_LEN(op_err) = 0;
 #ifdef INET6
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
 #else
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
 #endif
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
 						SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
 						op_err_last = op_err;
 					}
 				}
 				if (op_err != NULL) {
 					/* If we have space */
 					struct sctp_paramhdr *param;
 
 					/* Pad the previously copied parameter first. */
 					if (pad_needed > 0) {
 						op_err_last = sctp_add_pad_tombuf(op_err_last, pad_needed);
 					}
 					/*
 					 * On every failure below the whole chain is
 					 * dropped. pad_needed is reset with it so
 					 * that a chain started later for another
 					 * parameter does not begin with padding
 					 * owed to a parameter that is gone.
 					 */
 					if (op_err_last == NULL) {
 						sctp_m_freem(op_err);
 						op_err = NULL;
 						op_err_last = NULL;
 						pad_needed = 0;
 						goto more_processing;
 					}
 					if (M_TRAILINGSPACE(op_err_last) < (int)sizeof(struct sctp_paramhdr)) {
 						/* No room for the header: append a new mbuf. */
 						m_tmp = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA);
 						if (m_tmp == NULL) {
 							sctp_m_freem(op_err);
 							op_err = NULL;
 							op_err_last = NULL;
 							pad_needed = 0;
 							goto more_processing;
 						}
 						SCTP_BUF_LEN(m_tmp) = 0;
 						SCTP_BUF_NEXT(m_tmp) = NULL;
 						SCTP_BUF_NEXT(op_err_last) = m_tmp;
 						op_err_last = m_tmp;
 					}
 					param = (struct sctp_paramhdr *)(mtod(op_err_last, caddr_t)+SCTP_BUF_LEN(op_err_last));
 					param->param_type = htons(SCTP_UNRECOG_PARAM);
 					param->param_length = htons((uint16_t)sizeof(struct sctp_paramhdr) + plen);
 					SCTP_BUF_LEN(op_err_last) += sizeof(struct sctp_paramhdr);
 					/* Append a copy of the unrecognized parameter. */
 					SCTP_BUF_NEXT(op_err_last) = SCTP_M_COPYM(mat, at, plen, M_NOWAIT);
 					if (SCTP_BUF_NEXT(op_err_last) == NULL) {
 						sctp_m_freem(op_err);
 						op_err = NULL;
 						op_err_last = NULL;
 						pad_needed = 0;
 						goto more_processing;
 					} else {
 						/* Move to the end of the chain. */
 						while (SCTP_BUF_NEXT(op_err_last) != NULL) {
 							op_err_last = SCTP_BUF_NEXT(op_err_last);
 						}
 					}
 					/* Only plen bytes were copied; remember the pad. */
 					if (plen % 4 != 0) {
 						pad_needed = 4 - (plen % 4);
 					} else {
 						pad_needed = 0;
 					}
 				}
 			}
 	more_processing:
 			if ((ptype & 0x8000) == 0x0000) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "stop proc\n");
 				return (op_err);
 			} else {
 				/* skip this chunk and continue processing */
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "move on\n");
 				at += SCTP_SIZE32(plen);
 			}
 			break;
 		}
 		phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
 	}
 	return (op_err);
 invalid_size:
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "abort flag set\n");
 	*abort_processing = 1;
 	/* Replace anything collected so far by a protocol violation cause. */
 	sctp_m_freem(op_err);
 	op_err = NULL;
 	op_err_last = NULL;
 	if (phdr != NULL) {
 		struct sctp_paramhdr *param;
 		int l_len;
 #ifdef INET6
 		l_len = SCTP_MIN_OVERHEAD;
 #else
 		l_len = SCTP_MIN_V4_OVERHEAD;
 #endif
 		l_len += sizeof(struct sctp_chunkhdr);
 		l_len += (2 * sizeof(struct sctp_paramhdr));
 		op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 		if (op_err) {
 			SCTP_BUF_LEN(op_err) = 0;
 #ifdef INET6
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
 #else
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
 #endif
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
 			SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
 			SCTP_BUF_LEN(op_err) = 2 * sizeof(struct sctp_paramhdr);
 			/*
 			 * The cause header is followed by the header (type
 			 * and length only) of the offending parameter.
 			 */
 			param = mtod(op_err, struct sctp_paramhdr *);
 			param->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
 			param->param_length = htons(2 * sizeof(struct sctp_paramhdr));
 			param++;
 			param->param_type = htons(ptype);
 			param->param_length = htons(plen);
 		}
 	}
 	return (op_err);
 }
 
 /*
  * Given a INIT chunk, look through the parameters to verify that there
  * are no new addresses.
  * Return true, if there is a new address or there is a problem parsing
    the parameters. Provide an optional error cause used when sending an ABORT.
  * Return false, if there are no new addresses and there is no problem in
    parameter processing.
  */
 static bool
 sctp_are_there_new_addresses(struct sctp_association *asoc,
     struct mbuf *in_initpkt, int offset, int limit, struct sockaddr *src,
     struct mbuf **op_err)
 {
 	struct sockaddr *sa_touse;
 	struct sockaddr *sa;
 	struct sctp_paramhdr *phdr, params;
 	struct sctp_nets *net;
 #ifdef INET
 	struct sockaddr_in sin4, *sa4;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6, *sa6;
 #endif
 	uint16_t ptype, plen;
 	bool fnd, check_src;
 
 	*op_err = NULL;
 #ifdef INET
 	memset(&sin4, 0, sizeof(sin4));
 	sin4.sin_family = AF_INET;
 	sin4.sin_len = sizeof(sin4);
 #endif
 #ifdef INET6
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(sin6);
 #endif
 	/* First what about the src address of the pkt ? */
 	check_src = false;
 	switch (src->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if (asoc->scope.ipv4_addr_legal) {
 			check_src = true;
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if (asoc->scope.ipv6_addr_legal) {
 			check_src = true;
 		}
 		break;
 #endif
 	default:
 		/* TSNH */
 		break;
 	}
 	if (check_src) {
 		fnd = false;
 		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 			sa = (struct sockaddr *)&net->ro._l_addr;
 			if (sa->sa_family == src->sa_family) {
 #ifdef INET
 				if (sa->sa_family == AF_INET) {
 					struct sockaddr_in *src4;
 
 					sa4 = (struct sockaddr_in *)sa;
 					src4 = (struct sockaddr_in *)src;
 					if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
 						fnd = true;
 						break;
 					}
 				}
 #endif
 #ifdef INET6
 				if (sa->sa_family == AF_INET6) {
 					struct sockaddr_in6 *src6;
 
 					sa6 = (struct sockaddr_in6 *)sa;
 					src6 = (struct sockaddr_in6 *)src;
 					if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
 						fnd = true;
 						break;
 					}
 				}
 #endif
 			}
 		}
 		if (!fnd) {
 			/*
 			 * If sending an ABORT in case of an additional
 			 * address, don't use the new address error cause.
 			 * This looks no different than if no listener was
 			 * present.
 			 */
 			*op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "Address added");
 			return (true);
 		}
 	}
 	/* Ok so far lets munge through the rest of the packet */
 	offset += sizeof(struct sctp_init_chunk);
 	phdr = sctp_get_next_param(in_initpkt, offset, &params, sizeof(params));
 	while (phdr) {
 		sa_touse = NULL;
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		if (offset + plen > limit) {
 			*op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, "Partial parameter");
 			return (true);
 		}
 		if (plen < sizeof(struct sctp_paramhdr)) {
 			*op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, "Parameter length too small");
 			return (true);
 		}
 		switch (ptype) {
 #ifdef INET
 		case SCTP_IPV4_ADDRESS:
 			{
 				struct sctp_ipv4addr_param *p4, p4_buf;
 
 				if (plen != sizeof(struct sctp_ipv4addr_param)) {
 					*op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, "Parameter length illegal");
 					return (true);
 				}
 				phdr = sctp_get_next_param(in_initpkt, offset,
 				    (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf));
 				if (phdr == NULL) {
 					*op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, "");
 					return (true);
 				}
 				if (asoc->scope.ipv4_addr_legal) {
 					p4 = (struct sctp_ipv4addr_param *)phdr;
 					sin4.sin_addr.s_addr = p4->addr;
 					sa_touse = (struct sockaddr *)&sin4;
 				}
 				break;
 			}
 #endif
 #ifdef INET6
 		case SCTP_IPV6_ADDRESS:
 			{
 				struct sctp_ipv6addr_param *p6, p6_buf;
 
 				if (plen != sizeof(struct sctp_ipv6addr_param)) {
 					*op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, "Parameter length illegal");
 					return (true);
 				}
 				phdr = sctp_get_next_param(in_initpkt, offset,
 				    (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf));
 				if (phdr == NULL) {
 					*op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, "");
 					return (true);
 				}
 				if (asoc->scope.ipv6_addr_legal) {
 					p6 = (struct sctp_ipv6addr_param *)phdr;
 					memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
 					    sizeof(p6->addr));
 					sa_touse = (struct sockaddr *)&sin6;
 				}
 				break;
 			}
 #endif
 		default:
 			sa_touse = NULL;
 			break;
 		}
 		if (sa_touse) {
 			/* ok, sa_touse points to one to check */
 			fnd = false;
 			TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 				sa = (struct sockaddr *)&net->ro._l_addr;
 				if (sa->sa_family != sa_touse->sa_family) {
 					continue;
 				}
 #ifdef INET
 				if (sa->sa_family == AF_INET) {
 					sa4 = (struct sockaddr_in *)sa;
 					if (sa4->sin_addr.s_addr ==
 					    sin4.sin_addr.s_addr) {
 						fnd = true;
 						break;
 					}
 				}
 #endif
 #ifdef INET6
 				if (sa->sa_family == AF_INET6) {
 					sa6 = (struct sockaddr_in6 *)sa;
 					if (SCTP6_ARE_ADDR_EQUAL(
 					    sa6, &sin6)) {
 						fnd = true;
 						break;
 					}
 				}
 #endif
 			}
 			if (!fnd) {
 				/*
 				 * If sending an ABORT in case of an
 				 * additional address, don't use the new
 				 * address error cause. This looks no
 				 * different than if no listener was
 				 * present.
 				 */
 				*op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "Address added");
 				return (true);
 			}
 		}
 		offset += SCTP_SIZE32(plen);
 		if (offset >= limit) {
 			break;
 		}
 		phdr = sctp_get_next_param(in_initpkt, offset, &params, sizeof(params));
 	}
 	return (false);
 }
 
 /*
  * Given an mbuf chain that was sent to us containing an INIT, build an
  * INIT-ACK with a COOKIE and send it back. We assume that init_pkt has
  * already been pulled up to include the IPv6/IPv4 header, the SCTP header
  * and the initial part of the INIT message (i.e. the struct sctp_init_msg).
  */
 void
 sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_nets *src_net, struct mbuf *init_pkt,
     int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_init_chunk *init_chk,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_association *asoc;
 	struct mbuf *m, *m_tmp, *m_last, *m_cookie, *op_err;
 	struct sctp_init_ack_chunk *initack;
 	struct sctp_adaptation_layer_indication *ali;
 	struct sctp_supported_chunk_types_param *pr_supported;
 	struct sctp_paramhdr *ph;
 	union sctp_sockstore *over_addr;
 	struct sctp_scoping scp;
 	struct timeval now;
 #ifdef INET
 	struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
 	struct sockaddr_in *src4 = (struct sockaddr_in *)src;
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
 	struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src;
 	struct sockaddr_in6 *sin6;
 #endif
 	struct sockaddr *to;
 	struct sctp_state_cookie stc;
 	struct sctp_nets *net = NULL;
 	uint8_t *signature = NULL;
 	int cnt_inits_to = 0;
 	uint16_t his_limit, i_want;
 	int abort_flag;
 	int nat_friendly = 0;
 	int error;
 	struct socket *so;
 	uint16_t num_ext, chunk_len, padding_len, parameter_len;
 
 	if (stcb) {
 		asoc = &stcb->asoc;
 	} else {
 		asoc = NULL;
 	}
 	if ((asoc != NULL) &&
 	    (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT)) {
 		if (sctp_are_there_new_addresses(asoc, init_pkt, offset, offset + ntohs(init_chk->ch.chunk_length), src, &op_err)) {
 			/*
 			 * new addresses, out of here in non-cookie-wait
 			 * states
 			 */
 			sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
 			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 			return;
 		}
 		if (src_net != NULL && (src_net->port != port)) {
 			/*
 			 * change of remote encapsulation port, out of here
 			 * in non-cookie-wait states
 			 *
 			 * Send an ABORT, without an specific error cause.
 			 * This looks no different than if no listener was
 			 * present.
 			 */
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    "Remote encapsulation port changed");
 			sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
 			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 			return;
 		}
 	}
 	abort_flag = 0;
 	op_err = sctp_arethere_unrecognized_parameters(init_pkt,
 	    (offset + sizeof(struct sctp_init_chunk)),
 	    &abort_flag,
 	    (struct sctp_chunkhdr *)init_chk,
 	    &nat_friendly, NULL);
 	if (abort_flag) {
 do_a_abort:
 		if (op_err == NULL) {
 			char msg[SCTP_DIAG_INFO_LEN];
 
 			SCTP_SNPRINTF(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__);
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    msg);
 		}
 		sctp_send_abort(init_pkt, iphlen, src, dst, sh,
 		    init_chk->init.initiate_tag, op_err,
 		    mflowtype, mflowid, inp->fibnum,
 		    vrf_id, port);
 		return;
 	}
 	m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* No memory, INIT timer will re-attempt. */
 		sctp_m_freem(op_err);
 		return;
 	}
 	chunk_len = (uint16_t)sizeof(struct sctp_init_ack_chunk);
 	padding_len = 0;
 
 	/*
 	 * We might not overwrite the identification[] completely and on
 	 * some platforms time_entered will contain some padding. Therefore
 	 * zero out the cookie to avoid putting uninitialized memory on the
 	 * wire.
 	 */
 	memset(&stc, 0, sizeof(struct sctp_state_cookie));
 
 	/* the time I built cookie */
 	(void)SCTP_GETTIME_TIMEVAL(&now);
 	stc.time_entered.tv_sec = now.tv_sec;
 	stc.time_entered.tv_usec = now.tv_usec;
 
 	/* populate any tie tags */
 	if (asoc != NULL) {
 		/* unlock before tag selections */
 		stc.tie_tag_my_vtag = asoc->my_vtag_nonce;
 		stc.tie_tag_peer_vtag = asoc->peer_vtag_nonce;
 		stc.cookie_life = asoc->cookie_life;
 		net = asoc->primary_destination;
 	} else {
 		stc.tie_tag_my_vtag = 0;
 		stc.tie_tag_peer_vtag = 0;
 		/* life I will award this cookie */
 		stc.cookie_life = inp->sctp_ep.def_cookie_life;
 	}
 
 	/* copy in the ports for later check */
 	stc.myport = sh->dest_port;
 	stc.peerport = sh->src_port;
 
 	/*
 	 * If we wanted to honor cookie life extensions, we would add to
 	 * stc.cookie_life. For now we should NOT honor any extension
 	 */
 	stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 		stc.ipv6_addr_legal = 1;
 		if (SCTP_IPV6_V6ONLY(inp)) {
 			stc.ipv4_addr_legal = 0;
 		} else {
 			stc.ipv4_addr_legal = 1;
 		}
 	} else {
 		stc.ipv6_addr_legal = 0;
 		stc.ipv4_addr_legal = 1;
 	}
 	stc.ipv4_scope = 0;
 	if (net == NULL) {
 		to = src;
 		switch (dst->sa_family) {
 #ifdef INET
 		case AF_INET:
 			{
 				/* lookup address */
 				stc.address[0] = src4->sin_addr.s_addr;
 				stc.address[1] = 0;
 				stc.address[2] = 0;
 				stc.address[3] = 0;
 				stc.addr_type = SCTP_IPV4_ADDRESS;
 				/* local from address */
 				stc.laddress[0] = dst4->sin_addr.s_addr;
 				stc.laddress[1] = 0;
 				stc.laddress[2] = 0;
 				stc.laddress[3] = 0;
 				stc.laddr_type = SCTP_IPV4_ADDRESS;
 				/* scope_id is only for v6 */
 				stc.scope_id = 0;
 				if ((IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) ||
 				    (IN4_ISPRIVATE_ADDRESS(&dst4->sin_addr))) {
 					stc.ipv4_scope = 1;
 				}
 				/* Must use the address in this case */
 				if (sctp_is_address_on_local_host(src, vrf_id)) {
 					stc.loopback_scope = 1;
 					stc.ipv4_scope = 1;
 					stc.site_scope = 1;
 					stc.local_scope = 0;
 				}
 				break;
 			}
 #endif
 #ifdef INET6
 		case AF_INET6:
 			{
 				stc.addr_type = SCTP_IPV6_ADDRESS;
 				memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr));
 				stc.scope_id = ntohs(in6_getscope(&src6->sin6_addr));
 				if (sctp_is_address_on_local_host(src, vrf_id)) {
 					stc.loopback_scope = 1;
 					stc.local_scope = 0;
 					stc.site_scope = 1;
 					stc.ipv4_scope = 1;
 				} else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr) ||
 				    IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) {
 					/*
 					 * If the new destination or source
 					 * is a LINK_LOCAL we must have
 					 * common both site and local scope.
 					 * Don't set local scope though
 					 * since we must depend on the
 					 * source to be added implicitly. We
 					 * cannot assure just because we
 					 * share one link that all links are
 					 * common.
 					 */
 					stc.local_scope = 0;
 					stc.site_scope = 1;
 					stc.ipv4_scope = 1;
 					/*
 					 * we start counting for the private
 					 * address stuff at 1. since the
 					 * link local we source from won't
 					 * show up in our scoped count.
 					 */
 					cnt_inits_to = 1;
 					/*
 					 * pull out the scope_id from
 					 * incoming pkt
 					 */
 				} else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr) ||
 				    IN6_IS_ADDR_SITELOCAL(&dst6->sin6_addr)) {
 					/*
 					 * If the new destination or source
 					 * is SITE_LOCAL then we must have
 					 * site scope in common.
 					 */
 					stc.site_scope = 1;
 				}
 				memcpy(&stc.laddress, &dst6->sin6_addr, sizeof(struct in6_addr));
 				stc.laddr_type = SCTP_IPV6_ADDRESS;
 				break;
 			}
 #endif
 		default:
 			/* TSNH */
 			goto do_a_abort;
 			break;
 		}
 	} else {
 		/* set the scope per the existing tcb */
 
 #ifdef INET6
 		struct sctp_nets *lnet;
 #endif
 
 		stc.loopback_scope = asoc->scope.loopback_scope;
 		stc.ipv4_scope = asoc->scope.ipv4_local_scope;
 		stc.site_scope = asoc->scope.site_scope;
 		stc.local_scope = asoc->scope.local_scope;
 #ifdef INET6
 		/* Why do we not consider IPv4 LL addresses? */
 		TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
 			if (lnet->ro._l_addr.sin6.sin6_family == AF_INET6) {
 				if (IN6_IS_ADDR_LINKLOCAL(&lnet->ro._l_addr.sin6.sin6_addr)) {
 					/*
 					 * if we have a LL address, start
 					 * counting at 1.
 					 */
 					cnt_inits_to = 1;
 				}
 			}
 		}
 #endif
 		/* use the net pointer */
 		to = (struct sockaddr *)&net->ro._l_addr;
 		switch (to->sa_family) {
 #ifdef INET
 		case AF_INET:
 			sin = (struct sockaddr_in *)to;
 			stc.address[0] = sin->sin_addr.s_addr;
 			stc.address[1] = 0;
 			stc.address[2] = 0;
 			stc.address[3] = 0;
 			stc.addr_type = SCTP_IPV4_ADDRESS;
 			if (net->src_addr_selected == 0) {
 				/*
 				 * strange case here, the INIT should have
 				 * did the selection.
 				 */
 				net->ro._s_addr = sctp_source_address_selection(inp,
 				    stcb, (sctp_route_t *)&net->ro,
 				    net, 0, vrf_id);
 				if (net->ro._s_addr == NULL) {
 					sctp_m_freem(op_err);
 					sctp_m_freem(m);
 					return;
 				}
 
 				net->src_addr_selected = 1;
 			}
 			stc.laddress[0] = net->ro._s_addr->address.sin.sin_addr.s_addr;
 			stc.laddress[1] = 0;
 			stc.laddress[2] = 0;
 			stc.laddress[3] = 0;
 			stc.laddr_type = SCTP_IPV4_ADDRESS;
 			/* scope_id is only for v6 */
 			stc.scope_id = 0;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			sin6 = (struct sockaddr_in6 *)to;
 			memcpy(&stc.address, &sin6->sin6_addr,
 			    sizeof(struct in6_addr));
 			stc.addr_type = SCTP_IPV6_ADDRESS;
 			stc.scope_id = sin6->sin6_scope_id;
 			if (net->src_addr_selected == 0) {
 				/*
 				 * strange case here, the INIT should have
 				 * done the selection.
 				 */
 				net->ro._s_addr = sctp_source_address_selection(inp,
 				    stcb, (sctp_route_t *)&net->ro,
 				    net, 0, vrf_id);
 				if (net->ro._s_addr == NULL) {
 					sctp_m_freem(op_err);
 					sctp_m_freem(m);
 					return;
 				}
 
 				net->src_addr_selected = 1;
 			}
 			memcpy(&stc.laddress, &net->ro._s_addr->address.sin6.sin6_addr,
 			    sizeof(struct in6_addr));
 			stc.laddr_type = SCTP_IPV6_ADDRESS;
 			break;
 #endif
 		}
 	}
 	/* Now lets put the SCTP header in place */
 	initack = mtod(m, struct sctp_init_ack_chunk *);
 	/* Save it off for quick ref */
 	stc.peers_vtag = ntohl(init_chk->init.initiate_tag);
 	/* who are we */
 	memcpy(stc.identification, SCTP_VERSION_STRING,
 	    min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification)));
 	memset(stc.reserved, 0, SCTP_RESERVE_SPACE);
 	/* now the chunk header */
 	initack->ch.chunk_type = SCTP_INITIATION_ACK;
 	initack->ch.chunk_flags = 0;
 	/* fill in later from mbuf we build */
 	initack->ch.chunk_length = 0;
 	/* place in my tag */
 	if ((asoc != NULL) &&
 	    ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_INUSE) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED))) {
 		/* re-use the v-tags and init-seq here */
 		initack->init.initiate_tag = htonl(asoc->my_vtag);
 		initack->init.initial_tsn = htonl(asoc->init_seq_number);
 	} else {
 		uint32_t vtag, itsn;
 
 		if (asoc) {
 			atomic_add_int(&asoc->refcnt, 1);
 			SCTP_TCB_UNLOCK(stcb);
 	new_tag:
 			SCTP_INP_INFO_RLOCK();
 			vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1);
 			SCTP_INP_INFO_RUNLOCK();
 			if ((asoc->peer_supports_nat) && (vtag == asoc->my_vtag)) {
 				/*
 				 * Got a duplicate vtag on some guy behind a
 				 * nat make sure we don't use it.
 				 */
 				goto new_tag;
 			}
 			initack->init.initiate_tag = htonl(vtag);
 			/* get a TSN to use too */
 			itsn = sctp_select_initial_TSN(&inp->sctp_ep);
 			initack->init.initial_tsn = htonl(itsn);
 			SCTP_TCB_LOCK(stcb);
 			atomic_subtract_int(&asoc->refcnt, 1);
 		} else {
 			SCTP_INP_INCR_REF(inp);
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_INP_INFO_RLOCK();
 			vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1);
 			SCTP_INP_INFO_RUNLOCK();
 			initack->init.initiate_tag = htonl(vtag);
 			/* get a TSN to use too */
 			initack->init.initial_tsn = htonl(sctp_select_initial_TSN(&inp->sctp_ep));
 			SCTP_INP_RLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 		}
 	}
 	/* save away my tag to */
 	stc.my_vtag = initack->init.initiate_tag;
 
 	/* set up some of the credits. */
 	so = inp->sctp_socket;
 	if (so == NULL) {
 		/* memory problem */
 		sctp_m_freem(op_err);
 		sctp_m_freem(m);
 		return;
 	} else {
 		initack->init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(so), SCTP_MINIMAL_RWND));
 	}
 	/* set what I want */
 	his_limit = ntohs(init_chk->init.num_inbound_streams);
 	/* choose what I want */
 	if (asoc != NULL) {
 		if (asoc->streamoutcnt > asoc->pre_open_streams) {
 			i_want = asoc->streamoutcnt;
 		} else {
 			i_want = asoc->pre_open_streams;
 		}
 	} else {
 		i_want = inp->sctp_ep.pre_open_stream_count;
 	}
 	if (his_limit < i_want) {
 		/* I Want more :< */
 		initack->init.num_outbound_streams = init_chk->init.num_inbound_streams;
 	} else {
 		/* I can have what I want :> */
 		initack->init.num_outbound_streams = htons(i_want);
 	}
 	/* tell him his limit. */
 	initack->init.num_inbound_streams =
 	    htons(inp->sctp_ep.max_open_streams_intome);
 
 	/* adaptation layer indication parameter */
 	if (inp->sctp_ep.adaptation_layer_indicator_provided) {
 		parameter_len = (uint16_t)sizeof(struct sctp_adaptation_layer_indication);
 		ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
 		ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
 		ali->ph.param_length = htons(parameter_len);
 		ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
 		chunk_len += parameter_len;
 	}
 
 	/* ECN parameter */
 	if (((asoc != NULL) && (asoc->ecn_supported == 1)) ||
 	    ((asoc == NULL) && (inp->ecn_supported == 1))) {
 		parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_ECN_CAPABLE);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 
 	/* PR-SCTP supported parameter */
 	if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
 	    ((asoc == NULL) && (inp->prsctp_supported == 1))) {
 		parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 
 	/* Add NAT friendly parameter */
 	if (nat_friendly) {
 		parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
 		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
 		ph->param_length = htons(parameter_len);
 		chunk_len += parameter_len;
 	}
 
 	/* And now tell the peer which extensions we support */
 	num_ext = 0;
 	pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
 	if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
 	    ((asoc == NULL) && (inp->prsctp_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
 		if (((asoc != NULL) && (asoc->idata_supported == 1)) ||
 		    ((asoc == NULL) && (inp->idata_supported == 1))) {
 			pr_supported->chunk_types[num_ext++] = SCTP_IFORWARD_CUM_TSN;
 		}
 	}
 	if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
 	    ((asoc == NULL) && (inp->auth_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
 	}
 	if (((asoc != NULL) && (asoc->asconf_supported == 1)) ||
 	    ((asoc == NULL) && (inp->asconf_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
 		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
 	}
 	if (((asoc != NULL) && (asoc->reconfig_supported == 1)) ||
 	    ((asoc == NULL) && (inp->reconfig_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
 	}
 	if (((asoc != NULL) && (asoc->idata_supported == 1)) ||
 	    ((asoc == NULL) && (inp->idata_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_IDATA;
 	}
 	if (((asoc != NULL) && (asoc->nrsack_supported == 1)) ||
 	    ((asoc == NULL) && (inp->nrsack_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
 	}
 	if (((asoc != NULL) && (asoc->pktdrop_supported == 1)) ||
 	    ((asoc == NULL) && (inp->pktdrop_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
 	}
 	if (num_ext > 0) {
 		parameter_len = (uint16_t)sizeof(struct sctp_supported_chunk_types_param) + num_ext;
 		pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
 		pr_supported->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 	}
 
 	/* add authentication parameters */
 	if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
 	    ((asoc == NULL) && (inp->auth_supported == 1))) {
 		struct sctp_auth_random *randp;
 		struct sctp_auth_hmac_algo *hmacs;
 		struct sctp_auth_chunk_list *chunks;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		/* generate and add RANDOM parameter */
 		randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
 		parameter_len = (uint16_t)sizeof(struct sctp_auth_random) +
 		    SCTP_AUTH_RANDOM_SIZE_DEFAULT;
 		randp->ph.param_type = htons(SCTP_RANDOM);
 		randp->ph.param_length = htons(parameter_len);
 		SCTP_READ_RANDOM(randp->random_data, SCTP_AUTH_RANDOM_SIZE_DEFAULT);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		/* add HMAC_ALGO parameter */
 		hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
 		parameter_len = (uint16_t)sizeof(struct sctp_auth_hmac_algo) +
 		    sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
 		    (uint8_t *)hmacs->hmac_ids);
 		hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
 		hmacs->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 
 		if (padding_len > 0) {
 			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 			chunk_len += padding_len;
 			padding_len = 0;
 		}
 		/* add CHUNKS parameter */
 		chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
 		parameter_len = (uint16_t)sizeof(struct sctp_auth_chunk_list) +
 		    sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
 		    chunks->chunk_types);
 		chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
 		chunks->ph.param_length = htons(parameter_len);
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		chunk_len += parameter_len;
 	}
 	SCTP_BUF_LEN(m) = chunk_len;
 	m_last = m;
 	/* now the addresses */
 	/*
 	 * To optimize this we could put the scoping stuff into a structure
 	 * and remove the individual uint8's from the stc structure. Then we
 	 * could just pass in the address within the stc, but for now this
 	 * is a quick hack to get the address stuff teased apart.
 	 */
 	scp.ipv4_addr_legal = stc.ipv4_addr_legal;
 	scp.ipv6_addr_legal = stc.ipv6_addr_legal;
 	scp.loopback_scope = stc.loopback_scope;
 	scp.ipv4_local_scope = stc.ipv4_scope;
 	scp.local_scope = stc.local_scope;
 	scp.site_scope = stc.site_scope;
 	m_last = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_last,
 	    cnt_inits_to,
 	    &padding_len, &chunk_len);
 	/* padding_len can only be positive, if no addresses have been added */
 	if (padding_len > 0) {
 		memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
 		chunk_len += padding_len;
 		SCTP_BUF_LEN(m) += padding_len;
 		padding_len = 0;
 	}
 
 	/* tack on the operational error if present */
 	if (op_err) {
 		parameter_len = 0;
 		for (m_tmp = op_err; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
 			parameter_len += SCTP_BUF_LEN(m_tmp);
 		}
 		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 		SCTP_BUF_NEXT(m_last) = op_err;
 		while (SCTP_BUF_NEXT(m_last) != NULL) {
 			m_last = SCTP_BUF_NEXT(m_last);
 		}
 		chunk_len += parameter_len;
 	}
 	if (padding_len > 0) {
 		m_last = sctp_add_pad_tombuf(m_last, padding_len);
 		if (m_last == NULL) {
 			/* Houston we have a problem, no space */
 			sctp_m_freem(m);
 			return;
 		}
 		chunk_len += padding_len;
 		padding_len = 0;
 	}
 	/* Now we must build a cookie */
 	m_cookie = sctp_add_cookie(init_pkt, offset, m, 0, &stc, &signature);
 	if (m_cookie == NULL) {
 		/* memory problem */
 		sctp_m_freem(m);
 		return;
 	}
 	/* Now append the cookie to the end and update the space/size */
 	SCTP_BUF_NEXT(m_last) = m_cookie;
 	parameter_len = 0;
 	for (m_tmp = m_cookie; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
 		parameter_len += SCTP_BUF_LEN(m_tmp);
 		if (SCTP_BUF_NEXT(m_tmp) == NULL) {
 			m_last = m_tmp;
 		}
 	}
 	padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
 	chunk_len += parameter_len;
 
 	/*
 	 * Place in the size, but we don't include the last pad (if any) in
 	 * the INIT-ACK.
 	 */
 	initack->ch.chunk_length = htons(chunk_len);
 
 	/*
 	 * Time to sign the cookie, we don't sign over the cookie signature
 	 * though thus we set trailer.
 	 */
 	(void)sctp_hmac_m(SCTP_HMAC,
 	    (uint8_t *)inp->sctp_ep.secret_key[(int)(inp->sctp_ep.current_secret_number)],
 	    SCTP_SECRET_SIZE, m_cookie, sizeof(struct sctp_paramhdr),
 	    (uint8_t *)signature, SCTP_SIGNATURE_SIZE);
 	/*
 	 * We pass 0 here to NOT set IP_DF if it's IPv4; we ignore the return
 	 * here since the timer will drive a retransmission.
 	 */
 	if (padding_len > 0) {
 		if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 			sctp_m_freem(m);
 			return;
 		}
 	}
 	if (stc.loopback_scope) {
 		over_addr = (union sctp_sockstore *)dst;
 	} else {
 		over_addr = NULL;
 	}
 
 	if ((error = sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0,
 	    0, 0,
 	    inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag,
 	    port, over_addr,
 	    mflowtype, mflowid,
 	    SCTP_SO_NOT_LOCKED))) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak send error %d\n", error);
 		if (error == ENOBUFS) {
 			if (asoc != NULL) {
 				asoc->ifp_had_enobuf = 1;
 			}
 			SCTP_STAT_INCR(sctps_lowlevelerr);
 		}
 	} else {
 		if (asoc != NULL) {
 			asoc->ifp_had_enobuf = 0;
 		}
 	}
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 }
 
 /*
  * Try to reclaim at least 'dataout' bytes of send-buffer space by releasing
  * PR-SCTP chunks that use the buffer-limited policy.
  *
  * Both the sent queue and the send queue are scanned.  A chunk is released
  * when its stored tv_sec value is greater than the sinfo_timetolive of the
  * message about to be sent (for the buffer policy that field holds a
  * priority, where a lower number means a higher priority) and it still has
  * data attached.  Scanning stops as soon as enough space has been freed.
  *
  * The TCB lock must be held by the caller.
  */
 static void
 sctp_prune_prsctp(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     struct sctp_sndrcvinfo *srcv,
     int dataout)
 {
 	struct sctp_tmit_chunk *chk, *nchk;
 	int reclaimed = 0;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* Nothing to do unless PR-SCTP is on and removable chunks exist. */
 	if (!(asoc->prsctp_supported) ||
 	    !(asoc->sent_queue_cnt_removeable > 0)) {
 		return;
 	}
 
 	/* Pass 1: chunks sitting on the sent queue. */
 	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
 		uint8_t was_sent;
 
 		/* Only PR-SCTP chunks with the buffer-space policy qualify. */
 		if (!(PR_SCTP_BUF_ENABLED(chk->flags))) {
 			continue;
 		}
 		/* Only chunks of lower priority than the new message. */
 		if (!(chk->rec.data.timetodrop.tv_sec > (long)srcv->sinfo_timetolive)) {
 			continue;
 		}
 		/* The booked size is only released if the mbuf is present. */
 		if (chk->data == NULL) {
 			continue;
 		}
 		was_sent = (chk->sent > SCTP_DATAGRAM_UNSENT) ? 1 : 0;
 		reclaimed += sctp_release_pr_sctp_chunk(stcb, chk, was_sent,
 		    SCTP_SO_LOCKED);
 		if (reclaimed >= dataout) {
 			return;
 		}
 	}
 
 	/*
 	 * Pass 2: chunks still waiting on the send queue.  The safe iterator
 	 * is used because releasing a chunk here must move it to the sent
 	 * queue and mark it.
 	 */
 	TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
 		if (!(PR_SCTP_BUF_ENABLED(chk->flags))) {
 			continue;
 		}
 		if (!(chk->rec.data.timetodrop.tv_sec > (long)srcv->sinfo_timetolive)) {
 			continue;
 		}
 		if (chk->data == NULL) {
 			continue;
 		}
 		reclaimed += sctp_release_pr_sctp_chunk(stcb, chk, 0,
 		    SCTP_SO_LOCKED);
 		if (reclaimed >= dataout) {
 			return;
 		}
 	}
 }
 
 uint32_t
 sctp_get_frag_point(struct sctp_tcb *stcb)
 {
 	struct sctp_association *asoc;
 	uint32_t frag_point, overhead;
 
 	asoc = &stcb->asoc;
 	/* Consider IP header and SCTP common header. */
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 		overhead = SCTP_MIN_OVERHEAD;
 	} else {
 		overhead = SCTP_MIN_V4_OVERHEAD;
 	}
 	/* Consider DATA/IDATA chunk header and AUTH header, if needed. */
 	if (asoc->idata_supported) {
 		overhead += sizeof(struct sctp_idata_chunk);
 		if (sctp_auth_is_required_chunk(SCTP_IDATA, asoc->peer_auth_chunks)) {
 			overhead += sctp_get_auth_chunk_len(asoc->peer_hmac_id);
 		}
 	} else {
 		overhead += sizeof(struct sctp_data_chunk);
 		if (sctp_auth_is_required_chunk(SCTP_DATA, asoc->peer_auth_chunks)) {
 			overhead += sctp_get_auth_chunk_len(asoc->peer_hmac_id);
 		}
 	}
 	KASSERT(overhead % 4 == 0,
 	    ("overhead (%u) not a multiple of 4", overhead));
 	/* Consider padding. */
 	if (asoc->smallest_mtu % 4 > 0) {
 		overhead += (asoc->smallest_mtu % 4);
 	}
 	KASSERT(asoc->smallest_mtu > overhead,
 	    ("Association MTU (%u) too small for overhead (%u)",
 	    asoc->smallest_mtu, overhead));
 	frag_point = asoc->smallest_mtu - overhead;
 	KASSERT(frag_point % 4 == 0,
 	    ("frag_point (%u) not a multiple of 4", frag_point));
 	/* Honor MAXSEG socket option. */
 	if ((asoc->sctp_frag_point > 0) &&
 	    (asoc->sctp_frag_point < frag_point)) {
 		frag_point = asoc->sctp_frag_point;
 	}
 	return (frag_point);
 }
 
 /*
  * Derive the PR-SCTP policy for a pending message and fill in sp->ts.
  *
  * If the send flags carry a PR-SCTP policy it is used as is.  Otherwise a
  * positive sp->timetolive implies the TTL policy.  With neither, the
  * message is left untouched.
  *
  * sp->ts is then set according to the policy:
  *   - BUF: tv_sec holds the priority (sp->timetolive), tv_usec is 0.
  *   - TTL: sp->ts becomes "now + sp->timetolive", with timetolive taken as
  *     milliseconds.
  *   - RTX: tv_sec holds the retransmission limit, tv_usec is 0.
  */
 static void
 sctp_set_prsctp_policy(struct sctp_stream_queue_pending *sp)
 {
 	/*
 	 * We assume that the user wants PR_SCTP_TTL if the user provides a
 	 * positive lifetime but does not specify any PR_SCTP policy.
 	 */
 	if (PR_SCTP_ENABLED(sp->sinfo_flags)) {
 		sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
 	} else if (sp->timetolive > 0) {
 		sp->sinfo_flags |= SCTP_PR_SCTP_TTL;
 		sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
 	} else {
 		return;
 	}
 	switch (PR_SCTP_POLICY(sp->sinfo_flags)) {
 	case CHUNK_FLAGS_PR_SCTP_BUF:
 		/*
 		 * Time to live is a priority stored in tv_sec when doing
 		 * the buffer drop thing.
 		 */
 		sp->ts.tv_sec = sp->timetolive;
 		sp->ts.tv_usec = 0;
 		break;
 	case CHUNK_FLAGS_PR_SCTP_TTL:
 		{
 			struct timeval tv;
 
 			(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
 			tv.tv_sec = sp->timetolive / 1000;
 			/*
 			 * Take the sub-second remainder first and scale it
 			 * afterwards.  Multiplying the full millisecond
 			 * count by 1000 can wrap in 32-bit arithmetic for
 			 * long lifetimes and yield a wrong tv_usec.
 			 */
 			tv.tv_usec = (sp->timetolive % 1000) * 1000;
 			/*
 			 * TODO sctp_constants.h needs alternative time
 			 * macros when _KERNEL is undefined.
 			 */
 			timevaladd(&sp->ts, &tv);
 		}
 		break;
 	case CHUNK_FLAGS_PR_SCTP_RTX:
 		/*
 		 * Time to live is the number of retransmissions stored in
 		 * tv_sec.
 		 */
 		sp->ts.tv_sec = sp->timetolive;
 		sp->ts.tv_usec = 0;
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_USRREQ1,
 		    "Unknown PR_SCTP policy %u.\n",
 		    PR_SCTP_POLICY(sp->sinfo_flags));
 		break;
 	}
 }
 
 /*
  * Queue a complete user message (mbuf chain m) on the outgoing stream named
  * by srcv->sinfo_stream and register it with the stream scheduler.
  *
  * Returns 0 on success.  On failure the chain m is freed and one of the
  * following is returned:
  *   EINVAL     - stream number out of range, or the association is locked
  *                on a different stream;
  *   ECONNRESET - the association is shutting down;
  *   ENOMEM     - no stream-queue entry could be allocated.
  *
  * The TCB lock must be held by the caller.
  */
 static int
 sctp_msg_append(struct sctp_tcb *stcb,
     struct sctp_nets *net,
     struct mbuf *m,
     struct sctp_sndrcvinfo *srcv)
 {
 	struct sctp_stream_queue_pending *sp = NULL;
 	struct sctp_stream_out *strm;
 	struct mbuf *mb;
 	int error = 0;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* The stream must exist. */
 	if (srcv->sinfo_stream >= stcb->asoc.streamoutcnt) {
 		SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		goto done;
 	}
 	/* While the association is locked on a stream, only that one may send. */
 	if ((stcb->asoc.stream_locked) &&
 	    (stcb->asoc.stream_locked_on != srcv->sinfo_stream)) {
 		SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		error = EINVAL;
 		goto done;
 	}
 	/* No new data once a shutdown is in progress or pending. */
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
 		error = ECONNRESET;
 		goto done;
 	}
 
 	sctp_alloc_a_strmoq(stcb, sp);
 	if (sp == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		error = ENOMEM;
 		goto done;
 	}
 
 	/* Copy the per-message send parameters. */
 	sp->sinfo_flags = srcv->sinfo_flags;
 	sp->timetolive = srcv->sinfo_timetolive;
 	sp->ppid = srcv->sinfo_ppid;
 	sp->context = srcv->sinfo_context;
 	sp->fsn = 0;
 	/* Pin the destination only when the caller asked for an override. */
 	if (sp->sinfo_flags & SCTP_ADDR_OVER) {
 		sp->net = net;
 		atomic_add_int(&sp->net->ref_count, 1);
 	} else {
 		sp->net = NULL;
 	}
 	(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
 	sp->sid = srcv->sinfo_stream;
 	/* The whole message is handed over in one go. */
 	sp->msg_is_complete = 1;
 	sp->sender_all_done = 1;
 	sp->some_taken = 0;
 	sp->data = m;
 	sp->tail_mbuf = NULL;
 	sctp_set_prsctp_policy(sp);
 
 	/*
 	 * Walk the chain once to find both the total length and the last
 	 * mbuf.  Even if the length were passed in, the walk would still
 	 * be needed to set up tail_mbuf.
 	 */
 	sp->length = 0;
 	mb = m;
 	while (mb != NULL) {
 		sp->length += SCTP_BUF_LEN(mb);
 		if (SCTP_BUF_NEXT(mb) == NULL) {
 			sp->tail_mbuf = mb;
 		}
 		mb = SCTP_BUF_NEXT(mb);
 	}
 
 	/* Choose the authentication key and take a reference if required. */
 	sp->auth_keyid = srcv->sinfo_keynumber_valid ?
 	    srcv->sinfo_keynumber : stcb->asoc.authinfo.active_keyid;
 	if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
 		sctp_auth_key_acquire(stcb, sp->auth_keyid);
 		sp->holds_key_ref = 1;
 	}
 
 	/* Account for the data and put the message on the stream queue. */
 	strm = &stcb->asoc.strmout[srcv->sinfo_stream];
 	sctp_snd_sb_alloc(stcb, sp->length);
 	atomic_add_int(&stcb->asoc.stream_queue_cnt, 1);
 	TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
 	stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, &stcb->asoc, strm, sp);
 	/* The queue entry owns the chain now. */
 	m = NULL;
 done:
 	if (m != NULL) {
 		sctp_m_freem(m);
 	}
 	return (error);
 }
 
 /*
  * Append the data of 'clonechain' to 'outchain' and return the head of the
  * resulting chain.
  *
  * clonechain    - source mbuf chain.
  * outchain      - chain being built; may be NULL, in which case a new chain
  *                 is started.
  * endofchain    - in/out pointer to the last mbuf of outchain; updated to
  *                 the new last mbuf.  Must not be NULL.
  * can_take_mbuf - when non-zero, clonechain itself is linked in rather than
  *                 copied.
  * sizeofcpy     - number of bytes to copy when the data is copied by value.
  * copy_by_ref   - when non-zero, the by-value copy is never used and
  *                 SCTP_M_COPYM() is used to duplicate clonechain instead.
  *
  * Returns the head of the output chain, or NULL on failure.  On the failure
  * paths visible here outchain (if any) is freed; clonechain is not touched.
  */
 static struct mbuf *
 sctp_copy_mbufchain(struct mbuf *clonechain,
     struct mbuf *outchain,
     struct mbuf **endofchain,
     int can_take_mbuf,
     int sizeofcpy,
     uint8_t copy_by_ref)
 {
 	struct mbuf *m;
 	struct mbuf *appendchain;
 	caddr_t cp;
 	int len;
 
 	if (endofchain == NULL) {
 		/* error */
 error_out:
 		/* Shared failure exit: drop whatever was built so far. */
 		if (outchain)
 			sctp_m_freem(outchain);
 		return (NULL);
 	}
 	if (can_take_mbuf) {
 		/* Ownership of clonechain is taken over; no copy needed. */
 		appendchain = clonechain;
 	} else {
 		/*
 		 * Small payloads (at most the size governed by the
 		 * sctp_mbuf_threshold_count sysctl) are copied byte-wise into
 		 * the trailing space of the output chain.
 		 */
 		if (!copy_by_ref &&
 		    (sizeofcpy <= (int)((((SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) - 1) * MLEN) + MHLEN)))) {
 			/* Its not in a cluster */
 			if (*endofchain == NULL) {
 				/* lets get a mbuf cluster */
 				if (outchain == NULL) {
 					/* This is the general case */
 			new_mbuf:
 					outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
 					if (outchain == NULL) {
 						goto error_out;
 					}
 					SCTP_BUF_LEN(outchain) = 0;
 					*endofchain = outchain;
 					/* get the prepend space */
 					SCTP_BUF_RESV_UF(outchain, (SCTP_FIRST_MBUF_RESV + 4));
 				} else {
 					/*
 					 * We really should not get a NULL
 					 * in endofchain
 					 */
 					/* find end */
 					m = outchain;
 					while (m) {
 						if (SCTP_BUF_NEXT(m) == NULL) {
 							*endofchain = m;
 							break;
 						}
 						m = SCTP_BUF_NEXT(m);
 					}
 					/* sanity */
 					if (*endofchain == NULL) {
 						/*
 						 * huh, TSNH XXX maybe we
 						 * should panic
 						 */
 						sctp_m_freem(outchain);
 						goto new_mbuf;
 					}
 				}
 				/* get the new end of length */
 				len = (int)M_TRAILINGSPACE(*endofchain);
 			} else {
 				/* how much is left at the end? */
 				len = (int)M_TRAILINGSPACE(*endofchain);
 			}
 			/* Find the end of the data, for appending */
 			cp = (mtod((*endofchain), caddr_t)+SCTP_BUF_LEN((*endofchain)));
 
 			/* Now lets copy it out */
 			if (len >= sizeofcpy) {
 				/* It all fits, copy it in */
 				m_copydata(clonechain, 0, sizeofcpy, cp);
 				SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
 			} else {
 				/* fill up the end of the chain */
 				if (len > 0) {
 					m_copydata(clonechain, 0, len, cp);
 					SCTP_BUF_LEN((*endofchain)) += len;
 					/* now we need another one */
 					sizeofcpy -= len;
 				}
 				/* The remainder goes into one freshly allocated mbuf. */
 				m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
 				if (m == NULL) {
 					/* We failed */
 					goto error_out;
 				}
 				SCTP_BUF_NEXT((*endofchain)) = m;
 				*endofchain = m;
 				cp = mtod((*endofchain), caddr_t);
 				/* 'len' bytes were already copied above; continue from there. */
 				m_copydata(clonechain, len, sizeofcpy, cp);
 				SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
 			}
 			return (outchain);
 		} else {
 			/* copy the old fashion way */
 			appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_NOWAIT);
 #ifdef SCTP_MBUF_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 				sctp_log_mbc(appendchain, SCTP_MBUF_ICOPY);
 			}
 #endif
 		}
 	}
 	if (appendchain == NULL) {
 		/* error */
 		if (outchain)
 			sctp_m_freem(outchain);
 		return (NULL);
 	}
 	if (outchain) {
 		/* tack on to the end */
 		if (*endofchain != NULL) {
 			SCTP_BUF_NEXT(((*endofchain))) = appendchain;
 		} else {
 			/* No cached tail: walk outchain to find its last mbuf. */
 			m = outchain;
 			while (m) {
 				if (SCTP_BUF_NEXT(m) == NULL) {
 					SCTP_BUF_NEXT(m) = appendchain;
 					break;
 				}
 				m = SCTP_BUF_NEXT(m);
 			}
 		}
 		/*
 		 * save off the end and update the end-chain position
 		 */
 		m = appendchain;
 		while (m) {
 			if (SCTP_BUF_NEXT(m) == NULL) {
 				*endofchain = m;
 				break;
 			}
 			m = SCTP_BUF_NEXT(m);
 		}
 		return (outchain);
 	} else {
 		/* save off the end and update the end-chain position */
 		m = appendchain;
 		while (m) {
 			if (SCTP_BUF_NEXT(m) == NULL) {
 				*endofchain = m;
 				break;
 			}
 			m = SCTP_BUF_NEXT(m);
 		}
 		/* There was no output chain yet, so the appended chain is the result. */
 		return (appendchain);
 	}
 }
 
 /*
  * Forward declaration: sctp_sendall_iterator() below calls this routine
  * before its definition appears.
  */
 static int
 sctp_med_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int *num_out,
     int *reason_code,
     int control_only, int from_where,
     struct timeval *now, int *now_filled,
     uint32_t frag_point, int so_locked);
 
 /*
  * Iterator callback for an SCTP_SENDALL request: applies the request held
  * in the sctp_copy_all context 'ptr' to one association.
  *
  * inp  - endpoint being iterated; must match ca->inp.
  * stcb - association to act on; its TCB lock is expected to be held.
  * ptr  - the struct sctp_copy_all describing the request.
  * val  - unused.
  *
  * Depending on ca->sndrcv.sinfo_flags the association is aborted (with the
  * user data as the abort reason), or a private copy of ca->m is queued on
  * it and, for SCTP_EOF, a shutdown is started or marked pending.  The
  * outcome is recorded in ca->cnt_sent / ca->cnt_failed.
  */
 static void
 sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
     uint32_t val SCTP_UNUSED)
 {
 	struct sctp_copy_all *ca;
 	struct mbuf *m;
 	int ret = 0;
 	int added_control = 0;
 	int un_sent, do_chunk_output = 1;
 	struct sctp_association *asoc;
 	struct sctp_nets *net;
 
 	ca = (struct sctp_copy_all *)ptr;
 	if (ca->m == NULL) {
 		return;
 	}
 	if (ca->inp != inp) {
 		/* TSNH */
 		return;
 	}
 	/* Every association gets its own copy of the user data. */
 	if (ca->sndlen > 0) {
 		m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_NOWAIT);
 		if (m == NULL) {
 			/* can't copy so we are done */
 			ca->cnt_failed++;
 			return;
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 			sctp_log_mbc(m, SCTP_MBUF_ICOPY);
 		}
 #endif
 	} else {
 		m = NULL;
 	}
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* Prefer the alternate destination when one is set. */
 	if (stcb->asoc.alternate) {
 		net = stcb->asoc.alternate;
 	} else {
 		net = stcb->asoc.primary_destination;
 	}
 	if (ca->sndrcv.sinfo_flags & SCTP_ABORT) {
 		/* Abort this assoc with m as the user defined reason */
 		if (m != NULL) {
 			SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_NOWAIT);
 		} else {
 			m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
 			    0, M_NOWAIT, 1, MT_DATA);
 			/*
 			 * The allocation is M_NOWAIT and may fail; only
 			 * touch the mbuf when we actually got one.
 			 */
 			if (m != NULL) {
 				SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
 			}
 		}
 		/* Without an mbuf the abort is sent with no cause attached. */
 		if (m != NULL) {
 			struct sctp_paramhdr *ph;
 
 			ph = mtod(m, struct sctp_paramhdr *);
 			ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
 			ph->param_length = htons((uint16_t)(sizeof(struct sctp_paramhdr) + ca->sndlen));
 		}
 		/*
 		 * We add one here to keep the assoc from dis-appearing on
 		 * us.
 		 */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		sctp_abort_an_association(inp, stcb, m, false, SCTP_SO_NOT_LOCKED);
 		/*
 		 * sctp_abort_an_association calls sctp_free_asoc() free
 		 * association will NOT free it since we incremented the
 		 * refcnt .. we do this to prevent it being freed and things
 		 * getting tricky since we could end up (from free_asoc)
 		 * calling inpcb_free which would get a recursive lock call
 		 * to the iterator lock.. But as a consequence of that the
 		 * stcb will return to us un-locked.. since free_asoc
 		 * returns with either no TCB or the TCB unlocked, we must
 		 * relock.. to unlock in the iterator timer :-0
 		 */
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		goto no_chunk_output;
 	} else {
 		if (m != NULL) {
 			ret = sctp_msg_append(stcb, net, m, &ca->sndrcv);
 		}
 		asoc = &stcb->asoc;
 		if (ca->sndrcv.sinfo_flags & SCTP_EOF) {
 			/* shutdown this assoc */
 			if (TAILQ_EMPTY(&asoc->send_queue) &&
 			    TAILQ_EMPTY(&asoc->sent_queue) &&
 			    sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED) == 0) {
 				if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 					goto abort_anyway;
 				}
 				/*
 				 * there is nothing queued to send, so I'm
 				 * done...
 				 */
 				if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
 				    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 				    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 					/*
 					 * only send SHUTDOWN the first time
 					 * through
 					 */
 					if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
 					sctp_stop_timers_for_shutdown(stcb);
 					sctp_send_shutdown(stcb, net);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
 					    net);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 					    NULL);
 					added_control = 1;
 					do_chunk_output = 0;
 				}
 			} else {
 				/*
 				 * we still got (or just got) data to send,
 				 * so set SHUTDOWN_PENDING
 				 */
 				/*
 				 * XXX sockets draft says that SCTP_EOF
 				 * should be sent with no data.  currently,
 				 * we will allow user data to be sent first
 				 * and move to SHUTDOWN-PENDING
 				 */
 				if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
 				    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 				    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 					if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 						SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
 					}
 					SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
 					if (TAILQ_EMPTY(&asoc->send_queue) &&
 					    TAILQ_EMPTY(&asoc->sent_queue) &&
 					    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
 						struct mbuf *op_err;
 						char msg[SCTP_DIAG_INFO_LEN];
 
 				abort_anyway:
 						SCTP_SNPRINTF(msg, sizeof(msg),
 						    "%s:%d at %s", __FILE__, __LINE__, __func__);
 						op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 						    msg);
 						/*
 						 * NOTE(review): unlike the SCTP_ABORT
 						 * path above, the TCB is not relocked
 						 * after the abort here - confirm this
 						 * is what the iterator expects.
 						 */
 						atomic_add_int(&stcb->asoc.refcnt, 1);
 						sctp_abort_an_association(stcb->sctp_ep, stcb,
 						    op_err, false, SCTP_SO_NOT_LOCKED);
 						atomic_subtract_int(&stcb->asoc.refcnt, 1);
 						goto no_chunk_output;
 					}
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 					    NULL);
 				}
 			}
 		}
 	}
 	un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
 	    (stcb->asoc.stream_queue_cnt * SCTP_DATA_CHUNK_OVERHEAD(stcb)));
 
 	/*
 	 * With NODELAY off, hold back output while data is in flight and
 	 * less than a full packet's worth is waiting.
 	 */
 	if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
 	    (stcb->asoc.total_flight > 0) &&
 	    (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
 		do_chunk_output = 0;
 	}
 	if (do_chunk_output)
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
 	else if (added_control) {
 		/* Only control chunks (the SHUTDOWN) need to go out. */
 		struct timeval now;
 		int num_out, reason, now_filled = 0;
 
 		(void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
 		    &reason, 1, 1, &now, &now_filled,
 		    sctp_get_frag_point(stcb),
 		    SCTP_SO_NOT_LOCKED);
 	}
 no_chunk_output:
 	if (ret) {
 		ca->cnt_failed++;
 	} else {
 		ca->cnt_sent++;
 	}
 }
 
 /*
  * Completion callback for the SCTP_SENDALL iterator.
  *
  * Clears SCTP_PCB_FLAGS_SND_ITERATOR_UP on the owning endpoint so another
  * send-all may start, then releases the saved message and the
  * sctp_copy_all context passed in 'ptr'.  'val' is unused.
  *
  * No notification is raised for failed sends: the only failures are memory
  * related, and a notification would need an mbuf as well.
  */
 static void
 sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED)
 {
 	struct sctp_copy_all *ca = (struct sctp_copy_all *)ptr;
 	struct sctp_inpcb *owner = ca->inp;
 
 	if (owner != NULL) {
 		/* Let the next send-all on this endpoint proceed. */
 		SCTP_INP_WLOCK(owner);
 		owner->sctp_flags &= ~SCTP_PCB_FLAGS_SND_ITERATOR_UP;
 		SCTP_INP_WUNLOCK(owner);
 	}
 	/* Release the message first, then the context holding it. */
 	sctp_m_freem(ca->m);
 	SCTP_FREE(ca, SCTP_M_COPYAL);
 }
 
 /*
  * Copy 'len' bytes described by 'uio' into a newly allocated mbuf chain.
  *
  * Returns the head of the chain, or NULL if an mbuf could not be obtained
  * or uiomove() failed.  On failure the whole partially built chain is
  * freed.
  */
 static struct mbuf *
 sctp_copy_out_all(struct uio *uio, ssize_t len)
 {
 	struct mbuf *ret, *at;
 	ssize_t left, willcpy, cancpy, error;
 
 	ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAITOK, 1, MT_DATA);
 	if (ret == NULL) {
 		/* TSNH */
 		return (NULL);
 	}
 	left = len;
 	SCTP_BUF_LEN(ret) = 0;
 	/* Each mbuf is filled with as much as its trailing space allows. */
 	cancpy = (int)M_TRAILINGSPACE(ret);
 	willcpy = min(cancpy, left);
 	at = ret;
 	while (left > 0) {
 		error = uiomove(mtod(at, caddr_t), (int)willcpy, uio);
 		if (error) {
 			goto err_out_now;
 		}
 		SCTP_BUF_LEN(at) = (int)willcpy;
 		SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
 		left -= willcpy;
 		if (left > 0) {
 			SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg((unsigned int)left, 0, M_WAITOK, 1, MT_DATA);
 			if (SCTP_BUF_NEXT(at) == NULL) {
 				goto err_out_now;
 			}
 			at = SCTP_BUF_NEXT(at);
 			SCTP_BUF_LEN(at) = 0;
 			cancpy = (int)M_TRAILINGSPACE(at);
 			willcpy = min(cancpy, left);
 		}
 	}
 	return (ret);
 
 err_out_now:
 	/*
 	 * Free from the head of the chain.  'at' is only the current tail,
 	 * and it is already linked into the chain starting at 'ret', so
 	 * freeing just 'at' would leak every earlier mbuf and leave a
 	 * dangling next pointer behind.
 	 */
 	sctp_m_freem(ret);
 	return (NULL);
 }
 
 /*
  * Start an SCTP_SENDALL operation.
  *
  * A struct sctp_copy_all is allocated, filled with a copy of the send
  * information (with SCTP_SENDALL cleared) and, when a uio is supplied, a
  * private mbuf copy of the user data.  It is then registered with
  * sctp_initiate_iterator() together with sctp_sendall_iterator and
  * sctp_sendall_completes.
  *
  * Only one sendall may be outstanding per endpoint: the
  * SCTP_PCB_FLAGS_SND_ITERATOR_UP flag is taken under the INP write lock
  * and is cleared again on every failure path that runs after it was set.
  *
  * Returns 0 once the iterator has been started, EMSGSIZE if the uio
  * carries more than the sctp_sendall_limit sysctl allows, ENOMEM if the
  * bookkeeping structure or the data copy cannot be obtained, EBUSY if a
  * sendall is already in progress on inp, and EFAULT if the iterator cannot
  * be started.
  *
  * NOTE(review): m is released on the ENOMEM and EBUSY paths but not on the
  * EMSGSIZE and EFAULT paths, and it is never attached to ca; confirm the
  * intended ownership of m against the caller.
  */
 static int
 sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
     struct sctp_sndrcvinfo *srcv)
 {
 	int ret;
 	struct sctp_copy_all *ca;
 
 	/* uio is optional (see below), so only size-check it when present. */
 	if ((uio != NULL) &&
 	    (uio->uio_resid > (ssize_t)SCTP_BASE_SYSCTL(sctp_sendall_limit))) {
 		/* You must not be larger than the limit! */
 		return (EMSGSIZE);
 	}
 	SCTP_MALLOC(ca, struct sctp_copy_all *, sizeof(struct sctp_copy_all),
 	    SCTP_M_COPYAL);
 	if (ca == NULL) {
 		sctp_m_freem(m);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	memset(ca, 0, sizeof(struct sctp_copy_all));
 
 	ca->inp = inp;
 	if (srcv) {
 		memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo));
 	}
 
 	/* Serialize. */
 	SCTP_INP_WLOCK(inp);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SND_ITERATOR_UP) != 0) {
 		SCTP_INP_WUNLOCK(inp);
 		sctp_m_freem(m);
 		SCTP_FREE(ca, SCTP_M_COPYAL);
 		return (EBUSY);
 	}
 	inp->sctp_flags |= SCTP_PCB_FLAGS_SND_ITERATOR_UP;
 	SCTP_INP_WUNLOCK(inp);
 
 	/*
 	 * take off the sendall flag, it would be bad if we failed to do
 	 * this :-0
 	 */
 	ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL;
 	/* get length and mbuf chain */
 	if (uio) {
 		ca->sndlen = uio->uio_resid;
 		ca->m = sctp_copy_out_all(uio, ca->sndlen);
 		if (ca->m == NULL) {
 			SCTP_FREE(ca, SCTP_M_COPYAL);
 			sctp_m_freem(m);
 			SCTP_INP_WLOCK(inp);
 			inp->sctp_flags &= ~SCTP_PCB_FLAGS_SND_ITERATOR_UP;
 			SCTP_INP_WUNLOCK(inp);
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 			return (ENOMEM);
 		}
 	} else {
 		/* Gather the length of the send */
 		struct mbuf *mat;
 
 		ca->sndlen = 0;
 		for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
 			ca->sndlen += SCTP_BUF_LEN(mat);
 		}
 	}
 	ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
 	    SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
 	    SCTP_ASOC_ANY_STATE,
 	    (void *)ca, 0,
 	    sctp_sendall_completes, inp, 1);
 	if (ret) {
 		SCTP_INP_WLOCK(inp);
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_SND_ITERATOR_UP;
 		SCTP_INP_WUNLOCK(inp);
 		/*
 		 * The iterator never took over ca, so the private data copy
 		 * hanging off it has to be released here as well (ca->m is
 		 * NULL when no uio was supplied).
 		 */
 		sctp_m_freem(ca->m);
 		SCTP_FREE(ca, SCTP_M_COPYAL);
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		return (EFAULT);
 	}
 	return (0);
 }
 
 /*
  * Remove every COOKIE-ECHO chunk from the association's control send
  * queue, releasing the chunk's data and the chunk itself.
  */
 void
 sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc)
 {
 	struct sctp_tmit_chunk *cur, *nxt;
 
 	TAILQ_FOREACH_SAFE(cur, &asoc->control_send_queue, sctp_next, nxt) {
 		if (cur->rec.chunk_id.id != SCTP_COOKIE_ECHO) {
 			/* Anything else stays queued. */
 			continue;
 		}
 		TAILQ_REMOVE(&asoc->control_send_queue, cur, sctp_next);
 		asoc->ctrl_queue_cnt--;
 		if (cur->data != NULL) {
 			sctp_m_freem(cur->data);
 			cur->data = NULL;
 		}
 		sctp_free_a_chunk(stcb, cur, SCTP_SO_NOT_LOCKED);
 	}
 }
 
 /*
  * Walk the ASCONF send queue and release ASCONF chunks until one is found
  * whose serial number is still beyond asconf_seq_out_acked.  Chunks of any
  * other type on the queue are left alone.
  */
 void
 sctp_toss_old_asconf(struct sctp_tcb *stcb)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_tmit_chunk *cur, *nxt;
 	struct sctp_asconf_chunk *hdr;
 
 	TAILQ_FOREACH_SAFE(cur, &asoc->asconf_send_queue, sctp_next, nxt) {
 		if (cur->rec.chunk_id.id != SCTP_ASCONF) {
 			continue;
 		}
 		if (cur->data != NULL) {
 			hdr = mtod(cur->data, struct sctp_asconf_chunk *);
 			if (SCTP_TSN_GT(ntohl(hdr->serial_number), asoc->asconf_seq_out_acked)) {
 				/* First unacknowledged ASCONF: stop here. */
 				break;
 			}
 		}
 		TAILQ_REMOVE(&asoc->asconf_send_queue, cur, sctp_next);
 		asoc->ctrl_queue_cnt--;
 		if (cur->data != NULL) {
 			sctp_m_freem(cur->data);
 			cur->data = NULL;
 		}
 		sctp_free_a_chunk(stcb, cur, SCTP_SO_NOT_LOCKED);
 	}
 }
 
 /*
  * Book-keeping for the bundle_at data chunks in data_list:
  * each chunk is moved from the send queue to the sent queue (kept in TSN
  * order), stamped with the send time and cwnd of net, marked as sent and
  * accounted in the flight size and the peer's receive window.  Finally
  * the congestion control module is told that a packet was transmitted,
  * if it registered a hook for that.
  */
 static void
 sctp_clean_up_datalist(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     struct sctp_tmit_chunk **data_list,
     int bundle_at,
     struct sctp_nets *net)
 {
 	struct sctp_tmit_chunk *chk, *pos, *before;
 	int idx;
 
 	for (idx = 0; idx < bundle_at; idx++) {
 		chk = data_list[idx];
 
 		/* Unhook the chunk from the send queue. */
 		TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 		asoc->send_queue_cnt--;
 		if (idx != 0) {
 			/*
 			 * Only the first chunk of the bundle may keep its
 			 * RTT measurement request.
 			 */
 			chk->do_rtt = 0;
 		}
 		/* Remember when and under which cwnd it went out. */
 		chk->sent_rcv_time = net->last_sent_time;
 		chk->rec.data.cwnd_at_send = net->cwnd;
 		chk->rec.data.fast_retran_tsn = chk->rec.data.tsn;
 		if (chk->whoTo == NULL) {
 			chk->whoTo = net;
 			atomic_add_int(&net->ref_count, 1);
 		}
 
 		/* Link it into the sent queue at its TSN position. */
 		pos = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead);
 		if ((pos == NULL) ||
 		    !SCTP_TSN_GT(pos->rec.data.tsn, chk->rec.data.tsn)) {
 			TAILQ_INSERT_TAIL(&asoc->sent_queue, chk, sctp_next);
 		} else {
 			/* Walk backwards until an entry with a TSN that is not larger. */
 			for (;;) {
 				before = TAILQ_PREV(pos, sctpchunk_listhead, sctp_next);
 				if (before == NULL) {
 					/* Ran off the front of the queue. */
 					TAILQ_INSERT_BEFORE(pos, chk, sctp_next);
 					break;
 				}
 				pos = before;
 				if (!SCTP_TSN_GT(pos->rec.data.tsn, chk->rec.data.tsn)) {
 					TAILQ_INSERT_AFTER(&asoc->sent_queue, pos, chk, sctp_next);
 					break;
 				}
 			}
 		}
 
 		/* This does not lower until the cum-ack passes it */
 		asoc->sent_queue_cnt++;
 		if ((asoc->peers_rwnd <= 0) &&
 		    (asoc->total_flight == 0) &&
 		    (bundle_at == 1)) {
 			/* Count the chunk as a window probe. */
 			SCTP_STAT_INCR(sctps_windowprobed);
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_audit_log(0xC2, 3);
 #endif
 		chk->sent = SCTP_DATAGRAM_SENT;
 		chk->snd_count = 1;
 		chk->rec.data.chunk_was_revoked = 0;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
 			sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
 			    chk->whoTo->flight_size,
 			    chk->book_size,
 			    (uint32_t)(uintptr_t)chk->whoTo,
 			    chk->rec.data.tsn);
 		}
 		sctp_flight_size_increase(chk);
 		sctp_total_flight_increase(stcb, chk);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
 			sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
 			    asoc->peers_rwnd, chk->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
 		}
 		asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
 		    (uint32_t)(chk->send_size + SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)));
 		if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
 			/* SWS sender side engages */
 			asoc->peers_rwnd = 0;
 		}
 	}
 	if (asoc->cc_functions.sctp_cwnd_update_packet_transmitted) {
 		(*asoc->cc_functions.sctp_cwnd_update_packet_transmitted) (stcb, net);
 	}
 }
 
 /*
  * Purge stray chunks from the control send queue.
  *
  * Chunks of the types listed below are always removed.  A STREAM-RESET
  * chunk is removed unless it is the one referenced by asoc->str_reset.
  * Every other chunk type stays on the queue.
  */
 static void
 sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc, int so_locked)
 {
 	struct sctp_tmit_chunk *cur, *nxt;
 	int discard;
 
 	TAILQ_FOREACH_SAFE(cur, &asoc->control_send_queue, sctp_next, nxt) {
 		discard =
 		    (cur->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
 		    (cur->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) ||
 		    (cur->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
 		    (cur->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
 		    (cur->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) ||
 		    (cur->rec.chunk_id.id == SCTP_SHUTDOWN) ||
 		    (cur->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
 		    (cur->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
 		    (cur->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
 		    (cur->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
 		    (cur->rec.chunk_id.id == SCTP_ECN_CWR) ||
 		    (cur->rec.chunk_id.id == SCTP_ASCONF_ACK);
 		if (!discard &&
 		    (cur->rec.chunk_id.id == SCTP_STREAM_RESET) &&
 		    (cur != asoc->str_reset)) {
 			/* A stream reset that is not the pending one. */
 			discard = 1;
 		}
 		if (!discard) {
 			continue;
 		}
 		/* Stray chunks must be cleaned up */
 		TAILQ_REMOVE(&asoc->control_send_queue, cur, sctp_next);
 		asoc->ctrl_queue_cnt--;
 		if (cur->data != NULL) {
 			sctp_m_freem(cur->data);
 			cur->data = NULL;
 		}
 		if (cur->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
 			asoc->fwd_tsn_cnt--;
 		}
 		sctp_free_a_chunk(stcb, cur, so_locked);
 	}
 }
 
 /*
  * Decide how many bytes of an incomplete message of the given length may
  * be taken now, given space_left bytes of room and the fragmentation
  * point.  Returns 0 when nothing should be taken.
  */
 static uint32_t
 sctp_can_we_split_this(struct sctp_tcb *stcb, uint32_t length,
     uint32_t space_left, uint32_t frag_point, int eeor_on)
 {
 	if (eeor_on) {
 		if (space_left < length) {
 			/* Does not fit completely: fill what room there is. */
 			return (space_left);
 		}
 		/*
 		 * Everything queued so far fits.  Send it only if nothing
 		 * is in flight; otherwise wait, the arriving SACK gives
 		 * another chance to transmit and more data may show up.
 		 */
 		return ((stcb->asoc.total_flight == 0) ? length : 0);
 	}
 	/*
 	 * A send buffer smaller than the fragmentation point can never
 	 * hold a full message, so splitting has to be allowed.
 	 */
 	if (SCTP_SB_LIMIT_SND(stcb->sctp_socket) < frag_point) {
 		return (length);
 	}
 	if (length <= space_left) {
 		/* It would fit as a whole; do not split in non-EEOR mode. */
 		return (0);
 	}
 	if ((length - space_left) < SCTP_BASE_SYSCTL(sctp_min_residual)) {
 		/* The piece left behind would be too small. */
 		return (0);
 	}
 	/* length exceeds space_left: split only if the room is worth it. */
 	if (space_left < min(SCTP_BASE_SYSCTL(sctp_min_split_point), frag_point)) {
 		return (0);
 	}
 	return (min(space_left, frag_point));
 }
 
 /*
  * Take data from the first pending message on strq->outqueue, wrap it in
  * one DATA (or I-DATA, when asoc.idata_supported is set) chunk, assign it
  * a TSN and append the chunk to asoc->send_queue.
  *
  * Fully consumed messages whose sender is done are unlinked and freed
  * along the way.  A complete message is cut at frag_point; for an
  * incomplete message sctp_can_we_split_this() decides, based on
  * space_left and eeor_mode, how much may be taken.
  *
  * net is only handed to the stream scheduler's sctp_ss_scheduled hook.
  *
  * Returns the number of user data bytes moved (chunk header excluded), or
  * 0 if nothing was moved.  *giveup is set to 1 when nothing can be taken
  * from this stream right now (including chunk allocation failure); *bail
  * is set to 1 when an mbuf copy, header mbuf allocation or prepend fails.
  * Neither flag is cleared by this function.
  */
 static uint32_t
 sctp_move_to_outqueue(struct sctp_tcb *stcb,
     struct sctp_nets *net,
     struct sctp_stream_out *strq,
     uint32_t space_left,
     uint32_t frag_point,
     int *giveup,
     int eeor_mode,
     int *bail,
     int so_locked)
 {
 	/* Move from the stream to the send_queue keeping track of the total */
 	struct sctp_association *asoc;
 	struct sctp_stream_queue_pending *sp;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_data_chunk *dchkh = NULL;
 	struct sctp_idata_chunk *ndchkh = NULL;
 	uint32_t to_move, length;
 	int leading;
 	uint8_t rcv_flags = 0;
 	uint8_t some_taken;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	asoc = &stcb->asoc;
 one_more_time:
 	/* sa_ignore FREED_MEMORY */
 	sp = TAILQ_FIRST(&strq->outqueue);
 	if (sp == NULL) {
 		/*
 		 * NOTE(review): this re-reads the same queue head that was
 		 * just found to be NULL; nothing in between modifies it.
 		 */
 		sp = TAILQ_FIRST(&strq->outqueue);
 		if (sp) {
 			goto one_more_time;
 		}
 		if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_EXPLICIT_EOR) == 0) &&
 		    (stcb->asoc.idata_supported == 0) &&
 		    (strq->last_msg_incomplete)) {
 			SCTP_PRINTF("Huh? Stream:%d lm_in_c=%d but queue is NULL\n",
 			    strq->sid,
 			    strq->last_msg_incomplete);
 			strq->last_msg_incomplete = 0;
 		}
 		to_move = 0;
 		goto out_of;
 	}
 	if ((sp->msg_is_complete) && (sp->length == 0)) {
 		if (sp->sender_all_done) {
 			/*
 			 * We are doing deferred cleanup. Last time through
 			 * when we took all the data the sender_all_done was
 			 * not set.
 			 */
 			if ((sp->put_last_out == 0) && (sp->discard_rest == 0)) {
 				SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
 				SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d\n",
 				    sp->sender_all_done,
 				    sp->length,
 				    sp->msg_is_complete,
 				    sp->put_last_out);
 			}
 			atomic_subtract_int(&asoc->stream_queue_cnt, 1);
 			TAILQ_REMOVE(&strq->outqueue, sp, next);
 			stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp);
 			if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
 			    (strq->chunks_on_queues == 0) &&
 			    TAILQ_EMPTY(&strq->outqueue)) {
 				stcb->asoc.trigger_reset = 1;
 			}
 			if (sp->net) {
 				sctp_free_remote_addr(sp->net);
 				sp->net = NULL;
 			}
 			if (sp->data) {
 				sctp_m_freem(sp->data);
 				sp->data = NULL;
 			}
 			sctp_free_a_strmoq(stcb, sp, so_locked);
 			/* back to get the next msg */
 			goto one_more_time;
 		} else {
 			/*
 			 * sender just finished this but still holds a
 			 * reference
 			 */
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		}
 	} else {
 		/* is there some to get */
 		if (sp->length == 0) {
 			/* no */
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		} else if (sp->discard_rest) {
 			/* Whack down the size */
 			atomic_subtract_int(&stcb->asoc.total_output_queue_size, sp->length);
 			if ((stcb->sctp_socket != NULL) &&
 			    ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
 				atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc, sp->length);
 			}
 			if (sp->data) {
 				sctp_m_freem(sp->data);
 				sp->data = NULL;
 				sp->tail_mbuf = NULL;
 			}
 			sp->length = 0;
 			sp->some_taken = 1;
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		}
 	}
 	/* Snapshot so that sp->some_taken can be restored if we have to back out. */
 	some_taken = sp->some_taken;
 	length = sp->length;
 	if (sp->msg_is_complete) {
 		/* The message is complete */
 		to_move = min(length, frag_point);
 		if (to_move == length) {
 			/* All of it fits in the MTU */
 			if (sp->some_taken) {
 				rcv_flags |= SCTP_DATA_LAST_FRAG;
 			} else {
 				rcv_flags |= SCTP_DATA_NOT_FRAG;
 			}
 			sp->put_last_out = 1;
 			if (sp->sinfo_flags & SCTP_SACK_IMMEDIATELY) {
 				rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY;
 			}
 		} else {
 			/* Not all of it fits, we fragment */
 			if (sp->some_taken == 0) {
 				rcv_flags |= SCTP_DATA_FIRST_FRAG;
 			}
 			sp->some_taken = 1;
 		}
 	} else {
 		to_move = sctp_can_we_split_this(stcb, length, space_left, frag_point, eeor_mode);
 		if (to_move > 0) {
 			if (to_move >= length) {
 				to_move = length;
 			}
 			if (sp->some_taken == 0) {
 				rcv_flags |= SCTP_DATA_FIRST_FRAG;
 				sp->some_taken = 1;
 			}
 		} else {
 			/* Nothing to take. */
 			*giveup = 1;
 			to_move = 0;
 			goto out_of;
 		}
 	}
 
 	/* If we reach here, we can copy out a chunk */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* No chunk memory */
 		*giveup = 1;
 		to_move = 0;
 		goto out_of;
 	}
 	/*
 	 * Setup for unordered if needed by looking at the user sent info
 	 * flags.
 	 */
 	if (sp->sinfo_flags & SCTP_UNORDERED) {
 		rcv_flags |= SCTP_DATA_UNORDERED;
 	}
 	if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) &&
 	    (sp->sinfo_flags & SCTP_EOF) == SCTP_EOF) {
 		rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY;
 	}
 	/* clear out the chunk before setting up */
 	memset(chk, 0, sizeof(*chk));
 	chk->rec.data.rcv_flags = rcv_flags;
 
 	if (to_move >= length) {
 		/* we think we can steal the whole thing */
 		if (to_move < sp->length) {
 			/* bail, it changed */
 			goto dont_do_it;
 		}
 		chk->data = sp->data;
 		chk->last_mbuf = sp->tail_mbuf;
 		/* register the stealing */
 		sp->data = sp->tail_mbuf = NULL;
 	} else {
 		struct mbuf *m;
 
 		/* Partial take: copy to_move bytes, then trim them off sp->data. */
 dont_do_it:
 		chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_NOWAIT);
 		chk->last_mbuf = NULL;
 		if (chk->data == NULL) {
 			sp->some_taken = some_taken;
 			sctp_free_a_chunk(stcb, chk, so_locked);
 			*bail = 1;
 			to_move = 0;
 			goto out_of;
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 			sctp_log_mbc(chk->data, SCTP_MBUF_ICOPY);
 		}
 #endif
 		/* Pull off the data */
 		m_adj(sp->data, to_move);
 		/* Now lets work our way down and compact it */
 		m = sp->data;
 		while (m && (SCTP_BUF_LEN(m) == 0)) {
 			sp->data = SCTP_BUF_NEXT(m);
 			SCTP_BUF_NEXT(m) = NULL;
 			if (sp->tail_mbuf == m) {
 				/*-
 				 * Freeing tail? TSNH since
 				 * we supposedly were taking less
 				 * than the sp->length.
 				 */
 #ifdef INVARIANTS
 				panic("Huh, freeing tail? - TSNH");
 #else
 				SCTP_PRINTF("Huh, freeing tail? - TSNH\n");
 				sp->tail_mbuf = sp->data = NULL;
 				sp->length = 0;
 #endif
 			}
 			sctp_m_free(m);
 			m = sp->data;
 		}
 	}
 	if (SCTP_BUF_IS_EXTENDED(chk->data)) {
 		chk->copy_by_ref = 1;
 	} else {
 		chk->copy_by_ref = 0;
 	}
 	/*
 	 * get last_mbuf and counts of mb usage This is ugly but hopefully
 	 * its only one mbuf.
 	 */
 	if (chk->last_mbuf == NULL) {
 		chk->last_mbuf = chk->data;
 		while (SCTP_BUF_NEXT(chk->last_mbuf) != NULL) {
 			chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf);
 		}
 	}
 
 	if (to_move > length) {
 		/*- This should not happen either
 		 * since we always lower to_move to the size
 		 * of sp->length if its larger.
 		 */
 #ifdef INVARIANTS
 		panic("Huh, how can to_move be larger?");
 #else
 		SCTP_PRINTF("Huh, how can to_move be larger?\n");
 		sp->length = 0;
 #endif
 	} else {
 		atomic_subtract_int(&sp->length, to_move);
 	}
 	leading = SCTP_DATA_CHUNK_OVERHEAD(stcb);
 	if (M_LEADINGSPACE(chk->data) < leading) {
 		/* Not enough room for a chunk header, get some */
 		struct mbuf *m;
 
 		m = sctp_get_mbuf_for_msg(1, 0, M_NOWAIT, 1, MT_DATA);
 		if (m == NULL) {
 			/*
 			 * we're in trouble here. _PREPEND below will free
 			 * all the data if there is no leading space, so we
 			 * must put the data back and restore.
 			 */
 			if (sp->data == NULL) {
 				/* unsteal the data */
 				sp->data = chk->data;
 				sp->tail_mbuf = chk->last_mbuf;
 			} else {
 				struct mbuf *m_tmp;
 
 				/* reassemble the data */
 				m_tmp = sp->data;
 				sp->data = chk->data;
 				SCTP_BUF_NEXT(chk->last_mbuf) = m_tmp;
 			}
 			sp->some_taken = some_taken;
 			atomic_add_int(&sp->length, to_move);
 			chk->data = NULL;
 			*bail = 1;
 			sctp_free_a_chunk(stcb, chk, so_locked);
 			to_move = 0;
 			goto out_of;
 		} else {
 			SCTP_BUF_LEN(m) = 0;
 			SCTP_BUF_NEXT(m) = chk->data;
 			chk->data = m;
 			M_ALIGN(chk->data, 4);
 		}
 	}
 	SCTP_BUF_PREPEND(chk->data, SCTP_DATA_CHUNK_OVERHEAD(stcb), M_NOWAIT);
 	if (chk->data == NULL) {
 		/* HELP, TSNH since we assured it would not above? */
 #ifdef INVARIANTS
 		panic("prepend fails HELP?");
 #else
 		SCTP_PRINTF("prepend fails HELP?\n");
 		sctp_free_a_chunk(stcb, chk, so_locked);
 #endif
 		*bail = 1;
 		to_move = 0;
 		goto out_of;
 	}
 	sctp_snd_sb_alloc(stcb, SCTP_DATA_CHUNK_OVERHEAD(stcb));
 	/* Both sizes cover the payload plus the chunk header, without padding. */
 	chk->book_size = chk->send_size = (uint16_t)(to_move + SCTP_DATA_CHUNK_OVERHEAD(stcb));
 	chk->book_size_scale = 0;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->pad_inplace = 0;
 	chk->no_fr_allowed = 0;
 	/* Pick the message id and advance the per-stream counter on the last fragment. */
 	if (stcb->asoc.idata_supported == 0) {
 		if (rcv_flags & SCTP_DATA_UNORDERED) {
 			/* Just use 0. The receiver ignores the values. */
 			chk->rec.data.mid = 0;
 		} else {
 			chk->rec.data.mid = strq->next_mid_ordered;
 			if (rcv_flags & SCTP_DATA_LAST_FRAG) {
 				strq->next_mid_ordered++;
 			}
 		}
 	} else {
 		if (rcv_flags & SCTP_DATA_UNORDERED) {
 			chk->rec.data.mid = strq->next_mid_unordered;
 			if (rcv_flags & SCTP_DATA_LAST_FRAG) {
 				strq->next_mid_unordered++;
 			}
 		} else {
 			chk->rec.data.mid = strq->next_mid_ordered;
 			if (rcv_flags & SCTP_DATA_LAST_FRAG) {
 				strq->next_mid_ordered++;
 			}
 		}
 	}
 	chk->rec.data.sid = sp->sid;
 	chk->rec.data.ppid = sp->ppid;
 	chk->rec.data.context = sp->context;
 	chk->rec.data.doing_fast_retransmit = 0;
 
 	chk->rec.data.timetodrop = sp->ts;
 	chk->flags = sp->act_flags;
 
 	if (sp->net) {
 		chk->whoTo = sp->net;
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	} else
 		chk->whoTo = NULL;
 
 	if (sp->holds_key_ref) {
 		chk->auth_keyid = sp->auth_keyid;
 		sctp_auth_key_acquire(stcb, chk->auth_keyid);
 		chk->holds_key_ref = 1;
 	}
 	stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, to_move);
 	chk->rec.data.tsn = atomic_fetchadd_int(&asoc->sending_seq, 1);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_OUTQ) {
 		sctp_misc_ints(SCTP_STRMOUT_LOG_SEND,
 		    (uint32_t)(uintptr_t)stcb, sp->length,
 		    (uint32_t)((chk->rec.data.sid << 16) | (0x0000ffff & chk->rec.data.mid)),
 		    chk->rec.data.tsn);
 	}
 	if (stcb->asoc.idata_supported == 0) {
 		dchkh = mtod(chk->data, struct sctp_data_chunk *);
 	} else {
 		ndchkh = mtod(chk->data, struct sctp_idata_chunk *);
 	}
 	/*
 	 * Put the rest of the things in place now. Size was done earlier in
 	 * previous loop prior to padding.
 	 */
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	if (asoc->tsn_out_at >= SCTP_TSN_LOG_SIZE) {
 		asoc->tsn_out_at = 0;
 		asoc->tsn_out_wrapped = 1;
 	}
 	asoc->out_tsnlog[asoc->tsn_out_at].tsn = chk->rec.data.tsn;
 	asoc->out_tsnlog[asoc->tsn_out_at].strm = chk->rec.data.sid;
 	asoc->out_tsnlog[asoc->tsn_out_at].seq = chk->rec.data.mid;
 	asoc->out_tsnlog[asoc->tsn_out_at].sz = chk->send_size;
 	asoc->out_tsnlog[asoc->tsn_out_at].flgs = chk->rec.data.rcv_flags;
 	asoc->out_tsnlog[asoc->tsn_out_at].stcb = (void *)stcb;
 	asoc->out_tsnlog[asoc->tsn_out_at].in_pos = asoc->tsn_out_at;
 	asoc->out_tsnlog[asoc->tsn_out_at].in_out = 2;
 	asoc->tsn_out_at++;
 #endif
 	/* Fill in the on-the-wire chunk header. */
 	if (stcb->asoc.idata_supported == 0) {
 		dchkh->ch.chunk_type = SCTP_DATA;
 		dchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
 		dchkh->dp.tsn = htonl(chk->rec.data.tsn);
 		dchkh->dp.sid = htons(strq->sid);
 		dchkh->dp.ssn = htons((uint16_t)chk->rec.data.mid);
 		dchkh->dp.ppid = chk->rec.data.ppid;
 		dchkh->ch.chunk_length = htons(chk->send_size);
 	} else {
 		ndchkh->ch.chunk_type = SCTP_IDATA;
 		ndchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
 		ndchkh->dp.tsn = htonl(chk->rec.data.tsn);
 		ndchkh->dp.sid = htons(strq->sid);
 		ndchkh->dp.reserved = htons(0);
 		ndchkh->dp.mid = htonl(chk->rec.data.mid);
 		/* ppid_fsn holds the PPID while sp->fsn is 0 and the FSN afterwards. */
 		if (sp->fsn == 0)
 			ndchkh->dp.ppid_fsn.ppid = chk->rec.data.ppid;
 		else
 			ndchkh->dp.ppid_fsn.fsn = htonl(sp->fsn);
 		sp->fsn++;
 		ndchkh->ch.chunk_length = htons(chk->send_size);
 	}
 	/* Now advance the chk->send_size by the actual pad needed. */
 	if (chk->send_size < SCTP_SIZE32(chk->book_size)) {
 		/* need a pad */
 		struct mbuf *lm;
 		int pads;
 
 		pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
 		lm = sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf);
 		if (lm != NULL) {
 			chk->last_mbuf = lm;
 			chk->pad_inplace = 1;
 		}
 		chk->send_size += pads;
 	}
 	if (PR_SCTP_ENABLED(chk->flags)) {
 		asoc->pr_sctp_cnt++;
 	}
 	if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
 		/* All done pull and kill the message */
 		if (sp->put_last_out == 0) {
 			SCTP_PRINTF("Gak, put out entire msg with NO end!-2\n");
 			SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d\n",
 			    sp->sender_all_done,
 			    sp->length,
 			    sp->msg_is_complete,
 			    sp->put_last_out);
 		}
 		atomic_subtract_int(&asoc->stream_queue_cnt, 1);
 		TAILQ_REMOVE(&strq->outqueue, sp, next);
 		stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp);
 		if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
 		    (strq->chunks_on_queues == 0) &&
 		    TAILQ_EMPTY(&strq->outqueue)) {
 			stcb->asoc.trigger_reset = 1;
 		}
 		if (sp->net) {
 			sctp_free_remote_addr(sp->net);
 			sp->net = NULL;
 		}
 		if (sp->data) {
 			sctp_m_freem(sp->data);
 			sp->data = NULL;
 		}
 		sctp_free_a_strmoq(stcb, sp, so_locked);
 	}
 	asoc->chunks_on_out_queue++;
 	strq->chunks_on_queues++;
 	TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next);
 	asoc->send_queue_cnt++;
 out_of:
 	return (to_move);
 }
 
 /*
  * Pull roughly one MTU worth of data for net out of the stream queues and
  * onto the association's send queue.
  *
  * Streams are taken in the order chosen by the stream scheduler; after
  * each chunk the remaining room is reduced by the moved bytes plus one
  * data chunk header and rounded down to a multiple of four.  *quit_now is
  * set to 1 if sctp_move_to_outqueue() reported a bail condition.
  */
 static void
 sctp_fill_outqueue(struct sctp_tcb *stcb, struct sctp_nets *net,
     uint32_t frag_point, int eeor_mode, int *quit_now,
     int so_locked)
 {
 	struct sctp_association *asoc;
 	struct sctp_stream_out *strq;
 	uint32_t room, taken, sum_taken;
 	int must_bail, stop_now;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	asoc = &stcb->asoc;
 	sum_taken = 0;
 	must_bail = 0;
 	stop_now = 0;
 
 	/* Start from the path MTU minus the lower-layer overhead. */
 	switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		room = net->mtu - SCTP_MIN_V4_OVERHEAD;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		room = net->mtu - SCTP_MIN_OVERHEAD;
 		break;
 #endif
 	default:
 		/* TSNH */
 		room = net->mtu;
 		break;
 	}
 	/* Leave room for the first data chunk header and keep word alignment. */
 	room -= SCTP_DATA_CHUNK_OVERHEAD(stcb);
 	room &= 0xfffffffc;
 
 	strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
 	while ((room > 0) && (strq != NULL)) {
 		taken = sctp_move_to_outqueue(stcb, net, strq, room,
 		    frag_point, &stop_now, eeor_mode,
 		    &must_bail, so_locked);
 		if ((stop_now != 0) || (must_bail != 0)) {
 			break;
 		}
 		strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
 		sum_taken += taken;
 		/* Charge the payload, saturating at zero. */
 		if (room < taken) {
 			room = 0;
 		} else {
 			room -= taken;
 		}
 		/* Charge the header of the next chunk, saturating at zero. */
 		if (room < SCTP_DATA_CHUNK_OVERHEAD(stcb)) {
 			room = 0;
 		} else {
 			room -= SCTP_DATA_CHUNK_OVERHEAD(stcb);
 		}
 		room &= 0xfffffffc;
 	}
 	if (must_bail != 0) {
 		*quit_now = 1;
 	}
 
 	stcb->asoc.ss_functions.sctp_ss_packet_done(stcb, net, asoc);
 
 	if (sum_taken != 0) {
 		return;
 	}
 	/* Nothing was moved: account for running dry. */
 	if ((stcb->asoc.sctp_cmt_on_off == 0) &&
 	    (net == stcb->asoc.primary_destination)) {
 		/* ran dry for primary network net */
 		SCTP_STAT_INCR(sctps_primary_randry);
 	} else if (stcb->asoc.sctp_cmt_on_off > 0) {
 		/* ran dry with CMT on */
 		SCTP_STAT_INCR(sctps_cmt_randry);
 	}
 }
 
 /*
  * Mark every ECN-ECHO chunk on the control send queue as unsent.
  */
 void
 sctp_fix_ecn_echo(struct sctp_association *asoc)
 {
 	struct sctp_tmit_chunk *cur;
 
 	TAILQ_FOREACH(cur, &asoc->control_send_queue, sctp_next) {
 		if (cur->rec.chunk_id.id != SCTP_ECN_ECHO) {
 			continue;
 		}
 		cur->sent = SCTP_DATAGRAM_UNSENT;
 	}
 }
 
 /*
  * Detach all not yet sent data from net: pending stream messages and
  * chunks on the send queue that are bound to net drop their reference and
  * get their destination reset to NULL.  A NULL net is a no-op.
  */
 void
 sctp_move_chunks_from_net(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_association *asoc;
 	struct sctp_stream_queue_pending *pending;
 	struct sctp_tmit_chunk *queued;
 	unsigned int sid;
 
 	if (net == NULL) {
 		return;
 	}
 	asoc = &stcb->asoc;
 
 	/* Messages still waiting on the per-stream out queues. */
 	for (sid = 0; sid < stcb->asoc.streamoutcnt; sid++) {
 		TAILQ_FOREACH(pending, &stcb->asoc.strmout[sid].outqueue, next) {
 			if (pending->net != net) {
 				continue;
 			}
 			sctp_free_remote_addr(pending->net);
 			pending->net = NULL;
 		}
 	}
 
 	/* Chunks already moved to the send queue. */
 	TAILQ_FOREACH(queued, &asoc->send_queue, sctp_next) {
 		if (queued->whoTo != net) {
 			continue;
 		}
 		sctp_free_remote_addr(queued->whoTo);
 		queued->whoTo = NULL;
 	}
 }
 
 int
 sctp_med_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int *num_out,
     int *reason_code,
     int control_only, int from_where,
     struct timeval *now, int *now_filled,
     uint32_t frag_point, int so_locked)
 {
 	/**
 	 * Ok this is the generic chunk service queue. we must do the
 	 * following:
 	 * - Service the stream queue that is next, moving any
 	 *   message (note I must get a complete message i.e. FIRST/MIDDLE and
 	 *   LAST to the out queue in one pass) and assigning TSN's. This
 	 *   only applies though if the peer does not support NDATA. For NDATA
 	 *   chunks its ok to not send the entire message ;-)
 	 * - Check to see if the cwnd/rwnd allows any output, if so we go ahead and
 	 *   formulate and send the low level chunks. Making sure to combine
 	 *   any control in the control chunk queue also.
 	 */
 	struct sctp_nets *net, *start_at, *sack_goes_to = NULL, *old_start_at = NULL;
 	struct mbuf *outchain, *endoutchain;
 	struct sctp_tmit_chunk *chk, *nchk;
 
 	/* temp arrays for unlinking */
 	struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
 	int no_fragmentflg, error;
 	unsigned int max_rwnd_per_dest, max_send_per_dest;
 	int one_chunk, hbflag, skip_data_for_this_net;
 	int asconf, cookie, no_out_cnt;
 	int bundle_at, ctl_cnt, no_data_chunks, eeor_mode;
 	unsigned int mtu, r_mtu, omtu, mx_mtu, to_out;
 	int tsns_sent = 0;
 	uint32_t auth_offset;
 	struct sctp_auth_chunk *auth;
 	uint16_t auth_keyid;
 	int override_ok = 1;
 	int skip_fill_up = 0;
 	int data_auth_reqd = 0;
 
 	/*
 	 * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the
 	 * destination.
 	 */
 	int quit_now = 0;
 
 	*num_out = 0;
 	*reason_code = 0;
 	auth_keyid = stcb->asoc.authinfo.active_keyid;
 	if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
 		eeor_mode = 1;
 	} else {
 		eeor_mode = 0;
 	}
 	ctl_cnt = no_out_cnt = asconf = cookie = 0;
 	/*
 	 * First lets prime the pump. For each destination, if there is room
 	 * in the flight size, attempt to pull an MTU's worth out of the
 	 * stream queues into the general send_queue
 	 */
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xC2, 2);
 #endif
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	hbflag = 0;
 	if (control_only)
 		no_data_chunks = 1;
 	else
 		no_data_chunks = 0;
 
 	/* Nothing to possible to send? */
 	if ((TAILQ_EMPTY(&asoc->control_send_queue) ||
 	    (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) &&
 	    TAILQ_EMPTY(&asoc->asconf_send_queue) &&
 	    TAILQ_EMPTY(&asoc->send_queue) &&
 	    sctp_is_there_unsent_data(stcb, so_locked) == 0) {
 nothing_to_send:
 		*reason_code = 9;
 		return (0);
 	}
 	if (asoc->peers_rwnd == 0) {
 		/* No room in peers rwnd */
 		*reason_code = 1;
 		if (asoc->total_flight > 0) {
 			/* we are allowed one chunk in flight */
 			no_data_chunks = 1;
 		}
 	}
 	if (stcb->asoc.ecn_echo_cnt_onq) {
 		/* Record where a sack goes, if any */
 		if (no_data_chunks &&
 		    (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) {
 			/* Nothing but ECNe to send - we don't do that */
 			goto nothing_to_send;
 		}
 		TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 			if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
 			    (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) {
 				sack_goes_to = chk->whoTo;
 				break;
 			}
 		}
 	}
 	max_rwnd_per_dest = ((asoc->peers_rwnd + asoc->total_flight) / asoc->numnets);
 	if (stcb->sctp_socket)
 		max_send_per_dest = SCTP_SB_LIMIT_SND(stcb->sctp_socket) / asoc->numnets;
 	else
 		max_send_per_dest = 0;
 	if (no_data_chunks == 0) {
 		/* How many non-directed chunks are there? */
 		TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
 			if (chk->whoTo == NULL) {
 				/*
 				 * We already have non-directed chunks on
 				 * the queue, no need to do a fill-up.
 				 */
 				skip_fill_up = 1;
 				break;
 			}
 		}
 	}
 	if ((no_data_chunks == 0) &&
 	    (skip_fill_up == 0) &&
 	    (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc))) {
 		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 			/*
 			 * This for loop we are in takes in each net, if
 			 * its's got space in cwnd and has data sent to it
 			 * (when CMT is off) then it calls
 			 * sctp_fill_outqueue for the net. This gets data on
 			 * the send queue for that network.
 			 *
 			 * In sctp_fill_outqueue TSN's are assigned and data
 			 * is copied out of the stream buffers. Note mostly
 			 * copy by reference (we hope).
 			 */
 			net->window_probe = 0;
 			if ((net != stcb->asoc.alternate) &&
 			    ((net->dest_state & SCTP_ADDR_PF) ||
 			    (!(net->dest_state & SCTP_ADDR_REACHABLE)) ||
 			    (net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 					sctp_log_cwnd(stcb, net, 1,
 					    SCTP_CWND_LOG_FILL_OUTQ_CALLED);
 				}
 				continue;
 			}
 			if ((stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) &&
 			    (net->flight_size == 0)) {
 				(*stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) (stcb, net);
 			}
 			if (net->flight_size >= net->cwnd) {
 				/* skip this network, no room - can't fill */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 					sctp_log_cwnd(stcb, net, 3,
 					    SCTP_CWND_LOG_FILL_OUTQ_CALLED);
 				}
 				continue;
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 4, SCTP_CWND_LOG_FILL_OUTQ_CALLED);
 			}
 			sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now, so_locked);
 			if (quit_now) {
 				/* memory alloc failure */
 				no_data_chunks = 1;
 				break;
 			}
 		}
 	}
 	/* now service each destination and send out what we can for it */
 	/* Nothing to send? */
 	if (TAILQ_EMPTY(&asoc->control_send_queue) &&
 	    TAILQ_EMPTY(&asoc->asconf_send_queue) &&
 	    TAILQ_EMPTY(&asoc->send_queue)) {
 		*reason_code = 8;
 		return (0);
 	}
 
 	if (asoc->sctp_cmt_on_off > 0) {
 		/* get the last start point */
 		start_at = asoc->last_net_cmt_send_started;
 		if (start_at == NULL) {
 			/* null so to beginning */
 			start_at = TAILQ_FIRST(&asoc->nets);
 		} else {
 			start_at = TAILQ_NEXT(asoc->last_net_cmt_send_started, sctp_next);
 			if (start_at == NULL) {
 				start_at = TAILQ_FIRST(&asoc->nets);
 			}
 		}
 		asoc->last_net_cmt_send_started = start_at;
 	} else {
 		start_at = TAILQ_FIRST(&asoc->nets);
 	}
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if (chk->whoTo == NULL) {
 			if (asoc->alternate) {
 				chk->whoTo = asoc->alternate;
 			} else {
 				chk->whoTo = asoc->primary_destination;
 			}
 			atomic_add_int(&chk->whoTo->ref_count, 1);
 		}
 	}
 	old_start_at = NULL;
 again_one_more_time:
 	for (net = start_at; net != NULL; net = TAILQ_NEXT(net, sctp_next)) {
 		/* how much can we send? */
 		/* SCTPDBG("Examine for sending net:%x\n", (uint32_t)net); */
 		if (old_start_at && (old_start_at == net)) {
 			/* through list completely. */
 			break;
 		}
 		tsns_sent = 0xa;
 		if (TAILQ_EMPTY(&asoc->control_send_queue) &&
 		    TAILQ_EMPTY(&asoc->asconf_send_queue) &&
 		    (net->flight_size >= net->cwnd)) {
 			/*
 			 * Nothing on control or asconf and flight is full,
 			 * we can skip even in the CMT case.
 			 */
 			continue;
 		}
 		bundle_at = 0;
 		endoutchain = outchain = NULL;
 		auth = NULL;
 		auth_offset = 0;
 		no_fragmentflg = 1;
 		one_chunk = 0;
 		if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 			skip_data_for_this_net = 1;
 		} else {
 			skip_data_for_this_net = 0;
 		}
 		switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
 #ifdef INET
 		case AF_INET:
 			mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			mtu = net->mtu - SCTP_MIN_OVERHEAD;
 			break;
 #endif
 		default:
 			/* TSNH */
 			mtu = net->mtu;
 			break;
 		}
 		mx_mtu = mtu;
 		to_out = 0;
 		if (mtu > asoc->peers_rwnd) {
 			if (asoc->total_flight > 0) {
 				/* We have a packet in flight somewhere */
 				r_mtu = asoc->peers_rwnd;
 			} else {
 				/* We are always allowed to send one MTU out */
 				one_chunk = 1;
 				r_mtu = mtu;
 			}
 		} else {
 			r_mtu = mtu;
 		}
 		error = 0;
 		/************************/
 		/* ASCONF transmission */
 		/************************/
 		/* Now first lets go through the asconf queue */
 		TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) {
 			if (chk->rec.chunk_id.id != SCTP_ASCONF) {
 				continue;
 			}
 			if (chk->whoTo == NULL) {
 				if (asoc->alternate == NULL) {
 					if (asoc->primary_destination != net) {
 						break;
 					}
 				} else {
 					if (asoc->alternate != net) {
 						break;
 					}
 				}
 			} else {
 				if (chk->whoTo != net) {
 					break;
 				}
 			}
 			if (chk->data == NULL) {
 				break;
 			}
 			if (chk->sent != SCTP_DATAGRAM_UNSENT &&
 			    chk->sent != SCTP_DATAGRAM_RESEND) {
 				break;
 			}
 			/*
 			 * if no AUTH is yet included and this chunk
 			 * requires it, make sure to account for it.  We
 			 * don't apply the size until the AUTH chunk is
 			 * actually added below in case there is no room for
 			 * this chunk. NOTE: we overload the use of "omtu"
 			 * here
 			 */
 			if ((auth == NULL) &&
 			    sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 			    stcb->asoc.peer_auth_chunks)) {
 				omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 			} else
 				omtu = 0;
 			/* Here we do NOT factor the r_mtu */
 			if ((chk->send_size < (int)(mtu - omtu)) ||
 			    (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
 				/*
 				 * We probably should glom the mbuf chain
 				 * from the chk->data for control but the
 				 * problem is it becomes yet one more level
 				 * of tracking to do if for some reason
 				 * output fails. Then I have got to
 				 * reconstruct the merged control chain.. el
 				 * yucko.. for now we take the easy way and
 				 * do the copy
 				 */
 				/*
 				 * Add an AUTH chunk, if chunk requires it
 				 * save the offset into the chain for AUTH
 				 */
 				if ((auth == NULL) &&
 				    (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 				    stcb->asoc.peer_auth_chunks))) {
 					outchain = sctp_add_auth_chunk(outchain,
 					    &endoutchain,
 					    &auth,
 					    &auth_offset,
 					    stcb,
 					    chk->rec.chunk_id.id);
 					SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				}
 				outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
 				    (int)chk->rec.chunk_id.can_take_data,
 				    chk->send_size, chk->copy_by_ref);
 				if (outchain == NULL) {
 					*reason_code = 8;
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 					return (ENOMEM);
 				}
 				SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				/* update our MTU size */
 				if (mtu > (chk->send_size + omtu))
 					mtu -= (chk->send_size + omtu);
 				else
 					mtu = 0;
 				to_out += (chk->send_size + omtu);
 				/* Do clear IP_DF ? */
 				if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 					no_fragmentflg = 0;
 				}
 				if (chk->rec.chunk_id.can_take_data)
 					chk->data = NULL;
 				/*
 				 * set hb flag since we can use these for
 				 * RTO
 				 */
 				hbflag = 1;
 				asconf = 1;
 				/*
 				 * should sysctl this: don't bundle data
 				 * with ASCONF since it requires AUTH
 				 */
 				no_data_chunks = 1;
 				chk->sent = SCTP_DATAGRAM_SENT;
 				if (chk->whoTo == NULL) {
 					chk->whoTo = net;
 					atomic_add_int(&net->ref_count, 1);
 				}
 				chk->snd_count++;
 				if (mtu == 0) {
 					/*
 					 * Ok we are out of room but we can
 					 * output without effecting the
 					 * flight size since this little guy
 					 * is a control only packet.
 					 */
 					sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
 					/*
 					 * do NOT clear the asconf flag as
 					 * it is used to do appropriate
 					 * source address selection.
 					 */
 					if (*now_filled == 0) {
 						(void)SCTP_GETTIME_TIMEVAL(now);
 						*now_filled = 1;
 					}
 					net->last_sent_time = *now;
 					hbflag = 0;
 					if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 					    (struct sockaddr *)&net->ro._l_addr,
 					    outchain, auth_offset, auth,
 					    stcb->asoc.authinfo.active_keyid,
 					    no_fragmentflg, 0, asconf,
 					    inp->sctp_lport, stcb->rport,
 					    htonl(stcb->asoc.peer_vtag),
 					    net->port, NULL,
 					    0, 0,
 					    so_locked))) {
 						/*
 						 * error, we could not
 						 * output
 						 */
 						SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
 						if (from_where == 0) {
 							SCTP_STAT_INCR(sctps_lowlevelerrusr);
 						}
 						if (error == ENOBUFS) {
 							asoc->ifp_had_enobuf = 1;
 							SCTP_STAT_INCR(sctps_lowlevelerr);
 						}
 						/* error, could not output */
 						if (error == EHOSTUNREACH) {
 							/*
 							 * Destination went
 							 * unreachable
 							 * during this send
 							 */
 							sctp_move_chunks_from_net(stcb, net);
 						}
 						*reason_code = 7;
 						break;
 					} else {
 						asoc->ifp_had_enobuf = 0;
 					}
 					/*
 					 * increase the number we sent, if a
 					 * cookie is sent we don't tell them
 					 * any was sent out.
 					 */
 					outchain = endoutchain = NULL;
 					auth = NULL;
 					auth_offset = 0;
 					if (!no_out_cnt)
 						*num_out += ctl_cnt;
 					/* recalc a clean slate and setup */
 					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 					case AF_INET:
 						mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 						break;
 #endif
 #ifdef INET6
 					case AF_INET6:
 						mtu = net->mtu - SCTP_MIN_OVERHEAD;
 						break;
 #endif
 					default:
 						/* TSNH */
 						mtu = net->mtu;
 						break;
 					}
 					to_out = 0;
 					no_fragmentflg = 1;
 				}
 			}
 		}
 		if (error != 0) {
 			/* try next net */
 			continue;
 		}
 		/************************/
 		/* Control transmission */
 		/************************/
 		/* Now first lets go through the control queue */
 		TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) {
 			if ((sack_goes_to) &&
 			    (chk->rec.chunk_id.id == SCTP_ECN_ECHO) &&
 			    (chk->whoTo != sack_goes_to)) {
 				/*
 				 * if we have a sack in queue, and we are
 				 * looking at an ecn echo that is NOT queued
 				 * to where the sack is going..
 				 */
 				if (chk->whoTo == net) {
 					/*
 					 * Don't transmit it to where its
 					 * going (current net)
 					 */
 					continue;
 				} else if (sack_goes_to == net) {
 					/*
 					 * But do transmit it to this
 					 * address
 					 */
 					goto skip_net_check;
 				}
 			}
 			if (chk->whoTo == NULL) {
 				if (asoc->alternate == NULL) {
 					if (asoc->primary_destination != net) {
 						continue;
 					}
 				} else {
 					if (asoc->alternate != net) {
 						continue;
 					}
 				}
 			} else {
 				if (chk->whoTo != net) {
 					continue;
 				}
 			}
 	skip_net_check:
 			if (chk->data == NULL) {
 				continue;
 			}
 			if (chk->sent != SCTP_DATAGRAM_UNSENT) {
 				/*
 				 * It must be unsent. Cookies and ASCONF's
 				 * hang around but there timers will force
 				 * when marked for resend.
 				 */
 				continue;
 			}
 			/*
 			 * if no AUTH is yet included and this chunk
 			 * requires it, make sure to account for it.  We
 			 * don't apply the size until the AUTH chunk is
 			 * actually added below in case there is no room for
 			 * this chunk. NOTE: we overload the use of "omtu"
 			 * here
 			 */
 			if ((auth == NULL) &&
 			    sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 			    stcb->asoc.peer_auth_chunks)) {
 				omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 			} else
 				omtu = 0;
 			/* Here we do NOT factor the r_mtu */
 			if ((chk->send_size <= (int)(mtu - omtu)) ||
 			    (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
 				/*
 				 * We probably should glom the mbuf chain
 				 * from the chk->data for control but the
 				 * problem is it becomes yet one more level
 				 * of tracking to do if for some reason
 				 * output fails. Then I have got to
 				 * reconstruct the merged control chain.. el
 				 * yucko.. for now we take the easy way and
 				 * do the copy
 				 */
 				/*
 				 * Add an AUTH chunk, if chunk requires it
 				 * save the offset into the chain for AUTH
 				 */
 				if ((auth == NULL) &&
 				    (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 				    stcb->asoc.peer_auth_chunks))) {
 					outchain = sctp_add_auth_chunk(outchain,
 					    &endoutchain,
 					    &auth,
 					    &auth_offset,
 					    stcb,
 					    chk->rec.chunk_id.id);
 					SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				}
 				outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
 				    (int)chk->rec.chunk_id.can_take_data,
 				    chk->send_size, chk->copy_by_ref);
 				if (outchain == NULL) {
 					*reason_code = 8;
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 					return (ENOMEM);
 				}
 				SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				/* update our MTU size */
 				if (mtu > (chk->send_size + omtu))
 					mtu -= (chk->send_size + omtu);
 				else
 					mtu = 0;
 				to_out += (chk->send_size + omtu);
 				/* Do clear IP_DF ? */
 				if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 					no_fragmentflg = 0;
 				}
 				if (chk->rec.chunk_id.can_take_data)
 					chk->data = NULL;
 				/* Mark things to be removed, if needed */
 				if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) ||	/* EY */
 				    (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
 				    (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
 				    (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
 				    (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
 				    (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
 				    (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
 				    (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
 					if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) {
 						hbflag = 1;
 					}
 					/* remove these chunks at the end */
 					if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
 					    (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) {
 						/* turn off the timer */
 						if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
 							sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
 							    inp, stcb, NULL,
 							    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
 						}
 					}
 					ctl_cnt++;
 				} else {
 					/*
 					 * Other chunks, since they have
 					 * timers running (i.e. COOKIE) we
 					 * just "trust" that it gets sent or
 					 * retransmitted.
 					 */
 					ctl_cnt++;
 					if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
 						cookie = 1;
 						no_out_cnt = 1;
 					} else if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
 						/*
 						 * Increment ecne send count
 						 * here this means we may be
 						 * over-zealous in our
 						 * counting if the send
 						 * fails, but its the best
 						 * place to do it (we used
 						 * to do it in the queue of
 						 * the chunk, but that did
 						 * not tell how many times
 						 * it was sent.
 						 */
 						SCTP_STAT_INCR(sctps_sendecne);
 					}
 					chk->sent = SCTP_DATAGRAM_SENT;
 					if (chk->whoTo == NULL) {
 						chk->whoTo = net;
 						atomic_add_int(&net->ref_count, 1);
 					}
 					chk->snd_count++;
 				}
 				if (mtu == 0) {
 					/*
 					 * Ok we are out of room but we can
 					 * output without effecting the
 					 * flight size since this little guy
 					 * is a control only packet.
 					 */
 					if (asconf) {
 						sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
 						/*
 						 * do NOT clear the asconf
 						 * flag as it is used to do
 						 * appropriate source
 						 * address selection.
 						 */
 					}
 					if (cookie) {
 						sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
 						cookie = 0;
 					}
 					/* Only HB or ASCONF advances time */
 					if (hbflag) {
 						if (*now_filled == 0) {
 							(void)SCTP_GETTIME_TIMEVAL(now);
 							*now_filled = 1;
 						}
 						net->last_sent_time = *now;
 						hbflag = 0;
 					}
 					if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 					    (struct sockaddr *)&net->ro._l_addr,
 					    outchain,
 					    auth_offset, auth,
 					    stcb->asoc.authinfo.active_keyid,
 					    no_fragmentflg, 0, asconf,
 					    inp->sctp_lport, stcb->rport,
 					    htonl(stcb->asoc.peer_vtag),
 					    net->port, NULL,
 					    0, 0,
 					    so_locked))) {
 						/*
 						 * error, we could not
 						 * output
 						 */
 						SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
 						if (from_where == 0) {
 							SCTP_STAT_INCR(sctps_lowlevelerrusr);
 						}
 						if (error == ENOBUFS) {
 							asoc->ifp_had_enobuf = 1;
 							SCTP_STAT_INCR(sctps_lowlevelerr);
 						}
 						if (error == EHOSTUNREACH) {
 							/*
 							 * Destination went
 							 * unreachable
 							 * during this send
 							 */
 							sctp_move_chunks_from_net(stcb, net);
 						}
 						*reason_code = 7;
 						break;
 					} else {
 						asoc->ifp_had_enobuf = 0;
 					}
 					/*
 					 * increase the number we sent, if a
 					 * cookie is sent we don't tell them
 					 * any was sent out.
 					 */
 					outchain = endoutchain = NULL;
 					auth = NULL;
 					auth_offset = 0;
 					if (!no_out_cnt)
 						*num_out += ctl_cnt;
 					/* recalc a clean slate and setup */
 					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 					case AF_INET:
 						mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 						break;
 #endif
 #ifdef INET6
 					case AF_INET6:
 						mtu = net->mtu - SCTP_MIN_OVERHEAD;
 						break;
 #endif
 					default:
 						/* TSNH */
 						mtu = net->mtu;
 						break;
 					}
 					to_out = 0;
 					no_fragmentflg = 1;
 				}
 			}
 		}
 		if (error != 0) {
 			/* try next net */
 			continue;
 		}
 		/* JRI: if dest is in PF state, do not send data to it */
 		if ((asoc->sctp_cmt_on_off > 0) &&
 		    (net != stcb->asoc.alternate) &&
 		    (net->dest_state & SCTP_ADDR_PF)) {
 			goto no_data_fill;
 		}
 		if (net->flight_size >= net->cwnd) {
 			goto no_data_fill;
 		}
 		if ((asoc->sctp_cmt_on_off > 0) &&
 		    (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_RECV_BUFFER_SPLITTING) &&
 		    (net->flight_size > max_rwnd_per_dest)) {
 			goto no_data_fill;
 		}
 		/*
 		 * We need a specific accounting for the usage of the send
 		 * buffer. We also need to check the number of messages per
 		 * net. For now, this is better than nothing and it disabled
 		 * by default...
 		 */
 		if ((asoc->sctp_cmt_on_off > 0) &&
 		    (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_SEND_BUFFER_SPLITTING) &&
 		    (max_send_per_dest > 0) &&
 		    (net->flight_size > max_send_per_dest)) {
 			goto no_data_fill;
 		}
 		/*********************/
 		/* Data transmission */
 		/*********************/
 		/*
 		 * if AUTH for DATA is required and no AUTH has been added
 		 * yet, account for this in the mtu now... if no data can be
 		 * bundled, this adjustment won't matter anyways since the
 		 * packet will be going out...
 		 */
 		data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA,
 		    stcb->asoc.peer_auth_chunks);
 		if (data_auth_reqd && (auth == NULL)) {
 			mtu -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 		}
 		/* now lets add any data within the MTU constraints */
 		switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (net->mtu > SCTP_MIN_V4_OVERHEAD)
 				omtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 			else
 				omtu = 0;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (net->mtu > SCTP_MIN_OVERHEAD)
 				omtu = net->mtu - SCTP_MIN_OVERHEAD;
 			else
 				omtu = 0;
 			break;
 #endif
 		default:
 			/* TSNH */
 			omtu = 0;
 			break;
 		}
 		if ((((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
 		    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
 		    (skip_data_for_this_net == 0)) ||
 		    (cookie)) {
 			TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
 				if (no_data_chunks) {
 					/* let only control go out */
 					*reason_code = 1;
 					break;
 				}
 				if (net->flight_size >= net->cwnd) {
 					/* skip this net, no room for data */
 					*reason_code = 2;
 					break;
 				}
 				if ((chk->whoTo != NULL) &&
 				    (chk->whoTo != net)) {
 					/* Don't send the chunk on this net */
 					continue;
 				}
 
 				if (asoc->sctp_cmt_on_off == 0) {
 					if ((asoc->alternate) &&
 					    (asoc->alternate != net) &&
 					    (chk->whoTo == NULL)) {
 						continue;
 					} else if ((net != asoc->primary_destination) &&
 						    (asoc->alternate == NULL) &&
 					    (chk->whoTo == NULL)) {
 						continue;
 					}
 				}
 				if ((chk->send_size > omtu) && ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) == 0)) {
 					/*-
 					 * strange, we have a chunk that is
 					 * to big for its destination and
 					 * yet no fragment ok flag.
 					 * Something went wrong when the
 					 * PMTU changed...we did not mark
 					 * this chunk for some reason?? I
 					 * will fix it here by letting IP
 					 * fragment it for now and printing
 					 * a warning. This really should not
 					 * happen ...
 					 */
 					SCTP_PRINTF("Warning chunk of %d bytes > mtu:%d and yet PMTU disc missed\n",
 					    chk->send_size, mtu);
 					chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
 				}
 				if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) &&
 				    (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
 					struct sctp_data_chunk *dchkh;
 
 					dchkh = mtod(chk->data, struct sctp_data_chunk *);
 					dchkh->ch.chunk_flags |= SCTP_DATA_SACK_IMMEDIATELY;
 				}
 				if (((chk->send_size <= mtu) && (chk->send_size <= r_mtu)) ||
 				    ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) && (chk->send_size <= asoc->peers_rwnd))) {
 					/* ok we will add this one */
 
 					/*
 					 * Add an AUTH chunk, if chunk
 					 * requires it, save the offset into
 					 * the chain for AUTH
 					 */
 					if (data_auth_reqd) {
 						if (auth == NULL) {
 							outchain = sctp_add_auth_chunk(outchain,
 							    &endoutchain,
 							    &auth,
 							    &auth_offset,
 							    stcb,
 							    SCTP_DATA);
 							auth_keyid = chk->auth_keyid;
 							override_ok = 0;
 							SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 						} else if (override_ok) {
 							/*
 							 * use this data's
 							 * keyid
 							 */
 							auth_keyid = chk->auth_keyid;
 							override_ok = 0;
 						} else if (auth_keyid != chk->auth_keyid) {
 							/*
 							 * different keyid,
 							 * so done bundling
 							 */
 							break;
 						}
 					}
 					outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, 0,
 					    chk->send_size, chk->copy_by_ref);
 					if (outchain == NULL) {
 						SCTPDBG(SCTP_DEBUG_OUTPUT3, "No memory?\n");
 						if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 							sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 						}
 						*reason_code = 3;
 						SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 						return (ENOMEM);
 					}
 					/* update our MTU size */
 					/* Do clear IP_DF ? */
 					if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 						no_fragmentflg = 0;
 					}
 					/* unsigned subtraction of mtu */
 					if (mtu > chk->send_size)
 						mtu -= chk->send_size;
 					else
 						mtu = 0;
 					/* unsigned subtraction of r_mtu */
 					if (r_mtu > chk->send_size)
 						r_mtu -= chk->send_size;
 					else
 						r_mtu = 0;
 
 					to_out += chk->send_size;
 					if ((to_out > mx_mtu) && no_fragmentflg) {
 #ifdef INVARIANTS
 						panic("Exceeding mtu of %d out size is %d", mx_mtu, to_out);
 #else
 						SCTP_PRINTF("Exceeding mtu of %d out size is %d\n",
 						    mx_mtu, to_out);
 #endif
 					}
 					chk->window_probe = 0;
 					data_list[bundle_at++] = chk;
 					if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
 						break;
 					}
 					if (chk->sent == SCTP_DATAGRAM_UNSENT) {
 						if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
 							SCTP_STAT_INCR_COUNTER64(sctps_outorderchunks);
 						} else {
 							SCTP_STAT_INCR_COUNTER64(sctps_outunorderchunks);
 						}
 						if (((chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) == SCTP_DATA_LAST_FRAG) &&
 						    ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0))
 							/*
 							 * Count number of
 							 * user msg's that
 							 * were fragmented
 							 * we do this by
 							 * counting when we
 							 * see a LAST
 							 * fragment only.
 							 */
 							SCTP_STAT_INCR_COUNTER64(sctps_fragusrmsgs);
 					}
 					if ((mtu == 0) || (r_mtu == 0) || (one_chunk)) {
 						if ((one_chunk) && (stcb->asoc.total_flight == 0)) {
 							data_list[0]->window_probe = 1;
 							net->window_probe = 1;
 						}
 						break;
 					}
 				} else {
 					/*
 					 * Must be sent in order of the
 					 * TSN's (on a network)
 					 */
 					break;
 				}
 			}	/* for (chunk gather loop for this net) */
 		}		/* if asoc.state OPEN */
 no_data_fill:
 		/* Is there something to send for this destination? */
 		if (outchain) {
 			/* We may need to start a control timer or two */
 			if (asconf) {
 				sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
 				    stcb, net);
 				/*
 				 * do NOT clear the asconf flag as it is
 				 * used to do appropriate source address
 				 * selection.
 				 */
 			}
 			if (cookie) {
 				sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
 				cookie = 0;
 			}
 			/* must start a send timer if data is being sent */
 			if (bundle_at && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
 				/*
 				 * no timer running on this destination
 				 * restart it.
 				 */
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 			}
 			if (bundle_at || hbflag) {
 				/* For data/asconf and hb set time */
 				if (*now_filled == 0) {
 					(void)SCTP_GETTIME_TIMEVAL(now);
 					*now_filled = 1;
 				}
 				net->last_sent_time = *now;
 			}
 			/* Now send it, if there is anything to send :> */
 			if ((error = sctp_lowlevel_chunk_output(inp,
 			    stcb,
 			    net,
 			    (struct sockaddr *)&net->ro._l_addr,
 			    outchain,
 			    auth_offset,
 			    auth,
 			    auth_keyid,
 			    no_fragmentflg,
 			    bundle_at,
 			    asconf,
 			    inp->sctp_lport, stcb->rport,
 			    htonl(stcb->asoc.peer_vtag),
 			    net->port, NULL,
 			    0, 0,
 			    so_locked))) {
 				/* error, we could not output */
 				SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
 				if (from_where == 0) {
 					SCTP_STAT_INCR(sctps_lowlevelerrusr);
 				}
 				if (error == ENOBUFS) {
 					asoc->ifp_had_enobuf = 1;
 					SCTP_STAT_INCR(sctps_lowlevelerr);
 				}
 				if (error == EHOSTUNREACH) {
 					/*
 					 * Destination went unreachable
 					 * during this send
 					 */
 					sctp_move_chunks_from_net(stcb, net);
 				}
 				*reason_code = 6;
 				/*-
 				 * I add this line to be paranoid. As far as
 				 * I can tell the continue, takes us back to
 				 * the top of the for, but just to make sure
 				 * I will reset these again here.
 				 */
 				ctl_cnt = 0;
 				continue;	/* This takes us back to the
 						 * for() for the nets. */
 			} else {
 				asoc->ifp_had_enobuf = 0;
 			}
 			endoutchain = NULL;
 			auth = NULL;
 			auth_offset = 0;
 			if (!no_out_cnt) {
 				*num_out += (ctl_cnt + bundle_at);
 			}
 			if (bundle_at) {
 				/* setup for a RTO measurement */
 				tsns_sent = data_list[0]->rec.data.tsn;
 				/* fill time if not already filled */
 				if (*now_filled == 0) {
 					(void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
 					*now_filled = 1;
 					*now = asoc->time_last_sent;
 				} else {
 					asoc->time_last_sent = *now;
 				}
 				if (net->rto_needed) {
 					data_list[0]->do_rtt = 1;
 					net->rto_needed = 0;
 				}
 				SCTP_STAT_INCR_BY(sctps_senddata, bundle_at);
 				sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net);
 			}
 			if (one_chunk) {
 				break;
 			}
 		}
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 			sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_SEND);
 		}
 	}
 	if (old_start_at == NULL) {
 		old_start_at = start_at;
 		start_at = TAILQ_FIRST(&asoc->nets);
 		if (old_start_at)
 			goto again_one_more_time;
 	}
 
 	/*
 	 * At the end there should be no NON timed chunks hanging on this
 	 * queue.
 	 */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 		sctp_log_cwnd(stcb, net, *num_out, SCTP_CWND_LOG_FROM_SEND);
 	}
 	if ((*num_out == 0) && (*reason_code == 0)) {
 		*reason_code = 4;
 	} else {
 		*reason_code = 5;
 	}
 	sctp_clean_up_ctl(stcb, asoc, so_locked);
 	return (0);
 }
 
 void
 sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
 {
 	/*-
 	 * Prepend a OPERATIONAL_ERROR chunk header and put on the end of
 	 * the control chunk queue.
 	 */
 	struct sctp_chunkhdr *hdr;
 	struct sctp_tmit_chunk *chk;
 	struct mbuf *mat, *last_mbuf;
 	uint32_t chunk_length;
 	uint16_t padding_length;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_NOWAIT);
 	if (op_err == NULL) {
 		return;
 	}
 	last_mbuf = NULL;
 	chunk_length = 0;
 	for (mat = op_err; mat != NULL; mat = SCTP_BUF_NEXT(mat)) {
 		chunk_length += SCTP_BUF_LEN(mat);
 		if (SCTP_BUF_NEXT(mat) == NULL) {
 			last_mbuf = mat;
 		}
 	}
 	if (chunk_length > SCTP_MAX_CHUNK_LENGTH) {
 		sctp_m_freem(op_err);
 		return;
 	}
 	padding_length = chunk_length % 4;
 	if (padding_length != 0) {
 		padding_length = 4 - padding_length;
 	}
 	if (padding_length != 0) {
 		if (sctp_add_pad_tombuf(last_mbuf, padding_length) == NULL) {
 			sctp_m_freem(op_err);
 			return;
 		}
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(op_err);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	chk->send_size = (uint16_t)chunk_length;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = op_err;
 	chk->whoTo = NULL;
 	hdr = mtod(op_err, struct sctp_chunkhdr *);
 	hdr->chunk_type = SCTP_OPERATION_ERROR;
 	hdr->chunk_flags = 0;
 	hdr->chunk_length = htons(chk->send_size);
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 }
 
 int
 sctp_send_cookie_echo(struct mbuf *m,
     int offset, int limit,
     struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	/*-
 	 * Pull the STATE-COOKIE parameter out of the packet in m, turn it
 	 * into a COOKIE-ECHO chunk and put it at the front of the control
 	 * chunk queue, addressed to net.
 	 *
 	 * The parameter scan starts sizeof(struct sctp_init_chunk) bytes
 	 * after offset; the cookie must not extend past limit.
 	 *
 	 * Returns 0 on success, or a negative code:
 	 *   -2  the cookie could not be copied out of m
 	 *   -3  no further parameter could be read (no cookie found)
 	 *   -5  no chunk descriptor could be allocated
 	 *   -6  a parameter claims a length shorter than its own header
 	 *   -7  the cookie would extend past limit
 	 *   -8  the copied cookie could not be padded
 	 * The private copy of the cookie is released on every failure
 	 * path taken after it has been made.
 	 */
 	int at;
 	struct mbuf *cookie;
 	struct sctp_paramhdr param, *phdr;
 	struct sctp_chunkhdr *hdr;
 	struct sctp_tmit_chunk *chk;
 	uint16_t ptype, plen;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* First find the cookie in the param area */
 	cookie = NULL;
 	at = offset + sizeof(struct sctp_init_chunk);
 	for (;;) {
 		phdr = sctp_get_next_param(m, at, &param, sizeof(param));
 		if (phdr == NULL) {
 			return (-3);
 		}
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		if (plen < sizeof(struct sctp_paramhdr)) {
 			/* bogus length, would never advance properly */
 			return (-6);
 		}
 		if (ptype == SCTP_STATE_COOKIE) {
 			int pad;
 
 			/* found the cookie */
 			if (at + plen > limit) {
 				return (-7);
 			}
 			cookie = SCTP_M_COPYM(m, at, plen, M_NOWAIT);
 			if (cookie == NULL) {
 				/* No memory */
 				return (-2);
 			}
 			/* round the copy up to a multiple of 4 bytes */
 			if ((pad = (plen % 4)) > 0) {
 				pad = 4 - pad;
 			}
 			if (pad > 0) {
 				if (sctp_pad_lastmbuf(cookie, pad, NULL) == NULL) {
 					/*
 					 * The copy is ours and nobody else
 					 * references it yet, so release it
 					 * instead of leaking it.
 					 */
 					sctp_m_freem(cookie);
 					return (-8);
 				}
 			}
 #ifdef SCTP_MBUF_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 				sctp_log_mbc(cookie, SCTP_MBUF_ICOPY);
 			}
 #endif
 			break;
 		}
 		/* not the cookie, step over this (padded) parameter */
 		at += SCTP_SIZE32(plen);
 	}
 	/* ok, we got the cookie lets change it into a cookie echo chunk */
 	/*
 	 * first the change from param to cookie: only type and flags are
 	 * rewritten here, the length field copied from the parameter is
 	 * left as it is.
 	 */
 	hdr = mtod(cookie, struct sctp_chunkhdr *);
 	hdr->chunk_type = SCTP_COOKIE_ECHO;
 	hdr->chunk_flags = 0;
 	/* get the chunk stuff now and place it in the FRONT of the queue */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(cookie);
 		return (-5);
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 	chk->send_size = SCTP_SIZE32(plen);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = cookie;
 	chk->whoTo = net;
 	/* the queued chunk holds a reference on its destination */
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	TAILQ_INSERT_HEAD(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return (0);
 }
 
 void
 sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
     struct mbuf *m,
     int offset,
     int chk_length,
     struct sctp_nets *net)
 {
 	/*
 	 * take a HB request and make it into a HB ack and send it.
 	 *
 	 * chk_length bytes starting at offset in m are copied, the chunk
 	 * type of the copy is rewritten to HEARTBEAT-ACK and the result is
 	 * appended to the control send queue with net as destination.
 	 * Nothing is queued when net is NULL or when any allocation
 	 * (copy, padding, chunk descriptor) fails.
 	 */
 	struct mbuf *outchain;
 	struct sctp_chunkhdr *chdr;
 	struct sctp_tmit_chunk *chk;
 
 	if (net == NULL)
 		/* must have a net pointer */
 		return;
 
 	outchain = SCTP_M_COPYM(m, offset, chk_length, M_NOWAIT);
 	if (outchain == NULL) {
 		/* gak out of memory */
 		return;
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(outchain, SCTP_MBUF_ICOPY);
 	}
 #endif
 	chdr = mtod(outchain, struct sctp_chunkhdr *);
 	chdr->chunk_type = SCTP_HEARTBEAT_ACK;
 	chdr->chunk_flags = 0;
 	if (chk_length % 4 != 0) {
 		if (sctp_pad_lastmbuf(outchain, 4 - (chk_length % 4), NULL) == NULL) {
 			/*
 			 * Could not pad to a 4 byte boundary. Drop the
 			 * ack rather than queueing an unpadded chunk.
 			 */
 			sctp_m_freem(outchain);
 			return;
 		}
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(outchain);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	/* recorded size is the chunk length as given, without padding */
 	chk->send_size = chk_length;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = outchain;
 	chk->whoTo = net;
 	/* the queued chunk holds a reference on its destination */
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 }
 
 void
 sctp_send_cookie_ack(struct sctp_tcb *stcb)
 {
 	/* formulate and queue a cookie-ack back to sender */
 	struct mbuf *cookie_ack;
 	struct sctp_chunkhdr *hdr;
 	struct sctp_tmit_chunk *chk;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
 	if (cookie_ack == NULL) {
 		/* no mbuf's */
 		return;
 	}
 	SCTP_BUF_RESV_UF(cookie_ack, SCTP_MIN_OVERHEAD);
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(cookie_ack);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = cookie_ack;
 	if (chk->asoc->last_control_chunk_from != NULL) {
 		chk->whoTo = chk->asoc->last_control_chunk_from;
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	} else {
 		chk->whoTo = NULL;
 	}
 	hdr = mtod(cookie_ack, struct sctp_chunkhdr *);
 	hdr->chunk_type = SCTP_COOKIE_ACK;
 	hdr->chunk_flags = 0;
 	hdr->chunk_length = htons(chk->send_size);
 	SCTP_BUF_LEN(cookie_ack) = chk->send_size;
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return;
 }
 
 void
 sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/* formulate and queue a SHUTDOWN-ACK back to the sender */
 	struct mbuf *m_shutdown_ack;
 	struct sctp_shutdown_ack_chunk *ack_cp;
 	struct sctp_tmit_chunk *chk;
 
 	m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_shutdown_ack == NULL) {
 		/* no mbuf's */
 		return;
 	}
 	SCTP_BUF_RESV_UF(m_shutdown_ack, SCTP_MIN_OVERHEAD);
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(m_shutdown_ack);
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = m_shutdown_ack;
 	chk->whoTo = net;
 	if (chk->whoTo) {
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	}
 	ack_cp = mtod(m_shutdown_ack, struct sctp_shutdown_ack_chunk *);
 	ack_cp->ch.chunk_type = SCTP_SHUTDOWN_ACK;
 	ack_cp->ch.chunk_flags = 0;
 	ack_cp->ch.chunk_length = htons(chk->send_size);
 	SCTP_BUF_LEN(m_shutdown_ack) = chk->send_size;
 	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return;
 }
 
 void
 sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/* formulate and queue a SHUTDOWN to the sender */
 	struct mbuf *m_shutdown;
 	struct sctp_shutdown_chunk *shutdown_cp;
 	struct sctp_tmit_chunk *chk;
 
 	TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
 		if (chk->rec.chunk_id.id == SCTP_SHUTDOWN) {
 			/* We already have a SHUTDOWN queued. Reuse it. */
 			if (chk->whoTo) {
 				sctp_free_remote_addr(chk->whoTo);
 				chk->whoTo = NULL;
 			}
 			break;
 		}
 	}
 	if (chk == NULL) {
 		m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 		if (m_shutdown == NULL) {
 			/* no mbuf's */
 			return;
 		}
 		SCTP_BUF_RESV_UF(m_shutdown, SCTP_MIN_OVERHEAD);
 		sctp_alloc_a_chunk(stcb, chk);
 		if (chk == NULL) {
 			/* no memory */
 			sctp_m_freem(m_shutdown);
 			return;
 		}
 		chk->copy_by_ref = 0;
 		chk->rec.chunk_id.id = SCTP_SHUTDOWN;
 		chk->rec.chunk_id.can_take_data = 1;
 		chk->flags = 0;
 		chk->send_size = sizeof(struct sctp_shutdown_chunk);
 		chk->sent = SCTP_DATAGRAM_UNSENT;
 		chk->snd_count = 0;
 		chk->asoc = &stcb->asoc;
 		chk->data = m_shutdown;
 		chk->whoTo = net;
 		if (chk->whoTo) {
 			atomic_add_int(&chk->whoTo->ref_count, 1);
 		}
 		shutdown_cp = mtod(m_shutdown, struct sctp_shutdown_chunk *);
 		shutdown_cp->ch.chunk_type = SCTP_SHUTDOWN;
 		shutdown_cp->ch.chunk_flags = 0;
 		shutdown_cp->ch.chunk_length = htons(chk->send_size);
 		shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn);
 		SCTP_BUF_LEN(m_shutdown) = chk->send_size;
 		TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 		chk->asoc->ctrl_queue_cnt++;
 	} else {
 		TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk, sctp_next);
 		chk->whoTo = net;
 		if (chk->whoTo) {
 			atomic_add_int(&chk->whoTo->ref_count, 1);
 		}
 		shutdown_cp = mtod(chk->data, struct sctp_shutdown_chunk *);
 		shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn);
 		TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
 	}
 	return;
 }
 
 void
 sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
 {
 	/*
 	 * formulate and queue an ASCONF to the peer. ASCONF parameters
 	 * should be queued on the assoc queue.
 	 */
 	struct sctp_tmit_chunk *chk;
 	struct mbuf *m_asconf;
 	int len;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	if ((!TAILQ_EMPTY(&stcb->asoc.asconf_send_queue)) &&
 	    (!sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS))) {
 		/* can't send a new one if there is one in flight already */
 		return;
 	}
 
 	/* compose an ASCONF chunk, maximum length is PMTU */
 	m_asconf = sctp_compose_asconf(stcb, &len, addr_locked);
 	if (m_asconf == NULL) {
 		return;
 	}
 
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
 		sctp_m_freem(m_asconf);
 		return;
 	}
 
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_ASCONF;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 	chk->data = m_asconf;
 	chk->send_size = len;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->asoc = &stcb->asoc;
 	chk->whoTo = net;
 	if (chk->whoTo) {
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	}
 	TAILQ_INSERT_TAIL(&chk->asoc->asconf_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 	return;
 }
 
 void
 sctp_send_asconf_ack(struct sctp_tcb *stcb)
 {
 	/*
 	 * formulate and queue a asconf-ack back to sender. the asconf-ack
 	 * must be stored in the tcb.
 	 */
 	struct sctp_tmit_chunk *chk;
 	struct sctp_asconf_ack *ack, *latest_ack;
 	struct mbuf *m_ack;
 	struct sctp_nets *net = NULL;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* Get the latest ASCONF-ACK */
 	latest_ack = TAILQ_LAST(&stcb->asoc.asconf_ack_sent, sctp_asconf_ackhead);
 	if (latest_ack == NULL) {
 		return;
 	}
 	if (latest_ack->last_sent_to != NULL &&
 	    latest_ack->last_sent_to == stcb->asoc.last_control_chunk_from) {
 		/* we're doing a retransmission */
 		net = sctp_find_alternate_net(stcb, stcb->asoc.last_control_chunk_from, 0);
 		if (net == NULL) {
 			/* no alternate */
 			if (stcb->asoc.last_control_chunk_from == NULL) {
 				if (stcb->asoc.alternate) {
 					net = stcb->asoc.alternate;
 				} else {
 					net = stcb->asoc.primary_destination;
 				}
 			} else {
 				net = stcb->asoc.last_control_chunk_from;
 			}
 		}
 	} else {
 		/* normal case */
 		if (stcb->asoc.last_control_chunk_from == NULL) {
 			if (stcb->asoc.alternate) {
 				net = stcb->asoc.alternate;
 			} else {
 				net = stcb->asoc.primary_destination;
 			}
 		} else {
 			net = stcb->asoc.last_control_chunk_from;
 		}
 	}
 	latest_ack->last_sent_to = net;
 
 	TAILQ_FOREACH(ack, &stcb->asoc.asconf_ack_sent, next) {
 		if (ack->data == NULL) {
 			continue;
 		}
 
 		/* copy the asconf_ack */
 		m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT);
 		if (m_ack == NULL) {
 			/* couldn't copy it */
 			return;
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 			sctp_log_mbc(m_ack, SCTP_MBUF_ICOPY);
 		}
 #endif
 
 		sctp_alloc_a_chunk(stcb, chk);
 		if (chk == NULL) {
 			/* no memory */
 			if (m_ack)
 				sctp_m_freem(m_ack);
 			return;
 		}
 		chk->copy_by_ref = 0;
 		chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
 		chk->rec.chunk_id.can_take_data = 1;
 		chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 		chk->whoTo = net;
 		if (chk->whoTo) {
 			atomic_add_int(&chk->whoTo->ref_count, 1);
 		}
 		chk->data = m_ack;
 		chk->send_size = ack->len;
 		chk->sent = SCTP_DATAGRAM_UNSENT;
 		chk->snd_count = 0;
 		chk->asoc = &stcb->asoc;
 
 		TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 		chk->asoc->ctrl_queue_cnt++;
 	}
 	return;
 }
 
 /*
  * Retransmit at most one packet of chunks marked SCTP_DATAGRAM_RESEND.
  *
  * The control_send_queue is searched first for a COOKIE-ECHO, STREAM-RESET
  * or FORWARD-TSN chunk marked for resend; if one is found it is sent on its
  * own and the function returns. Otherwise the sent_queue is searched for the
  * first DATA chunk marked for resend, and later chunks marked for resend to
  * the same destination are bundled behind it while they fit in the MTU.
  *
  * Parameters:
  *   inp, stcb, asoc - endpoint, association control block and association.
  *   cnt_out    - out: number of chunks handed to the lower layer.
  *   now        - in/out: cached send timestamp, valid when *now_filled != 0.
  *   now_filled - in/out: set to 1 once *now has been filled in here.
  *   fr_done    - out: set to 1 when the selected DATA chunk is flagged
  *                doing_fast_retransmit; it is never cleared here.
  *   so_locked  - passed through to the lower level output/abort routines.
  *
  * Returns:
  *   0                - a packet was sent, or both queues were empty, or no
  *                      eligible DATA chunk was found.
  *   1                - cannot send now (association not yet open, peer rwnd
  *                      too small, or nothing fit).
  *   ENOMEM           - building the mbuf chain failed.
  *   other error      - value returned by sctp_lowlevel_chunk_output().
  *   SCTP_RETRAN_DONE - no control chunk was sent and sent_queue is empty.
  *   SCTP_RETRAN_EXIT - the association was aborted because a chunk reached
  *                      the sctp_max_retran_chunk limit.
  */
 static int
 sctp_chunk_retransmission(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int *cnt_out, struct timeval *now, int *now_filled, int *fr_done, int so_locked)
 {
 	/*-
 	 * send out one MTU of retransmission. If fast_retransmit is
 	 * happening we ignore the cwnd. Otherwise we obey the cwnd and
 	 * rwnd. For a Cookie or Asconf in the control chunk queue we
 	 * retransmit them by themselves.
 	 *
 	 * For data chunks we will pick out the lowest TSN's in the sent_queue
 	 * marked for resend and bundle them all together (up to a MTU of
 	 * destination). The address to send to should have been
 	 * selected/changed where the retransmission was marked (i.e. in FR
 	 * or t3-timeout routines).
 	 */
 	struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
 	struct sctp_tmit_chunk *chk, *fwd;
 	struct mbuf *m, *endofchain;
 	struct sctp_nets *net = NULL;
 	uint32_t tsns_sent = 0;
 	int no_fragmentflg, bundle_at, cnt_thru;
 	unsigned int mtu;
 	int error, i, one_chunk, fwd_tsn, ctl_cnt, tmr_started;
 	struct sctp_auth_chunk *auth = NULL;
 	uint32_t auth_offset = 0;
 	uint16_t auth_keyid;
 	int override_ok = 1;
 	int data_auth_reqd = 0;
 	uint32_t dmtu = 0;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* NOTE(review): ctl_cnt is only ever incremented below, never read. */
 	tmr_started = ctl_cnt = 0;
 	no_fragmentflg = 1;
 	fwd_tsn = 0;
 	*cnt_out = 0;
 	fwd = NULL;
 	endofchain = m = NULL;
 	auth_keyid = stcb->asoc.authinfo.active_keyid;
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xC3, 1);
 #endif
 	if ((TAILQ_EMPTY(&asoc->sent_queue)) &&
 	    (TAILQ_EMPTY(&asoc->control_send_queue))) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "SCTP hits empty queue with cnt set to %d?\n",
 		    asoc->sent_queue_retran_cnt);
 		asoc->sent_queue_cnt = 0;
 		asoc->sent_queue_cnt_removeable = 0;
 		/* send back 0/0 so we enter normal transmission */
 		*cnt_out = 0;
 		return (0);
 	}
 	/*
 	 * Pass 1: find the first control chunk of an eligible type that is
 	 * marked for resend and copy it (preceded by an AUTH chunk when
 	 * required) into the chain m. The loop stops at the first hit.
 	 */
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if ((chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) ||
 		    (chk->rec.chunk_id.id == SCTP_STREAM_RESET) ||
 		    (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN)) {
 			if (chk->sent != SCTP_DATAGRAM_RESEND) {
 				continue;
 			}
 			if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
 				if (chk != asoc->str_reset) {
 					/*
 					 * not eligible for retran if its
 					 * not ours
 					 */
 					continue;
 				}
 			}
 			ctl_cnt++;
 			if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
 				fwd_tsn = 1;
 			}
 			/*
 			 * Add an AUTH chunk, if chunk requires it save the
 			 * offset into the chain for AUTH
 			 */
 			if ((auth == NULL) &&
 			    (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
 			    stcb->asoc.peer_auth_chunks))) {
 				m = sctp_add_auth_chunk(m, &endofchain,
 				    &auth, &auth_offset,
 				    stcb,
 				    chk->rec.chunk_id.id);
 				SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 			}
 			/*
 			 * NOTE(review): if this copy returns NULL, m is NULL
 			 * and the data path below runs with auth/auth_offset
 			 * still holding whatever sctp_add_auth_chunk() stored
 			 * above; confirm that is intended.
 			 */
 			m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
 			break;
 		}
 	}
 	one_chunk = 0;
 	/* NOTE(review): cnt_thru is incremented below but never read. */
 	cnt_thru = 0;
 	/* do we have control chunks to retransmit? */
 	if (m != NULL) {
 		/*
 		 * NOTE(review): the scan above only selects COOKIE_ECHO,
 		 * STREAM_RESET and FORWARD_CUM_TSN chunks, so the SCTP_ASCONF
 		 * branch below looks unreachable from here.
 		 */
 		/* Start a timer no matter if we succeed or fail */
 		if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
 			sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, chk->whoTo);
 		} else if (chk->rec.chunk_id.id == SCTP_ASCONF)
 			sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, chk->whoTo);
 		chk->snd_count++;	/* update our count */
 		if ((error = sctp_lowlevel_chunk_output(inp, stcb, chk->whoTo,
 		    (struct sockaddr *)&chk->whoTo->ro._l_addr, m,
 		    auth_offset, auth, stcb->asoc.authinfo.active_keyid,
 		    no_fragmentflg, 0, 0,
 		    inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
 		    chk->whoTo->port, NULL,
 		    0, 0,
 		    so_locked))) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
 			if (error == ENOBUFS) {
 				asoc->ifp_had_enobuf = 1;
 				SCTP_STAT_INCR(sctps_lowlevelerr);
 			}
 			return (error);
 		} else {
 			asoc->ifp_had_enobuf = 0;
 		}
 		endofchain = NULL;
 		auth = NULL;
 		auth_offset = 0;
 		/*
 		 * We don't want to mark the net->sent time here since this
 		 * we use this for HB and retrans cannot measure RTT
 		 */
 		/* (void)SCTP_GETTIME_TIMEVAL(&chk->whoTo->last_sent_time); */
 		*cnt_out += 1;
 		chk->sent = SCTP_DATAGRAM_SENT;
 		sctp_ucount_decr(stcb->asoc.sent_queue_retran_cnt);
 		if (fwd_tsn == 0) {
 			return (0);
 		} else {
 			/* Clean up the fwd-tsn list */
 			sctp_clean_up_ctl(stcb, asoc, so_locked);
 			return (0);
 		}
 	}
 	/*
 	 * Ok, it is just data retransmission we need to do or that and a
 	 * fwd-tsn with it all.
 	 */
 	if (TAILQ_EMPTY(&asoc->sent_queue)) {
 		return (SCTP_RETRAN_DONE);
 	}
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT)) {
 		/* not yet open, resend the cookie and that is it */
 		return (1);
 	}
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_auditing(20, inp, stcb, NULL);
 #endif
 	data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks);
 	/*
 	 * Pass 2: walk the sent_queue for the first DATA chunk that may be
 	 * retransmitted. Every path through the loop body ends in continue,
 	 * break or return, so at most one packet is built and sent.
 	 */
 	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
 		if (chk->sent != SCTP_DATAGRAM_RESEND) {
 			/* No, not sent to this net or not ready for rtx */
 			continue;
 		}
 		if (chk->data == NULL) {
 			SCTP_PRINTF("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n",
 			    chk->rec.data.tsn, chk->snd_count, chk->sent);
 			continue;
 		}
 		/*
 		 * A non-zero sctp_max_retran_chunk sysctl caps how often one
 		 * chunk may be sent; at the cap the association is aborted.
 		 */
 		if ((SCTP_BASE_SYSCTL(sctp_max_retran_chunk)) &&
 		    (chk->snd_count >= SCTP_BASE_SYSCTL(sctp_max_retran_chunk))) {
 			struct mbuf *op_err;
 			char msg[SCTP_DIAG_INFO_LEN];
 
 			SCTP_SNPRINTF(msg, sizeof(msg), "TSN %8.8x retransmitted %d times, giving up",
 			    chk->rec.data.tsn, chk->snd_count);
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    msg);
 			atomic_add_int(&stcb->asoc.refcnt, 1);
 			sctp_abort_an_association(stcb->sctp_ep, stcb, op_err,
 			    false, so_locked);
 			SCTP_TCB_LOCK(stcb);
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
 			return (SCTP_RETRAN_EXIT);
 		}
 		/* pick up the net */
 		net = chk->whoTo;
 		/* Payload room: path MTU minus the per-family minimum overhead. */
 		switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 		case AF_INET:
 			mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			mtu = net->mtu - SCTP_MIN_OVERHEAD;
 			break;
 #endif
 		default:
 			/* TSNH */
 			mtu = net->mtu;
 			break;
 		}
 
 		if ((asoc->peers_rwnd < mtu) && (asoc->total_flight > 0)) {
 			/* No room in peers rwnd */
 			uint32_t tsn;
 
 			tsn = asoc->last_acked_seq + 1;
 			if (tsn == chk->rec.data.tsn) {
 				/*
 				 * we make a special exception for this
 				 * case. The peer has no rwnd but is missing
 				 * the lowest chunk.. which is probably what
 				 * is holding up the rwnd.
 				 */
 				goto one_chunk_around;
 			}
 			return (1);
 		}
 one_chunk_around:
 		if (asoc->peers_rwnd < mtu) {
 			/* Limited rwnd: send this chunk only, no bundling. */
 			one_chunk = 1;
 			if ((asoc->peers_rwnd == 0) &&
 			    (asoc->total_flight == 0)) {
 				chk->window_probe = 1;
 				chk->whoTo->window_probe = 1;
 			}
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_audit_log(0xC3, 2);
 #endif
 		bundle_at = 0;
 		m = NULL;
 		net->fast_retran_ip = 0;
 		if (chk->rec.data.doing_fast_retransmit == 0) {
 			/*
 			 * if no FR in progress skip destination that have
 			 * flight_size > cwnd.
 			 */
 			if (net->flight_size >= net->cwnd) {
 				continue;
 			}
 		} else {
 			/*
 			 * Mark the destination net to have FR recovery
 			 * limits put on it.
 			 */
 			*fr_done = 1;
 			net->fast_retran_ip = 1;
 		}
 
 		/*
 		 * if no AUTH is yet included and this chunk requires it,
 		 * make sure to account for it.  We don't apply the size
 		 * until the AUTH chunk is actually added below in case
 		 * there is no room for this chunk.
 		 */
 		if (data_auth_reqd && (auth == NULL)) {
 			dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 		} else
 			dmtu = 0;
 
 		if ((chk->send_size <= (mtu - dmtu)) ||
 		    (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
 			/* ok we will add this one */
 			if (data_auth_reqd) {
 				if (auth == NULL) {
 					m = sctp_add_auth_chunk(m,
 					    &endofchain,
 					    &auth,
 					    &auth_offset,
 					    stcb,
 					    SCTP_DATA);
 					auth_keyid = chk->auth_keyid;
 					override_ok = 0;
 					SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 				} else if (override_ok) {
 					auth_keyid = chk->auth_keyid;
 					override_ok = 0;
 				} else if (chk->auth_keyid != auth_keyid) {
 					/* different keyid, so done bundling */
 					break;
 				}
 			}
 			m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
 			if (m == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
 			/* Do clear IP_DF ? */
 			if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 				no_fragmentflg = 0;
 			}
 			/* update our MTU size */
 			if (mtu > (chk->send_size + dmtu))
 				mtu -= (chk->send_size + dmtu);
 			else
 				mtu = 0;
 			data_list[bundle_at++] = chk;
 			if (one_chunk && (asoc->total_flight <= 0)) {
 				SCTP_STAT_INCR(sctps_windowprobed);
 			}
 		}
 		if (one_chunk == 0) {
 			/*
 			 * now are there anymore forward from chk to pick
 			 * up?
 			 */
 			for (fwd = TAILQ_NEXT(chk, sctp_next); fwd != NULL; fwd = TAILQ_NEXT(fwd, sctp_next)) {
 				if (fwd->sent != SCTP_DATAGRAM_RESEND) {
 					/* Nope, not for retran */
 					continue;
 				}
 				if (fwd->whoTo != net) {
 					/* Nope, not the net in question */
 					continue;
 				}
 				if (data_auth_reqd && (auth == NULL)) {
 					dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 				} else
 					dmtu = 0;
 				if (fwd->send_size <= (mtu - dmtu)) {
 					if (data_auth_reqd) {
 						if (auth == NULL) {
 							m = sctp_add_auth_chunk(m,
 							    &endofchain,
 							    &auth,
 							    &auth_offset,
 							    stcb,
 							    SCTP_DATA);
 							auth_keyid = fwd->auth_keyid;
 							override_ok = 0;
 							SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 						} else if (override_ok) {
 							auth_keyid = fwd->auth_keyid;
 							override_ok = 0;
 						} else if (fwd->auth_keyid != auth_keyid) {
 							/*
 							 * different keyid,
 							 * so done bundling
 							 */
 							break;
 						}
 					}
 					m = sctp_copy_mbufchain(fwd->data, m, &endofchain, 0, fwd->send_size, fwd->copy_by_ref);
 					if (m == NULL) {
 						SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 						return (ENOMEM);
 					}
 					/* Do clear IP_DF ? */
 					if (fwd->flags & CHUNK_FLAGS_FRAGMENT_OK) {
 						no_fragmentflg = 0;
 					}
 					/* update our MTU size */
 					if (mtu > (fwd->send_size + dmtu))
 						mtu -= (fwd->send_size + dmtu);
 					else
 						mtu = 0;
 					data_list[bundle_at++] = fwd;
 					/* data_list[] is full, stop bundling */
 					if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
 						break;
 					}
 				} else {
 					/* can't fit so we are done */
 					break;
 				}
 			}
 		}
 		/* Is there something to send for this destination? */
 		if (m) {
 			/*
 			 * No matter if we fail/or succeed we should start a
 			 * timer. A failure is like a lost IP packet :-)
 			 */
 			if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 				/*
 				 * no timer running on this destination
 				 * restart it.
 				 */
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 				tmr_started = 1;
 			}
 			/* Now lets send it, if there is anything to send :> */
 			if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 			    (struct sockaddr *)&net->ro._l_addr, m,
 			    auth_offset, auth, auth_keyid,
 			    no_fragmentflg, 0, 0,
 			    inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
 			    net->port, NULL,
 			    0, 0,
 			    so_locked))) {
 				/* error, we could not output */
 				SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
 				if (error == ENOBUFS) {
 					asoc->ifp_had_enobuf = 1;
 					SCTP_STAT_INCR(sctps_lowlevelerr);
 				}
 				return (error);
 			} else {
 				asoc->ifp_had_enobuf = 0;
 			}
 			endofchain = NULL;
 			auth = NULL;
 			auth_offset = 0;
 			/* For HB's */
 			/*
 			 * We don't want to mark the net->sent time here
 			 * since this we use this for HB and retrans cannot
 			 * measure RTT
 			 */
 			/* (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); */
 
 			/* For auto-close */
 			cnt_thru++;
 			/* Read the clock once; later sends reuse the cached *now. */
 			if (*now_filled == 0) {
 				(void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
 				*now = asoc->time_last_sent;
 				*now_filled = 1;
 			} else {
 				asoc->time_last_sent = *now;
 			}
 			*cnt_out += bundle_at;
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_audit_log(0xC4, bundle_at);
 #endif
 			if (bundle_at) {
 				tsns_sent = data_list[0]->rec.data.tsn;
 			}
 			/* Per-chunk bookkeeping for everything in the packet. */
 			for (i = 0; i < bundle_at; i++) {
 				SCTP_STAT_INCR(sctps_sendretransdata);
 				data_list[i]->sent = SCTP_DATAGRAM_SENT;
 				/*
 				 * When we have a revoked data, and we
 				 * retransmit it, then we clear the revoked
 				 * flag since this flag dictates if we
 				 * subtracted from the fs
 				 */
 				if (data_list[i]->rec.data.chunk_was_revoked) {
 					/* Deflate the cwnd */
 					data_list[i]->whoTo->cwnd -= data_list[i]->book_size;
 					data_list[i]->rec.data.chunk_was_revoked = 0;
 				}
 				data_list[i]->snd_count++;
 				sctp_ucount_decr(asoc->sent_queue_retran_cnt);
 				/* record the time */
 				data_list[i]->sent_rcv_time = asoc->time_last_sent;
 				if (data_list[i]->book_size_scale) {
 					/*
 					 * need to double the book size on
 					 * this one
 					 */
 					data_list[i]->book_size_scale = 0;
 					/*
 					 * Since we double the booksize, we
 					 * must also double the output queue
 					 * size, since this get shrunk when
 					 * we free by this amount.
 					 */
 					atomic_add_int(&((asoc)->total_output_queue_size), data_list[i]->book_size);
 					data_list[i]->book_size *= 2;
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
 						sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
 						    asoc->peers_rwnd, data_list[i]->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
 					}
 					asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
 					    (uint32_t)(data_list[i]->send_size +
 					    SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)));
 				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
 					sctp_misc_ints(SCTP_FLIGHT_LOG_UP_RSND,
 					    data_list[i]->whoTo->flight_size,
 					    data_list[i]->book_size,
 					    (uint32_t)(uintptr_t)data_list[i]->whoTo,
 					    data_list[i]->rec.data.tsn);
 				}
 				sctp_flight_size_increase(data_list[i]);
 				sctp_total_flight_increase(stcb, data_list[i]);
 				if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
 					/* SWS sender side engages */
 					asoc->peers_rwnd = 0;
 				}
 				if ((i == 0) &&
 				    (data_list[i]->rec.data.doing_fast_retransmit)) {
 					SCTP_STAT_INCR(sctps_sendfastretrans);
 					if ((data_list[i] == TAILQ_FIRST(&asoc->sent_queue)) &&
 					    (tmr_started == 0)) {
 						/*-
 						 * ok we just fast-retrans'd
 						 * the lowest TSN, i.e the
 						 * first on the list. In
 						 * this case we want to give
 						 * some more time to get a
 						 * SACK back without a
 						 * t3-expiring.
 						 */
 						sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
 						    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2);
 						sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 					}
 				}
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_RESEND);
 			}
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(21, inp, stcb, NULL);
 #endif
 		} else {
 			/* None will fit */
 			return (1);
 		}
 		if (asoc->sent_queue_retran_cnt <= 0) {
 			/* all done we have no more to retran */
 			asoc->sent_queue_retran_cnt = 0;
 			break;
 		}
 		if (one_chunk) {
 			/* No more room in rwnd */
 			return (1);
 		}
 		/* stop the for loop here. we sent out a packet */
 		break;
 	}
 	return (0);
 }
 
 static void
 sctp_timer_validation(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	/*
 	 * Make sure at least one destination of the association has its
 	 * retransmission timer pending. If none does, start a SEND timer
 	 * on the alternate net when one is set, otherwise on the primary
 	 * destination.
 	 */
 	struct sctp_nets *cur, *target;
 
 	TAILQ_FOREACH(cur, &asoc->nets, sctp_next) {
 		if (SCTP_OS_TIMER_PENDING(&cur->rxt_timer.timer)) {
 			/* found a running timer, nothing to repair */
 			return;
 		}
 	}
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* No destination had a timer pending. */
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "Deadlock avoided starting timer on a dest at retran\n");
 	target = asoc->alternate ? asoc->alternate : asoc->primary_destination;
 	sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, target);
 }
 
 /*
  * Generic output entry point for one association.
  *
  * Parameters:
  *   inp        - endpoint the association belongs to.
  *   stcb       - association control block.
  *   from_where - SCTP_OUTPUT_FROM_* code naming the caller. It changes the
  *                behaviour as follows:
  *                  USR_SEND   - the only case where Nagle may be applied
  *                               (and only if NODELAY is off);
  *                  COOKIE_ACK - with retransmissions pending, only control
  *                               chunks are pushed and the function returns;
  *                  HB_TMR     - retransmissions are not attempted;
  *                  T3         - at most one retransmission packet is sent.
  *   so_locked  - passed through to the lower level routines.
  *
  * The function first drains pending retransmissions via
  * sctp_chunk_retransmission(), then moves chunks away from unreachable
  * destinations, then calls sctp_med_chunk_output() repeatedly for normal
  * transmission subject to the max-burst and Nagle checks below.
  */
 void
 sctp_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int from_where,
     int so_locked)
 {
 	/*-
 	 * Ok this is the generic chunk service queue. we must do the
 	 * following:
 	 * - See if there are retransmits pending, if so we must
 	 *   do these first.
 	 * - Service the stream queue that is next, moving any
 	 *   message (note I must get a complete message i.e.
 	 *   FIRST/MIDDLE and LAST to the out queue in one pass) and assigning
 	 *   TSN's
 	 * - Check to see if the cwnd/rwnd allows any output, if so we
 	 *   go ahead and formulate and send the low level chunks. Making sure
 	 *   to combine any control in the control chunk queue also.
 	 */
 	struct sctp_association *asoc;
 	struct sctp_nets *net;
 	int error = 0, num_out, tot_out = 0, ret = 0, reason_code;
 	unsigned int burst_cnt = 0;
 	struct timeval now;
 	int now_filled = 0;
 	int nagle_on;
 	uint32_t frag_point = sctp_get_frag_point(stcb);
 	int un_sent = 0;
 	int fr_done;
 	unsigned int tot_frs = 0;
 
 	asoc = &stcb->asoc;
 do_it_again:
 	/* The Nagle algorithm is only applied when handling a send call. */
 	if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
 		if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
 			nagle_on = 0;
 		} else {
 			nagle_on = 1;
 		}
 	} else {
 		nagle_on = 0;
 	}
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* Output queue size minus what is already in flight. */
 	un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
 
 	if ((un_sent <= 0) &&
 	    (TAILQ_EMPTY(&asoc->control_send_queue)) &&
 	    (TAILQ_EMPTY(&asoc->asconf_send_queue)) &&
 	    (asoc->sent_queue_retran_cnt == 0) &&
 	    (asoc->trigger_reset == 0)) {
 		/* Nothing to do unless there is something to be sent left */
 		return;
 	}
 	/*
 	 * Do we have something to send, data or control AND a sack timer
 	 * running, if so piggy-back the sack.
 	 */
 	if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
 		sctp_send_sack(stcb, so_locked);
 		sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL,
 		    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3);
 	}
 	/*
 	 * Retransmission phase: one packet per iteration for as long as
 	 * chunks remain marked for retransmission.
 	 */
 	while (asoc->sent_queue_retran_cnt) {
 		/*-
 		 * Ok, it is retransmission time only, we send out only ONE
 		 * packet with a single call off to the retran code.
 		 */
 		if (from_where == SCTP_OUTPUT_FROM_COOKIE_ACK) {
 			/*-
 			 * Special hook for handling cookies discarded
 			 * by peer that carried data. Send cookie-ack only
 			 * and then the next call with get the retran's.
 			 */
 			(void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
 			    from_where,
 			    &now, &now_filled, frag_point, so_locked);
 			return;
 		} else if (from_where != SCTP_OUTPUT_FROM_HB_TMR) {
 			/* if its not from a HB then do it */
 			fr_done = 0;
 			ret = sctp_chunk_retransmission(inp, stcb, asoc, &num_out, &now, &now_filled, &fr_done, so_locked);
 			if (fr_done) {
 				/* count packets that carried a fast retransmit */
 				tot_frs++;
 			}
 		} else {
 			/*
 			 * called from the HB timer: we don't allow retran
 			 * output (only control)
 			 */
 			ret = 1;
 		}
 		if (ret > 0) {
 			/* Can't send anymore */
 			/*-
 			 * now lets push out control by calling med-level
 			 * output once. this assures that we WILL send HB's
 			 * if queued too.
 			 */
 			(void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
 			    from_where,
 			    &now, &now_filled, frag_point, so_locked);
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(8, inp, stcb, NULL);
 #endif
 			sctp_timer_validation(inp, stcb, asoc);
 			return;
 		}
 		if (ret < 0) {
 			/*-
 			 * The count was off.. retran is not happening so do
 			 * the normal retransmission.
 			 */
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(9, inp, stcb, NULL);
 #endif
 			/* SCTP_RETRAN_EXIT: stop all output for this call. */
 			if (ret == SCTP_RETRAN_EXIT) {
 				return;
 			}
 			break;
 		}
 		if (from_where == SCTP_OUTPUT_FROM_T3) {
 			/* Only one transmission allowed out of a timeout */
 #ifdef SCTP_AUDITING_ENABLED
 			sctp_auditing(10, inp, stcb, NULL);
 #endif
 			/* Push out any control */
 			(void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, from_where,
 			    &now, &now_filled, frag_point, so_locked);
 			return;
 		}
 		if ((asoc->fr_max_burst > 0) && (tot_frs >= asoc->fr_max_burst)) {
 			/* Hit FR burst limit */
 			return;
 		}
 		if ((num_out == 0) && (ret == 0)) {
 			/* No more retrans to send */
 			break;
 		}
 	}
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_auditing(12, inp, stcb, NULL);
 #endif
 	/* Check for bad destinations, if they exist move chunks around. */
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		if (!(net->dest_state & SCTP_ADDR_REACHABLE)) {
 			/*-
 			 * if possible move things off of this address we
 			 * still may send below due to the dormant state but
 			 * we try to find an alternate address to send to
 			 * and if we have one we move all queued data on the
 			 * out wheel to this alternate address.
 			 */
 			if (net->ref_count > 1)
 				sctp_move_chunks_from_net(stcb, net);
 		} else {
 			/*-
 			 * if ((asoc->sat_network) || (net->addr_is_local))
 			 * { burst_limit = asoc->max_burst *
 			 * SCTP_SAT_NETWORK_BURST_INCR; }
 			 */
 			if (asoc->max_burst > 0) {
 				if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst)) {
 					if ((net->flight_size + (asoc->max_burst * net->mtu)) < net->cwnd) {
 						/*
 						 * JRS - Use the congestion
 						 * control given in the
 						 * congestion control module
 						 */
 						asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, asoc->max_burst);
 						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
 							sctp_log_maxburst(stcb, net, 0, asoc->max_burst, SCTP_MAX_BURST_APPLIED);
 						}
 						SCTP_STAT_INCR(sctps_maxburstqueued);
 					}
 					net->fast_retran_ip = 0;
 				} else {
 					if (net->flight_size == 0) {
 						/*
 						 * Should be decaying the
 						 * cwnd here
 						 */
 						;
 					}
 				}
 			}
 		}
 	}
 	/*
 	 * Normal transmission phase: keep calling the medium-level output
 	 * routine until it reports an error, sends nothing, one of the stop
 	 * checks in the loop body fires, or (when count-based burst limiting
 	 * is in effect) burst_cnt reaches asoc->max_burst.
 	 */
 	burst_cnt = 0;
 	do {
 		error = sctp_med_chunk_output(inp, stcb, asoc, &num_out,
 		    &reason_code, 0, from_where,
 		    &now, &now_filled, frag_point, so_locked);
 		if (error) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "Error %d was returned from med-c-op\n", error);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
 				sctp_log_maxburst(stcb, asoc->primary_destination, error, burst_cnt, SCTP_MAX_BURST_ERROR_STOP);
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, NULL, error, SCTP_SEND_NOW_COMPLETES);
 				sctp_log_cwnd(stcb, NULL, 0xdeadbeef, SCTP_SEND_NOW_COMPLETES);
 			}
 			break;
 		}
 		SCTPDBG(SCTP_DEBUG_OUTPUT3, "m-c-o put out %d\n", num_out);
 
 		tot_out += num_out;
 		burst_cnt++;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 			sctp_log_cwnd(stcb, NULL, num_out, SCTP_SEND_NOW_COMPLETES);
 			if (num_out == 0) {
 				sctp_log_cwnd(stcb, NULL, reason_code, SCTP_SEND_NOW_COMPLETES);
 			}
 		}
 		if (nagle_on) {
 			/*
 			 * When the Nagle algorithm is used, look at how
 			 * much is unsent, then if its smaller than an MTU
 			 * and we have data in flight we stop, except if we
 			 * are handling a fragmented user message.
 			 */
 			un_sent = stcb->asoc.total_output_queue_size - stcb->asoc.total_flight;
 			if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) &&
 			    (stcb->asoc.total_flight > 0)) {
 /*	&&		     sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {*/
 				break;
 			}
 		}
 		if (TAILQ_EMPTY(&asoc->control_send_queue) &&
 		    TAILQ_EMPTY(&asoc->send_queue) &&
 		    sctp_is_there_unsent_data(stcb, so_locked) == 0) {
 			/* Nothing left to send */
 			break;
 		}
 		if ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) <= 0) {
 			/* Nothing left to send */
 			break;
 		}
 	} while (num_out &&
 	    ((asoc->max_burst == 0) ||
 	    SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) ||
 	    (burst_cnt < asoc->max_burst)));
 
 	/* Record whether the count-based burst limit ended the loop. */
 	if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) == 0) {
 		if ((asoc->max_burst > 0) && (burst_cnt >= asoc->max_burst)) {
 			SCTP_STAT_INCR(sctps_maxburstqueued);
 			asoc->burst_limit_applied = 1;
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
 				sctp_log_maxburst(stcb, asoc->primary_destination, 0, burst_cnt, SCTP_MAX_BURST_APPLIED);
 			}
 		} else {
 			asoc->burst_limit_applied = 0;
 		}
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 		sctp_log_cwnd(stcb, NULL, tot_out, SCTP_SEND_NOW_COMPLETES);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, we have put out %d chunks\n",
 	    tot_out);
 
 	/*-
 	 * Now we need to clean up the control chunk chain if a ECNE is on
 	 * it. It must be marked as UNSENT again so next call will continue
 	 * to send it until such time that we get a CWR, to remove it.
 	 */
 	if (stcb->asoc.ecn_echo_cnt_onq)
 		sctp_fix_ecn_echo(asoc);
 
 	/*
 	 * A stream reset is waiting to be sent. NOTE(review): presumably a
 	 * return of 0 means the request was queued, hence the second pass;
 	 * confirm against sctp_send_stream_reset_out_if_possible().
 	 */
 	if (stcb->asoc.trigger_reset) {
 		if (sctp_send_stream_reset_out_if_possible(stcb, so_locked) == 0) {
 			goto do_it_again;
 		}
 	}
 	return;
 }
 
 /*
  * Validate the endpoint and hand the request to sctp_sosend() on the
  * endpoint's socket.
  *
  * Returns EINVAL when inp is NULL or when inp has no socket attached;
  * otherwise returns whatever sctp_sosend() returns.  No uio is supplied,
  * the payload is passed as the mbuf chain m.
  */
 int
 sctp_output(struct sctp_inpcb *inp, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *p, int flags)
 {
 	/*
 	 * Both failure modes are traced and reported identically, so they
 	 * are tested together; the short-circuit keeps the NULL check first.
 	 */
 	if ((inp == NULL) || (inp->sctp_socket == NULL)) {
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
 	return (sctp_sosend(inp->sctp_socket, addr, (struct uio *)NULL, m,
 	    control, flags, p));
 }
 
 /*
  * Build or refresh the FORWARD-TSN chunk (I-FORWARD-TSN when
  * asoc->idata_supported is set) on the association's control send queue.
  *
  * If a chunk tagged SCTP_FORWARD_CUM_TSN is already queued it is reset to
  * unsent, its destination reference is dropped, and its payload is rebuilt
  * in place.  Otherwise a new chunk and an mbuf (requested with MCLBYTES of
  * space) are allocated and the chunk is appended to the control queue.
  *
  * The payload is the chunk header carrying the new cumulative TSN, followed
  * by one stream/sequence entry per leading sent-queue chunk that is marked
  * SCTP_FORWARD_TSN_SKIP or SCTP_DATAGRAM_NR_ACKED.  When the entries do not
  * fit into the available space (bounded by the mbuf and by
  * smallest_mtu minus the IP/SCTP overhead) the list is truncated and the
  * reported cumulative TSN is taken from the sent-queue entry located by the
  * trimming walk instead of asoc->advanced_peer_ack_point.
  *
  * stcb: association control block; the TCB lock must be held.
  * asoc: association whose sent/control queues are read and modified.
  *
  * Returns nothing.  On allocation failure nothing is queued.
  */
 void
 send_forward_tsn(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_tmit_chunk *chk, *at, *tp1, *last;
 	struct sctp_forward_tsn_chunk *fwdtsn;
 	struct sctp_strseq *strseq;
 	struct sctp_strseq_mid *strseq_m;
 	uint32_t advance_peer_ack_point;
 	unsigned int cnt_of_space, i, ovh;
 	unsigned int space_needed;
 	unsigned int cnt_of_skipped = 0;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* Reuse a FORWARD-TSN that is already sitting on the control queue. */
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
 			/* mark it to unsent */
 			chk->sent = SCTP_DATAGRAM_UNSENT;
 			chk->snd_count = 0;
 			/* Do we correct its output location? */
 			if (chk->whoTo) {
 				sctp_free_remote_addr(chk->whoTo);
 				chk->whoTo = NULL;
 			}
 			goto sctp_fill_in_rest;
 		}
 	}
 	/* Ok if we reach here we must build one */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		return;
 	}
 	asoc->fwd_tsn_cnt++;
 	chk->copy_by_ref = 0;
 	/*
 	 * We don't do the old thing here since this is used not for on-wire
 	 * but to tell if we are sending a fwd-tsn by the stack during
 	 * output. And if its a IFORWARD or a FORWARD it is a fwd-tsn.
 	 */
 	chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	chk->asoc = asoc;
 	chk->whoTo = NULL;
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		/*
 		 * NOTE(review): asoc->fwd_tsn_cnt was incremented above and is
 		 * not decremented on this path - confirm whether the counter
 		 * should be rolled back when the chunk is never queued.
 		 */
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	/* Leave headroom in front of the chunk for the lower-layer headers. */
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 sctp_fill_in_rest:
 	/*-
 	 * Here we go through and fill out the part that deals with
 	 * stream/seq of the ones we skip.
 	 */
 	SCTP_BUF_LEN(chk->data) = 0;
 	/*
 	 * First pass: count the leading run of skipped / NR-acked chunks
 	 * that will get an entry in the chunk.
 	 */
 	TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
 		if ((at->sent != SCTP_FORWARD_TSN_SKIP) &&
 		    (at->sent != SCTP_DATAGRAM_NR_ACKED)) {
 			/* no more to look at */
 			break;
 		}
 		if (!asoc->idata_supported && (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
 			/* We don't report these */
 			continue;
 		}
 		cnt_of_skipped++;
 	}
 	/* Entry size depends on the chunk flavour (sid/flags/mid vs sid/ssn). */
 	if (asoc->idata_supported) {
 		space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
 		    (cnt_of_skipped * sizeof(struct sctp_strseq_mid)));
 	} else {
 		space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
 		    (cnt_of_skipped * sizeof(struct sctp_strseq)));
 	}
 	cnt_of_space = (unsigned int)M_TRAILINGSPACE(chk->data);
 
 	/* Per-packet overhead is chosen by whether the endpoint is bound v6. */
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 		ovh = SCTP_MIN_OVERHEAD;
 	} else {
 		ovh = SCTP_MIN_V4_OVERHEAD;
 	}
 	if (cnt_of_space > (asoc->smallest_mtu - ovh)) {
 		/* trim to a mtu size */
 		cnt_of_space = asoc->smallest_mtu - ovh;
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
 		sctp_misc_ints(SCTP_FWD_TSN_CHECK,
 		    0xff, 0, cnt_of_skipped,
 		    asoc->advanced_peer_ack_point);
 	}
 	advance_peer_ack_point = asoc->advanced_peer_ack_point;
 	if (cnt_of_space < space_needed) {
 		/*-
 		 * ok we must trim down the chunk by lowering the
 		 * advance peer ack point.
 		 */
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
 			sctp_misc_ints(SCTP_FWD_TSN_CHECK,
 			    0xff, 0xff, cnt_of_space,
 			    space_needed);
 		}
 		/* Recompute how many entries fit after the fixed chunk header. */
 		cnt_of_skipped = cnt_of_space - sizeof(struct sctp_forward_tsn_chunk);
 		if (asoc->idata_supported) {
 			cnt_of_skipped /= sizeof(struct sctp_strseq_mid);
 		} else {
 			cnt_of_skipped /= sizeof(struct sctp_strseq);
 		}
 		/*-
 		 * Go through and find the TSN that will be the one
 		 * we report.
 		 *
 		 * NOTE(review): the walk below advances cnt_of_skipped times
 		 * from the head, so `at` ends on the entry at index
 		 * cnt_of_skipped, while the fill loop further down emits at
 		 * most cnt_of_skipped entries - confirm this is the intended
 		 * entry for the new cumulative TSN.
 		 */
 		at = TAILQ_FIRST(&asoc->sent_queue);
 		if (at != NULL) {
 			for (i = 0; i < cnt_of_skipped; i++) {
 				tp1 = TAILQ_NEXT(at, sctp_next);
 				if (tp1 == NULL) {
 					break;
 				}
 				at = tp1;
 			}
 		}
 		if (at && SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
 			sctp_misc_ints(SCTP_FWD_TSN_CHECK,
 			    0xff, cnt_of_skipped, at->rec.data.tsn,
 			    asoc->advanced_peer_ack_point);
 		}
 		last = at;
 		/*-
 		 * last now points to last one I can report, update
 		 * peer ack point
 		 */
 		if (last) {
 			advance_peer_ack_point = last->rec.data.tsn;
 		}
 		/* Size of the truncated chunk. */
 		if (asoc->idata_supported) {
 			space_needed = sizeof(struct sctp_forward_tsn_chunk) +
 			    cnt_of_skipped * sizeof(struct sctp_strseq_mid);
 		} else {
 			space_needed = sizeof(struct sctp_forward_tsn_chunk) +
 			    cnt_of_skipped * sizeof(struct sctp_strseq);
 		}
 	}
 	chk->send_size = space_needed;
 	/* Setup the chunk */
 	fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *);
 	fwdtsn->ch.chunk_length = htons(chk->send_size);
 	fwdtsn->ch.chunk_flags = 0;
 	/* The on-wire type differs from the internal SCTP_FORWARD_CUM_TSN tag. */
 	if (asoc->idata_supported) {
 		fwdtsn->ch.chunk_type = SCTP_IFORWARD_CUM_TSN;
 	} else {
 		fwdtsn->ch.chunk_type = SCTP_FORWARD_CUM_TSN;
 	}
 	fwdtsn->new_cumulative_tsn = htonl(advance_peer_ack_point);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	fwdtsn++;
 	/*-
 	 * Move pointer to after the fwdtsn and transfer to the
 	 * strseq pointer.
 	 */
 	if (asoc->idata_supported) {
 		strseq_m = (struct sctp_strseq_mid *)fwdtsn;
 		strseq = NULL;
 	} else {
 		strseq = (struct sctp_strseq *)fwdtsn;
 		strseq_m = NULL;
 	}
 	/*-
 	 * Now populate the strseq list. This is done blindly
 	 * without pulling out duplicate stream info. This is
 	 * inefficient but won't harm the process since the peer will
 	 * look at these in sequence and will thus release anything.
 	 * It could mean we exceed the PMTU and chop off some that
 	 * we could have included.. but this is unlikely (aka 1432/4
 	 * would mean 300+ stream seq's would have to be reported in
 	 * one FWD-TSN. With a bit of work we can later FIX this to
 	 * optimize and pull out duplicates.. but it does add more
 	 * overhead. So for now... not!
 	 */
 	i = 0;
 	/* Second pass: emit one entry per reported chunk, up to cnt_of_skipped. */
 	TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
 		if (i >= cnt_of_skipped) {
 			break;
 		}
 		if (!asoc->idata_supported && (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
 			/* We don't report these */
 			continue;
 		}
 		/* Reset the per-chunk counter on the chunk that carries the ack point. */
 		if (at->rec.data.tsn == advance_peer_ack_point) {
 			at->rec.data.fwd_tsn_cnt = 0;
 		}
 		if (asoc->idata_supported) {
 			strseq_m->sid = htons(at->rec.data.sid);
 			if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
 				strseq_m->flags = htons(PR_SCTP_UNORDERED_FLAG);
 			} else {
 				strseq_m->flags = 0;
 			}
 			strseq_m->mid = htonl(at->rec.data.mid);
 			strseq_m++;
 		} else {
 			strseq->sid = htons(at->rec.data.sid);
 			/* Only the low 16 bits of mid are sent as the SSN. */
 			strseq->ssn = htons((uint16_t)at->rec.data.mid);
 			strseq++;
 		}
 		i++;
 	}
 	return;
 }
 
 /*
  * Build a SACK (or NR-SACK when asoc.nrsack_supported == 1) describing the
  * current receive state and append it to the control send queue.
  *
  * Any chunk of the same type already on the control queue is removed and its
  * chunk structure is reused.  The chunk is addressed to
  * asoc->last_data_chunk_from, or to an alternate net when that destination is
  * not marked reachable and an alternate can be found.  Gap ack blocks are
  * derived from the mapping arrays through the sack_array lookup table, then
  * duplicate TSNs are appended while room remains.
  *
  * stcb: association control block; the TCB lock must be held.
  * so_locked: passed through to sctp_free_a_chunk() on the failure path.
  *
  * Returns nothing.  If nothing has been received yet the function does
  * nothing.  On allocation failure it either restarts the RECV timer (when
  * delayed_ack is set) or sets asoc.send_sack so the SACK is retried later.
  */
 void
 sctp_send_sack(struct sctp_tcb *stcb, int so_locked)
 {
 	/*-
 	 * Queue up a SACK or NR-SACK in the control queue.
 	 * We must first check to see if a SACK or NR-SACK is
 	 * somehow on the control queue.
 	 * If so, we will take and remove the old one.
 	 */
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk, *a_chk;
 	struct sctp_sack_chunk *sack;
 	struct sctp_nr_sack_chunk *nr_sack;
 	struct sctp_gap_ack_block *gap_descriptor;
 	const struct sack_track *selector;
 	int mergeable = 0;
 	int offset;
 	caddr_t limit;
 	uint32_t *dup;
 	int limit_reached = 0;
 	unsigned int i, siz, j;
 	unsigned int num_gap_blocks = 0, num_nr_gap_blocks = 0, space;
 	int num_dups = 0;
 	int space_req;
 	uint32_t highest_tsn;
 	uint8_t flags;
 	uint8_t type;
 	uint8_t tsn_map;
 
 	/* Pick the chunk flavour once; it drives every later branch. */
 	if (stcb->asoc.nrsack_supported == 1) {
 		type = SCTP_NR_SELECTIVE_ACK;
 	} else {
 		type = SCTP_SELECTIVE_ACK;
 	}
 	a_chk = NULL;
 	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (asoc->last_data_chunk_from == NULL) {
 		/* Hmm we never received anything */
 		return;
 	}
 	sctp_slide_mapping_arrays(stcb);
 	sctp_set_rwnd(stcb, asoc);
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if (chk->rec.chunk_id.id == type) {
 			/* Hmm, found a sack already on queue, remove it */
 			TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
 			asoc->ctrl_queue_cnt--;
 			a_chk = chk;
 			/* Drop the stale payload and destination; both are rebuilt below. */
 			if (a_chk->data) {
 				sctp_m_freem(a_chk->data);
 				a_chk->data = NULL;
 			}
 			if (a_chk->whoTo) {
 				sctp_free_remote_addr(a_chk->whoTo);
 				a_chk->whoTo = NULL;
 			}
 			break;
 		}
 	}
 	if (a_chk == NULL) {
 		sctp_alloc_a_chunk(stcb, a_chk);
 		if (a_chk == NULL) {
 			/* No memory so we drop the idea, and set a timer */
 			if (stcb->asoc.delayed_ack) {
 				sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
 				    stcb->sctp_ep, stcb, NULL,
 				    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
 				sctp_timer_start(SCTP_TIMER_TYPE_RECV,
 				    stcb->sctp_ep, stcb, NULL);
 			} else {
 				stcb->asoc.send_sack = 1;
 			}
 			return;
 		}
 		a_chk->copy_by_ref = 0;
 		a_chk->rec.chunk_id.id = type;
 		a_chk->rec.chunk_id.can_take_data = 1;
 	}
 	/* Clear our pkt counts */
 	asoc->data_pkts_seen = 0;
 
 	a_chk->flags = 0;
 	a_chk->asoc = asoc;
 	a_chk->snd_count = 0;
 	a_chk->send_size = 0;	/* fill in later */
 	a_chk->sent = SCTP_DATAGRAM_UNSENT;
 	a_chk->whoTo = NULL;
 
 	if (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE)) {
 		/*-
 		 * Ok, the destination for the SACK is unreachable, lets see if
 		 * we can select an alternate to asoc->last_data_chunk_from
 		 */
 		a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
 		if (a_chk->whoTo == NULL) {
 			/* Nope, no alternate */
 			a_chk->whoTo = asoc->last_data_chunk_from;
 		}
 	} else {
 		a_chk->whoTo = asoc->last_data_chunk_from;
 	}
 	/* The chunk holds its own reference on the destination. */
 	if (a_chk->whoTo) {
 		atomic_add_int(&a_chk->whoTo->ref_count, 1);
 	}
 	/* Highest TSN seen in either mapping array. */
 	if (SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->highest_tsn_inside_nr_map)) {
 		highest_tsn = asoc->highest_tsn_inside_map;
 	} else {
 		highest_tsn = asoc->highest_tsn_inside_nr_map;
 	}
 	if (highest_tsn == asoc->cumulative_tsn) {
 		/* no gaps */
 		if (type == SCTP_SELECTIVE_ACK) {
 			space_req = sizeof(struct sctp_sack_chunk);
 		} else {
 			space_req = sizeof(struct sctp_nr_sack_chunk);
 		}
 	} else {
 		/* gaps get a cluster */
 		space_req = MCLBYTES;
 	}
 	/* Ok now lets formulate a MBUF with our sack */
 	a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_NOWAIT, 1, MT_DATA);
 	if ((a_chk->data == NULL) ||
 	    (a_chk->whoTo == NULL)) {
 		/* rats, no mbuf memory */
 		if (a_chk->data) {
 			/* was a problem with the destination */
 			sctp_m_freem(a_chk->data);
 			a_chk->data = NULL;
 		}
 		sctp_free_a_chunk(stcb, a_chk, so_locked);
 		/* sa_ignore NO_NULL_CHK */
 		if (stcb->asoc.delayed_ack) {
 			sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
 			    stcb->sctp_ep, stcb, NULL,
 			    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
 			sctp_timer_start(SCTP_TIMER_TYPE_RECV,
 			    stcb->sctp_ep, stcb, NULL);
 		} else {
 			stcb->asoc.send_sack = 1;
 		}
 		return;
 	}
 	/* ok, lets go through and fill it in */
 	SCTP_BUF_RESV_UF(a_chk->data, SCTP_MIN_OVERHEAD);
 	space = (unsigned int)M_TRAILINGSPACE(a_chk->data);
 	if (space > (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD)) {
 		space = (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD);
 	}
 	/* limit is one past the last byte the chunk may occupy. */
 	limit = mtod(a_chk->data, caddr_t);
 	limit += space;
 
 	flags = 0;
 
 	if ((asoc->sctp_cmt_on_off > 0) &&
 	    SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) {
 		/*-
 		 * CMT DAC algorithm: If 2 (i.e., 0x10) packets have been
 		 * received, then set high bit to 1, else 0. Reset
 		 * pkts_rcvd.
 		 *
 		 * The counter is shifted left by 6 into the chunk flags
 		 * and then cleared.
 		 */
 		flags |= (asoc->cmt_dac_pkts_rcvd << 6);
 		asoc->cmt_dac_pkts_rcvd = 0;
 	}
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	/* Record the cumulative TSN being reported in a wrap-around log. */
 	stcb->asoc.cumack_logsnt[stcb->asoc.cumack_log_atsnt] = asoc->cumulative_tsn;
 	stcb->asoc.cumack_log_atsnt++;
 	if (stcb->asoc.cumack_log_atsnt >= SCTP_TSN_LOG_SIZE) {
 		stcb->asoc.cumack_log_atsnt = 0;
 	}
 #endif
 	/* reset the readers interpretation */
 	stcb->freed_by_sorcv_sincelast = 0;
 
 	/*
 	 * Locate the first gap descriptor right after the fixed header and
 	 * compute siz, the number of mapping-array bytes to scan.  The else
 	 * arms handle a highest TSN that is numerically below the base TSN
 	 * (sequence wrap).
 	 */
 	if (type == SCTP_SELECTIVE_ACK) {
 		sack = mtod(a_chk->data, struct sctp_sack_chunk *);
 		nr_sack = NULL;
 		gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)sack + sizeof(struct sctp_sack_chunk));
 		if (highest_tsn > asoc->mapping_array_base_tsn) {
 			siz = (((highest_tsn - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
 		} else {
 			siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + highest_tsn + 7) / 8;
 		}
 	} else {
 		sack = NULL;
 		nr_sack = mtod(a_chk->data, struct sctp_nr_sack_chunk *);
 		gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)nr_sack + sizeof(struct sctp_nr_sack_chunk));
 		if (asoc->highest_tsn_inside_map > asoc->mapping_array_base_tsn) {
 			siz = (((asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
 		} else {
 			siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_map + 7) / 8;
 		}
 	}
 
 	/*
 	 * offset converts a bit position inside the mapping array into a
 	 * gap-block offset relative to the cumulative TSN.
 	 */
 	if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) {
 		offset = 1;
 	} else {
 		offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
 	}
 	if (((type == SCTP_SELECTIVE_ACK) &&
 	    SCTP_TSN_GT(highest_tsn, asoc->cumulative_tsn)) ||
 	    ((type == SCTP_NR_SELECTIVE_ACK) &&
 	    SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->cumulative_tsn))) {
 		/* we have a gap .. maybe */
 		for (i = 0; i < siz; i++) {
 			tsn_map = asoc->mapping_array[i];
 			/* A plain SACK reports renegable and non-renegable TSNs together. */
 			if (type == SCTP_SELECTIVE_ACK) {
 				tsn_map |= asoc->nr_mapping_array[i];
 			}
 			if (i == 0) {
 				/*
 				 * Clear all bits corresponding to TSNs
 				 * smaller or equal to the cumulative TSN.
 				 */
 				tsn_map &= (~0U << (1 - offset));
 			}
 			/*
 			 * sack_array is indexed by the byte value and yields
 			 * the gap runs inside that byte.
 			 * NOTE(review): right_edge/left_edge presumably flag
 			 * a run touching the first/last bit of the byte, so
 			 * runs can be joined across bytes - confirm against
 			 * the sack_array definition.
 			 */
 			selector = &sack_array[tsn_map];
 			if (mergeable && selector->right_edge) {
 				/*
 				 * Backup, left and right edges were ok to
 				 * merge.
 				 */
 				num_gap_blocks--;
 				gap_descriptor--;
 			}
 			if (selector->num_entries == 0)
 				mergeable = 0;
 			else {
 				for (j = 0; j < selector->num_entries; j++) {
 					if (mergeable && selector->right_edge) {
 						/*
 						 * do a merge by NOT setting
 						 * the left side
 						 */
 						mergeable = 0;
 					} else {
 						/*
 						 * no merge, set the left
 						 * side
 						 */
 						mergeable = 0;
 						gap_descriptor->start = htons((selector->gaps[j].start + offset));
 					}
 					gap_descriptor->end = htons((selector->gaps[j].end + offset));
 					num_gap_blocks++;
 					gap_descriptor++;
 					/* Stop once another full block would cross limit. */
 					if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
 						/* no more room */
 						limit_reached = 1;
 						break;
 					}
 				}
 				if (selector->left_edge) {
 					mergeable = 1;
 				}
 			}
 			if (limit_reached) {
 				/* Reached the limit stop */
 				break;
 			}
 			/* Next byte covers the next eight TSNs. */
 			offset += 8;
 		}
 	}
 	/*
 	 * NR-SACK only: repeat the scan over nr_mapping_array alone to emit
 	 * the non-renegable gap blocks after the regular ones.
 	 */
 	if ((type == SCTP_NR_SELECTIVE_ACK) &&
 	    (limit_reached == 0)) {
 		mergeable = 0;
 
 		if (asoc->highest_tsn_inside_nr_map > asoc->mapping_array_base_tsn) {
 			siz = (((asoc->highest_tsn_inside_nr_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
 		} else {
 			siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_nr_map + 7) / 8;
 		}
 
 		if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) {
 			offset = 1;
 		} else {
 			offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
 		}
 		if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->cumulative_tsn)) {
 			/* we have a gap .. maybe */
 			for (i = 0; i < siz; i++) {
 				tsn_map = asoc->nr_mapping_array[i];
 				if (i == 0) {
 					/*
 					 * Clear all bits corresponding to
 					 * TSNs smaller or equal to the
 					 * cumulative TSN.
 					 */
 					tsn_map &= (~0U << (1 - offset));
 				}
 				selector = &sack_array[tsn_map];
 				if (mergeable && selector->right_edge) {
 					/*
 					 * Backup, left and right edges were
 					 * ok to merge.
 					 */
 					num_nr_gap_blocks--;
 					gap_descriptor--;
 				}
 				if (selector->num_entries == 0)
 					mergeable = 0;
 				else {
 					for (j = 0; j < selector->num_entries; j++) {
 						if (mergeable && selector->right_edge) {
 							/*
 							 * do a merge by NOT
 							 * setting the left
 							 * side
 							 */
 							mergeable = 0;
 						} else {
 							/*
 							 * no merge, set the
 							 * left side
 							 */
 							mergeable = 0;
 							gap_descriptor->start = htons((selector->gaps[j].start + offset));
 						}
 						gap_descriptor->end = htons((selector->gaps[j].end + offset));
 						num_nr_gap_blocks++;
 						gap_descriptor++;
 						if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
 							/* no more room */
 							limit_reached = 1;
 							break;
 						}
 					}
 					if (selector->left_edge) {
 						mergeable = 1;
 					}
 				}
 				if (limit_reached) {
 					/* Reached the limit stop */
 					break;
 				}
 				offset += 8;
 			}
 		}
 	}
 	/* now we must add any dups we are going to report. */
 	if ((limit_reached == 0) && (asoc->numduptsns)) {
 		/* Duplicate TSNs follow the last gap block directly. */
 		dup = (uint32_t *)gap_descriptor;
 		for (i = 0; i < asoc->numduptsns; i++) {
 			*dup = htonl(asoc->dup_tsns[i]);
 			dup++;
 			num_dups++;
 			if (((caddr_t)dup + sizeof(uint32_t)) > limit) {
 				/* no more room */
 				break;
 			}
 		}
 		/* The list is cleared even when not every entry fit. */
 		asoc->numduptsns = 0;
 	}
 	/*
 	 * now that the chunk is prepared queue it to the control chunk
 	 * queue.
 	 */
 	if (type == SCTP_SELECTIVE_ACK) {
 		a_chk->send_size = (uint16_t)(sizeof(struct sctp_sack_chunk) +
 		    (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
 		    num_dups * sizeof(int32_t));
 		SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
 		sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
 		sack->sack.a_rwnd = htonl(asoc->my_rwnd);
 		sack->sack.num_gap_ack_blks = htons(num_gap_blocks);
 		sack->sack.num_dup_tsns = htons(num_dups);
 		sack->ch.chunk_type = type;
 		sack->ch.chunk_flags = flags;
 		sack->ch.chunk_length = htons(a_chk->send_size);
 	} else {
 		a_chk->send_size = (uint16_t)(sizeof(struct sctp_nr_sack_chunk) +
 		    (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
 		    num_dups * sizeof(int32_t));
 		SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
 		nr_sack->nr_sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
 		nr_sack->nr_sack.a_rwnd = htonl(asoc->my_rwnd);
 		nr_sack->nr_sack.num_gap_ack_blks = htons(num_gap_blocks);
 		nr_sack->nr_sack.num_nr_gap_ack_blks = htons(num_nr_gap_blocks);
 		nr_sack->nr_sack.num_dup_tsns = htons(num_dups);
 		nr_sack->nr_sack.reserved = 0;
 		nr_sack->ch.chunk_type = type;
 		nr_sack->ch.chunk_flags = flags;
 		nr_sack->ch.chunk_length = htons(a_chk->send_size);
 	}
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue, a_chk, sctp_next);
 	/* Remember the window we advertised and clear the pending-SACK flag. */
 	asoc->my_last_reported_rwnd = asoc->my_rwnd;
 	asoc->ctrl_queue_cnt++;
 	asoc->send_sack = 0;
 	SCTP_STAT_INCR(sctps_sendsacks);
 	return;
 }
 
 /*
  * Build an ABORT chunk for an existing association and transmit it right
  * away through sctp_lowlevel_chunk_output().
  *
  * stcb: association control block; the TCB lock must be held.
  * operr: optional error-cause mbuf chain.  It is linked behind the ABORT
  * header, or freed here if the header mbuf cannot be allocated.
  * so_locked: passed through to sctp_lowlevel_chunk_output().
  *
  * An AUTH chunk is placed in front when the peer requires ABORT to be
  * authenticated.  The packet goes to asoc.alternate when set, otherwise to
  * the primary destination.  Returns nothing; failures are only reflected in
  * asoc.ifp_had_enobuf and the statistics counters.
  */
 void
 sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked)
 {
 	struct sctp_abort_chunk *abort_hdr;
 	struct sctp_auth_chunk *auth_hdr = NULL;
 	struct sctp_nets *dest;
 	struct mbuf *abort_mb, *cur, *tail_mb;
 	struct mbuf *head_mb = NULL, *auth_tail = NULL;
 	uint32_t tag;
 	uint32_t auth_off = 0;
 	uint16_t cause_bytes, total_len, pad_bytes;
 	int rc;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* Prepend an AUTH chunk when the peer demands it for ABORT. */
 	if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION,
 	    stcb->asoc.peer_auth_chunks)) {
 		head_mb = sctp_add_auth_chunk(NULL, &auth_tail, &auth_hdr,
 		    &auth_off, stcb, SCTP_ABORT_ASSOCIATION);
 		SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	}
 
 	abort_mb = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0,
 	    M_NOWAIT, 1, MT_HEADER);
 	if (abort_mb == NULL) {
 		/* Nothing can be sent; release whatever was handed to us. */
 		if (head_mb != NULL) {
 			sctp_m_freem(head_mb);
 		}
 		if (operr != NULL) {
 			sctp_m_freem(operr);
 		}
 		return;
 	}
 
 	/* Chain the causes behind the header and measure them. */
 	SCTP_BUF_NEXT(abort_mb) = operr;
 	SCTP_BUF_LEN(abort_mb) = sizeof(struct sctp_abort_chunk);
 	cause_bytes = 0;
 	tail_mb = NULL;
 	cur = operr;
 	while (cur != NULL) {
 		cause_bytes += (uint16_t)SCTP_BUF_LEN(cur);
 		/* After the walk this is the final mbuf of the cause chain. */
 		tail_mb = cur;
 		cur = SCTP_BUF_NEXT(cur);
 	}
 	total_len = (uint16_t)sizeof(struct sctp_abort_chunk) + cause_bytes;
 	pad_bytes = SCTP_SIZE32(total_len) - total_len;
 
 	if (head_mb != NULL) {
 		/* AUTH leads the chain; ABORT follows it. */
 		SCTP_BUF_NEXT(auth_tail) = abort_mb;
 	} else {
 		/* No AUTH in front, so the ABORT mbuf needs the headroom itself. */
 		SCTP_BUF_RESV_UF(abort_mb, SCTP_MIN_OVERHEAD);
 		head_mb = abort_mb;
 	}
 
 	dest = (stcb->asoc.alternate != NULL) ?
 	    stcb->asoc.alternate : stcb->asoc.primary_destination;
 
 	/* Fill in the ABORT chunk header. */
 	abort_hdr = mtod(abort_mb, struct sctp_abort_chunk *);
 	abort_hdr->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
 	abort_hdr->ch.chunk_length = htons(total_len);
 	if (stcb->asoc.peer_vtag != 0) {
 		tag = stcb->asoc.peer_vtag;
 		abort_hdr->ch.chunk_flags = 0;
 	} else {
 		/* This happens iff the assoc is in COOKIE-WAIT state. */
 		tag = stcb->asoc.my_vtag;
 		abort_hdr->ch.chunk_flags = SCTP_HAD_NO_TCB;
 	}
 
 	/* Pad the tail of the cause chain up to a 32-bit boundary. */
 	if ((pad_bytes > 0) &&
 	    ((tail_mb == NULL) ||
 	    (sctp_add_pad_tombuf(tail_mb, pad_bytes) == NULL))) {
 		sctp_m_freem(head_mb);
 		return;
 	}
 
 	rc = sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, dest,
 	    (struct sockaddr *)&dest->ro._l_addr,
 	    head_mb, auth_off, auth_hdr, stcb->asoc.authinfo.active_keyid, 1, 0, 0,
 	    stcb->sctp_ep->sctp_lport, stcb->rport, htonl(tag),
 	    stcb->asoc.primary_destination->port, NULL,
 	    0, 0,
 	    so_locked);
 	if (rc == 0) {
 		stcb->asoc.ifp_had_enobuf = 0;
 	} else {
 		SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", rc);
 		if (rc == ENOBUFS) {
 			stcb->asoc.ifp_had_enobuf = 1;
 			SCTP_STAT_INCR(sctps_lowlevelerr);
 		}
 	}
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 }
 
 void
 sctp_send_shutdown_complete(struct sctp_tcb *stcb,
     struct sctp_nets *net,
     int reflect_vtag)
 {
 	/* formulate and SEND a SHUTDOWN-COMPLETE */
 	struct mbuf *m_shutdown_comp;
 	struct sctp_shutdown_complete_chunk *shutdown_complete;
 	uint32_t vtag;
 	int error;
 	uint8_t flags;
 
 	m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_shutdown_comp == NULL) {
 		/* no mbuf's */
 		return;
 	}
 	if (reflect_vtag) {
 		flags = SCTP_HAD_NO_TCB;
 		vtag = stcb->asoc.my_vtag;
 	} else {
 		flags = 0;
 		vtag = stcb->asoc.peer_vtag;
 	}
 	shutdown_complete = mtod(m_shutdown_comp, struct sctp_shutdown_complete_chunk *);
 	shutdown_complete->ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
 	shutdown_complete->ch.chunk_flags = flags;
 	shutdown_complete->ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
 	SCTP_BUF_LEN(m_shutdown_comp) = sizeof(struct sctp_shutdown_complete_chunk);
 	if ((error = sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
 	    (struct sockaddr *)&net->ro._l_addr,
 	    m_shutdown_comp, 0, NULL, 0, 1, 0, 0,
 	    stcb->sctp_ep->sctp_lport, stcb->rport,
 	    htonl(vtag),
 	    net->port, NULL,
 	    0, 0,
 	    SCTP_SO_NOT_LOCKED))) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
 		if (error == ENOBUFS) {
 			stcb->asoc.ifp_had_enobuf = 1;
 			SCTP_STAT_INCR(sctps_lowlevelerr);
 		}
 	} else {
 		stcb->asoc.ifp_had_enobuf = 0;
 	}
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	return;
 }
 
 /*
  * Build and transmit a single-chunk SCTP packet in reply to a received
  * packet for which no association state is used (no stcb is involved).
  *
  * src, dst: addresses of the received packet.  The reply swaps them: it is
  * sent from dst's address to src's address.  dst->sa_family selects IPv4 or
  * IPv6 framing.
  * sh: SCTP common header of the received packet; its ports are swapped into
  * the reply.
  * vtag: verification tag for the reply (host order).  When 0, the tag of
  * the received packet (sh->v_tag) is reflected and the chunk flags are set
  * to SCTP_HAD_NO_TCB.
  * type: chunk type placed in the chunk header.
  * cause: optional mbuf chain appended as chunk payload.  Ownership passes
  * to this function: it is padded to a 4-byte multiple, sent with the packet,
  * or freed on every error path.
  * mflowtype, mflowid, fibnum: flow hash type, flow id and FIB stamped on the
  * outgoing mbuf.
  * vrf_id: passed to the IP output macros.
  * port: UDP encapsulation destination port (as stored by the caller); when
  * non-zero a UDP header is inserted and the local tunneling port sysctl is
  * used as source port.
  *
  * Returns nothing; send errors are only counted in the statistics.
  */
 static void
 sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, uint32_t vtag,
     uint8_t type, struct mbuf *cause,
     uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	struct mbuf *o_pak;
 	struct mbuf *mout;
 	struct sctphdr *shout;
 	struct sctp_chunkhdr *ch;
 #if defined(INET) || defined(INET6)
 	struct udphdr *udp;
 #endif
 	int ret, len, cause_len, padding_len;
 #ifdef INET
 	struct sockaddr_in *src_sin, *dst_sin;
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *src_sin6, *dst_sin6;
 	struct ip6_hdr *ip6;
 #endif
 
 	/* Compute the length of the cause and add final padding. */
 	cause_len = 0;
 	if (cause != NULL) {
 		struct mbuf *m_at, *m_last = NULL;
 
 		for (m_at = cause; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
 			if (SCTP_BUF_NEXT(m_at) == NULL)
 				m_last = m_at;
 			cause_len += SCTP_BUF_LEN(m_at);
 		}
 		padding_len = cause_len % 4;
 		if (padding_len != 0) {
 			/* Round the cause up to the next 4-byte boundary. */
 			padding_len = 4 - padding_len;
 			if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 				sctp_m_freem(cause);
 				return;
 			}
 		}
 	} else {
 		padding_len = 0;
 	}
 	/* Get an mbuf for the header. */
 	len = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		len += sizeof(struct ip);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		len += sizeof(struct ip6_hdr);
 		break;
 #endif
 	default:
 		break;
 	}
 #if defined(INET) || defined(INET6)
 	if (port) {
 		len += sizeof(struct udphdr);
 	}
 #endif
 	mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_NOWAIT, 1, MT_DATA);
 	if (mout == NULL) {
 		if (cause) {
 			sctp_m_freem(cause);
 		}
 		return;
 	}
 	/* Keep room for the link-layer header in front of the IP header. */
 	SCTP_BUF_RESV_UF(mout, max_linkhdr);
 	SCTP_BUF_LEN(mout) = len;
 	/* From here on the cause is owned by mout's chain. */
 	SCTP_BUF_NEXT(mout) = cause;
 	M_SETFIB(mout, fibnum);
 	mout->m_pkthdr.flowid = mflowid;
 	M_HASHTYPE_SET(mout, mflowtype);
 #ifdef INET
 	ip = NULL;
 #endif
 #ifdef INET6
 	ip6 = NULL;
 #endif
 	/* Fill in the network header; the reply goes back to the sender. */
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		src_sin = (struct sockaddr_in *)src;
 		dst_sin = (struct sockaddr_in *)dst;
 		ip = mtod(mout, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = (sizeof(struct ip) >> 2);
 		ip->ip_tos = 0;
 		ip->ip_off = htons(IP_DF);
 		ip_fillid(ip);
 		ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
 		if (port) {
 			ip->ip_p = IPPROTO_UDP;
 		} else {
 			ip->ip_p = IPPROTO_SCTP;
 		}
 		ip->ip_src.s_addr = dst_sin->sin_addr.s_addr;
 		ip->ip_dst.s_addr = src_sin->sin_addr.s_addr;
 		ip->ip_sum = 0;
 		len = sizeof(struct ip);
 		shout = (struct sctphdr *)((caddr_t)ip + len);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		src_sin6 = (struct sockaddr_in6 *)src;
 		dst_sin6 = (struct sockaddr_in6 *)dst;
 		ip6 = mtod(mout, struct ip6_hdr *);
 		ip6->ip6_flow = htonl(0x60000000);
 		if (V_ip6_auto_flowlabel) {
 			ip6->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
 		}
 		ip6->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
 		if (port) {
 			ip6->ip6_nxt = IPPROTO_UDP;
 		} else {
 			ip6->ip6_nxt = IPPROTO_SCTP;
 		}
 		ip6->ip6_src = dst_sin6->sin6_addr;
 		ip6->ip6_dst = src_sin6->sin6_addr;
 		len = sizeof(struct ip6_hdr);
 		shout = (struct sctphdr *)((caddr_t)ip6 + len);
 		break;
 #endif
 	default:
 		len = 0;
 		shout = mtod(mout, struct sctphdr *);
 		break;
 	}
 #if defined(INET) || defined(INET6)
 	if (port) {
 		/* UDP encapsulation needs a configured local tunneling port. */
 		if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
 			sctp_m_freem(mout);
 			return;
 		}
 		udp = (struct udphdr *)shout;
 		udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
 		udp->uh_dport = port;
 		udp->uh_sum = 0;
 		udp->uh_ulen = htons((uint16_t)(sizeof(struct udphdr) +
 		    sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr) +
 		    cause_len + padding_len));
 		len += sizeof(struct udphdr);
 		shout = (struct sctphdr *)((caddr_t)shout + sizeof(struct udphdr));
 	} else {
 		udp = NULL;
 	}
 #endif
 	/* SCTP common header: ports are swapped relative to the request. */
 	shout->src_port = sh->dest_port;
 	shout->dest_port = sh->src_port;
 	shout->checksum = 0;
 	if (vtag) {
 		shout->v_tag = htonl(vtag);
 	} else {
 		shout->v_tag = sh->v_tag;
 	}
 	len += sizeof(struct sctphdr);
 	ch = (struct sctp_chunkhdr *)((caddr_t)shout + sizeof(struct sctphdr));
 	ch->chunk_type = type;
 	if (vtag) {
 		ch->chunk_flags = 0;
 	} else {
 		ch->chunk_flags = SCTP_HAD_NO_TCB;
 	}
 	/* chunk_length excludes the padding; the packet length includes it. */
 	ch->chunk_length = htons((uint16_t)(sizeof(struct sctp_chunkhdr) + cause_len));
 	len += sizeof(struct sctp_chunkhdr);
 	len += cause_len + padding_len;
 
 	if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
 		sctp_m_freem(mout);
 		return;
 	}
 	SCTP_ATTACH_CHAIN(o_pak, mout, len);
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if (port) {
 			if (V_udp_cksum) {
 				udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
 			} else {
 				udp->uh_sum = 0;
 			}
 		}
 		ip->ip_len = htons(len);
 		if (port) {
 			/* Encapsulated: the SCTP CRC is computed in software here. */
 			shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip) + sizeof(struct udphdr));
 			SCTP_STAT_INCR(sctps_sendswcrc);
 			if (V_udp_cksum) {
 				SCTP_ENABLE_UDP_CSUM(o_pak);
 			}
 		} else {
 			/* Plain SCTP: request the checksum via csum_flags. */
 			mout->m_pkthdr.csum_flags = CSUM_SCTP;
 			mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 			SCTP_STAT_INCR(sctps_sendhwcrc);
 		}
 #ifdef SCTP_PACKET_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
 			sctp_packet_log(o_pak);
 		}
 #endif
 		SCTP_PROBE5(send, NULL, NULL, ip, NULL, shout);
 		SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		ip6->ip6_plen = htons((uint16_t)(len - sizeof(struct ip6_hdr)));
 		if (port) {
 			shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
 			SCTP_STAT_INCR(sctps_sendswcrc);
 			/* A computed UDP checksum of 0 is sent as 0xffff. */
 			if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) {
 				udp->uh_sum = 0xffff;
 			}
 		} else {
 			mout->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
 			mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 			SCTP_STAT_INCR(sctps_sendhwcrc);
 		}
 #ifdef SCTP_PACKET_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
 			sctp_packet_log(o_pak);
 		}
 #endif
 		SCTP_PROBE5(send, NULL, NULL, ip6, NULL, shout);
 		SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id);
 		break;
 #endif
 	default:
 		SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
 		    dst->sa_family);
 		/*
 		 * Trace while mout is still valid, then release it, so the
 		 * pointer is never used after the chain has been freed.
 		 */
 		SCTP_LTRACE_ERR_RET_PKT(mout, NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
 		sctp_m_freem(mout);
 		return;
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
 	if (port) {
 		UDPSTAT_INC(udps_opackets);
 	}
 	SCTP_STAT_INCR(sctps_sendpackets);
 	SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 	if (ret) {
 		SCTP_STAT_INCR(sctps_senderrors);
 	}
 	return;
 }
 
 /*
  * Send a SHUTDOWN-COMPLETE in response to a received packet.
  *
  * Thin wrapper: everything is delegated to sctp_send_resp_msg() with the
  * chunk type fixed to SCTP_SHUTDOWN_COMPLETE, a 0 in the argument slot that
  * the ABORT/OPERR wrappers use for the verification tag, and no cause mbuf.
  * The addresses, SCTP header and flow/routing arguments are forwarded
  * untouched.
  */
 void
 sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh,
     uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	sctp_send_resp_msg(src, dst, sh,
 	    0, SCTP_SHUTDOWN_COMPLETE, NULL,
 	    mflowtype, mflowid, fibnum, vrf_id, port);
 }
 
 /*
  * Queue a HEARTBEAT request chunk addressed to 'net'.
  *
  * Asserts the TCB lock.  Returns without queueing anything when 'net' is
  * NULL, when its address family is not one of the compiled-in AF_INET /
  * AF_INET6 cases, or when the chunk or its mbuf cannot be allocated.  On
  * success the chunk is appended to the association's control send queue,
  * net->hb_responded is cleared and the sctps_sendheartbeat statistic is
  * bumped.  'so_locked' is only passed through to sctp_free_a_chunk() on the
  * failure paths.
  */
 void
 sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked)
 {
 	struct sctp_heartbeat_chunk *hb;
 	struct sctp_tmit_chunk *chk;
 	struct timeval now;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (net == NULL) {
 		return;
 	}
 	(void)SCTP_GETTIME_TIMEVAL(&now);
 	/* Bail out early for address families we cannot encode below. */
 	switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		break;
 #endif
 	default:
 		return;
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n");
 		return;
 	}
 
 	/* Describe the chunk before asking for its buffer. */
 	chk->asoc = &stcb->asoc;
 	chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->copy_by_ref = 0;
 	chk->flags = 0;
 	chk->send_size = sizeof(*hb);
 
 	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* The chunk holds a reference on its destination. */
 	chk->whoTo = net;
 	atomic_add_int(&net->ref_count, 1);
 
 	/* Start from an all-zero chunk, then fill in the chunk header. */
 	hb = mtod(chk->data, struct sctp_heartbeat_chunk *);
 	memset(hb, 0, sizeof(*hb));
 	hb->ch.chunk_type = SCTP_HEARTBEAT_REQUEST;
 	hb->ch.chunk_flags = 0;
 	hb->ch.chunk_length = htons(chk->send_size);
 
 	/* Heartbeat info parameter: timestamp and destination address info. */
 	hb->heartbeat.hb_info.ph.param_type = htons(SCTP_HEARTBEAT_INFO);
 	hb->heartbeat.hb_info.ph.param_length = htons(sizeof(struct sctp_heartbeat_info_param));
 	hb->heartbeat.hb_info.time_value_1 = (uint32_t)now.tv_sec;
 	hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
 	hb->heartbeat.hb_info.addr_family = (uint8_t)net->ro._l_addr.sa.sa_family;
 	hb->heartbeat.hb_info.addr_len = net->ro._l_addr.sa.sa_len;
 	if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 		/*
 		 * Random values are drawn only while the address is still
 		 * unconfirmed; the same values are remembered on the net.
 		 */
 		hb->heartbeat.hb_info.random_value1 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
 		net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1;
 		hb->heartbeat.hb_info.random_value2 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
 		net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2;
 	} else {
 		hb->heartbeat.hb_info.random_value1 = 0;
 		net->heartbeat_random1 = 0;
 		hb->heartbeat.hb_info.random_value2 = 0;
 		net->heartbeat_random2 = 0;
 	}
 
 	/* Copy the raw destination address into the parameter. */
 	switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		memcpy(hb->heartbeat.hb_info.address,
 		    &net->ro._l_addr.sin.sin_addr,
 		    sizeof(net->ro._l_addr.sin.sin_addr));
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		memcpy(hb->heartbeat.hb_info.address,
 		    &net->ro._l_addr.sin6.sin6_addr,
 		    sizeof(net->ro._l_addr.sin6.sin6_addr));
 		break;
 #endif
 	default:
 		/*
 		 * The family was already vetted by the switch above; if we
 		 * get here anyway, release the mbuf and the chunk.
 		 */
 		if (chk->data) {
 			sctp_m_freem(chk->data);
 			chk->data = NULL;
 		}
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		return;
 	}
 
 	net->hb_responded = 0;
 	TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
 	stcb->asoc.ctrl_queue_cnt++;
 	SCTP_STAT_INCR(sctps_sendheartbeat);
 }
 
 /*
  * Queue (or refresh) an ECN-ECHO chunk for destination 'net'.
  *
  * If the control send queue already holds an ECN-ECHO for the same
  * destination, that chunk is updated in place: its TSN is raised to
  * 'high_tsn' when that is greater (per SCTP_TSN_GT) and its packet counter
  * is incremented.  Otherwise a new chunk carrying 'high_tsn' and a count of
  * 1 is built and inserted at the head of the control send queue.
  *
  * Does nothing when 'net' is NULL or when a chunk/mbuf cannot be allocated.
  */
 void
 sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
     uint32_t high_tsn)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_ecne_chunk *ecne;
 
 	if (net == NULL) {
 		return;
 	}
 	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		uint32_t queued_tsn, pkt_cnt;
 
 		if ((chk->rec.chunk_id.id != SCTP_ECN_ECHO) || (chk->whoTo != net)) {
 			continue;
 		}
 		/* An ECN-ECHO for this destination is already queued. */
 		ecne = mtod(chk->data, struct sctp_ecne_chunk *);
 		queued_tsn = ntohl(ecne->tsn);
 		if (SCTP_TSN_GT(high_tsn, queued_tsn)) {
 			ecne->tsn = htonl(high_tsn);
 			SCTP_STAT_INCR(sctps_queue_upd_ecne);
 		}
 		pkt_cnt = ntohl(ecne->num_pkts_since_cwr);
 		pkt_cnt++;
 		ecne->num_pkts_since_cwr = htonl(pkt_cnt);
 		return;
 	}
 
 	/* Nothing queued for this destination yet; build a fresh chunk. */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		return;
 	}
 	SCTP_STAT_INCR(sctps_queue_upd_ecne);
 	chk->asoc = asoc;
 	chk->rec.chunk_id.id = SCTP_ECN_ECHO;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->copy_by_ref = 0;
 	chk->flags = 0;
 	chk->send_size = sizeof(*ecne);
 	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* The chunk holds a reference on its destination. */
 	chk->whoTo = net;
 	atomic_add_int(&net->ref_count, 1);
 
 	asoc->ecn_echo_cnt_onq++;
 	ecne = mtod(chk->data, struct sctp_ecne_chunk *);
 	ecne->ch.chunk_type = SCTP_ECN_ECHO;
 	ecne->ch.chunk_flags = 0;
 	ecne->ch.chunk_length = htons(sizeof(*ecne));
 	ecne->tsn = htonl(high_tsn);
 	ecne->num_pkts_since_cwr = htonl(1);
 	/* New ECN-ECHO chunks go to the front of the control queue. */
 	TAILQ_INSERT_HEAD(&asoc->control_send_queue, chk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 }
 
 /*
  * Queue a PACKET-DROPPED chunk that reports (a copy of) a received packet.
  *
  * stcb    - association; nothing is done when it is NULL, when the peer has
  *           not declared PKT-DROP support (asoc->pktdrop_supported == 0) or
  *           when the association has no socket.
  * net     - destination to send to; may be NULL, in which case the chunk is
  *           queued with whoTo == NULL.
  * m       - mbuf chain holding the received packet.
  * len     - packet length including the 'iphlen' bytes of IP header (the
  *           function subtracts iphlen before using it).
  * iphlen  - offset of the SCTP common header within 'm'.
  * bad_crc - when non-zero the SCTP_BADCRC flag is set in the chunk.
  *
  * No report is generated if the packet itself contains a PACKET-DROPPED,
  * ABORT or INIT-ACK chunk.  If the packet plus overhead does not fit in
  * min(smallest_mtu, MCLBYTES), only a leading part is copied, the
  * SCTP_PACKET_TRUNCATED flag is set and the original length is reported in
  * trunc_len.  Allocation failures are silent.
  */
 void
 sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
     struct mbuf *m, int len, int iphlen, int bad_crc)
 {
 	struct sctp_association *asoc;
 	struct sctp_pktdrop_chunk *drp;
 	struct sctp_tmit_chunk *chk;
 	uint8_t *datap;
 	int was_trunc = 0;
 	int fullsz = 0;
 	long spc;
 	int offset;
 	struct sctp_chunkhdr *ch, chunk_buf;
 	unsigned int chk_length;
 
 	if (!stcb) {
 		return;
 	}
 	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (asoc->pktdrop_supported == 0) {
 		/*-
 		 * The peer must declare support before we send one.
 		 */
 		return;
 	}
 	if (stcb->sctp_socket == NULL) {
 		return;
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		return;
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->flags = 0;
 	/* From here on 'len' counts only the bytes after the IP header. */
 	len -= iphlen;
 	chk->send_size = len;
 	/*
 	 * Walk the chunk headers of the received packet, starting right
 	 * after the SCTP common header, to make sure it holds no chunk type
 	 * that must not be answered.  The walk ends when no further header
 	 * can be fetched or a chunk advertises a length shorter than a
 	 * chunk header.
 	 */
 	offset = iphlen + sizeof(struct sctphdr);
 	ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
 	    sizeof(*ch), (uint8_t *)&chunk_buf);
 	while (ch != NULL) {
 		chk_length = ntohs(ch->chunk_length);
 		if (chk_length < sizeof(*ch)) {
 			/* Malformed length: stop scanning. */
 			break;
 		}
 		switch (ch->chunk_type) {
 		case SCTP_PACKET_DROPPED:
 		case SCTP_ABORT_ASSOCIATION:
 		case SCTP_INITIATION_ACK:
 			/**
 			 * We don't respond with a PKT-DROP to an ABORT
 			 * or PKT-DROP. We also do not respond to an
 			 * INIT-ACK, because we can't know if the initiation
 			 * tag is correct or not.
 			 */
 			sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 			return;
 		default:
 			break;
 		}
 		/* Chunks are padded to a 4-byte boundary. */
 		offset += SCTP_SIZE32(chk_length);
 		ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
 		    sizeof(*ch), (uint8_t *)&chunk_buf);
 	}
 
 	if ((len + SCTP_MAX_OVERHEAD + sizeof(struct sctp_pktdrop_chunk)) >
 	    min(stcb->asoc.smallest_mtu, MCLBYTES)) {
 		/*
 		 * Only send one MTU's worth (capped at MCLBYTES): remember
 		 * the full size and trim the excess off the end.
 		 */
 		fullsz = len;
 		len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
 		was_trunc = 1;
 	}
 	chk->asoc = &stcb->asoc;
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		/* Shared exit for failures after the chunk was allocated. */
 jump_out:
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	drp = mtod(chk->data, struct sctp_pktdrop_chunk *);
 	if (drp == NULL) {
 		sctp_m_freem(chk->data);
 		chk->data = NULL;
 		goto jump_out;
 	}
 	/*
 	 * book_size is derived from the untruncated send_size set above
 	 * (the truncation branch below only adjusts send_size and len).
 	 */
 	chk->book_size = SCTP_SIZE32((chk->send_size + sizeof(struct sctp_pktdrop_chunk) +
 	    sizeof(struct sctphdr) + SCTP_MED_OVERHEAD));
 	chk->book_size_scale = 0;
 	if (was_trunc) {
 		drp->ch.chunk_flags = SCTP_PACKET_TRUNCATED;
 		drp->trunc_len = htons(fullsz);
 		/*
 		 * len was already reduced to the size minus overhead above;
 		 * now also take out the PKT-DROP chunk header itself.
 		 */
 		chk->send_size = (uint16_t)(len - sizeof(struct sctp_pktdrop_chunk));
 		len = chk->send_size;
 	} else {
 		/* no truncation needed */
 		drp->ch.chunk_flags = 0;
 		drp->trunc_len = htons(0);
 	}
 	if (bad_crc) {
 		drp->ch.chunk_flags |= SCTP_BADCRC;
 	}
 	/* send_size now covers the chunk header plus the copied payload. */
 	chk->send_size += sizeof(struct sctp_pktdrop_chunk);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	if (net) {
 		/* Expected case: take a reference on the destination. */
 		chk->whoTo = net;
 		atomic_add_int(&chk->whoTo->ref_count, 1);
 	} else {
 		chk->whoTo = NULL;
 	}
 	drp->ch.chunk_type = SCTP_PACKET_DROPPED;
 	drp->ch.chunk_length = htons(chk->send_size);
 	/* bottle_bw reports the socket's receive buffer limit (never < 0). */
 	spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
 	if (spc < 0) {
 		spc = 0;
 	}
 	drp->bottle_bw = htonl(spc);
 	if (asoc->my_rwnd) {
 		/* Sum of what is currently held on the receive side. */
 		drp->current_onq = htonl(asoc->size_on_reasm_queue +
 		    asoc->size_on_all_streams +
 		    asoc->my_rwnd_control_len +
 		    stcb->sctp_socket->so_rcv.sb_cc);
 	} else {
 		/*-
 		 * If my rwnd is 0, possibly from mbuf depletion as well as
 		 * space used, tell the peer there is NO space aka onq == bw
 		 */
 		drp->current_onq = htonl(spc);
 	}
 	drp->reserved = 0;
 	/* Copy 'len' bytes of the packet, starting at the SCTP header. */
 	datap = drp->data;
 	m_copydata(m, iphlen, len, (caddr_t)datap);
 	TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 }
 
 /*
  * Queue (or refresh) a CWR chunk for destination 'net'.
  *
  * When a CWR for the same destination is already on the control send queue
  * it is updated in place: the TSN is raised to 'high_tsn' if that is
  * greater (per SCTP_TSN_GT) and the SCTP_CWR_REDUCE_OVERRIDE flag is OR-ed
  * in if present in 'override'.  Otherwise a new chunk is built with
  * 'override' as its chunk flags and appended to the control send queue.
  *
  * Does nothing when 'net' is NULL or when a chunk/mbuf cannot be allocated.
  */
 void
 sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn, uint8_t override)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_cwr_chunk *cwr;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (net == NULL) {
 		return;
 	}
 	asoc = &stcb->asoc;
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		uint32_t queued_tsn;
 
 		if ((chk->rec.chunk_id.id != SCTP_ECN_CWR) || (chk->whoTo != net)) {
 			continue;
 		}
 		/* A CWR to this destination is already queued; refresh it. */
 		cwr = mtod(chk->data, struct sctp_cwr_chunk *);
 		queued_tsn = ntohl(cwr->tsn);
 		if (SCTP_TSN_GT(high_tsn, queued_tsn)) {
 			cwr->tsn = htonl(high_tsn);
 		}
 		if (override & SCTP_CWR_REDUCE_OVERRIDE) {
 			/* Make sure the override bit is not lost. */
 			cwr->ch.chunk_flags |= SCTP_CWR_REDUCE_OVERRIDE;
 		}
 		return;
 	}
 
 	/* No queued CWR for this destination; build a new one. */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		return;
 	}
 	chk->asoc = asoc;
 	chk->rec.chunk_id.id = SCTP_ECN_CWR;
 	chk->rec.chunk_id.can_take_data = 1;
 	chk->copy_by_ref = 0;
 	chk->flags = 0;
 	chk->send_size = sizeof(*cwr);
 	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* The chunk holds a reference on its destination. */
 	chk->whoTo = net;
 	atomic_add_int(&net->ref_count, 1);
 
 	cwr = mtod(chk->data, struct sctp_cwr_chunk *);
 	cwr->ch.chunk_type = SCTP_ECN_CWR;
 	cwr->ch.chunk_flags = override;
 	cwr->ch.chunk_length = htons(sizeof(*cwr));
 	cwr->tsn = htonl(high_tsn);
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 }
 
 /*
  * Append an outgoing-stream reset request parameter
  * (SCTP_STR_RESET_OUT_REQUEST) to the stream reset chunk in chk->data.
  *
  * A stream is eligible when it is in state SCTP_STREAM_RESET_PENDING, has
  * no chunks on queues and an empty outqueue.  If every outgoing stream is
  * eligible the parameter is emitted with an empty stream list; otherwise at
  * most SCTP_MAX_STREAMS_AT_ONCE_RESET stream ids are listed.  Every stream
  * covered by the request is moved to SCTP_STREAM_RESET_IN_FLIGHT.
  *
  * Returns 0 (chunk untouched) when no stream is eligible, 1 after the
  * parameter was appended and the chunk's length/size fields were updated.
  */
 static int
 sctp_add_stream_reset_out(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk,
     uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
 {
 	struct sctp_stream_reset_out_request *req_out;
 	struct sctp_association *asoc;
 	struct sctp_stream_out *strm;
 	struct sctp_chunkhdr *ch;
 	uint16_t len, old_len, i;
 	int number_entries;
 	int at;
 
 	asoc = &stcb->asoc;
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = SCTP_SIZE32(ntohs(ch->chunk_length));
 	/* The new parameter starts right after the (padded) chunk so far. */
 	req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + old_len);
 
 	/* First pass: count the streams that can be reset right now. */
 	number_entries = 0;
 	for (i = 0; i < asoc->streamoutcnt; i++) {
 		strm = &asoc->strmout[i];
 		if ((strm->state == SCTP_STREAM_RESET_PENDING) &&
 		    (strm->chunks_on_queues == 0) &&
 		    TAILQ_EMPTY(&strm->outqueue)) {
 			number_entries++;
 		}
 	}
 	if (number_entries == 0) {
 		return (0);
 	}
 	if (number_entries == asoc->streamoutcnt) {
 		/* All streams: send the request with an empty list. */
 		number_entries = 0;
 	}
 	if (number_entries > SCTP_MAX_STREAMS_AT_ONCE_RESET) {
 		number_entries = SCTP_MAX_STREAMS_AT_ONCE_RESET;
 	}
 
 	len = (uint16_t)(sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
 	req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
 	req_out->ph.param_length = htons(len);
 	req_out->request_seq = htonl(seq);
 	req_out->response_seq = htonl(resp_seq);
 	req_out->send_reset_at_tsn = htonl(last_sent);
 
 	/* Second pass: fill the list and mark the streams in flight. */
 	at = 0;
 	if (number_entries == 0) {
 		for (i = 0; i < asoc->streamoutcnt; i++) {
 			asoc->strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT;
 		}
 	} else {
 		for (i = 0; i < asoc->streamoutcnt; i++) {
 			strm = &asoc->strmout[i];
 			if ((strm->state != SCTP_STREAM_RESET_PENDING) ||
 			    (strm->chunks_on_queues != 0) ||
 			    !TAILQ_EMPTY(&strm->outqueue)) {
 				continue;
 			}
 			req_out->list_of_streams[at] = htons(i);
 			at++;
 			strm->state = SCTP_STREAM_RESET_IN_FLIGHT;
 			if (at >= number_entries) {
 				break;
 			}
 		}
 	}
 	if (SCTP_SIZE32(len) > len) {
 		/*-
 		 * The parameter does not end on a 4-byte boundary, which can
 		 * only mean it is 2 bytes short: zero the slot after the last
 		 * stream id so the padding is defined.
 		 */
 		req_out->list_of_streams[number_entries] = 0;
 	}
 
 	/* Account for the new parameter in the chunk's length fields. */
 	ch->chunk_length = htons(len + old_len);
 	chk->book_size = len + old_len;
 	chk->book_size_scale = 0;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	return (1);
 }
 
 /*
  * Append an incoming-stream reset request parameter
  * (SCTP_STR_RESET_IN_REQUEST) to the stream reset chunk in chk->data.
  *
  * 'list' supplies 'number_entries' stream ids in host byte order; they are
  * stored in network byte order.  'seq' becomes the request sequence number.
  * The chunk header length, book_size, send_size and mbuf length are updated
  * to include the new parameter.
  */
 static void
 sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
     int number_entries, uint16_t *list,
     uint32_t seq)
 {
 	struct sctp_stream_reset_in_request *req_in;
 	struct sctp_chunkhdr *ch;
 	uint16_t param_len, old_len, i;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* The new parameter starts right after the (padded) chunk so far. */
 	req_in = (struct sctp_stream_reset_in_request *)((caddr_t)ch + old_len);
 	param_len = (uint16_t)(sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries));
 	req_in->ph.param_type = htons(SCTP_STR_RESET_IN_REQUEST);
 	req_in->ph.param_length = htons(param_len);
 	req_in->request_seq = htonl(seq);
 	for (i = 0; i < number_entries; i++) {
 		req_in->list_of_streams[i] = htons(list[i]);
 	}
 	if (SCTP_SIZE32(param_len) > param_len) {
 		/*-
 		 * The parameter does not end on a 4-byte boundary, which can
 		 * only mean it is 2 bytes short: zero the slot after the last
 		 * stream id so the padding is defined.
 		 */
 		req_in->list_of_streams[number_entries] = 0;
 	}
 
 	/* Account for the new parameter in the chunk's length fields. */
 	ch->chunk_length = htons(param_len + old_len);
 	chk->book_size = param_len + old_len;
 	chk->book_size_scale = 0;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 }
 
 /*
  * Append a TSN reset request parameter (SCTP_STR_RESET_TSN_REQUEST) with
  * request sequence number 'seq' to the stream reset chunk in chk->data and
  * update the chunk's length bookkeeping accordingly.
  */
 static void
 sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
     uint32_t seq)
 {
 	struct sctp_stream_reset_tsn_request *req_tsn;
 	struct sctp_chunkhdr *ch;
 	uint16_t param_len, old_len;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* The new parameter starts right after the (padded) chunk so far. */
 	req_tsn = (struct sctp_stream_reset_tsn_request *)((caddr_t)ch + old_len);
 	param_len = sizeof(*req_tsn);
 	req_tsn->ph.param_type = htons(SCTP_STR_RESET_TSN_REQUEST);
 	req_tsn->ph.param_length = htons(param_len);
 	req_tsn->request_seq = htonl(seq);
 
 	/* Account for the new parameter in the chunk's length fields. */
 	ch->chunk_length = htons(param_len + old_len);
 	chk->send_size = param_len + old_len;
 	chk->book_size = SCTP_SIZE32(chk->send_size);
 	chk->book_size_scale = 0;
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 }
 
 /*
  * Append a stream reset response parameter (SCTP_STR_RESET_RESPONSE) to the
  * stream reset chunk in chk->data.
  *
  * chk      - chunk under construction; chk->data must already start with a
  *            chunk header whose chunk_length describes the bytes used so
  *            far.
  * resp_seq - response sequence number, host byte order.
  * result   - result code, host byte order.
  *
  * The chunk header length, book_size, send_size and mbuf length are
  * updated to include the new parameter.
  */
 void
 sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
     uint32_t resp_seq, uint32_t result)
 {
 	uint16_t len, old_len;
 	struct sctp_stream_reset_response *resp;
 	struct sctp_chunkhdr *ch;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* The new parameter starts right after the (padded) chunk so far. */
 	resp = (struct sctp_stream_reset_response *)((caddr_t)ch + len);
 	/* now how long will this param be? */
 	len = sizeof(struct sctp_stream_reset_response);
 	resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
 	resp->ph.param_length = htons(len);
 	resp->response_seq = htonl(resp_seq);
 	/*
 	 * Host-to-network conversion, like every other field written here
 	 * and like sctp_add_stream_reset_result_tsn() does for its result.
 	 */
 	resp->result = htonl(result);
 
 	/* now fix the chunk length */
 	ch->chunk_length = htons(len + old_len);
 	chk->book_size = len + old_len;
 	chk->book_size_scale = 0;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	return;
 }
 
 /*
  * Queue a stream reset chunk that carries only a response parameter for the
  * deferred request described by 'ent' (its 'seq' is used as the response
  * sequence number), with 'response' as the result code.
  *
  * 'response' is stored in asoc->last_reset_action[0] before anything else,
  * including before the early return taken when a stream reset of ours is
  * still outstanding.  Allocation failures are traced with ENOMEM and the
  * function returns without queueing anything.
  */
 void
 sctp_send_deferred_reset_response(struct sctp_tcb *stcb,
     struct sctp_stream_reset_list *ent,
     int response)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_chunkhdr *ch;
 
 	asoc = &stcb->asoc;
 
 	/*
 	 * Remember the new response first (the original note says this is
 	 * so that, should sending fail, a retransmission from the peer is
 	 * answered with the new response).
 	 */
 	asoc->last_reset_action[0] = response;
 	if (asoc->stream_reset_outstanding) {
 		return;
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return;
 	}
 	chk->asoc = asoc;
 	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->copy_by_ref = 0;
 	chk->flags = 0;
 	/* Start out with a bare chunk header; the parameter is added below. */
 	chk->book_size = sizeof(struct sctp_chunkhdr);
 	chk->book_size_scale = 0;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return;
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* Prefer the alternate destination when one is set. */
 	chk->whoTo = asoc->alternate ? asoc->alternate : asoc->primary_destination;
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	ch->chunk_type = SCTP_STREAM_RESET;
 	ch->chunk_flags = 0;
 	ch->chunk_length = htons(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	sctp_add_stream_reset_result(chk, ent->seq, response);
 
 	/* Hand the finished chunk to the control send queue. */
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next);
 	asoc->ctrl_queue_cnt++;
 }
 
 /*
  * Append a stream reset response parameter (SCTP_STR_RESET_RESPONSE) in its
  * long form, i.e. including the sender's and receiver's next TSN, to the
  * stream reset chunk in chk->data.
  *
  * All numeric arguments are taken in host byte order and written in network
  * byte order: 'send_una' goes to senders_next_tsn and 'recv_next' to
  * receivers_next_tsn.  The chunk's length bookkeeping is updated to include
  * the new parameter.
  */
 void
 sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
     uint32_t resp_seq, uint32_t result,
     uint32_t send_una, uint32_t recv_next)
 {
 	struct sctp_stream_reset_response_tsn *resp;
 	struct sctp_chunkhdr *ch;
 	uint16_t param_len, old_len;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* The new parameter starts right after the (padded) chunk so far. */
 	resp = (struct sctp_stream_reset_response_tsn *)((caddr_t)ch + old_len);
 	param_len = sizeof(*resp);
 	resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
 	resp->ph.param_length = htons(param_len);
 	resp->response_seq = htonl(resp_seq);
 	resp->result = htonl(result);
 	resp->senders_next_tsn = htonl(send_una);
 	resp->receivers_next_tsn = htonl(recv_next);
 
 	/* Account for the new parameter in the chunk's length fields. */
 	ch->chunk_length = htons(param_len + old_len);
 	chk->book_size = param_len + old_len;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	chk->book_size_scale = 0;
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 }
 
 /*
  * Append an add-outgoing-streams parameter
  * (SCTP_STR_RESET_ADD_OUT_STREAMS) asking for 'adding' more streams, with
  * request sequence number 'seq', to the stream reset chunk in chk->data.
  * The chunk's length bookkeeping is updated to include the new parameter.
  */
 static void
 sctp_add_an_out_stream(struct sctp_tmit_chunk *chk,
     uint32_t seq,
     uint16_t adding)
 {
 	struct sctp_stream_reset_add_strm *addstr;
 	struct sctp_chunkhdr *ch;
 	uint16_t param_len, old_len;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* The new parameter starts right after the (padded) chunk so far. */
 	addstr = (struct sctp_stream_reset_add_strm *)((caddr_t)ch + old_len);
 	param_len = sizeof(*addstr);
 
 	addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_OUT_STREAMS);
 	addstr->ph.param_length = htons(param_len);
 	addstr->request_seq = htonl(seq);
 	addstr->number_of_streams = htons(adding);
 	addstr->reserved = 0;
 
 	/* Account for the new parameter in the chunk's length fields. */
 	ch->chunk_length = htons(param_len + old_len);
 	chk->send_size = param_len + old_len;
 	chk->book_size = SCTP_SIZE32(chk->send_size);
 	chk->book_size_scale = 0;
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 }
 
 /*
  * Append an add-incoming-streams parameter
  * (SCTP_STR_RESET_ADD_IN_STREAMS) asking for 'adding' more streams, with
  * request sequence number 'seq', to the stream reset chunk in chk->data.
  * The chunk's length bookkeeping is updated to include the new parameter.
  */
 static void
 sctp_add_an_in_stream(struct sctp_tmit_chunk *chk,
     uint32_t seq,
     uint16_t adding)
 {
 	struct sctp_stream_reset_add_strm *addstr;
 	struct sctp_chunkhdr *ch;
 	uint16_t param_len, old_len;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	old_len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* The new parameter starts right after the (padded) chunk so far. */
 	addstr = (struct sctp_stream_reset_add_strm *)((caddr_t)ch + old_len);
 	param_len = sizeof(*addstr);
 
 	addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_IN_STREAMS);
 	addstr->ph.param_length = htons(param_len);
 	addstr->request_seq = htonl(seq);
 	addstr->number_of_streams = htons(adding);
 	addstr->reserved = 0;
 
 	/* Account for the new parameter in the chunk's length fields. */
 	ch->chunk_length = htons(param_len + old_len);
 	chk->send_size = param_len + old_len;
 	chk->book_size = SCTP_SIZE32(chk->send_size);
 	chk->book_size_scale = 0;
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 }
 
 /*
  * Try to queue an outgoing-stream reset request for all streams that are
  * currently ready to be reset (see sctp_add_stream_reset_out()).
  *
  * asoc->trigger_reset is cleared unconditionally.  'so_locked' is passed
  * through to sctp_free_a_chunk() and sctp_send_sack().
  *
  * Returns:
  *   0        - request queued on the control send queue, stream reset
  *              timer started.
  *   EALREADY - a stream reset is already outstanding.
  *   ENOMEM   - no chunk or mbuf could be allocated.
  *   ENOENT   - no stream is ready to be reset; nothing was queued.
  */
 int
 sctp_send_stream_reset_out_if_possible(struct sctp_tcb *stcb, int so_locked)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_chunkhdr *ch;
 
 	asoc = &stcb->asoc;
 	asoc->trigger_reset = 0;
 	if (asoc->stream_reset_outstanding) {
 		return (EALREADY);
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	/* Start out with a bare chunk header; the parameter is added below. */
 	chk->book_size = sizeof(struct sctp_chunkhdr);
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	chk->book_size_scale = 0;
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 
 	/* setup chunk parameters */
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* Prefer the alternate destination when one is set. */
 	if (stcb->asoc.alternate) {
 		chk->whoTo = stcb->asoc.alternate;
 	} else {
 		chk->whoTo = stcb->asoc.primary_destination;
 	}
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	ch->chunk_type = SCTP_STREAM_RESET;
 	ch->chunk_flags = 0;
 	ch->chunk_length = htons(chk->book_size);
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	if (!sctp_add_stream_reset_out(stcb, chk, stcb->asoc.str_reset_seq_out,
 	    (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1))) {
 		/*
 		 * No stream is ready: release the half-built chunk.  Use the
 		 * SCTP mbuf free wrapper like the rest of this file.
 		 */
 		sctp_m_freem(chk->data);
 		chk->data = NULL;
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		return (ENOENT);
 	}
 	asoc->stream_reset_outstanding++;
 	asoc->str_reset = chk;
 	/* insert the chunk for sending */
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue,
 	    chk,
 	    sctp_next);
 	asoc->ctrl_queue_cnt++;
 
 	if (stcb->asoc.send_sack) {
 		sctp_send_sack(stcb, so_locked);
 	}
 	sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
 	return (0);
 }
 
 /*
  * Build and queue a stream reset chunk carrying the requested combination
  * of parameters.
  *
  * stcb           - association; its TCB lock must be held (asserted).
  * number_entries - number of stream ids in 'list' (used for the incoming
  *                  stream reset request).
  * list           - stream ids, host byte order.
  * send_in_req    - non-zero: add an incoming-stream reset request for
  *                  'list'.  This also makes the function try to add an
  *                  outgoing-stream reset request for any streams that are
  *                  ready (see sctp_add_stream_reset_out()).
  * send_tsn_req   - non-zero: add a TSN reset request.  Must not be combined
  *                  with send_in_req.
  * add_stream     - bit mask: bit 0x01 requests 'adding_o' additional
  *                  outgoing streams, bit 0x02 requests 'adding_i'
  *                  additional incoming streams.
  * adding_o       - number of outgoing streams to add.
  * adding_i       - number of incoming streams to add.
  * peer_asked     - stored in asoc->peer_req_out when outgoing streams are
  *                  added.
  *
  * Each parameter that is added consumes one request sequence number
  * (starting at asoc->str_reset_seq_out) and increments
  * asoc->stream_reset_outstanding.
  *
  * Returns 0 on success, EBUSY when a stream reset is already outstanding,
  * EINVAL when nothing was requested or send_tsn_req and send_in_req are
  * both set, and ENOMEM when 'number_entries' is too large for one cluster
  * or a chunk/mbuf cannot be allocated.
  */
 int
 sctp_send_str_reset_req(struct sctp_tcb *stcb,
     uint16_t number_entries, uint16_t *list,
     uint8_t send_in_req,
     uint8_t send_tsn_req,
     uint8_t add_stream,
     uint16_t adding_o,
     uint16_t adding_i, uint8_t peer_asked)
 {
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_chunkhdr *ch;
 	int can_send_out_req = 0;
 	uint32_t seq;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	asoc = &stcb->asoc;
 	if (asoc->stream_reset_outstanding) {
 		/*-
 		 * Already one pending, must get ACK back to clear the flag.
 		 */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
 		return (EBUSY);
 	}
 	if ((send_in_req == 0) && (send_tsn_req == 0) &&
 	    (add_stream == 0)) {
 		/* nothing to do */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
 	if (send_tsn_req && send_in_req) {
 		/* error, can't do that */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	} else if (send_in_req) {
 		/* An incoming reset request may be paired with an outgoing one. */
 		can_send_out_req = 1;
 	}
 	/* Make sure the stream list fits into the single cluster used below. */
 	if (number_entries > (MCLBYTES -
 	    SCTP_MIN_OVERHEAD -
 	    sizeof(struct sctp_chunkhdr) -
 	    sizeof(struct sctp_stream_reset_out_request)) /
 	    sizeof(uint16_t)) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	/* Start out with a bare chunk header; parameters are appended below. */
 	chk->book_size = sizeof(struct sctp_chunkhdr);
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	chk->book_size_scale = 0;
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		return (ENOMEM);
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 
 	/* setup chunk parameters */
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	/* Prefer the alternate destination when one is set. */
 	if (stcb->asoc.alternate) {
 		chk->whoTo = stcb->asoc.alternate;
 	} else {
 		chk->whoTo = stcb->asoc.primary_destination;
 	}
 	atomic_add_int(&chk->whoTo->ref_count, 1);
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	ch->chunk_type = SCTP_STREAM_RESET;
 	ch->chunk_flags = 0;
 	ch->chunk_length = htons(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 
 	/* 'seq' advances by one for every parameter that gets added. */
 	seq = stcb->asoc.str_reset_seq_out;
 	if (can_send_out_req) {
 		int ret;
 
 		ret = sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
 		if (ret) {
 			seq++;
 			asoc->stream_reset_outstanding++;
 		}
 	}
 	/*
 	 * Adding outgoing streams: if the stream array does not have enough
 	 * spare slots, replace it with a larger one and migrate the existing
 	 * per-stream state and queued messages.
 	 */
 	if ((add_stream & 1) &&
 	    ((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < adding_o)) {
 		/* Need to allocate more */
 		struct sctp_stream_out *oldstream;
 		struct sctp_stream_queue_pending *sp, *nsp;
 		int i;
 #if defined(SCTP_DETAILED_STR_STATS)
 		int j;
 #endif
 
 		oldstream = stcb->asoc.strmout;
 		/* get some more */
 		SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *,
 		    (stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out),
 		    SCTP_M_STRMO);
 		if (stcb->asoc.strmout == NULL) {
 			uint8_t x;
 
 			/*
 			 * Allocation failed: keep the old array and drop the
 			 * add-outgoing-streams request (bit 0x01), but carry
 			 * on with whatever else was requested.
 			 */
 			stcb->asoc.strmout = oldstream;
 			/* Turn off the bit */
 			x = add_stream & 0xfe;
 			add_stream = x;
 			goto skip_stuff;
 		}
 		/*
 		 * Ok now we proceed with copying the old out stuff and
 		 * initializing the new stuff.
 		 */
 		stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, false);
 		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 			TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
 			/* FIX ME FIX ME */
 			/*
 			 * This should be a SS_COPY operation FIX ME STREAM
 			 * SCHEDULER EXPERT
 			 */
 			stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], &oldstream[i]);
 			stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues;
 #if defined(SCTP_DETAILED_STR_STATS)
 			for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 				stcb->asoc.strmout[i].abandoned_sent[j] = oldstream[i].abandoned_sent[j];
 				stcb->asoc.strmout[i].abandoned_unsent[j] = oldstream[i].abandoned_unsent[j];
 			}
 #else
 			stcb->asoc.strmout[i].abandoned_sent[0] = oldstream[i].abandoned_sent[0];
 			stcb->asoc.strmout[i].abandoned_unsent[0] = oldstream[i].abandoned_unsent[0];
 #endif
 			stcb->asoc.strmout[i].next_mid_ordered = oldstream[i].next_mid_ordered;
 			stcb->asoc.strmout[i].next_mid_unordered = oldstream[i].next_mid_unordered;
 			stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete;
 			stcb->asoc.strmout[i].sid = i;
 			stcb->asoc.strmout[i].state = oldstream[i].state;
 			/* Move any pending messages over to the new stream entry. */
 			TAILQ_FOREACH_SAFE(sp, &oldstream[i].outqueue, next, nsp) {
 				TAILQ_REMOVE(&oldstream[i].outqueue, sp, next);
 				TAILQ_INSERT_TAIL(&stcb->asoc.strmout[i].outqueue, sp, next);
 			}
 		}
 		/* now the new streams */
 		stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc);
 		for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + adding_o); i++) {
 			TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
 			stcb->asoc.strmout[i].chunks_on_queues = 0;
 #if defined(SCTP_DETAILED_STR_STATS)
 			for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 				stcb->asoc.strmout[i].abandoned_sent[j] = 0;
 				stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
 			}
 #else
 			stcb->asoc.strmout[i].abandoned_sent[0] = 0;
 			stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
 #endif
 			stcb->asoc.strmout[i].next_mid_ordered = 0;
 			stcb->asoc.strmout[i].next_mid_unordered = 0;
 			stcb->asoc.strmout[i].sid = i;
 			stcb->asoc.strmout[i].last_msg_incomplete = 0;
 			stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], NULL);
 			/* New streams start out closed. */
 			stcb->asoc.strmout[i].state = SCTP_STREAM_CLOSED;
 		}
 		/* streamoutcnt itself is not changed here, only the capacity. */
 		stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + adding_o;
 		SCTP_FREE(oldstream, SCTP_M_STRMO);
 	}
 skip_stuff:
 	/* Append the remaining parameters, one sequence number each. */
 	if ((add_stream & 1) && (adding_o > 0)) {
 		asoc->strm_pending_add_size = adding_o;
 		asoc->peer_req_out = peer_asked;
 		sctp_add_an_out_stream(chk, seq, adding_o);
 		seq++;
 		asoc->stream_reset_outstanding++;
 	}
 	if ((add_stream & 2) && (adding_i > 0)) {
 		sctp_add_an_in_stream(chk, seq, adding_i);
 		seq++;
 		asoc->stream_reset_outstanding++;
 	}
 	if (send_in_req) {
 		sctp_add_stream_reset_in(chk, number_entries, list, seq);
 		seq++;
 		asoc->stream_reset_outstanding++;
 	}
 	if (send_tsn_req) {
 		sctp_add_stream_reset_tsn(chk, seq);
 		asoc->stream_reset_outstanding++;
 	}
 	asoc->str_reset = chk;
 	/* insert the chunk for sending */
 	TAILQ_INSERT_TAIL(&asoc->control_send_queue,
 	    chk,
 	    sctp_next);
 	asoc->ctrl_queue_cnt++;
 	if (stcb->asoc.send_sack) {
 		sctp_send_sack(stcb, SCTP_SO_LOCKED);
 	}
 	sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
 	return (0);
 }
 
 /*
  * Answer the received packet in 'm' with an ABORT, unless that packet
  * itself carries an ABORT.
  *
  * 'cause' (may be NULL) is an optional error cause mbuf chain: it is freed
  * here when no ABORT is sent, otherwise it is handed to
  * sctp_send_resp_msg().  'vtag' is passed by address to
  * sctp_is_there_an_abort_here() before being used for the response, so that
  * helper may adjust it.  All remaining arguments are forwarded unchanged.
  */
 void
 sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
     uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	if (!sctp_is_there_an_abort_here(m, iphlen, &vtag)) {
 		sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION, cause,
 		    mflowtype, mflowid, fibnum,
 		    vrf_id, port);
 		return;
 	}
 	/* Never answer an ABORT with an ABORT; just drop the cause. */
 	if (cause != NULL) {
 		sctp_m_freem(cause);
 	}
 }
 
 /*
  * Convenience wrapper around sctp_send_resp_msg() that fixes the chunk type
  * to SCTP_OPERATION_ERROR; every other argument is passed through as-is.
  */
 void
 sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
     uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR,
 	    cause, mflowtype, mflowid, fibnum, vrf_id, port);
 }
 
 /*
  * Copy up to max_send_len further bytes from uio into a fresh mbuf chain.
  *
  * The chain is requested with M_PKTHDR, plus M_EOR when user_marks_eor is
  * non-zero.  On success the chain is returned, *sndout receives its length
  * as reported by m_length() and *new_tail its last mbuf.  On failure NULL is
  * returned and *error is set to ENOBUFS; *sndout and *new_tail are left
  * untouched in that case, and *error is left untouched on success.
  */
 static struct mbuf *
 sctp_copy_resume(struct uio *uio,
     int max_send_len,
     int user_marks_eor,
     int *error,
     uint32_t *sndout,
     struct mbuf **new_tail)
 {
 	struct mbuf *chain;
 	int mflags;
 
 	mflags = M_PKTHDR;
 	if (user_marks_eor) {
 		mflags |= M_EOR;
 	}
 	chain = m_uiotombuf(uio, M_WAITOK, max_send_len, 0, mflags);
 	if (chain != NULL) {
 		*sndout = m_length(chain, NULL);
 		*new_tail = m_last(chain);
 		return (chain);
 	}
 	SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
 	*error = ENOBUFS;
 	return (NULL);
 }
 
 /*
  * Fill sp->data with sp->length bytes copied from uio, asking
  * m_uiotombuf() to reserve resv_upfront bytes at the front of the chain.
  *
  * Returns 0 with sp->tail_mbuf pointing at the last mbuf of the chain, or
  * ENOBUFS (with sp->data left NULL) when no chain could be built.
  */
 static int
 sctp_copy_one(struct sctp_stream_queue_pending *sp,
     struct uio *uio,
     int resv_upfront)
 {
 	struct mbuf *chain;
 
 	chain = m_uiotombuf(uio, M_WAITOK, sp->length, resv_upfront, 0);
 	sp->data = chain;
 	if (chain != NULL) {
 		sp->tail_mbuf = m_last(chain);
 		return (0);
 	}
 	SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
 	return (ENOBUFS);
 }
 
 /*
  * Build a stream queue entry for a user send and load it with at most
  * max_send_len bytes taken from uio.
  *
  * Returns the new entry with *error == 0, or NULL with *error set to
  *   ECONNRESET - the association is in (or pending) shutdown,
  *   ENOMEM     - no queue entry could be allocated,
  *   or the error reported by sctp_copy_one().
  *
  * The message is flagged complete only when this call consumes all of uio
  * and either explicit-EOR mode is off or the caller set SCTP_EOF/SCTP_EOR.
  * When SCTP_ADDR_OVER is set the entry is pinned to net and takes a
  * reference on it.
  *
  * The sctp_lower_sosend() call site in this file releases the TCB lock
  * around this function, so protocol processing may run while the user data
  * is being copied.
  */
 static struct sctp_stream_queue_pending *
 sctp_copy_it_in(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     struct sctp_sndrcvinfo *srcv,
     struct uio *uio,
     struct sctp_nets *net,
     ssize_t max_send_len,
     int user_marks_eor,
     int *error)
 {
 	struct sctp_stream_queue_pending *sp = NULL;
 	int hdr_room;
 
 	*error = 0;
 	/* New data is refused once the association has begun shutting down. */
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
 		*error = ECONNRESET;
 		return (NULL);
 	}
 	sctp_alloc_a_strmoq(stcb, sp);
 	if (sp == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 		*error = ENOMEM;
 		return (NULL);
 	}
 	/* Bookkeeping starts out cleared. */
 	sp->act_flags = 0;
 	sp->sender_all_done = 0;
 	sp->some_taken = 0;
 	sp->put_last_out = 0;
 	sp->fsn = 0;
 	/* Per-message attributes come straight from the caller's send info. */
 	sp->sinfo_flags = srcv->sinfo_flags;
 	sp->timetolive = srcv->sinfo_timetolive;
 	sp->ppid = srcv->sinfo_ppid;
 	sp->context = srcv->sinfo_context;
 	(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
 	sp->sid = srcv->sinfo_stream;
 	sp->length = (uint32_t)min(uio->uio_resid, max_send_len);
 	sp->msg_is_complete =
 	    ((sp->length == (uint32_t)uio->uio_resid) &&
 	    ((user_marks_eor == 0) ||
 	    (srcv->sinfo_flags & SCTP_EOF) ||
 	    (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) ? 1 : 0;
 	hdr_room = SCTP_DATA_CHUNK_OVERHEAD(stcb);
 	sp->data = NULL;
 	sp->tail_mbuf = NULL;
 	if (sp->length != 0) {
 		/* Only a non-empty message needs a key and a data copy. */
 		if (srcv->sinfo_keynumber_valid) {
 			sp->auth_keyid = srcv->sinfo_keynumber;
 		} else {
 			sp->auth_keyid = stcb->asoc.authinfo.active_keyid;
 		}
 		if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
 			sctp_auth_key_acquire(stcb, sp->auth_keyid);
 			sp->holds_key_ref = 1;
 		}
 		*error = sctp_copy_one(sp, uio, hdr_room);
 		if (*error) {
 			sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED);
 			return (NULL);
 		}
 	}
 	if (sp->sinfo_flags & SCTP_ADDR_OVER) {
 		/* Caller overrides the destination: pin it and hold a ref. */
 		sp->net = net;
 		atomic_add_int(&sp->net->ref_count, 1);
 	} else {
 		sp->net = NULL;
 	}
 	sctp_set_prsctp_policy(sp);
 	return (sp);
 }
 
 /*
  * Socket-layer send entry point: pre-processes the arguments and hands the
  * request to sctp_lower_sosend(), whose result is returned unchanged.
  *
  * - If control holds an SCTP_SNDRCV cmsg it is extracted and passed down as
  *   the send info; otherwise NULL is passed.
  * - When both INET and INET6 are compiled in, an AF_INET6 destination must
  *   have sa_len == sizeof(struct sockaddr_in6) (EINVAL otherwise), and a
  *   v4-mapped IPv6 destination is converted to a sockaddr_in first.
  */
 int
 sctp_sosend(struct socket *so,
     struct sockaddr *addr,
     struct uio *uio,
     struct mbuf *top,
     struct mbuf *control,
     int flags,
     struct thread *p)
 {
 	struct sctp_sndrcvinfo sndrcvninfo;
 	struct sctp_sndrcvinfo *srcv = NULL;
 	struct sockaddr *dest = addr;
 #if defined(INET) && defined(INET6)
 	struct sockaddr_in sin;
 #endif
 
 	/* Pick up cmsg snd/rcv info (possibly an assoc-id) when supplied. */
 	if ((control != NULL) &&
 	    sctp_find_cmsg(SCTP_SNDRCV, (void *)&sndrcvninfo, control,
 	    sizeof(sndrcvninfo))) {
 		srcv = &sndrcvninfo;
 	}
 #if defined(INET) && defined(INET6)
 	if ((addr != NULL) && (addr->sa_family == AF_INET6)) {
 		struct sockaddr_in6 *sin6;
 
 		if (addr->sa_len != sizeof(struct sockaddr_in6)) {
 			SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 			return (EINVAL);
 		}
 		sin6 = (struct sockaddr_in6 *)addr;
 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 			/* sin lives until return, so it outlasts the call below. */
 			in6_sin6_2_sin(&sin, sin6);
 			dest = (struct sockaddr *)&sin;
 		}
 	}
 #endif
 	return (sctp_lower_sosend(so, dest, uio, top, control, flags, srcv, p));
 }
 
 int
 sctp_lower_sosend(struct socket *so,
     struct sockaddr *addr,
     struct uio *uio,
     struct mbuf *top,
     struct mbuf *control,
     int flags,
     struct sctp_sndrcvinfo *srcv
     ,
     struct thread *p
 )
 {
 	struct epoch_tracker et;
 	ssize_t sndlen = 0, max_len, local_add_more;
 	int error;
 	int queue_only = 0, queue_only_for_init = 0;
 	bool free_cnt_applied = false;
 	int un_sent;
 	int now_filled = 0;
 	unsigned int inqueue_bytes = 0;
 	struct sctp_block_entry be;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb = NULL;
 	struct timeval now;
 	struct sctp_nets *net;
 	struct sctp_association *asoc;
 	struct sctp_inpcb *t_inp;
 	int user_marks_eor;
 	bool create_lock_applied = false;
 	int nagle_applies = 0;
 	bool some_on_control;
 	bool got_all_of_the_send = false;
 	bool hold_tcblock = false;
 	bool non_blocking = false;
 	ssize_t local_soresv = 0;
 	uint16_t port;
 	uint16_t sinfo_flags;
 	sctp_assoc_t sinfo_assoc_id;
 
 	error = 0;
 	net = NULL;
 	stcb = NULL;
 
 	t_inp = inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	if ((uio == NULL) && (top == NULL)) {
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	user_marks_eor = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
 	atomic_add_int(&inp->total_sends, 1);
 	if (uio != NULL) {
 		if (uio->uio_resid < 0) {
 			error = EINVAL;
 			goto out_unlocked;
 		}
 		sndlen = uio->uio_resid;
 	} else {
 		sndlen = SCTP_HEADER_LEN(top);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %zd\n",
 	    (void *)addr, sndlen);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    SCTP_IS_LISTENING(inp)) {
 		/* The listener can NOT send. */
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	if (addr != NULL) {
 		union sctp_sockstore *raddr = (union sctp_sockstore *)addr;
 
 		switch (raddr->sa.sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (raddr->sin.sin_len != sizeof(struct sockaddr_in)) {
 				error = EINVAL;
 				goto out_unlocked;
 			}
 			port = raddr->sin.sin_port;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (raddr->sin6.sin6_len != sizeof(struct sockaddr_in6)) {
 				error = EINVAL;
 				goto out_unlocked;
 			}
 			port = raddr->sin6.sin6_port;
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 			goto out_unlocked;
 		}
 	} else {
 		port = 0;
 	}
 
 	if (srcv != NULL) {
 		sinfo_flags = srcv->sinfo_flags;
 		sinfo_assoc_id = srcv->sinfo_assoc_id;
 		if (INVALID_SINFO_FLAG(sinfo_flags) ||
 		    PR_SCTP_INVALID_POLICY(sinfo_flags)) {
 			error = EINVAL;
 			goto out_unlocked;
 		}
 		if (srcv->sinfo_flags != 0)
 			SCTP_STAT_INCR(sctps_sends_with_flags);
 	} else {
 		sinfo_flags = inp->def_send.sinfo_flags;
 		sinfo_assoc_id = inp->def_send.sinfo_assoc_id;
 	}
 	if (flags & MSG_EOR) {
 		sinfo_flags |= SCTP_EOR;
 	}
 	if (flags & MSG_EOF) {
 		sinfo_flags |= SCTP_EOF;
 	}
 	if (sinfo_flags & SCTP_SENDALL) {
 		error = sctp_sendall(inp, uio, top, srcv);
 		top = NULL;
 		goto out_unlocked;
 	}
 	if ((sinfo_flags & SCTP_ADDR_OVER) && (addr == NULL)) {
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	/* Now we must find the association. */
 	SCTP_INP_RLOCK(inp);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		if (stcb != NULL) {
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = true;
 		}
 		SCTP_INP_RUNLOCK(inp);
 	} else if (sinfo_assoc_id > SCTP_ALL_ASSOC) {
 		stcb = sctp_findasoc_ep_asocid_locked(inp, sinfo_assoc_id, 1);
 		SCTP_INP_RUNLOCK(inp);
 		if (stcb != NULL) {
 			SCTP_TCB_LOCK_ASSERT(stcb);
 			hold_tcblock = true;
 		}
 	} else if (addr != NULL) {
 		/*-
 		 * Since we did not use findep we must
 		 * increment it, and if we don't find a tcb
 		 * decrement it.
 		 */
 		SCTP_INP_INCR_REF(inp);
 		SCTP_INP_RUNLOCK(inp);
 		stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
 		if (stcb == NULL) {
 			SCTP_INP_WLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 			SCTP_INP_WUNLOCK(inp);
 		} else {
 			SCTP_TCB_LOCK_ASSERT(stcb);
 			hold_tcblock = true;
 		}
 	} else {
 		SCTP_INP_RUNLOCK(inp);
 	}
 
 #ifdef INVARIANTS
 	if (stcb != NULL) {
 		SCTP_TCB_LOCK_ASSERT(stcb);
 		KASSERT(hold_tcblock, ("tcb lock hold, hold_tcblock is false"));
 	} else {
 		KASSERT(!hold_tcblock, ("hold_tcblock is true, but stcb is NULL"));
 	}
 #endif
 	if ((stcb == NULL) && (addr != NULL)) {
 		/* Possible implicit send? */
 		SCTP_ASOC_CREATE_LOCK(inp);
 		create_lock_applied = true;
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
 			error = EINVAL;
 			goto out_unlocked;
 		}
 		if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
 		    (addr->sa_family == AF_INET6)) {
 			error = EINVAL;
 			goto out_unlocked;
 		}
 		SCTP_INP_WLOCK(inp);
 		SCTP_INP_INCR_REF(inp);
 		SCTP_INP_WUNLOCK(inp);
 		/* With the lock applied look again */
 		stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
 #if defined(INET) || defined(INET6)
 		if ((stcb == NULL) && (control != NULL) && (port > 0)) {
 			stcb = sctp_findassociation_cmsgs(&t_inp, port, control, &net, &error);
 		}
 #endif
 		if (stcb == NULL) {
 			SCTP_INP_WLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 			SCTP_INP_WUNLOCK(inp);
 		} else {
 			SCTP_TCB_LOCK_ASSERT(stcb);
 			hold_tcblock = true;
 			SCTP_ASOC_CREATE_UNLOCK(inp);
 			create_lock_applied = false;
 		}
 		if (error != 0) {
 			goto out_unlocked;
 		}
 		if (t_inp != inp) {
 			error = ENOTCONN;
 			goto out_unlocked;
 		}
 	}
 	if (stcb == NULL) {
 		if (addr == NULL) {
 			error = ENOENT;
 			goto out_unlocked;
 		} else {
 			/* We must go ahead and start the INIT process */
 			uint32_t vrf_id;
 
 			if ((sinfo_flags & SCTP_ABORT) ||
 			    ((sinfo_flags & SCTP_EOF) && (sndlen == 0))) {
 				/*-
 				 * User asks to abort a non-existent assoc,
 				 * or EOF a non-existent assoc with no data
 				 */
 				error = ENOENT;
 				goto out_unlocked;
 			}
 			/* get an asoc/stcb struct */
 			vrf_id = inp->def_vrf_id;
 			KASSERT(create_lock_applied, ("create_lock_applied is false"));
 			stcb = sctp_aloc_assoc_connected(inp, addr, &error, 0, 0, vrf_id,
 			    inp->sctp_ep.pre_open_stream_count,
 			    inp->sctp_ep.port,
 			    p,
 			    SCTP_INITIALIZE_AUTH_PARAMS);
 			if (stcb == NULL) {
 				/* error is setup for us in the call. */
 				KASSERT(error != 0, ("error is 0 although stcb is NULL"));
 				goto out_unlocked;
 			}
 			SCTP_TCB_LOCK_ASSERT(stcb);
 			hold_tcblock = true;
 			SCTP_ASOC_CREATE_UNLOCK(inp);
 			create_lock_applied = false;
 			/*
 			 * Turn on queue only flag to prevent data from
 			 * being sent
 			 */
 			queue_only = 1;
 			SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
 			(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
 			if (control != NULL) {
 				if (sctp_process_cmsgs_for_init(stcb, control, &error)) {
 					sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 					    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
 					hold_tcblock = false;
 					stcb = NULL;
 					KASSERT(error != 0,
 					    ("error is 0 although sctp_process_cmsgs_for_init() indicated an error"));
 					goto out_unlocked;
 				}
 			}
 			/* out with the INIT */
 			queue_only_for_init = 1;
 			/*-
 			 * we may want to dig in after this call and adjust the MTU
 			 * value. It defaulted to 1500 (constant) but the ro
 			 * structure may now have an update and thus we may need to
 			 * change it BEFORE we append the message.
 			 */
 		}
 	}
 
 	KASSERT(!create_lock_applied, ("create_lock_applied is true"));
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	asoc = &stcb->asoc;
 	if ((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
 	    (asoc->state & SCTP_STATE_WAS_ABORTED)) {
 		if (asoc->state & SCTP_STATE_WAS_ABORTED) {
 			/* XXX: Could also be ECONNABORTED, not enough info. */
 			error = ECONNRESET;
 		} else {
 			error = ENOTCONN;
 		}
 		goto out_unlocked;
 	}
 	/* Keep the stcb from being freed under our feet. */
 	atomic_add_int(&asoc->refcnt, 1);
 	free_cnt_applied = true;
 
 	if (srcv == NULL) {
 		srcv = (struct sctp_sndrcvinfo *)&asoc->def_send;
 		sinfo_flags = srcv->sinfo_flags;
 		if (flags & MSG_EOR) {
 			sinfo_flags |= SCTP_EOR;
 		}
 		if (flags & MSG_EOF) {
 			sinfo_flags |= SCTP_EOF;
 		}
 	}
 	if (sinfo_flags & SCTP_ADDR_OVER) {
 		if (addr != NULL)
 			net = sctp_findnet(stcb, addr);
 		else
 			net = NULL;
 		if ((net == NULL) ||
 		    ((port != 0) && (port != stcb->rport))) {
 			error = EINVAL;
 			goto out_unlocked;
 		}
 	} else {
 		if (asoc->alternate != NULL) {
 			net = asoc->alternate;
 		} else {
 			net = asoc->primary_destination;
 		}
 	}
 	atomic_add_int(&stcb->total_sends, 1);
 
 	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT)) {
 		if (sndlen > (ssize_t)asoc->smallest_mtu) {
 			error = EMSGSIZE;
 			goto out_unlocked;
 		}
 	}
 	if (SCTP_SO_IS_NBIO(so)
 	    || (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0
 	    ) {
 		non_blocking = true;
 	}
 	/* would we block? */
 	if (non_blocking) {
 		ssize_t amount;
 
 		inqueue_bytes = asoc->total_output_queue_size - (asoc->chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
 		if (user_marks_eor == 0) {
 			amount = sndlen;
 		} else {
 			amount = 1;
 		}
 		if ((SCTP_SB_LIMIT_SND(so) < (amount + inqueue_bytes + asoc->sb_send_resv)) ||
 		    (asoc->chunks_on_out_queue >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
 			if ((sndlen > (ssize_t)SCTP_SB_LIMIT_SND(so)) &&
 			    (user_marks_eor == 0)) {
 				error = EMSGSIZE;
 			} else {
 				error = EWOULDBLOCK;
 			}
 			goto out_unlocked;
 		}
 		asoc->sb_send_resv += (uint32_t)sndlen;
 	} else {
 		atomic_add_int(&asoc->sb_send_resv, (int)sndlen);
 	}
 	local_soresv = sndlen;
 	/* Is the stream no. valid? */
 	if (srcv->sinfo_stream >= asoc->streamoutcnt) {
 		/* Invalid stream number */
 		error = EINVAL;
 		goto out_unlocked;
 	}
 	if ((asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPEN) &&
 	    (asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPENING)) {
 		/*
 		 * Can't queue any data while stream reset is underway.
 		 */
 		if (asoc->strmout[srcv->sinfo_stream].state > SCTP_STREAM_OPEN) {
 			error = EAGAIN;
 		} else {
 			error = EINVAL;
 		}
 		goto out_unlocked;
 	}
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
 		queue_only = 1;
 	}
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
 	    (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
 		if ((sinfo_flags & SCTP_ABORT) == 0) {
 			error = EPIPE;
 			goto out_unlocked;
 		}
 	}
 	/* Ok, we will attempt a msgsnd :> */
 	if (p != NULL) {
 		p->td_ru.ru_msgsnd++;
 	}
 
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 	/* Are we aborting? */
 	if (sinfo_flags & SCTP_ABORT) {
 		struct mbuf *mm;
 		struct sctp_paramhdr *ph;
 		ssize_t tot_demand, tot_out = 0, max_out;
 
 		SCTP_STAT_INCR(sctps_sends_with_abort);
 		if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
 		    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
 			/* It has to be up before we abort. */
 			error = EINVAL;
 			goto out;
 		}
 		/* How big is the user initiated abort? */
 		if (top != NULL) {
 			struct mbuf *cntm;
 
 			if (sndlen != 0) {
 				for (cntm = top; cntm; cntm = SCTP_BUF_NEXT(cntm)) {
 					tot_out += SCTP_BUF_LEN(cntm);
 				}
 			}
 			mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA);
 		} else {
 			/* Must fit in a MTU */
 			tot_out = sndlen;
 			tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
 			if (tot_demand > SCTP_DEFAULT_ADD_MORE) {
 				error = EMSGSIZE;
 				goto out_unlocked;
 			}
 			mm = sctp_get_mbuf_for_msg((unsigned int)tot_demand, 0, M_NOWAIT, 1, MT_DATA);
 		}
 		if (mm == NULL) {
 			error = ENOMEM;
 			goto out_unlocked;
 		}
 		max_out = asoc->smallest_mtu - sizeof(struct sctp_paramhdr);
 		max_out -= sizeof(struct sctp_abort_msg);
 		if (tot_out > max_out) {
 			tot_out = max_out;
 		}
 		ph = mtod(mm, struct sctp_paramhdr *);
 		ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
 		ph->param_length = htons((uint16_t)(sizeof(struct sctp_paramhdr) + tot_out));
 		ph++;
 		SCTP_BUF_LEN(mm) = (int)(tot_out + sizeof(struct sctp_paramhdr));
 		if (top == NULL) {
 			SCTP_TCB_UNLOCK(stcb);
 			hold_tcblock = false;
 			error = uiomove((caddr_t)ph, (int)tot_out, uio);
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = true;
 			if ((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
 			    (asoc->state & SCTP_STATE_WAS_ABORTED)) {
 				sctp_m_freem(mm);
 				if (asoc->state & SCTP_STATE_WAS_ABORTED) {
 					/*
 					 * XXX: Could also be ECONNABORTED,
 					 * not enough info.
 					 */
 					error = ECONNRESET;
 				} else {
 					error = ENOTCONN;
 				}
 				goto out_unlocked;
 			}
 			if (error != 0) {
 				/*-
 				 * Here if we can't get his data we
 				 * still abort we just don't get to
 				 * send the users note :-0
 				 */
 				sctp_m_freem(mm);
 				mm = NULL;
 				error = 0;
 			}
 		} else {
 			if (sndlen != 0) {
 				SCTP_BUF_NEXT(mm) = top;
 			}
 		}
 		atomic_subtract_int(&asoc->refcnt, 1);
 		free_cnt_applied = false;
 		/* release this lock, otherwise we hang on ourselves */
 		NET_EPOCH_ENTER(et);
 		sctp_abort_an_association(stcb->sctp_ep, stcb, mm, false, SCTP_SO_LOCKED);
 		NET_EPOCH_EXIT(et);
 		stcb = NULL;
 		/*
 		 * In this case top is already chained to mm avoid double
 		 * free, since we free it below if top != NULL and driver
 		 * would free it after sending the packet out
 		 */
 		if (sndlen != 0) {
 			top = NULL;
 		}
 		goto out_unlocked;
 	}
 
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 	/* Calculate the maximum we can send */
 	inqueue_bytes = asoc->total_output_queue_size - (asoc->chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
 	if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
 		max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
 	} else {
 		max_len = 0;
 	}
 	/* Unless E_EOR mode is on, we must make a send FIT in one call. */
 	if ((user_marks_eor == 0) &&
 	    (sndlen > (ssize_t)SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
 		/* It will NEVER fit. */
 		error = EMSGSIZE;
 		goto out_unlocked;
 	}
 	if ((uio == NULL) && (user_marks_eor != 0)) {
 		/*-
 		 * We do not support eeor mode for
 		 * sending with mbuf chains (like sendfile).
 		 */
 		error = EINVAL;
 		goto out_unlocked;
 	}
 
 	if (user_marks_eor != 0) {
 		local_add_more = (ssize_t)min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold));
 	} else {
 		/*-
 		 * For non-eeor the whole message must fit in
 		 * the socket send buffer.
 		 */
 		local_add_more = sndlen;
 	}
 	if (non_blocking) {
 		goto skip_preblock;
 	}
 	if (((max_len <= local_add_more) && ((ssize_t)SCTP_SB_LIMIT_SND(so) >= local_add_more)) ||
 	    (max_len == 0) ||
 	    ((asoc->chunks_on_out_queue + asoc->stream_queue_cnt) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
 		/* No room right now! */
 		inqueue_bytes = asoc->total_output_queue_size - (asoc->chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
 		SOCKBUF_LOCK(&so->so_snd);
 		while ((SCTP_SB_LIMIT_SND(so) < (inqueue_bytes + local_add_more)) ||
 		    ((asoc->stream_queue_cnt + asoc->chunks_on_out_queue) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "pre_block limit:%u <(inq:%d + %zd) || (%d+%d > %d)\n",
 			    (unsigned int)SCTP_SB_LIMIT_SND(so),
 			    inqueue_bytes,
 			    local_add_more,
 			    asoc->stream_queue_cnt,
 			    asoc->chunks_on_out_queue,
 			    SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue));
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 				sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA, asoc, sndlen);
 			}
 			be.error = 0;
 			stcb->block_entry = &be;
 			SCTP_TCB_UNLOCK(stcb);
 			hold_tcblock = false;
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			if (error == 0) {
 				if (so->so_error != 0) {
 					error = so->so_error;
 				}
 				if (be.error != 0) {
 					error = be.error;
 				}
 			}
 			SOCKBUF_UNLOCK(&so->so_snd);
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = true;
 			stcb->block_entry = NULL;
 			if (error != 0) {
 				goto out_unlocked;
 			}
 			if ((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
 			    (asoc->state & SCTP_STATE_WAS_ABORTED)) {
 				if (asoc->state & SCTP_STATE_WAS_ABORTED) {
 					/*
 					 * XXX: Could also be ECONNABORTED,
 					 * not enough info.
 					 */
 					error = ECONNRESET;
 				} else {
 					error = ENOTCONN;
 				}
 				goto out_unlocked;
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 				sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
 				    asoc, asoc->total_output_queue_size);
 			}
 			inqueue_bytes = asoc->total_output_queue_size - (asoc->chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
 			SOCKBUF_LOCK(&so->so_snd);
 		}
 		if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
 			max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
 		} else {
 			max_len = 0;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 	}
 
 skip_preblock:
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 	/*
 	 * sndlen covers for mbuf case uio_resid covers for the non-mbuf
 	 * case NOTE: uio will be null when top/mbuf is passed
 	 */
 	if (sndlen == 0) {
 		if (sinfo_flags & SCTP_EOF) {
 			got_all_of_the_send = true;
 			goto dataless_eof;
 		} else {
 			error = EINVAL;
 			goto out;
 		}
 	}
 
 	if (top == NULL) {
 		struct sctp_stream_queue_pending *sp;
 		struct sctp_stream_out *strm;
 		uint32_t sndout;
 
 		if ((asoc->stream_locked) &&
 		    (asoc->stream_locked_on != srcv->sinfo_stream)) {
 			error = EINVAL;
 			goto out;
 		}
 		strm = &asoc->strmout[srcv->sinfo_stream];
 		if (strm->last_msg_incomplete == 0) {
 	do_a_copy_in:
 			SCTP_TCB_UNLOCK(stcb);
 			hold_tcblock = false;
 			sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error);
 			SCTP_TCB_LOCK(stcb);
 			hold_tcblock = true;
 			if ((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
 			    (asoc->state & SCTP_STATE_WAS_ABORTED)) {
 				if (asoc->state & SCTP_STATE_WAS_ABORTED) {
 					/*
 					 * XXX: Could also be ECONNABORTED,
 					 * not enough info.
 					 */
 					error = ECONNRESET;
 				} else {
 					error = ENOTCONN;
 				}
 				goto out;
 			}
 			if (error != 0) {
 				goto out;
 			}
 			/* The out streams might be reallocated. */
 			strm = &asoc->strmout[srcv->sinfo_stream];
 			if (sp->msg_is_complete) {
 				strm->last_msg_incomplete = 0;
 				asoc->stream_locked = 0;
 			} else {
 				/*
 				 * Just got locked to this guy in case of an
 				 * interrupt.
 				 */
 				strm->last_msg_incomplete = 1;
 				if (asoc->idata_supported == 0) {
 					asoc->stream_locked = 1;
 					asoc->stream_locked_on = srcv->sinfo_stream;
 				}
 				sp->sender_all_done = 0;
 			}
 			sctp_snd_sb_alloc(stcb, sp->length);
 			atomic_add_int(&asoc->stream_queue_cnt, 1);
 			if (sinfo_flags & SCTP_UNORDERED) {
 				SCTP_STAT_INCR(sctps_sends_with_unord);
 			}
 			sp->processing = 1;
 			TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
 			asoc->ss_functions.sctp_ss_add_to_stream(stcb, asoc, strm, sp);
 		} else {
 			sp = TAILQ_LAST(&strm->outqueue, sctp_streamhead);
 			if (sp == NULL) {
 				/* ???? Huh ??? last msg is gone */
 #ifdef INVARIANTS
 				panic("Warning: Last msg marked incomplete, yet nothing left?");
 #else
 				SCTP_PRINTF("Warning: Last msg marked incomplete, yet nothing left?\n");
 				strm->last_msg_incomplete = 0;
 #endif
 				goto do_a_copy_in;
 			}
 			if (sp->processing != 0) {
 				error = EINVAL;
 				goto out;
 			} else {
 				sp->processing = 1;
 			}
 		}
 
 		KASSERT(stcb != NULL, ("stcb is NULL"));
 		KASSERT(hold_tcblock, ("hold_tcblock is false"));
 		SCTP_TCB_LOCK_ASSERT(stcb);
 		KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 		    ("Association about to be freed"));
 		KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 		    ("Association was aborted"));
 
 		while (uio->uio_resid > 0) {
 			/* How much room do we have? */
 			struct mbuf *new_tail, *mm;
 
 			inqueue_bytes = asoc->total_output_queue_size - (asoc->chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
 			if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
 				max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
 			} else {
 				max_len = 0;
 			}
 			if ((max_len > (ssize_t)SCTP_BASE_SYSCTL(sctp_add_more_threshold)) ||
 			    ((max_len > 0) && (SCTP_SB_LIMIT_SND(so) < SCTP_BASE_SYSCTL(sctp_add_more_threshold))) ||
 			    (uio->uio_resid <= max_len)) {
 				SCTP_TCB_UNLOCK(stcb);
 				hold_tcblock = false;
 				sndout = 0;
 				new_tail = NULL;
 				mm = sctp_copy_resume(uio, (int)max_len, user_marks_eor, &error, &sndout, &new_tail);
 				SCTP_TCB_LOCK(stcb);
 				hold_tcblock = true;
 				if ((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
 				    (asoc->state & SCTP_STATE_WAS_ABORTED)) {
 					/*
 					 * We need to get out. Peer probably
 					 * aborted.
 					 */
 					sctp_m_freem(mm);
 					if (asoc->state & SCTP_STATE_WAS_ABORTED) {
 						/*
 						 * XXX: Could also be
 						 * ECONNABORTED, not enough
 						 * info.
 						 */
 						error = ECONNRESET;
 					} else {
 						error = ENOTCONN;
 					}
 					goto out;
 				}
 				if ((mm == NULL) || (error != 0)) {
 					if (mm != NULL) {
 						sctp_m_freem(mm);
 					}
 					if (sp != NULL) {
 						sp->processing = 0;
 					}
 					goto out;
 				}
 				/* Update the mbuf and count */
 				if (sp->tail_mbuf != NULL) {
 					/* Tack it to the end. */
 					SCTP_BUF_NEXT(sp->tail_mbuf) = mm;
 				} else {
 					/* A stolen mbuf. */
 					sp->data = mm;
 				}
 				sp->tail_mbuf = new_tail;
 				sctp_snd_sb_alloc(stcb, sndout);
 				atomic_add_int(&sp->length, sndout);
 				if (sinfo_flags & SCTP_SACK_IMMEDIATELY) {
 					sp->sinfo_flags |= SCTP_SACK_IMMEDIATELY;
 				}
 
 				/* Did we reach EOR? */
 				if ((uio->uio_resid == 0) &&
 				    ((user_marks_eor == 0) ||
 				    (sinfo_flags & SCTP_EOF) ||
 				    (user_marks_eor && (sinfo_flags & SCTP_EOR)))) {
 					sp->msg_is_complete = 1;
 				} else {
 					sp->msg_is_complete = 0;
 				}
 			}
 
 			KASSERT(stcb != NULL, ("stcb is NULL"));
 			KASSERT(hold_tcblock, ("hold_tcblock is false"));
 			SCTP_TCB_LOCK_ASSERT(stcb);
 			KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 			    ("Association about to be freed"));
 			KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 			    ("Association was aborted"));
 
 			if (uio->uio_resid == 0) {
 				/* got it all? */
 				continue;
 			}
 			/* PR-SCTP? */
 			if ((asoc->prsctp_supported) && (asoc->sent_queue_cnt_removeable > 0)) {
 				/*
 				 * This is ugly but we must assure locking
 				 * order
 				 */
 				sctp_prune_prsctp(stcb, asoc, srcv, (int)sndlen);
 				inqueue_bytes = asoc->total_output_queue_size - (asoc->chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
 				if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes)
 					max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
 				else
 					max_len = 0;
 				if (max_len > 0) {
 					continue;
 				}
 			}
 			/* wait for space now */
 			if (non_blocking) {
 				/* Non-blocking io in place out */
 				if (sp != NULL) {
 					sp->processing = 0;
 				}
 				goto skip_out_eof;
 			}
 			/* What about the INIT, send it maybe */
 			if (queue_only_for_init) {
 				if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
 					/* a collision took us forward? */
 					queue_only = 0;
 				} else {
 					NET_EPOCH_ENTER(et);
 					sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 					NET_EPOCH_EXIT(et);
 					SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
 					queue_only = 1;
 				}
 			}
 			if ((net->flight_size > net->cwnd) &&
 			    (asoc->sctp_cmt_on_off == 0)) {
 				SCTP_STAT_INCR(sctps_send_cwnd_avoid);
 				queue_only = 1;
 			} else if (asoc->ifp_had_enobuf) {
 				SCTP_STAT_INCR(sctps_ifnomemqueued);
 				if (net->flight_size > (2 * net->mtu)) {
 					queue_only = 1;
 				}
 				asoc->ifp_had_enobuf = 0;
 			}
 			un_sent = asoc->total_output_queue_size - asoc->total_flight;
 			if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
 			    (asoc->total_flight > 0) &&
 			    (asoc->stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
 			    (un_sent < (int)(asoc->smallest_mtu - SCTP_MIN_OVERHEAD))) {
 				/*-
 				 * Ok, Nagle is set on and we have data outstanding.
 				 * Don't send anything and let SACKs drive out the
 				 * data unless we have a "full" segment to send.
 				 */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 					sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
 				}
 				SCTP_STAT_INCR(sctps_naglequeued);
 				nagle_applies = 1;
 			} else {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 					if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
 						sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
 				}
 				SCTP_STAT_INCR(sctps_naglesent);
 				nagle_applies = 0;
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 				sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
 				    nagle_applies, un_sent);
 				sctp_misc_ints(SCTP_CWNDLOG_PRESEND, asoc->total_output_queue_size,
 				    asoc->total_flight,
 				    asoc->chunks_on_out_queue, asoc->total_flight_count);
 			}
 			if (queue_only_for_init) {
 				queue_only_for_init = 0;
 			}
 			if ((queue_only == 0) && (nagle_applies == 0)) {
 				/*-
 				 * need to start chunk output
 				 * before blocking.. note that if
 				 * a lock is already applied, then
 				 * the input via the net is happening
 				 * and I don't need to start output :-D
 				 */
 				NET_EPOCH_ENTER(et);
 				sctp_chunk_output(inp, stcb,
 				    SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 				NET_EPOCH_EXIT(et);
 			}
 			/*-
 			 * This is a bit strange, but I think it will
 			 * work. The total_output_queue_size is locked and
 			 * protected by the TCB_LOCK, which we just released.
 			 * There is a race that can occur between releasing it
 			 * above, and me getting the socket lock, where sacks
 			 * come in but we have not put the SB_WAIT on the
 			 * so_snd buffer to get the wakeup. After the LOCK
 			 * is applied the sack_processing will also need to
 			 * LOCK the so->so_snd to do the actual sowwakeup(). So
 			 * once we have the socket buffer lock if we recheck the
 			 * size we KNOW we will get to sleep safely with the
 			 * wakeup flag in place.
 			 */
 			inqueue_bytes = asoc->total_output_queue_size - (asoc->chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
 			SOCKBUF_LOCK(&so->so_snd);
 			if (SCTP_SB_LIMIT_SND(so) <= (inqueue_bytes +
 			    min(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTP_SB_LIMIT_SND(so)))) {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 					sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK,
 					    asoc, uio->uio_resid);
 				}
 				be.error = 0;
 				stcb->block_entry = &be;
 				SCTP_TCB_UNLOCK(stcb);
 				hold_tcblock = false;
-				error = sbwait(&so->so_snd);
+				error = sbwait(so, SO_SND);
 				if (error == 0) {
 					if (so->so_error != 0)
 						error = so->so_error;
 					if (be.error != 0) {
 						error = be.error;
 					}
 				}
 				SOCKBUF_UNLOCK(&so->so_snd);
 				SCTP_TCB_LOCK(stcb);
 				hold_tcblock = true;
 				stcb->block_entry = NULL;
 				if ((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
 				    (asoc->state & SCTP_STATE_WAS_ABORTED)) {
 					if (asoc->state & SCTP_STATE_WAS_ABORTED) {
 						/*
 						 * XXX: Could also be
 						 * ECONNABORTED, not enough
 						 * info.
 						 */
 						error = ECONNRESET;
 					} else {
 						error = ENOTCONN;
 					}
 					goto out_unlocked;
 				}
 				if (error != 0) {
 					if (sp != NULL) {
 						sp->processing = 0;
 					}
 					goto out_unlocked;
 				}
 			} else {
 				SOCKBUF_UNLOCK(&so->so_snd);
 			}
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 				sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
 				    asoc, asoc->total_output_queue_size);
 			}
 		}
 
 		KASSERT(stcb != NULL, ("stcb is NULL"));
 		KASSERT(hold_tcblock, ("hold_tcblock is false"));
 		SCTP_TCB_LOCK_ASSERT(stcb);
 		KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 		    ("Association about to be freed"));
 		KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 		    ("Association was aborted"));
 
 		/* The out streams might be reallocated. */
 		strm = &asoc->strmout[srcv->sinfo_stream];
 		if (sp != NULL) {
 			if (sp->msg_is_complete == 0) {
 				strm->last_msg_incomplete = 1;
 				if (asoc->idata_supported == 0) {
 					asoc->stream_locked = 1;
 					asoc->stream_locked_on = srcv->sinfo_stream;
 				}
 			} else {
 				sp->sender_all_done = 1;
 				strm->last_msg_incomplete = 0;
 				asoc->stream_locked = 0;
 			}
 			sp->processing = 0;
 		} else {
 			SCTP_PRINTF("Huh no sp TSNH?\n");
 			strm->last_msg_incomplete = 0;
 			asoc->stream_locked = 0;
 		}
 		if (uio->uio_resid == 0) {
 			got_all_of_the_send = true;
 		}
 	} else {
 		error = sctp_msg_append(stcb, net, top, srcv);
 		top = NULL;
 		if ((sinfo_flags & SCTP_EOF) != 0) {
 			got_all_of_the_send = true;
 		}
 	}
 	if (error != 0) {
 		goto out;
 	}
 
 dataless_eof:
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 	/* EOF thing ? */
 	if ((sinfo_flags & SCTP_EOF) && got_all_of_the_send) {
 		SCTP_STAT_INCR(sctps_sends_with_eof);
 		error = 0;
 		if (TAILQ_EMPTY(&asoc->send_queue) &&
 		    TAILQ_EMPTY(&asoc->sent_queue) &&
 		    sctp_is_there_unsent_data(stcb, SCTP_SO_LOCKED) == 0) {
 			if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 				goto abort_anyway;
 			}
 			/* there is nothing queued to send, so I'm done... */
 			if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
 			    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 			    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 				struct sctp_nets *netp;
 
 				/* only send SHUTDOWN the first time through */
 				if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
 					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 				}
 				SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
 				sctp_stop_timers_for_shutdown(stcb);
 				if (asoc->alternate != NULL) {
 					netp = asoc->alternate;
 				} else {
 					netp = asoc->primary_destination;
 				}
 				sctp_send_shutdown(stcb, netp);
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
 				    netp);
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 				    NULL);
 			}
 		} else {
 			/*-
 			 * we still got (or just got) data to send, so set
 			 * SHUTDOWN_PENDING
 			 */
 			/*-
 			 * XXX sockets draft says that SCTP_EOF should be
 			 * sent with no data.  currently, we will allow user
 			 * data to be sent first and move to
 			 * SHUTDOWN-PENDING
 			 */
 			if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
 			    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 			    (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 				if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 					SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
 				}
 				SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
 				if (TAILQ_EMPTY(&asoc->send_queue) &&
 				    TAILQ_EMPTY(&asoc->sent_queue) &&
 				    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
 					struct mbuf *op_err;
 					char msg[SCTP_DIAG_INFO_LEN];
 
 			abort_anyway:
 					if (free_cnt_applied) {
 						atomic_subtract_int(&asoc->refcnt, 1);
 						free_cnt_applied = false;
 					}
 					SCTP_SNPRINTF(msg, sizeof(msg),
 					    "%s:%d at %s", __FILE__, __LINE__, __func__);
 					op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 					    msg);
 					NET_EPOCH_ENTER(et);
 					sctp_abort_an_association(stcb->sctp_ep, stcb,
 					    op_err, false, SCTP_SO_LOCKED);
 					NET_EPOCH_EXIT(et);
 					hold_tcblock = false;
 					stcb = NULL;
 					error = ECONNABORTED;
 					goto out;
 				}
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 				    NULL);
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_NODELAY);
 			}
 		}
 	}
 
 skip_out_eof:
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 	some_on_control = !TAILQ_EMPTY(&asoc->control_send_queue);
 	if (queue_only_for_init) {
 		if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
 			/* a collision took us forward? */
 			queue_only = 0;
 		} else {
 			NET_EPOCH_ENTER(et);
 			sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 			NET_EPOCH_EXIT(et);
 			SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
 			queue_only = 1;
 		}
 	}
 
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 	if ((net->flight_size > net->cwnd) &&
 	    (asoc->sctp_cmt_on_off == 0)) {
 		SCTP_STAT_INCR(sctps_send_cwnd_avoid);
 		queue_only = 1;
 	} else if (asoc->ifp_had_enobuf) {
 		SCTP_STAT_INCR(sctps_ifnomemqueued);
 		if (net->flight_size > (2 * net->mtu)) {
 			queue_only = 1;
 		}
 		asoc->ifp_had_enobuf = 0;
 	}
 	un_sent = asoc->total_output_queue_size - asoc->total_flight;
 	if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
 	    (asoc->total_flight > 0) &&
 	    (asoc->stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
 	    (un_sent < (int)(asoc->smallest_mtu - SCTP_MIN_OVERHEAD))) {
 		/*-
 		 * Ok, Nagle is set on and we have data outstanding.
 		 * Don't send anything and let SACKs drive out the
 		 * data unless we have a "full" segment to send.
 		 */
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 			sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
 		}
 		SCTP_STAT_INCR(sctps_naglequeued);
 		nagle_applies = 1;
 	} else {
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 			if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
 				sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
 		}
 		SCTP_STAT_INCR(sctps_naglesent);
 		nagle_applies = 0;
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
 		sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
 		    nagle_applies, un_sent);
 		sctp_misc_ints(SCTP_CWNDLOG_PRESEND, asoc->total_output_queue_size,
 		    asoc->total_flight,
 		    asoc->chunks_on_out_queue, asoc->total_flight_count);
 	}
 
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 	NET_EPOCH_ENTER(et);
 	if ((queue_only == 0) && (nagle_applies == 0) && (asoc->peers_rwnd && un_sent)) {
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 	} else if ((queue_only == 0) &&
 		    (asoc->peers_rwnd == 0) &&
 	    (asoc->total_flight == 0)) {
 		/* We get to have a probe outstanding */
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
 	} else if (some_on_control) {
 		int num_out, reason;
 
 		/* Here we do control only */
 		(void)sctp_med_chunk_output(inp, stcb, asoc, &num_out,
 		    &reason, 1, 1, &now, &now_filled,
 		    sctp_get_frag_point(stcb),
 		    SCTP_SO_LOCKED);
 	}
 	NET_EPOCH_EXIT(et);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "USR Send complete qo:%d prw:%d unsent:%d tf:%d cooq:%d toqs:%d err:%d\n",
 	    queue_only, asoc->peers_rwnd, un_sent,
 	    asoc->total_flight, asoc->chunks_on_out_queue,
 	    asoc->total_output_queue_size, error);
 
 	KASSERT(stcb != NULL, ("stcb is NULL"));
 	KASSERT(hold_tcblock, ("hold_tcblock is false"));
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	KASSERT((asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0,
 	    ("Association about to be freed"));
 	KASSERT((asoc->state & SCTP_STATE_WAS_ABORTED) == 0,
 	    ("Association was aborted"));
 
 out:
 out_unlocked:
 	if (create_lock_applied) {
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 	}
 	if (stcb != NULL) {
 		if (local_soresv) {
 			atomic_subtract_int(&asoc->sb_send_resv, (int)sndlen);
 		}
 		if (hold_tcblock) {
 			SCTP_TCB_UNLOCK(stcb);
 		}
 		if (free_cnt_applied) {
 			atomic_subtract_int(&asoc->refcnt, 1);
 		}
 	}
 	if (top != NULL) {
 		sctp_m_freem(top);
 	}
 	if (control != NULL) {
 		sctp_m_freem(control);
 	}
 	SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, error);
 	return (error);
 }
 
 /*
  * Append an AUTH chunk to the mbuf chain 'm' when 'chunk' has to be
  * authenticated towards the peer.
  *
  * The chain is returned untouched when any of m_end, auth_ret, offset or
  * stcb is NULL, when the association does not support AUTH, when the peer
  * does not require authentication for this chunk type, or when no mbuf can
  * be allocated.  Otherwise *offset receives the byte length of the chain
  * before the AUTH chunk was added, *auth_ret points at the new chunk
  * header, and the result of sctp_copy_mbufchain() is returned.
  */
 struct mbuf *
 sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
     struct sctp_auth_chunk **auth_ret, uint32_t *offset,
     struct sctp_tcb *stcb, uint8_t chunk)
 {
 	struct sctp_auth_chunk *auth_hdr;
 	struct mbuf *auth_mbuf;
 	struct mbuf *seg;
 	uint32_t chain_bytes;
 	int auth_len;
 
 	/* Every output pointer and the association itself are mandatory. */
 	if (m_end == NULL || auth_ret == NULL || offset == NULL ||
 	    stcb == NULL) {
 		return (m);
 	}
 	/* Nothing to do unless AUTH is in use and covers this chunk type. */
 	if (stcb->asoc.auth_supported == 0 ||
 	    !sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
 		return (m);
 	}
 	auth_mbuf = sctp_get_mbuf_for_msg(sizeof(*auth_hdr), 0, M_NOWAIT, 1,
 	    MT_HEADER);
 	if (auth_mbuf == NULL) {
 		/* Out of mbufs: hand the chain back unchanged. */
 		return (m);
 	}
 	/* Leave headroom when the AUTH chunk will start the chain. */
 	if (m == NULL) {
 		SCTP_BUF_RESV_UF(auth_mbuf, SCTP_MIN_OVERHEAD);
 	}
 
 	/*
 	 * Build the fixed part of the chunk.  The key id and the HMAC digest
 	 * are computed and filled in when the packet is sent.
 	 */
 	auth_hdr = mtod(auth_mbuf, struct sctp_auth_chunk *);
 	memset(auth_hdr, 0, sizeof(*auth_hdr));
 	auth_hdr->ch.chunk_type = SCTP_AUTHENTICATION;
 	auth_hdr->ch.chunk_flags = 0;
 	auth_len = sizeof(*auth_hdr) +
 	    sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
 	auth_hdr->ch.chunk_length = htons(auth_len);
 	auth_hdr->hmac_id = htons(stcb->asoc.peer_hmac_id);
 
 	/* Record where in the chain the AUTH chunk is being inserted. */
 	chain_bytes = 0;
 	for (seg = m; seg != NULL; seg = SCTP_BUF_NEXT(seg)) {
 		chain_bytes += SCTP_BUF_LEN(seg);
 	}
 	*offset = chain_bytes;
 
 	/* Set the mbuf length, link it in and report the chunk header. */
 	SCTP_BUF_LEN(auth_mbuf) = auth_len;
 	m = sctp_copy_mbufchain(auth_mbuf, m, m_end, 1, auth_len, 0);
 	*auth_ret = auth_hdr;
 
 	return (m);
 }
 
 #ifdef INET6
 /*
  * Check whether the next-hop gateway of route 'ro' is one of the routers
  * recorded for the on-link prefix that covers the IPv6 source address 'src6'.
  *
  * Returns 1 when such a router's address compares equal (per
  * sctp_cmpaddr()) to ro->ro_nh->gw_sa.  Returns 0 when ro or its next hop
  * is NULL, when src6 is not AF_INET6, when no non-detached prefix covers
  * src6, or when none of the prefix's routers matches.
  *
  * The ND6 read lock is taken before the prefix list is walked and is
  * released on every exit path after that point.
  */
 int
 sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro)
 {
 	struct nd_prefix *pfx = NULL;
 	struct nd_pfxrouter *pfxrtr = NULL;
 	struct sockaddr_in6 gw6;
 
 	if (ro == NULL || ro->ro_nh == NULL || src6->sin6_family != AF_INET6)
 		return (0);
 
 	/* get prefix entry of address */
 	ND6_RLOCK();
 	LIST_FOREACH(pfx, &MODULE_GLOBAL(nd_prefix), ndpr_entry) {
 		/* Detached prefixes are not candidates. */
 		if (pfx->ndpr_stateflags & NDPRF_DETACHED)
 			continue;
 		/* Stop at the first prefix whose masked address equals src6's. */
 		if (IN6_ARE_MASKED_ADDR_EQUAL(&pfx->ndpr_prefix.sin6_addr,
 		    &src6->sin6_addr, &pfx->ndpr_mask))
 			break;
 	}
 	/* no prefix entry in the prefix list */
 	if (pfx == NULL) {
 		ND6_RUNLOCK();
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
 		return (0);
 	}
 
 	/*
 	 * NOTE(review): the address printed after "Prefix entry is" is src6,
 	 * not the matched prefix -- confirm that this is intended.
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
 
 	/* search installed gateway from prefix entry */
 	LIST_FOREACH(pfxrtr, &pfx->ndpr_advrtrs, pfr_entry) {
 		/* Wrap the router address in a sockaddr_in6 for comparison. */
 		memset(&gw6, 0, sizeof(struct sockaddr_in6));
 		gw6.sin6_family = AF_INET6;
 		gw6.sin6_len = sizeof(struct sockaddr_in6);
 		memcpy(&gw6.sin6_addr, &pfxrtr->router->rtaddr,
 		    sizeof(struct in6_addr));
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&gw6);
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ro->ro_nh->gw_sa);
 		if (sctp_cmpaddr((struct sockaddr *)&gw6, &ro->ro_nh->gw_sa)) {
 			ND6_RUNLOCK();
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
 			return (1);
 		}
 	}
 	ND6_RUNLOCK();
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n");
 	return (0);
 }
 #endif
 
 /*
  * Check whether the IPv4 next hop of route 'ro' lies on the same network as
  * the source address 'sifa', using the netmask of sifa's interface address.
  *
  * Returns 1 when both addresses are equal after masking.  Returns 0
  * otherwise, including when ro or its next hop is NULL, when sifa is not an
  * AF_INET address, or when the kernel is built without INET.
  */
 int
 sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro)
 {
 #ifdef INET
 	struct sockaddr_in *netmask, *src_sin, *gw_sin;
 	struct in_addr src_net, gw_net;
 
 	if (ro == NULL || ro->ro_nh == NULL) {
 		return (0);
 	}
 	if (sifa->address.sa.sa_family != AF_INET) {
 		return (0);
 	}
 	netmask = (struct sockaddr_in *)(((struct ifaddr *)sifa->ifa)->ifa_netmask);
 
 	/* Reduce the source address to its network part. */
 	src_sin = &sifa->address.sin;
 	src_net.s_addr = (src_sin->sin_addr.s_addr & netmask->sin_addr.s_addr);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", src_net.s_addr);
 
 	/* Apply the same mask to the gateway address. */
 	gw_sin = &ro->ro_nh->gw4_sa;
 	gw_net.s_addr = (gw_sin->sin_addr.s_addr & netmask->sin_addr.s_addr);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ro->ro_nh->gw_sa);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gw_net.s_addr);
 
 	return ((src_net.s_addr == gw_net.s_addr) ? 1 : 0);
 #else
 	return (0);
 #endif
 }
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 8451ed5e2007..e20a49be1adb 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -1,7678 +1,7678 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #ifdef INET6
 #include <netinet6/sctp6_var.h>
 #endif
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_uio.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_indata.h>
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_kdtrace.h>
 #if defined(INET6) || defined(INET)
 #include <netinet/tcp_var.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <sys/proc.h>
 #ifdef INET6
 #include <netinet/icmp6.h>
 #endif
 
 #ifndef KTR_SCTP
 #define KTR_SCTP KTR_SUBSYS
 #endif
 
 extern const struct sctp_cc_functions sctp_cc_functions[];
 extern const struct sctp_ss_functions sctp_ss_functions[];
 
 /*
  * Trace a socket-buffer accounting event: the socket buffer's sb_cc, the
  * association's sb_cc (0 when stcb is NULL) and the increment applied.
  * Compiles to an empty function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_sblog(struct sockbuf *sb, struct sctp_tcb *stcb, int from, int incr)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.sb.stcb = stcb;
 	rec.x.sb.incr = incr;
 	rec.x.sb.so_sbcc = sb->sb_cc;
 	rec.x.sb.stcb_sbcc = (stcb != NULL) ? stcb->asoc.sb_cc : 0;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_SB, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a close-path event for endpoint 'inp' at code location 'loc'.
  * The association pointer and state are logged as 0 when stcb is NULL.
  * inp is dereferenced unconditionally.  Compiles to an empty function
  * unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.close.inp = (void *)inp;
 	rec.x.close.sctp_flags = inp->sctp_flags;
 	rec.x.close.loc = loc;
 	if (stcb == NULL) {
 		rec.x.close.stcb = 0;
 		rec.x.close.state = 0;
 	} else {
 		rec.x.close.stcb = (void *)stcb;
 		rec.x.close.state = (uint16_t)stcb->asoc.state;
 	}
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_CLOSE, 0,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace an RTT event for path 'net'; the logged value is net->rtt divided
  * by 1000.  Compiles to an empty function unless SCTP_LOCAL_TRACE_BUF is
  * defined.
  */
 void
 rto_logging(struct sctp_nets *net, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	memset(&rec, 0, sizeof(rec));
 	rec.x.rto.rtt = net->rtt / 1000;
 	rec.x.rto.net = (void *)net;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_RTT, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a stream event from raw values (TSN, sequence number, stream id).
  * The e_tsn and e_sseq fields are always logged as 0.  Compiles to an
  * empty function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t tsn, uint16_t sseq, uint16_t stream, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.strlog.stcb = stcb;
 	rec.x.strlog.strm = stream;
 	rec.x.strlog.n_tsn = tsn;
 	rec.x.strlog.n_sseq = sseq;
 	rec.x.strlog.e_tsn = 0;
 	rec.x.strlog.e_sseq = 0;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_STRM, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a Nagle decision ('action') together with the association's
  * in-flight and queued byte and chunk counters.  stcb is dereferenced
  * unconditionally.  Compiles to an empty function unless
  * SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_nagle_event(struct sctp_tcb *stcb, int action)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.nagle.stcb = (void *)stcb;
 	/* Byte counters. */
 	rec.x.nagle.total_flight = stcb->asoc.total_flight;
 	rec.x.nagle.total_in_queue = stcb->asoc.total_output_queue_size;
 	/* Chunk counters. */
 	rec.x.nagle.count_in_flight = stcb->asoc.total_flight_count;
 	rec.x.nagle.count_in_queue = stcb->asoc.chunks_on_out_queue;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_NAGLE, action,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a SACK event: previous and new cumulative ack, a TSN, and the
  * number of gap and duplicate reports.  Compiles to an empty function
  * unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_sack(uint32_t old_cumack, uint32_t cumack, uint32_t tsn, uint16_t gaps, uint16_t dups, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.sack.oldcumack = old_cumack;
 	rec.x.sack.cumack = cumack;
 	rec.x.sack.tsn = tsn;
 	rec.x.sack.numDups = dups;
 	rec.x.sack.numGaps = gaps;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_SACK, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a mapping-array event with the three values passed in (base,
  * cumulative value, highest value).  Compiles to an empty function unless
  * SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_map(uint32_t map, uint32_t cum, uint32_t high, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	memset(&rec, 0, sizeof(rec));
 	rec.x.map.high = high;
 	rec.x.map.cum = cum;
 	rec.x.map.base = map;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_MAP, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a fast-retransmit event with the largest TSN, the largest new TSN
  * and the TSN concerned.  Compiles to an empty function unless
  * SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	memset(&rec, 0, sizeof(rec));
 	rec.x.fr.tsn = tsn;
 	rec.x.fr.largest_new_tsn = biggest_new_tsn;
 	rec.x.fr.largest_tsn = biggest_tsn;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_FR, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 #ifdef SCTP_MBUF_LOGGING
 /*
  * Trace a single mbuf: its pointer, flags, length and data pointer, plus
  * the external storage base and reference count when the mbuf is extended
  * (both logged as 0 otherwise).  Compiles to an empty function unless
  * SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_mb(struct mbuf *m, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.mb.mp = m;
 	rec.x.mb.data = SCTP_BUF_AT(m, 0);
 	rec.x.mb.size = (uint16_t)(SCTP_BUF_LEN(m));
 	rec.x.mb.mbuf_flags = (uint8_t)(SCTP_BUF_GET_FLAGS(m));
 	if (!(SCTP_BUF_IS_EXTENDED(m))) {
 		rec.x.mb.ext = 0;
 		rec.x.mb.refcnt = 0;
 	} else {
 		rec.x.mb.ext = SCTP_BUF_EXTEND_BASE(m);
 		rec.x.mb.refcnt = (uint8_t)(SCTP_BUF_EXTEND_REFCNT(m));
 	}
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_MBUF, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 void
 sctp_log_mbc(struct mbuf *m, int from)
 {
 	struct mbuf *mat;
 
 	for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
 		sctp_log_mb(mat, from);
 	}
 }
 #endif
 
 /*
  * Trace a stream event for the read-queue entry 'control', optionally
  * with a second entry 'poschk' whose TSN and message id fill the e_tsn and
  * e_sseq fields (0 when poschk is NULL).  A NULL control prints a
  * diagnostic and logs nothing.  Compiles to an empty function unless
  * SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	if (control == NULL) {
 		SCTP_PRINTF("Gak log of NULL?\n");
 		return;
 	}
 	rec.x.strlog.stcb = control->stcb;
 	rec.x.strlog.strm = control->sinfo_stream;
 	rec.x.strlog.n_tsn = control->sinfo_tsn;
 	rec.x.strlog.n_sseq = (uint16_t)control->mid;
 	rec.x.strlog.e_tsn = (poschk != NULL) ? poschk->sinfo_tsn : 0;
 	rec.x.strlog.e_sseq = (poschk != NULL) ? (uint16_t)poschk->mid : 0;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_STRM, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a congestion-window event for association 'stcb' and, when given,
  * path 'net'.
  *
  * The send-queue and stream-queue counts are clamped to 255 before being
  * stored.  When 'from' is SCTP_CWNDLOG_PRESEND the meets_pseudo_cumack
  * field is overwritten with the peer's receive window.  'augment' is
  * logged as the cwnd change.  stcb is dereferenced unconditionally; net
  * may be NULL.  Compiles to an empty function unless SCTP_LOCAL_TRACE_BUF
  * is defined.
  */
 void
 sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net, int augment, uint8_t from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log sctp_clog;
 
 	/*
 	 * Start from a zeroed record: when net is NULL the per-path fields
 	 * below are never written, yet the whole record is emitted through
 	 * x.misc.log1..log4, so stack garbage would otherwise be traced.
 	 */
 	memset(&sctp_clog, 0, sizeof(sctp_clog));
 	sctp_clog.x.cwnd.net = net;
 	if (stcb->asoc.send_queue_cnt > 255)
 		sctp_clog.x.cwnd.cnt_in_send = 255;
 	else
 		sctp_clog.x.cwnd.cnt_in_send = stcb->asoc.send_queue_cnt;
 	if (stcb->asoc.stream_queue_cnt > 255)
 		sctp_clog.x.cwnd.cnt_in_str = 255;
 	else
 		sctp_clog.x.cwnd.cnt_in_str = stcb->asoc.stream_queue_cnt;
 
 	if (net) {
 		sctp_clog.x.cwnd.cwnd_new_value = net->cwnd;
 		sctp_clog.x.cwnd.inflight = net->flight_size;
 		sctp_clog.x.cwnd.pseudo_cumack = net->pseudo_cumack;
 		sctp_clog.x.cwnd.meets_pseudo_cumack = net->new_pseudo_cumack;
 		sctp_clog.x.cwnd.need_new_pseudo_cumack = net->find_pseudo_cumack;
 	}
 	/* Pre-send events reuse this field to carry the peer's rwnd. */
 	if (SCTP_CWNDLOG_PRESEND == from) {
 		sctp_clog.x.cwnd.meets_pseudo_cumack = stcb->asoc.peers_rwnd;
 	}
 	sctp_clog.x.cwnd.cwnd_augment = augment;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_CWND,
 	    from,
 	    sctp_clog.x.misc.log1,
 	    sctp_clog.x.misc.log2,
 	    sctp_clog.x.misc.log3,
 	    sctp_clog.x.misc.log4);
 #endif
 }
 
 /*
  * Trace which SCTP-related locks the calling thread currently owns: the
  * TCB lock, the endpoint and endpoint-create locks, the global endpoint
  * info lock (write-owned), and the socket, receive-buffer and send-buffer
  * locks.  A lock whose owning object is unavailable (NULL stcb, NULL inp
  * or no socket) is recorded as SCTP_LOCK_UNKNOWN.  Compiles to an empty
  * function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	memset(&rec, 0, sizeof(rec));
 	rec.x.lock.inp = (void *)inp;
 	rec.x.lock.info_lock = rw_wowned(&SCTP_BASE_INFO(ipi_ep_mtx));
 
 	/* Association lock. */
 	if (stcb == NULL) {
 		rec.x.lock.tcb_lock = SCTP_LOCK_UNKNOWN;
 	} else {
 		rec.x.lock.tcb_lock = mtx_owned(&stcb->tcb_mtx);
 	}
 
 	/* Endpoint locks and the socket pointer. */
 	if (inp == NULL) {
 		rec.x.lock.sock = (void *)NULL;
 		rec.x.lock.inp_lock = SCTP_LOCK_UNKNOWN;
 		rec.x.lock.create_lock = SCTP_LOCK_UNKNOWN;
 	} else {
 		rec.x.lock.sock = (void *)inp->sctp_socket;
 		rec.x.lock.inp_lock = mtx_owned(&inp->inp_mtx);
 		rec.x.lock.create_lock = mtx_owned(&inp->inp_create_mtx);
 	}
 
 	/* Socket-level locks need both the endpoint and its socket. */
 	if (inp == NULL || inp->sctp_socket == NULL) {
 		rec.x.lock.sock_lock = SCTP_LOCK_UNKNOWN;
 		rec.x.lock.sockrcvbuf_lock = SCTP_LOCK_UNKNOWN;
 		rec.x.lock.socksndbuf_lock = SCTP_LOCK_UNKNOWN;
 	} else {
 		rec.x.lock.sock_lock = mtx_owned(SOCK_MTX(inp->sctp_socket));
 		rec.x.lock.sockrcvbuf_lock = mtx_owned(SOCKBUF_MTX(&inp->sctp_socket->so_rcv));
 		rec.x.lock.socksndbuf_lock = mtx_owned(SOCKBUF_MTX(&inp->sctp_socket->so_snd));
 	}
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_LOCK_EVENT, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a max-burst event.  The record reuses the cwnd layout: 'error'
  * goes into cwnd_new_value and 'burst' into cwnd_augment, alongside the
  * path's flight size and the send/stream queue counts clamped to 255.
  * Both stcb and net are dereferenced unconditionally.  Compiles to an
  * empty function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *net, int error, int burst, uint8_t from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	memset(&rec, 0, sizeof(rec));
 	rec.x.cwnd.net = net;
 	rec.x.cwnd.inflight = net->flight_size;
 	rec.x.cwnd.cwnd_new_value = error;
 	rec.x.cwnd.cwnd_augment = burst;
 	rec.x.cwnd.cnt_in_send = (stcb->asoc.send_queue_cnt > 255) ?
 	    255 : stcb->asoc.send_queue_cnt;
 	rec.x.cwnd.cnt_in_str = (stcb->asoc.stream_queue_cnt > 255) ?
 	    255 : stcb->asoc.stream_queue_cnt;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_MAXBURST, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a receive-window event: the peer's rwnd, the size being sent and
  * the overhead.  The new_rwnd field is always logged as 0.  Compiles to
  * an empty function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_rwnd(uint8_t from, uint32_t peers_rwnd, uint32_t snd_size, uint32_t overhead)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.rwnd.new_rwnd = 0;
 	rec.x.rwnd.overhead = overhead;
 	rec.x.rwnd.send_size = snd_size;
 	rec.x.rwnd.rwnd = peers_rwnd;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_RWND, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Trace a receive-window update: the peer's rwnd, the flight size (stored
  * in the send_size field), the overhead and the newly advertised value
  * 'a_rwndval'.  Compiles to an empty function unless
  * SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_rwnd_set(uint8_t from, uint32_t peers_rwnd, uint32_t flight_size, uint32_t overhead, uint32_t a_rwndval)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.rwnd.new_rwnd = a_rwndval;
 	rec.x.rwnd.overhead = overhead;
 	rec.x.rwnd.send_size = flight_size;
 	rec.x.rwnd.rwnd = peers_rwnd;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_RWND, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 #ifdef SCTP_MBCNT_LOGGING
 /*
  * Trace an mbuf-count accounting event: the total queue size and its
  * change, and the total queued mbuf size and its change.  Compiles to an
  * empty function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 static void
 sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mbcnt_q, uint32_t mbcnt)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	/* Byte accounting. */
 	rec.x.mbcnt.total_queue_size = total_oq;
 	rec.x.mbcnt.size_change = book;
 	/* Mbuf accounting. */
 	rec.x.mbcnt.total_queue_mb_size = total_mbcnt_q;
 	rec.x.mbcnt.mbcnt_change = mbcnt;
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_MBCNT, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 #endif
 
 /*
  * Trace four caller-supplied 32-bit values as a SCTP_LOG_MISC_EVENT
  * record tagged with 'from'.  The values are passed to the trace macro
  * as-is.  Compiles to an empty function unless SCTP_LOCAL_TRACE_BUF is
  * defined.
  */
 void
 sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_MISC_EVENT,
 	    from,
 	    a, b, c, d);
 #endif
 }
 
 /*
  * Trace a wakeup event for association 'stcb': the wake count, flight
  * count, send/sent queue counts, the stream-queue and out-queue chunk
  * counts (each saturated at 0xff), three endpoint wake flags packed into
  * bits 0-2, and the low byte of the send buffer's flags (0xff when the
  * association has no socket).  stcb is dereferenced unconditionally.
  * Compiles to an empty function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t wake_cnt, int from)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 	struct socket *sock;
 	int wake_flags;
 
 	rec.x.wake.stcb = (void *)stcb;
 	rec.x.wake.wake_cnt = wake_cnt;
 	rec.x.wake.flight = stcb->asoc.total_flight_count;
 	rec.x.wake.send_q = stcb->asoc.send_queue_cnt;
 	rec.x.wake.sent_q = stcb->asoc.sent_queue_cnt;
 
 	/* Saturate the two chunk counters at 0xff. */
 	rec.x.wake.stream_qcnt = (stcb->asoc.stream_queue_cnt < 0xff) ?
 	    (uint8_t)stcb->asoc.stream_queue_cnt : 0xff;
 	rec.x.wake.chunks_on_oque = (stcb->asoc.chunks_on_out_queue < 0xff) ?
 	    (uint8_t)stcb->asoc.chunks_on_out_queue : 0xff;
 
 	/* set in the deferred mode stuff */
 	wake_flags = 0;
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) {
 		wake_flags |= 1;
 	}
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) {
 		wake_flags |= 2;
 	}
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) {
 		wake_flags |= 4;
 	}
 	rec.x.wake.sctpflags = wake_flags;
 
 	/* what about the sb */
 	sock = stcb->sctp_socket;
 	if (sock == NULL) {
 		rec.x.wake.sbflags = 0xff;
 	} else {
 		rec.x.wake.sbflags = (uint8_t)((sock->so_snd.sb_flags & 0x00ff));
 	}
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_WAKE, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Emit a KTR trace record describing the association's queues at the time
  * a sender is about to block.
  *
  * from     - caller-supplied location code, printed in the record.
  * asoc     - association whose counters are sampled.
  * sendlen  - length of the pending send, truncated to 32 bits.
  *
  * Compiles to an empty function unless SCTP_LOCAL_TRACE_BUF is defined.
  */
 void
 sctp_log_block(uint8_t from, struct sctp_association *asoc, ssize_t sendlen)
 {
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_cwnd_log rec;
 
 	rec.x.blk.sndlen = (uint32_t)sendlen;
 	rec.x.blk.peer_rwnd = asoc->peers_rwnd;
 	rec.x.blk.onsb = asoc->total_output_queue_size;
 	/* Queue counters are narrowed to 16 bits for the record. */
 	rec.x.blk.send_sent_qcnt =
 	    (uint16_t)(asoc->send_queue_cnt + asoc->sent_queue_cnt);
 	rec.x.blk.stream_qcnt = (uint16_t)asoc->stream_queue_cnt;
 	rec.x.blk.chunks_on_oque = (uint16_t)asoc->chunks_on_out_queue;
 	/* Flight size is logged in units of 1024 bytes. */
 	rec.x.blk.flight_size = (uint16_t)(asoc->total_flight / 1024);
 	SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
 	    SCTP_LOG_EVENT_BLOCK, from,
 	    rec.x.misc.log1, rec.x.misc.log2,
 	    rec.x.misc.log3, rec.x.misc.log4);
 #endif
 }
 
 /*
  * Placeholder for copying the statistics log out to a caller.
  *
  * Both arguments are ignored and nothing is written; the function always
  * returns 0.
  */
 int
 sctp_fill_stat_log(void *optval SCTP_UNUSED, size_t *optsize SCTP_UNUSED)
 {
 	/* May need to fix this if ktrdump does not work */
 	return (0);
 }
 
 #ifdef SCTP_AUDITING_ENABLED
 /*
  * Audit trail used as a ring buffer: each entry is a pair of bytes, written
  * by sctp_auditing() and sctp_audit_log().
  */
 uint8_t sctp_audit_data[SCTP_AUDIT_SIZE][2];
 /* Index of the next entry to write; wraps to 0 at SCTP_AUDIT_SIZE. */
 static int sctp_audit_indx = 0;
 
 /*
  * Print one audit entry as four hex digits followed by a space.
  *
  * A line break is emitted (and the column counter reset) before entries
  * whose bytes are e0/01, f0/xx or c0/01, and after every 14th entry on a
  * line.  *cnt carries the column counter between calls.
  */
 static void
 sctp_print_audit_entry(int idx, int *cnt)
 {
 	uint8_t ev = sctp_audit_data[idx][0];
 	uint8_t fd = sctp_audit_data[idx][1];
 
 	if (((ev == 0xe0) && (fd == 0x01)) ||
 	    (ev == 0xf0) ||
 	    ((ev == 0xc0) && (fd == 0x01))) {
 		*cnt = 0;
 		SCTP_PRINTF("\n");
 	}
 	SCTP_PRINTF("%2.2x%2.2x ", (uint32_t)ev, (uint32_t)fd);
 	(*cnt)++;
 	if ((*cnt % 14) == 0)
 		SCTP_PRINTF("\n");
 }
 
 /*
  * Dump the whole audit ring, oldest entry first: from the current write
  * index to the end of the array, then from the start of the array up to
  * the write index.
  */
 static void
 sctp_print_audit_report(void)
 {
 	int column = 0;
 	int idx;
 
 	for (idx = sctp_audit_indx; idx < SCTP_AUDIT_SIZE; idx++)
 		sctp_print_audit_entry(idx, &column);
 	for (idx = 0; idx < sctp_audit_indx; idx++)
 		sctp_print_audit_entry(idx, &column);
 	SCTP_PRINTF("\n");
 }
 
 void
 sctp_auditing(int from, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	int resend_cnt, tot_out, rep, tot_book_cnt;
 	struct sctp_nets *lnet;
 	struct sctp_tmit_chunk *chk;
 
 	sctp_audit_data[sctp_audit_indx][0] = 0xAA;
 	sctp_audit_data[sctp_audit_indx][1] = 0x000000ff & from;
 	sctp_audit_indx++;
 	if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 		sctp_audit_indx = 0;
 	}
 	if (inp == NULL) {
 		sctp_audit_data[sctp_audit_indx][0] = 0xAF;
 		sctp_audit_data[sctp_audit_indx][1] = 0x01;
 		sctp_audit_indx++;
 		if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 			sctp_audit_indx = 0;
 		}
 		return;
 	}
 	if (stcb == NULL) {
 		sctp_audit_data[sctp_audit_indx][0] = 0xAF;
 		sctp_audit_data[sctp_audit_indx][1] = 0x02;
 		sctp_audit_indx++;
 		if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 			sctp_audit_indx = 0;
 		}
 		return;
 	}
 	sctp_audit_data[sctp_audit_indx][0] = 0xA1;
 	sctp_audit_data[sctp_audit_indx][1] =
 	    (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
 	sctp_audit_indx++;
 	if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 		sctp_audit_indx = 0;
 	}
 	rep = 0;
 	tot_book_cnt = 0;
 	resend_cnt = tot_out = 0;
 	TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
 		if (chk->sent == SCTP_DATAGRAM_RESEND) {
 			resend_cnt++;
 		} else if (chk->sent < SCTP_DATAGRAM_RESEND) {
 			tot_out += chk->book_size;
 			tot_book_cnt++;
 		}
 	}
 	if (resend_cnt != stcb->asoc.sent_queue_retran_cnt) {
 		sctp_audit_data[sctp_audit_indx][0] = 0xAF;
 		sctp_audit_data[sctp_audit_indx][1] = 0xA1;
 		sctp_audit_indx++;
 		if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 			sctp_audit_indx = 0;
 		}
 		SCTP_PRINTF("resend_cnt:%d asoc-tot:%d\n",
 		    resend_cnt, stcb->asoc.sent_queue_retran_cnt);
 		rep = 1;
 		stcb->asoc.sent_queue_retran_cnt = resend_cnt;
 		sctp_audit_data[sctp_audit_indx][0] = 0xA2;
 		sctp_audit_data[sctp_audit_indx][1] =
 		    (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
 		sctp_audit_indx++;
 		if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 			sctp_audit_indx = 0;
 		}
 	}
 	if (tot_out != stcb->asoc.total_flight) {
 		sctp_audit_data[sctp_audit_indx][0] = 0xAF;
 		sctp_audit_data[sctp_audit_indx][1] = 0xA2;
 		sctp_audit_indx++;
 		if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 			sctp_audit_indx = 0;
 		}
 		rep = 1;
 		SCTP_PRINTF("tot_flt:%d asoc_tot:%d\n", tot_out,
 		    (int)stcb->asoc.total_flight);
 		stcb->asoc.total_flight = tot_out;
 	}
 	if (tot_book_cnt != stcb->asoc.total_flight_count) {
 		sctp_audit_data[sctp_audit_indx][0] = 0xAF;
 		sctp_audit_data[sctp_audit_indx][1] = 0xA5;
 		sctp_audit_indx++;
 		if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 			sctp_audit_indx = 0;
 		}
 		rep = 1;
 		SCTP_PRINTF("tot_flt_book:%d\n", tot_book_cnt);
 
 		stcb->asoc.total_flight_count = tot_book_cnt;
 	}
 	tot_out = 0;
 	TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
 		tot_out += lnet->flight_size;
 	}
 	if (tot_out != stcb->asoc.total_flight) {
 		sctp_audit_data[sctp_audit_indx][0] = 0xAF;
 		sctp_audit_data[sctp_audit_indx][1] = 0xA3;
 		sctp_audit_indx++;
 		if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
 			sctp_audit_indx = 0;
 		}
 		rep = 1;
 		SCTP_PRINTF("real flight:%d net total was %d\n",
 		    stcb->asoc.total_flight, tot_out);
 		/* now corrective action */
 		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
 			tot_out = 0;
 			TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
 				if ((chk->whoTo == lnet) &&
 				    (chk->sent < SCTP_DATAGRAM_RESEND)) {
 					tot_out += chk->book_size;
 				}
 			}
 			if (lnet->flight_size != tot_out) {
 				SCTP_PRINTF("net:%p flight was %d corrected to %d\n",
 				    (void *)lnet, lnet->flight_size,
 				    tot_out);
 				lnet->flight_size = tot_out;
 			}
 		}
 	}
 	if (rep) {
 		sctp_print_audit_report();
 	}
 }
 
 /*
  * Append the byte pair (ev, fd) to the audit ring and advance the write
  * index, wrapping back to 0 once it reaches SCTP_AUDIT_SIZE.
  */
 void
 sctp_audit_log(uint8_t ev, uint8_t fd)
 {
 	uint8_t *slot = sctp_audit_data[sctp_audit_indx];
 
 	slot[0] = ev;
 	slot[1] = fd;
 	if (++sctp_audit_indx >= SCTP_AUDIT_SIZE)
 		sctp_audit_indx = 0;
 }
 
 #endif
 
 /*
  * The conversion from time to ticks and vice versa is done by rounding
  * upwards. This way we can test in the code the time to be positive and
  * know that this corresponds to a positive number of ticks.
  */
 
 /*
  * Convert milliseconds to ticks, rounding up and saturating at UINT32_MAX.
  * When hz is 1000 the value is returned unchanged.
  */
 uint32_t
 sctp_msecs_to_ticks(uint32_t msecs)
 {
 	uint64_t scaled;
 
 	if (hz == 1000) {
 		return (msecs);
 	}
 	/* Adding 999 before dividing by 1000 rounds any remainder upwards. */
 	scaled = (((uint64_t)msecs * hz) + 999) / 1000;
 	return ((scaled > UINT32_MAX) ? UINT32_MAX : (uint32_t)scaled);
 }
 
 /*
  * Convert ticks to milliseconds, rounding up and saturating at UINT32_MAX.
  * When hz is 1000 the value is returned unchanged.
  */
 uint32_t
 sctp_ticks_to_msecs(uint32_t ticks)
 {
 	uint64_t scaled;
 
 	if (hz == 1000) {
 		return (ticks);
 	}
 	/* Adding hz - 1 before dividing by hz rounds any remainder upwards. */
 	scaled = (((uint64_t)ticks * 1000) + (hz - 1)) / hz;
 	return ((scaled > UINT32_MAX) ? UINT32_MAX : (uint32_t)scaled);
 }
 
 /*
  * Convert seconds to ticks (secs * hz), saturating at UINT32_MAX.
  * The product is formed in 64 bits so it cannot wrap before the clamp.
  */
 uint32_t
 sctp_secs_to_ticks(uint32_t secs)
 {
 	uint64_t product = (uint64_t)secs * hz;
 
 	return ((product > UINT32_MAX) ? UINT32_MAX : (uint32_t)product);
 }
 
 /*
  * Convert ticks to seconds, rounding up and saturating at UINT32_MAX.
  */
 uint32_t
 sctp_ticks_to_secs(uint32_t ticks)
 {
 	/* Adding hz - 1 before dividing by hz rounds any remainder upwards. */
 	uint64_t rounded = ((uint64_t)ticks + (hz - 1)) / hz;
 
 	return ((rounded > UINT32_MAX) ? UINT32_MAX : (uint32_t)rounded);
 }
 
 /*
  * sctp_stop_timers_for_shutdown() should be called
  * when entering the SHUTDOWN_SENT or SHUTDOWN_ACK_SENT
  * state to make sure that all timers are stopped.
  */
 void
 sctp_stop_timers_for_shutdown(struct sctp_tcb *stcb)
 {
 	struct sctp_inpcb *inp;
 	struct sctp_nets *net;
 
 	inp = stcb->sctp_ep;
 
 	sctp_timer_stop(SCTP_TIMER_TYPE_RECV, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_12);
 	sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_13);
 	sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_14);
 	sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_15);
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_16);
 		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_17);
 	}
 }
 
 void
 sctp_stop_association_timers(struct sctp_tcb *stcb, bool stop_assoc_kill_timer)
 {
 	struct sctp_inpcb *inp;
 	struct sctp_nets *net;
 
 	inp = stcb->sctp_ep;
 	sctp_timer_stop(SCTP_TIMER_TYPE_RECV, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_18);
 	sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_19);
 	if (stop_assoc_kill_timer) {
 		sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_20);
 	}
 	sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_21);
 	sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_22);
 	sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNGUARD, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_23);
 	/* Mobility adaptation */
 	sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, inp, stcb, NULL,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_24);
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_25);
 		sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_26);
 		sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_27);
 		sctp_timer_stop(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_28);
 		sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_29);
 		sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_30);
 		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_31);
 	}
 }
 
 /*
  * A list of sizes based on typical MTUs, used only if the next hop size is
  * not returned. These values MUST be multiples of 4 and MUST be ordered.
  */
 static uint32_t sctp_mtu_sizes[] = {
 	/* strictly ascending, every entry a multiple of 4 */
 	68, 296, 508, 512, 544, 576,
 	1004, 1492, 1500, 1536,
 	2000, 2048, 4352, 4464, 8168,
 	17912, 32000, 65532
 };
 
 /*
  * Return the largest MTU in sctp_mtu_sizes smaller than val.
  * If val is smaller than the minimum, just return the largest
  * multiple of 4 smaller or equal to val.
  * Ensure that the result is a multiple of 4.
  */
 uint32_t
 sctp_get_prev_mtu(uint32_t val)
 {
 	const uint32_t count = sizeof(sctp_mtu_sizes) / sizeof(uint32_t);
 	uint32_t idx;
 
 	/* Round down to a multiple of 4 before comparing against the table. */
 	val &= 0xfffffffc;
 	if (val <= sctp_mtu_sizes[0]) {
 		return (val);
 	}
 	/*
 	 * Advance to the first entry that is not smaller than val (or past the
 	 * end of the table); the entry just before it is the answer.
 	 */
 	idx = 1;
 	while ((idx < count) && (val > sctp_mtu_sizes[idx])) {
 		idx++;
 	}
 	KASSERT((sctp_mtu_sizes[idx - 1] & 0x00000003) == 0,
 	    ("sctp_mtu_sizes[%u] not a multiple of 4", idx - 1));
 	return (sctp_mtu_sizes[idx - 1]);
 }
 
 /*
  * Return the smallest MTU in sctp_mtu_sizes larger than val.
  * If val is larger than the maximum, just return the largest multiple of 4 smaller
  * or equal to val.
  * Ensure that the result is a multiple of 4.
  */
 uint32_t
 sctp_get_next_mtu(uint32_t val)
 {
 	const uint32_t count = sizeof(sctp_mtu_sizes) / sizeof(uint32_t);
 	uint32_t idx = 0;
 
 	/* Round down to a multiple of 4 before comparing against the table. */
 	val &= 0xfffffffc;
 	/* Skip every table entry that is not strictly larger than val. */
 	while ((idx < count) && (val >= sctp_mtu_sizes[idx])) {
 		idx++;
 	}
 	if (idx == count) {
 		/* Nothing in the table is larger: hand back the rounded value. */
 		return (val);
 	}
 	KASSERT((sctp_mtu_sizes[idx] & 0x00000003) == 0,
 	    ("sctp_mtu_sizes[%u] not a multiple of 4", idx));
 	return (sctp_mtu_sizes[idx]);
 }
 
 /*
  * Regenerate the endpoint's random store.
  *
  * The store is overwritten with the sctp_hmac() output computed from the
  * endpoint's random_numbers and its random_counter, the read position
  * (store_at) is reset to 0 and the counter is incremented so the next
  * refill produces different output.
  *
  * No locking is done here on purpose: concurrent callers merely scramble
  * the store further, and two consumers occasionally receiving the same
  * bytes is considered acceptable.
  */
 void
 sctp_fill_random_store(struct sctp_pcb *m)
 {
 	uint8_t *seed = (uint8_t *)m->random_numbers;
 	uint8_t *counter = (uint8_t *)&m->random_counter;
 	uint8_t *store = (uint8_t *)m->random_store;
 
 	m->store_at = 0;
 	(void)sctp_hmac(SCTP_HMAC,
 	    seed, sizeof(m->random_numbers),
 	    counter, sizeof(m->random_counter),
 	    store);
 	m->random_counter++;
 }
 
 /*
  * Return a 32-bit value taken from the endpoint's random store.
  *
  * Despite the name the result is not only used as an initial TSN: callers
  * in this file also use it for verification tags and IPv6 flow labels.
  *
  * When inp->initial_sequence_debug is non-zero, that value is returned and
  * then incremented instead, giving a predictable sequence.
  *
  * Otherwise a 4-byte slot in inp->random_store is claimed by advancing
  * inp->store_at with an atomic compare-and-set (retried until it succeeds).
  * The caller that wraps the position back to 0 refills the store before
  * reading its slot.
  */
 uint32_t
 sctp_select_initial_TSN(struct sctp_pcb *inp)
 {
 	uint32_t x;
 	int store_at, new_store;
 
 	if (inp->initial_sequence_debug != 0) {
 		uint32_t ret;
 
 		ret = inp->initial_sequence_debug;
 		inp->initial_sequence_debug++;
 		return (ret);
 	}
 	do {
 		store_at = inp->store_at;
 		new_store = store_at + sizeof(uint32_t);
 		if (new_store >= (SCTP_SIGNATURE_SIZE - 3)) {
 			new_store = 0;
 		}
 	} while (!atomic_cmpset_int(&inp->store_at, store_at, new_store));
 	if (new_store == 0) {
 		/* Refill the random store */
 		sctp_fill_random_store(inp);
 	}
 	/*
 	 * Copy the bytes out instead of dereferencing a uint32_t pointer into
 	 * the byte array: that avoids both the type-punned access and any
 	 * dependence on the alignment of random_store.
 	 */
 	memcpy(&x, &inp->random_store[store_at], sizeof(x));
 	return (x);
 }
 
 /*
  * Pick a non-zero random 32-bit tag for the given port pair.
  *
  * inp           - endpoint whose random store supplies the candidates.
  * lport, rport  - ports passed on to sctp_is_vtag_good().
  * check         - when non-zero, candidates are additionally required to
  *                 pass sctp_is_vtag_good() against the current time.
  *
  * Loops until an acceptable value is drawn; 0 is never returned.
  */
 uint32_t
 sctp_select_a_tag(struct sctp_inpcb *inp, uint16_t lport, uint16_t rport, int check)
 {
 	struct timeval now;
 	uint32_t tag;
 
 	/* The timestamp is only needed (and only read) when checking. */
 	if (check) {
 		(void)SCTP_GETTIME_TIMEVAL(&now);
 	}
 	do {
 		tag = sctp_select_initial_TSN(&inp->sctp_ep);
 	} while ((tag == 0) ||
 	    (check && !sctp_is_vtag_good(tag, lport, rport, &now)));
 	return (tag);
 }
 
 /*
  * Translate a kernel association state word into the user-visible SCTP_*
  * state.
  *
  * The WAS_ABORTED and SHUTDOWN_PENDING flag bits take precedence, in that
  * order; otherwise the value under SCTP_STATE_MASK is mapped.  Anything
  * unrecognised is reported as SCTP_CLOSED.
  */
 int32_t
 sctp_map_assoc_state(int kernel_state)
 {
 	if (kernel_state & SCTP_STATE_WAS_ABORTED) {
 		return (SCTP_CLOSED);
 	}
 	if (kernel_state & SCTP_STATE_SHUTDOWN_PENDING) {
 		return (SCTP_SHUTDOWN_PENDING);
 	}
 	switch (kernel_state & SCTP_STATE_MASK) {
 	case SCTP_STATE_COOKIE_WAIT:
 		return (SCTP_COOKIE_WAIT);
 	case SCTP_STATE_COOKIE_ECHOED:
 		return (SCTP_COOKIE_ECHOED);
 	case SCTP_STATE_OPEN:
 		return (SCTP_ESTABLISHED);
 	case SCTP_STATE_SHUTDOWN_SENT:
 		return (SCTP_SHUTDOWN_SENT);
 	case SCTP_STATE_SHUTDOWN_RECEIVED:
 		return (SCTP_SHUTDOWN_RECEIVED);
 	case SCTP_STATE_SHUTDOWN_ACK_SENT:
 		return (SCTP_SHUTDOWN_ACK_SENT);
 	case SCTP_STATE_EMPTY:
 	case SCTP_STATE_INUSE:
 	default:
 		return (SCTP_CLOSED);
 	}
 }
 
 /*
  * Initialize the association embedded in stcb (stcb->asoc) from the
  * defaults stored in the endpoint inp.
  *
  * inp           - endpoint supplying the default settings.
  * stcb          - control block whose association is initialized.
  * override_tag  - when non-zero it is used as the local verification tag
  *                 and initial_tsn is used as the initial sequence number;
  *                 when zero both are chosen randomly.
  * initial_tsn   - initial sequence number, only used with override_tag.
  * vrf_id        - stored in the association as is.
  * o_strms       - number of outbound streams to allocate and initialize.
  *
  * Returns 0 on success or ENOMEM when the outbound stream array or one of
  * the two mapping arrays cannot be allocated; arrays allocated earlier in
  * the call are freed again before returning the error.
  */
 int
 sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     uint32_t override_tag, uint32_t initial_tsn, uint32_t vrf_id,
     uint16_t o_strms)
 {
 	struct sctp_association *asoc;
 
 	/*
 	 * Anything set to zero is taken care of by the allocation routine's
 	 * bzero
 	 */
 
 	/*
 	 * Up front select what scoping to apply on addresses I tell my peer
 	 * Not sure what to do with these right now, we will need to come up
 	 * with a way to set them. We may need to pass them through from the
 	 * caller in the sctp_aloc_assoc() function.
 	 */
 	int i;
 #if defined(SCTP_DETAILED_STR_STATS)
 	int j;
 #endif
 
 	asoc = &stcb->asoc;
 	/* init all variables to a known value. */
 	SCTP_SET_STATE(stcb, SCTP_STATE_INUSE);
 	/* Copy the endpoint's defaults and feature switches. */
 	asoc->max_burst = inp->sctp_ep.max_burst;
 	asoc->fr_max_burst = inp->sctp_ep.fr_max_burst;
 	asoc->heart_beat_delay = sctp_ticks_to_msecs(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
 	asoc->cookie_life = inp->sctp_ep.def_cookie_life;
 	asoc->sctp_cmt_on_off = inp->sctp_cmt_on_off;
 	asoc->ecn_supported = inp->ecn_supported;
 	asoc->prsctp_supported = inp->prsctp_supported;
 	asoc->auth_supported = inp->auth_supported;
 	asoc->asconf_supported = inp->asconf_supported;
 	asoc->reconfig_supported = inp->reconfig_supported;
 	asoc->nrsack_supported = inp->nrsack_supported;
 	asoc->pktdrop_supported = inp->pktdrop_supported;
 	asoc->idata_supported = inp->idata_supported;
 	asoc->sctp_cmt_pf = (uint8_t)0;
 	asoc->sctp_frag_point = inp->sctp_frag_point;
 	asoc->sctp_features = inp->sctp_features;
 	asoc->default_dscp = inp->sctp_ep.default_dscp;
 	asoc->max_cwnd = inp->max_cwnd;
 #ifdef INET6
 	/*
 	 * Use the endpoint's flow label if one is configured; otherwise,
 	 * with IN6P_AUTOFLOWLABEL set, draw a random 20-bit label and set
 	 * the top bit of the stored value.
 	 */
 	if (inp->sctp_ep.default_flowlabel) {
 		asoc->default_flowlabel = inp->sctp_ep.default_flowlabel;
 	} else {
 		if (inp->ip_inp.inp.inp_flags & IN6P_AUTOFLOWLABEL) {
 			asoc->default_flowlabel = sctp_select_initial_TSN(&inp->sctp_ep);
 			asoc->default_flowlabel &= 0x000fffff;
 			asoc->default_flowlabel |= 0x80000000;
 		} else {
 			asoc->default_flowlabel = 0;
 		}
 	}
 #endif
 	asoc->sb_send_resv = 0;
 	/* Local verification tag: caller supplied, or a checked random one. */
 	if (override_tag) {
 		asoc->my_vtag = override_tag;
 	} else {
 		asoc->my_vtag = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
 	}
 	/* Get the nonce tags */
 	asoc->my_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
 	asoc->peer_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
 	asoc->vrf_id = vrf_id;
 
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	asoc->tsn_in_at = 0;
 	asoc->tsn_out_at = 0;
 	asoc->tsn_in_wrapped = 0;
 	asoc->tsn_out_wrapped = 0;
 	asoc->cumack_log_at = 0;
 	asoc->cumack_log_atsnt = 0;
 #endif
 #ifdef SCTP_FS_SPEC_LOG
 	asoc->fs_index = 0;
 #endif
 	asoc->refcnt = 0;
 	asoc->assoc_up_sent = 0;
 	/* Initial sequence number: caller supplied, or random. */
 	if (override_tag) {
 		asoc->init_seq_number = initial_tsn;
 	} else {
 		asoc->init_seq_number = sctp_select_initial_TSN(&inp->sctp_ep);
 	}
 	/* All outgoing sequence spaces start at the initial sequence number. */
 	asoc->asconf_seq_out = asoc->init_seq_number;
 	asoc->str_reset_seq_out = asoc->init_seq_number;
 	asoc->sending_seq = asoc->init_seq_number;
 	asoc->asconf_seq_out_acked = asoc->init_seq_number - 1;
 	/* we are optimistic here */
 	asoc->peer_supports_nat = 0;
 	asoc->sent_queue_retran_cnt = 0;
 
 	/* for CMT */
 	asoc->last_net_cmt_send_started = NULL;
 
 	/* These hold the last value seen, hence one before the initial one. */
 	asoc->last_acked_seq = asoc->init_seq_number - 1;
 	asoc->advanced_peer_ack_point = asoc->init_seq_number - 1;
 	asoc->asconf_seq_in = asoc->init_seq_number - 1;
 
 	/* here we are different, we hold the next one we expect */
 	asoc->str_reset_seq_in = asoc->init_seq_number;
 
 	asoc->initial_init_rto_max = inp->sctp_ep.initial_init_rto_max;
 	asoc->initial_rto = inp->sctp_ep.initial_rto;
 
 	asoc->default_mtu = inp->sctp_ep.default_mtu;
 	asoc->max_init_times = inp->sctp_ep.max_init_times;
 	asoc->max_send_times = inp->sctp_ep.max_send_times;
 	asoc->def_net_failure = inp->sctp_ep.def_net_failure;
 	asoc->def_net_pf_threshold = inp->sctp_ep.def_net_pf_threshold;
 	asoc->free_chunk_cnt = 0;
 
 	asoc->iam_blocking = 0;
 	asoc->context = inp->sctp_context;
 	asoc->local_strreset_support = inp->local_strreset_support;
 	asoc->def_send = inp->def_send;
 	asoc->delayed_ack = sctp_ticks_to_msecs(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
 	asoc->sack_freq = inp->sctp_ep.sctp_sack_freq;
 	asoc->pr_sctp_cnt = 0;
 	asoc->total_output_queue_size = 0;
 
 	/*
 	 * Address families usable by this association: a V6-bound endpoint
 	 * allows IPv6 and, unless it is V6ONLY, IPv4 as well; any other
 	 * endpoint allows IPv4 only.
 	 */
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 		asoc->scope.ipv6_addr_legal = 1;
 		if (SCTP_IPV6_V6ONLY(inp) == 0) {
 			asoc->scope.ipv4_addr_legal = 1;
 		} else {
 			asoc->scope.ipv4_addr_legal = 0;
 		}
 	} else {
 		asoc->scope.ipv6_addr_legal = 0;
 		asoc->scope.ipv4_addr_legal = 1;
 	}
 
 	/* Receive windows start from the socket's receive buffer limit. */
 	asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(inp->sctp_socket), SCTP_MINIMAL_RWND);
 	asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(inp->sctp_socket);
 
 	asoc->smallest_mtu = 0;
 	asoc->minrto = inp->sctp_ep.sctp_minrto;
 	asoc->maxrto = inp->sctp_ep.sctp_maxrto;
 
 	asoc->stream_locked_on = 0;
 	asoc->ecn_echo_cnt_onq = 0;
 	asoc->stream_locked = 0;
 
 	asoc->send_sack = 1;
 
 	LIST_INIT(&asoc->sctp_restricted_addrs);
 
 	TAILQ_INIT(&asoc->nets);
 	TAILQ_INIT(&asoc->pending_reply_queue);
 	TAILQ_INIT(&asoc->asconf_ack_sent);
 	/* Setup to fill the hb random cache at first HB */
 	asoc->hb_random_idx = 4;
 
 	asoc->sctp_autoclose_ticks = inp->sctp_ep.auto_close_time;
 
 	/* Select the endpoint's default congestion control module. */
 	stcb->asoc.congestion_control_module = inp->sctp_ep.sctp_default_cc_module;
 	stcb->asoc.cc_functions = sctp_cc_functions[inp->sctp_ep.sctp_default_cc_module];
 
 	/* Select the endpoint's default stream scheduling module. */
 	stcb->asoc.stream_scheduling_module = inp->sctp_ep.sctp_default_ss_module;
 	stcb->asoc.ss_functions = sctp_ss_functions[inp->sctp_ep.sctp_default_ss_module];
 
 	/*
 	 * Now the stream parameters, here we allocate space for all streams
 	 * that we request by default.
 	 */
 	asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams =
 	    o_strms;
 	SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *,
 	    asoc->streamoutcnt * sizeof(struct sctp_stream_out),
 	    SCTP_M_STRMO);
 	if (asoc->strmout == NULL) {
 		/* big trouble no memory */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
 		return (ENOMEM);
 	}
 	/* The stream scheduler callbacks below are invoked with the TCB locked. */
 	SCTP_TCB_LOCK(stcb);
 	for (i = 0; i < asoc->streamoutcnt; i++) {
 		/*
 		 * inbound side must be set to 0xffff, also NOTE when we get
 		 * the INIT-ACK back (for INIT sender) we MUST reduce the
 		 * count (streamoutcnt) but first check if we sent to any of
 		 * the upper streams that were dropped (if some were). Those
 		 * that were dropped must be notified to the upper layer as
 		 * failed to send.
 		 */
 		TAILQ_INIT(&asoc->strmout[i].outqueue);
 		asoc->ss_functions.sctp_ss_init_stream(stcb, &asoc->strmout[i], NULL);
 		asoc->strmout[i].chunks_on_queues = 0;
 #if defined(SCTP_DETAILED_STR_STATS)
 		for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 			asoc->strmout[i].abandoned_sent[j] = 0;
 			asoc->strmout[i].abandoned_unsent[j] = 0;
 		}
 #else
 		asoc->strmout[i].abandoned_sent[0] = 0;
 		asoc->strmout[i].abandoned_unsent[0] = 0;
 #endif
 		asoc->strmout[i].next_mid_ordered = 0;
 		asoc->strmout[i].next_mid_unordered = 0;
 		asoc->strmout[i].sid = i;
 		asoc->strmout[i].last_msg_incomplete = 0;
 		asoc->strmout[i].state = SCTP_STREAM_OPENING;
 	}
 	asoc->ss_functions.sctp_ss_init(stcb, asoc);
 	SCTP_TCB_UNLOCK(stcb);
 
 	/* Now the mapping array */
 	asoc->mapping_array_size = SCTP_INITIAL_MAPPING_ARRAY;
 	SCTP_MALLOC(asoc->mapping_array, uint8_t *, asoc->mapping_array_size,
 	    SCTP_M_MAP);
 	if (asoc->mapping_array == NULL) {
 		/* Undo the stream array allocation before failing. */
 		SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
 		return (ENOMEM);
 	}
 	memset(asoc->mapping_array, 0, asoc->mapping_array_size);
 	/* The second (nr) mapping array has the same size as the first. */
 	SCTP_MALLOC(asoc->nr_mapping_array, uint8_t *, asoc->mapping_array_size,
 	    SCTP_M_MAP);
 	if (asoc->nr_mapping_array == NULL) {
 		/* Undo both earlier allocations before failing. */
 		SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
 		SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
 		return (ENOMEM);
 	}
 	memset(asoc->nr_mapping_array, 0, asoc->mapping_array_size);
 
 	/* Now the init of the other outqueues */
 	TAILQ_INIT(&asoc->free_chunks);
 	TAILQ_INIT(&asoc->control_send_queue);
 	TAILQ_INIT(&asoc->asconf_send_queue);
 	TAILQ_INIT(&asoc->send_queue);
 	TAILQ_INIT(&asoc->sent_queue);
 	TAILQ_INIT(&asoc->resetHead);
 	asoc->max_inbound_streams = inp->sctp_ep.max_open_streams_intome;
 	TAILQ_INIT(&asoc->asconf_queue);
 	/* authentication fields */
 	asoc->authinfo.random = NULL;
 	asoc->authinfo.active_keyid = 0;
 	asoc->authinfo.assoc_key = NULL;
 	asoc->authinfo.assoc_keyid = 0;
 	asoc->authinfo.recv_key = NULL;
 	asoc->authinfo.recv_keyid = 0;
 	LIST_INIT(&asoc->shared_keys);
 	asoc->marked_retrans = 0;
 	asoc->port = inp->sctp_ep.port;
 	/* Reset the timeout counters. */
 	asoc->timoinit = 0;
 	asoc->timodata = 0;
 	asoc->timosack = 0;
 	asoc->timoshutdown = 0;
 	asoc->timoheartbeat = 0;
 	asoc->timocookie = 0;
 	asoc->timoshutdownack = 0;
 	/* Both timestamps start out as the current time. */
 	(void)SCTP_GETTIME_TIMEVAL(&asoc->start_time);
 	asoc->discontinuity_time = asoc->start_time;
 	for (i = 0; i < SCTP_PR_SCTP_MAX + 1; i++) {
 		asoc->abandoned_unsent[i] = 0;
 		asoc->abandoned_sent[i] = 0;
 	}
 	/*
 	 * sa_ignore MEMLEAK {memory is put in the assoc mapping array and
 	 * freed later when the association is freed.
 	 */
 	return (0);
 }
 
 /*
  * Return how many leading bytes of map are worth printing: trailing zero
  * bytes are dropped, but the first byte is always kept when size is at
  * least 1.
  */
 static unsigned int
 sctp_mapping_print_limit(const uint8_t *map, unsigned int size)
 {
 	unsigned int limit = size;
 
 	while ((limit > 1) && (map[limit - 1] == 0)) {
 		limit--;
 	}
 	return (limit);
 }
 
 /*
  * Print the first limit bytes of map as two-digit hex values, 16 per line,
  * and finish a partially filled last line with a newline.
  */
 static void
 sctp_print_mapping_bytes(const uint8_t *map, unsigned int limit)
 {
 	unsigned int i;
 
 	for (i = 0; i < limit; i++) {
 		SCTP_PRINTF("%2.2x%c", map[i], ((i + 1) % 16) ? ' ' : '\n');
 	}
 	if (limit % 16)
 		SCTP_PRINTF("\n");
 }
 
 /*
  * Print a summary line for the association's mapping arrays followed by a
  * hex dump of the renegable and the non-renegable array.  Trailing zero
  * bytes of each array are not dumped; their number is reported instead.
  */
 void
 sctp_print_mapping_array(struct sctp_association *asoc)
 {
 	unsigned int limit;
 
 	SCTP_PRINTF("Mapping array size: %d, baseTSN: %8.8x, cumAck: %8.8x, highestTSN: (%8.8x, %8.8x).\n",
 	    asoc->mapping_array_size,
 	    asoc->mapping_array_base_tsn,
 	    asoc->cumulative_tsn,
 	    asoc->highest_tsn_inside_map,
 	    asoc->highest_tsn_inside_nr_map);
 
 	limit = sctp_mapping_print_limit(asoc->mapping_array,
 	    asoc->mapping_array_size);
 	SCTP_PRINTF("Renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
 	sctp_print_mapping_bytes(asoc->mapping_array, limit);
 
 	limit = sctp_mapping_print_limit(asoc->nr_mapping_array,
 	    asoc->mapping_array_size);
 	SCTP_PRINTF("Non renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit);
 	sctp_print_mapping_bytes(asoc->nr_mapping_array, limit);
 }
 
 /*
  * Grow both mapping arrays of an association.
  *
  * asoc    - association whose mapping_array and nr_mapping_array are grown.
  * needed  - number of additional bits required; the arrays grow by
  *           (needed + 7) / 8 bytes plus SCTP_MAPPING_ARRAY_INCR.
  *
  * The existing contents are copied to the start of the new arrays and the
  * remainder is zero-filled.  Returns 0 on success.  Returns -1 if either
  * allocation fails, in which case the association is left untouched.
  */
 int
 sctp_expand_mapping_array(struct sctp_association *asoc, uint32_t needed)
 {
 	uint8_t *grown_map, *grown_nr_map;
 	uint32_t new_size;
 
 	new_size = asoc->mapping_array_size + ((needed + 7) / 8 + SCTP_MAPPING_ARRAY_INCR);
 	SCTP_MALLOC(grown_map, uint8_t *, new_size, SCTP_M_MAP);
 	SCTP_MALLOC(grown_nr_map, uint8_t *, new_size, SCTP_M_MAP);
 	if ((grown_map != NULL) && (grown_nr_map != NULL)) {
 		/* Renegable array: zero-fill, then carry over the old bytes. */
 		memset(grown_map, 0, new_size);
 		memcpy(grown_map, asoc->mapping_array, asoc->mapping_array_size);
 		/* Non-renegable array: same treatment. */
 		memset(grown_nr_map, 0, new_size);
 		memcpy(grown_nr_map, asoc->nr_mapping_array, asoc->mapping_array_size);
 
 		SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
 		SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP);
 		asoc->mapping_array = grown_map;
 		asoc->nr_mapping_array = grown_nr_map;
 		asoc->mapping_array_size = new_size;
 		return (0);
 	}
 
 	/* can't get more, forget it */
 	SCTP_PRINTF("No memory for expansion of SCTP mapping array %d\n", new_size);
 	if (grown_map != NULL) {
 		SCTP_FREE(grown_map, SCTP_M_MAP);
 	}
 	if (grown_nr_map != NULL) {
 		SCTP_FREE(grown_nr_map, SCTP_M_MAP);
 	}
 	return (-1);
 }
 
 /*
  * Run one iterator to completion.
  *
  * Starting at it->inp, every endpoint whose flags and features match the
  * iterator's filters is visited (only the first one when
  * SCTP_ITERATOR_DO_SINGLE_INP is set).  For each endpoint the optional
  * function_inp callback runs once, function_assoc runs for every
  * association matching asoc_state, and the optional function_inp_end
  * callback runs when the endpoint's associations are exhausted or skipped.
  * When no endpoint is left, the optional function_atend callback runs and
  * the iterator structure itself is freed.
  *
  * The whole walk runs inside a network epoch section with the INP info
  * read lock and the iterator lock held; the locks are dropped and
  * re-taken after every SCTP_ITERATOR_MAX_AT_ONCE associations.
  */
 static void
 sctp_iterator_work(struct sctp_iterator *it)
 {
 	struct epoch_tracker et;
 	struct sctp_inpcb *tinp;
 	int iteration_count = 0;
 	int inp_skip = 0;
 	int first_in = 1;
 
 	NET_EPOCH_ENTER(et);
 	SCTP_INP_INFO_RLOCK();
 	SCTP_ITERATOR_LOCK();
 	sctp_it_ctl.cur_it = it;
 	if (it->inp) {
 		SCTP_INP_RLOCK(it->inp);
 		/*
 		 * NOTE(review): presumably drops the reference taken on the
 		 * endpoint when the iterator was queued - confirm against
 		 * the code that sets it->inp.
 		 */
 		SCTP_INP_DECR_REF(it->inp);
 	}
 	if (it->inp == NULL) {
 		/* iterator is complete */
 done_with_iterator:
 		/* Common exit: release the locks, notify, free the iterator. */
 		sctp_it_ctl.cur_it = NULL;
 		SCTP_ITERATOR_UNLOCK();
 		SCTP_INP_INFO_RUNLOCK();
 		if (it->function_atend != NULL) {
 			(*it->function_atend) (it->pointer, it->val);
 		}
 		SCTP_FREE(it, SCTP_M_ITER);
 		NET_EPOCH_EXIT(et);
 		return;
 	}
 select_a_new_ep:
 	/* The first endpoint was already read-locked above. */
 	if (first_in) {
 		first_in = 0;
 	} else {
 		SCTP_INP_RLOCK(it->inp);
 	}
 	/* Skip endpoints that lack any of the requested flags or features. */
 	while (((it->pcb_flags) &&
 	    ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
 	    ((it->pcb_features) &&
 	    ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
 		/* endpoint flags or features don't match, so keep looking */
 		if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
 			SCTP_INP_RUNLOCK(it->inp);
 			goto done_with_iterator;
 		}
 		tinp = it->inp;
 		it->inp = LIST_NEXT(it->inp, sctp_list);
 		it->stcb = NULL;
 		SCTP_INP_RUNLOCK(tinp);
 		if (it->inp == NULL) {
 			goto done_with_iterator;
 		}
 		SCTP_INP_RLOCK(it->inp);
 	}
 	/* now go through each assoc which is in the desired state */
 	if (it->done_current_ep == 0) {
 		/* Per-endpoint callback; a non-zero result skips its assocs. */
 		if (it->function_inp != NULL)
 			inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
 		it->done_current_ep = 1;
 	}
 	if (it->stcb == NULL) {
 		/* run the per instance function */
 		it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
 	}
 	if ((inp_skip) || it->stcb == NULL) {
 		/* Nothing to visit on this endpoint: finish it right away. */
 		if (it->function_inp_end != NULL) {
 			inp_skip = (*it->function_inp_end) (it->inp,
 			    it->pointer,
 			    it->val);
 		}
 		SCTP_INP_RUNLOCK(it->inp);
 		goto no_stcb;
 	}
 	while (it->stcb != NULL) {
 		SCTP_TCB_LOCK(it->stcb);
 		if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
 			/* not in the right state... keep looking */
 			SCTP_TCB_UNLOCK(it->stcb);
 			goto next_assoc;
 		}
 		/* see if we have limited out the iterator loop */
 		iteration_count++;
 		if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
 			/* Pause to let others grab the lock */
 			/*
 			 * References on the association and the endpoint are
 			 * held across the window in which all locks are
 			 * dropped, and released again once the locks are
 			 * back (or the iteration is cut short).
 			 */
 			atomic_add_int(&it->stcb->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK(it->stcb);
 			SCTP_INP_INCR_REF(it->inp);
 			SCTP_INP_RUNLOCK(it->inp);
 			SCTP_ITERATOR_UNLOCK();
 			SCTP_INP_INFO_RUNLOCK();
 			SCTP_INP_INFO_RLOCK();
 			SCTP_ITERATOR_LOCK();
 			/* Control flags may have been set while unlocked. */
 			if (sctp_it_ctl.iterator_flags) {
 				/* We won't be staying here */
 				SCTP_INP_DECR_REF(it->inp);
 				atomic_subtract_int(&it->stcb->asoc.refcnt, 1);
 				if (sctp_it_ctl.iterator_flags &
 				    SCTP_ITERATOR_STOP_CUR_IT) {
 					sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_IT;
 					goto done_with_iterator;
 				}
 				if (sctp_it_ctl.iterator_flags &
 				    SCTP_ITERATOR_STOP_CUR_INP) {
 					sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_INP;
 					goto no_stcb;
 				}
 				/* If we reach here huh? */
 				SCTP_PRINTF("Unknown it ctl flag %x\n",
 				    sctp_it_ctl.iterator_flags);
 				sctp_it_ctl.iterator_flags = 0;
 			}
 			/* Re-take the endpoint and association locks and resume. */
 			SCTP_INP_RLOCK(it->inp);
 			SCTP_INP_DECR_REF(it->inp);
 			SCTP_TCB_LOCK(it->stcb);
 			atomic_subtract_int(&it->stcb->asoc.refcnt, 1);
 			iteration_count = 0;
 		}
 		KASSERT(it->inp == it->stcb->sctp_ep,
 		    ("%s: stcb %p does not belong to inp %p, but inp %p",
 		    __func__, it->stcb, it->inp, it->stcb->sctp_ep));
 		SCTP_INP_RLOCK_ASSERT(it->inp);
 		SCTP_TCB_LOCK_ASSERT(it->stcb);
 
 		/* run function on this one */
 		(*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
 		/* The callback must return with both locks still held. */
 		SCTP_INP_RLOCK_ASSERT(it->inp);
 		SCTP_TCB_LOCK_ASSERT(it->stcb);
 
 		/*
 		 * we lie here, it really needs to have its own type but
 		 * first I must verify that this won't effect things :-0
 		 */
 		if (it->no_chunk_output == 0) {
 			sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
 			SCTP_INP_RLOCK_ASSERT(it->inp);
 			SCTP_TCB_LOCK_ASSERT(it->stcb);
 		}
 
 		SCTP_TCB_UNLOCK(it->stcb);
 next_assoc:
 		it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
 		if (it->stcb == NULL) {
 			/* Run last function */
 			if (it->function_inp_end != NULL) {
 				inp_skip = (*it->function_inp_end) (it->inp,
 				    it->pointer,
 				    it->val);
 			}
 		}
 	}
 	SCTP_INP_RUNLOCK(it->inp);
 no_stcb:
 	/* done with all assocs on this endpoint, move on to next endpoint */
 	it->done_current_ep = 0;
 	if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
 		it->inp = NULL;
 	} else {
 		it->inp = LIST_NEXT(it->inp, sctp_list);
 	}
 	it->stcb = NULL;
 	if (it->inp == NULL) {
 		goto done_with_iterator;
 	}
 	goto select_a_new_ep;
 }
 
 /*
  * Run every iterator that is currently queued on sctp_it_ctl.iteratorhead.
  *
  * This function is entered with the iterator work-queue lock held.  The
  * lock is released around each sctp_iterator_work() call and re-acquired
  * afterwards, so it is held again when the function returns.
  * sctp_it_ctl.iterator_running is set for the duration of the drain.
  */
 void
 sctp_iterator_worker(void)
 {
 	struct sctp_iterator *cur;
 
 	sctp_it_ctl.iterator_running = 1;
 	for (;;) {
 		cur = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
 		if (cur == NULL) {
 			break;
 		}
 		/* Unlink the entry while the queue lock is still held. */
 		TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, cur, sctp_nxt_itr);
 		SCTP_IPI_ITERATOR_WQ_UNLOCK();
 		/* Do the work in the vnet recorded in the iterator. */
 		CURVNET_SET(cur->vn);
 		sctp_iterator_work(cur);
 		CURVNET_RESTORE();
 		SCTP_IPI_ITERATOR_WQ_LOCK();
 		/* sa_ignore FREED_MEMORY */
 	}
 	sctp_it_ctl.iterator_running = 0;
 }
 
 /*
  * Deal with the address work queue that is filled by the rtsock calls.
  *
  * Every entry on SCTP_BASE_INFO(addr_wq) is moved onto a freshly allocated
  * struct sctp_asconf_iterator, which is then passed to an iterator over the
  * bound-all endpoints.  If the allocation fails, the ADDR_WQ timer is
  * started again so the queue is retried later.  If the iterator cannot be
  * initiated, the collected entries are either released through
  * sctp_asconf_iterator_end() (when sctp_pcb_initialized is 0) or put back
  * on the address work queue.
  */
 static void
 sctp_handle_addr_wq(void)
 {
 	struct sctp_laddr *wi, *nwi;
 	struct sctp_asconf_iterator *asc;
 
 	SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
 	    sizeof(struct sctp_asconf_iterator), SCTP_M_ASC_IT);
 	if (asc == NULL) {
 		/* Try later, no memory */
 		sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
 		    (struct sctp_inpcb *)NULL,
 		    (struct sctp_tcb *)NULL,
 		    (struct sctp_nets *)NULL);
 		return;
 	}
 	LIST_INIT(&asc->list_of_work);
 	asc->cnt = 0;
 
 	/* Move all pending entries from the global queue to the work list. */
 	LIST_FOREACH_SAFE(wi, &SCTP_BASE_INFO(addr_wq), sctp_nxt_addr, nwi) {
 		LIST_REMOVE(wi, sctp_nxt_addr);
 		LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
 		asc->cnt++;
 	}
 
 	if (asc->cnt == 0) {
 		/* Nothing was queued. */
 		SCTP_FREE(asc, SCTP_M_ASC_IT);
 	} else {
 		int ret;
 
 		ret = sctp_initiate_iterator(sctp_asconf_iterator_ep,
 		    sctp_asconf_iterator_stcb,
 		    NULL,	/* No ep end for boundall */
 		    SCTP_PCB_FLAGS_BOUNDALL,
 		    SCTP_PCB_ANY_FEATURES,
 		    SCTP_ASOC_ANY_STATE,
 		    (void *)asc, 0,
 		    sctp_asconf_iterator_end, NULL, 0);
 		if (ret) {
 			SCTP_PRINTF("Failed to initiate iterator for handle_addr_wq\n");
 			/*
 			 * Freeing if we are stopping or put back on the
 			 * addr_wq.
 			 */
 			if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
 				sctp_asconf_iterator_end(asc, 0);
 			} else {
 				/*
 				 * Both lists link their entries through
 				 * sctp_nxt_addr, so an entry has to be
 				 * unlinked from the work list, and its
 				 * successor remembered, before it is put
 				 * on the addr_wq.  A plain LIST_FOREACH
 				 * would follow the overwritten link after
 				 * the first insert and strand the remaining
 				 * entries on the list that is freed below.
 				 */
 				LIST_FOREACH_SAFE(wi, &asc->list_of_work, sctp_nxt_addr, nwi) {
 					LIST_REMOVE(wi, sctp_nxt_addr);
 					LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
 				}
 				SCTP_FREE(asc, SCTP_M_ASC_IT);
 			}
 		}
 	}
 }
 
 /*-
  * The following table shows which pointers for the inp, stcb, or net are
  * stored for each timer after it was started.
  *
  *|Name                         |Timer                        |inp |stcb|net |
  *|-----------------------------|-----------------------------|----|----|----|
  *|SCTP_TIMER_TYPE_SEND         |net->rxt_timer               |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_INIT         |net->rxt_timer               |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_RECV         |stcb->asoc.dack_timer        |Yes |Yes |No  |
  *|SCTP_TIMER_TYPE_SHUTDOWN     |net->rxt_timer               |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_HEARTBEAT    |net->hb_timer                |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_COOKIE       |net->rxt_timer               |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_NEWCOOKIE    |inp->sctp_ep.signature_change|Yes |No  |No  |
  *|SCTP_TIMER_TYPE_PATHMTURAISE |net->pmtu_timer              |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_SHUTDOWNACK  |net->rxt_timer               |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_ASCONF       |stcb->asoc.asconf_timer      |Yes |Yes |Yes |
  *|SCTP_TIMER_TYPE_SHUTDOWNGUARD|stcb->asoc.shut_guard_timer  |Yes |Yes |No  |
  *|SCTP_TIMER_TYPE_AUTOCLOSE    |stcb->asoc.autoclose_timer   |Yes |Yes |No  |
  *|SCTP_TIMER_TYPE_STRRESET     |stcb->asoc.strreset_timer    |Yes |Yes |No  |
  *|SCTP_TIMER_TYPE_INPKILL      |inp->sctp_ep.signature_change|Yes |No  |No  |
  *|SCTP_TIMER_TYPE_ASOCKILL     |stcb->asoc.strreset_timer    |Yes |Yes |No  |
  *|SCTP_TIMER_TYPE_ADDR_WQ      |SCTP_BASE_INFO(addr_wq_timer)|No  |No  |No  |
  *|SCTP_TIMER_TYPE_PRIM_DELETED |stcb->asoc.delete_prim_timer |Yes |Yes |No  |
  */
 
 /*
  * Callout handler shared by all SCTP timer types.
  *
  * 't' is the struct sctp_timer that was armed by sctp_timer_start().  The
  * handler takes the lock that matches the objects stored in the timer (the
  * TCB lock if a stcb is set, else the INP write lock if an inp is set, else
  * the address work-queue lock), dispatches on the timer type and finally
  * drops the references that were taken when the timer was started.
  *
  * Exit labels:
  *   out      - release the lock taken above, then fall through to out_decr.
  *   out_decr - release references only; used when no lock is held, either
  *              because none was taken yet or because the object owning the
  *              lock was freed by the type-specific handler.
  */
 void
 sctp_timeout_handler(void *t)
 {
 	struct epoch_tracker et;
 	struct timeval tv;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 	struct sctp_timer *tmr;
 	struct mbuf *op_err;
 	int type;
 	int i, secret;
 	bool did_output, released_asoc_reference;
 
 	/*
 	 * If inp, stcb or net are not NULL, then references to these were
 	 * added when the timer was started, and must be released before
 	 * this function returns.
 	 */
 	tmr = (struct sctp_timer *)t;
 	inp = (struct sctp_inpcb *)tmr->ep;
 	stcb = (struct sctp_tcb *)tmr->tcb;
 	net = (struct sctp_nets *)tmr->net;
 	CURVNET_SET((struct vnet *)tmr->vnet);
 	NET_EPOCH_ENTER(et);
 	released_asoc_reference = false;
 
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xF0, (uint8_t)tmr->type);
 	sctp_auditing(3, inp, stcb, net);
 #endif
 
 	/* sanity checks... */
 	KASSERT(tmr->self == NULL || tmr->self == tmr,
 	    ("sctp_timeout_handler: tmr->self corrupted"));
 	KASSERT(SCTP_IS_TIMER_TYPE_VALID(tmr->type),
 	    ("sctp_timeout_handler: invalid timer type %d", tmr->type));
 	type = tmr->type;
 	/* Report inp (not stcb) so the values match the message labels. */
 	KASSERT(stcb == NULL || stcb->sctp_ep == inp,
 	    ("sctp_timeout_handler of type %d: inp = %p, stcb->sctp_ep %p",
 	    type, inp, stcb->sctp_ep));
 	/*
 	 * stopped_from is updated at each stage (0xa001..0xa003, then the
 	 * timer type) to record how far the handler got.
 	 */
 	tmr->stopped_from = 0xa001;
 	if ((stcb != NULL) && (stcb->asoc.state == SCTP_STATE_EMPTY)) {
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Timer type %d handler exiting due to CLOSED association.\n",
 		    type);
 		goto out_decr;
 	}
 	tmr->stopped_from = 0xa002;
 	SCTPDBG(SCTP_DEBUG_TIMER2, "Timer type %d goes off.\n", type);
 	if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Timer type %d handler exiting due to not being active.\n",
 		    type);
 		goto out_decr;
 	}
 
 	tmr->stopped_from = 0xa003;
 	if (stcb) {
 		SCTP_TCB_LOCK(stcb);
 		/*
 		 * Release reference so that association can be freed if
 		 * necessary below. This is safe now that we have acquired
 		 * the lock.
 		 */
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		released_asoc_reference = true;
 		/* Only the ASOCKILL timer may run on a dying association. */
 		if ((type != SCTP_TIMER_TYPE_ASOCKILL) &&
 		    ((stcb->asoc.state == SCTP_STATE_EMPTY) ||
 		    (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) {
 			SCTPDBG(SCTP_DEBUG_TIMER2,
 			    "Timer type %d handler exiting due to CLOSED association.\n",
 			    type);
 			goto out;
 		}
 	} else if (inp != NULL) {
 		SCTP_INP_WLOCK(inp);
 	} else {
 		SCTP_WQ_ADDR_LOCK();
 	}
 
 	/* Record in stopped_from which timeout occurred. */
 	tmr->stopped_from = type;
 	/* mark as being serviced now */
 	if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
 		/*
 		 * Callout has been rescheduled.
 		 */
 		goto out;
 	}
 	if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
 		/*
 		 * Not active, so no action.
 		 */
 		goto out;
 	}
 	SCTP_OS_TIMER_DEACTIVATE(&tmr->timer);
 
 	/*
 	 * Call the handler for the appropriate timer type.  Every case that
 	 * leaves the switch via break sets did_output.
 	 */
 	switch (type) {
 	case SCTP_TIMER_TYPE_SEND:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timodata);
 		stcb->asoc.timodata++;
 		stcb->asoc.num_send_timers_up--;
 		if (stcb->asoc.num_send_timers_up < 0) {
 			stcb->asoc.num_send_timers_up = 0;
 		}
 		SCTP_TCB_LOCK_ASSERT(stcb);
 		if (sctp_t3rxt_timer(inp, stcb, net)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 		SCTP_TCB_LOCK_ASSERT(stcb);
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_auditing(4, inp, stcb, net);
 #endif
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		if ((stcb->asoc.num_send_timers_up == 0) &&
 		    (stcb->asoc.sent_queue_cnt > 0)) {
 			struct sctp_tmit_chunk *chk;
 
 			/*
 			 * Safeguard. If there are chunks on the sent queue
 			 * but no send timer is running, something is
 			 * wrong... so we start a timer for the first chunk
 			 * on the sent queue that has a destination, on
 			 * whatever net it was sent to.
 			 */
 			TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
 				if (chk->whoTo != NULL) {
 					break;
 				}
 			}
 			if (chk != NULL) {
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, chk->whoTo);
 			}
 		}
 		break;
 	case SCTP_TIMER_TYPE_INIT:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoinit);
 		stcb->asoc.timoinit++;
 		if (sctp_t1init_timer(inp, stcb, net)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 		did_output = false;
 		break;
 	case SCTP_TIMER_TYPE_RECV:
 		KASSERT(inp != NULL && stcb != NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timosack);
 		stcb->asoc.timosack++;
 		sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED);
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_auditing(4, inp, stcb, NULL);
 #endif
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SACK_TMR, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_SHUTDOWN:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoshutdown);
 		stcb->asoc.timoshutdown++;
 		if (sctp_shutdown_timer(inp, stcb, net)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_auditing(4, inp, stcb, net);
 #endif
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_HEARTBEAT:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoheartbeat);
 		stcb->asoc.timoheartbeat++;
 		if (sctp_heartbeat_timer(inp, stcb, net)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_auditing(4, inp, stcb, net);
 #endif
 		/* Re-arm the heartbeat unless it is disabled for this net. */
 		if (!(net->dest_state & SCTP_ADDR_NOHB)) {
 			sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
 			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_HB_TMR, SCTP_SO_NOT_LOCKED);
 			did_output = true;
 		} else {
 			did_output = false;
 		}
 		break;
 	case SCTP_TIMER_TYPE_COOKIE:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timocookie);
 		stcb->asoc.timocookie++;
 		if (sctp_cookie_timer(inp, stcb, net)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_auditing(4, inp, stcb, net);
 #endif
 		/*
 		 * We consider T3 and Cookie timer pretty much the same with
 		 * respect to where from in chunk_output.
 		 */
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_NEWCOOKIE:
 		KASSERT(inp != NULL && stcb == NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timosecret);
 		(void)SCTP_GETTIME_TIMEVAL(&tv);
 		inp->sctp_ep.time_of_secret_change = (unsigned int)tv.tv_sec;
 		/* Advance to the next secret slot, wrapping around. */
 		inp->sctp_ep.last_secret_number =
 		    inp->sctp_ep.current_secret_number;
 		inp->sctp_ep.current_secret_number++;
 		if (inp->sctp_ep.current_secret_number >=
 		    SCTP_HOW_MANY_SECRETS) {
 			inp->sctp_ep.current_secret_number = 0;
 		}
 		secret = (int)inp->sctp_ep.current_secret_number;
 		/* Fill the new current slot with fresh values. */
 		for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
 			inp->sctp_ep.secret_key[secret][i] =
 			    sctp_select_initial_TSN(&inp->sctp_ep);
 		}
 		sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL);
 		did_output = false;
 		break;
 	case SCTP_TIMER_TYPE_PATHMTURAISE:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timopathmtu);
 		sctp_pathmtu_timer(inp, stcb, net);
 		did_output = false;
 		break;
 	case SCTP_TIMER_TYPE_SHUTDOWNACK:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		if (sctp_shutdownack_timer(inp, stcb, net)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 		SCTP_STAT_INCR(sctps_timoshutdownack);
 		stcb->asoc.timoshutdownack++;
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_auditing(4, inp, stcb, net);
 #endif
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_ACK_TMR, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_ASCONF:
 		KASSERT(inp != NULL && stcb != NULL && net != NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoasconf);
 		if (sctp_asconf_timer(inp, stcb, net)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_auditing(4, inp, stcb, net);
 #endif
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_ASCONF_TMR, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
 		KASSERT(inp != NULL && stcb != NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoshutdownguard);
 		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 		    "Shutdown guard timer expired");
 		sctp_abort_an_association(inp, stcb, op_err, true, SCTP_SO_NOT_LOCKED);
 		/* No need to unlock the TCB, it is gone. */
 		goto out_decr;
 	case SCTP_TIMER_TYPE_AUTOCLOSE:
 		KASSERT(inp != NULL && stcb != NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoautoclose);
 		sctp_autoclose_timer(inp, stcb);
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_STRRESET:
 		KASSERT(inp != NULL && stcb != NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timostrmrst);
 		if (sctp_strreset_timer(inp, stcb)) {
 			/* No need to unlock the TCB, it is gone. */
 			goto out_decr;
 		}
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_TMR, SCTP_SO_NOT_LOCKED);
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_INPKILL:
 		KASSERT(inp != NULL && stcb == NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoinpkill);
 		/*
 		 * special case, take away our increment since WE are the
 		 * killer
 		 */
 		sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
 		SCTP_INP_DECR_REF(inp);
 		SCTP_INP_WUNLOCK(inp);
 		sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 		    SCTP_CALLED_FROM_INPKILL_TIMER);
 		/* Reference and lock were dropped above; skip both at exit. */
 		inp = NULL;
 		goto out_decr;
 	case SCTP_TIMER_TYPE_ASOCKILL:
 		KASSERT(inp != NULL && stcb != NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timoassockill);
 		/* Can we free it yet? */
 		sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
 		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
 		/*
 		 * free asoc, always unlocks (or destroy's) so prevent
 		 * duplicate unlock or unlock of a free mtx :-0
 		 */
 		stcb = NULL;
 		goto out_decr;
 	case SCTP_TIMER_TYPE_ADDR_WQ:
 		KASSERT(inp == NULL && stcb == NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		sctp_handle_addr_wq();
 		did_output = true;
 		break;
 	case SCTP_TIMER_TYPE_PRIM_DELETED:
 		KASSERT(inp != NULL && stcb != NULL && net == NULL,
 		    ("timeout of type %d: inp = %p, stcb = %p, net = %p",
 		    type, inp, stcb, net));
 		SCTP_STAT_INCR(sctps_timodelprim);
 		sctp_delete_prim_timer(inp, stcb);
 		did_output = false;
 		break;
 	default:
 #ifdef INVARIANTS
 		panic("Unknown timer type %d", type);
 #else
 		goto out;
 #endif
 	}
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xF1, (uint8_t)type);
 	if (inp != NULL)
 		sctp_auditing(5, inp, stcb, net);
 #endif
 	if (did_output && (stcb != NULL)) {
 		/*
 		 * Now we need to clean up the control chunk chain if an
 		 * ECNE is on it. It must be marked as UNSENT again so next
 		 * call will continue to send it until such time that we get
 		 * a CWR, to remove it. It is, however, less likely that we
 		 * will find a ecn echo on the chain though.
 		 */
 		sctp_fix_ecn_echo(&stcb->asoc);
 	}
 out:
 	/* Drop whichever lock was taken before the dispatch. */
 	if (stcb != NULL) {
 		SCTP_TCB_UNLOCK(stcb);
 	} else if (inp != NULL) {
 		SCTP_INP_WUNLOCK(inp);
 	} else {
 		SCTP_WQ_ADDR_UNLOCK();
 	}
 
 out_decr:
 	/* These reference counts were incremented in sctp_timer_start(). */
 	if (inp != NULL) {
 		SCTP_INP_DECR_REF(inp);
 	}
 	/* The stcb reference may already have been dropped after locking. */
 	if ((stcb != NULL) && !released_asoc_reference) {
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 	}
 	if (net != NULL) {
 		sctp_free_remote_addr(net);
 	}
 	SCTPDBG(SCTP_DEBUG_TIMER2, "Timer type %d handler finished.\n", type);
 	CURVNET_RESTORE();
 	NET_EPOCH_EXIT(et);
 }
 
 /*-
  * The following table shows which parameters must be provided
  * when calling sctp_timer_start(). For parameters not being
  * provided, NULL must be used.
  *
  * |Name                         |inp |stcb|net |
  * |-----------------------------|----|----|----|
  * |SCTP_TIMER_TYPE_SEND         |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_INIT         |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_RECV         |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_SHUTDOWN     |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_HEARTBEAT    |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_COOKIE       |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_NEWCOOKIE    |Yes |No  |No  |
  * |SCTP_TIMER_TYPE_PATHMTURAISE |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_SHUTDOWNACK  |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_ASCONF       |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_SHUTDOWNGUARD|Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_AUTOCLOSE    |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_STRRESET     |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_INPKILL      |Yes |No  |No  |
  * |SCTP_TIMER_TYPE_ASOCKILL     |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_ADDR_WQ      |No  |No  |No  |
  * |SCTP_TIMER_TYPE_PRIM_DELETED |Yes |Yes |No  |
  *
  */
 
 /*
  * Start the timer of type t_type for the given inp/stcb/net combination.
  *
  * The caller must hold the TCB lock if stcb is given, else the INP write
  * lock if inp is given, else the address work-queue lock; this is asserted
  * below.  Nothing is started if
  *   - the association is about to be freed (the ASOCKILL timer excepted),
  *   - the net is being deleted,
  *   - the arguments do not fit the timer type (panics under INVARIANTS),
  *   - heartbeats or path MTU discovery are disabled for the net and the
  *     respective timer is requested, or
  *   - the timer is already pending, in which case it is left unchanged.
  * When a callout is newly scheduled, references are taken on the inp, stcb
  * and net stored in the timer.
  */
 void
 sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	struct sctp_timer *tmr;
 	uint32_t to_ticks;
 	uint32_t rndval, jitter;
 
 	/* Report inp (not stcb) so the values match the message labels. */
 	KASSERT(stcb == NULL || stcb->sctp_ep == inp,
 	    ("sctp_timer_start of type %d: inp = %p, stcb->sctp_ep %p",
 	    t_type, inp, stcb->sctp_ep));
 	tmr = NULL;
 	if (stcb != NULL) {
 		SCTP_TCB_LOCK_ASSERT(stcb);
 	} else if (inp != NULL) {
 		SCTP_INP_WLOCK_ASSERT(inp);
 	} else {
 		SCTP_WQ_ADDR_LOCK_ASSERT();
 	}
 	if (stcb != NULL) {
 		/*
 		 * Don't restart timer on association that's about to be
 		 * killed.
 		 */
 		if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) &&
 		    (t_type != SCTP_TIMER_TYPE_ASOCKILL)) {
 			SCTPDBG(SCTP_DEBUG_TIMER2,
 			    "Timer type %d not started: inp=%p, stcb=%p, net=%p (stcb deleted).\n",
 			    t_type, inp, stcb, net);
 			return;
 		}
 		/* Don't restart timer on net that's been removed. */
 		if (net != NULL && (net->dest_state & SCTP_ADDR_BEING_DELETED)) {
 			SCTPDBG(SCTP_DEBUG_TIMER2,
 			    "Timer type %d not started: inp=%p, stcb=%p, net=%p (net deleted).\n",
 			    t_type, inp, stcb, net);
 			return;
 		}
 	}
 	/*
 	 * Select the timer structure and compute the timeout in ticks.  Each
 	 * case first validates that exactly the expected pointers were given.
 	 */
 	switch (t_type) {
 	case SCTP_TIMER_TYPE_SEND:
 		/* Here we use the RTO timer. */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &net->rxt_timer;
 		if (net->RTO == 0) {
 			to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		} else {
 			to_ticks = sctp_msecs_to_ticks(net->RTO);
 		}
 		break;
 	case SCTP_TIMER_TYPE_INIT:
 		/*
 		 * Here we use the INIT timer default usually about 1
 		 * second.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &net->rxt_timer;
 		if (net->RTO == 0) {
 			to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		} else {
 			to_ticks = sctp_msecs_to_ticks(net->RTO);
 		}
 		break;
 	case SCTP_TIMER_TYPE_RECV:
 		/*
 		 * Here we use the Delayed-Ack timer value from the inp,
 		 * usually about 200ms.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &stcb->asoc.dack_timer;
 		to_ticks = sctp_msecs_to_ticks(stcb->asoc.delayed_ack);
 		break;
 	case SCTP_TIMER_TYPE_SHUTDOWN:
 		/* Here we use the RTO of the destination. */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &net->rxt_timer;
 		if (net->RTO == 0) {
 			to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		} else {
 			to_ticks = sctp_msecs_to_ticks(net->RTO);
 		}
 		break;
 	case SCTP_TIMER_TYPE_HEARTBEAT:
 		/*
 		 * The net is used here so that we can add in the RTO. Even
 		 * though we use a different timer. We also add the HB timer
 		 * PLUS a random jitter.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		/* No heartbeats on a confirmed net that has them disabled. */
 		if ((net->dest_state & SCTP_ADDR_NOHB) &&
 		    !(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
 			SCTPDBG(SCTP_DEBUG_TIMER2,
 			    "Timer type %d not started: inp=%p, stcb=%p, net=%p.\n",
 			    t_type, inp, stcb, net);
 			return;
 		}
 		tmr = &net->hb_timer;
 		/* Until the final conversion, to_ticks holds milliseconds. */
 		if (net->RTO == 0) {
 			to_ticks = stcb->asoc.initial_rto;
 		} else {
 			to_ticks = net->RTO;
 		}
 		rndval = sctp_select_initial_TSN(&inp->sctp_ep);
 		/*
 		 * NOTE(review): the modulo assumes the RTO value chosen
 		 * above is non-zero - confirm initial_rto can never be 0.
 		 */
 		jitter = rndval % to_ticks;
 		if (to_ticks > 1) {
 			to_ticks >>= 1;
 		}
 		/* Saturating additions: clamp at UINT32_MAX. */
 		if (jitter < (UINT32_MAX - to_ticks)) {
 			to_ticks += jitter;
 		} else {
 			to_ticks = UINT32_MAX;
 		}
 		if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
 		    !(net->dest_state & SCTP_ADDR_PF)) {
 			if (net->heart_beat_delay < (UINT32_MAX - to_ticks)) {
 				to_ticks += net->heart_beat_delay;
 			} else {
 				to_ticks = UINT32_MAX;
 			}
 		}
 		/*
 		 * Now we must convert the to_ticks that are now in ms to
 		 * ticks.
 		 */
 		to_ticks = sctp_msecs_to_ticks(to_ticks);
 		break;
 	case SCTP_TIMER_TYPE_COOKIE:
 		/*
 		 * Here we can use the RTO timer from the network since one
 		 * RTT was complete. If a retransmission happened then we
 		 * will be using the RTO initial value.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &net->rxt_timer;
 		if (net->RTO == 0) {
 			to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		} else {
 			to_ticks = sctp_msecs_to_ticks(net->RTO);
 		}
 		break;
 	case SCTP_TIMER_TYPE_NEWCOOKIE:
 		/*
 		 * Nothing needed but the endpoint here usually about 60
 		 * minutes.
 		 */
 		if ((inp == NULL) || (stcb != NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &inp->sctp_ep.signature_change;
 		to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_SIGNATURE];
 		break;
 	case SCTP_TIMER_TYPE_PATHMTURAISE:
 		/*
 		 * Here we use the value found in the EP for PMTUD, usually
 		 * about 10 minutes.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
 			SCTPDBG(SCTP_DEBUG_TIMER2,
 			    "Timer type %d not started: inp=%p, stcb=%p, net=%p.\n",
 			    t_type, inp, stcb, net);
 			return;
 		}
 		tmr = &net->pmtu_timer;
 		to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_PMTU];
 		break;
 	case SCTP_TIMER_TYPE_SHUTDOWNACK:
 		/* Here we use the RTO of the destination. */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &net->rxt_timer;
 		if (net->RTO == 0) {
 			to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		} else {
 			to_ticks = sctp_msecs_to_ticks(net->RTO);
 		}
 		break;
 	case SCTP_TIMER_TYPE_ASCONF:
 		/*
 		 * Here the timer comes from the stcb but its value is from
 		 * the net's RTO.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &stcb->asoc.asconf_timer;
 		if (net->RTO == 0) {
 			to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		} else {
 			to_ticks = sctp_msecs_to_ticks(net->RTO);
 		}
 		break;
 	case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
 		/*
 		 * Here we use the endpoints shutdown guard timer usually
 		 * about 3 minutes.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &stcb->asoc.shut_guard_timer;
 		/* No configured value: use 5 * maxrto, guarding overflow. */
 		if (inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] == 0) {
 			if (stcb->asoc.maxrto < UINT32_MAX / 5) {
 				to_ticks = sctp_msecs_to_ticks(5 * stcb->asoc.maxrto);
 			} else {
 				to_ticks = sctp_msecs_to_ticks(UINT32_MAX);
 			}
 		} else {
 			to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
 		}
 		break;
 	case SCTP_TIMER_TYPE_AUTOCLOSE:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &stcb->asoc.autoclose_timer;
 		to_ticks = stcb->asoc.sctp_autoclose_ticks;
 		break;
 	case SCTP_TIMER_TYPE_STRRESET:
 		/*
 		 * Here the timer comes from the stcb but its value is from
 		 * the net's RTO.
 		 */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &stcb->asoc.strreset_timer;
 		if (net->RTO == 0) {
 			to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		} else {
 			to_ticks = sctp_msecs_to_ticks(net->RTO);
 		}
 		break;
 	case SCTP_TIMER_TYPE_INPKILL:
 		/*
 		 * The inp is setup to die. We re-use the signature_change
 		 * timer since that has stopped and we are in the GONE
 		 * state.
 		 */
 		if ((inp == NULL) || (stcb != NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &inp->sctp_ep.signature_change;
 		to_ticks = sctp_msecs_to_ticks(SCTP_INP_KILL_TIMEOUT);
 		break;
 	case SCTP_TIMER_TYPE_ASOCKILL:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		/* The association kill timer shares the strreset timer. */
 		tmr = &stcb->asoc.strreset_timer;
 		to_ticks = sctp_msecs_to_ticks(SCTP_ASOC_KILL_TIMEOUT);
 		break;
 	case SCTP_TIMER_TYPE_ADDR_WQ:
 		if ((inp != NULL) || (stcb != NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		/* Only 1 tick away :-) */
 		tmr = &SCTP_BASE_INFO(addr_wq_timer);
 		to_ticks = SCTP_ADDRESS_TICK_DELAY;
 		break;
 	case SCTP_TIMER_TYPE_PRIM_DELETED:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 #ifdef INVARIANTS
 			panic("sctp_timer_start of type %d: inp = %p, stcb = %p, net = %p",
 			    t_type, inp, stcb, net);
 #else
 			return;
 #endif
 		}
 		tmr = &stcb->asoc.delete_prim_timer;
 		to_ticks = sctp_msecs_to_ticks(stcb->asoc.initial_rto);
 		break;
 	default:
 #ifdef INVARIANTS
 		panic("Unknown timer type %d", t_type);
 #else
 		return;
 #endif
 	}
 	KASSERT(tmr != NULL, ("tmr is NULL for timer type %d", t_type));
 	KASSERT(to_ticks > 0, ("to_ticks == 0 for timer type %d", t_type));
 	if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
 		/*
 		 * We do NOT allow you to have it already running. If it is,
 		 * we leave the current one up unchanged.
 		 */
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Timer type %d already running: inp=%p, stcb=%p, net=%p.\n",
 		    t_type, inp, stcb, net);
 		return;
 	}
 	/* At this point we can proceed. */
 	if (t_type == SCTP_TIMER_TYPE_SEND) {
 		stcb->asoc.num_send_timers_up++;
 	}
 	tmr->stopped_from = 0;
 	tmr->type = t_type;
 	tmr->ep = (void *)inp;
 	tmr->tcb = (void *)stcb;
 	/* For STRRESET the net only supplies the RTO; it is not stored. */
 	if (t_type == SCTP_TIMER_TYPE_STRRESET) {
 		tmr->net = NULL;
 	} else {
 		tmr->net = (void *)net;
 	}
 	tmr->self = (void *)tmr;
 	tmr->vnet = (void *)curvnet;
 	tmr->ticks = sctp_get_tick_count();
 	if (SCTP_OS_TIMER_START(&tmr->timer, to_ticks, sctp_timeout_handler, tmr) == 0) {
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Timer type %d started: ticks=%u, inp=%p, stcb=%p, net=%p.\n",
 		    t_type, to_ticks, inp, stcb, net);
 		/*
 		 * If this is a newly scheduled callout, as opposed to a
 		 * rescheduled one, increment relevant reference counts.
 		 */
 		if (tmr->ep != NULL) {
 			SCTP_INP_INCR_REF(inp);
 		}
 		if (tmr->tcb != NULL) {
 			atomic_add_int(&stcb->asoc.refcnt, 1);
 		}
 		if (tmr->net != NULL) {
 			atomic_add_int(&net->ref_count, 1);
 		}
 	} else {
 		/*
 		 * This should not happen, since we checked for pending
 		 * above.
 		 */
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Timer type %d restarted: ticks=%u, inp=%p, stcb=%p, net=%p.\n",
 		    t_type, to_ticks, inp, stcb, net);
 	}
 	return;
 }
 
 /*-
  * The following table shows which parameters must be provided
  * when calling sctp_timer_stop(). For parameters not being
  * provided, NULL must be used.
  *
  * |Name                         |inp |stcb|net |
  * |-----------------------------|----|----|----|
  * |SCTP_TIMER_TYPE_SEND         |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_INIT         |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_RECV         |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_SHUTDOWN     |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_HEARTBEAT    |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_COOKIE       |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_NEWCOOKIE    |Yes |No  |No  |
  * |SCTP_TIMER_TYPE_PATHMTURAISE |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_SHUTDOWNACK  |Yes |Yes |Yes |
  * |SCTP_TIMER_TYPE_ASCONF       |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_SHUTDOWNGUARD|Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_AUTOCLOSE    |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_STRRESET     |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_INPKILL      |Yes |No  |No  |
  * |SCTP_TIMER_TYPE_ASOCKILL     |Yes |Yes |No  |
  * |SCTP_TIMER_TYPE_ADDR_WQ      |No  |No  |No  |
  * |SCTP_TIMER_TYPE_PRIM_DELETED |Yes |Yes |No  |
  *
  */
 
 /*
  * Stop the timer of type t_type that belongs to inp/stcb/net.
  *
  * Which of inp, stcb and net have to be provided depends on t_type (see
  * the table above).  A wrong combination, or an unknown t_type, panics
  * when INVARIANTS is defined and is ignored otherwise.  If the selected
  * timer structure currently carries a different type, it is in joint use
  * and is left alone.  'from' is recorded in the timer as stopped_from.
  * If the callout was actually stopped, the ep/tcb/net references recorded
  * in the timer are dropped.
  */
 void
 sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_nets *net, uint32_t from)
 {
 	struct sctp_timer *tmr;
 
 	/* Print inp, not stcb, so the values match the message labels. */
 	KASSERT(stcb == NULL || stcb->sctp_ep == inp,
 	    ("sctp_timer_stop of type %d: inp = %p, stcb->sctp_ep %p",
 	    t_type, inp, stcb->sctp_ep));
 	if (stcb != NULL) {
 		SCTP_TCB_LOCK_ASSERT(stcb);
 	} else if (inp != NULL) {
 		SCTP_INP_WLOCK_ASSERT(inp);
 	} else {
 		SCTP_WQ_ADDR_LOCK_ASSERT();
 	}
 	tmr = NULL;
 	/*
 	 * Validate the argument combination for the timer type and pick the
 	 * timer structure.  Types with the same requirements and the same
 	 * timer share a case; every failed check leaves via bad_args.
 	 */
 	switch (t_type) {
 	case SCTP_TIMER_TYPE_SEND:
 	case SCTP_TIMER_TYPE_INIT:
 	case SCTP_TIMER_TYPE_SHUTDOWN:
 	case SCTP_TIMER_TYPE_COOKIE:
 	case SCTP_TIMER_TYPE_SHUTDOWNACK:
 		/* These types all use the rxt_timer of the net. */
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 			goto bad_args;
 		}
 		tmr = &net->rxt_timer;
 		break;
 	case SCTP_TIMER_TYPE_HEARTBEAT:
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 			goto bad_args;
 		}
 		tmr = &net->hb_timer;
 		break;
 	case SCTP_TIMER_TYPE_PATHMTURAISE:
 		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
 			goto bad_args;
 		}
 		tmr = &net->pmtu_timer;
 		break;
 	case SCTP_TIMER_TYPE_RECV:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &stcb->asoc.dack_timer;
 		break;
 	case SCTP_TIMER_TYPE_ASCONF:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &stcb->asoc.asconf_timer;
 		break;
 	case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &stcb->asoc.shut_guard_timer;
 		break;
 	case SCTP_TIMER_TYPE_AUTOCLOSE:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &stcb->asoc.autoclose_timer;
 		break;
 	case SCTP_TIMER_TYPE_STRRESET:
 	case SCTP_TIMER_TYPE_ASOCKILL:
 		/* Both types use the strreset_timer of the association. */
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &stcb->asoc.strreset_timer;
 		break;
 	case SCTP_TIMER_TYPE_PRIM_DELETED:
 		if ((inp == NULL) || (stcb == NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &stcb->asoc.delete_prim_timer;
 		break;
 	case SCTP_TIMER_TYPE_NEWCOOKIE:
 	case SCTP_TIMER_TYPE_INPKILL:
 		/*
 		 * For INPKILL the inp is setup to die. We re-use the
 		 * signature_change timer since that has stopped and we are
 		 * in the GONE state.
 		 */
 		if ((inp == NULL) || (stcb != NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &inp->sctp_ep.signature_change;
 		break;
 	case SCTP_TIMER_TYPE_ADDR_WQ:
 		if ((inp != NULL) || (stcb != NULL) || (net != NULL)) {
 			goto bad_args;
 		}
 		tmr = &SCTP_BASE_INFO(addr_wq_timer);
 		break;
 	default:
 #ifdef INVARIANTS
 		panic("Unknown timer type %d", t_type);
 #else
 		return;
 #endif
 	}
 	KASSERT(tmr != NULL, ("tmr is NULL for timer type %d", t_type));
 	if ((tmr->type != SCTP_TIMER_TYPE_NONE) &&
 	    (tmr->type != t_type)) {
 		/*
 		 * Ok we have a timer that is under joint use. Cookie timer
 		 * per chance with the SEND timer. We therefore are NOT
 		 * running the timer that the caller wants stopped.  So just
 		 * return.
 		 */
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Shared timer type %d not running: inp=%p, stcb=%p, net=%p.\n",
 		    t_type, inp, stcb, net);
 		return;
 	}
 	if ((t_type == SCTP_TIMER_TYPE_SEND) && (stcb != NULL)) {
 		/* Keep the count of running send timers from going negative. */
 		stcb->asoc.num_send_timers_up--;
 		if (stcb->asoc.num_send_timers_up < 0) {
 			stcb->asoc.num_send_timers_up = 0;
 		}
 	}
 	tmr->self = NULL;
 	tmr->stopped_from = from;
 	if (SCTP_OS_TIMER_STOP(&tmr->timer) == 1) {
 		KASSERT(tmr->ep == inp,
 		    ("sctp_timer_stop of type %d: inp = %p, tmr->inp = %p",
 		    t_type, inp, tmr->ep));
 		KASSERT(tmr->tcb == stcb,
 		    ("sctp_timer_stop of type %d: stcb = %p, tmr->stcb = %p",
 		    t_type, stcb, tmr->tcb));
 		KASSERT(((t_type == SCTP_TIMER_TYPE_ASCONF) && (tmr->net != NULL)) ||
 		    ((t_type != SCTP_TIMER_TYPE_ASCONF) && (tmr->net == net)),
 		    ("sctp_timer_stop of type %d: net = %p, tmr->net = %p",
 		    t_type, net, tmr->net));
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Timer type %d stopped: inp=%p, stcb=%p, net=%p.\n",
 		    t_type, inp, stcb, net);
 		/*
 		 * If the timer was actually stopped, decrement reference
 		 * counts that were incremented in sctp_timer_start().
 		 */
 		if (tmr->ep != NULL) {
 			tmr->ep = NULL;
 			SCTP_INP_DECR_REF(inp);
 		}
 		if (tmr->tcb != NULL) {
 			tmr->tcb = NULL;
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		}
 		if (tmr->net != NULL) {
 			struct sctp_nets *tmr_net;
 
 			/*
 			 * Can't use net, since it doesn't work for
 			 * SCTP_TIMER_TYPE_ASCONF.
 			 */
 			tmr_net = tmr->net;
 			tmr->net = NULL;
 			sctp_free_remote_addr(tmr_net);
 		}
 	} else {
 		SCTPDBG(SCTP_DEBUG_TIMER2,
 		    "Timer type %d not stopped: inp=%p, stcb=%p, net=%p.\n",
 		    t_type, inp, stcb, net);
 	}
 	return;
 
 bad_args:
 	/* Common exit for an invalid inp/stcb/net combination. */
 #ifdef INVARIANTS
 	panic("sctp_timer_stop of type %d: inp = %p, stcb = %p, net = %p",
 	    t_type, inp, stcb, net);
 #else
 	return;
 #endif
 }
 
 /*
  * Return the sum of the buffer lengths of all mbufs in the chain that
  * starts at m (0 for a NULL chain).
  */
 uint32_t
 sctp_calculate_len(struct mbuf *m)
 {
 	uint32_t total;
 
 	total = 0;
 	while (m != NULL) {
 		total += SCTP_BUF_LEN(m);
 		m = SCTP_BUF_NEXT(m);
 	}
 	return (total);
 }
 
 /*
  * Given an association and starting time of the current RTT period, update
  * RTO in number of msecs. net should point to the current network.
  * Return 1, if an RTO update was performed, return 0 if no update was
  * performed due to invalid starting point.
  */
 
 int
 sctp_calculate_rto(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     struct sctp_nets *net,
     struct timeval *old,
     int rtt_from_sack)
 {
 	struct timeval now;
 	uint64_t rtt_us;	/* RTT in us */
 	int32_t rtt;		/* RTT in ms */
 	uint32_t new_rto;
 	int first_measure = 0;
 
 	/************************/
 	/* 1. calculate new RTT */
 	/************************/
 	/* get the current time */
 	if (stcb->asoc.use_precise_time) {
 		(void)SCTP_GETPTIME_TIMEVAL(&now);
 	} else {
 		(void)SCTP_GETTIME_TIMEVAL(&now);
 	}
 	if ((old->tv_sec > now.tv_sec) ||
 	    ((old->tv_sec == now.tv_sec) && (old->tv_usec > now.tv_usec))) {
 		/* The starting point is in the future. */
 		return (0);
 	}
 	timevalsub(&now, old);
 	rtt_us = (uint64_t)1000000 * (uint64_t)now.tv_sec + (uint64_t)now.tv_usec;
 	if (rtt_us > SCTP_RTO_UPPER_BOUND * 1000) {
 		/* The RTT is larger than a sane value. */
 		return (0);
 	}
 	/* store the current RTT in us */
 	net->rtt = rtt_us;
 	/* compute rtt in ms */
 	rtt = (int32_t)(net->rtt / 1000);
 	if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
 		/*
 		 * Tell the CC module that a new update has just occurred
 		 * from a sack
 		 */
 		(*asoc->cc_functions.sctp_rtt_calculated) (stcb, net, &now);
 	}
 	/*
 	 * Do we need to determine the lan? We do this only on sacks i.e.
 	 * RTT being determined from data not non-data (HB/INIT->INITACK).
 	 */
 	if ((rtt_from_sack == SCTP_RTT_FROM_DATA) &&
 	    (net->lan_type == SCTP_LAN_UNKNOWN)) {
 		if (net->rtt > SCTP_LOCAL_LAN_RTT) {
 			net->lan_type = SCTP_LAN_INTERNET;
 		} else {
 			net->lan_type = SCTP_LAN_LOCAL;
 		}
 	}
 
 	/***************************/
 	/* 2. update RTTVAR & SRTT */
 	/***************************/
 	/*-
 	 * Compute the scaled average lastsa and the
 	 * scaled variance lastsv as described in van Jacobson
 	 * Paper "Congestion Avoidance and Control", Annex A.
 	 *
 	 * (net->lastsa >> SCTP_RTT_SHIFT) is the srtt
 	 * (net->lastsv >> SCTP_RTT_VAR_SHIFT) is the rttvar
 	 */
 	if (net->RTO_measured) {
 		rtt -= (net->lastsa >> SCTP_RTT_SHIFT);
 		net->lastsa += rtt;
 		if (rtt < 0) {
 			rtt = -rtt;
 		}
 		rtt -= (net->lastsv >> SCTP_RTT_VAR_SHIFT);
 		net->lastsv += rtt;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RTTVAR_LOGGING_ENABLE) {
 			rto_logging(net, SCTP_LOG_RTTVAR);
 		}
 	} else {
 		/* First RTO measurement */
 		net->RTO_measured = 1;
 		first_measure = 1;
 		net->lastsa = rtt << SCTP_RTT_SHIFT;
 		net->lastsv = (rtt / 2) << SCTP_RTT_VAR_SHIFT;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RTTVAR_LOGGING_ENABLE) {
 			rto_logging(net, SCTP_LOG_INITIAL_RTT);
 		}
 	}
 	if (net->lastsv == 0) {
 		net->lastsv = SCTP_CLOCK_GRANULARITY;
 	}
 	new_rto = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
 	if ((new_rto > SCTP_SAT_NETWORK_MIN) &&
 	    (stcb->asoc.sat_network_lockout == 0)) {
 		stcb->asoc.sat_network = 1;
 	} else if ((!first_measure) && stcb->asoc.sat_network) {
 		stcb->asoc.sat_network = 0;
 		stcb->asoc.sat_network_lockout = 1;
 	}
 	/* bound it, per C6/C7 in Section 5.3.1 */
 	if (new_rto < stcb->asoc.minrto) {
 		new_rto = stcb->asoc.minrto;
 	}
 	if (new_rto > stcb->asoc.maxrto) {
 		new_rto = stcb->asoc.maxrto;
 	}
 	net->RTO = new_rto;
 	return (1);
 }
 
 /*
  * Return a pointer to 'len' contiguous bytes of the mbuf chain m, starting
  * at offset 'off'.  If the bytes lie within a single mbuf, a pointer into
  * that mbuf is returned.  If they span more than one mbuf, they are copied
  * to in_ptr and in_ptr is returned; the caller must ensure that the buffer
  * at in_ptr holds at least 'len' bytes.  Returns NULL if off is negative,
  * len is not positive, or the chain does not contain 'len' bytes at 'off'.
  */
 caddr_t
 sctp_m_getptr(struct mbuf *m, int off, int len, uint8_t *in_ptr)
 {
 	uint8_t *dst;
 	uint32_t chunk;
 
 	if ((off < 0) || (len <= 0)) {
 		return (NULL);
 	}
 
 	/* Skip whole mbufs until 'off' falls inside the current one. */
 	for (; (m != NULL) && (off > 0); m = SCTP_BUF_NEXT(m)) {
 		if (off < SCTP_BUF_LEN(m)) {
 			break;
 		}
 		off -= SCTP_BUF_LEN(m);
 	}
 	if (m == NULL) {
 		return (NULL);
 	}
 
 	/* Contiguous case: everything requested is in this mbuf. */
 	if ((SCTP_BUF_LEN(m) - off) >= len) {
 		return (mtod(m, caddr_t)+off);
 	}
 
 	/* Otherwise gather the pieces into the caller's buffer. */
 	dst = in_ptr;
 	while ((m != NULL) && (len > 0)) {
 		chunk = min(SCTP_BUF_LEN(m) - off, len);
 		memcpy(dst, mtod(m, caddr_t)+off, chunk);
 		dst += chunk;
 		len -= chunk;
 		/* Only the first mbuf is entered at an offset. */
 		off = 0;
 		m = SCTP_BUF_NEXT(m);
 	}
 	if ((m == NULL) && (len > 0)) {
 		/* The chain ended before 'len' bytes were copied. */
 		return (NULL);
 	}
 	return ((caddr_t)in_ptr);
 }
 
 struct sctp_paramhdr *
 sctp_get_next_param(struct mbuf *m,
     int offset,
     struct sctp_paramhdr *pull,
     int pull_limit)
 {
 	/* This just provides a typed signature to Peter's Pull routine */
 	return ((struct sctp_paramhdr *)sctp_m_getptr(m, offset, pull_limit,
 	    (uint8_t *)pull));
 }
 
 /*
  * Append padlen zero bytes to the mbuf m.  If m has enough trailing space
  * the padding goes into m itself; otherwise a new mbuf is allocated, linked
  * after m and used for the padding.
  *
  * Returns the mbuf that holds the padding, or NULL if padlen is outside
  * 0..3 or the allocation fails.
  */
 struct mbuf *
 sctp_add_pad_tombuf(struct mbuf *m, int padlen)
 {
 	struct mbuf *m_last;
 	caddr_t dp;
 
 	/*
 	 * Reject negative values as well: they would shrink the mbuf
 	 * length below and turn into a huge size for memset().
 	 */
 	if ((padlen < 0) || (padlen > 3)) {
 		return (NULL);
 	}
 	if (padlen <= M_TRAILINGSPACE(m)) {
 		/*
 		 * The easy way. We hope the majority of the time we hit
 		 * here :)
 		 */
 		m_last = m;
 	} else {
 		/* Hard way we must grow the mbuf chain */
 		m_last = sctp_get_mbuf_for_msg(padlen, 0, M_NOWAIT, 1, MT_DATA);
 		if (m_last == NULL) {
 			return (NULL);
 		}
 		SCTP_BUF_LEN(m_last) = 0;
 		SCTP_BUF_NEXT(m_last) = NULL;
 		SCTP_BUF_NEXT(m) = m_last;
 	}
 	/* Zero the bytes right behind the current data of m_last. */
 	dp = mtod(m_last, caddr_t)+SCTP_BUF_LEN(m_last);
 	SCTP_BUF_LEN(m_last) += padlen;
 	memset(dp, 0, padlen);
 	return (m_last);
 }
 
 /*
  * Pad the end of a chain with padval bytes via sctp_add_pad_tombuf().
  * If last_mbuf is given it is padded directly; otherwise the chain m is
  * walked to its last mbuf first.  Returns what sctp_add_pad_tombuf()
  * returns, or NULL if neither last_mbuf nor m is provided.
  */
 struct mbuf *
 sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
 {
 	struct mbuf *tail;
 
 	if (last_mbuf != NULL) {
 		return (sctp_add_pad_tombuf(last_mbuf, padval));
 	}
 	if (m == NULL) {
 		return (NULL);
 	}
 	/* Walk to the last mbuf of the chain. */
 	tail = m;
 	while (SCTP_BUF_NEXT(tail) != NULL) {
 		tail = SCTP_BUF_NEXT(tail);
 	}
 	return (sctp_add_pad_tombuf(tail, padval));
 }
 
 /*
  * Report an association state change for stcb.  Does nothing if stcb is
  * NULL.
  *
  * If the SCTP_PCB_FLAGS_RECVASSOCEVNT feature is on, an SCTP_ASSOC_CHANGE
  * notification carrying state and error is built and added to the read
  * queue.  For SCTP_COMM_UP / SCTP_RESTART the list of supported features
  * is appended; for SCTP_COMM_LOST / SCTP_CANT_STR_ASSOC the ABORT chunk
  * 'abort' (if any, truncated to SCTP_CHUNK_BUFFER_SIZE) is appended.
  *
  * For sockets flagged TCPTYPE or IN_TCPPOOL that see SCTP_COMM_LOST or
  * SCTP_CANT_STR_ASSOC, so_error is set (chosen from from_peer, timedout
  * and the association state) and socantrcvmore() is called.  Finally
  * sorwakeup() and sowwakeup() are called on the socket.
  *
  * so_locked is passed through to sctp_add_to_readq().
  */
 static void
 sctp_notify_assoc_change(uint16_t state, struct sctp_tcb *stcb,
     uint16_t error, struct sctp_abort_chunk *abort,
     bool from_peer, bool timedout, int so_locked)
 {
 	struct mbuf *m_notify;
 	struct sctp_assoc_change *sac;
 	struct sctp_queued_to_read *control;
 	unsigned int notif_len;
 	uint16_t abort_len;
 	unsigned int i;
 
 	KASSERT(abort == NULL || from_peer,
 	    ("sctp_notify_assoc_change: ABORT chunk provided for local termination"));
 	KASSERT(!from_peer || !timedout,
 	    ("sctp_notify_assoc_change: timeouts can only be local"));
 	if (stcb == NULL) {
 		return;
 	}
 	if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
 		/* Work out how much room the variable sac_info part needs. */
 		notif_len = (unsigned int)sizeof(struct sctp_assoc_change);
 		if (abort != NULL) {
 			abort_len = ntohs(abort->ch.chunk_length);
 			/*
 			 * Only SCTP_CHUNK_BUFFER_SIZE are guaranteed to be
 			 * contiguous.
 			 */
 			if (abort_len > SCTP_CHUNK_BUFFER_SIZE) {
 				abort_len = SCTP_CHUNK_BUFFER_SIZE;
 			}
 		} else {
 			abort_len = 0;
 		}
 		if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) {
 			notif_len += SCTP_ASSOC_SUPPORTS_MAX;
 		} else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) {
 			notif_len += abort_len;
 		}
 		m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
 		if (m_notify == NULL) {
 			/* Retry with smaller value. */
 			notif_len = (unsigned int)sizeof(struct sctp_assoc_change);
 			m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
 			if (m_notify == NULL) {
 				/* No notification, but still update the socket. */
 				goto set_error;
 			}
 		}
 		SCTP_BUF_NEXT(m_notify) = NULL;
 		sac = mtod(m_notify, struct sctp_assoc_change *);
 		memset(sac, 0, notif_len);
 		sac->sac_type = SCTP_ASSOC_CHANGE;
 		sac->sac_flags = 0;
 		/* Grows below by whatever ends up in sac_info. */
 		sac->sac_length = sizeof(struct sctp_assoc_change);
 		sac->sac_state = state;
 		sac->sac_error = error;
 		if (state == SCTP_CANT_STR_ASSOC) {
 			sac->sac_outbound_streams = 0;
 			sac->sac_inbound_streams = 0;
 		} else {
 			sac->sac_outbound_streams = stcb->asoc.streamoutcnt;
 			sac->sac_inbound_streams = stcb->asoc.streamincnt;
 		}
 		sac->sac_assoc_id = sctp_get_associd(stcb);
 		/*
 		 * Fill sac_info only if room for it was allocated; after the
 		 * retry above notif_len is just the fixed header size.
 		 */
 		if (notif_len > sizeof(struct sctp_assoc_change)) {
 			if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) {
 				/* One byte per supported feature. */
 				i = 0;
 				if (stcb->asoc.prsctp_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_PR;
 				}
 				if (stcb->asoc.auth_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_AUTH;
 				}
 				if (stcb->asoc.asconf_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_ASCONF;
 				}
 				if (stcb->asoc.idata_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_INTERLEAVING;
 				}
 				sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_MULTIBUF;
 				if (stcb->asoc.reconfig_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_RE_CONFIG;
 				}
 				sac->sac_length += i;
 			} else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) {
 				/* notif_len > header size implies abort_len > 0 here. */
 				memcpy(sac->sac_info, abort, abort_len);
 				sac->sac_length += abort_len;
 			}
 		}
 		SCTP_BUF_LEN(m_notify) = sac->sac_length;
 		control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 		    0, 0, stcb->asoc.context, 0, 0, 0,
 		    m_notify);
 		if (control != NULL) {
 			control->length = SCTP_BUF_LEN(m_notify);
 			control->spec_flags = M_NOTIFICATION;
 			/* not that we need this */
 			control->tail_mbuf = m_notify;
 			sctp_add_to_readq(stcb->sctp_ep, stcb,
 			    control,
 			    &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD,
 			    so_locked);
 		} else {
 			/* No read queue entry: drop the notification. */
 			sctp_m_freem(m_notify);
 		}
 	}
 	/*
 	 * For 1-to-1 style sockets, we send up and error when an ABORT
 	 * comes in.
 	 */
 set_error:
 	if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 	    ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
 		SOCK_LOCK(stcb->sctp_socket);
 		if (from_peer) {
 			/* Peer terminated: refused while in COOKIE_WAIT, reset otherwise. */
 			if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
 				stcb->sctp_socket->so_error = ECONNREFUSED;
 			} else {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
 				stcb->sctp_socket->so_error = ECONNRESET;
 			}
 		} else {
 			/* Local termination: timeout or abort. */
 			if (timedout) {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ETIMEDOUT);
 				stcb->sctp_socket->so_error = ETIMEDOUT;
 			} else {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
 				stcb->sctp_socket->so_error = ECONNABORTED;
 			}
 		}
 		SOCK_UNLOCK(stcb->sctp_socket);
 	}
 	/* Wake ANY sleepers */
 	if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 	    ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
 		socantrcvmore(stcb->sctp_socket);
 	}
 	sorwakeup(stcb->sctp_socket);
 	sowwakeup(stcb->sctp_socket);
 }
 
 /*
  * Queue an SCTP_PEER_ADDR_CHANGE notification for stcb reporting 'state'
  * and 'error' for the peer address sa.  Nothing is done if stcb is NULL,
  * the SCTP_PCB_FLAGS_RECVPADDREVNT feature is off, or memory cannot be
  * allocated.  so_locked is passed through to sctp_add_to_readq().
  */
 static void
 sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
     struct sockaddr *sa, uint32_t error, int so_locked)
 {
 	struct sctp_queued_to_read *ctl;
 	struct sctp_paddr_change *ev;
 	struct mbuf *m;
 
 	if (stcb == NULL) {
 		return;
 	}
 	if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT)) {
 		/* The event is not enabled. */
 		return;
 	}
 	m = sctp_get_mbuf_for_msg(sizeof(*ev), 0, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		return;
 	}
 	SCTP_BUF_LEN(m) = 0;
 	ev = mtod(m, struct sctp_paddr_change *);
 	memset(ev, 0, sizeof(*ev));
 	ev->spc_type = SCTP_PEER_ADDR_CHANGE;
 	ev->spc_flags = 0;
 	ev->spc_length = sizeof(*ev);
 	ev->spc_state = state;
 	ev->spc_error = error;
 	ev->spc_assoc_id = sctp_get_associd(stcb);
 
 	/* Copy the address in the form the user expects. */
 	switch (sa->sa_family) {
 #ifdef INET
 	case AF_INET:
 #ifdef INET6
 		if (sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 			/* Hand out the address as a v4-mapped IPv6 one. */
 			in6_sin_2_v4mapsin6((struct sockaddr_in *)sa,
 			    (struct sockaddr_in6 *)&ev->spc_aaddr);
 			break;
 		}
 #endif
 		memcpy(&ev->spc_aaddr, sa, sizeof(struct sockaddr_in));
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			struct sockaddr_in6 *dst6;
 
 			dst6 = (struct sockaddr_in6 *)&ev->spc_aaddr;
 			memcpy(dst6, sa, sizeof(struct sockaddr_in6));
 			if (!IN6_IS_SCOPE_LINKLOCAL(&dst6->sin6_addr)) {
 				break;
 			}
 			if (dst6->sin6_scope_id != 0) {
 				/* clear embedded scope_id for user */
 				in6_clearscope(&dst6->sin6_addr);
 			} else {
 				/* recover scope_id for user */
 				(void)sa6_recoverscope(dst6);
 			}
 			break;
 		}
 #endif
 	default:
 		/* TSNH */
 		break;
 	}
 
 	SCTP_BUF_NEXT(m) = NULL;
 	SCTP_BUF_LEN(m) = sizeof(*ev);
 
 	/* Wrap the mbuf in a read queue entry and append it to the socket. */
 	ctl = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m);
 	if (ctl == NULL) {
 		/* Out of memory: drop the notification. */
 		sctp_m_freem(m);
 		return;
 	}
 	ctl->spec_flags = M_NOTIFICATION;
 	ctl->length = SCTP_BUF_LEN(m);
 	/* not that we need this */
 	ctl->tail_mbuf = m;
 	sctp_add_to_readq(stcb->sctp_ep, stcb, ctl,
 	    &stcb->sctp_socket->so_rcv, 1,
 	    SCTP_READ_LOCK_NOT_HELD, so_locked);
 }
 
 /*
  * Queue a send-failed notification for the chunk chk.
  *
  * Depending on which feature is on, the notification header is a struct
  * sctp_send_failed_event (SCTP_PCB_FLAGS_RECVNSENDFAILEVNT) or a struct
  * sctp_send_failed.  'sent' selects SCTP_DATA_SENT vs. SCTP_DATA_UNSENT
  * and 'error' is copied into the notification.  The data of chk is chained
  * behind the header (with the chunk header and padding trimmed when the
  * sizes are consistent) and chk->data is set to NULL.
  *
  * Nothing is done if stcb is NULL, both send-failed features are off, or
  * the header mbuf cannot be allocated.  so_locked is passed through to
  * sctp_add_to_readq().
  */
 static void
 sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error,
     struct sctp_tmit_chunk *chk, int so_locked)
 {
 	struct mbuf *m_notify;
 	struct sctp_send_failed *ssf;
 	struct sctp_send_failed_event *ssfe;
 	struct sctp_queued_to_read *control;
 	struct sctp_chunkhdr *chkhdr;
 	int notifhdr_len, chk_len, chkhdr_len, padding_len, payload_len;
 
 	if ((stcb == NULL) ||
 	    (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) &&
 	    sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT))) {
 		/* event not enabled */
 		return;
 	}
 
 	/* The header size depends on which notification format is used. */
 	if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
 		notifhdr_len = sizeof(struct sctp_send_failed_event);
 	} else {
 		notifhdr_len = sizeof(struct sctp_send_failed);
 	}
 	m_notify = sctp_get_mbuf_for_msg(notifhdr_len, 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	SCTP_BUF_LEN(m_notify) = notifhdr_len;
 	/* The chunk header is an I-DATA or a DATA one. */
 	if (stcb->asoc.idata_supported) {
 		chkhdr_len = sizeof(struct sctp_idata_chunk);
 	} else {
 		chkhdr_len = sizeof(struct sctp_data_chunk);
 	}
 	/* Use some defaults in case we can't access the chunk header */
 	if (chk->send_size >= chkhdr_len) {
 		payload_len = chk->send_size - chkhdr_len;
 	} else {
 		payload_len = 0;
 	}
 	padding_len = 0;
 	if (chk->data != NULL) {
 		chkhdr = mtod(chk->data, struct sctp_chunkhdr *);
 		if (chkhdr != NULL) {
 			chk_len = ntohs(chkhdr->chunk_length);
 			/*
 			 * Trust the chunk length only if it covers the
 			 * header, fits in send_size and leaves fewer than
 			 * 4 bytes over; those bytes are taken as padding.
 			 */
 			if ((chk_len >= chkhdr_len) &&
 			    (chk->send_size >= chk_len) &&
 			    (chk->send_size - chk_len < 4)) {
 				padding_len = chk->send_size - chk_len;
 				payload_len = chk->send_size - chkhdr_len - padding_len;
 			}
 		}
 	}
 	if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
 		ssfe = mtod(m_notify, struct sctp_send_failed_event *);
 		memset(ssfe, 0, notifhdr_len);
 		ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
 		if (sent) {
 			ssfe->ssfe_flags = SCTP_DATA_SENT;
 		} else {
 			ssfe->ssfe_flags = SCTP_DATA_UNSENT;
 		}
 		ssfe->ssfe_length = (uint32_t)(notifhdr_len + payload_len);
 		ssfe->ssfe_error = error;
 		/* not exactly what the user sent in, but should be close :) */
 		ssfe->ssfe_info.snd_sid = chk->rec.data.sid;
 		ssfe->ssfe_info.snd_flags = chk->rec.data.rcv_flags;
 		ssfe->ssfe_info.snd_ppid = chk->rec.data.ppid;
 		ssfe->ssfe_info.snd_context = chk->rec.data.context;
 		ssfe->ssfe_info.snd_assoc_id = sctp_get_associd(stcb);
 		ssfe->ssfe_assoc_id = sctp_get_associd(stcb);
 	} else {
 		ssf = mtod(m_notify, struct sctp_send_failed *);
 		memset(ssf, 0, notifhdr_len);
 		ssf->ssf_type = SCTP_SEND_FAILED;
 		if (sent) {
 			ssf->ssf_flags = SCTP_DATA_SENT;
 		} else {
 			ssf->ssf_flags = SCTP_DATA_UNSENT;
 		}
 		ssf->ssf_length = (uint32_t)(notifhdr_len + payload_len);
 		ssf->ssf_error = error;
 		/* not exactly what the user sent in, but should be close :) */
 		ssf->ssf_info.sinfo_stream = chk->rec.data.sid;
 		/* Only the low 16 bits of the message id fit into sinfo_ssn. */
 		ssf->ssf_info.sinfo_ssn = (uint16_t)chk->rec.data.mid;
 		ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags;
 		ssf->ssf_info.sinfo_ppid = chk->rec.data.ppid;
 		ssf->ssf_info.sinfo_context = chk->rec.data.context;
 		ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
 		ssf->ssf_assoc_id = sctp_get_associd(stcb);
 	}
 	if (chk->data != NULL) {
 		/* Trim off the sctp chunk header (it should be there) */
 		if (chk->send_size == chkhdr_len + payload_len + padding_len) {
 			/* Header from the front, padding from the back. */
 			m_adj(chk->data, chkhdr_len);
 			m_adj(chk->data, -padding_len);
 			sctp_mbuf_crush(chk->data);
 			chk->send_size -= (chkhdr_len + padding_len);
 		}
 	}
 	SCTP_BUF_NEXT(m_notify) = chk->data;
 	/* Steal off the mbuf */
 	chk->data = NULL;
 	/*
 	 * For this case, we check the actual socket buffer, since the assoc
 	 * is going away we don't want to overfill the socket buffer for a
 	 * non-reader
 	 */
 	if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
 		/* The stolen data is chained to m_notify at this point. */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	/* append to socket */
 	control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m_notify);
 	if (control == NULL) {
 		/* no memory */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	control->length = SCTP_BUF_LEN(m_notify);
 	control->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	control->tail_mbuf = m_notify;
 	sctp_add_to_readq(stcb->sctp_ep, stcb,
 	    control,
 	    &stcb->sctp_socket->so_rcv, 1,
 	    SCTP_READ_LOCK_NOT_HELD,
 	    so_locked);
 }
 
 /*
  * Queue a send-failed notification (always flagged SCTP_DATA_UNSENT) for
  * the pending stream queue entry sp.
  *
  * The header is a struct sctp_send_failed_event if the
  * SCTP_PCB_FLAGS_RECVNSENDFAILEVNT feature is on and a struct
  * sctp_send_failed otherwise.  The data of sp is chained behind the header
  * and sp->data is set to NULL.  Nothing is done if stcb is NULL, both
  * send-failed features are off, or the header mbuf cannot be allocated.
  * so_locked is passed through to sctp_add_to_readq().
  */
 static void
 sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
     struct sctp_stream_queue_pending *sp, int so_locked)
 {
 	struct sctp_queued_to_read *ctl;
 	struct sctp_send_failed_event *ev_new;
 	struct sctp_send_failed *ev_old;
 	struct mbuf *m;
 	int hdr_len;
 
 	if (stcb == NULL) {
 		return;
 	}
 	if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) &&
 	    sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
 		/* Neither flavour of the event is enabled. */
 		return;
 	}
 	hdr_len = (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) ?
 	    sizeof(struct sctp_send_failed_event) :
 	    sizeof(struct sctp_send_failed);
 	m = sctp_get_mbuf_for_msg(hdr_len, 0, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* Out of mbufs. */
 		return;
 	}
 	SCTP_BUF_LEN(m) = hdr_len;
 	if (!(sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT))) {
 		/* Old style notification. */
 		ev_old = mtod(m, struct sctp_send_failed *);
 		memset(ev_old, 0, hdr_len);
 		ev_old->ssf_type = SCTP_SEND_FAILED;
 		ev_old->ssf_flags = SCTP_DATA_UNSENT;
 		ev_old->ssf_length = (uint32_t)(hdr_len + sp->length);
 		ev_old->ssf_error = error;
 		/* not exactly what the user sent in, but should be close :) */
 		ev_old->ssf_info.sinfo_stream = sp->sid;
 		ev_old->ssf_info.sinfo_ssn = 0;
 		ev_old->ssf_info.sinfo_flags = sp->some_taken ?
 		    SCTP_DATA_LAST_FRAG : SCTP_DATA_NOT_FRAG;
 		ev_old->ssf_info.sinfo_ppid = sp->ppid;
 		ev_old->ssf_info.sinfo_context = sp->context;
 		ev_old->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
 		ev_old->ssf_assoc_id = sctp_get_associd(stcb);
 	} else {
 		/* New style notification. */
 		ev_new = mtod(m, struct sctp_send_failed_event *);
 		memset(ev_new, 0, hdr_len);
 		ev_new->ssfe_type = SCTP_SEND_FAILED_EVENT;
 		ev_new->ssfe_flags = SCTP_DATA_UNSENT;
 		ev_new->ssfe_length = (uint32_t)(hdr_len + sp->length);
 		ev_new->ssfe_error = error;
 		/* not exactly what the user sent in, but should be close :) */
 		ev_new->ssfe_info.snd_sid = sp->sid;
 		ev_new->ssfe_info.snd_flags = sp->some_taken ?
 		    SCTP_DATA_LAST_FRAG : SCTP_DATA_NOT_FRAG;
 		ev_new->ssfe_info.snd_ppid = sp->ppid;
 		ev_new->ssfe_info.snd_context = sp->context;
 		ev_new->ssfe_info.snd_assoc_id = sctp_get_associd(stcb);
 		ev_new->ssfe_assoc_id = sctp_get_associd(stcb);
 	}
 
 	/* Take the data over from sp and chain it behind the header. */
 	SCTP_BUF_NEXT(m) = sp->data;
 	sp->data = NULL;
 
 	/*
 	 * The assoc is going away, so check the actual socket buffer to
 	 * avoid overfilling it for a non-reader.
 	 */
 	if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m)) {
 		sctp_m_freem(m);
 		return;
 	}
 	/* Wrap the chain in a read queue entry and append it to the socket. */
 	ctl = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m);
 	if (ctl == NULL) {
 		/* Out of memory: drop the notification. */
 		sctp_m_freem(m);
 		return;
 	}
 	ctl->spec_flags = M_NOTIFICATION;
 	ctl->length = SCTP_BUF_LEN(m);
 	/* not that we need this */
 	ctl->tail_mbuf = m;
 	sctp_add_to_readq(stcb->sctp_ep, stcb, ctl,
 	    &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, so_locked);
 }
 
 /*
  * Queue an SCTP_ADAPTATION_INDICATION notification carrying the peer's
  * adaptation indication (stcb->asoc.peers_adaptation).  Nothing is done if
  * stcb is NULL, the SCTP_PCB_FLAGS_ADAPTATIONEVNT feature is off, or
  * memory cannot be allocated.
  */
 static void
 sctp_notify_adaptation_layer(struct sctp_tcb *stcb)
 {
 	struct mbuf *m_notify;
 	struct sctp_adaptation_event *sai;
 	struct sctp_queued_to_read *control;
 
 	if ((stcb == NULL) ||
 	    sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) {
 		/* event not enabled */
 		return;
 	}
 
 	/*
 	 * Size the mbuf with the structure that is written below, not with
 	 * the differently spelled struct sctp_adaption_event.
 	 */
 	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaptation_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL) {
 		/* no space left */
 		return;
 	}
 	SCTP_BUF_LEN(m_notify) = 0;
 	sai = mtod(m_notify, struct sctp_adaptation_event *);
 	memset(sai, 0, sizeof(struct sctp_adaptation_event));
 	sai->sai_type = SCTP_ADAPTATION_INDICATION;
 	sai->sai_flags = 0;
 	sai->sai_length = sizeof(struct sctp_adaptation_event);
 	sai->sai_adaptation_ind = stcb->asoc.peers_adaptation;
 	sai->sai_assoc_id = sctp_get_associd(stcb);
 
 	SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_adaptation_event);
 	SCTP_BUF_NEXT(m_notify) = NULL;
 
 	/* append to socket */
 	control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m_notify);
 	if (control == NULL) {
 		/* no memory */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	control->length = SCTP_BUF_LEN(m_notify);
 	control->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	control->tail_mbuf = m_notify;
 	sctp_add_to_readq(stcb->sctp_ep, stcb,
 	    control,
 	    &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
 }
 
 /*
  * Queue an SCTP_PARTIAL_DELIVERY_EVENT notification.  'error' becomes the
  * indication; 'val' carries the stream in its upper 16 bits and the
  * sequence in its lower 16 bits.  The entry is inserted into the read
  * queue of the endpoint directly, after stcb->asoc.control_pdapi when that
  * is set and at the tail otherwise.
  *
  * This always must be called with the read-queue LOCKED in the INP.
  * NOTE(review): so_locked is not used in this function.
  */
 static void
 sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
     uint32_t val, int so_locked)
 {
 	struct sctp_queued_to_read *ctl;
 	struct sctp_pdapi_event *ev;
 	struct sockbuf *rcv;
 	struct mbuf *m;
 
 	if (stcb == NULL) {
 		return;
 	}
 	if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_PDAPIEVNT)) {
 		/* The event is not enabled. */
 		return;
 	}
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
 		return;
 	}
 
 	m = sctp_get_mbuf_for_msg(sizeof(*ev), 0, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* Out of mbufs. */
 		return;
 	}
 	SCTP_BUF_LEN(m) = 0;
 	ev = mtod(m, struct sctp_pdapi_event *);
 	memset(ev, 0, sizeof(*ev));
 	ev->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
 	ev->pdapi_flags = 0;
 	ev->pdapi_length = sizeof(*ev);
 	ev->pdapi_indication = error;
 	ev->pdapi_stream = (val >> 16);
 	ev->pdapi_seq = (val & 0x0000ffff);
 	ev->pdapi_assoc_id = sctp_get_associd(stcb);
 
 	SCTP_BUF_NEXT(m) = NULL;
 	SCTP_BUF_LEN(m) = sizeof(*ev);
 	ctl = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m);
 	if (ctl == NULL) {
 		/* Out of memory: drop the notification. */
 		sctp_m_freem(m);
 		return;
 	}
 	ctl->spec_flags = M_NOTIFICATION;
 	ctl->length = SCTP_BUF_LEN(m);
 	/* not that we need this */
 	ctl->tail_mbuf = m;
 
 	/* Account for the mbuf in the receive socket buffer. */
 	rcv = &stcb->sctp_socket->so_rcv;
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 		sctp_sblog(rcv, ctl->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
 	}
 	sctp_sballoc(stcb, rcv, m);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 		sctp_sblog(rcv, ctl->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
 	}
 	ctl->end_added = 1;
 	if (stcb->asoc.control_pdapi == NULL) {
 		/* we really should not see this case */
 		TAILQ_INSERT_TAIL(&stcb->sctp_ep->read_queue, ctl, next);
 	} else {
 		TAILQ_INSERT_AFTER(&stcb->sctp_ep->read_queue, stcb->asoc.control_pdapi, ctl, next);
 	}
 	if ((stcb->sctp_ep != NULL) && (stcb->sctp_socket != NULL)) {
 		/* This should always be the case */
 		sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
 	}
 }
 
 static void
 sctp_notify_shutdown_event(struct sctp_tcb *stcb)
 {
 	struct mbuf *m_notify;
 	struct sctp_shutdown_event *sse;
 	struct sctp_queued_to_read *control;
 
 	/*
 	 * For TCP model AND UDP connected sockets we will send an error up
 	 * when an SHUTDOWN completes
 	 */
 	if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		/* mark socket closed for read/write and wakeup! */
 		socantsendmore(stcb->sctp_socket);
 	}
 	if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) {
 		/* event not enabled */
 		return;
 	}
 
 	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	sse = mtod(m_notify, struct sctp_shutdown_event *);
 	memset(sse, 0, sizeof(struct sctp_shutdown_event));
 	sse->sse_type = SCTP_SHUTDOWN_EVENT;
 	sse->sse_flags = 0;
 	sse->sse_length = sizeof(struct sctp_shutdown_event);
 	sse->sse_assoc_id = sctp_get_associd(stcb);
 
 	SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_shutdown_event);
 	SCTP_BUF_NEXT(m_notify) = NULL;
 
 	/* append to socket */
 	control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m_notify);
 	if (control == NULL) {
 		/* no memory */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	control->length = SCTP_BUF_LEN(m_notify);
 	control->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	control->tail_mbuf = m_notify;
 	sctp_add_to_readq(stcb->sctp_ep, stcb,
 	    control,
 	    &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
 }
 
 static void
 sctp_notify_sender_dry_event(struct sctp_tcb *stcb,
     int so_locked)
 {
 	struct mbuf *m_notify;
 	struct sctp_sender_dry_event *event;
 	struct sctp_queued_to_read *control;
 
 	if ((stcb == NULL) ||
 	    sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DRYEVNT)) {
 		/* event not enabled */
 		return;
 	}
 
 	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL) {
 		/* no space left */
 		return;
 	}
 	SCTP_BUF_LEN(m_notify) = 0;
 	event = mtod(m_notify, struct sctp_sender_dry_event *);
 	memset(event, 0, sizeof(struct sctp_sender_dry_event));
 	event->sender_dry_type = SCTP_SENDER_DRY_EVENT;
 	event->sender_dry_flags = 0;
 	event->sender_dry_length = sizeof(struct sctp_sender_dry_event);
 	event->sender_dry_assoc_id = sctp_get_associd(stcb);
 
 	SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_sender_dry_event);
 	SCTP_BUF_NEXT(m_notify) = NULL;
 
 	/* append to socket */
 	control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m_notify);
 	if (control == NULL) {
 		/* no memory */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	control->length = SCTP_BUF_LEN(m_notify);
 	control->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	control->tail_mbuf = m_notify;
 	sctp_add_to_readq(stcb->sctp_ep, stcb, control,
 	    &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, so_locked);
 }
 
 /*
  * Queue an SCTP_STREAM_CHANGE_EVENT notification reporting the inbound and
  * outbound stream counts (numberin / numberout); flag is stored in
  * strchange_flags.
  *
  * asoc.peer_req_out is always cleared once the event is known to be enabled.
  * If it was set and flag is non-zero, the peer made the request and the local
  * user is not told.  The event is also dropped when memory is unavailable or
  * the receive socket buffer has no room for it.
  */
 void
 sctp_notify_stream_reset_add(struct sctp_tcb *stcb, uint16_t numberin, uint16_t numberout, int flag)
 {
 	struct sctp_stream_change_event *change;
 	struct sctp_queued_to_read *entry;
 	struct mbuf *note;
 	int peer_requested;
 
 	if (stcb == NULL) {
 		return;
 	}
 	if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_CHANGEEVNT)) {
 		/* event not enabled */
 		return;
 	}
 	peer_requested = (stcb->asoc.peer_req_out) && flag;
 	stcb->asoc.peer_req_out = 0;
 	if (peer_requested) {
 		/* Peer made the request, don't tell the local user */
 		return;
 	}
 
 	note = sctp_get_mbuf_for_msg(sizeof(struct sctp_stream_change_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (note == NULL) {
 		/* no space left */
 		return;
 	}
 	SCTP_BUF_LEN(note) = 0;
 
 	/* Build the event in the freshly allocated mbuf. */
 	change = mtod(note, struct sctp_stream_change_event *);
 	memset(change, 0, sizeof(struct sctp_stream_change_event));
 	change->strchange_type = SCTP_STREAM_CHANGE_EVENT;
 	change->strchange_flags = flag;
 	change->strchange_length = sizeof(struct sctp_stream_change_event);
 	change->strchange_assoc_id = sctp_get_associd(stcb);
 	change->strchange_instrms = numberin;
 	change->strchange_outstrms = numberout;
 	SCTP_BUF_LEN(note) = sizeof(struct sctp_stream_change_event);
 	SCTP_BUF_NEXT(note) = NULL;
 
 	if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(note)) {
 		/* no space */
 		sctp_m_freem(note);
 		return;
 	}
 
 	/* Wrap it in a readq entry and hand it to the socket. */
 	entry = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    note);
 	if (entry == NULL) {
 		/* no memory */
 		sctp_m_freem(note);
 		return;
 	}
 	entry->length = SCTP_BUF_LEN(note);
 	entry->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	entry->tail_mbuf = note;
 	sctp_add_to_readq(stcb->sctp_ep, stcb, entry,
 	    &stcb->sctp_socket->so_rcv, 1,
 	    SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
 }
 
 /*
  * Queue an SCTP_ASSOC_RESET_EVENT notification.  sending_tsn and recv_tsn are
  * reported as the local and remote TSN respectively; flag is stored in
  * assocreset_flags.
  *
  * The event is dropped when stcb is NULL, the event is not enabled, memory is
  * unavailable, or the receive socket buffer has no room for it.
  */
 void
 sctp_notify_stream_reset_tsn(struct sctp_tcb *stcb, uint32_t sending_tsn, uint32_t recv_tsn, int flag)
 {
 	struct sctp_assoc_reset_event *reset;
 	struct sctp_queued_to_read *entry;
 	struct mbuf *note;
 
 	if (stcb == NULL) {
 		return;
 	}
 	if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ASSOC_RESETEVNT)) {
 		/* event not enabled */
 		return;
 	}
 
 	note = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_reset_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (note == NULL) {
 		/* no space left */
 		return;
 	}
 	SCTP_BUF_LEN(note) = 0;
 
 	/* Build the event in the freshly allocated mbuf. */
 	reset = mtod(note, struct sctp_assoc_reset_event *);
 	memset(reset, 0, sizeof(struct sctp_assoc_reset_event));
 	reset->assocreset_type = SCTP_ASSOC_RESET_EVENT;
 	reset->assocreset_flags = flag;
 	reset->assocreset_length = sizeof(struct sctp_assoc_reset_event);
 	reset->assocreset_assoc_id = sctp_get_associd(stcb);
 	reset->assocreset_local_tsn = sending_tsn;
 	reset->assocreset_remote_tsn = recv_tsn;
 	SCTP_BUF_LEN(note) = sizeof(struct sctp_assoc_reset_event);
 	SCTP_BUF_NEXT(note) = NULL;
 
 	if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(note)) {
 		/* no space */
 		sctp_m_freem(note);
 		return;
 	}
 
 	/* Wrap it in a readq entry and hand it to the socket. */
 	entry = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    note);
 	if (entry == NULL) {
 		/* no memory */
 		sctp_m_freem(note);
 		return;
 	}
 	entry->length = SCTP_BUF_LEN(note);
 	entry->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	entry->tail_mbuf = note;
 	sctp_add_to_readq(stcb->sctp_ep, stcb, entry,
 	    &stcb->sctp_socket->so_rcv, 1,
 	    SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
 }
 
 /*
  * Queue an SCTP_STREAM_RESET_EVENT notification carrying a list of stream
  * ids.
  *
  * stcb           - association the event belongs to; NULL is ignored.
  * number_entries - number of 16-bit stream ids in list.
  * list           - stream ids; each is passed through ntohs() before being
  *                  stored in the event.
  * flag           - stored in strreset_flags.
  *
  * The notification is dropped when the event is not enabled, when
  * number_entries is negative or the resulting event does not fit into the
  * allocated mbuf, when memory is unavailable, or when the receive socket
  * buffer has no room for it.
  */
 static void
 sctp_notify_stream_reset(struct sctp_tcb *stcb,
     int number_entries, uint16_t *list, int flag)
 {
 	struct mbuf *m_notify;
 	struct sctp_queued_to_read *control;
 	struct sctp_stream_reset_event *strreset;
 	size_t space;
 	int len;
 	int i;
 
 	if ((stcb == NULL) ||
 	    (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT))) {
 		/* event not enabled */
 		return;
 	}
 	if (number_entries < 0) {
 		/*
 		 * A negative count cannot describe a list; without this check
 		 * it would yield a bogus length for the memset() below.
 		 */
 		return;
 	}
 
 	m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	SCTP_BUF_LEN(m_notify) = 0;
 	/*
 	 * Bound the entry count by the space actually available before
 	 * computing the length, so that the length can neither wrap nor be
 	 * truncated when it is stored in an int.
 	 */
 	space = (size_t)M_TRAILINGSPACE(m_notify);
 	if ((space < sizeof(struct sctp_stream_reset_event)) ||
 	    ((size_t)number_entries >
 	    (space - sizeof(struct sctp_stream_reset_event)) / sizeof(uint16_t))) {
 		/* never enough room */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	len = (int)(sizeof(struct sctp_stream_reset_event) +
 	    ((size_t)number_entries * sizeof(uint16_t)));
 	strreset = mtod(m_notify, struct sctp_stream_reset_event *);
 	memset(strreset, 0, len);
 	strreset->strreset_type = SCTP_STREAM_RESET_EVENT;
 	strreset->strreset_flags = flag;
 	strreset->strreset_length = len;
 	strreset->strreset_assoc_id = sctp_get_associd(stcb);
 	for (i = 0; i < number_entries; i++) {
 		strreset->strreset_stream_list[i] = ntohs(list[i]);
 	}
 	SCTP_BUF_LEN(m_notify) = len;
 	SCTP_BUF_NEXT(m_notify) = NULL;
 	if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
 		/* no space */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	/* append to socket */
 	control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    m_notify);
 	if (control == NULL) {
 		/* no memory */
 		sctp_m_freem(m_notify);
 		return;
 	}
 	control->length = SCTP_BUF_LEN(m_notify);
 	control->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	control->tail_mbuf = m_notify;
 	sctp_add_to_readq(stcb->sctp_ep, stcb,
 	    control,
 	    &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
 }
 
 /*
  * Queue an SCTP_REMOTE_ERROR notification.  error is reported as sre_error;
  * when chunk is not NULL, up to SCTP_CHUNK_BUFFER_SIZE bytes of it are
  * appended to the event as sre_data.
  *
  * If an mbuf large enough for the event plus the chunk cannot be obtained,
  * the allocation is retried for the bare event and the chunk is left out.
  */
 static void
 sctp_notify_remote_error(struct sctp_tcb *stcb, uint16_t error, struct sctp_error_chunk *chunk)
 {
 	struct sctp_queued_to_read *entry;
 	struct sctp_remote_error *event;
 	struct mbuf *note;
 	unsigned int total_len;
 	uint16_t copy_len;
 
 	if (stcb == NULL) {
 		return;
 	}
 	if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPEERERR)) {
 		return;
 	}
 
 	copy_len = 0;
 	if (chunk != NULL) {
 		copy_len = ntohs(chunk->ch.chunk_length);
 		/*
 		 * Only SCTP_CHUNK_BUFFER_SIZE are guaranteed to be
 		 * contiguous.
 		 */
 		if (copy_len > SCTP_CHUNK_BUFFER_SIZE) {
 			copy_len = SCTP_CHUNK_BUFFER_SIZE;
 		}
 	}
 
 	total_len = (unsigned int)(sizeof(struct sctp_remote_error) + copy_len);
 	note = sctp_get_mbuf_for_msg(total_len, 0, M_NOWAIT, 1, MT_DATA);
 	if (note == NULL) {
 		/* Retry with smaller value. */
 		total_len = (unsigned int)sizeof(struct sctp_remote_error);
 		note = sctp_get_mbuf_for_msg(total_len, 0, M_NOWAIT, 1, MT_DATA);
 		if (note == NULL) {
 			return;
 		}
 	}
 	SCTP_BUF_NEXT(note) = NULL;
 
 	/* Build the event in the freshly allocated mbuf. */
 	event = mtod(note, struct sctp_remote_error *);
 	memset(event, 0, total_len);
 	event->sre_type = SCTP_REMOTE_ERROR;
 	event->sre_flags = 0;
 	event->sre_length = sizeof(struct sctp_remote_error);
 	event->sre_error = error;
 	event->sre_assoc_id = sctp_get_associd(stcb);
 	if (total_len > sizeof(struct sctp_remote_error)) {
 		/* Room for the chunk was obtained, so attach it. */
 		memcpy(event->sre_data, chunk, copy_len);
 		event->sre_length += copy_len;
 	}
 	SCTP_BUF_LEN(note) = event->sre_length;
 
 	/* Wrap it in a readq entry and hand it to the socket. */
 	entry = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
 	    0, 0, stcb->asoc.context, 0, 0, 0,
 	    note);
 	if (entry == NULL) {
 		sctp_m_freem(note);
 		return;
 	}
 	entry->length = SCTP_BUF_LEN(note);
 	entry->spec_flags = M_NOTIFICATION;
 	/* not that we need this */
 	entry->tail_mbuf = note;
 	sctp_add_to_readq(stcb->sctp_ep, stcb, entry,
 	    &stcb->sctp_socket->so_rcv, 1,
 	    SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
 }
 
 /*
  * Central dispatcher for upper-layer (socket) notifications.
  *
  * notification - one of the SCTP_NOTIFY_* codes handled in the switch below.
  * stcb         - association the notification is about; NULL is ignored.
  * error        - meaning depends on the notification: it is forwarded as an
  *                error value to most helpers, but it is passed as the number
  *                of list entries for the SCTP_NOTIFY_STR_RESET_* codes.
  * data         - notification specific argument; each case casts it to the
  *                type its helper expects.
  * so_locked    - forwarded to the helpers that take it.
  *
  * Nothing is reported when the socket is gone or closed, or when the receive
  * side of the socket can not receive more.
  */
 void
 sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
     uint32_t error, void *data, int so_locked)
 {
 	if ((stcb == NULL) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
 		/* If the socket is gone we are out of here */
 		return;
 	}
 	if (stcb->sctp_socket->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		return;
 	}
 	/*
 	 * While the association is still in COOKIE_WAIT or COOKIE_ECHOED,
 	 * interface up/down/confirmed changes are not reported.
 	 */
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
 		if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
 		    (notification == SCTP_NOTIFY_INTERFACE_UP) ||
 		    (notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
 			/* Don't report these in front states */
 			return;
 		}
 	}
 	switch (notification) {
 	case SCTP_NOTIFY_ASSOC_UP:
 		/* SCTP_COMM_UP is reported only once per association. */
 		if (stcb->asoc.assoc_up_sent == 0) {
 			sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, NULL, false, false, so_locked);
 			stcb->asoc.assoc_up_sent = 1;
 		}
 		if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
 			sctp_notify_adaptation_layer(stcb);
 		}
 		/* Recurse to also report that the peer does not support AUTH. */
 		if (stcb->asoc.auth_supported == 0) {
 			sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
 			    NULL, so_locked);
 		}
 		break;
 	case SCTP_NOTIFY_ASSOC_DOWN:
 		sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, NULL, false, false, so_locked);
 		break;
 	/* For the three interface notifications, data is a struct sctp_nets. */
 	case SCTP_NOTIFY_INTERFACE_DOWN:
 		{
 			struct sctp_nets *net;
 
 			net = (struct sctp_nets *)data;
 			sctp_notify_peer_addr_change(stcb, SCTP_ADDR_UNREACHABLE,
 			    (struct sockaddr *)&net->ro._l_addr, error, so_locked);
 			break;
 		}
 	case SCTP_NOTIFY_INTERFACE_UP:
 		{
 			struct sctp_nets *net;
 
 			net = (struct sctp_nets *)data;
 			sctp_notify_peer_addr_change(stcb, SCTP_ADDR_AVAILABLE,
 			    (struct sockaddr *)&net->ro._l_addr, error, so_locked);
 			break;
 		}
 	case SCTP_NOTIFY_INTERFACE_CONFIRMED:
 		{
 			struct sctp_nets *net;
 
 			net = (struct sctp_nets *)data;
 			sctp_notify_peer_addr_change(stcb, SCTP_ADDR_CONFIRMED,
 			    (struct sockaddr *)&net->ro._l_addr, error, so_locked);
 			break;
 		}
 	case SCTP_NOTIFY_SPECIAL_SP_FAIL:
 		/* data is a struct sctp_stream_queue_pending. */
 		sctp_notify_send_failed2(stcb, error,
 		    (struct sctp_stream_queue_pending *)data, so_locked);
 		break;
 	/* For the two datagram failures, data is a struct sctp_tmit_chunk. */
 	case SCTP_NOTIFY_SENT_DG_FAIL:
 		sctp_notify_send_failed(stcb, 1, error,
 		    (struct sctp_tmit_chunk *)data, so_locked);
 		break;
 	case SCTP_NOTIFY_UNSENT_DG_FAIL:
 		sctp_notify_send_failed(stcb, 0, error,
 		    (struct sctp_tmit_chunk *)data, so_locked);
 		break;
 	case SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION:
 		{
 			uint32_t val;
 
 			/* data points to a uint32_t packing stream id and sequence. */
 			val = *((uint32_t *)data);
 
 			sctp_notify_partial_delivery_indication(stcb, error, val, so_locked);
 			break;
 		}
 	/*
 	 * The three abort/timeout notifications report SCTP_CANT_STR_ASSOC when
 	 * the association was still in COOKIE_WAIT or COOKIE_ECHOED, and
 	 * SCTP_COMM_LOST otherwise.  They differ only in the two boolean
 	 * arguments handed to sctp_notify_assoc_change().
 	 */
 	case SCTP_NOTIFY_ASSOC_LOC_ABORTED:
 		if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
 		    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
 			sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, false, false, so_locked);
 		} else {
 			sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, false, false, so_locked);
 		}
 		break;
 	case SCTP_NOTIFY_ASSOC_REM_ABORTED:
 		if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
 		    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
 			sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, true, false, so_locked);
 		} else {
 			sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, true, false, so_locked);
 		}
 		break;
 	case SCTP_NOTIFY_ASSOC_TIMEDOUT:
 		if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
 		    (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
 			sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, false, true, so_locked);
 		} else {
 			sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, false, true, so_locked);
 		}
 		break;
 	case SCTP_NOTIFY_ASSOC_RESTART:
 		sctp_notify_assoc_change(SCTP_RESTART, stcb, error, NULL, false, false, so_locked);
 		if (stcb->asoc.auth_supported == 0) {
 			sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
 			    NULL, so_locked);
 		}
 		break;
 	/*
 	 * Stream reset notifications: error is passed as the number of entries
 	 * and data as the uint16_t list of stream ids.
 	 */
 	case SCTP_NOTIFY_STR_RESET_SEND:
 		sctp_notify_stream_reset(stcb, error, ((uint16_t *)data), SCTP_STREAM_RESET_OUTGOING_SSN);
 		break;
 	case SCTP_NOTIFY_STR_RESET_RECV:
 		sctp_notify_stream_reset(stcb, error, ((uint16_t *)data), SCTP_STREAM_RESET_INCOMING);
 		break;
 	case SCTP_NOTIFY_STR_RESET_FAILED_OUT:
 		sctp_notify_stream_reset(stcb, error, ((uint16_t *)data),
 		    (SCTP_STREAM_RESET_OUTGOING_SSN | SCTP_STREAM_RESET_FAILED));
 		break;
 	case SCTP_NOTIFY_STR_RESET_DENIED_OUT:
 		sctp_notify_stream_reset(stcb, error, ((uint16_t *)data),
 		    (SCTP_STREAM_RESET_OUTGOING_SSN | SCTP_STREAM_RESET_DENIED));
 		break;
 	case SCTP_NOTIFY_STR_RESET_FAILED_IN:
 		sctp_notify_stream_reset(stcb, error, ((uint16_t *)data),
 		    (SCTP_STREAM_RESET_INCOMING | SCTP_STREAM_RESET_FAILED));
 		break;
 	case SCTP_NOTIFY_STR_RESET_DENIED_IN:
 		sctp_notify_stream_reset(stcb, error, ((uint16_t *)data),
 		    (SCTP_STREAM_RESET_INCOMING | SCTP_STREAM_RESET_DENIED));
 		break;
 	/* ASCONF notifications: data is handed on as the affected address. */
 	case SCTP_NOTIFY_ASCONF_ADD_IP:
 		sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
 		    error, so_locked);
 		break;
 	case SCTP_NOTIFY_ASCONF_DELETE_IP:
 		sctp_notify_peer_addr_change(stcb, SCTP_ADDR_REMOVED, data,
 		    error, so_locked);
 		break;
 	case SCTP_NOTIFY_ASCONF_SET_PRIMARY:
 		sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
 		    error, so_locked);
 		break;
 	case SCTP_NOTIFY_PEER_SHUTDOWN:
 		sctp_notify_shutdown_event(stcb);
 		break;
 	/*
 	 * AUTH notifications: data is not a pointer here, it carries a 16-bit
 	 * value that is extracted with a cast.
 	 */
 	case SCTP_NOTIFY_AUTH_NEW_KEY:
 		sctp_notify_authentication(stcb, SCTP_AUTH_NEW_KEY, error,
 		    (uint16_t)(uintptr_t)data,
 		    so_locked);
 		break;
 	case SCTP_NOTIFY_AUTH_FREE_KEY:
 		sctp_notify_authentication(stcb, SCTP_AUTH_FREE_KEY, error,
 		    (uint16_t)(uintptr_t)data,
 		    so_locked);
 		break;
 	case SCTP_NOTIFY_NO_PEER_AUTH:
 		sctp_notify_authentication(stcb, SCTP_AUTH_NO_AUTH, error,
 		    (uint16_t)(uintptr_t)data,
 		    so_locked);
 		break;
 	case SCTP_NOTIFY_SENDER_DRY:
 		sctp_notify_sender_dry_event(stcb, so_locked);
 		break;
 	case SCTP_NOTIFY_REMOTE_ERROR:
 		/* data is the received error chunk (may be NULL). */
 		sctp_notify_remote_error(stcb, error, data);
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
 		    __func__, notification, notification);
 		break;
 	}			/* end switch */
 }
 
 /*
  * Drain every outbound queue of an association (sent queue, pending send
  * queue and each stream's out queue), reporting each entry that still holds
  * data to the upper layer as failed, and freeing the entries.
  *
  * error and so_locked are forwarded to sctp_ulp_notify(); so_locked is also
  * forwarded to the free routines.
  *
  * Nothing is done when stcb is NULL, when the association is already about to
  * be freed, or when the socket is gone or closed.
  */
 void
 sctp_report_all_outbound(struct sctp_tcb *stcb, uint16_t error, int so_locked)
 {
 	struct sctp_association *asoc;
 	struct sctp_stream_out *outs;
 	struct sctp_tmit_chunk *chk, *nchk;
 	struct sctp_stream_queue_pending *sp, *nsp;
 	int i;
 
 	if (stcb == NULL) {
 		return;
 	}
 	asoc = &stcb->asoc;
 	if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 		/* already being freed */
 		return;
 	}
 	if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (asoc->state & SCTP_STATE_CLOSED_SOCKET)) {
 		return;
 	}
 	/* now through all the gunk freeing chunks */
 	/* sent queue SHOULD be empty */
 	TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) {
 		TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
 		asoc->sent_queue_cnt--;
 		/*
 		 * The per-stream chunk count is left alone for chunks marked
 		 * SCTP_DATAGRAM_NR_ACKED.
 		 */
 		if (chk->sent != SCTP_DATAGRAM_NR_ACKED) {
 			if (asoc->strmout[chk->rec.data.sid].chunks_on_queues > 0) {
 				asoc->strmout[chk->rec.data.sid].chunks_on_queues--;
 #ifdef INVARIANTS
 			} else {
 				panic("No chunks on the queues for sid %u.", chk->rec.data.sid);
 #endif
 			}
 		}
 		if (chk->data != NULL) {
 			sctp_free_bufspace(stcb, asoc, chk, 1);
 			sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb,
 			    error, chk, so_locked);
 			/* Re-check: chk->data may have been cleared by the notify. */
 			if (chk->data) {
 				sctp_m_freem(chk->data);
 				chk->data = NULL;
 			}
 		}
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		/* sa_ignore FREED_MEMORY */
 	}
 	/* pending send queue SHOULD be empty */
 	TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
 		TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 		asoc->send_queue_cnt--;
 		if (asoc->strmout[chk->rec.data.sid].chunks_on_queues > 0) {
 			asoc->strmout[chk->rec.data.sid].chunks_on_queues--;
 #ifdef INVARIANTS
 		} else {
 			panic("No chunks on the queues for sid %u.", chk->rec.data.sid);
 #endif
 		}
 		if (chk->data != NULL) {
 			sctp_free_bufspace(stcb, asoc, chk, 1);
 			sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
 			    error, chk, so_locked);
 			/* Re-check: chk->data may have been cleared by the notify. */
 			if (chk->data) {
 				sctp_m_freem(chk->data);
 				chk->data = NULL;
 			}
 		}
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		/* sa_ignore FREED_MEMORY */
 	}
 	for (i = 0; i < asoc->streamoutcnt; i++) {
 		/* For each stream */
 		outs = &asoc->strmout[i];
 		/* clean up any sends there */
 		TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
 			atomic_subtract_int(&asoc->stream_queue_cnt, 1);
 			TAILQ_REMOVE(&outs->outqueue, sp, next);
 			stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp);
 			sctp_free_spbufspace(stcb, asoc, sp);
 			if (sp->data) {
 				sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
 				    error, (void *)sp, so_locked);
 				/* Re-check: sp->data may have been cleared by the notify. */
 				if (sp->data) {
 					sctp_m_freem(sp->data);
 					sp->data = NULL;
 					sp->tail_mbuf = NULL;
 					sp->length = 0;
 				}
 			}
 			/* Drop the reference on the destination, if one is set. */
 			if (sp->net) {
 				sctp_free_remote_addr(sp->net);
 				sp->net = NULL;
 			}
 			/* Free the chunk */
 			sctp_free_a_strmoq(stcb, sp, so_locked);
 			/* sa_ignore FREED_MEMORY */
 		}
 	}
 }
 
 /*
  * Tell the upper layer that an association has been aborted.
  *
  * from_peer selects the "remote aborted" notification; otherwise timeout
  * selects between the "timed out" and "locally aborted" notifications.
  * error and abort are forwarded to sctp_ulp_notify().  All outbound data is
  * reported as failed before the notification is sent.
  *
  * The endpoint is flagged as WAS_ABORTED for TCP-pool endpoints and for
  * connected TCP-type endpoints, even if the socket is already gone.
  */
 void
 sctp_abort_notification(struct sctp_tcb *stcb, bool from_peer, bool timeout,
     uint16_t error, struct sctp_abort_chunk *abort,
     int so_locked)
 {
 	struct sctp_inpcb *ep;
 	uint32_t what;
 
 	if (stcb == NULL) {
 		return;
 	}
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	ep = stcb->sctp_ep;
 	if ((ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 	    ((ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (ep->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) {
 		ep->sctp_flags |= SCTP_PCB_FLAGS_WAS_ABORTED;
 	}
 	if ((ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 	    (ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
 		/* Nobody is left to notify. */
 		return;
 	}
 	SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_WAS_ABORTED);
 	/* Tell them we lost the asoc */
 	sctp_report_all_outbound(stcb, error, so_locked);
 
 	if (from_peer) {
 		what = SCTP_NOTIFY_ASSOC_REM_ABORTED;
 	} else if (timeout) {
 		what = SCTP_NOTIFY_ASSOC_TIMEDOUT;
 	} else {
 		what = SCTP_NOTIFY_ASSOC_LOC_ABORTED;
 	}
 	sctp_ulp_notify(what, stcb, error, abort, so_locked);
 }
 
 /*
  * Send an ABORT in response to the packet described by m/iphlen/src/dst/sh
  * and, when an association (stcb) exists, notify the upper layer and free
  * the association.
  *
  * With an association, its peer verification tag and VRF id are used for the
  * ABORT; without one, a verification tag of 0 and the caller's vrf_id are
  * used.  The cause code handed to the notification is read from op_err
  * before op_err is passed to sctp_send_abort().
  */
 void
 sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct mbuf *m, int iphlen,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct mbuf *op_err,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_gen_error_cause *err_cause;
 	uint16_t cause_code;
 	uint32_t vtag;
 
 	vtag = 0;
 	cause_code = 0;
 	if (stcb != NULL) {
 		vtag = stcb->asoc.peer_vtag;
 		vrf_id = stcb->asoc.vrf_id;
 		if (op_err != NULL) {
 			/* Read the cause code from the error cause. */
 			err_cause = mtod(op_err, struct sctp_gen_error_cause *);
 			cause_code = ntohs(err_cause->code);
 		}
 	}
 	sctp_send_abort(m, iphlen, src, dst, sh, vtag, op_err,
 	    mflowtype, mflowid, inp->fibnum,
 	    vrf_id, port);
 	if (stcb == NULL) {
 		return;
 	}
 
 	/* We have a TCB to abort, send notification too */
 	sctp_abort_notification(stcb, false, false, cause_code, NULL, SCTP_SO_NOT_LOCKED);
 	/* Ok, now lets free it */
 	SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 		SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 	}
 	(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
 }
 #ifdef SCTP_ASOCLOG_OF_TSNS
 /*
  * Print the endpoint's last abort code followed by the inbound and outbound
  * TSN logs of the association.  When a log has wrapped, the entries from the
  * current position to the end of the log are printed first, followed by the
  * entries from the start of the log up to the current position.
  *
  * The function body is empty unless NOSIY_PRINTS is defined.
  */
 void
 sctp_print_out_track_log(struct sctp_tcb *stcb)
 {
 #ifdef NOSIY_PRINTS
 	struct sctp_association *asoc;
 	int idx;
 
 	asoc = &stcb->asoc;
 	SCTP_PRINTF("Last ep reason:%x\n", stcb->sctp_ep->last_abort_code);
 
 	SCTP_PRINTF("IN bound TSN log-aaa\n");
 	if ((asoc->tsn_in_at == 0) && (asoc->tsn_in_wrapped == 0)) {
 		SCTP_PRINTF("None rcvd\n");
 	} else {
 		if (asoc->tsn_in_wrapped) {
 			for (idx = asoc->tsn_in_at; idx < SCTP_TSN_LOG_SIZE; idx++) {
 				SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
 				    asoc->in_tsnlog[idx].tsn,
 				    asoc->in_tsnlog[idx].strm,
 				    asoc->in_tsnlog[idx].seq,
 				    asoc->in_tsnlog[idx].flgs,
 				    asoc->in_tsnlog[idx].sz);
 			}
 		}
 		for (idx = 0; idx < asoc->tsn_in_at; idx++) {
 			SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
 			    asoc->in_tsnlog[idx].tsn,
 			    asoc->in_tsnlog[idx].strm,
 			    asoc->in_tsnlog[idx].seq,
 			    asoc->in_tsnlog[idx].flgs,
 			    asoc->in_tsnlog[idx].sz);
 		}
 	}
 
 	SCTP_PRINTF("OUT bound TSN log-aaa\n");
 	if ((asoc->tsn_out_at == 0) &&
 	    (asoc->tsn_out_wrapped == 0)) {
 		SCTP_PRINTF("None sent\n");
 	}
 	if (asoc->tsn_out_wrapped) {
 		for (idx = asoc->tsn_out_at; idx < SCTP_TSN_LOG_SIZE; idx++) {
 			SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
 			    asoc->out_tsnlog[idx].tsn,
 			    asoc->out_tsnlog[idx].strm,
 			    asoc->out_tsnlog[idx].seq,
 			    asoc->out_tsnlog[idx].flgs,
 			    asoc->out_tsnlog[idx].sz);
 		}
 	}
 	for (idx = 0; idx < asoc->tsn_out_at; idx++) {
 		SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
 		    asoc->out_tsnlog[idx].tsn,
 		    asoc->out_tsnlog[idx].strm,
 		    asoc->out_tsnlog[idx].seq,
 		    asoc->out_tsnlog[idx].flgs,
 		    asoc->out_tsnlog[idx].sz);
 	}
 #endif
 }
 #endif
 
 /*
  * Abort an existing association: send an ABORT carrying op_err to the peer,
  * update the statistics, notify the upper layer (unless the socket is gone)
  * and free the association.
  *
  * timedout is forwarded to sctp_abort_notification().  When stcb is NULL there
  * is nothing to abort; in that case the endpoint itself is freed if its
  * socket is gone and it has no associations left.
  */
 void
 sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct mbuf *op_err, bool timedout, int so_locked)
 {
 	struct sctp_gen_error_cause *err_cause;
 	uint16_t cause_code;
 
 	if (stcb == NULL) {
 		/* Got to have a TCB */
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) &&
 		    LIST_EMPTY(&inp->sctp_asoc_list)) {
 			sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 			    SCTP_CALLED_DIRECTLY_NOCMPSET);
 		}
 		return;
 	}
 
 	/* Read the cause code from the error cause, before op_err is sent. */
 	cause_code = 0;
 	if (op_err != NULL) {
 		err_cause = mtod(op_err, struct sctp_gen_error_cause *);
 		cause_code = ntohs(err_cause->code);
 	}
 
 	/* notify the peer */
 	sctp_send_abort_tcb(stcb, op_err, so_locked);
 	SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
 	    (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 		SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 	}
 
 	/* notify the ulp */
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
 		sctp_abort_notification(stcb, false, timedout, cause_code, NULL, so_locked);
 	}
 
 	/* now free the asoc */
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	sctp_print_out_track_log(stcb);
 #endif
 	(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 	    SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
 }
 
 /*
  * Handle an "out of the blue" packet.
  *
  * The chunks of the packet, starting at offset, are scanned:
  *  - PACKET-DROPPED, ABORT and SHUTDOWN-COMPLETE chunks end processing
  *    without any response;
  *  - a SHUTDOWN-ACK is answered with a SHUTDOWN-COMPLETE;
  *  - an INIT chunk is remembered for the blackhole decision.
  * If the scan finishes, an ABORT carrying cause is sent unless the
  * sctp_blackhole sysctl forbids it: 0 always sends, 1 sends only when the
  * packet contains no INIT chunk, any other value never sends.
  *
  * An endpoint whose socket is gone and that has no associations left is
  * freed first.
  */
 void
 sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_inpcb *inp,
     struct mbuf *cause,
     uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_chunkhdr *hdr, scratch;
 	unsigned int length;
 	int saw_init;
 
 	SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue);
 	if ((inp != NULL) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) &&
 	    LIST_EMPTY(&inp->sctp_asoc_list)) {
 		sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 		    SCTP_CALLED_DIRECTLY_NOCMPSET);
 	}
 
 	saw_init = 0;
 	for (;;) {
 		hdr = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
 		    sizeof(*hdr), (uint8_t *)&scratch);
 		if (hdr == NULL) {
 			break;
 		}
 		length = ntohs(hdr->chunk_length);
 		if (length < sizeof(*hdr)) {
 			/* break to abort land */
 			break;
 		}
 		switch (hdr->chunk_type) {
 		case SCTP_INIT:
 			saw_init = 1;
 			break;
 		case SCTP_PACKET_DROPPED:
 		case SCTP_ABORT_ASSOCIATION:
 		case SCTP_SHUTDOWN_COMPLETE:
 			/*
 			 * We don't respond to pkt-dropped, we don't respond
 			 * with an ABORT to an ABORT, and we ignore a
 			 * SHUTDOWN-COMPLETE since we are not waiting for it
 			 * and peer is gone.
 			 */
 			return;
 		case SCTP_SHUTDOWN_ACK:
 			sctp_send_shutdown_complete2(src, dst, sh,
 			    mflowtype, mflowid, fibnum,
 			    vrf_id, port);
 			return;
 		default:
 			break;
 		}
 		/* Advance to the next chunk. */
 		offset += SCTP_SIZE32(length);
 	}
 
 	if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
 	    ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
 	    (saw_init == 0))) {
 		sctp_send_abort(m, iphlen, src, dst, sh, 0, cause,
 		    mflowtype, mflowid, fibnum,
 		    vrf_id, port);
 	}
 }
 
 /*
  * Scan the chunks of an inbound datagram, starting right after the SCTP
  * common header, for an ABORT chunk.
  *
  * Returns 1 as soon as an ABORT chunk is found, 0 otherwise.  As a side
  * effect, *vtag is updated with the initiate tag of every INIT or INIT-ACK
  * chunk that is seen before the scan ends.
  */
 int
 sctp_is_there_an_abort_here(struct mbuf *m, int iphlen, uint32_t *vtag)
 {
 	struct sctp_init_chunk *init, scratch;
 	struct sctp_chunkhdr *hdr;
 	unsigned int length;
 	int off;
 
 	off = iphlen + sizeof(struct sctphdr);
 	for (;;) {
 		hdr = (struct sctp_chunkhdr *)sctp_m_getptr(m, off,
 		    sizeof(*hdr), (uint8_t *)&scratch);
 		if (hdr == NULL) {
 			break;
 		}
 		length = ntohs(hdr->chunk_length);
 		if (length < sizeof(*hdr)) {
 			/* packet is probably corrupt */
 			break;
 		}
 		/* we seem to be ok, is it an abort? */
 		if (hdr->chunk_type == SCTP_ABORT_ASSOCIATION) {
 			/* yep, tell them */
 			return (1);
 		}
 		if ((hdr->chunk_type == SCTP_INITIATION) ||
 		    (hdr->chunk_type == SCTP_INITIATION_ACK)) {
 			/* need to update the Vtag */
 			init = (struct sctp_init_chunk *)sctp_m_getptr(m,
 			    off, sizeof(struct sctp_init_chunk), (uint8_t *)&scratch);
 			if (init != NULL) {
 				*vtag = ntohl(init->init.initiate_tag);
 			}
 		}
 		/* Nope, move to the next chunk */
 		off += SCTP_SIZE32(length);
 	}
 	return (0);
 }
 
 /*
  * Currently (2/02), ifa_addr embeds scope IDs and does not have sin6_scope_id
  * set (i.e. it's 0), so this function was created to compare link-local scopes.
  */
 #ifdef INET6
 uint32_t
 sctp_is_same_scope(struct sockaddr_in6 *addr1, struct sockaddr_in6 *addr2)
 {
 	struct sockaddr_in6 a, b;
 
 	/* save copies */
 	a = *addr1;
 	b = *addr2;
 
 	if (a.sin6_scope_id == 0)
 		if (sa6_recoverscope(&a)) {
 			/* can't get scope, so can't match */
 			return (0);
 		}
 	if (b.sin6_scope_id == 0)
 		if (sa6_recoverscope(&b)) {
 			/* can't get scope, so can't match */
 			return (0);
 		}
 	if (a.sin6_scope_id != b.sin6_scope_id)
 		return (0);
 
 	return (1);
 }
 
 /*
  * Returns a sockaddr_in6 with embedded scope recovered and removed.
  *
  * Only link-local AF_INET6 addresses are touched:
  *  - with sin6_scope_id == 0, the address is copied into *store and the
  *    scope is recovered there; store is returned if that succeeds, addr
  *    otherwise (note that *store has been overwritten in both cases);
  *  - with a non-zero sin6_scope_id, in6_clearscope() is applied to addr
  *    itself and addr is returned.
  * Every other address is returned unchanged.
  */
 struct sockaddr_in6 *
 sctp_recover_scope(struct sockaddr_in6 *addr, struct sockaddr_in6 *store)
 {
 	if (addr->sin6_family != AF_INET6) {
 		return (addr);
 	}
 	if (!IN6_IS_SCOPE_LINKLOCAL(&addr->sin6_addr)) {
 		return (addr);
 	}
 	if (addr->sin6_scope_id != 0) {
 		/* Scope id already set: strip the embedded scope in place. */
 		in6_clearscope(&addr->sin6_addr);
 		return (addr);
 	}
 	*store = *addr;
 	if (sa6_recoverscope(store) == 0) {
 		/* use the recovered scope */
 		return (store);
 	}
 	/* Recovery failed: return the original address. */
 	return (addr);
 }
 #endif
 
 /*
  * Are the two addresses the same?  Currently this is a "scopeless" check.
  * Returns 1 if they are the same, 0 if not.
  */
 int
 sctp_cmpaddr(struct sockaddr *sa1, struct sockaddr *sa2)
 {
 	/* Both pointers must be valid and name the same address family. */
 	if ((sa1 == NULL) || (sa2 == NULL) ||
 	    (sa1->sa_family != sa2->sa_family)) {
 		return (0);
 	}
 #ifdef INET6
 	if (sa1->sa_family == AF_INET6) {
 		/* IPv6: the comparison is delegated to SCTP6_ARE_ADDR_EQUAL */
 		return (SCTP6_ARE_ADDR_EQUAL((struct sockaddr_in6 *)sa1,
 		    (struct sockaddr_in6 *)sa2));
 	}
 #endif
 #ifdef INET
 	if (sa1->sa_family == AF_INET) {
 		/* IPv4: only the address words are compared, not the ports */
 		struct sockaddr_in *lhs = (struct sockaddr_in *)sa1;
 		struct sockaddr_in *rhs = (struct sockaddr_in *)sa2;
 
 		return (lhs->sin_addr.s_addr == rhs->sin_addr.s_addr);
 	}
 #endif
 	/* any other address family is not handled */
 	return (0);
 }
 
 /*
  * Print a one-line, human readable form of a socket address through
  * SCTP_PRINTF.  Families other than the compiled-in IPv4/IPv6 ones are
  * printed as "?".
  */
 void
 sctp_print_address(struct sockaddr *sa)
 {
 #ifdef INET6
 	if (sa->sa_family == AF_INET6) {
 		char text[INET6_ADDRSTRLEN];
 		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)sa;
 
 		SCTP_PRINTF("IPv6 address: %s:port:%d scope:%u\n",
 		    ip6_sprintf(text, &v6->sin6_addr),
 		    ntohs(v6->sin6_port),
 		    v6->sin6_scope_id);
 		return;
 	}
 #endif
 #ifdef INET
 	if (sa->sa_family == AF_INET) {
 		struct sockaddr_in *v4 = (struct sockaddr_in *)sa;
 		/* view the address as four bytes in memory order */
 		unsigned char *octet = (unsigned char *)&v4->sin_addr;
 
 		SCTP_PRINTF("IPv4 address: %u.%u.%u.%u:%d\n",
 		    octet[0], octet[1], octet[2], octet[3],
 		    ntohs(v4->sin_port));
 		return;
 	}
 #endif
 	SCTP_PRINTF("?\n");
 }
 
 void
 sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
     struct sctp_inpcb *new_inp,
     struct sctp_tcb *stcb,
     int waitflags)
 {
 	/*
 	 * go through our old INP and pull off any control structures that
 	 * belong to stcb and move then to the new inp.
 	 */
 	struct socket *old_so, *new_so;
 	struct sctp_queued_to_read *control, *nctl;
 	struct sctp_readhead tmp_queue;
 	struct mbuf *m;
 	int error = 0;
 
 	old_so = old_inp->sctp_socket;
 	new_so = new_inp->sctp_socket;
 	TAILQ_INIT(&tmp_queue);
 	error = SOCK_IO_RECV_LOCK(old_so, waitflags);
 	if (error) {
 		/*
 		 * Gak, can't get I/O lock, we have a problem. data will be
 		 * left stranded.. and we don't dare look at it since the
 		 * other thread may be reading something. Oh well, its a
 		 * screwed up app that does a peeloff OR a accept while
 		 * reading from the main socket... actually its only the
 		 * peeloff() case, since I think read will fail on a
 		 * listening socket..
 		 */
 		return;
 	}
 	/* lock the socket buffers */
 	SCTP_INP_READ_LOCK(old_inp);
 	TAILQ_FOREACH_SAFE(control, &old_inp->read_queue, next, nctl) {
 		/* Pull off all for out target stcb */
 		if (control->stcb == stcb) {
 			/* remove it we want it */
 			TAILQ_REMOVE(&old_inp->read_queue, control, next);
 			TAILQ_INSERT_TAIL(&tmp_queue, control, next);
 			m = control->data;
 			while (m) {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 					sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
 				}
 				sctp_sbfree(control, stcb, &old_so->so_rcv, m);
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 					sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
 				}
 				m = SCTP_BUF_NEXT(m);
 			}
 		}
 	}
 	SCTP_INP_READ_UNLOCK(old_inp);
 	/* Remove the recv-lock on the old socket */
 	SOCK_IO_RECV_UNLOCK(old_so);
 	/* Now we move them over to the new socket buffer */
 	SCTP_INP_READ_LOCK(new_inp);
 	TAILQ_FOREACH_SAFE(control, &tmp_queue, next, nctl) {
 		TAILQ_INSERT_TAIL(&new_inp->read_queue, control, next);
 		m = control->data;
 		while (m) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 				sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
 			}
 			sctp_sballoc(stcb, &new_so->so_rcv, m);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 				sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
 			}
 			m = SCTP_BUF_NEXT(m);
 		}
 	}
 	SCTP_INP_READ_UNLOCK(new_inp);
 }
 
 void
 sctp_wakeup_the_read_socket(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int so_locked
     SCTP_UNUSED
 )
 {
 	/* Without an endpoint and an attached socket there is nobody to wake. */
 	if ((inp == NULL) || (inp->sctp_socket == NULL)) {
 		return;
 	}
 	/*
 	 * An endpoint that has SCTP_PCB_FLAGS_TCPTYPE or
 	 * SCTP_PCB_FLAGS_IN_TCPPOOL set and is listening is not woken up;
 	 * every other endpoint is.
 	 */
 	if (((inp->sctp_flags & (SCTP_PCB_FLAGS_TCPTYPE | SCTP_PCB_FLAGS_IN_TCPPOOL)) != 0) &&
 	    (SCTP_IS_LISTENING(inp))) {
 		return;
 	}
 	sctp_sorwakeup(inp, inp->sctp_socket);
 }
 
 /*
  * Append a control entry to the endpoint's read queue and charge its mbufs
  * to the socket buffer sb.
  *
  * inp                 endpoint that owns the read queue; NULL panics under
  *                     INVARIANTS and is otherwise ignored.
  * stcb                association passed on to the accounting, logging and
  *                     free helpers; only dereferenced here when
  *                     control->do_not_ref_stcb is clear.
  * control             entry to queue.  Zero-length mbufs are freed and
  *                     unlinked from its chain, and control->length and
  *                     control->tail_mbuf are recomputed.
  * sb                  socket buffer that is charged for each kept mbuf.
  * end                 non-zero sets control->end_added.
  * inp_read_lock_held  SCTP_READ_LOCK_NOT_HELD makes this function take and
  *                     drop the INP read lock itself; any other value means
  *                     the lock is left untouched.
  * so_locked           passed through to sctp_wakeup_the_read_socket().
  *
  * Nothing is queued when the endpoint has SCTP_PCB_FLAGS_SOCKET_CANT_READ
  * set or when no mbuf with data remains.  In both cases the control is
  * released here unless it is still on a stream queue (control->on_strm_q).
  */
 void
 sctp_add_to_readq(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_queued_to_read *control,
     struct sockbuf *sb,
     int end,
     int inp_read_lock_held,
     int so_locked)
 {
 	/*
 	 * Here we must place the control on the end of the socket read
 	 * queue AND increment sb_cc so that select will work properly on
 	 * read.
 	 */
 	struct mbuf *m, *prev = NULL;
 
 	if (inp == NULL) {
 		/* Gak, TSNH!! */
 #ifdef INVARIANTS
 		panic("Gak, inp NULL on add_to_readq");
 #endif
 		return;
 	}
 	if (inp_read_lock_held == SCTP_READ_LOCK_NOT_HELD) {
 		SCTP_INP_READ_LOCK(inp);
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
 		/* The socket can no longer be read from: drop the message. */
 		if (!control->on_strm_q) {
 			sctp_free_remote_addr(control->whoFrom);
 			if (control->data) {
 				sctp_m_freem(control->data);
 				control->data = NULL;
 			}
 			sctp_free_a_readq(stcb, control);
 		}
 		if (inp_read_lock_held == SCTP_READ_LOCK_NOT_HELD) {
 			SCTP_INP_READ_UNLOCK(inp);
 		}
 		return;
 	}
 	/* Notifications are not counted as received messages. */
 	if (!(control->spec_flags & M_NOTIFICATION)) {
 		atomic_add_int(&inp->total_recvs, 1);
 		if (!control->do_not_ref_stcb) {
 			atomic_add_int(&stcb->total_recvs, 1);
 		}
 	}
 	m = control->data;
 	control->held_length = 0;
 	control->length = 0;
 	while (m != NULL) {
 		if (SCTP_BUF_LEN(m) == 0) {
 			/* Skip mbufs with NO length */
 			if (prev == NULL) {
 				/* First one */
 				control->data = sctp_m_free(m);
 				m = control->data;
 			} else {
 				SCTP_BUF_NEXT(prev) = sctp_m_free(m);
 				m = SCTP_BUF_NEXT(prev);
 			}
 			if (m == NULL) {
 				control->tail_mbuf = prev;
 			}
 			continue;
 		}
 		prev = m;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 			sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
 		}
 		sctp_sballoc(stcb, sb, m);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 			sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
 		}
 		atomic_add_int(&control->length, SCTP_BUF_LEN(m));
 		m = SCTP_BUF_NEXT(m);
 	}
 	if (prev != NULL) {
 		control->tail_mbuf = prev;
 	} else {
 		/* Everything got collapsed out?? */
 		if (!control->on_strm_q) {
 			sctp_free_remote_addr(control->whoFrom);
 			sctp_free_a_readq(stcb, control);
 		}
 		/*
 		 * Use the named constant here as well, so that every
 		 * lock/unlock decision in this function tests the same value.
 		 */
 		if (inp_read_lock_held == SCTP_READ_LOCK_NOT_HELD) {
 			SCTP_INP_READ_UNLOCK(inp);
 		}
 		return;
 	}
 	if (end) {
 		control->end_added = 1;
 	}
 	TAILQ_INSERT_TAIL(&inp->read_queue, control, next);
 	control->on_read_q = 1;
 	/* inp was checked against NULL on entry; only the socket can be absent. */
 	if (inp->sctp_socket != NULL) {
 		sctp_wakeup_the_read_socket(inp, stcb, so_locked);
 	}
 	if (inp_read_lock_held == SCTP_READ_LOCK_NOT_HELD) {
 		SCTP_INP_READ_UNLOCK(inp);
 	}
 }
 
 /*************HOLD THIS COMMENT FOR PATCH FILE OF
  *************ALTERNATE ROUTING CODE
  */
 
 /*************HOLD THIS COMMENT FOR END OF PATCH FILE OF
  *************ALTERNATE ROUTING CODE
  */
 
 /*
  * Build an mbuf holding a generic error cause: a header carrying "code" and
  * the total length, followed by the bytes of the string "info" (without its
  * terminating NUL).
  *
  * Returns NULL when code is 0, info is NULL, info is longer than
  * SCTP_MAX_CAUSE_LENGTH minus the parameter header, or no mbuf could be
  * obtained.
  */
 struct mbuf *
 sctp_generate_cause(uint16_t code, char *info)
 {
 	struct sctp_gen_error_cause *hdr;
 	struct mbuf *mb;
 	size_t text_len;
 	uint16_t total_len;
 
 	if ((code == 0) || (info == NULL)) {
 		return (NULL);
 	}
 	text_len = strlen(info);
 	if (text_len > (SCTP_MAX_CAUSE_LENGTH - sizeof(struct sctp_paramhdr))) {
 		return (NULL);
 	}
 	total_len = (uint16_t)(sizeof(struct sctp_paramhdr) + text_len);
 	mb = sctp_get_mbuf_for_msg(total_len, 0, M_NOWAIT, 1, MT_DATA);
 	if (mb == NULL) {
 		return (NULL);
 	}
 	SCTP_BUF_LEN(mb) = total_len;
 	hdr = mtod(mb, struct sctp_gen_error_cause *);
 	hdr->code = htons(code);
 	hdr->length = htons(total_len);
 	memcpy(hdr->info, info, text_len);
 	return (mb);
 }
 
 /*
  * Build an mbuf holding a SCTP_CAUSE_NO_USER_DATA error cause for the given
  * TSN.  Returns NULL if no mbuf could be obtained.
  */
 struct mbuf *
 sctp_generate_no_user_data_cause(uint32_t tsn)
 {
 	struct sctp_error_no_user_data *cause_body;
 	struct mbuf *mb;
 	uint16_t cause_len;
 
 	cause_len = (uint16_t)sizeof(struct sctp_error_no_user_data);
 	mb = sctp_get_mbuf_for_msg(cause_len, 0, M_NOWAIT, 1, MT_DATA);
 	if (mb == NULL) {
 		return (NULL);
 	}
 	SCTP_BUF_LEN(mb) = cause_len;
 	cause_body = mtod(mb, struct sctp_error_no_user_data *);
 	cause_body->cause.code = htons(SCTP_CAUSE_NO_USER_DATA);
 	cause_body->cause.length = htons(cause_len);
 	cause_body->tsn = htonl(tsn);
 	return (mb);
 }
 
 #ifdef SCTP_MBCNT_LOGGING
 /*
  * Give back the output-queue space booked for chunk tp1.
  *
  * Does nothing when tp1 carries no data.  Otherwise chk_cnt is subtracted
  * from asoc->chunks_on_out_queue and tp1->book_size is removed from
  * asoc->total_output_queue_size (clamped at 0).  For endpoints flagged
  * SCTP_PCB_FLAGS_IN_TCPPOOL or SCTP_PCB_FLAGS_TCPTYPE the socket's
  * so_snd.sb_cc is reduced the same way.
  */
 void
 sctp_free_bufspace(struct sctp_tcb *stcb, struct sctp_association *asoc,
     struct sctp_tmit_chunk *tp1, int chk_cnt)
 {
 	if (tp1->data == NULL) {
 		return;
 	}
 	asoc->chunks_on_out_queue -= chk_cnt;
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBCNT_LOGGING_ENABLE) {
 		sctp_log_mbcnt(SCTP_LOG_MBCNT_DECREASE,
 		    asoc->total_output_queue_size,
 		    tp1->book_size,
 		    0,
 		    tp1->mbcnt);
 	}
 	/* Never let the association-wide counter go below zero. */
 	if (asoc->total_output_queue_size < tp1->book_size) {
 		asoc->total_output_queue_size = 0;
 	} else {
 		atomic_add_int(&asoc->total_output_queue_size, -tp1->book_size);
 	}
 
 	/* The socket-level counter is only adjusted for these endpoint types. */
 	if (!stcb->sctp_socket ||
 	    ((stcb->sctp_ep->sctp_flags &
 	    (SCTP_PCB_FLAGS_IN_TCPPOOL | SCTP_PCB_FLAGS_TCPTYPE)) == 0)) {
 		return;
 	}
 	if (stcb->sctp_socket->so_snd.sb_cc < tp1->book_size) {
 		stcb->sctp_socket->so_snd.sb_cc = 0;
 	} else {
 		stcb->sctp_socket->so_snd.sb_cc -= tp1->book_size;
 	}
 }
 
 #endif
 
 /*
  * Abandon the user message that chunk tp1 belongs to.
  *
  * Every fragment of the message that can be found is marked
  * SCTP_FORWARD_TSN_SKIP and has its data freed.  Fragments are looked for in
  * three places, in this order, until one that ends the message is seen:
  *   1. the queue tp1 is linked on, walking forward from tp1;
  *   2. asoc.send_queue (matching fragments are moved to asoc.sent_queue);
  *   3. the first pending entry on the stream's outqueue, whose remaining data
  *      is discarded.
  *
  * stcb       association; its TCB lock must be held (asserted).
  * tp1        first chunk to abandon.
  * sent       non-zero selects the "sent" notification and statistics.
  * so_locked  passed through to sctp_ulp_notify().
  *
  * Returns the number of bytes released: the book_size of each chunk visited
  * plus the length of any pending stream data that was discarded.
  */
 int
 sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1,
     uint8_t sent, int so_locked)
 {
 	struct sctp_stream_out *strq;
 	struct sctp_tmit_chunk *chk = NULL, *tp2;
 	struct sctp_stream_queue_pending *sp;
 	uint32_t mid;
 	uint16_t sid;
 	uint8_t foundeom = 0;
 	int ret_sz = 0;
 	int notdone;
 	int do_wakeup_routine = 0;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
 	/* Remember stream and message id; tp1 is advanced by the loops below. */
 	sid = tp1->rec.data.sid;
 	mid = tp1->rec.data.mid;
 	/*
 	 * Statistics: counted as "abandoned sent" when the caller says it was
 	 * sent or when tp1 is not the first fragment, otherwise as "abandoned
 	 * unsent".  Index 0 is always bumped along with the per-policy slot.
 	 */
 	if (sent || !(tp1->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG)) {
 		stcb->asoc.abandoned_sent[0]++;
 		stcb->asoc.abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
 		stcb->asoc.strmout[sid].abandoned_sent[0]++;
 #if defined(SCTP_DETAILED_STR_STATS)
 		stcb->asoc.strmout[sid].abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
 #endif
 	} else {
 		stcb->asoc.abandoned_unsent[0]++;
 		stcb->asoc.abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
 		stcb->asoc.strmout[sid].abandoned_unsent[0]++;
 #if defined(SCTP_DETAILED_STR_STATS)
 		stcb->asoc.strmout[sid].abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
 #endif
 	}
 	/*
 	 * Pass 1: walk forward from tp1 on its own queue until a chunk that
 	 * ends the message (unfragmented or LAST_FRAG) or the end of the queue.
 	 */
 	do {
 		ret_sz += tp1->book_size;
 		if (tp1->data != NULL) {
 			/* Below SCTP_DATAGRAM_RESEND: take it out of the flight size. */
 			if (tp1->sent < SCTP_DATAGRAM_RESEND) {
 				sctp_flight_size_decrease(tp1);
 				sctp_total_flight_decrease(stcb, tp1);
 			}
 			sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
 			/* Credit the chunk and its per-chunk overhead to peers_rwnd. */
 			stcb->asoc.peers_rwnd += tp1->send_size;
 			stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh);
 			if (sent) {
 				sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, 0, tp1, so_locked);
 			} else {
 				sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, tp1, so_locked);
 			}
 			/* Re-checked after the notification before freeing. */
 			if (tp1->data) {
 				sctp_m_freem(tp1->data);
 				tp1->data = NULL;
 			}
 			do_wakeup_routine = 1;
 			if (PR_SCTP_BUF_ENABLED(tp1->flags)) {
 				stcb->asoc.sent_queue_cnt_removeable--;
 			}
 		}
 		tp1->sent = SCTP_FORWARD_TSN_SKIP;
 		if ((tp1->rec.data.rcv_flags & SCTP_DATA_NOT_FRAG) ==
 		    SCTP_DATA_NOT_FRAG) {
 			/* not fragmented, we are done */
 			notdone = 0;
 			foundeom = 1;
 		} else if (tp1->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
 			/* end of frag, we are done */
 			notdone = 0;
 			foundeom = 1;
 		} else {
 			/*
 			 * Its a begin or middle piece, we must mark all of
 			 * it
 			 */
 			notdone = 1;
 			tp1 = TAILQ_NEXT(tp1, sctp_next);
 		}
 	} while (tp1 && notdone);
 	/* Pass 2: the rest of the message may still sit on the send queue. */
 	if (foundeom == 0) {
 		/*
 		 * The multi-part message was scattered across the send and
 		 * sent queue.
 		 */
 		TAILQ_FOREACH_SAFE(tp1, &stcb->asoc.send_queue, sctp_next, tp2) {
 			/* Stop at the first chunk of another stream or message. */
 			if ((tp1->rec.data.sid != sid) ||
 			    (!SCTP_MID_EQ(stcb->asoc.idata_supported, tp1->rec.data.mid, mid))) {
 				break;
 			}
 			/*
 			 * save to chk in case we have some on stream out
 			 * queue. If so and we have an un-transmitted one we
 			 * don't have to fudge the TSN.
 			 */
 			chk = tp1;
 			ret_sz += tp1->book_size;
 			sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
 			if (sent) {
 				sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, 0, tp1, so_locked);
 			} else {
 				sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, tp1, so_locked);
 			}
 			if (tp1->data) {
 				sctp_m_freem(tp1->data);
 				tp1->data = NULL;
 			}
 			/* No flight involved here book the size to 0 */
 			tp1->book_size = 0;
 			if (tp1->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
 				foundeom = 1;
 			}
 			do_wakeup_routine = 1;
 			tp1->sent = SCTP_FORWARD_TSN_SKIP;
 			TAILQ_REMOVE(&stcb->asoc.send_queue, tp1, sctp_next);
 			/*
 			 * on to the sent queue so we can wait for it to be
 			 * passed by.
 			 */
 			TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, tp1,
 			    sctp_next);
 			stcb->asoc.send_queue_cnt--;
 			stcb->asoc.sent_queue_cnt++;
 		}
 	}
 	/* Pass 3: the tail of the message has not been turned into chunks yet. */
 	if (foundeom == 0) {
 		/*
 		 * Still no eom found. That means there is stuff left on the
 		 * stream out queue.. yuck.
 		 */
 		strq = &stcb->asoc.strmout[sid];
 		sp = TAILQ_FIRST(&strq->outqueue);
 		if (sp != NULL) {
 			sp->discard_rest = 1;
 			/*
 			 * We may need to put a chunk on the queue that
 			 * holds the TSN that would have been sent with the
 			 * LAST bit.
 			 */
 			if (chk == NULL) {
 				/* Yep, we have to */
 				sctp_alloc_a_chunk(stcb, chk);
 				if (chk == NULL) {
 					/*
 					 * we are hosed. All we can do is
 					 * nothing.. which will cause an
 					 * abort if the peer is paying
 					 * attention.
 					 */
 					goto oh_well;
 				}
 				/* Placeholder chunk: no data, already marked as skipped. */
 				memset(chk, 0, sizeof(*chk));
 				chk->rec.data.rcv_flags = 0;
 				chk->sent = SCTP_FORWARD_TSN_SKIP;
 				chk->asoc = &stcb->asoc;
 				/*
 				 * Pick the message id: without I-DATA support
 				 * unordered messages use 0, otherwise the
 				 * stream's next ordered/unordered id is used.
 				 */
 				if (stcb->asoc.idata_supported == 0) {
 					if (sp->sinfo_flags & SCTP_UNORDERED) {
 						chk->rec.data.mid = 0;
 					} else {
 						chk->rec.data.mid = strq->next_mid_ordered;
 					}
 				} else {
 					if (sp->sinfo_flags & SCTP_UNORDERED) {
 						chk->rec.data.mid = strq->next_mid_unordered;
 					} else {
 						chk->rec.data.mid = strq->next_mid_ordered;
 					}
 				}
 				chk->rec.data.sid = sp->sid;
 				chk->rec.data.ppid = sp->ppid;
 				chk->rec.data.context = sp->context;
 				chk->flags = sp->act_flags;
 				chk->whoTo = NULL;
 				/* Consume a fresh TSN for the placeholder. */
 				chk->rec.data.tsn = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1);
 				strq->chunks_on_queues++;
 				TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next);
 				stcb->asoc.sent_queue_cnt++;
 				stcb->asoc.pr_sctp_cnt++;
 			}
 			/* chk (new or the last one moved in pass 2) now ends the message. */
 			chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
 			if (sp->sinfo_flags & SCTP_UNORDERED) {
 				chk->rec.data.rcv_flags |= SCTP_DATA_UNORDERED;
 			}
 			/* Advance the stream's message id counter past this message. */
 			if (stcb->asoc.idata_supported == 0) {
 				if ((sp->sinfo_flags & SCTP_UNORDERED) == 0) {
 					strq->next_mid_ordered++;
 				}
 			} else {
 				if (sp->sinfo_flags & SCTP_UNORDERED) {
 					strq->next_mid_unordered++;
 				} else {
 					strq->next_mid_ordered++;
 				}
 			}
 	oh_well:
 			if (sp->data) {
 				/*
 				 * Pull any data to free up the SB and allow
 				 * sender to "add more" while we will throw
 				 * away :-)
 				 */
 				sctp_free_spbufspace(stcb, &stcb->asoc, sp);
 				ret_sz += sp->length;
 				do_wakeup_routine = 1;
 				sp->some_taken = 1;
 				sctp_m_freem(sp->data);
 				sp->data = NULL;
 				sp->tail_mbuf = NULL;
 				sp->length = 0;
 			}
 		}
 	}
 	/* Send space was released: wake up anybody waiting to write. */
 	if (do_wakeup_routine) {
 		sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
 	}
 	return (ret_sz);
 }
 
 /*
  * Checks to see if the given address, sa, is one that is currently known by
  * the kernel.  Note: this can't distinguish the same address on multiple
  * interfaces and doesn't handle multiple addresses with different zone/scope
  * ids.  Note: ifa_ifwithaddr() compares the entire sockaddr struct.
  */
 struct sctp_ifa *
 sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr,
     int holds_lock)
 {
 	struct sctp_laddr *entry;
 
 	/* holds_lock == 0: take the INP read lock here for the list walk. */
 	if (holds_lock == 0) {
 		SCTP_INP_RLOCK(inp);
 	}
 
 	LIST_FOREACH(entry, &inp->sctp_addr_list, sctp_nxt_addr) {
 		/* skip entries without an ifa or of another address family */
 		if ((entry->ifa == NULL) ||
 		    (addr->sa_family != entry->ifa->address.sa.sa_family)) {
 			continue;
 		}
 #ifdef INET
 		if ((addr->sa_family == AF_INET) &&
 		    (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
 		    entry->ifa->address.sin.sin_addr.s_addr)) {
 			/* IPv4 match */
 			break;
 		}
 #endif
 #ifdef INET6
 		if ((addr->sa_family == AF_INET6) &&
 		    (SCTP6_ARE_ADDR_EQUAL((struct sockaddr_in6 *)addr,
 		    &entry->ifa->address.sin6))) {
 			/* IPv6 match */
 			break;
 		}
 #endif
 	}
 	if (holds_lock == 0) {
 		SCTP_INP_RUNLOCK(inp);
 	}
 	/* entry is NULL when the walk ran off the end without a match */
 	return ((entry != NULL) ? entry->ifa : NULL);
 }
 
 /*
  * Compute the hash value used to pick an address bucket.
  *
  * IPv4: the 32-bit address XORed with itself shifted right by 16.
  * IPv6: the sum of the four 32-bit address words, folded the same way.
  * Any other family hashes to 0.
  */
 uint32_t
 sctp_get_ifa_hash_val(struct sockaddr *addr)
 {
 	uint32_t hash = 0;
 
 	switch (addr->sa_family) {
 #ifdef INET
 	case AF_INET:
 		{
 			struct sockaddr_in *v4 = (struct sockaddr_in *)addr;
 
 			hash = (v4->sin_addr.s_addr ^ (v4->sin_addr.s_addr >> 16));
 			break;
 		}
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)addr;
 			uint32_t word_sum;
 
 			word_sum = (v6->sin6_addr.s6_addr32[0] +
 			    v6->sin6_addr.s6_addr32[1] +
 			    v6->sin6_addr.s6_addr32[2] +
 			    v6->sin6_addr.s6_addr32[3]);
 			hash = (word_sum ^ (word_sum >> 16));
 			break;
 		}
 #endif
 	default:
 		break;
 	}
 	return (hash);
 }
 
 struct sctp_ifa *
 sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock)
 {
 	struct sctp_ifa *sctp_ifap;
 	struct sctp_vrf *vrf;
 	struct sctp_ifalist *hash_head;
 	uint32_t hash_of_addr;
 
 	if (holds_lock == 0) {
 		SCTP_IPI_ADDR_RLOCK();
 	} else {
 		SCTP_IPI_ADDR_LOCK_ASSERT();
 	}
 
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		if (holds_lock == 0)
 			SCTP_IPI_ADDR_RUNLOCK();
 		return (NULL);
 	}
 
 	hash_of_addr = sctp_get_ifa_hash_val(addr);
 
 	hash_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
 	if (hash_head == NULL) {
 		SCTP_PRINTF("hash_of_addr:%x mask:%x table:%x - ",
 		    hash_of_addr, (uint32_t)vrf->vrf_addr_hashmark,
 		    (uint32_t)(hash_of_addr & vrf->vrf_addr_hashmark));
 		sctp_print_address(addr);
 		SCTP_PRINTF("No such bucket for address\n");
 		if (holds_lock == 0)
 			SCTP_IPI_ADDR_RUNLOCK();
 
 		return (NULL);
 	}
 	LIST_FOREACH(sctp_ifap, hash_head, next_bucket) {
 		if (addr->sa_family != sctp_ifap->address.sa.sa_family)
 			continue;
 #ifdef INET
 		if (addr->sa_family == AF_INET) {
 			if (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
 			    sctp_ifap->address.sin.sin_addr.s_addr) {
 				/* found him. */
 				break;
 			}
 		}
 #endif
 #ifdef INET6
 		if (addr->sa_family == AF_INET6) {
 			if (SCTP6_ARE_ADDR_EQUAL((struct sockaddr_in6 *)addr,
 			    &sctp_ifap->address.sin6)) {
 				/* found him. */
 				break;
 			}
 		}
 #endif
 	}
 	if (holds_lock == 0)
 		SCTP_IPI_ADDR_RUNLOCK();
 	return (sctp_ifap);
 }
 
 /*
  * Called after the user has pulled data off the socket: decide whether the
  * receive window has grown enough to be worth telling the peer, and if so
  * send a window-update SACK.
  *
  * stcb          association the data belonged to; NULL is a no-op.
  * freed_so_far  bytes freed by the reader so far; added to
  *               stcb->freed_by_sorcv_sincelast and then reset to 0.
  * hold_rlock    non-zero is treated as "the caller holds the INP read lock":
  *               the lock is dropped before the SACK is sent and taken again
  *               before returning.
  * rwnd_req      minimum growth of the window (relative to the last value
  *               reported) that triggers a SACK.
  *
  * A reference on the association is held for the duration of the call, and
  * one on the endpoint unless the early state check bails out.
  */
 static void
 sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t *freed_so_far, int hold_rlock,
     uint32_t rwnd_req)
 {
 	/* User pulled some data, do we need a rwnd update? */
 	struct epoch_tracker et;
 	int r_unlocked = 0;
 	uint32_t dif, rwnd;
 	struct socket *so = NULL;
 
 	if (stcb == NULL)
 		return;
 
 	/* Hold a reference on the association while we look at it. */
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 
 	if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
 	    (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED | SCTP_STATE_SHUTDOWN_RECEIVED))) {
 		/* Pre-check: no update if we are shutting down or being freed */
 		goto no_lock;
 	}
 	/* From here on every exit must go through "out" to drop this reference. */
 	SCTP_INP_INCR_REF(stcb->sctp_ep);
 	if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
 		goto out;
 	}
 	so = stcb->sctp_socket;
 	if (so == NULL) {
 		goto out;
 	}
 	/* Fold the caller's count into the association and restart it at 0. */
 	atomic_add_int(&stcb->freed_by_sorcv_sincelast, *freed_so_far);
 	/* Have you freed enough to look? */
 	*freed_so_far = 0;
 	/* Yep, its worth a look and the lock overhead */
 
 	/* Figure out what the rwnd would be */
 	rwnd = sctp_calc_rwnd(stcb, &stcb->asoc);
 	/* dif = how far the window has opened since the last report (never negative) */
 	if (rwnd >= stcb->asoc.my_last_reported_rwnd) {
 		dif = rwnd - stcb->asoc.my_last_reported_rwnd;
 	} else {
 		dif = 0;
 	}
 	if (dif >= rwnd_req) {
 		/* Drop the caller's INP read lock before taking the TCB lock. */
 		if (hold_rlock) {
 			SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
 			r_unlocked = 1;
 		}
 		if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			/*
 			 * One last check before we take the TCB lock.  There
 			 * is a race where the freeing thread has not yet
 			 * reached the gate; in that case skip the update.
 			 */
 			goto out;
 		}
 		SCTP_TCB_LOCK(stcb);
 		/* Check again now that the TCB lock is held. */
 		if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			/* No reports here */
 			SCTP_TCB_UNLOCK(stcb);
 			goto out;
 		}
 		SCTP_STAT_INCR(sctps_wu_sacks_sent);
 		/* The SACK and the output pass run inside the network epoch. */
 		NET_EPOCH_ENTER(et);
 		sctp_send_sack(stcb, SCTP_SO_LOCKED);
 
 		sctp_chunk_output(stcb->sctp_ep, stcb,
 		    SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED);
 		NET_EPOCH_EXIT(et);
 		/* make sure no RECV timer is left running */
 		sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL,
 		    SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
 		SCTP_TCB_UNLOCK(stcb);
 	} else {
 		/* Update how much we have pending */
 		stcb->freed_by_sorcv_sincelast = dif;
 	}
 out:
 	/* Give the caller back the INP read lock if we dropped it above. */
 	if (so && r_unlocked && hold_rlock) {
 		SCTP_INP_READ_LOCK(stcb->sctp_ep);
 	}
 
 	SCTP_INP_DECR_REF(stcb->sctp_ep);
 no_lock:
 	/* Release the association reference taken on entry. */
 	atomic_subtract_int(&stcb->asoc.refcnt, 1);
 	return;
 }
 
 int
 sctp_sorecvmsg(struct socket *so,
     struct uio *uio,
     struct mbuf **mp,
     struct sockaddr *from,
     int fromlen,
     int *msg_flags,
     struct sctp_sndrcvinfo *sinfo,
     int filling_sinfo)
 {
 	/*
 	 * MSG flags we will look at MSG_DONTWAIT - non-blocking IO.
 	 * MSG_PEEK - Look don't touch :-D (only valid with OUT mbuf copy
 	 * mp=NULL thus uio is the copy method to userland) MSG_WAITALL - ??
 	 * On the way out we may send out any combination of:
 	 * MSG_NOTIFICATION MSG_EOR
 	 *
 	 */
 	struct sctp_inpcb *inp = NULL;
 	ssize_t my_len = 0;
 	ssize_t cp_len = 0;
 	int error = 0;
 	struct sctp_queued_to_read *control = NULL, *ctl = NULL, *nxt = NULL;
 	struct mbuf *m = NULL;
 	struct sctp_tcb *stcb = NULL;
 	int wakeup_read_socket = 0;
 	int freecnt_applied = 0;
 	int out_flags = 0, in_flags = 0;
 	int block_allowed = 1;
 	uint32_t freed_so_far = 0;
 	ssize_t copied_so_far = 0;
 	int in_eeor_mode = 0;
 	int no_rcv_needed = 0;
 	uint32_t rwnd_req = 0;
 	int hold_sblock = 0;
 	int hold_rlock = 0;
 	ssize_t slen = 0;
 	uint32_t held_length = 0;
 	int sockbuf_lock = 0;
 
 	if (uio == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		return (EINVAL);
 	}
 
 	if (msg_flags) {
 		in_flags = *msg_flags;
 		if (in_flags & MSG_PEEK)
 			SCTP_STAT_INCR(sctps_read_peeks);
 	} else {
 		in_flags = 0;
 	}
 	slen = uio->uio_resid;
 
 	/* Pull in and set up our int flags */
 	if (in_flags & MSG_OOB) {
 		/* Out of band's NOT supported */
 		return (EOPNOTSUPP);
 	}
 	if ((in_flags & MSG_PEEK) && (mp != NULL)) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		return (EINVAL);
 	}
 	if ((in_flags & (MSG_DONTWAIT
 	    | MSG_NBIO
 	    )) ||
 	    SCTP_SO_IS_NBIO(so)) {
 		block_allowed = 0;
 	}
 	/* setup the endpoint */
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
 		return (EFAULT);
 	}
 	rwnd_req = (SCTP_SB_LIMIT_RCV(so) >> SCTP_RWND_HIWAT_SHIFT);
 	/* Must be at least a MTU's worth */
 	if (rwnd_req < SCTP_MIN_RWND)
 		rwnd_req = SCTP_MIN_RWND;
 	in_eeor_mode = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
 		sctp_misc_ints(SCTP_SORECV_ENTER,
 		    rwnd_req, in_eeor_mode, so->so_rcv.sb_cc, (uint32_t)uio->uio_resid);
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
 		sctp_misc_ints(SCTP_SORECV_ENTERPL,
 		    rwnd_req, block_allowed, so->so_rcv.sb_cc, (uint32_t)uio->uio_resid);
 	}
 
 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(in_flags));
 	if (error) {
 		goto release_unlocked;
 	}
 	sockbuf_lock = 1;
 restart:
 
 restart_nosblocks:
 	if (hold_sblock == 0) {
 		SOCKBUF_LOCK(&so->so_rcv);
 		hold_sblock = 1;
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
 		goto out;
 	}
 	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && (so->so_rcv.sb_cc == 0)) {
 		if (so->so_error) {
 			error = so->so_error;
 			if ((in_flags & MSG_PEEK) == 0)
 				so->so_error = 0;
 			goto out;
 		} else {
 			if (so->so_rcv.sb_cc == 0) {
 				/* indicate EOF */
 				error = 0;
 				goto out;
 			}
 		}
 	}
 	if (so->so_rcv.sb_cc <= held_length) {
 		if (so->so_error) {
 			error = so->so_error;
 			if ((in_flags & MSG_PEEK) == 0) {
 				so->so_error = 0;
 			}
 			goto out;
 		}
 		if ((so->so_rcv.sb_cc == 0) &&
 		    ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
 				/*
 				 * For active open side clear flags for
 				 * re-use passive open is blocked by
 				 * connect.
 				 */
 				if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
 					/*
 					 * You were aborted, passive side
 					 * always hits here
 					 */
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
 					error = ECONNRESET;
 				}
 				so->so_state &= ~(SS_ISCONNECTING |
 				    SS_ISDISCONNECTING |
 				    SS_ISCONFIRMING |
 				    SS_ISCONNECTED);
 				if (error == 0) {
 					if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
 						error = ENOTCONN;
 					}
 				}
 				goto out;
 			}
 		}
 		if (block_allowed) {
-			error = sbwait(&so->so_rcv);
+			error = sbwait(so, SO_RCV);
 			if (error) {
 				goto out;
 			}
 			held_length = 0;
 			goto restart_nosblocks;
 		} else {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EWOULDBLOCK);
 			error = EWOULDBLOCK;
 			goto out;
 		}
 	}
 	if (hold_sblock == 1) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		hold_sblock = 0;
 	}
 	/* we possibly have data we can read */
 	/* sa_ignore FREED_MEMORY */
 	control = TAILQ_FIRST(&inp->read_queue);
 	if (control == NULL) {
 		/*
 		 * This could be happening since the appender did the
 		 * increment but as not yet did the tailq insert onto the
 		 * read_queue
 		 */
 		if (hold_rlock == 0) {
 			SCTP_INP_READ_LOCK(inp);
 		}
 		control = TAILQ_FIRST(&inp->read_queue);
 		if ((control == NULL) && (so->so_rcv.sb_cc != 0)) {
 #ifdef INVARIANTS
 			panic("Huh, its non zero and nothing on control?");
 #endif
 			so->so_rcv.sb_cc = 0;
 		}
 		SCTP_INP_READ_UNLOCK(inp);
 		hold_rlock = 0;
 		goto restart;
 	}
 
 	if ((control->length == 0) &&
 	    (control->do_not_ref_stcb)) {
 		/*
 		 * Clean up code for freeing assoc that left behind a
 		 * pdapi.. maybe a peer in EEOR that just closed after
 		 * sending and never indicated a EOR.
 		 */
 		if (hold_rlock == 0) {
 			hold_rlock = 1;
 			SCTP_INP_READ_LOCK(inp);
 		}
 		control->held_length = 0;
 		if (control->data) {
 			/* Hmm there is data here .. fix */
 			struct mbuf *m_tmp;
 			int cnt = 0;
 
 			m_tmp = control->data;
 			while (m_tmp) {
 				cnt += SCTP_BUF_LEN(m_tmp);
 				if (SCTP_BUF_NEXT(m_tmp) == NULL) {
 					control->tail_mbuf = m_tmp;
 					control->end_added = 1;
 				}
 				m_tmp = SCTP_BUF_NEXT(m_tmp);
 			}
 			control->length = cnt;
 		} else {
 			/* remove it */
 			TAILQ_REMOVE(&inp->read_queue, control, next);
 			/* Add back any hidden data */
 			sctp_free_remote_addr(control->whoFrom);
 			sctp_free_a_readq(stcb, control);
 		}
 		if (hold_rlock) {
 			hold_rlock = 0;
 			SCTP_INP_READ_UNLOCK(inp);
 		}
 		goto restart;
 	}
 	if ((control->length == 0) &&
 	    (control->end_added == 1)) {
 		/*
 		 * Do we also need to check for (control->pdapi_aborted ==
 		 * 1)?
 		 */
 		if (hold_rlock == 0) {
 			hold_rlock = 1;
 			SCTP_INP_READ_LOCK(inp);
 		}
 		TAILQ_REMOVE(&inp->read_queue, control, next);
 		if (control->data) {
 #ifdef INVARIANTS
 			panic("control->data not null but control->length == 0");
 #else
 			SCTP_PRINTF("Strange, data left in the control buffer. Cleaning up.\n");
 			sctp_m_freem(control->data);
 			control->data = NULL;
 #endif
 		}
 		if (control->aux_data) {
 			sctp_m_free(control->aux_data);
 			control->aux_data = NULL;
 		}
 #ifdef INVARIANTS
 		if (control->on_strm_q) {
 			panic("About to free ctl:%p so:%p and its in %d",
 			    control, so, control->on_strm_q);
 		}
 #endif
 		sctp_free_remote_addr(control->whoFrom);
 		sctp_free_a_readq(stcb, control);
 		if (hold_rlock) {
 			hold_rlock = 0;
 			SCTP_INP_READ_UNLOCK(inp);
 		}
 		goto restart;
 	}
 	if (control->length == 0) {
 		if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) &&
 		    (filling_sinfo)) {
 			/* find a more suitable one then this */
 			ctl = TAILQ_NEXT(control, next);
 			while (ctl) {
 				if ((ctl->stcb != control->stcb) && (ctl->length) &&
 				    (ctl->some_taken ||
 				    (ctl->spec_flags & M_NOTIFICATION) ||
 				    ((ctl->do_not_ref_stcb == 0) &&
 				    (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))
 				    ) {
 					/*-
 					 * If we have a different TCB next, and there is data
 					 * present. If we have already taken some (pdapi), OR we can
 					 * ref the tcb and no delivery as started on this stream, we
 					 * take it. Note we allow a notification on a different
 					 * assoc to be delivered..
 					 */
 					control = ctl;
 					goto found_one;
 				} else if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) &&
 					    (ctl->length) &&
 					    ((ctl->some_taken) ||
 					    ((ctl->do_not_ref_stcb == 0) &&
 					    ((ctl->spec_flags & M_NOTIFICATION) == 0) &&
 				    (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))) {
 					/*-
 					 * If we have the same tcb, and there is data present, and we
 					 * have the strm interleave feature present. Then if we have
 					 * taken some (pdapi) or we can refer to tht tcb AND we have
 					 * not started a delivery for this stream, we can take it.
 					 * Note we do NOT allow a notification on the same assoc to
 					 * be delivered.
 					 */
 					control = ctl;
 					goto found_one;
 				}
 				ctl = TAILQ_NEXT(ctl, next);
 			}
 		}
 		/*
 		 * if we reach here, not suitable replacement is available
 		 * <or> fragment interleave is NOT on. So stuff the sb_cc
 		 * into the our held count, and its time to sleep again.
 		 */
 		held_length = so->so_rcv.sb_cc;
 		control->held_length = so->so_rcv.sb_cc;
 		goto restart;
 	}
 	/* Clear the held length since there is something to read */
 	control->held_length = 0;
 found_one:
 	/*
 	 * If we reach here, control has a some data for us to read off.
 	 * Note that stcb COULD be NULL.
 	 */
 	if (hold_rlock == 0) {
 		hold_rlock = 1;
 		SCTP_INP_READ_LOCK(inp);
 	}
 	control->some_taken++;
 	stcb = control->stcb;
 	if (stcb) {
 		if ((control->do_not_ref_stcb == 0) &&
 		    (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
 			if (freecnt_applied == 0)
 				stcb = NULL;
 		} else if (control->do_not_ref_stcb == 0) {
 			/* you can't free it on me please */
 			/*
 			 * The lock on the socket buffer protects us so the
 			 * free code will stop. But since we used the
 			 * socketbuf lock and the sender uses the tcb_lock
 			 * to increment, we need to use the atomic add to
 			 * the refcnt
 			 */
 			if (freecnt_applied) {
 #ifdef INVARIANTS
 				panic("refcnt already incremented");
 #else
 				SCTP_PRINTF("refcnt already incremented?\n");
 #endif
 			} else {
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				freecnt_applied = 1;
 			}
 			/*
 			 * Setup to remember how much we have not yet told
 			 * the peer our rwnd has opened up. Note we grab the
 			 * value from the tcb from last time. Note too that
 			 * sack sending clears this when a sack is sent,
 			 * which is fine. Once we hit the rwnd_req, we then
 			 * will go to the sctp_user_rcvd() that will not
 			 * lock until it KNOWs it MUST send a WUP-SACK.
 			 */
 			freed_so_far = (uint32_t)stcb->freed_by_sorcv_sincelast;
 			stcb->freed_by_sorcv_sincelast = 0;
 		}
 	}
 	if (stcb &&
 	    ((control->spec_flags & M_NOTIFICATION) == 0) &&
 	    control->do_not_ref_stcb == 0) {
 		stcb->asoc.strmin[control->sinfo_stream].delivery_started = 1;
 	}
 
 	/* First lets get off the sinfo and sockaddr info */
 	if ((sinfo != NULL) && (filling_sinfo != 0)) {
 		sinfo->sinfo_stream = control->sinfo_stream;
 		sinfo->sinfo_ssn = (uint16_t)control->mid;
 		sinfo->sinfo_flags = control->sinfo_flags;
 		sinfo->sinfo_ppid = control->sinfo_ppid;
 		sinfo->sinfo_context = control->sinfo_context;
 		sinfo->sinfo_timetolive = control->sinfo_timetolive;
 		sinfo->sinfo_tsn = control->sinfo_tsn;
 		sinfo->sinfo_cumtsn = control->sinfo_cumtsn;
 		sinfo->sinfo_assoc_id = control->sinfo_assoc_id;
 		nxt = TAILQ_NEXT(control, next);
 		if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) ||
 		    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) {
 			struct sctp_extrcvinfo *s_extra;
 
 			s_extra = (struct sctp_extrcvinfo *)sinfo;
 			if ((nxt) &&
 			    (nxt->length)) {
 				s_extra->serinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
 				if (nxt->sinfo_flags & SCTP_UNORDERED) {
 					s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
 				}
 				if (nxt->spec_flags & M_NOTIFICATION) {
 					s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
 				}
 				s_extra->serinfo_next_aid = nxt->sinfo_assoc_id;
 				s_extra->serinfo_next_length = nxt->length;
 				s_extra->serinfo_next_ppid = nxt->sinfo_ppid;
 				s_extra->serinfo_next_stream = nxt->sinfo_stream;
 				if (nxt->tail_mbuf != NULL) {
 					if (nxt->end_added) {
 						s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
 					}
 				}
 			} else {
 				/*
 				 * we explicitly 0 this, since the memcpy
 				 * got some other things beyond the older
 				 * sinfo_ that is on the control's structure
 				 * :-D
 				 */
 				nxt = NULL;
 				s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
 				s_extra->serinfo_next_aid = 0;
 				s_extra->serinfo_next_length = 0;
 				s_extra->serinfo_next_ppid = 0;
 				s_extra->serinfo_next_stream = 0;
 			}
 		}
 		/*
 		 * update off the real current cum-ack, if we have an stcb.
 		 */
 		if ((control->do_not_ref_stcb == 0) && stcb)
 			sinfo->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
 		/*
 		 * mask off the high bits, we keep the actual chunk bits in
 		 * there.
 		 */
 		sinfo->sinfo_flags &= 0x00ff;
 		if ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) {
 			sinfo->sinfo_flags |= SCTP_UNORDERED;
 		}
 	}
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	{
 		int index, newindex;
 		struct sctp_pcbtsn_rlog *entry;
 
 		do {
 			index = inp->readlog_index;
 			newindex = index + 1;
 			if (newindex >= SCTP_READ_LOG_SIZE) {
 				newindex = 0;
 			}
 		} while (atomic_cmpset_int(&inp->readlog_index, index, newindex) == 0);
 		entry = &inp->readlog[index];
 		entry->vtag = control->sinfo_assoc_id;
 		entry->strm = control->sinfo_stream;
 		entry->seq = (uint16_t)control->mid;
 		entry->sz = control->length;
 		entry->flgs = control->sinfo_flags;
 	}
 #endif
 	if ((fromlen > 0) && (from != NULL)) {
 		union sctp_sockstore store;
 		size_t len;
 
 		switch (control->whoFrom->ro._l_addr.sa.sa_family) {
 #ifdef INET6
 		case AF_INET6:
 			len = sizeof(struct sockaddr_in6);
 			store.sin6 = control->whoFrom->ro._l_addr.sin6;
 			store.sin6.sin6_port = control->port_from;
 			break;
 #endif
 #ifdef INET
 		case AF_INET:
 #ifdef INET6
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 				len = sizeof(struct sockaddr_in6);
 				in6_sin_2_v4mapsin6(&control->whoFrom->ro._l_addr.sin,
 				    &store.sin6);
 				store.sin6.sin6_port = control->port_from;
 			} else {
 				len = sizeof(struct sockaddr_in);
 				store.sin = control->whoFrom->ro._l_addr.sin;
 				store.sin.sin_port = control->port_from;
 			}
 #else
 			len = sizeof(struct sockaddr_in);
 			store.sin = control->whoFrom->ro._l_addr.sin;
 			store.sin.sin_port = control->port_from;
 #endif
 			break;
 #endif
 		default:
 			len = 0;
 			break;
 		}
 		memcpy(from, &store, min((size_t)fromlen, len));
 #ifdef INET6
 		{
 			struct sockaddr_in6 lsa6, *from6;
 
 			from6 = (struct sockaddr_in6 *)from;
 			sctp_recover_scope_mac(from6, (&lsa6));
 		}
 #endif
 	}
 	if (hold_rlock) {
 		SCTP_INP_READ_UNLOCK(inp);
 		hold_rlock = 0;
 	}
 	if (hold_sblock) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		hold_sblock = 0;
 	}
 	/* now copy out what data we can */
 	if (mp == NULL) {
 		/* copy out each mbuf in the chain up to length */
 get_more_data:
 		m = control->data;
 		while (m) {
 			/* Move out all we can */
 			cp_len = uio->uio_resid;
 			my_len = SCTP_BUF_LEN(m);
 			if (cp_len > my_len) {
 				/* not enough in this buf */
 				cp_len = my_len;
 			}
 			if (hold_rlock) {
 				SCTP_INP_READ_UNLOCK(inp);
 				hold_rlock = 0;
 			}
 			if (cp_len > 0)
 				error = uiomove(mtod(m, char *), (int)cp_len, uio);
 			/* re-read */
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 				goto release;
 			}
 
 			if ((control->do_not_ref_stcb == 0) && stcb &&
 			    stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 				no_rcv_needed = 1;
 			}
 			if (error) {
 				/* error we are out of here */
 				goto release;
 			}
 			SCTP_INP_READ_LOCK(inp);
 			hold_rlock = 1;
 			if (cp_len == SCTP_BUF_LEN(m)) {
 				if ((SCTP_BUF_NEXT(m) == NULL) &&
 				    (control->end_added)) {
 					out_flags |= MSG_EOR;
 					if ((control->do_not_ref_stcb == 0) &&
 					    (control->stcb != NULL) &&
 					    ((control->spec_flags & M_NOTIFICATION) == 0))
 						control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
 				}
 				if (control->spec_flags & M_NOTIFICATION) {
 					out_flags |= MSG_NOTIFICATION;
 				}
 				/* we ate up the mbuf */
 				if (in_flags & MSG_PEEK) {
 					/* just looking */
 					m = SCTP_BUF_NEXT(m);
 					copied_so_far += cp_len;
 				} else {
 					/* dispose of the mbuf */
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 						sctp_sblog(&so->so_rcv,
 						    control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
 					}
 					sctp_sbfree(control, stcb, &so->so_rcv, m);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 						sctp_sblog(&so->so_rcv,
 						    control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
 					}
 					copied_so_far += cp_len;
 					freed_so_far += (uint32_t)cp_len;
 					freed_so_far += MSIZE;
 					atomic_subtract_int(&control->length, (int)cp_len);
 					control->data = sctp_m_free(m);
 					m = control->data;
 					/*
 					 * been through it all, must hold sb
 					 * lock ok to null tail
 					 */
 					if (control->data == NULL) {
 #ifdef INVARIANTS
 						if ((control->end_added == 0) ||
 						    (TAILQ_NEXT(control, next) == NULL)) {
 							/*
 							 * If the end is not
 							 * added, OR the
 							 * next is NOT null
 							 * we MUST have the
 							 * lock.
 							 */
 							if (mtx_owned(&inp->inp_rdata_mtx) == 0) {
 								panic("Hmm we don't own the lock?");
 							}
 						}
 #endif
 						control->tail_mbuf = NULL;
 #ifdef INVARIANTS
 						if ((control->end_added) && ((out_flags & MSG_EOR) == 0)) {
 							panic("end_added, nothing left and no MSG_EOR");
 						}
 #endif
 					}
 				}
 			} else {
 				/* Do we need to trim the mbuf? */
 				if (control->spec_flags & M_NOTIFICATION) {
 					out_flags |= MSG_NOTIFICATION;
 				}
 				if ((in_flags & MSG_PEEK) == 0) {
 					SCTP_BUF_RESV_UF(m, cp_len);
 					SCTP_BUF_LEN(m) -= (int)cp_len;
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 						sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, (int)cp_len);
 					}
 					atomic_subtract_int(&so->so_rcv.sb_cc, (int)cp_len);
 					if ((control->do_not_ref_stcb == 0) &&
 					    stcb) {
 						atomic_subtract_int(&stcb->asoc.sb_cc, (int)cp_len);
 					}
 					copied_so_far += cp_len;
 					freed_so_far += (uint32_t)cp_len;
 					freed_so_far += MSIZE;
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 						sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb,
 						    SCTP_LOG_SBRESULT, 0);
 					}
 					atomic_subtract_int(&control->length, (int)cp_len);
 				} else {
 					copied_so_far += cp_len;
 				}
 			}
 			if ((out_flags & MSG_EOR) || (uio->uio_resid == 0)) {
 				break;
 			}
 			if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
 			    (control->do_not_ref_stcb == 0) &&
 			    (freed_so_far >= rwnd_req)) {
 				sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
 			}
 		}		/* end while(m) */
 		/*
 		 * At this point we have looked at it all and we either have
 		 * a MSG_EOR/or read all the user wants... <OR>
 		 * control->length == 0.
 		 */
 		if ((out_flags & MSG_EOR) && ((in_flags & MSG_PEEK) == 0)) {
 			/* we are done with this control */
 			if (control->length == 0) {
 				if (control->data) {
 #ifdef INVARIANTS
 					panic("control->data not null at read eor?");
 #else
 					SCTP_PRINTF("Strange, data left in the control buffer .. invariants would panic?\n");
 					sctp_m_freem(control->data);
 					control->data = NULL;
 #endif
 				}
 		done_with_control:
 				if (hold_rlock == 0) {
 					SCTP_INP_READ_LOCK(inp);
 					hold_rlock = 1;
 				}
 				TAILQ_REMOVE(&inp->read_queue, control, next);
 				/* Add back any hidden data */
 				if (control->held_length) {
 					held_length = 0;
 					control->held_length = 0;
 					wakeup_read_socket = 1;
 				}
 				if (control->aux_data) {
 					sctp_m_free(control->aux_data);
 					control->aux_data = NULL;
 				}
 				no_rcv_needed = control->do_not_ref_stcb;
 				sctp_free_remote_addr(control->whoFrom);
 				control->data = NULL;
 #ifdef INVARIANTS
 				if (control->on_strm_q) {
 					panic("About to free ctl:%p so:%p and its in %d",
 					    control, so, control->on_strm_q);
 				}
 #endif
 				sctp_free_a_readq(stcb, control);
 				control = NULL;
 				if ((freed_so_far >= rwnd_req) &&
 				    (no_rcv_needed == 0))
 					sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
 
 			} else {
 				/*
 				 * The user did not read all of this
 				 * message, turn off the returned MSG_EOR
 				 * since we are leaving more behind on the
 				 * control to read.
 				 */
 #ifdef INVARIANTS
 				if (control->end_added &&
 				    (control->data == NULL) &&
 				    (control->tail_mbuf == NULL)) {
 					panic("Gak, control->length is corrupt?");
 				}
 #endif
 				no_rcv_needed = control->do_not_ref_stcb;
 				out_flags &= ~MSG_EOR;
 			}
 		}
 		if (out_flags & MSG_EOR) {
 			goto release;
 		}
 		if ((uio->uio_resid == 0) ||
 		    ((in_eeor_mode) &&
 		    (copied_so_far >= max(so->so_rcv.sb_lowat, 1)))) {
 			goto release;
 		}
 		/*
 		 * If I hit here the receiver wants more and this message is
 		 * NOT done (pd-api). So two questions. Can we block? if not
 		 * we are done. Did the user NOT set MSG_WAITALL?
 		 */
 		if (block_allowed == 0) {
 			goto release;
 		}
 		/*
 		 * We need to wait for more data a few things: - We don't
 		 * release the I/O lock so we don't get someone else
 		 * reading. - We must be sure to account for the case where
 		 * what is added is NOT to our control when we wakeup.
 		 */
 
 		/*
 		 * Do we need to tell the transport a rwnd update might be
 		 * needed before we go to sleep?
 		 */
 		if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
 		    ((freed_so_far >= rwnd_req) &&
 		    (control->do_not_ref_stcb == 0) &&
 		    (no_rcv_needed == 0))) {
 			sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
 		}
 wait_some_more:
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			goto release;
 		}
 
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)
 			goto release;
 
 		if (hold_rlock == 1) {
 			SCTP_INP_READ_UNLOCK(inp);
 			hold_rlock = 0;
 		}
 		if (hold_sblock == 0) {
 			SOCKBUF_LOCK(&so->so_rcv);
 			hold_sblock = 1;
 		}
 		if ((copied_so_far) && (control->length == 0) &&
 		    (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE))) {
 			goto release;
 		}
 		if (so->so_rcv.sb_cc <= control->held_length) {
-			error = sbwait(&so->so_rcv);
+			error = sbwait(so, SO_RCV);
 			if (error) {
 				goto release;
 			}
 			control->held_length = 0;
 		}
 		if (hold_sblock) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			hold_sblock = 0;
 		}
 		if (control->length == 0) {
 			/* still nothing here */
 			if (control->end_added == 1) {
 				/* he aborted, or is done i.e.did a shutdown */
 				out_flags |= MSG_EOR;
 				if (control->pdapi_aborted) {
 					if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
 						control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
 
 					out_flags |= MSG_TRUNC;
 				} else {
 					if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
 						control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
 				}
 				goto done_with_control;
 			}
 			if (so->so_rcv.sb_cc > held_length) {
 				control->held_length = so->so_rcv.sb_cc;
 				held_length = 0;
 			}
 			goto wait_some_more;
 		} else if (control->data == NULL) {
 			/*
 			 * we must re-sync since data is probably being
 			 * added
 			 */
 			SCTP_INP_READ_LOCK(inp);
 			if ((control->length > 0) && (control->data == NULL)) {
 				/*
 				 * big trouble.. we have the lock and its
 				 * corrupt?
 				 */
 #ifdef INVARIANTS
 				panic("Impossible data==NULL length !=0");
 #endif
 				out_flags |= MSG_EOR;
 				out_flags |= MSG_TRUNC;
 				control->length = 0;
 				SCTP_INP_READ_UNLOCK(inp);
 				goto done_with_control;
 			}
 			SCTP_INP_READ_UNLOCK(inp);
 			/* We will fall around to get more data */
 		}
 		goto get_more_data;
 	} else {
 		/*-
 		 * Give caller back the mbuf chain,
 		 * store in uio_resid the length
 		 */
 		wakeup_read_socket = 0;
 		if ((control->end_added == 0) ||
 		    (TAILQ_NEXT(control, next) == NULL)) {
 			/* Need to get rlock */
 			if (hold_rlock == 0) {
 				SCTP_INP_READ_LOCK(inp);
 				hold_rlock = 1;
 			}
 		}
 		if (control->end_added) {
 			out_flags |= MSG_EOR;
 			if ((control->do_not_ref_stcb == 0) &&
 			    (control->stcb != NULL) &&
 			    ((control->spec_flags & M_NOTIFICATION) == 0))
 				control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
 		}
 		if (control->spec_flags & M_NOTIFICATION) {
 			out_flags |= MSG_NOTIFICATION;
 		}
 		uio->uio_resid = control->length;
 		*mp = control->data;
 		m = control->data;
 		while (m) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 				sctp_sblog(&so->so_rcv,
 				    control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
 			}
 			sctp_sbfree(control, stcb, &so->so_rcv, m);
 			freed_so_far += (uint32_t)SCTP_BUF_LEN(m);
 			freed_so_far += MSIZE;
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
 				sctp_sblog(&so->so_rcv,
 				    control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
 			}
 			m = SCTP_BUF_NEXT(m);
 		}
 		control->data = control->tail_mbuf = NULL;
 		control->length = 0;
 		if (out_flags & MSG_EOR) {
 			/* Done with this control */
 			goto done_with_control;
 		}
 	}
 release:
 	if (hold_rlock == 1) {
 		SCTP_INP_READ_UNLOCK(inp);
 		hold_rlock = 0;
 	}
 	if (hold_sblock == 1) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		hold_sblock = 0;
 	}
 
 	SOCK_IO_RECV_UNLOCK(so);
 	sockbuf_lock = 0;
 
 release_unlocked:
 	if (hold_sblock) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		hold_sblock = 0;
 	}
 	if ((stcb) && (in_flags & MSG_PEEK) == 0) {
 		if ((freed_so_far >= rwnd_req) &&
 		    (control && (control->do_not_ref_stcb == 0)) &&
 		    (no_rcv_needed == 0))
 			sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
 	}
 out:
 	if (msg_flags) {
 		*msg_flags = out_flags;
 	}
 	if (((out_flags & MSG_EOR) == 0) &&
 	    ((in_flags & MSG_PEEK) == 0) &&
 	    (sinfo) &&
 	    (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) ||
 	    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO))) {
 		struct sctp_extrcvinfo *s_extra;
 
 		s_extra = (struct sctp_extrcvinfo *)sinfo;
 		s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
 	}
 	if (hold_rlock == 1) {
 		SCTP_INP_READ_UNLOCK(inp);
 	}
 	if (hold_sblock) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 	}
 	if (sockbuf_lock) {
 		SOCK_IO_RECV_UNLOCK(so);
 	}
 
 	if (freecnt_applied) {
 		/*
 		 * The lock on the socket buffer protects us so the free
 		 * code will stop. But since we used the socketbuf lock and
 		 * the sender uses the tcb_lock to increment, we need to use
 		 * the atomic add to the refcnt.
 		 */
 		if (stcb == NULL) {
 #ifdef INVARIANTS
 			panic("stcb for refcnt has gone NULL?");
 			goto stage_left;
 #else
 			goto stage_left;
 #endif
 		}
 		/* Save the value back for next time */
 		stcb->freed_by_sorcv_sincelast = freed_so_far;
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
 		if (stcb) {
 			sctp_misc_ints(SCTP_SORECV_DONE,
 			    freed_so_far,
 			    (uint32_t)((uio) ? (slen - uio->uio_resid) : slen),
 			    stcb->asoc.my_rwnd,
 			    so->so_rcv.sb_cc);
 		} else {
 			sctp_misc_ints(SCTP_SORECV_DONE,
 			    freed_so_far,
 			    (uint32_t)((uio) ? (slen - uio->uio_resid) : slen),
 			    0,
 			    so->so_rcv.sb_cc);
 		}
 	}
 stage_left:
 	if (wakeup_read_socket) {
 		sctp_sorwakeup(inp, so);
 	}
 	return (error);
 }
 
 #ifdef SCTP_MBUF_LOGGING
 /*
  * Logging wrapper around m_free(): when mbuf logging is enabled in the
  * sctp_logging_level sysctl the mbuf is first recorded with sctp_log_mb().
  * Returns whatever m_free() returns for m.
  */
 struct mbuf *
 sctp_m_free(struct mbuf *m)
 {
 	struct mbuf *rest;
 
 	if ((SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) != 0) {
 		/* Log before freeing; m must still be valid here. */
 		sctp_log_mb(m, SCTP_MBUF_IFREE);
 	}
 	rest = m_free(m);
 	return (rest);
 }
 
 /*
  * Release an mbuf chain one mbuf at a time through sctp_m_free(), so that
  * every mbuf goes through that function's logging check.
  */
 void
 sctp_m_freem(struct mbuf *mb)
 {
 	struct mbuf *cur;
 
 	for (cur = mb; cur != NULL;) {
 		/* sctp_m_free() hands back the next mbuf to release. */
 		cur = sctp_m_free(cur);
 	}
 }
 
 #endif
 
 int
 sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id)
 {
 	/*
 	 * Queue a SCTP_SET_PRIM_ADDR work item for the local address sa so
 	 * that a peer-set-primary can be requested for the associations
 	 * holding it.  Returns 0 on success, EADDRNOTAVAIL when no ifa matches
 	 * sa in vrf_id, or ENOMEM when the work item cannot be allocated.
 	 */
 	struct sctp_laddr *work;
 	struct sctp_ifa *local_ifa;
 
 	local_ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
 	if (local_ifa == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EADDRNOTAVAIL);
 		return (EADDRNOTAVAIL);
 	}
 
 	/* Get the work item that will carry the request. */
 	work = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
 	if (work == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
 		return (ENOMEM);
 	}
 
 	/* Account for the new laddr and fill it in from a zeroed state. */
 	SCTP_INCR_LADDR_COUNT();
 	memset(work, 0, sizeof(*work));
 	(void)SCTP_GETTIME_TIMEVAL(&work->start_time);
 	work->action = SCTP_SET_PRIM_ADDR;
 	work->ifa = local_ifa;
 	/* The queued item keeps its own reference on the ifa. */
 	atomic_add_int(&local_ifa->refcount, 1);
 
 	/*
 	 * Put the item on the address work queue and start the ADDR_WQ
 	 * timer, both under the work queue lock.  LIST_INSERT_HEAD places the
 	 * newest request at the front, so requests are seen newest first.
 	 */
 	SCTP_WQ_ADDR_LOCK();
 	LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), work, sctp_nxt_addr);
 	sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, NULL, NULL, NULL);
 	SCTP_WQ_ADDR_UNLOCK();
 	return (0);
 }
 
 int
 sctp_soreceive(struct socket *so,
     struct sockaddr **psa,
     struct uio *uio,
     struct mbuf **mp0,
     struct mbuf **controlp,
     int *flagsp)
 {
 	/*
 	 * Socket-layer receive entry point: gathers the optional outputs
 	 * (peer address, rcvinfo control message, flags) into local storage,
 	 * calls sctp_sorecvmsg() and then copies the results back to the
 	 * caller.  Returns EINVAL when the socket has no pcb, otherwise the
 	 * error from sctp_sorecvmsg().
 	 */
 	struct sctp_extrcvinfo sinfo;
 	struct sctp_inpcb *inp;
 	struct sockaddr *from = NULL;
 	uint8_t sockbuf[256];
 	int fromlen = 0;
 	int flags = 0;
 	int filling_sinfo;
 	int error;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		return (EINVAL);
 	}
 
 	/*
 	 * The sndrcv info is only collected when the caller supplied a
 	 * place for control data and at least one of the receive-info
 	 * features is not off.
 	 */
 	filling_sinfo = 1;
 	if ((controlp == NULL) ||
 	    (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) &&
 	    sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) &&
 	    sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO))) {
 		filling_sinfo = 0;
 	}
 	if (filling_sinfo) {
 		memset(&sinfo, 0, sizeof(sinfo));
 	}
 
 	/* Address goes into a stack buffer; sa_len == 0 means "none yet". */
 	if (psa != NULL) {
 		from = (struct sockaddr *)sockbuf;
 		from->sa_len = 0;
 		fromlen = sizeof(sockbuf);
 	}
 	if (flagsp != NULL) {
 		flags = *flagsp;
 	}
 
 	error = sctp_sorecvmsg(so, uio, mp0, from, fromlen, &flags,
 	    (struct sctp_sndrcvinfo *)&sinfo, filling_sinfo);
 
 	if (flagsp != NULL) {
 		*flagsp = flags;
 	}
 	if (controlp != NULL) {
 		/* Notifications never get an sinfo control message. */
 		*controlp = NULL;
 		if (filling_sinfo && ((flags & MSG_NOTIFICATION) == 0)) {
 			*controlp = sctp_build_ctl_nchunk(inp,
 			    (struct sctp_sndrcvinfo *)&sinfo);
 		}
 	}
 	if (psa != NULL) {
 		/* from points at sockbuf whenever psa was supplied. */
 		*psa = (from->sa_len != 0) ?
 		    sodupsockaddr(from, M_NOWAIT) : NULL;
 	}
 	return (error);
 }
 
 int
 sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
     int totaddr, int *error)
 {
 	int added = 0;
 	int i;
 	struct sctp_inpcb *inp;
 	struct sockaddr *sa;
 	size_t incr = 0;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 
 	sa = addr;
 	inp = stcb->sctp_ep;
 	*error = 0;
 	for (i = 0; i < totaddr; i++) {
 		switch (sa->sa_family) {
 #ifdef INET
 		case AF_INET:
 			incr = sizeof(struct sockaddr_in);
 			sin = (struct sockaddr_in *)sa;
 			if ((sin->sin_addr.s_addr == INADDR_ANY) ||
 			    (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
 			    IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 				    SCTP_FROM_SCTPUTIL + SCTP_LOC_7);
 				*error = EINVAL;
 				goto out_now;
 			}
 			if (sctp_add_remote_addr(stcb, sa, NULL, stcb->asoc.port,
 			    SCTP_DONOT_SETSCOPE,
 			    SCTP_ADDR_IS_CONFIRMED)) {
 				/* assoc gone no un-lock */
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
 				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 				    SCTP_FROM_SCTPUTIL + SCTP_LOC_8);
 				*error = ENOBUFS;
 				goto out_now;
 			}
 			added++;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			incr = sizeof(struct sockaddr_in6);
 			sin6 = (struct sockaddr_in6 *)sa;
 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
 			    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 				    SCTP_FROM_SCTPUTIL + SCTP_LOC_9);
 				*error = EINVAL;
 				goto out_now;
 			}
 			if (sctp_add_remote_addr(stcb, sa, NULL, stcb->asoc.port,
 			    SCTP_DONOT_SETSCOPE,
 			    SCTP_ADDR_IS_CONFIRMED)) {
 				/* assoc gone no un-lock */
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
 				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 				    SCTP_FROM_SCTPUTIL + SCTP_LOC_10);
 				*error = ENOBUFS;
 				goto out_now;
 			}
 			added++;
 			break;
 #endif
 		default:
 			break;
 		}
 		sa = (struct sockaddr *)((caddr_t)sa + incr);
 	}
 out_now:
 	return (added);
 }
 
 int
 sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
     unsigned int totaddr,
     unsigned int *num_v4, unsigned int *num_v6,
     unsigned int limit)
 {
 	struct sockaddr *sa;
 	struct sctp_tcb *stcb;
 	unsigned int incr, at, i;
 
 	at = 0;
 	sa = addr;
 	*num_v6 = *num_v4 = 0;
 	/* account and validate addresses */
 	if (totaddr == 0) {
 		return (EINVAL);
 	}
 	for (i = 0; i < totaddr; i++) {
 		if (at + sizeof(struct sockaddr) > limit) {
 			return (EINVAL);
 		}
 		switch (sa->sa_family) {
 #ifdef INET
 		case AF_INET:
 			incr = (unsigned int)sizeof(struct sockaddr_in);
 			if (sa->sa_len != incr) {
 				return (EINVAL);
 			}
 			(*num_v4) += 1;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			{
 				struct sockaddr_in6 *sin6;
 
 				incr = (unsigned int)sizeof(struct sockaddr_in6);
 				if (sa->sa_len != incr) {
 					return (EINVAL);
 				}
 				sin6 = (struct sockaddr_in6 *)sa;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					/* Must be non-mapped for connectx */
 					return (EINVAL);
 				}
 				(*num_v6) += 1;
 				break;
 			}
 #endif
 		default:
 			return (EINVAL);
 		}
 		if ((at + incr) > limit) {
 			return (EINVAL);
 		}
 		SCTP_INP_INCR_REF(inp);
 		stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
 		if (stcb != NULL) {
 			SCTP_TCB_UNLOCK(stcb);
 			return (EALREADY);
 		} else {
 			SCTP_INP_DECR_REF(inp);
 		}
 		at += incr;
 		sa = (struct sockaddr *)((caddr_t)sa + incr);
 	}
 	return (0);
 }
 
 /*
  * sctp_bindx(ADD) for one address.
  * assumes all arguments are valid/checked by caller.
  *
  * The result is reported through *error only.  EINVAL is stored for an
  * endpoint that is bound-all, a bad sa_len, an address family the endpoint
  * cannot take, a NULL p on an unbound endpoint, or a port that does not
  * match the endpoint's local port; EADDRINUSE when a different endpoint is
  * found for the address.  Otherwise *error receives the result of
  * sctp_inpcb_bind() (unbound endpoint) or sctp_addr_mgmt_ep_sa().  When the
  * endpoint found for the address is inp itself, *error is left untouched.
  */
 void
 sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
     struct sockaddr *sa, uint32_t vrf_id, int *error,
     void *p)
 {
 #if defined(INET) && defined(INET6)
 	/* Holds the IPv4 form of a v4-mapped IPv6 address. */
 	struct sockaddr_in sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 #ifdef INET
 	struct sockaddr_in *sinp;
 #endif
 	struct sockaddr *addr_to_use;
 	struct sctp_inpcb *lep;
 	uint16_t port;
 
 	/* An endpoint already bound to all addresses cannot add one. */
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		*error = EINVAL;
 		return;
 	}
 	switch (sa->sa_family) {
 #ifdef INET6
 	case AF_INET6:
 		if (sa->sa_len != sizeof(struct sockaddr_in6)) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
 			/* can only bind v6 on an endpoint with BOUND_V6 set */
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		sin6 = (struct sockaddr_in6 *)sa;
 		port = sin6->sin6_port;
 #ifdef INET
 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 			    SCTP_IPV6_V6ONLY(inp)) {
 				/* can't bind v4-mapped on a V6ONLY endpoint */
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 				*error = EINVAL;
 				return;
 			}
 			/* From here on work with the plain IPv4 address. */
 			in6_sin6_2_sin(&sin, sin6);
 			addr_to_use = (struct sockaddr *)&sin;
 		} else {
 			addr_to_use = sa;
 		}
 #else
 		addr_to_use = sa;
 #endif
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		if (sa->sa_len != sizeof(struct sockaddr_in)) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 		    SCTP_IPV6_V6ONLY(inp)) {
 			/* can't bind v4 on a V6ONLY endpoint */
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		sinp = (struct sockaddr_in *)sa;
 		port = sinp->sin_port;
 		addr_to_use = sa;
 		break;
 #endif
 	default:
 		/* Unsupported address family. */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		*error = EINVAL;
 		return;
 	}
 	/* Endpoint not bound yet: hand the address to sctp_inpcb_bind(). */
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
 		if (p == NULL) {
 			/* Can't get proc for Net/Open BSD */
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		*error = sctp_inpcb_bind(so, addr_to_use, NULL, p);
 		return;
 	}
 	/* A non-zero port must match the endpoint's local port. */
 	if ((port != 0) && (port != inp->sctp_lport)) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		*error = EINVAL;
 		return;
 	}
 	/* Look for an endpoint that already has this address. */
 	lep = sctp_pcb_findep(addr_to_use, 1, 0, vrf_id);
 	if (lep == NULL) {
 		/* add the address */
 		*error = sctp_addr_mgmt_ep_sa(inp, addr_to_use,
 		    SCTP_ADD_IP_ADDRESS, vrf_id);
 	} else {
 		/* In use by another endpoint; nothing to do if it is ours. */
 		if (lep != inp) {
 			*error = EADDRINUSE;
 		}
 		/*
 		 * NOTE(review): presumably drops the reference the lookup
 		 * returned on lep - confirm.
 		 */
 		SCTP_INP_DECR_REF(lep);
 	}
 }
 
 /*
  * sctp_bindx(DELETE) for one address.
  * assumes all arguments are valid/checked by caller.
  *
  * The result is reported through *error only: EINVAL for an endpoint that
  * is bound-all, a bad sa_len, or an address family the endpoint cannot
  * hold; otherwise the result of sctp_addr_mgmt_ep_sa() with
  * SCTP_DEL_IP_ADDRESS.
  */
 void
 sctp_bindx_delete_address(struct sctp_inpcb *inp,
     struct sockaddr *sa, uint32_t vrf_id, int *error)
 {
 	struct sockaddr *addr_to_use;
 #if defined(INET) && defined(INET6)
 	struct sockaddr_in6 *sin6;
 	/* Holds the IPv4 form of a v4-mapped IPv6 address. */
 	struct sockaddr_in sin;
 #endif
 
 	/* A bound-all endpoint has no individual addresses to delete. */
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		*error = EINVAL;
 		return;
 	}
 	switch (sa->sa_family) {
 #ifdef INET6
 	case AF_INET6:
 		if (sa->sa_len != sizeof(struct sockaddr_in6)) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
 			/* v6 addresses are only accepted with BOUND_V6 set */
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 #ifdef INET
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 			    SCTP_IPV6_V6ONLY(inp)) {
 				/* a V6ONLY endpoint cannot hold a v4-mapped address */
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 				*error = EINVAL;
 				return;
 			}
 			/* From here on work with the plain IPv4 address. */
 			in6_sin6_2_sin(&sin, sin6);
 			addr_to_use = (struct sockaddr *)&sin;
 		} else {
 			addr_to_use = sa;
 		}
 #else
 		addr_to_use = sa;
 #endif
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		if (sa->sa_len != sizeof(struct sockaddr_in)) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 		    SCTP_IPV6_V6ONLY(inp)) {
 			/* a V6ONLY endpoint cannot hold a v4 address */
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
 			return;
 		}
 		addr_to_use = sa;
 		break;
 #endif
 	default:
 		/* Unsupported address family. */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 		*error = EINVAL;
 		return;
 	}
 	/* No lock required mgmt_ep_sa does its own locking. */
 	*error = sctp_addr_mgmt_ep_sa(inp, addr_to_use, SCTP_DEL_IP_ADDRESS,
 	    vrf_id);
 }
 
 /*
  * Count the local addresses an association may use.
  *
  * If the endpoint is bound to all addresses, every address on every
  * interface of the association's VRF is examined.  An address is counted
  * only when it is not restricted for the association, is not the
  * unspecified address, passes prison_check_ip4()/prison_check_ip6() for
  * the endpoint's credentials and fits the scopes recorded in the
  * association.  If the endpoint is bound to a subset of addresses, every
  * bound address that is not restricted for the association is counted;
  * no scope checks are applied in that case.
  *
  * The address read lock is held during the walk.  Returns 0 when the VRF
  * of the association cannot be found.
  */
 int
 sctp_local_addr_count(struct sctp_tcb *stcb)
 {
 	struct sctp_vrf *vrf;
 	struct sctp_ifn *ifn;
 	struct sctp_ifa *ifa;
 	struct sctp_laddr *laddr;
 	int loopback_scope;
 #if defined(INET)
 	int ipv4_local_scope, ipv4_addr_legal;
 #endif
 #if defined(INET6)
 	int local_scope, site_scope, ipv6_addr_legal;
 #endif
 	int naddrs;
 
 	naddrs = 0;
 	/* Take a snapshot of the scopes that apply to this association. */
 	loopback_scope = stcb->asoc.scope.loopback_scope;
 #if defined(INET)
 	ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
 	ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
 #endif
 #if defined(INET6)
 	local_scope = stcb->asoc.scope.local_scope;
 	site_scope = stcb->asoc.scope.site_scope;
 	ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
 #endif
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(stcb->asoc.vrf_id);
 	if (vrf == NULL) {
 		/* Without a VRF there are no addresses to count. */
 		SCTP_IPI_ADDR_RUNLOCK();
 		return (0);
 	}
 
 	if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 		/*
 		 * Subset bound case: only the addresses the endpoint is
 		 * bound to are candidates.
 		 */
 		LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list,
 		    sctp_nxt_addr) {
 			if (!sctp_is_addr_restricted(stcb, laddr->ifa)) {
 				naddrs++;
 			}
 		}
 		SCTP_IPI_ADDR_RUNLOCK();
 		return (naddrs);
 	}
 
 	/*
 	 * Bound all case: walk all interfaces of the VRF.  Inside the
 	 * switch below, leaving the switch moves on to the next address
 	 * because nothing follows the switch in the loop body.
 	 */
 	LIST_FOREACH(ifn, &vrf->ifnlist, next_ifn) {
 		if ((loopback_scope == 0) &&
 		    SCTP_IFN_IS_IFT_LOOP(ifn)) {
 			/* Loopback interfaces are out of scope. */
 			continue;
 		}
 		LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
 			if (sctp_is_addr_restricted(stcb, ifa)) {
 				continue;
 			}
 			switch (ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
 				{
 					struct sockaddr_in *sin;
 
 					if (!ipv4_addr_legal) {
 						break;
 					}
 					sin = &ifa->address.sin;
 					if (sin->sin_addr.s_addr == 0) {
 						/* Skip unspecified addresses. */
 						break;
 					}
 					if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
 					    &sin->sin_addr) != 0) {
 						break;
 					}
 					if ((ipv4_local_scope == 0) &&
 					    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 						break;
 					}
 					naddrs++;
 					break;
 				}
 #endif
 #ifdef INET6
 			case AF_INET6:
 				{
 					struct sockaddr_in6 *sin6;
 
 					if (!ipv6_addr_legal) {
 						break;
 					}
 					sin6 = &ifa->address.sin6;
 					if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						break;
 					}
 					if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
 					    &sin6->sin6_addr) != 0) {
 						break;
 					}
 					if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 						if (local_scope == 0) {
 							break;
 						}
 						if ((sin6->sin6_scope_id == 0) &&
 						    (sa6_recoverscope(sin6) != 0)) {
 							/* Bad link local address. */
 							break;
 						}
 					}
 					if ((site_scope == 0) &&
 					    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
 						break;
 					}
 					naddrs++;
 					break;
 				}
 #endif
 			default:
 				/* TSNH */
 				break;
 			}
 		}
 	}
 	SCTP_IPI_ADDR_RUNLOCK();
 	return (naddrs);
 }
 
 #if defined(SCTP_LOCAL_TRACE_BUF)
 
 /*
  * Store one entry in the global SCTP trace log.
  *
  * A slot is claimed by advancing the shared log index with
  * atomic_cmpset_int(); the claim is retried until the update succeeds.
  * Once the index has reached SCTP_MAX_LOGGING_SIZE the log wraps: the
  * index is set to 1 and slot 0 is written.  The 'str' argument is not
  * recorded.
  */
 void
 sctp_log_trace(uint32_t subsys, const char *str SCTP_UNUSED, uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f)
 {
 	uint32_t slot, next;
 
 	for (;;) {
 		slot = SCTP_BASE_SYSCTL(sctp_log).index;
 		next = (slot >= SCTP_MAX_LOGGING_SIZE) ? 1 : slot + 1;
 		if (atomic_cmpset_int(&SCTP_BASE_SYSCTL(sctp_log).index, slot, next) != 0) {
 			/* The index was still 'slot'; the slot is ours. */
 			break;
 		}
 	}
 	if (slot >= SCTP_MAX_LOGGING_SIZE) {
 		/* Wrapped around: start again at the first entry. */
 		slot = 0;
 	}
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].timestamp = SCTP_GET_CYCLECOUNT;
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].subsys = subsys;
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].params[0] = a;
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].params[1] = b;
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].params[2] = c;
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].params[3] = d;
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].params[4] = e;
 	SCTP_BASE_SYSCTL(sctp_log).entry[slot].params[5] = f;
 }
 
 #endif
 /*
  * Input hook for SCTP packets that arrive encapsulated in UDP.
  *
  * 'm' holds the packet starting at the IP header and 'off' is the offset
  * of the UDP header.  The UDP source port is remembered, the UDP header
  * is cut out of the chain, the IP payload length is reduced accordingly
  * and the packet is passed to sctp_input_with_port() or
  * sctp6_input_with_port().  On any failure the packet is freed.
  *
  * Always returns true.
  * NOTE(review): presumably this tells the UDP layer that the mbuf was
  * consumed -- confirm against udp_set_kernel_tunneling().
  */
 static bool
 sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
     const struct sockaddr *sa SCTP_UNUSED, void *ctx SCTP_UNUSED)
 {
 	struct ip *iph;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct mbuf *payload, *tail;
 	struct udphdr *uhdr;
 	uint16_t port;
 
 	/* Only chains that start with a packet header can be handled. */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		goto out;
 	}
 	/* Remember the UDP source port (kept in network byte order). */
 	iph = mtod(m, struct ip *);
 	uhdr = (struct udphdr *)((caddr_t)iph + off);
 	port = uhdr->uh_sport;
 	/*
 	 * Cut the chain in two: the IP header stays in 'm', everything
 	 * from the UDP header on moves to 'payload'.
 	 */
 	payload = m_split(m, off, M_NOWAIT);
 	if (payload == NULL) {
 		/* The split failed, drop the packet. */
 		goto out;
 	}
 	if (payload->m_pkthdr.len < sizeof(struct udphdr) + sizeof(struct sctphdr)) {
 		/* Too small to carry a UDP header and an SCTP header. */
 		m_freem(payload);
 		goto out;
 	}
 	/* Make the UDP header and the SCTP header contiguous. */
 	payload = m_pullup(payload, sizeof(struct udphdr) + sizeof(struct sctphdr));
 	if (payload == NULL) {
 		/* The pullup failed. */
 		goto out;
 	}
 	/* Strip the UDP header. */
 	m_adj(payload, sizeof(struct udphdr));
 
 	/* Glue the remaining payload back behind the IP header. */
 	tail = m;
 	while (tail->m_next != NULL) {
 		tail = tail->m_next;
 	}
 	tail->m_next = payload;
 	m->m_pkthdr.len += payload->m_pkthdr.len;
 	/*
 	 * CSUM_DATA_VALID says that the hardware verified the UDP
 	 * checksum.  Because CSUM_DATA_VALID == CSUM_SCTP_VALID, leaving
 	 * it set would claim that the SCTP checksum was verified as well,
 	 * so the bit is cleared after logging the flags.
 	 */
 	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
 	    "sctp_recv_udp_tunneled_packet(): Packet of length %d received on %s with csum_flags 0x%b.\n",
 	    m->m_pkthdr.len,
 	    if_name(m->m_pkthdr.rcvif),
 	    (int)m->m_pkthdr.csum_flags, CSUM_BITS);
 	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
 	/* Account for the removed UDP header and hand the packet to SCTP. */
 	iph = mtod(m, struct ip *);
 	switch (iph->ip_v) {
 #ifdef INET
 	case IPVERSION:
 		iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
 		sctp_input_with_port(m, off, port);
 		break;
 #endif
 #ifdef INET6
 	case IPV6_VERSION >> 4:
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
 		sctp6_input_with_port(&m, &off, port);
 		break;
 #endif
 	default:
 		/* Unknown IP version, drop the packet. */
 		goto out;
 	}
 	return (true);
 out:
 	m_freem(m);
 
 	return (true);
 }
 
 #ifdef INET
 /*
  * ICMP hook for SCTP packets that were sent encapsulated in UDP over IPv4.
  *
  * 'vip' points to the IP header of the offending packet quoted inside the
  * ICMP message; the ICMP header and the outer IP header are located
  * relative to it.  The association is looked up from the quoted
  * addresses and SCTP ports.  The notification is passed to sctp_notify()
  * only if the quoted UDP ports match the encapsulation in use and either
  * the quoted verification tag equals the peer's tag, or the tag is zero
  * and the quoted chunk is an INIT carrying our own tag.
  *
  * 'cmd' and 'sa' are not used.
  */
 static void
 sctp_recv_icmp_tunneled_packet(int cmd, struct sockaddr *sa, void *vip, void *ctx SCTP_UNUSED)
 {
 	struct ip *outer_ip, *inner_ip;
 	struct sctphdr *sh;
 	struct icmp *icmp;
 	struct udphdr *udp;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 	struct sctp_init_chunk *ch;
 	struct sockaddr_in src, dst;
 	uint8_t type, code;
 
 	inner_ip = (struct ip *)vip;
 	icmp = (struct icmp *)((caddr_t)inner_ip -
 	    (sizeof(struct icmp) - sizeof(struct ip)));
 	outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip));
 	/*
 	 * The message must be long enough to quote the UDP header and the
 	 * first 8 bytes behind it (SCTP ports and verification tag).
 	 */
 	if (ntohs(outer_ip->ip_len) <
 	    sizeof(struct ip) + 8 + (inner_ip->ip_hl << 2) + sizeof(struct udphdr) + 8) {
 		return;
 	}
 	udp = (struct udphdr *)((caddr_t)inner_ip + (inner_ip->ip_hl << 2));
 	sh = (struct sctphdr *)(udp + 1);
 	memset(&src, 0, sizeof(struct sockaddr_in));
 	src.sin_family = AF_INET;
 	src.sin_len = sizeof(struct sockaddr_in);
 	src.sin_port = sh->src_port;
 	src.sin_addr = inner_ip->ip_src;
 	memset(&dst, 0, sizeof(struct sockaddr_in));
 	dst.sin_family = AF_INET;
 	dst.sin_len = sizeof(struct sockaddr_in);
 	dst.sin_port = sh->dest_port;
 	dst.sin_addr = inner_ip->ip_dst;
 	/*
 	 * 'dst' is the destination of the packet that could not be
 	 * delivered and 'src' is our local endpoint address, so the two
 	 * are passed in reverse order to the lookup.
 	 */
 	inp = NULL;
 	net = NULL;
 	stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
 	    (struct sockaddr *)&src,
 	    &inp, &net, 1,
 	    SCTP_DEFAULT_VRFID);
 	if ((stcb == NULL) || (net == NULL) || (inp == NULL)) {
 		/* Incomplete lookup result: release what was obtained. */
 		if (stcb != NULL) {
 			SCTP_TCB_UNLOCK(stcb);
 		} else if (inp != NULL) {
 			/* reduce ref-count */
 			SCTP_INP_WLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 			SCTP_INP_WUNLOCK(inp);
 		}
 		return;
 	}
 	/* The quoted UDP ports must match the encapsulation we use. */
 	if ((udp->uh_dport != net->port) ||
 	    (udp->uh_sport != htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)))) {
 		goto unlock;
 	}
 	if (ntohl(sh->v_tag) != 0) {
 		/*
 		 * A non-zero tag must be the one we use for sending.
 		 * Packets reflecting the verification tag are not
 		 * considered.
 		 */
 		if (ntohl(sh->v_tag) != stcb->asoc.peer_vtag) {
 			goto unlock;
 		}
 	} else {
 		/*
 		 * With a zero tag the quoted packet has to be an INIT
 		 * whose initiate tag is ours; that can only be checked
 		 * if enough of the packet was quoted.
 		 */
 		if (ntohs(outer_ip->ip_len) <
 		    sizeof(struct ip) +
 		    8 + (inner_ip->ip_hl << 2) + 8 + 20) {
 			goto unlock;
 		}
 		ch = (struct sctp_init_chunk *)(sh + 1);
 		if ((ch->ch.chunk_type != SCTP_INITIATION) ||
 		    (ntohl(ch->init.initiate_tag) != stcb->asoc.my_vtag)) {
 			goto unlock;
 		}
 	}
 	type = icmp->icmp_type;
 	code = icmp->icmp_code;
 	if ((type == ICMP_UNREACH) &&
 	    (code == ICMP_UNREACH_PORT)) {
 		/* Report an unreachable UDP port as an unreachable protocol. */
 		code = ICMP_UNREACH_PROTOCOL;
 	}
 	/*
 	 * NOTE(review): the TCB is not unlocked after this call;
 	 * presumably sctp_notify() releases it -- confirm.
 	 */
 	sctp_notify(inp, stcb, net, type, code,
 	    ntohs(inner_ip->ip_len),
 	    (uint32_t)ntohs(icmp->icmp_nextmtu));
 	return;
 unlock:
 	SCTP_TCB_UNLOCK(stcb);
 	return;
 }
 #endif
 
 #ifdef INET6
 /*
  * ICMPv6 hook for SCTP packets that were sent encapsulated in UDP over
  * IPv6.
  *
  * 'd' is a struct ip6ctlparam describing the ICMPv6 message and the
  * quoted packet.  The quoted UDP header and the leading part of the SCTP
  * common header are copied out of the mbuf chain, the association is
  * looked up and the notification is passed to sctp6_notify() only if the
  * quoted UDP ports match the encapsulation in use and either the quoted
  * verification tag equals the peer's tag, or the tag is zero and the
  * quoted chunk is an INIT carrying our own tag.
  *
  * 'cmd' and 'sa' are not used.
  */
 static void
 sctp_recv_icmp6_tunneled_packet(int cmd, struct sockaddr *sa, void *d, void *ctx SCTP_UNUSED)
 {
 	struct ip6ctlparam *ip6cp;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 	struct sctphdr sh;
 	struct udphdr udp;
 	struct sockaddr_in6 src, dst;
 	uint8_t type, code;
 
 	ip6cp = (struct ip6ctlparam *)d;
 	/*
 	 * XXX: It is assumed that the offset is valid whenever the mbuf
 	 * is present.
 	 */
 	if (ip6cp->ip6c_m == NULL) {
 		return;
 	}
 	/*
 	 * The ports and the verification tag of the SCTP common header
 	 * must be covered by the quoted packet.
 	 */
 	if (ip6cp->ip6c_m->m_pkthdr.len <
 	    ip6cp->ip6c_off + sizeof(struct udphdr) + offsetof(struct sctphdr, checksum)) {
 		return;
 	}
 	/* Fetch the UDP header. */
 	memset(&udp, 0, sizeof(struct udphdr));
 	m_copydata(ip6cp->ip6c_m,
 	    ip6cp->ip6c_off,
 	    sizeof(struct udphdr),
 	    (caddr_t)&udp);
 	/* Fetch the SCTP port numbers and the verification tag. */
 	memset(&sh, 0, sizeof(struct sctphdr));
 	m_copydata(ip6cp->ip6c_m,
 	    ip6cp->ip6c_off + sizeof(struct udphdr),
 	    sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t),
 	    (caddr_t)&sh);
 	/* Build both socket addresses, scoped to the receiving interface. */
 	memset(&src, 0, sizeof(struct sockaddr_in6));
 	src.sin6_family = AF_INET6;
 	src.sin6_len = sizeof(struct sockaddr_in6);
 	src.sin6_port = sh.src_port;
 	src.sin6_addr = ip6cp->ip6c_ip6->ip6_src;
 	if (in6_setscope(&src.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
 		return;
 	}
 	memset(&dst, 0, sizeof(struct sockaddr_in6));
 	dst.sin6_family = AF_INET6;
 	dst.sin6_len = sizeof(struct sockaddr_in6);
 	dst.sin6_port = sh.dest_port;
 	dst.sin6_addr = ip6cp->ip6c_ip6->ip6_dst;
 	if (in6_setscope(&dst.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
 		return;
 	}
 	inp = NULL;
 	net = NULL;
 	stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
 	    (struct sockaddr *)&src,
 	    &inp, &net, 1, SCTP_DEFAULT_VRFID);
 	if ((stcb == NULL) || (net == NULL) || (inp == NULL)) {
 		/* Incomplete lookup result: release what was obtained. */
 		if (stcb != NULL) {
 			SCTP_TCB_UNLOCK(stcb);
 		} else if (inp != NULL) {
 			/* reduce inp's ref-count */
 			SCTP_INP_WLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 			SCTP_INP_WUNLOCK(inp);
 		}
 		return;
 	}
 	/* The quoted UDP ports must match the encapsulation we use. */
 	if ((udp.uh_dport != net->port) ||
 	    (udp.uh_sport != htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)))) {
 		goto unlock;
 	}
 	if (ntohl(sh.v_tag) != 0) {
 		/*
 		 * A non-zero tag must be the one we use for sending.
 		 * Packets reflecting the verification tag are not
 		 * considered.
 		 */
 		if (ntohl(sh.v_tag) != stcb->asoc.peer_vtag) {
 			goto unlock;
 		}
 	} else {
 		uint32_t initiate_tag;
 		uint8_t chunk_type;
 
 		/*
 		 * With a zero tag the quoted packet has to be an INIT
 		 * whose initiate tag is ours; that can only be checked
 		 * if the chunk header and the initiate tag were quoted.
 		 */
 		if (ip6cp->ip6c_m->m_pkthdr.len <
 		    ip6cp->ip6c_off + sizeof(struct udphdr) +
 		    sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr) +
 		    offsetof(struct sctp_init, a_rwnd)) {
 			goto unlock;
 		}
 		m_copydata(ip6cp->ip6c_m,
 		    ip6cp->ip6c_off +
 		    sizeof(struct udphdr) +
 		    sizeof(struct sctphdr),
 		    sizeof(uint8_t),
 		    (caddr_t)&chunk_type);
 		m_copydata(ip6cp->ip6c_m,
 		    ip6cp->ip6c_off +
 		    sizeof(struct udphdr) +
 		    sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr),
 		    sizeof(uint32_t),
 		    (caddr_t)&initiate_tag);
 		if ((chunk_type != SCTP_INITIATION) ||
 		    (ntohl(initiate_tag) != stcb->asoc.my_vtag)) {
 			goto unlock;
 		}
 	}
 	type = ip6cp->ip6c_icmp6->icmp6_type;
 	code = ip6cp->ip6c_icmp6->icmp6_code;
 	if ((type == ICMP6_DST_UNREACH) &&
 	    (code == ICMP6_DST_UNREACH_NOPORT)) {
 		/* Report an unreachable UDP port as an unknown next header. */
 		type = ICMP6_PARAM_PROB;
 		code = ICMP6_PARAMPROB_NEXTHEADER;
 	}
 	/*
 	 * NOTE(review): the TCB is not unlocked after this call;
 	 * presumably sctp6_notify() releases it -- confirm.
 	 */
 	sctp6_notify(inp, stcb, net, type, code,
 	    ntohl(ip6cp->ip6c_icmp6->icmp6_mtu));
 	return;
 unlock:
 	SCTP_TCB_UNLOCK(stcb);
 }
 #endif
 
 /*
  * Stop SCTP over UDP encapsulation: close the IPv4 and IPv6 tunneling
  * sockets that exist and clear the stored pointers.  Calling it when no
  * socket exists does nothing.
  */
 void
 sctp_over_udp_stop(void)
 {
 #if defined(INET) || defined(INET6)
 	struct socket *so;
 #endif
 
 	/*
 	 * The sysctl caller is expected to hold sctp_sysctl_info_lock()
 	 * for writing!
 	 */
 #ifdef INET
 	so = SCTP_BASE_INFO(udp4_tun_socket);
 	if (so != NULL) {
 		soclose(so);
 		SCTP_BASE_INFO(udp4_tun_socket) = NULL;
 	}
 #endif
 #ifdef INET6
 	so = SCTP_BASE_INFO(udp6_tun_socket);
 	if (so != NULL) {
 		soclose(so);
 		SCTP_BASE_INFO(udp6_tun_socket) = NULL;
 	}
 #endif
 }
 
 /*
  * Start SCTP over UDP encapsulation.
  *
  * For each compiled-in address family a UDP socket is created, the SCTP
  * input and ICMP hooks are installed on it with
  * udp_set_kernel_tunneling() and the socket is bound to the port
  * configured in the sctp_udp_tunneling_port sysctl.
  *
  * Returns 0 on success, EINVAL if no port is configured, EALREADY if a
  * tunneling socket already exists, or the error reported by socreate(),
  * udp_set_kernel_tunneling() or sobind().  On such an error all
  * tunneling sockets are closed again via sctp_over_udp_stop().
  */
 int
 sctp_over_udp_start(void)
 {
 	uint16_t port;
 	int ret;
 #ifdef INET
 	struct sockaddr_in sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 #endif
 	/*
 	 * This function assumes sysctl caller holds sctp_sysctl_info_lock()
 	 * for writing!
 	 */
 	port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
 	/*
 	 * The sysctl value is in host byte order (it is converted with
 	 * htons() before binding below), so it is compared directly.
 	 */
 	if (port == 0) {
 		/* Must have a port set */
 		return (EINVAL);
 	}
 #ifdef INET
 	if (SCTP_BASE_INFO(udp4_tun_socket) != NULL) {
 		/* Already running -- must stop first */
 		return (EALREADY);
 	}
 #endif
 #ifdef INET6
 	if (SCTP_BASE_INFO(udp6_tun_socket) != NULL) {
 		/* Already running -- must stop first */
 		return (EALREADY);
 	}
 #endif
 #ifdef INET
 	if ((ret = socreate(PF_INET, &SCTP_BASE_INFO(udp4_tun_socket),
 	    SOCK_DGRAM, IPPROTO_UDP,
 	    curthread->td_ucred, curthread))) {
 		sctp_over_udp_stop();
 		return (ret);
 	}
 	/* Call the special UDP hook. */
 	if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp4_tun_socket),
 	    sctp_recv_udp_tunneled_packet,
 	    sctp_recv_icmp_tunneled_packet,
 	    NULL))) {
 		sctp_over_udp_stop();
 		return (ret);
 	}
 	/* Ok, we have a socket, bind it to the port. */
 	memset(&sin, 0, sizeof(struct sockaddr_in));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_port = htons(port);
 	if ((ret = sobind(SCTP_BASE_INFO(udp4_tun_socket),
 	    (struct sockaddr *)&sin, curthread))) {
 		sctp_over_udp_stop();
 		return (ret);
 	}
 #endif
 #ifdef INET6
 	if ((ret = socreate(PF_INET6, &SCTP_BASE_INFO(udp6_tun_socket),
 	    SOCK_DGRAM, IPPROTO_UDP,
 	    curthread->td_ucred, curthread))) {
 		sctp_over_udp_stop();
 		return (ret);
 	}
 	/* Call the special UDP hook. */
 	if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp6_tun_socket),
 	    sctp_recv_udp_tunneled_packet,
 	    sctp_recv_icmp6_tunneled_packet,
 	    NULL))) {
 		sctp_over_udp_stop();
 		return (ret);
 	}
 	/* Ok, we have a socket, bind it to the port. */
 	memset(&sin6, 0, sizeof(struct sockaddr_in6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_port = htons(port);
 	if ((ret = sobind(SCTP_BASE_INFO(udp6_tun_socket),
 	    (struct sockaddr *)&sin6, curthread))) {
 		sctp_over_udp_stop();
 		return (ret);
 	}
 #endif
 	return (0);
 }
 
 /*
  * sctp_min_mtu() returns the minimum of all non-zero arguments.
  * If all arguments are zero, zero is returned.
  */
 uint32_t
 sctp_min_mtu(uint32_t mtu1, uint32_t mtu2, uint32_t mtu3)
 {
 	uint32_t mtu;
 
 	/*
 	 * Fold the candidates one after the other.  A value of zero in
 	 * 'mtu' means that no non-zero candidate has been seen so far.
 	 */
 	mtu = mtu1;
 	if ((mtu2 > 0) && ((mtu == 0) || (mtu2 < mtu))) {
 		mtu = mtu2;
 	}
 	if ((mtu3 > 0) && ((mtu == 0) || (mtu3 < mtu))) {
 		mtu = mtu3;
 	}
 	return (mtu);
 }
 
 void
 sctp_hc_set_mtu(union sctp_sockstore *addr, uint16_t fibnum, uint32_t mtu)
 {
 	struct in_conninfo inc;
 
 	memset(&inc, 0, sizeof(struct in_conninfo));
 	inc.inc_fibnum = fibnum;
 	switch (addr->sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		inc.inc_faddr = addr->sin.sin_addr;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		inc.inc_flags |= INC_ISIPV6;
 		inc.inc6_faddr = addr->sin6.sin6_addr;
 		break;
 #endif
 	default:
 		return;
 	}
 	tcp_hc_updatemtu(&inc, (u_long)mtu);
 }
 
 uint32_t
 sctp_hc_get_mtu(union sctp_sockstore *addr, uint16_t fibnum)
 {
 	struct in_conninfo inc;
 
 	memset(&inc, 0, sizeof(struct in_conninfo));
 	inc.inc_fibnum = fibnum;
 	switch (addr->sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		inc.inc_faddr = addr->sin.sin_addr;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		inc.inc_flags |= INC_ISIPV6;
 		inc.inc6_faddr = addr->sin6.sin6_addr;
 		break;
 #endif
 	default:
 		return (0);
 	}
 	return ((uint32_t)tcp_hc_getmtu(&inc));
 }
 
 /*
  * Replace the state part of the association state with 'new_state' and
  * keep the substate bits.  'new_state' must not contain bits outside of
  * SCTP_STATE_MASK.  Entering one of the SHUTDOWN_RECEIVED, SHUTDOWN_SENT
  * or SHUTDOWN_ACK_SENT states clears the SHUTDOWN_PENDING substate.
  *
  * With KDTRACE_HOOKS the state__change probe fires when the state part
  * changed, except for the change from EMPTY to INUSE.
  */
 void
 sctp_set_state(struct sctp_tcb *stcb, int new_state)
 {
 #if defined(KDTRACE_HOOKS)
 	int old_state = stcb->asoc.state;
 #endif
 
 	KASSERT((new_state & ~SCTP_STATE_MASK) == 0,
 	    ("sctp_set_state: Can't set substate (new_state = %x)",
 	    new_state));
 	stcb->asoc.state = (stcb->asoc.state & ~SCTP_STATE_MASK) | new_state;
 	switch (new_state) {
 	case SCTP_STATE_SHUTDOWN_RECEIVED:
 	case SCTP_STATE_SHUTDOWN_SENT:
 	case SCTP_STATE_SHUTDOWN_ACK_SENT:
 		SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
 		break;
 	default:
 		break;
 	}
 #if defined(KDTRACE_HOOKS)
 	{
 		int old_base = old_state & SCTP_STATE_MASK;
 
 		if ((old_base != new_state) &&
 		    ((old_base != SCTP_STATE_EMPTY) ||
 		    (new_state != SCTP_STATE_INUSE))) {
 			SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
 		}
 	}
 #endif
 }
 
 /*
  * Set the substate bits given in 'substate' in the association state.
  * 'substate' must not contain bits of SCTP_STATE_MASK.
  *
  * With KDTRACE_HOOKS the state__change probe fires when ABOUT_TO_BE_FREED
  * or SHUTDOWN_PENDING is requested and was not set before.
  */
 void
 sctp_add_substate(struct sctp_tcb *stcb, int substate)
 {
 #if defined(KDTRACE_HOOKS)
 	int old_state = stcb->asoc.state;
 #endif
 
 	KASSERT((substate & SCTP_STATE_MASK) == 0,
 	    ("sctp_add_substate: Can't set state (substate = %x)",
 	    substate));
 	stcb->asoc.state |= substate;
 #if defined(KDTRACE_HOOKS)
 	{
 		bool freed_added, pending_added;
 
 		freed_added = (substate & SCTP_STATE_ABOUT_TO_BE_FREED) &&
 		    ((old_state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0);
 		pending_added = (substate & SCTP_STATE_SHUTDOWN_PENDING) &&
 		    ((old_state & SCTP_STATE_SHUTDOWN_PENDING) == 0);
 		if (freed_added || pending_added) {
 			SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
 		}
 	}
 #endif
 }
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 695cd5a916db..be86ceca445c 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1,4073 +1,4073 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2007-2008,2010
  *	Swinburne University of Technology, Melbourne, Australia.
  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
  * Copyright (c) 2010 The FreeBSD Foundation
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed at the Centre for Advanced Internet
  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
  * James Healy and David Hayes, made possible in part by a grant from the Cisco
  * University Research Program Fund at Community Foundation Silicon Valley.
  *
  * Portions of this software were developed at the Centre for Advanced
  * Internet Architectures, Swinburne University of Technology, Melbourne,
  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/arb.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
 #include <sys/hhook.h>
 #endif
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/protosw.h>
 #include <sys/qmath.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/stats.h>
 
 #include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #define TCPSTATES		/* for logging */
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
 #include <netinet/tcp_fastopen.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #include <netinet/tcp_syncache.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 #include <netinet/tcp_ecn.h>
 #include <netinet/udp.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Global tunables of the TCP input path.  Each VNET_DEFINE() below creates
  * a per-VNET variable with its initial value and the SYSCTL_*() invocation
  * that follows exports it under net.inet.tcp (or net.inet.tcp.ecn) with
  * the quoted description.
  *
  * NOTE(review): several SYSCTL_INT() invocations pass a literal after the
  * pointer argument that differs from the initializer of the variable
  * (marked below).  Presumably that literal is ignored when a pointer is
  * supplied -- confirm against sysctl(9).
  */
 const int tcprexmtthresh = 3;
 
 VNET_DEFINE(int, tcp_log_in_vain) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_log_in_vain), 0,
     "Log all incoming TCP segments to closed ports");
 
 VNET_DEFINE(int, blackhole) = 0;
 #define	V_blackhole		VNET(blackhole)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(blackhole), 0,
     "Do not send RST on segments to closed ports");
 
 VNET_DEFINE(bool, blackhole_local) = false;
 #define	V_blackhole_local	VNET(blackhole_local)
 SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, blackhole_local, CTLFLAG_VNET |
     CTLFLAG_RW, &VNET_NAME(blackhole_local), false,
     "Enforce net.inet.tcp.blackhole for locally originated packets");
 
 /* NOTE(review): initializer is 1, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_delack_enabled) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_delack_enabled), 0,
     "Delay ACK to try and piggyback it onto a data packet");
 
 VNET_DEFINE(int, drop_synfin) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(drop_synfin), 0,
     "Drop TCP packets with SYN+FIN set");
 
 VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_prr_conservative), 0,
     "Do conservative Proportional Rate Reduction");
 
 VNET_DEFINE(int, tcp_do_prr) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_prr), 1,
     "Enable Proportional Rate Reduction per RFC 6937");
 
 /* NOTE(review): initializer is 0, literal after the pointer is 1. */
 VNET_DEFINE(int, tcp_do_lrd) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_lrd, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_lrd), 1,
     "Perform Lost Retransmission Detection");
 
 VNET_DEFINE(int, tcp_do_newcwv) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_newcwv), 0,
     "Enable New Congestion Window Validation per RFC7661");
 
 /* NOTE(review): initializer is 1, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_do_rfc3042) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3042), 0,
     "Enable RFC 3042 (Limited Transmit)");
 
 /* NOTE(review): initializer is 1, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_do_rfc3390) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3390), 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
 /* NOTE(review): initializer is 10, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_initcwnd_segments) = 10;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, initcwnd_segments,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_initcwnd_segments), 0,
     "Slow-start flight size (initial congestion window) in number of segments");
 
 /* NOTE(review): initializer is 1, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_do_rfc3465) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3465), 0,
     "Enable RFC 3465 (Appropriate Byte Counting)");
 
 VNET_DEFINE(int, tcp_abc_l_var) = 2;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_abc_l_var), 2,
     "Cap the max cwnd increment during slow-start to this number of segments");
 
 /* Parent node for the ECN related tunables (net.inet.tcp.ecn). */
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "TCP ECN");
 
 /* NOTE(review): initializer is 2, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_do_ecn) = 2;
 SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_ecn), 0,
     "TCP ECN support");
 
 /* NOTE(review): initializer is 1, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
 SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_ecn_maxretries), 0,
     "Max retries before giving up on ECN");
 
 VNET_DEFINE(int, tcp_insecure_syn) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_insecure_syn), 0,
     "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets");
 
 VNET_DEFINE(int, tcp_insecure_rst) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_insecure_rst), 0,
     "Follow RFC793 instead of RFC5961 criteria for accepting RST packets");
 
 /* Initial receive socket buffer size: 64 KiB. */
 VNET_DEFINE(int, tcp_recvspace) = 1024*64;
 #define	V_tcp_recvspace	VNET(tcp_recvspace)
 SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size");
 
 /* NOTE(review): initializer is 1, literal after the pointer is 0. */
 VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_autorcvbuf), 0,
     "Enable automatic receive buffer sizing");
 
 /* Upper bound for the automatically sized receive buffer: 2 MiB. */
 VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_autorcvbuf_max), 0,
     "Max size of automatic receive buffer");
 
 VNET_DEFINE(struct inpcbinfo, tcbinfo);
 
 /*
  * TCP statistics are stored in an array of counter(9)s, whose size matches
  * the size of struct tcpstat.  The TCP running connection count is a
  * regular array.
  */
 VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat,
     tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
 VNET_DEFINE(counter_u64_t, tcps_states[TCP_NSTATES]);
 SYSCTL_COUNTER_U64_ARRAY(_net_inet_tcp, TCPCTL_STATES, states, CTLFLAG_RD |
     CTLFLAG_VNET, &VNET_NAME(tcps_states)[0], TCP_NSTATES,
     "TCP connection counts by TCP state");
 
 /*
  * Kernel module interface for updating tcpstat.  The first argument is an index
  * into tcpstat treated as an array.
  */
 void
 kmod_tcpstat_add(int statnum, int val)
 {
 
 	counter_u64_add(VNET(tcpstat)[statnum], val);
 }
 
 #ifdef TCP_HHOOK
 /*
  * Run the helper hooks registered for the TCP established input hook
  * point.  Nothing is done when no hook is registered; otherwise 'tp',
  * 'th' and 'to' are passed to the hooks through a struct tcp_hhook_data
  * together with the connection's osd.
  */
 void
 hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
 {
 	struct tcp_hhook_data hhook_data;
 
 	if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks <= 0) {
 		/* No hooks registered. */
 		return;
 	}
 	hhook_data.tp = tp;
 	hhook_data.th = th;
 	hhook_data.to = to;
 
 	hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data,
 	    tp->osd);
 }
 #endif
 
 /*
  * CC wrapper hook functions
  */
 /*
  * Tell the congestion control module that an ACK was received.
  *
  * Before the module's ack_received handler (if any) is called, the
  * number of segments and bytes covered by this ACK are stored in the
  * connection's cc variables and the CCF_CWND_LIMITED flag is brought up
  * to date.  For ACKs of type CC_ACK the appropriate byte counting state
  * (t_bytes_acked, CCF_ABC_SENTAWND) is maintained as well.  With STATS
  * a number of per-connection statistics are recorded.
  *
  * The inpcb write lock must be held.
  */
 void
 cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
     uint16_t type)
 {
 #ifdef STATS
 	int32_t gput;
 #endif
 	bool cwnd_limited;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tp->ccv->nsegs = nsegs;
 	tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
 	/*
 	 * The connection is cwnd limited when cwnd does not exceed the
 	 * send window.  With new CWV enabled, cwnd additionally has to be
 	 * smaller than twice the pipe.
 	 */
 	if (tp->snd_cwnd > tp->snd_wnd) {
 		cwnd_limited = false;
 	} else if (!V_tcp_do_newcwv) {
 		cwnd_limited = true;
 	} else {
 		cwnd_limited = (tp->snd_cwnd < (tcp_compute_pipe(tp) * 2));
 	}
 	if (cwnd_limited) {
 		tp->ccv->flags |= CCF_CWND_LIMITED;
 	} else {
 		tp->ccv->flags &= ~CCF_CWND_LIMITED;
 	}
 
 	if (type == CC_ACK) {
 #ifdef STATS
 		stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
 		    ((int32_t)tp->snd_cwnd) - tp->snd_wnd);
 		if (!IN_RECOVERY(tp->t_flags))
 			stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN,
 			   tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs));
 		if ((tp->t_flags & TF_GPUTINPROG) &&
 		    SEQ_GEQ(th->th_ack, tp->gput_ack)) {
 			/*
 			 * Compute goodput in bits per millisecond.
 			 */
 			gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) /
 			    max(1, tcp_ts_getticks() - tp->gput_ts);
 			stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
 			    gput);
 			/*
 			 * XXXLAS: This is a temporary hack, and should be
 			 * chained off VOI_TCP_GPUT when stats(9) grows an API
 			 * to deal with chained VOIs.
 			 */
 			if (tp->t_stats_gput_prev > 0)
 				stats_voi_update_abs_s32(tp->t_stats,
 				    VOI_TCP_GPUT_ND,
 				    ((gput - tp->t_stats_gput_prev) * 100) /
 				    tp->t_stats_gput_prev);
 			tp->t_flags &= ~TF_GPUTINPROG;
 			tp->t_stats_gput_prev = gput;
 		}
 #endif /* STATS */
 		if (tp->snd_cwnd <= tp->snd_ssthresh) {
 			/* Not above ssthresh: restart byte counting. */
 			tp->ccv->flags &= ~CCF_ABC_SENTAWND;
 			tp->t_bytes_acked = 0;
 		} else {
 			/*
 			 * Above ssthresh: accumulate the acked bytes and
 			 * flag when a full cwnd worth of data was acked.
 			 */
 			tp->t_bytes_acked += tp->ccv->bytes_this_ack;
 			if (tp->t_bytes_acked >= tp->snd_cwnd) {
 				tp->t_bytes_acked -= tp->snd_cwnd;
 				tp->ccv->flags |= CCF_ABC_SENTAWND;
 			}
 		}
 	}
 
 	if (CC_ALGO(tp)->ack_received != NULL) {
 		/* XXXLAS: Find a way to live without this */
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->ack_received(tp->ccv, type);
 	}
 #ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
 #endif
 }
 
 /*
  * Initialize the congestion control related state of a connection.
  *
  * The metrics obtained with tcp_hc_get() seed the smoothed RTT, the RTT
  * variance and the retransmit timeout (only if no RTT has been measured
  * yet) as well as the slow start threshold.  After that the initial
  * congestion window is set and the conn_init handler of the congestion
  * control module is called, if there is one.
  *
  * The inpcb write lock must be held.
  */
 void
 cc_conn_init(struct tcpcb *tp)
 {
 	struct hc_metrics_lite metrics;
 	struct inpcb *inp = tp->t_inpcb;
 	u_int maxseg;
 	int rtt;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tcp_hc_get(&inp->inp_inc, &metrics);
 	maxseg = tcp_maxseg(tp);
 
 	rtt = metrics.rmx_rtt;
 	if ((tp->t_srtt == 0) && (rtt != 0)) {
 		tp->t_srtt = rtt;
 		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
 		TCPSTAT_INC(tcps_usedrtt);
 		if (metrics.rmx_rttvar == 0) {
 			/* default variation is +- 1 rtt */
 			tp->t_rttvar =
 			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
 		} else {
 			tp->t_rttvar = metrics.rmx_rttvar;
 			TCPSTAT_INC(tcps_usedrttvar);
 		}
 		TCPT_RANGESET(tp->t_rxtcur,
 		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
 		    tp->t_rttmin, TCPTV_REXMTMAX);
 	}
 	if (metrics.rmx_ssthresh != 0) {
 		/*
 		 * A gateway or interface buffer limit was seen on the
 		 * path.  Take it as the slow start threshold, but never
 		 * go below 2*mss.
 		 */
 		tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh);
 		TCPSTAT_INC(tcps_usedssthresh);
 	}
 
 	/*
 	 * Set the initial slow-start flight size.
 	 *
 	 * A cwnd of 1 marks a SYN or SYN/ACK that was lost and
 	 * retransmitted.  Congestion is likely in that case, so the
 	 * initial cwnd is reduced to a single segment.
 	 */
 	if (tp->snd_cwnd != 1) {
 		tp->snd_cwnd = tcp_compute_initwnd(maxseg);
 	} else {
 		/* SYN(-ACK) lost */
 		tp->snd_cwnd = maxseg;
 	}
 
 	if (CC_ALGO(tp)->conn_init != NULL) {
 		CC_ALGO(tp)->conn_init(tp->ccv);
 	}
 }
 
 void inline
 cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 #ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
 #endif
 
 	switch(type) {
 	case CC_NDUPACK:
 		if (!IN_FASTRECOVERY(tp->t_flags)) {
 			tp->snd_recover = tp->snd_max;
 			if (tp->t_flags2 & TF2_ECN_PERMIT)
 				tp->t_flags2 |= TF2_ECN_SND_CWR;
 		}
 		break;
 	case CC_ECN:
 		if (!IN_CONGRECOVERY(tp->t_flags) ||
 		    /*
 		     * Allow ECN reaction on ACK to CWR, if
 		     * that data segment was also CE marked.
 		     */
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_CONGRECOVERY(tp->t_flags);
 			TCPSTAT_INC(tcps_ecn_rcwnd);
 			tp->snd_recover = tp->snd_max + 1;
 			if (tp->t_flags2 & TF2_ECN_PERMIT)
 				tp->t_flags2 |= TF2_ECN_SND_CWR;
 		}
 		break;
 	case CC_RTO:
 		tp->t_dupacks = 0;
 		tp->t_bytes_acked = 0;
 		EXIT_RECOVERY(tp->t_flags);
 		if (tp->t_flags2 & TF2_ECN_PERMIT)
 			tp->t_flags2 |= TF2_ECN_SND_CWR;
 		break;
 	case CC_RTO_ERR:
 		TCPSTAT_INC(tcps_sndrexmitbad);
 		/* RTO was unnecessary, so reset everything. */
 		tp->snd_cwnd = tp->snd_cwnd_prev;
 		tp->snd_ssthresh = tp->snd_ssthresh_prev;
 		tp->snd_recover = tp->snd_recover_prev;
 		if (tp->t_flags & TF_WASFRECOVERY)
 			ENTER_FASTRECOVERY(tp->t_flags);
 		if (tp->t_flags & TF_WASCRECOVERY)
 			ENTER_CONGRECOVERY(tp->t_flags);
 		tp->snd_nxt = tp->snd_max;
 		tp->t_flags &= ~TF_PREVVALID;
 		tp->t_badrxtwin = 0;
 		break;
 	}
 
 	if (CC_ALGO(tp)->cong_signal != NULL) {
 		if (th != NULL)
 			tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->cong_signal(tp->ccv, type);
 	}
 }
 
 /*
  * Called when leaving recovery: lets the CC algorithm run its
  * post_recovery hook (with curack set from the segment being processed)
  * and then resets the byte counting and SACK/PRR bookkeeping.
  *
  * The inpcb write lock must be held.
  */
 void inline
 cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* XXXLAS: KASSERT that we're in recovery? */
 
 	if (CC_ALGO(tp)->post_recovery != NULL) {
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->post_recovery(tp->ccv);
 	}
 
 	/*
 	 * XXXLAS: EXIT_RECOVERY ?
 	 *
 	 * These are cleared only after the hook has run, whether or not
 	 * the algorithm supplied one.
 	 */
 	tp->sackhint.prr_out = 0;
 	tp->sackhint.delivered_data = 0;
 	tp->t_bytes_acked = 0;
 }
 
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  * following conditions are met:
  *	- There is no delayed ack timer in progress.
  *	- Our last ack wasn't a 0-sized window. We never want to delay
  *	  the ack that opens up a 0-sized window.
  *	- LRO wasn't used for this segment. We make sure by checking that the
  *	  segment size is not larger than the MSS.
  *	- Delayed acks are enabled (V_tcp_delack_enabled), or the connection
  *	  has TF_NEEDSYN set.
  *
  * Both macro arguments are parenthesized in the expansion so that callers
  * may pass arbitrary expressions (e.g. "&cb", "p + 1" or "c ? a : b")
  * without the surrounding operators re-associating them.  Note that tp is
  * still evaluated more than once, so it must be free of side effects.
  */
 #define DELAY_ACK(tp, tlen)						\
 	((!tcp_timer_active((tp), TT_DELACK) &&				\
 	    ((tp)->t_flags & TF_RXWIN0SENT) == 0) &&			\
 	    ((tlen) <= (tp)->t_maxseg) &&				\
 	    (V_tcp_delack_enabled || ((tp)->t_flags & TF_NEEDSYN)))
 
 /*
  * Feed the ECN-related bits of a received packet to the CC algorithm.
  * The IP header ECN codepoint (iptos) and the TCP CWR flag (flags) are
  * mirrored into tp->ccv->flags before the algorithm's ecnpkt_handler
  * hook is invoked.  If the hook leaves CCF_ACKNOW set, the delayed ACK
  * timer is armed and TF_ACKNOW is raised.  Nothing happens when the
  * algorithm has no ecnpkt_handler hook.
  *
  * The inpcb write lock must be held.
  */
 void inline
 cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (CC_ALGO(tp)->ecnpkt_handler == NULL)
 		return;
 
 	/* Record whether the IP header carried a CE mark. */
 	switch (iptos & IPTOS_ECN_MASK) {
 	case IPTOS_ECN_CE:
 		tp->ccv->flags |= CCF_IPHDR_CE;
 		break;
 	case IPTOS_ECN_ECT0:
 	case IPTOS_ECN_ECT1:
 	case IPTOS_ECN_NOTECT:
 		tp->ccv->flags &= ~CCF_IPHDR_CE;
 		break;
 	}
 
 	/* Likewise for the CWR bit of the TCP header. */
 	if ((flags & TH_CWR) == 0)
 		tp->ccv->flags &= ~CCF_TCPHDR_CWR;
 	else
 		tp->ccv->flags |= CCF_TCPHDR_CWR;
 
 	CC_ALGO(tp)->ecnpkt_handler(tp->ccv);
 
 	/* The algorithm may have asked for an ACK via CCF_ACKNOW. */
 	if ((tp->ccv->flags & CCF_ACKNOW) != 0) {
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 		tp->t_flags |= TF_ACKNOW;
 	}
 }
 
 /*
  * Convenience wrapper around cc_ecnpkt_handler_flags() that extracts the
  * TCP flags from the header with tcp_get_flags().
  */
 void inline
 cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 {
 	uint16_t hdrflags;
 
 	hdrflags = tcp_get_flags(th);
 	cc_ecnpkt_handler_flags(tp, hdrflags, iptos);
 }
 
 /*
  * TCP input handling is split into multiple parts:
  *   tcp6_input is a thin wrapper around tcp_input for the extended
  *	ip6_protox[] call format in ip6_input
  *   tcp_input handles primary segment validation, inpcb lookup and
  *	SYN processing on listen sockets
  *   tcp_do_segment processes the ACK and text of the segment for
  *	establishing, established and closing connections
  */
 #ifdef INET6
 /*
  * IPv6 front end for tcp_input_with_port().  Ensures that the data up to
  * and including the fixed TCP header is present in the first mbuf,
  * rejects segments addressed to an anycast address with an ICMPv6
  * "address unreachable" error, and otherwise passes the packet on.
  * Returns IPPROTO_DONE on the two early-exit paths.
  */
 int
 tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
 {
 	struct ip6_hdr *ip6;
 	struct in6_ifaddr *ia6;
 	struct mbuf *m = *mp;
 
 	if (m->m_len < *offp + sizeof(struct tcphdr)) {
 		m = m_pullup(m, *offp + sizeof(struct tcphdr));
 		if (m == NULL) {
 			*mp = NULL;
 			TCPSTAT_INC(tcps_rcvshort);
 			return (IPPROTO_DONE);
 		}
 	}
 
 	/*
 	 * draft-itojun-ipv6-tcp-to-anycast
 	 * better place to put this in?
 	 */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
 	if (ia6 == NULL || (ia6->ia6_flags & IN6_IFF_ANYCAST) == 0) {
 		/* Ordinary destination: continue with common processing. */
 		*mp = m;
 		return (tcp_input_with_port(mp, offp, proto, port));
 	}
 
 	/* Destination is one of our anycast addresses: refuse. */
 	icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
 		    (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
 	*mp = NULL;
 	return (IPPROTO_DONE);
 }
 
 /*
  * Entry point with the plain (mp, offp, proto) signature; simply calls
  * tcp6_input_with_port() with a port of 0.
  */
 int
 tcp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	int rv;
 
 	rv = tcp6_input_with_port(mp, offp, proto, 0);
 	return (rv);
 }
 #endif /* INET6 */
 
 /*
  * Common TCP input path for IPv4 and IPv6.
  *
  * Takes ownership of the mbuf chain in *mp (and sets *mp to NULL),
  * verifies the TCP checksum (skipped when port is non-zero), validates
  * the TCP header length, locates the matching inpcb and then either
  *   - drops the segment, optionally answering with a RST,
  *   - runs listen-socket processing through the syncache, or
  *   - hands the segment to the stack's tfb_tcp_do_segment() method.
  *
  * mp    - in: packet to process; always cleared by this function.
  * offp  - offset of the TCP header from the start of the IP header.
  * proto - not used in this function.
  * port  - when non-zero, checksum verification is skipped and the value
  *         must match tp->t_port for connections past LISTEN; presumably
  *         the UDP encapsulation port -- confirm against the callers.
  *
  * Always returns IPPROTO_DONE.  Must be called within the network epoch.
  */
 int
 tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
 {
 	struct mbuf *m = *mp;
 	struct tcphdr *th = NULL;
 	struct ip *ip = NULL;
 	struct inpcb *inp = NULL;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
 	u_char *optp = NULL;
 	int off0;
 	int optlen = 0;
 #ifdef INET
 	int len;
 	uint8_t ipttl;
 #endif
 	int tlen = 0, off;
 	int drop_hdrlen;
 	int thflags;
 	int rstreason = 0;	/* For badport_bandlim accounting purposes */
 	int lookupflag;
 	uint8_t iptos;
 	struct m_tag *fwd_tag = NULL;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
 #else
 	const void *ip6 = NULL;
 #endif /* INET6 */
 	struct tcpopt to;		/* options in this segment */
 	char *s = NULL;			/* address and port logging */
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 
 	NET_EPOCH_ASSERT();
 
 #ifdef INET6
 	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
 #endif
 
 	off0 = *offp;
 	m = *mp;
 	*mp = NULL;
 	to.to_flags = 0;
 	TCPSTAT_INC(tcps_rcvtotal);
 
 #ifdef INET6
 	if (isipv6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)((caddr_t)ip6 + off0);
 		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
 		if (port)
 			goto skip6_csum;
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in6_cksum_pseudo(ip6, tlen,
 				    IPPROTO_TCP, m->m_pkthdr.csum_data);
 			th->th_sum ^= 0xffff;
 		} else
 			th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
 		if (th->th_sum) {
 			TCPSTAT_INC(tcps_rcvbadsum);
 			goto drop;
 		}
 	skip6_csum:
 		/*
 		 * Be proactive about unspecified IPv6 address in source.
 		 * As we use all-zero to indicate unbounded/unconnected pcb,
 		 * unspecified IPv6 address can be used to confuse us.
 		 *
 		 * Note that packets with unspecified IPv6 destination is
 		 * already dropped in ip6_input.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 			/* XXX stat */
 			goto drop;
 		}
 		iptos = IPV6_TRAFFIC_CLASS(ip6);
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		/*
 		 * Get IP and TCP header together in first mbuf.
 		 * Note: IP leaves IP header in first mbuf.
 		 */
 		if (off0 > sizeof (struct ip)) {
 			ip_stripoptions(m);
 			off0 = sizeof(struct ip);
 		}
 		if (m->m_len < sizeof (struct tcpiphdr)) {
 			if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
 			    == NULL) {
 				TCPSTAT_INC(tcps_rcvshort);
 				return (IPPROTO_DONE);
 			}
 		}
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)((caddr_t)ip + off0);
 		tlen = ntohs(ip->ip_len) - off0;
 
 		iptos = ip->ip_tos;
 		if (port)
 			goto skip_csum;
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in_pseudo(ip->ip_src.s_addr,
 				    ip->ip_dst.s_addr,
 				    htonl(m->m_pkthdr.csum_data + tlen +
 				    IPPROTO_TCP));
 			th->th_sum ^= 0xffff;
 		} else {
 			struct ipovly *ipov = (struct ipovly *)ip;
 
 			/*
 			 * Checksum extended TCP header and data.
 			 */
 			len = off0 + tlen;
 			ipttl = ip->ip_ttl;
 			bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
 			ipov->ih_len = htons(tlen);
 			th->th_sum = in_cksum(m, len);
 			/* Reset length for SDT probes. */
 			ip->ip_len = htons(len);
 			/* Reset TOS bits */
 			ip->ip_tos = iptos;
 			/* Re-initialization for later version check */
 			ip->ip_ttl = ipttl;
 			ip->ip_v = IPVERSION;
 			ip->ip_hl = off0 >> 2;
 		}
 	skip_csum:
 		if (th->th_sum && (port == 0)) {
 			TCPSTAT_INC(tcps_rcvbadsum);
 			goto drop;
 		}
 	}
 #endif /* INET */
 
 	/*
 	 * Check that TCP offset makes sense,
 	 * pull out TCP options and adjust length.		XXX
 	 */
 	off = th->th_off << 2;
 	if (off < sizeof (struct tcphdr) || off > tlen) {
 		TCPSTAT_INC(tcps_rcvbadoff);
 		goto drop;
 	}
 	tlen -= off;	/* tlen is used instead of ti->ti_len */
 	if (off > sizeof (struct tcphdr)) {
 #ifdef INET6
 		if (isipv6) {
 			if (m->m_len < off0 + off) {
 				m = m_pullup(m, off0 + off);
 				if (m == NULL) {
 					TCPSTAT_INC(tcps_rcvshort);
 					return (IPPROTO_DONE);
 				}
 			}
 			ip6 = mtod(m, struct ip6_hdr *);
 			th = (struct tcphdr *)((caddr_t)ip6 + off0);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			if (m->m_len < sizeof(struct ip) + off) {
 				if ((m = m_pullup(m, sizeof (struct ip) + off))
 				    == NULL) {
 					TCPSTAT_INC(tcps_rcvshort);
 					return (IPPROTO_DONE);
 				}
 				ip = mtod(m, struct ip *);
 				th = (struct tcphdr *)((caddr_t)ip + off0);
 			}
 		}
 #endif
 		optlen = off - sizeof (struct tcphdr);
 		optp = (u_char *)(th + 1);
 	}
 	thflags = tcp_get_flags(th);
 
 	/*
 	 * Convert TCP protocol specific fields to host format.
 	 */
 	tcp_fields_to_host(th);
 
 	/*
 	 * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
 	 */
 	drop_hdrlen = off0 + off;
 
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
 	if (
 #ifdef INET6
 	    (isipv6 && (m->m_flags & M_IP6_NEXTHOP))
 #ifdef INET
 	    || (!isipv6 && (m->m_flags & M_IP_NEXTHOP))
 #endif
 #endif
 #if defined(INET) && !defined(INET6)
 	    (m->m_flags & M_IP_NEXTHOP)
 #endif
 	    )
 		fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 
 	/*
 	 * For initial SYN packets we don't need write lock on matching
 	 * PCB, be it a listening one or a synchronized one.  The packet
 	 * shall not modify its state.
 	 */
 	lookupflag = (thflags & (TH_ACK|TH_SYN)) == TH_SYN ?
 	    INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB;
 findpcb:
 #ifdef INET6
 	if (isipv6 && fwd_tag != NULL) {
 		struct sockaddr_in6 *next_hop6;
 
 		next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in6_pcblookup_mbuf(&V_tcbinfo,
 		    &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport,
 		    lookupflag, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src,
 			    th->th_sport, &next_hop6->sin6_addr,
 			    next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) :
 			    th->th_dport, INPLOOKUP_WILDCARD | lookupflag,
 			    m->m_pkthdr.rcvif);
 		}
 	} else if (isipv6) {
 		inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
 		    th->th_sport, &ip6->ip6_dst, th->th_dport,
 		    INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m);
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	if (fwd_tag != NULL) {
 		struct sockaddr_in *next_hop;
 
 		next_hop = (struct sockaddr_in *)(fwd_tag+1);
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * already got one like this?
 		 */
 		inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport,
 		    ip->ip_dst, th->th_dport, lookupflag, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
 			    th->th_sport, next_hop->sin_addr,
 			    next_hop->sin_port ? ntohs(next_hop->sin_port) :
 			    th->th_dport, INPLOOKUP_WILDCARD | lookupflag,
 			    m->m_pkthdr.rcvif);
 		}
 	} else
 		inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
 		    th->th_sport, ip->ip_dst, th->th_dport,
 		    INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m);
 #endif /* INET */
 
 	/*
 	 * If the INPCB does not exist then all data in the incoming
 	 * segment is discarded and an appropriate RST is sent back.
 	 * XXX MRT Send RST using which routing table?
 	 */
 	if (inp == NULL) {
 		/*
 		 * Log communication attempts to ports that are not
 		 * in use.
 		 */
 		if ((V_tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
 		    V_tcp_log_in_vain == 2) {
 			if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6)))
 				log(LOG_INFO, "%s; %s: Connection attempt "
 				    "to closed port\n", s, __func__);
 		}
 		/*
 		 * When blackholing do not respond with a RST but
 		 * completely ignore the segment and drop it.
 		 *
 		 * The locality test is wrapped in its own parentheses:
 		 * "||" binds tighter than "?:", so without them the
 		 * expression would parse as
 		 * "(V_blackhole_local || isipv6) ? ... : ...", which
 		 * ignores V_blackhole_local as an override and, for an
 		 * IPv4 packet with V_blackhole_local set, dereferences
 		 * the NULL ip6 pointer.
 		 */
 		if (((V_blackhole == 1 && (thflags & TH_SYN)) ||
 		    V_blackhole == 2) && (V_blackhole_local || (
 #ifdef INET6
 		    isipv6 ? !in6_localaddr(&ip6->ip6_src) :
 #endif
 #ifdef INET
 		    !in_localip(ip->ip_src)
 #else
 		    true
 #endif
 		    )))
 			goto dropunlock;
 
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 	INP_LOCK_ASSERT(inp);
 	/*
 	 * While waiting for inp lock during the lookup, another thread
 	 * can have dropped the inpcb, in which case we need to loop back
 	 * and try to find a new inpcb to deliver to.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_UNLOCK(inp);
 		inp = NULL;
 		goto findpcb;
 	}
 	if ((inp->inp_flowtype == M_HASHTYPE_NONE) &&
 	    (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) &&
 	    ((inp->inp_socket == NULL) || !SOLISTENING(inp->inp_socket))) {
 		inp->inp_flowid = m->m_pkthdr.flowid;
 		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 #ifdef INET6
 	if (isipv6 && IPSEC_ENABLED(ipv6) &&
 	    IPSEC_CHECK_POLICY(ipv6, m, inp) != 0) {
 		goto dropunlock;
 	}
 #ifdef INET
 	else
 #endif
 #endif /* INET6 */
 #ifdef INET
 	if (IPSEC_ENABLED(ipv4) &&
 	    IPSEC_CHECK_POLICY(ipv4, m, inp) != 0) {
 		goto dropunlock;
 	}
 #endif /* INET */
 #endif /* IPSEC */
 
 	/*
 	 * Check the minimum TTL for socket.
 	 */
 	if (inp->inp_ip_minttl != 0) {
 #ifdef INET6
 		if (isipv6) {
 			if (inp->inp_ip_minttl > ip6->ip6_hlim)
 				goto dropunlock;
 		} else
 #endif
 		if (inp->inp_ip_minttl > ip->ip_ttl)
 			goto dropunlock;
 	}
 
 	/*
 	 * A previous connection in TIMEWAIT state is supposed to catch stray
 	 * or duplicate segments arriving late.  If this segment was a
 	 * legitimate new connection attempt, the old INPCB gets removed and
 	 * we can try again to find a listening socket.
 	 */
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		tcp_dooptions(&to, optp, optlen,
 		    (thflags & TH_SYN) ? TO_SYN : 0);
 		/*
 		 * NB: tcp_twcheck unlocks the INP and frees the mbuf.
 		 */
 		if (tcp_twcheck(inp, &to, th, m, tlen))
 			goto findpcb;
 		return (IPPROTO_DONE);
 	}
 	/*
 	 * The TCPCB may no longer exist if the connection is winding
 	 * down or it is in the CLOSED state.  Either way we drop the
 	 * segment and send an appropriate response.
 	 */
 	tp = intotcpcb(inp);
 	if (tp == NULL || tp->t_state == TCPS_CLOSED) {
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 
 	if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) {
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE) {
 		tcp_offload_input(tp, m);
 		m = NULL;	/* consumed by the TOE driver */
 		goto dropunlock;
 	}
 #endif
 
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, m))
 		goto dropunlock;
 #endif
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("%s: so == NULL", __func__));
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG) {
 		ostate = tp->t_state;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
 		} else
 #endif
 			bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
 		tcp_savetcp = *th;
 	}
 #endif /* TCPDEBUG */
 	/*
 	 * When the socket is accepting connections (the INPCB is in LISTEN
 	 * state) we look into the SYN cache if this is a new connection
 	 * attempt or the completion of a previous one.
 	 */
 	KASSERT(tp->t_state == TCPS_LISTEN || !SOLISTENING(so),
 	    ("%s: so accepting but tp %p not listening", __func__, tp));
 	if (tp->t_state == TCPS_LISTEN && SOLISTENING(so)) {
 		struct in_conninfo inc;
 
 		bzero(&inc, sizeof(inc));
 #ifdef INET6
 		if (isipv6) {
 			inc.inc_flags |= INC_ISIPV6;
 			if (inp->inp_inc.inc_flags & INC_IPV6MINMTU)
 				inc.inc_flags |= INC_IPV6MINMTU;
 			inc.inc6_faddr = ip6->ip6_src;
 			inc.inc6_laddr = ip6->ip6_dst;
 		} else
 #endif
 		{
 			inc.inc_faddr = ip->ip_src;
 			inc.inc_laddr = ip->ip_dst;
 		}
 		inc.inc_fport = th->th_sport;
 		inc.inc_lport = th->th_dport;
 		inc.inc_fibnum = so->so_fibnum;
 
 		/*
 		 * Check for an existing connection attempt in syncache if
 		 * the flag is only ACK.  A successful lookup creates a new
 		 * socket appended to the listen queue in SYN_RECEIVED state.
 		 */
 		if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
 			/*
 			 * Parse the TCP options here because
 			 * syncookies need access to the reflected
 			 * timestamp.
 			 */
 			tcp_dooptions(&to, optp, optlen, 0);
 			/*
 			 * NB: syncache_expand() doesn't unlock
 			 * inp and tcpinfo locks.
 			 */
 			rstreason = syncache_expand(&inc, &to, th, &so, m, port);
 			if (rstreason < 0) {
 				/*
 				 * A failing TCP MD5 signature comparison
 				 * must result in the segment being dropped
 				 * and must not produce any response back
 				 * to the sender.
 				 */
 				goto dropunlock;
 			} else if (rstreason == 0) {
 				/*
 				 * No syncache entry or ACK was not
 				 * for our SYN/ACK.  Send a RST.
 				 * NB: syncache did its own logging
 				 * of the failure cause.
 				 */
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 tfo_socket_result:
 			if (so == NULL) {
 				/*
 				 * We completed the 3-way handshake
 				 * but could not allocate a socket
 				 * either due to memory shortage,
 				 * listen queue length limits or
 				 * global socket limits.  Send RST
 				 * or wait and have the remote end
 				 * retransmit the ACK for another
 				 * try.
 				 */
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 					log(LOG_DEBUG, "%s; %s: Listen socket: "
 					    "Socket allocation failed due to "
 					    "limits or memory shortage, %s\n",
 					    s, __func__,
 					    V_tcp_sc_rst_sock_fail ?
 					    "sending RST" : "try again");
 				if (V_tcp_sc_rst_sock_fail) {
 					rstreason = BANDLIM_UNLIMITED;
 					goto dropwithreset;
 				} else
 					goto dropunlock;
 			}
 			/*
 			 * Socket is created in state SYN_RECEIVED.
 			 * Unlock the listen socket, lock the newly
 			 * created socket and update the tp variable.
 			 * If we came here via jump to tfo_socket_result,
 			 * then listening socket is read-locked.
 			 */
 			INP_UNLOCK(inp);	/* listen socket */
 			inp = sotoinpcb(so);
 			/*
 			 * New connection inpcb is already locked by
 			 * syncache_expand().
 			 */
 			INP_WLOCK_ASSERT(inp);
 			tp = intotcpcb(inp);
 			KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
 			    ("%s: ", __func__));
 			/*
 			 * Process the segment and the data it
 			 * contains.  tcp_do_segment() consumes
 			 * the mbuf chain and unlocks the inpcb.
 			 */
 			TCP_PROBE5(receive, NULL, tp, m, tp, th);
 			tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
 			    iptos);
 			return (IPPROTO_DONE);
 		}
 		/*
 		 * Segment flag validation for new connection attempts:
 		 *
 		 * Our (SYN|ACK) response was rejected.
 		 * Check with syncache and remove entry to prevent
 		 * retransmits.
 		 *
 		 * NB: syncache_chkrst does its own logging of failure
 		 * causes.
 		 */
 		if (thflags & TH_RST) {
 			syncache_chkrst(&inc, th, m, port);
 			goto dropunlock;
 		}
 		/*
 		 * We can't do anything without SYN.
 		 */
 		if ((thflags & TH_SYN) == 0) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN is missing, segment ignored\n",
 				    s, __func__);
 			TCPSTAT_INC(tcps_badsyn);
 			goto dropunlock;
 		}
 		/*
 		 * (SYN|ACK) is bogus on a listen socket.
 		 */
 		if (thflags & TH_ACK) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|ACK invalid, segment rejected\n",
 				    s, __func__);
 			syncache_badack(&inc, port);	/* XXX: Not needed! */
 			TCPSTAT_INC(tcps_badsyn);
 			rstreason = BANDLIM_RST_OPENPORT;
 			goto dropwithreset;
 		}
 		/*
 		 * If the drop_synfin option is enabled, drop all
 		 * segments with both the SYN and FIN bits set.
 		 * This prevents e.g. nmap from identifying the
 		 * TCP/IP stack.
 		 * XXX: Poor reasoning.  nmap has other methods
 		 * and is constantly refining its stack detection
 		 * strategies.
 		 * XXX: This is a violation of the TCP specification
 		 * and was used by RFC1644.
 		 */
 		if ((thflags & TH_FIN) && V_drop_synfin) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|FIN segment ignored (based on "
 				    "sysctl setting)\n", s, __func__);
 			TCPSTAT_INC(tcps_badsyn);
 			goto dropunlock;
 		}
 		/*
 		 * Segment's flags are (SYN) or (SYN|FIN).
 		 *
 		 * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
 		 * as they do not affect the state of the TCP FSM.
 		 * The data pointed to by TH_URG and th_urp is ignored.
 		 */
 		KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
 		    ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
 		KASSERT(thflags & (TH_SYN),
 		    ("%s: Listen socket: TH_SYN not set", __func__));
 		INP_RLOCK_ASSERT(inp);
 #ifdef INET6
 		/*
 		 * If deprecated address is forbidden,
 		 * we do not accept SYN to deprecated interface
 		 * address to prevent any new inbound connection from
 		 * getting established.
 		 * When we do not accept SYN, we send a TCP RST,
 		 * with deprecated source address (instead of dropping
 		 * it).  We compromise it as it is much better for peer
 		 * to send a RST, and RST will be the final packet
 		 * for the exchange.
 		 *
 		 * If we do not forbid deprecated addresses, we accept
 		 * the SYN packet.  RFC2462 does not suggest dropping
 		 * SYN in this case.
 		 * If we decipher RFC2462 5.5.4, it says like this:
 		 * 1. use of deprecated addr with existing
 		 *    communication is okay - "SHOULD continue to be
 		 *    used"
 		 * 2. use of it with new communication:
 		 *   (2a) "SHOULD NOT be used if alternate address
 		 *        with sufficient scope is available"
 		 *   (2b) nothing mentioned otherwise.
 		 * Here we fall into (2b) case as we have no choice in
 		 * our source address selection - we must obey the peer.
 		 *
 		 * The wording in RFC2462 is confusing, and there are
 		 * multiple description text for deprecated address
 		 * handling - worse, they are not exactly the same.
 		 * I believe 5.5.4 is the best one, so we follow 5.5.4.
 		 */
 		if (isipv6 && !V_ip6_use_deprecated) {
 			struct in6_ifaddr *ia6;
 
 			ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
 			if (ia6 != NULL &&
 			    (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to deprecated "
 					"IPv6 address rejected\n",
 					s, __func__);
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 		}
 #endif /* INET6 */
 		/*
 		 * Basic sanity checks on incoming SYN requests:
 		 *   Don't respond if the destination is a link layer
 		 *	broadcast according to RFC1122 4.2.3.10, p. 104.
 		 *   If it is from this socket it must be forged.
 		 *   Don't respond if the source or destination is a
 		 *	global or subnet broad- or multicast address.
 		 *   Note that it is quite possible to receive unicast
 		 *	link-layer packets with a broadcast IP address. Use
 		 *	in_broadcast() to find them.
 		 */
 		if (m->m_flags & (M_BCAST|M_MCAST)) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 			    log(LOG_DEBUG, "%s; %s: Listen socket: "
 				"Connection attempt from broad- or multicast "
 				"link layer address ignored\n", s, __func__);
 			goto dropunlock;
 		}
 #ifdef INET6
 		if (isipv6) {
 			if (th->th_dport == th->th_sport &&
 			    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to/from self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 			    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to multicast "
 					"address ignored\n", s, __func__);
 				goto dropunlock;
 			}
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			if (th->th_dport == th->th_sport &&
 			    ip->ip_dst.s_addr == ip->ip_src.s_addr) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 			    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 			    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
 			    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to broad- "
 					"or multicast address ignored\n",
 					s, __func__);
 				goto dropunlock;
 			}
 		}
 #endif
 		/*
 		 * SYN appears to be valid.  Create compressed TCP state
 		 * for syncache.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 			    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 		TCP_PROBE3(debug__input, tp, th, m);
 		tcp_dooptions(&to, optp, optlen, TO_SYN);
 		if ((so = syncache_add(&inc, &to, th, inp, so, m, NULL, NULL,
 		    iptos, port)) != NULL)
 			goto tfo_socket_result;
 
 		/*
 		 * Entry added to syncache and mbuf consumed.
 		 * Only the listen socket is unlocked by syncache_add().
 		 */
 		return (IPPROTO_DONE);
 	} else if (tp->t_state == TCPS_LISTEN) {
 		/*
 		 * When a listen socket is torn down the SO_ACCEPTCONN
 		 * flag is removed first while connections are drained
 		 * from the accept queue in a unlock/lock cycle of the
 		 * ACCEPT_LOCK, opening a race condition allowing a SYN
 		 * attempt go through unhandled.
 		 */
 		goto dropunlock;
 	}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	if (tp->t_flags & TF_SIGNATURE) {
 		tcp_dooptions(&to, optp, optlen, thflags);
 		if ((to.to_flags & TOF_SIGNATURE) == 0) {
 			TCPSTAT_INC(tcps_sig_err_nosigopt);
 			goto dropunlock;
 		}
 		if (!TCPMD5_ENABLED() ||
 		    TCPMD5_INPUT(m, th, to.to_signature) != 0)
 			goto dropunlock;
 	}
 #endif
 	TCP_PROBE5(receive, NULL, tp, m, tp, th);
 
 	/*
 	 * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
 	 * state.  tcp_do_segment() always consumes the mbuf chain, unlocks
 	 * the inpcb, and unlocks pcbinfo.
 	 *
 	 * XXXGL: in case of a pure SYN arriving on existing connection
 	 * TCP stacks won't need to modify the PCB, they would either drop
 	 * the segment silently, or send a challenge ACK.  However, we try
 	 * to upgrade the lock, because calling convention for stacks is
 	 * write-lock on PCB.  If upgrade fails, drop the SYN.
 	 */
 	if (lookupflag == INPLOOKUP_RLOCKPCB && INP_TRY_UPGRADE(inp) == 0)
 		goto dropunlock;
 
 	tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
 	return (IPPROTO_DONE);
 
 dropwithreset:
 	TCP_PROBE5(receive, NULL, tp, m, tp, th);
 
 	if (inp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_UNLOCK(inp);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	m = NULL;	/* mbuf chain got consumed. */
 	goto drop;
 
 dropunlock:
 	if (m != NULL)
 		TCP_PROBE5(receive, NULL, tp, m, tp, th);
 
 	if (inp != NULL)
 		INP_UNLOCK(inp);
 
 drop:
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	if (m != NULL)
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Automatic sizing of receive socket buffer.  Often the send
  * buffer size is not optimally adjusted to the actual network
  * conditions at hand (delay bandwidth product).  Setting the
  * buffer size too small limits throughput on links with high
  * bandwidth and high delay (eg. trans-continental/oceanic links).
  *
  * On the receive side the socket buffer memory is only rarely
  * used to any significant extent.  This allows us to be much
  * more aggressive in scaling the receive socket buffer.  For
  * the case that the buffer space is actually used to a large
  * extent and we run out of kernel memory we can simply drop
  * the new segments; TCP on the sender will just retransmit it
  * later.  Setting the buffer size too big may only consume too
  * much kernel memory if the application doesn't read() from
  * the socket or packet loss or reordering makes use of the
  * reassembly queue.
  *
  * The criteria to step up the receive buffer one notch are:
  *  1. Application has not set receive buffer size with
  *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
  *  2. the number of bytes received during 1/2 of an sRTT
  *     is at least 3/8 of the current socket buffer size.
  *  3. receive buffer size has not hit maximal automatic size;
  *
  * If all of the criteria are met we increaset the socket buffer
  * by a 1/2 (bounded by the max). This allows us to keep ahead
  * of slow-start but also makes it so our peer never gets limited
  * by our rwnd which we then open up causing a burst.
  *
  * This algorithm does two steps per RTT at most and only if
  * we receive a bulk stream w/o packet losses or reorderings.
  * Shrinking the buffer during idle times is not necessary as
  * it doesn't consume any memory when idle.
  *
  * TODO: Only step up if the application is actually serving
  * the buffer to better manage the socket buffer resources.
  */
 int
 tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int tlen)
 {
 	int grown = 0;	/* 0 tells the caller "do not resize" */
 
 	/*
 	 * Not time to evaluate yet: autosizing is disabled (globally or
 	 * on this socket buffer), there is no sRTT or no running
 	 * measurement timestamp, or no more than half an sRTT has
 	 * elapsed since rfbuf_ts.  Just account for the bytes of this
 	 * segment and leave the buffer size alone.
 	 */
 	if (!V_tcp_do_autorcvbuf ||
 	    (so->so_rcv.sb_flags & SB_AUTOSIZE) == 0 ||
 	    tp->t_srtt == 0 || tp->rfbuf_ts == 0 ||
 	    TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) <=
 	    ((tp->t_srtt >> TCP_RTT_SHIFT) / 2)) {
 		tp->rfbuf_cnt += tlen;
 		return (grown);
 	}
 
 	/*
 	 * The measurement interval is over.  Propose a buffer 1.5 times
 	 * the current high-water mark, capped at the configured maximum,
 	 * but only if more than 3/8 of the current size was received
 	 * during the interval and the cap has not been reached yet.
 	 */
 	if (tp->rfbuf_cnt > ((so->so_rcv.sb_hiwat / 2) / 4 * 3) &&
 	    so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
 		grown = min(so->so_rcv.sb_hiwat + (so->so_rcv.sb_hiwat / 2),
 		    V_tcp_autorcvbuf_max);
 	}
 	/* The probe fires for every completed interval, even if grown is 0. */
 	TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, grown);
 
 	/* Reset the measurement state; this segment's tlen is not counted. */
 	tp->rfbuf_ts = 0;
 	tp->rfbuf_cnt = 0;
 
 	return (grown);
 }
 
 int
 tcp_input(struct mbuf **mp, int *offp, int proto)
 {
 	int rv;
 
 	/*
 	 * Thin wrapper: forward all arguments unchanged to
 	 * tcp_input_with_port() with a trailing argument of 0.
 	 * NOTE(review): 0 presumably means "no port" -- confirm
 	 * against tcp_input_with_port()'s definition.
 	 */
 	rv = tcp_input_with_port(mp, offp, proto, 0);
 	return (rv);
 }
 
 void
 tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
 {
 	/*
 	 * The tcpcb may already be gone (for instance when the session
 	 * moved to TIME_WAIT before we got here), and the socket may be
 	 * missing as well.  Without both there is nothing to wake up.
 	 */
 	if (tp == NULL || so == NULL)
 		return;
 
 	INP_LOCK_ASSERT(tp->t_inpcb);
 
 	/* Nothing to do unless a receive-side wakeup was deferred. */
 	if ((tp->t_flags & TF_WAKESOR) == 0)
 		return;
 
 	/*
 	 * Consume the deferred-wakeup flag and deliver the wakeup.  The
 	 * receive sockbuf lock is asserted here; sorwakeup_locked()
 	 * does an implicit unlock.
 	 */
 	tp->t_flags &= ~TF_WAKESOR;
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	sorwakeup_locked(so);
 }
 
 void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
 	uint16_t thflags;
 	int acked, ourfinisacked, needoutput = 0, sack_changed;
 	int rstreason, todrop, win, incforsyn = 0;
 	uint32_t tiwin;
 	uint16_t nsegs;
 	char *s;
 	struct in_conninfo *inc;
 	struct mbuf *mfree;
 	struct tcpopt to;
 	int tfo_syn;
 	u_int maxseg;
 
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 	thflags = tcp_get_flags(th);
 	inc = &tp->t_inpcb->inp_inc;
 	tp->sackhint.last_sack_ack = 0;
 	sack_changed = 0;
 	nsegs = max(1, m->m_pkthdr.lro_nsegs);
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 	    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 	    __func__));
 
 #ifdef TCPPCAP
 	/* Save segment, if requested. */
 	tcp_pcap_add(th, m, &(tp->t_inpkts));
 #endif
 	TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0,
 	    tlen, NULL, true);
 
 	if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: "
 			    "SYN|FIN segment ignored (based on "
 			    "sysctl setting)\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 		goto drop;
 	}
 
 	/*
 	 * If a segment with the ACK-bit set arrives in the SYN-SENT state
 	 * check SEQ.ACK first.
 	 */
 	if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
 	    (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 
 	/*
 	 * Scale up the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 #ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
 #endif
 
 	/*
 	 * TCP ECN processing.
 	 */
 	if (tcp_ecn_input_segment(tp, thflags, iptos))
 		cc_cong_signal(tp, th, CC_ECN);
 
 	/*
 	 * Parse options on any incoming segment.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 	    (th->th_off << 2) - sizeof(struct tcphdr),
 	    (thflags & TH_SYN) ? TO_SYN : 0);
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	if ((tp->t_flags & TF_SIGNATURE) != 0 &&
 	    (to.to_flags & TOF_SIGNATURE) == 0) {
 		TCPSTAT_INC(tcps_sig_err_sigopt);
 		/* XXX: should drop? */
 	}
 #endif
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 		else if (tp->t_rxtshift == 1 &&
 			 tp->t_flags & TF_PREVVALID &&
 			 tp->t_badrxtwin != 0 &&
 			 TSTMP_LT(to.to_tsecr, tp->t_badrxtwin))
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 	}
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		/* Handle parallel SYN for ECN */
 		tcp_ecn_input_parallel_syn(tp, thflags, iptos);
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE) &&
 		    !(tp->t_flags & TF_NOOPT)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		} else
 			tp->t_flags &= ~TF_REQ_SCALE;
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if ((to.to_flags & TOF_TS) &&
 		    (tp->t_flags & TF_REQ_TSTMP) &&
 		    !(tp->t_flags & TF_NOOPT)) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = tcp_ts_getticks();
 		} else
 			tp->t_flags &= ~TF_REQ_TSTMP;
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (!(to.to_flags & TOF_SACKPERM) ||
 		    (tp->t_flags & TF_NOOPT)))
 			tp->t_flags &= ~TF_SACK_PERMIT;
 		if (IS_FASTOPEN(tp->t_flags)) {
 			if ((to.to_flags & TOF_FASTOPEN) &&
 			    !(tp->t_flags & TF_NOOPT)) {
 				uint16_t mss;
 
 				if (to.to_flags & TOF_MSS)
 					mss = to.to_mss;
 				else
 					if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 						mss = TCP6_MSS;
 					else
 						mss = TCP_MSS;
 				tcp_fastopen_update_cache(tp, mss,
 				    to.to_tfo_len, to.to_tfo_cookie);
 			} else
 				tcp_fastopen_disable_path(tp);
 		}
 	}
 
 	/*
 	 * If timestamps were negotiated during SYN/ACK and a
 	 * segment without a timestamp is received, silently drop
 	 * the segment, unless it is a RST segment or missing timestamps are
 	 * tolerated.
 	 * See section 3.2 of RFC 7323.
 	 */
 	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
 		if (((thflags & TH_RST) != 0) || V_tcp_tolerate_missing_ts) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 				    "segment processed normally\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 		} else {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 				    "segment silently dropped\n", s, __func__);
 				free(s, M_TCPLOG);
 			}
 			goto drop;
 		}
 	}
 	/*
 	 * If timestamps were not negotiated during SYN/ACK and a
 	 * segment with a timestamp is received, ignore the
 	 * timestamp and process the packet normally.
 	 * See section 3.2 of RFC 7323.
 	 */
 	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "segment processed normally\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window didn't
 	 * change and we're not retransmitting, it's a
 	 * candidate.  If the length is zero and the ack moved
 	 * forward, we're the sender side of the xfer.  Just
 	 * free the data acked & wake any higher level process
 	 * that was blocked waiting for space.  If the length
 	 * is non-zero and the ack didn't move, we're the
 	 * receiver side.  If we're getting packets in-order
 	 * (the reassembly queue is empty), add the data to
 	 * the socket buffer and note that we need a delayed ack.
 	 * Make sure that the hidden state-flags are also off.
 	 * Since we check for TCPS_ESTABLISHED first, it can only
 	 * be TH_NEEDSYN.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    th->th_seq == tp->rcv_nxt &&
 	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
 	    tp->snd_nxt == tp->snd_max &&
 	    tiwin && tiwin == tp->snd_wnd &&
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
 	    SEGQ_EMPTY(tp) &&
 	    ((to.to_flags & TOF_TS) == 0 ||
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
 		/*
 		 * If last ACK falls within this segment's sequence numbers,
 		 * record the timestamp.
 		 * NOTE that the test is modified according to the latest
 		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 		 */
 		if ((to.to_flags & TOF_TS) != 0 &&
 		    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 			tp->ts_recent_age = tcp_ts_getticks();
 			tp->ts_recent = to.to_tsval;
 		}
 
 		if (tlen == 0) {
 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
 			    !IN_RECOVERY(tp->t_flags) &&
 			    (to.to_flags & TOF_SACK) == 0 &&
 			    TAILQ_EMPTY(&tp->snd_holes)) {
 				/*
 				 * This is a pure ack for outstanding data.
 				 */
 				TCPSTAT_INC(tcps_predack);
 
 				/*
 				 * "bad retransmit" recovery without timestamps.
 				 */
 				if ((to.to_flags & TOF_TS) == 0 &&
 				    tp->t_rxtshift == 1 &&
 				    tp->t_flags & TF_PREVVALID &&
 				    tp->t_badrxtwin != 0 &&
 				    TSTMP_LT(ticks, tp->t_badrxtwin)) {
 					cc_cong_signal(tp, th, CC_RTO_ERR);
 				}
 
 				/*
 				 * Recalculate the transmit timer / rtt.
 				 *
 				 * Some boxes send broken timestamp replies
 				 * during the SYN+ACK phase, ignore
 				 * timestamps of 0 or we could calculate a
 				 * huge RTT and blow up the retransmit timer.
 				 */
 				if ((to.to_flags & TOF_TS) != 0 &&
 				    to.to_tsecr) {
 					uint32_t t;
 
 					t = tcp_ts_getticks() - to.to_tsecr;
 					if (!tp->t_rttlow || tp->t_rttlow > t)
 						tp->t_rttlow = t;
 					tcp_xmit_timer(tp,
 					    TCP_TS_TO_TICKS(t) + 1);
 				} else if (tp->t_rtttime &&
 				    SEQ_GT(th->th_ack, tp->t_rtseq)) {
 					if (!tp->t_rttlow ||
 					    tp->t_rttlow > ticks - tp->t_rtttime)
 						tp->t_rttlow = ticks - tp->t_rtttime;
 					tcp_xmit_timer(tp,
 							ticks - tp->t_rtttime);
 				}
 				acked = BYTES_THIS_ACK(tp, th);
 
 #ifdef TCP_HHOOK
 				/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 				hhook_run_tcp_est_in(tp, th, &to);
 #endif
 
 				TCPSTAT_ADD(tcps_rcvackpack, nsegs);
 				TCPSTAT_ADD(tcps_rcvackbyte, acked);
 				sbdrop(&so->so_snd, acked);
 				if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 				    SEQ_LEQ(th->th_ack, tp->snd_recover))
 					tp->snd_recover = th->th_ack - 1;
 
 				/*
 				 * Let the congestion control algorithm update
 				 * congestion control related information. This
 				 * typically means increasing the congestion
 				 * window.
 				 */
 				cc_ack_received(tp, th, nsegs, CC_ACK);
 
 				tp->snd_una = th->th_ack;
 				/*
 				 * Pull snd_wl2 up to prevent seq wrap relative
 				 * to th_ack.
 				 */
 				tp->snd_wl2 = th->th_ack;
 				tp->t_dupacks = 0;
 				m_freem(m);
 
 				/*
 				 * If all outstanding data are acked, stop
 				 * retransmit timer, otherwise restart timer
 				 * using current (possibly backed-off) value.
 				 * If process is waiting for space,
 				 * wakeup/selwakeup/signal.  If data
 				 * are ready to send, let tcp_output
 				 * decide between more output or persist.
 				 */
 #ifdef TCPDEBUG
 				if (so->so_options & SO_DEBUG)
 					tcp_trace(TA_INPUT, ostate, tp,
 					    (void *)tcp_saveipgen,
 					    &tcp_savetcp, 0);
 #endif
 				TCP_PROBE3(debug__input, tp, th, m);
 				if (tp->snd_una == tp->snd_max)
 					tcp_timer_activate(tp, TT_REXMT, 0);
 				else if (!tcp_timer_active(tp, TT_PERSIST))
 					tcp_timer_activate(tp, TT_REXMT,
 						      tp->t_rxtcur);
 				sowwakeup(so);
 				if (sbavail(&so->so_snd))
 					(void) tcp_output(tp);
 				goto check_delack;
 			}
 		} else if (th->th_ack == tp->snd_una &&
 		    tlen <= sbspace(&so->so_rcv)) {
 			int newsize = 0;	/* automatic sockbuf scaling */
 
 			/*
 			 * This is a pure, in-sequence data packet with
 			 * nothing on the reassembly queue and we have enough
 			 * buffer space to take it.
 			 */
 			/* Clean receiver SACK report if present */
 			if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
 				tcp_clean_sackreport(tp);
 			TCPSTAT_INC(tcps_preddat);
 			tp->rcv_nxt += tlen;
 			if (tlen &&
 			    ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
 			    (tp->t_fbyte_in == 0)) {
 				tp->t_fbyte_in = ticks;
 				if (tp->t_fbyte_in == 0)
 					tp->t_fbyte_in = 1;
 				if (tp->t_fbyte_out && tp->t_fbyte_in)
 					tp->t_flags2 |= TF2_FBYTES_COMPLETE;
 			}
 			/*
 			 * Pull snd_wl1 up to prevent seq wrap relative to
 			 * th_seq.
 			 */
 			tp->snd_wl1 = th->th_seq;
 			/*
 			 * Pull rcv_up up to prevent seq wrap relative to
 			 * rcv_nxt.
 			 */
 			tp->rcv_up = tp->rcv_nxt;
 			TCPSTAT_ADD(tcps_rcvpack, nsegs);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 #ifdef TCPDEBUG
 			if (so->so_options & SO_DEBUG)
 				tcp_trace(TA_INPUT, ostate, tp,
 				    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 			TCP_PROBE3(debug__input, tp, th, m);
 
 			newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
 
 			/* Add data to socket buffer. */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 				m_freem(m);
 			} else {
 				/*
 				 * Set new socket buffer size.
 				 * Give up when limit is reached.
 				 */
 				if (newsize)
-					if (!sbreserve_locked(&so->so_rcv,
-					    newsize, so, NULL))
+					if (!sbreserve_locked(so, SO_RCV,
+					    newsize, NULL))
 						so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			}
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 			if (DELAY_ACK(tp, tlen)) {
 				tp->t_flags |= TF_DELACK;
 			} else {
 				tp->t_flags |= TF_ACKNOW;
 				tcp_output(tp);
 			}
 			goto check_delack;
 		}
 	}
 
 	/*
 	 * Calculate amount of space in receive window,
 	 * and then do TCP input processing.
 	 * Receive window is amount of space in rcv queue,
 	 * but not less than advertised window.
 	 */
 	win = sbspace(&so->so_rcv);
 	if (win < 0)
 		win = 0;
 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
 	switch (tp->t_state) {
 	/*
 	 * If the state is SYN_RECEIVED:
 	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
 	 */
 	case TCPS_SYN_RECEIVED:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
 		if (IS_FASTOPEN(tp->t_flags)) {
 			/*
 			 * When a TFO connection is in SYN_RECEIVED, the
 			 * only valid packets are the initial SYN, a
 			 * retransmit/copy of the initial SYN (possibly with
 			 * a subset of the original data), a valid ACK, a
 			 * FIN, or a RST.
 			 */
 			if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			} else if (thflags & TH_SYN) {
 				/* non-initial SYN is ignored */
 				if ((tcp_timer_active(tp, TT_DELACK) ||
 				     tcp_timer_active(tp, TT_REXMT)))
 					goto drop;
 			} else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
 				goto drop;
 			}
 		}
 		break;
 
 	/*
 	 * If the state is SYN_SENT:
 	 *	if seg contains a RST with valid ACK (SEQ.ACK has already
 	 *	    been verified), then drop the connection.
 	 *	if seg contains a RST without an ACK, drop the seg.
 	 *	if seg does not contain SYN, then drop the seg.
 	 * Otherwise this is an acceptable SYN segment
 	 *	initialize tp->rcv_nxt and tp->irs
 	 *	if seg contains ack then advance tp->snd_una
 	 *	if seg contains an ECE and ECN support is enabled, the stream
 	 *	    is ECN capable.
 	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
 	 *	arrange for segment to be acked (eventually)
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
 		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
 			TCP_PROBE5(connect__refused, NULL, tp,
 			    m, tp, th);
 			tp = tcp_drop(tp, ECONNREFUSED);
 		}
 		if (thflags & TH_RST)
 			goto drop;
 		if (!(thflags & TH_SYN))
 			goto drop;
 
 		tp->irs = th->th_seq;
 		tcp_rcvseqinit(tp);
 		if (thflags & TH_ACK) {
 			int tfo_partial_ack = 0;
 
 			TCPSTAT_INC(tcps_connects);
 			soisconnected(so);
 #ifdef MAC
 			mac_socketpeer_set_from_mbuf(m, so);
 #endif
 			/* Do window scaling on this connection? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			tp->rcv_adv += min(tp->rcv_wnd,
 			    TCP_MAXWIN << tp->rcv_scale);
 			tp->snd_una++;		/* SYN is acked */
 			/*
 			 * If not all the data that was sent in the TFO SYN
 			 * has been acked, resend the remainder right away.
 			 */
 			if (IS_FASTOPEN(tp->t_flags) &&
 			    (tp->snd_una != tp->snd_max)) {
 				tp->snd_nxt = th->th_ack;
 				tfo_partial_ack = 1;
 			}
 			/*
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
 			if (DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial_ack)
 				tcp_timer_activate(tp, TT_DELACK,
 				    tcp_delacktime);
 			else
 				tp->t_flags |= TF_ACKNOW;
 
 			tcp_ecn_input_syn_sent(tp, thflags, iptos);
 
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
 			 * Transitions:
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
 			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
 				tcp_state_change(tp, TCPS_FIN_WAIT_1);
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
 				tcp_state_change(tp, TCPS_ESTABLISHED);
 				TCP_PROBE5(connect__established, NULL, tp,
 				    m, tp, th);
 				cc_conn_init(tp);
 				tcp_timer_activate(tp, TT_KEEP,
 				    TP_KEEPIDLE(tp));
 			}
 		} else {
 			/*
 			 * Received initial SYN in SYN-SENT[*] state =>
 			 * simultaneous open.
 			 * If it succeeds, connection is half-synchronized.
 			 * Otherwise, do 3-way handshake:
 			 *        SYN-SENT -> SYN-RECEIVED
 			 *        SYN-SENT* -> SYN-RECEIVED*
 			 */
 			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
 
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Advance th->th_seq to correspond to first data byte.
 		 * If data, trim to stay within window,
 		 * dropping FIN if necessary.
 		 */
 		th->th_seq++;
 		if (tlen > tp->rcv_wnd) {
 			todrop = tlen - tp->rcv_wnd;
 			m_adj(m, -todrop);
 			tlen = tp->rcv_wnd;
 			thflags &= ~TH_FIN;
 			TCPSTAT_INC(tcps_rcvpackafterwin);
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		tp->rcv_up = th->th_seq;
 		/*
 		 * Client side of transaction: already sent SYN and data.
 		 * If the remote host used T/TCP to validate the SYN,
 		 * our data will be ACK'd; if so, enter normal data segment
 		 * processing in the middle of step 5, ack processing.
 		 * Otherwise, goto step 6.
 		 */
 		if (thflags & TH_ACK)
 			goto process_ACK;
 
 		goto step6;
 
 	/*
 	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
 	 *      do normal processing.
 	 *
 	 * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later.
 	 */
 	case TCPS_LAST_ACK:
 	case TCPS_CLOSING:
 		break;  /* continue normal processing */
 	}
 
 	/*
 	 * States other than LISTEN or SYN_SENT.
 	 * First check the RST flag and sequence number since reset segments
 	 * are exempt from the timestamp and connection count tests.  This
 	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
 	 * below which allowed reset segments in half the sequence space
 	 * to fall through and be processed (which gives forged reset
 	 * segments with a random sequence number a 50 percent chance of
 	 * killing a connection).
 	 * Then check timestamp, if present.
 	 * Then check the connection count, if present.
 	 * Then check that at least some bytes of segment are within
 	 * receive window.  If segment begins before rcv_nxt,
 	 * drop leading data (and SYN); if nothing left, just ack.
 	 */
 	if (thflags & TH_RST) {
 		/*
 		 * RFC5961 Section 3.2
 		 *
 		 * - RST drops connection only if SEG.SEQ == RCV.NXT.
 		 * - If RST is in window, we send challenge ACK.
 		 *
 		 * Note: to take into account delayed ACKs, we should
 		 *   test against last_ack_sent instead of rcv_nxt.
 		 * Note 2: we handle special case of closed window, not
 		 *   covered by the RFC.
 		 */
 		if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
 		    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
 			KASSERT(tp->t_state != TCPS_SYN_SENT,
 			    ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
 			    __func__, th, tp));
 
 			if (V_tcp_insecure_rst ||
 			    tp->last_ack_sent == th->th_seq) {
 				TCPSTAT_INC(tcps_drops);
 				/* Drop the connection. */
 				switch (tp->t_state) {
 				case TCPS_SYN_RECEIVED:
 					so->so_error = ECONNREFUSED;
 					goto close;
 				case TCPS_ESTABLISHED:
 				case TCPS_FIN_WAIT_1:
 				case TCPS_FIN_WAIT_2:
 				case TCPS_CLOSE_WAIT:
 				case TCPS_CLOSING:
 				case TCPS_LAST_ACK:
 					so->so_error = ECONNRESET;
 				close:
 					/* FALLTHROUGH */
 				default:
 					tp = tcp_close(tp);
 				}
 			} else {
 				TCPSTAT_INC(tcps_badrst);
 				/* Send challenge ACK. */
 				tcp_respond(tp, mtod(m, void *), th, m,
 				    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
 				tp->last_ack_sent = tp->rcv_nxt;
 				m = NULL;
 			}
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC5961 Section 4.2
 	 * Send challenge ACK for any SYN in synchronized state.
 	 */
 	if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
 	    tp->t_state != TCPS_SYN_RECEIVED) {
 		TCPSTAT_INC(tcps_badsyn);
 		if (V_tcp_insecure_syn &&
 		    SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
 			tp = tcp_drop(tp, ECONNRESET);
 			rstreason = BANDLIM_UNLIMITED;
 		} else {
 			/* Send challenge ACK. */
 			tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
 			    tp->snd_nxt, TH_ACK);
 			tp->last_ack_sent = tp->rcv_nxt;
 			m = NULL;
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
 		/* Check to see if ts_recent is over 24 days old.  */
 		if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
 			/*
 			 * Invalidate ts_recent.  If this segment updates
 			 * ts_recent, the age will be reset later and ts_recent
 			 * will get a valid value.  If it does not, setting
 			 * ts_recent to zero will at least satisfy the
 			 * requirement that zero be placed in the timestamp
 			 * echo reply when ts_recent isn't valid.  The
 			 * age isn't reset until we get a valid ts_recent
 			 * because we don't want out-of-order segments to be
 			 * dropped when ts_recent is old.
 			 */
 			tp->ts_recent = 0;
 		} else {
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
 			TCPSTAT_INC(tcps_pawsdrop);
 			if (tlen)
 				goto dropafterack;
 			goto drop;
 		}
 	}
 
 	/*
 	 * In the SYN-RECEIVED state, validate that the packet belongs to
 	 * this connection before trimming the data to fit the receive
 	 * window.  Check the sequence number versus IRS since we know
 	 * the sequence numbers haven't wrapped.  This is a partial fix
 	 * for the "LAND" DoS attack.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 
 	todrop = tp->rcv_nxt - th->th_seq;
 	if (todrop > 0) {
 		if (thflags & TH_SYN) {
 			thflags &= ~TH_SYN;
 			th->th_seq++;
 			if (th->th_urp > 1)
 				th->th_urp--;
 			else
 				thflags &= ~TH_URG;
 			todrop--;
 		}
 		/*
 		 * Following if statement from Stevens, vol. 2, p. 960.
 		 */
 		if (todrop > tlen
 		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
 			/*
 			 * Any valid FIN must be to the left of the window.
 			 * At this point the FIN must be a duplicate or out
 			 * of sequence; drop it.
 			 */
 			thflags &= ~TH_FIN;
 
 			/*
 			 * Send an ACK to resynchronize and drop any data.
 			 * But keep on processing for RST or ACK.
 			 */
 			tp->t_flags |= TF_ACKNOW;
 			todrop = tlen;
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
 		} else {
 			TCPSTAT_INC(tcps_rcvpartduppack);
 			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
 		}
 		/*
 		 * DSACK - add SACK block for dropped range
 		 */
 		if ((todrop > 0) && (tp->t_flags & TF_SACK_PERMIT)) {
 			tcp_update_sack_list(tp, th->th_seq,
 			    th->th_seq + todrop);
 			/*
 			 * ACK now, as the next in-sequence segment
 			 * will clear the DSACK block again
 			 */
 			tp->t_flags |= TF_ACKNOW;
 		}
 		drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
 		if (th->th_urp > todrop)
 			th->th_urp -= todrop;
 		else {
 			thflags &= ~TH_URG;
 			th->th_urp = 0;
 		}
 	}
 
 	/*
 	 * If new data are received on a connection after the
 	 * user processes are gone, then RST the other end.
 	 */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
 			    "after socket was closed, "
 			    "sending RST and removing tcpcb\n",
 			    s, __func__, tcpstates[tp->t_state], tlen);
 			free(s, M_TCPLOG);
 		}
 		tp = tcp_close(tp);
 		TCPSTAT_INC(tcps_rcvafterclose);
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * If segment ends after window, drop trailing data
 	 * (and PUSH and FIN); if nothing left, just ACK.
 	 */
 	todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
 	if (todrop > 0) {
 		TCPSTAT_INC(tcps_rcvpackafterwin);
 		if (todrop >= tlen) {
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
 			/*
 			 * If window is closed can only take segments at
 			 * window edge, and have to drop data and PUSH from
 			 * incoming segments.  Continue processing, but
 			 * remember to ack.  Otherwise, drop segment
 			 * and ack.
 			 */
 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
 				tp->t_flags |= TF_ACKNOW;
 				TCPSTAT_INC(tcps_rcvwinprobe);
 			} else
 				goto dropafterack;
 		} else
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		m_adj(m, -todrop);
 		tlen -= todrop;
 		thflags &= ~(TH_PUSH|TH_FIN);
 	}
 
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record its timestamp.
 	 * NOTE:
 	 * 1) That the test incorporates suggestions from the latest
 	 *    proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 * 2) That updating only on newer timestamps interferes with
 	 *    our earlier PAWS tests, so this check should be solely
 	 *    predicated on the sequence space of this segment.
 	 * 3) That we modify the segment boundary check to be
 	 *        Last.ACK.Sent <= SEG.SEQ + SEG.Len
 	 *    instead of RFC1323's
 	 *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
 	 *    This modified check allows us to overcome RFC1323's
 	 *    limitations as described in Stevens TCP/IP Illustrated
 	 *    Vol. 2 p.869. In such cases, we can still calculate the
 	 *    RTT correctly when RCV.NXT == Last.ACK.Sent.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
 		((thflags & (TH_SYN|TH_FIN)) != 0))) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to.to_tsval;
 	}
 
 	/*
 	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
 	 * flag is on (half-synchronized state), then queue data for
 	 * later processing; else drop segment and return.
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
 		    (tp->t_flags & TF_NEEDSYN)) {
 			if (tp->t_state == TCPS_SYN_RECEIVED &&
 			    IS_FASTOPEN(tp->t_flags)) {
 				tp->snd_wnd = tiwin;
 				cc_conn_init(tp);
 			}
 			goto step6;
 		} else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
 	}
 
 	/*
 	 * Ack processing.
 	 */
 	switch (tp->t_state) {
 	/*
 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
 	 * ESTABLISHED state and continue processing.
 	 * The ACK was checked above.
 	 */
 	case TCPS_SYN_RECEIVED:
 
 		TCPSTAT_INC(tcps_connects);
 		soisconnected(so);
 		/* Do window scaling? */
 		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 			tp->rcv_scale = tp->request_r_scale;
 		}
 		tp->snd_wnd = tiwin;
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
 		tp->t_starttime = ticks;
 		if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
 			tcp_fastopen_decrement_counter(tp->t_tfo_pending);
 			tp->t_tfo_pending = NULL;
 		}
 		if (tp->t_flags & TF_NEEDFIN) {
 			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
 			tcp_state_change(tp, TCPS_ESTABLISHED);
 			TCP_PROBE5(accept__established, NULL, tp,
 			    m, tp, th);
 			/*
 			 * TFO connections call cc_conn_init() during SYN
 			 * processing.  Calling it again here for such
 			 * connections is not harmless as it would undo the
 			 * snd_cwnd reduction that occurs when a TFO SYN|ACK
 			 * is retransmitted.
 			 */
 			if (!IS_FASTOPEN(tp->t_flags))
 				cc_conn_init(tp);
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
 		 * Account for the ACK of our SYN prior to
 		 * regular ACK processing below, except for
 		 * simultaneous SYN, which is handled later.
 		 */
 		if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN))
 			incforsyn = 1;
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
 		 * later; if not, do so now to pass queued data to user.
 		 */
 		if (tlen == 0 && (thflags & TH_FIN) == 0) {
 			(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
 			    (struct mbuf *)0);
 			tcp_handle_wakeup(tp, so);
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		/* FALLTHROUGH */
 
 	/*
 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
 	 * ACKs.  If the ack is in the range
 	 *	tp->snd_una < th->th_ack <= tp->snd_max
 	 * then advance tp->snd_una to th->th_ack and drop
 	 * data from the retransmission queue.  If this ACK reflects
 	 * more up to date window information we update our window information.
 	 */
 	case TCPS_ESTABLISHED:
 	case TCPS_FIN_WAIT_1:
 	case TCPS_FIN_WAIT_2:
 	case TCPS_CLOSE_WAIT:
 	case TCPS_CLOSING:
 	case TCPS_LAST_ACK:
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			TCPSTAT_INC(tcps_rcvacktoomuch);
 			goto dropafterack;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to.to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes))) {
 			if (((sack_changed = tcp_sack_doack(tp, &to, th->th_ack)) != 0) &&
 			    (tp->t_flags & TF_LRD)) {
 				tcp_sack_lost_retransmission(tp, th);
 			}
 		} else
 			/*
 			 * Reset the value so that previous (valid) value
 			 * from the last ack with SACK doesn't get used.
 			 */
 			tp->sackhint.sacked_bytes = 0;
 
 #ifdef TCP_HHOOK
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, &to);
 #endif
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
 			maxseg = tcp_maxseg(tp);
 			if (tlen == 0 &&
 			    (tiwin == tp->snd_wnd ||
 			    (tp->t_flags & TF_SACK_PERMIT))) {
 				/*
 				 * If this is the first time we've seen a
 				 * FIN from the remote, this is not a
 				 * duplicate and it needs to be processed
 				 * normally.  This happens during a
 				 * simultaneous close.
 				 */
 				if ((thflags & TH_FIN) &&
 				    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
 					tp->t_dupacks = 0;
 					break;
 				}
 				TCPSTAT_INC(tcps_rcvdupack);
 				/*
 				 * If we have outstanding data (other than
 				 * a window probe), this is a completely
 				 * duplicate ack (ie, window info didn't
 				 * change and FIN isn't set),
 				 * the ack is the biggest we've
 				 * seen and we've seen exactly our rexmt
 				 * threshold of them, assume a packet
 				 * has been dropped and retransmit it.
 				 * Kludge snd_nxt & the congestion
 				 * window so we send only this one
 				 * packet.
 				 *
 				 * We know we're losing at the current
 				 * window size so do congestion avoidance
 				 * (set ssthresh to half the current window
 				 * and pull our congestion window back to
 				 * the new ssthresh).
 				 *
 				 * Dup acks mean that packets have left the
 				 * network (they're now cached at the receiver)
 				 * so bump cwnd by the amount in the receiver
 				 * to keep a constant cwnd packets in the
 				 * network.
 				 *
 				 * When using TCP ECN, notify the peer that
 				 * we reduced the cwnd.
 				 */
 				/*
 				 * Following 2 kinds of acks should not affect
 				 * dupack counting:
 				 * 1) Old acks
 				 * 2) Acks with SACK but without any new SACK
 				 * information in them. These could result from
 				 * any anomaly in the network like a switch
 				 * duplicating packets or a possible DoS attack.
 				 */
 				if (th->th_ack != tp->snd_una ||
 				    ((tp->t_flags & TF_SACK_PERMIT) &&
 				    (to.to_flags & TOF_SACK) &&
 				    !sack_changed))
 					break;
 				else if (!tcp_timer_active(tp, TT_REXMT))
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				     IN_FASTRECOVERY(tp->t_flags)) {
 					cc_ack_received(tp, th, nsegs,
 					    CC_DUPACK);
 					if (V_tcp_do_prr &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						tcp_do_prr_ack(tp, th, &to);
 					} else if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    (to.to_flags & TOF_SACK) &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						int awnd;
 
 						/*
 						 * Compute the amount of data in flight first.
 						 * We can inject new data into the pipe iff
 						 * we have less than 1/2 the original window's
 						 * worth of data in flight.
 						 */
 						if (V_tcp_do_newsack)
 							awnd = tcp_compute_pipe(tp);
 						else
 							awnd = (tp->snd_nxt - tp->snd_fack) +
 								tp->sackhint.sack_bytes_rexmit;
 
 						if (awnd < tp->snd_ssthresh) {
 							tp->snd_cwnd += maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
 								tp->snd_cwnd = tp->snd_ssthresh;
 						}
 					} else
 						tp->snd_cwnd += maxseg;
 					(void) tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh ||
 					    (tp->t_flags & TF_SACK_PERMIT &&
 					     V_tcp_do_newsack &&
 					     tp->sackhint.sacked_bytes >
 					     (tcprexmtthresh - 1) * maxseg)) {
 enter_recovery:
 					/*
 					 * Above is the RFC6675 trigger condition of
 					 * more than (dupthresh-1)*maxseg sacked data.
 					 * If the count of holes in the
 					 * scoreboard is >= dupthresh, we could
 					 * also enter loss recovery, but don't
 					 * have that value readily available.
 					 */
 					tp->t_dupacks = tcprexmtthresh;
 					tcp_seq onxt = tp->snd_nxt;
 
 					/*
 					 * If we're doing sack, or prr, check
 					 * to see if we're already in sack
 					 * recovery. If we're not doing sack,
 					 * check to see if we're in newreno
 					 * recovery.
 					 */
 					if (V_tcp_do_prr ||
 					    (tp->t_flags & TF_SACK_PERMIT)) {
 						if (IN_FASTRECOVERY(tp->t_flags)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					} else {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
 					/* Congestion signal before ack. */
 					cc_cong_signal(tp, th, CC_NDUPACK);
 					cc_ack_received(tp, th, nsegs,
 					    CC_DUPACK);
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
 					if (V_tcp_do_prr) {
 						/*
 						 * snd_ssthresh is already updated by
 						 * cc_cong_signal.
 						 */
 						if ((tp->t_flags & TF_SACK_PERMIT) &&
 						    (to.to_flags & TOF_SACK)) {
 							tp->sackhint.prr_delivered =
 							    tp->sackhint.sacked_bytes;
 						} else {
 							tp->sackhint.prr_delivered =
 							    imin(tp->snd_max - tp->snd_una,
 							    imin(INT_MAX / 65536,
 								tp->t_dupacks) * maxseg);
 						}
 						tp->sackhint.recover_fs = max(1,
 						    tp->snd_nxt - tp->snd_una);
 					}
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    (to.to_flags & TOF_SACK)) {
 						TCPSTAT_INC(
 						    tcps_sack_recovery_episode);
 						tp->snd_recover = tp->snd_nxt;
 						tp->snd_cwnd = maxseg;
 						(void) tcp_output(tp);
 						if (SEQ_GT(th->th_ack, tp->snd_una))
 							goto resume_partialack;
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
 					tp->snd_cwnd = maxseg;
 					(void) tcp_output(tp);
 					KASSERT(tp->snd_limited <= 2,
 					    ("%s: tp->snd_limited too big",
 					    __func__));
 					tp->snd_cwnd = tp->snd_ssthresh +
 					     maxseg *
 					     (tp->t_dupacks - tp->snd_limited);
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
 					goto drop;
 				} else if (V_tcp_do_rfc3042) {
 					/*
 					 * Process first and second duplicate
 					 * ACKs. Each indicates a segment
 					 * leaving the network, creating room
 					 * for more. Make sure we can send a
 					 * packet on reception of each duplicate
 					 * ACK by increasing snd_cwnd by one
 					 * segment. Restore the original
 					 * snd_cwnd after packet transmission.
 					 */
 					cc_ack_received(tp, th, nsegs,
 					    CC_DUPACK);
 					uint32_t oldcwnd = tp->snd_cwnd;
 					tcp_seq oldsndmax = tp->snd_max;
 					u_int sent;
 					int avail;
 
 					KASSERT(tp->t_dupacks == 1 ||
 					    tp->t_dupacks == 2,
 					    ("%s: dupacks not 1 or 2",
 					    __func__));
 					if (tp->t_dupacks == 1)
 						tp->snd_limited = 0;
 					tp->snd_cwnd =
 					    (tp->snd_nxt - tp->snd_una) +
 					    (tp->t_dupacks - tp->snd_limited) *
 					    maxseg;
 					/*
 					 * Only call tcp_output when there
 					 * is new data available to be sent.
 					 * Otherwise we would send pure ACKs.
 					 */
 					SOCKBUF_LOCK(&so->so_snd);
 					avail = sbavail(&so->so_snd) -
 					    (tp->snd_nxt - tp->snd_una);
 					SOCKBUF_UNLOCK(&so->so_snd);
 					if (avail > 0)
 						(void) tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
 					if (sent > maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
 						    tp->snd_limited == 0) ||
 						   (sent == maxseg + 1 &&
 						    tp->t_flags & TF_SENTFIN),
 						    ("%s: sent too much",
 						    __func__));
 						tp->snd_limited = 2;
 					} else if (sent > 0)
 						++tp->snd_limited;
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
 			}
 			break;
 		} else {
 			/*
 			 * This ack is advancing the left edge, reset the
 			 * counter.
 			 */
 			tp->t_dupacks = 0;
 			/*
 			 * If this ack also has new SACK info, increment the
 			 * counter as per rfc6675. The variable
 			 * sack_changed tracks all changes to the SACK
 			 * scoreboard, including when partial ACKs without
 			 * SACK options are received, and clear the scoreboard
 			 * from the left side. Such partial ACKs should not be
 			 * counted as dupacks here.
 			 */
 			if ((tp->t_flags & TF_SACK_PERMIT) &&
 			    (to.to_flags & TOF_SACK) &&
 			    sack_changed) {
 				tp->t_dupacks++;
 				/* limit overhead by setting maxseg last */
 				if (!IN_FASTRECOVERY(tp->t_flags) &&
 				    (tp->sackhint.sacked_bytes >
 				    ((tcprexmtthresh - 1) *
 				    (maxseg = tcp_maxseg(tp))))) {
 					goto enter_recovery;
 				}
 			}
 		}
 
 resume_partialack:
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
 		    ("%s: th_ack <= snd_una", __func__));
 
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
 		if (IN_FASTRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (tp->t_flags & TF_SACK_PERMIT)
 					if (V_tcp_do_prr && to.to_flags & TOF_SACK) {
 						tcp_timer_activate(tp, TT_REXMT, 0);
 						tp->t_rtttime = 0;
 						tcp_do_prr_ack(tp, th, &to);
 						tp->t_flags |= TF_ACKNOW;
 						(void) tcp_output(tp);
 					} else
 						tcp_sack_partialack(tp, th);
 				else
 					tcp_newreno_partial_ack(tp, th);
 			} else
 				cc_post_recovery(tp, th);
 		} else if (IN_CONGRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (V_tcp_do_prr) {
 					tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th);
 					tp->snd_fack = th->th_ack;
 					tcp_do_prr_ack(tp, th, &to);
 					(void) tcp_output(tp);
 				}
 			} else
 				cc_post_recovery(tp, th);
 		}
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.
 		 */
 		if (tp->t_flags & TF_NEEDSYN) {
 			/*
 			 * T/TCP: Connection was half-synchronized, and our
 			 * SYN has been ACK'd (so connection is now fully
 			 * synchronized).  Go to non-starred state,
 			 * increment snd_una for ACK of SYN, and check if
 			 * we can do window scaling.
 			 */
 			tp->t_flags &= ~TF_NEEDSYN;
 			tp->snd_una++;
 			/* Do window scaling? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 				/* Send window already scaled. */
 			}
 		}
 
 process_ACK:
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Adjust for the SYN bit in sequence space,
 		 * but don't account for it in cwnd calculations.
 		 * This is for the SYN_RECEIVED, non-simultaneous
 		 * SYN case. SYN_SENT and simultaneous SYN are
 		 * treated elsewhere.
 		 */
 		if (incforsyn)
 			tp->snd_una++;
 		acked = BYTES_THIS_ACK(tp, th);
 		KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
 		    "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,
 		    tp->snd_una, th->th_ack, tp, m));
 		TCPSTAT_ADD(tcps_rcvackpack, nsegs);
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 
 		/*
 		 * If we just performed our first retransmit, and the ACK
 		 * arrives within our recovery window, then it was a mistake
 		 * to do the retransmit in the first place.  Recover our
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
 		if (tp->t_rxtshift == 1 &&
 		    tp->t_flags & TF_PREVVALID &&
 		    tp->t_badrxtwin != 0 &&
 		    to.to_flags & TOF_TS &&
 		    to.to_tsecr != 0 &&
 		    TSTMP_LT(to.to_tsecr, tp->t_badrxtwin))
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 
 		/*
 		 * If we have a timestamp reply, update smoothed
 		 * round trip time.  If no timestamp is present but
 		 * transmit timer is running and timed sequence
 		 * number was acked, update smoothed round trip time.
 		 * Since we now have an rtt measurement, cancel the
 		 * timer backoff (cf., Phil Karn's retransmit alg.).
 		 * Recompute the initial retransmit timer.
 		 *
 		 * Some boxes send broken timestamp replies
 		 * during the SYN+ACK phase, ignore
 		 * timestamps of 0 or we could calculate a
 		 * huge RTT and blow up the retransmit timer.
 		 */
 		if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
 			uint32_t t;
 
 			t = tcp_ts_getticks() - to.to_tsecr;
 			if (!tp->t_rttlow || tp->t_rttlow > t)
 				tp->t_rttlow = t;
 			tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
 			if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
 				tp->t_rttlow = ticks - tp->t_rtttime;
 			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
 		}
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
 		 * timer and remember to restart (more output or persist).
 		 * If there is more data to be acked, restart retransmit
 		 * timer, using current (possibly backed-off) value.
 		 */
 		if (th->th_ack == tp->snd_max) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			needoutput = 1;
 		} else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
 		 *    skip rest of ACK processing.
 		 */
 		if (acked == 0)
 			goto step6;
 
 		/*
 		 * Let the congestion control algorithm update congestion
 		 * control related information. This typically means increasing
 		 * the congestion window.
 		 */
 		cc_ack_received(tp, th, nsegs, CC_ACK);
 
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > sbavail(&so->so_snd)) {
 			if (tp->snd_wnd >= sbavail(&so->so_snd))
 				tp->snd_wnd -= sbavail(&so->so_snd);
 			else
 				tp->snd_wnd = 0;
 			mfree = sbcut_locked(&so->so_snd,
 			    (int)sbavail(&so->so_snd));
 			ourfinisacked = 1;
 		} else {
 			mfree = sbcut_locked(&so->so_snd, acked);
 			if (tp->snd_wnd >= (uint32_t) acked)
 				tp->snd_wnd -= acked;
 			else
 				tp->snd_wnd = 0;
 			ourfinisacked = 0;
 		}
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
 		m_freem(mfree);
 		/* Detect una wraparound. */
 		if (!IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 		/* XXXLAS: Can this be moved up into cc_post_recovery? */
 		if (IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_RECOVERY(tp->t_flags);
 		}
 		tp->snd_una = th->th_ack;
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))
 				tp->snd_recover = tp->snd_una;
 		}
 		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
 			tp->snd_nxt = tp->snd_una;
 
 		switch (tp->t_state) {
 		/*
 		 * In FIN_WAIT_1 STATE in addition to the processing
 		 * for the ESTABLISHED state if our FIN is now acknowledged
 		 * then enter FIN_WAIT_2.
 		 */
 		case TCPS_FIN_WAIT_1:
 			if (ourfinisacked) {
 				/*
 				 * If we can't receive any more
 				 * data, then closing user can proceed.
 				 * Starting the timer is contrary to the
 				 * specification, but if we don't get a FIN
 				 * we'll hang forever.
 				 *
 				 * XXXjl:
 				 * we should release the tp also, and use a
 				 * compressed state.
 				 */
 				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 					soisdisconnected(so);
 					tcp_timer_activate(tp, TT_2MSL,
 					    (tcp_fast_finwait2_recycle ?
 					    tcp_finwait2_timeout :
 					    TP_MAXIDLE(tp)));
 				}
 				tcp_state_change(tp, TCPS_FIN_WAIT_2);
 			}
 			break;
 
 		/*
 		 * In CLOSING STATE in addition to the processing for
 		 * the ESTABLISHED state if the ACK acknowledges our FIN
 		 * then enter the TIME-WAIT state, otherwise ignore
 		 * the segment.
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
 				tcp_twstart(tp);
 				m_freem(m);
 				return;
 			}
 			break;
 
 		/*
 		 * In LAST_ACK, we may still be waiting for data to drain
 		 * and/or to be acked, as well as for the ack of our FIN.
 		 * If our FIN is now acknowledged, delete the TCB,
 		 * enter the closed state and return.
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
 				tp = tcp_close(tp);
 				goto drop;
 			}
 			break;
 		}
 	}
 
 step6:
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Update window information.
 	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
 	 */
 	if ((thflags & TH_ACK) &&
 	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tlen == 0 &&
 		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
 			TCPSTAT_INC(tcps_rcvwinupd);
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 		needoutput = 1;
 	}
 
 	/*
 	 * Process segments with URG.
 	 */
 	if ((thflags & TH_URG) && th->th_urp &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		/*
 		 * This is a kludge, but if we receive and accept
 		 * random urgent pointers, we'll crash in
 		 * soreceive.  It's hard to imagine someone
 		 * actually wanting to send this much urgent data.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
 			SOCKBUF_UNLOCK(&so->so_rcv);	/* XXX */
 			goto dodata;			/* XXX */
 		}
 		/*
 		 * If this segment advances the known urgent pointer,
 		 * then mark the data stream.  This should not happen
 		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
 		 * a FIN has been received from the remote side.
 		 * In these states we ignore the URG.
 		 *
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section as the original
 		 * spec states (in one of two places).
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
 			so->so_oobmark = sbavail(&so->so_rcv) +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SBS_RCVATMARK;
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		/*
 		 * Remove out of band data so doesn't get presented to user.
 		 * This can happen independent of advancing the URG pointer,
 		 * but if two URG's are pending at once, some out-of-band
 		 * data may creep in... ick.
 		 */
 		if (th->th_urp <= (uint32_t)tlen &&
 		    !(so->so_options & SO_OOBINLINE)) {
 			/* hdr drop is delayed */
 			tcp_pulloutofband(so, th, m, drop_hdrlen);
 		}
 	} else {
 		/*
 		 * If no out of band data is expected,
 		 * pull receive urgent pointer along
 		 * with the receive window.
 		 */
 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
 			tp->rcv_up = tp->rcv_nxt;
 	}
 dodata:							/* XXX */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Process the segment text, merging it into the TCP sequencing queue,
 	 * and arranging for acknowledgment of receipt if necessary.
 	 * This process logically involves adjusting tp->rcv_wnd as data
 	 * is presented to the user (this happens in tcp_usrreq.c,
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
 	tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
 		   IS_FASTOPEN(tp->t_flags));
 	if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
 		tcp_seq save_rnxt  = tp->rcv_nxt;
 		int     save_tlen  = tlen;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		/*
 		 * Insert segment which includes th into TCP reassembly queue
 		 * with control block tp.  Set thflags to whether reassembly now
 		 * includes a segment with FIN.  This handles the common case
 		 * inline (segment is the next to be received on an established
 		 * connection, and the queue is empty), avoiding linkage into
 		 * and removal from the queue and repetition of various
 		 * conversions.
 		 * Set DELACK for segments received in order, but ack
 		 * immediately when segments are out of order (so
 		 * fast retransmit can work).
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    SEGQ_EMPTY(tp) &&
 		    (TCPS_HAVEESTABLISHED(tp->t_state) ||
 		     tfo_syn)) {
 			if (DELAY_ACK(tp, tlen) || tfo_syn)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt += tlen;
 			if (tlen &&
 			    ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
 			    (tp->t_fbyte_in == 0)) {
 				tp->t_fbyte_in = ticks;
 				if (tp->t_fbyte_in == 0)
 					tp->t_fbyte_in = 1;
 				if (tp->t_fbyte_out && tp->t_fbyte_in)
 					tp->t_flags2 |= TF2_FBYTES_COMPLETE;
 			}
 			thflags = tcp_get_flags(th) & TH_FIN;
 			TCPSTAT_INC(tcps_rcvpack);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			tp->t_flags |= TF_WAKESOR;
 		} else {
 			/*
 			 * XXX: Due to the header drop above "th" is
 			 * theoretically invalid by now.  Fortunately
 			 * m_adj() doesn't actually free any mbufs
 			 * when trimming from the head.
 			 */
 			tcp_seq temp = save_start;
 
 			thflags = tcp_reass(tp, th, &temp, &tlen, m);
 			tp->t_flags |= TF_ACKNOW;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (save_tlen > 0) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
 			if ((tlen == 0) && (SEQ_LT(save_start, save_rnxt))) {
 				/*
 				 * DSACK actually handled in the fastpath
 				 * above.
 				 */
 				tcp_update_sack_list(tp, save_start,
 				    save_start + save_tlen);
 			} else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
 				if ((tp->rcv_numsacks >= 1) &&
 				    (tp->sackblks[0].end == save_start)) {
 					/*
 					 * Partial overlap, recorded at todrop
 					 * above.
 					 */
 					tcp_update_sack_list(tp,
 					    tp->sackblks[0].start,
 					    tp->sackblks[0].end);
 				} else {
 					tcp_update_dsack_list(tp, save_start,
 					    save_start + save_tlen);
 				}
 			} else if (tlen >= save_tlen) {
 				/* Update of sackblks. */
 				tcp_update_dsack_list(tp, save_start,
 				    save_start + save_tlen);
 			} else if (tlen > 0) {
 				tcp_update_dsack_list(tp, save_start,
 				    save_start + tlen);
 			}
 		}
 		tcp_handle_wakeup(tp, so);
 #if 0
 		/*
 		 * Note the amount of data that peer has sent into
 		 * our window, in order to estimate the sender's
 		 * buffer size.
 		 * XXX: Unused.
 		 */
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
 			len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
 		else
 			len = so->so_rcv.sb_hiwat;
 #endif
 	} else {
 		m_freem(m);
 		thflags &= ~TH_FIN;
 	}
 
 	/*
 	 * If FIN is received ACK the FIN and let the user know
 	 * that the connection is closing.
 	 */
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			/* The socket upcall is handled by socantrcvmore. */
 			socantrcvmore(so);
 			/*
 			 * If connection is half-synchronized
 			 * (ie NEEDSYN flag on) then delay ACK,
 			 * so it may be piggybacked when SYN is sent.
 			 * Otherwise, since we received a FIN then no
 			 * more input can be expected, send ACK now.
 			 */
 			if (tp->t_flags & TF_NEEDSYN)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt++;
 		}
 		switch (tp->t_state) {
 		/*
 		 * In SYN_RECEIVED and ESTABLISHED STATES
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
 			tcp_state_change(tp, TCPS_CLOSE_WAIT);
 			break;
 
 		/*
 		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
 			tcp_state_change(tp, TCPS_CLOSING);
 			break;
 
 		/*
 		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
 		 * starting the time-wait timer, turning off the other
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
 			tcp_twstart(tp);
 			return;
 		}
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, m);
 
 	/*
 	 * Return any desired output.
 	 */
 	if (needoutput || (tp->t_flags & TF_ACKNOW))
 		(void) tcp_output(tp);
 
 check_delack:
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return;
 
 dropafterack:
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies
 	 * sequence space, where the ACK reflects our state.
 	 *
 	 * We can now skip the test for the RST flag since all
 	 * paths to this code happen after packets containing
 	 * RST have been dropped.
 	 *
 	 * In the SYN-RECEIVED state, don't send an ACK unless the
 	 * segment we received passes the SYN-RECEIVED ACK test.
 	 * If it fails send a RST.  This breaks the loop in the
 	 * "LAND" DoS attack, and also prevents an ACK storm
 	 * between two listening ports that have been sent forged
 	 * SYN segments, each with the source address of the other.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
 	    (SEQ_GT(tp->snd_una, th->th_ack) ||
 	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, m);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
 	INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 	return;
 
 dropwithreset:
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_WUNLOCK(tp->t_inpcb);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	return;
 
 drop:
 	/*
 	 * Drop space held by incoming segment and return.
 	 */
 #ifdef TCPDEBUG
 	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, m);
 	if (tp != NULL) {
 		INP_WUNLOCK(tp->t_inpcb);
 	}
 	m_freem(m);
 }
 
 /*
  * Issue RST and make ACK acceptable to originator of segment.
  * The mbuf must still include the original packet header.
  * tp may be NULL.
  */
 void
 tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
     int tlen, int rstreason)
 {
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 
 	if (tp != NULL) {
 		INP_LOCK_ASSERT(tp->t_inpcb);
 	}
 
 	/* Don't bother if destination was broadcast/multicast. */
 	if ((tcp_get_flags(th) & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
 		goto drop;
 #ifdef INET6
 	if (mtod(m, struct ip *)->ip_v == 6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
 			goto drop;
 		/* IPv6 anycast check is done at tcp6_input() */
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		ip = mtod(m, struct ip *);
 		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 		    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 		    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
 		    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
 			goto drop;
 	}
 #endif
 
 	/* Perform bandwidth limiting. */
 	if (badport_bandlim(rstreason) < 0)
 		goto drop;
 
 	/* tcp_respond consumes the mbuf chain. */
 	if (tcp_get_flags(th) & TH_ACK) {
 		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
 		    th->th_ack, TH_RST);
 	} else {
 		if (tcp_get_flags(th) & TH_SYN)
 			tlen++;
 		if (tcp_get_flags(th) & TH_FIN)
 			tlen++;
 		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
 		    (tcp_seq)0, TH_RST|TH_ACK);
 	}
 	return;
 drop:
 	m_freem(m);
 }
 
 /*
  * Walk a TCP option list and record the options we understand in *to.
  *
  * to    - result; to_flags is cleared first and then gains one TOF_* bit
  *         per accepted option.  to_signature, to_sacks and to_tfo_cookie
  *         are set to point into the option bytes at cp, not to copies.
  * cp    - first byte of the option list.
  * cnt   - number of option bytes available at cp.
  * flags - when TO_SYN is set, the MSS, window scale, SACK permitted and
  *         fast open options are accepted and SACK blocks are ignored;
  *         when it is clear the reverse holds.
  *
  * Parsing stops at an EOL option, or at the first option whose length
  * byte is missing, below 2, or larger than the bytes remaining.  Known
  * options with an unexpected length and unknown options are skipped.
  */
 void
 tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
 {
 	int kind, len;
 
 	to->to_flags = 0;
 	while (cnt > 0) {
 		kind = cp[0];
 		if (kind == TCPOPT_EOL)
 			break;
 		if (kind == TCPOPT_NOP) {
 			len = 1;
 		} else {
 			/* Need a length byte, and a sane value in it. */
 			if (cnt < 2)
 				break;
 			len = cp[1];
 			if (len < 2 || len > cnt)
 				break;
 		}
 
 		switch (kind) {
 		case TCPOPT_MAXSEG:
 			if (len == TCPOLEN_MAXSEG && (flags & TO_SYN) != 0) {
 				to->to_flags |= TOF_MSS;
 				bcopy((char *)cp + 2,
 				    (char *)&to->to_mss, sizeof(to->to_mss));
 				to->to_mss = ntohs(to->to_mss);
 			}
 			break;
 		case TCPOPT_WINDOW:
 			if (len == TCPOLEN_WINDOW && (flags & TO_SYN) != 0) {
 				to->to_flags |= TOF_SCALE;
 				to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
 			}
 			break;
 		case TCPOPT_TIMESTAMP:
 			if (len == TCPOLEN_TIMESTAMP) {
 				to->to_flags |= TOF_TS;
 				bcopy((char *)cp + 2,
 				    (char *)&to->to_tsval,
 				    sizeof(to->to_tsval));
 				to->to_tsval = ntohl(to->to_tsval);
 				bcopy((char *)cp + 6,
 				    (char *)&to->to_tsecr,
 				    sizeof(to->to_tsecr));
 				to->to_tsecr = ntohl(to->to_tsecr);
 			}
 			break;
 		case TCPOPT_SIGNATURE:
 			/*
 			 * Note that the peer sent a signature, so that
 			 * the syncache code can answer a SYN carrying
 			 * TCP_SIGNATURE correctly.
 			 */
 			if (len == TCPOLEN_SIGNATURE) {
 				to->to_flags |= TOF_SIGNATURE;
 				to->to_signature = cp + 2;
 			}
 			break;
 		case TCPOPT_SACK_PERMITTED:
 			if (len == TCPOLEN_SACK_PERMITTED &&
 			    (flags & TO_SYN) != 0 && V_tcp_do_sack)
 				to->to_flags |= TOF_SACKPERM;
 			break;
 		case TCPOPT_SACK:
 			/* Whole SACK blocks only, and never on a SYN. */
 			if (len > 2 && (len - 2) % TCPOLEN_SACK == 0 &&
 			    (flags & TO_SYN) == 0) {
 				to->to_flags |= TOF_SACK;
 				to->to_nsacks = (len - 2) / TCPOLEN_SACK;
 				to->to_sacks = cp + 2;
 				TCPSTAT_INC(tcps_sack_rcv_blocks);
 			}
 			break;
 		case TCPOPT_FAST_OPEN:
 			/*
 			 * The cookie length is not validated here; that
 			 * is left to the server side cookie checking
 			 * code or the client side cookie cache update
 			 * code.
 			 */
 			if ((flags & TO_SYN) != 0 &&
 			    (V_tcp_fastopen_client_enable ||
 			    V_tcp_fastopen_server_enable)) {
 				to->to_flags |= TOF_FASTOPEN;
 				to->to_tfo_len = len - 2;
 				if (to->to_tfo_len)
 					to->to_tfo_cookie = cp + 2;
 				else
 					to->to_tfo_cookie = NULL;
 			}
 			break;
 		default:
 			break;
 		}
 
 		/* Step over the option just examined. */
 		cnt -= len;
 		cp += len;
 	}
 }
 
 /*
  * Remove the urgent (out of band) byte from a segment so that it does
  * not show up in the user's data queue.  The byte is saved in the
  * connection's t_iobc and TCPOOB_HAVEDATA is set.  Only the mbuf
  * lengths are adjusted here.
  *
  * so  - socket whose tcpcb receives the byte.
  * th  - TCP header; th_urp locates the byte within the segment data.
  * m   - mbuf chain holding the segment.
  * off - number of bytes in the chain that precede the segment data.
  *
  * Panics if the computed offset is negative or the chain ends before
  * the byte is reached.
  */
 void
 tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
     int off)
 {
 	struct tcpcb *tp;
 	char *oob;
 	int skip;
 
 	/* Offset of the urgent byte from the start of the chain. */
 	skip = off + th->th_urp - 1;
 
 	while (skip >= 0) {
 		if (m->m_len <= skip) {
 			/* The byte lies beyond this mbuf; try the next. */
 			skip -= m->m_len;
 			m = m->m_next;
 			if (m == NULL)
 				break;
 			continue;
 		}
 
 		oob = mtod(m, caddr_t) + skip;
 		tp = sototcpcb(so);
 
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		tp->t_iobc = *oob;
 		tp->t_oobflags |= TCPOOB_HAVEDATA;
 		/* Close the gap left by the byte just taken out. */
 		bcopy(oob + 1, oob, (unsigned)(m->m_len - skip - 1));
 		m->m_len--;
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len--;
 		return;
 	}
 	panic("tcp_pulloutofband");
 }
 
 /*
  * Collect new round-trip time estimate
  * and update averages and current timeout.
  *
  * tp:  connection whose estimator (t_srtt, t_rttvar, t_rttbest) and
  *      retransmit timeout (t_rxtcur) are updated.  The inpcb write lock
  *      must be held.
  * rtt: the new round-trip sample.  The STATS conversion below
  *      (rtt * 1000 / hz) suggests it is expressed in ticks.
  *
  * Besides the estimator, this also bumps the "rtt updated" counters and
  * clears t_rtttime, t_rxtshift and t_softerror.
  */
 void
 tcp_xmit_timer(struct tcpcb *tp, int rtt)
 {
 	int delta;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	TCPSTAT_INC(tcps_rttupdated);
 	tp->t_rttupdated++;
 #ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT,
 	    imax(0, rtt * 1000 / hz));
 #endif
 	if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) {
 		/*
 		 * srtt is stored as fixed point with TCP_RTT_SHIFT bits
 		 * after the binary point.  The following magic
 		 * is equivalent to the smoothing algorithm in rfc793 with
 		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
 		 * point).  Adjust rtt to origin 0.
 		 *
 		 * NOTE(review): this comment historically read "5 bits
 		 * after the binary point (i.e., scaled by 8)"; 5 fractional
 		 * bits would be a scale of 32, not 8.  Confirm against the
 		 * definitions of TCP_RTT_SHIFT and TCP_RTT_SCALE.
 		 */
 		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
 			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
 
 		/* Never let the smoothed rtt collapse to zero or below. */
 		if ((tp->t_srtt += delta) <= 0)
 			tp->t_srtt = 1;
 
 		/*
 		 * We accumulate a smoothed rtt variance (actually, a
 		 * smoothed mean difference), then set the retransmit
 		 * timer to smoothed rtt + 4 times the smoothed variance.
 		 * rttvar is stored as fixed point with 4 bits after the
 		 * binary point (scaled by 16).  The following is
 		 * equivalent to rfc793 smoothing with an alpha of .75
 		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
 		 * rfc793's wired-in beta.
 		 */
 		if (delta < 0)
 			delta = -delta;
 		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
 		if ((tp->t_rttvar += delta) <= 0)
 			tp->t_rttvar = 1;
 		/* Track the smallest srtt + rttvar seen so far. */
 		if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
 		    tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	} else {
 		/*
 		 * No rtt measurement yet - use the unsmoothed rtt.
 		 * Set the variance to half the rtt (so our first
 		 * retransmit happens at 3*rtt).
 		 */
 		tp->t_srtt = rtt << TCP_RTT_SHIFT;
 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
 		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	}
 	/* The sample has been consumed; reset timing and backoff state. */
 	tp->t_rtttime = 0;
 	tp->t_rxtshift = 0;
 
 	/*
 	 * the retransmit should happen at rtt + 4 * rttvar.
 	 * Because of the way we do the smoothing, srtt and rttvar
 	 * will each average +1/2 tick of bias.  When we compute
 	 * the retransmit timer, we want 1/2 tick of rounding and
 	 * 1 extra tick because of +-1/2 tick uncertainty in the
 	 * firing of the timer.  The bias will give us exactly the
 	 * 1.5 tick we need.  But, because the bias is
 	 * statistical, we have to test that we don't drop below
 	 * the minimum feasible timer (which is 2 ticks).
 	 */
 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
 		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
 
 	/*
 	 * We received an ack for a packet that wasn't retransmitted;
 	 * it is probably safe to discard any error indications we've
 	 * received recently.  This isn't quite right, but close enough
 	 * for now (a route might have failed after we sent a segment,
 	 * and the return path might not be symmetrical).
 	 */
 	tp->t_softerror = 0;
 }
 
 /*
  * Determine a reasonable value for maxseg size.
  * If the route is known, check route for mtu.
  * If none, use an mss that can be handled on the outgoing interface
  * without forcing IP to fragment.  If no route is found, route has no mtu,
  * or the destination isn't local, use a default, hopefully conservative
  * size (usually 512 or the default IP max size, but no more than the mtu
  * of the interface), as we can't discover anything about intervening
  * gateways or networks.  We also initialize the congestion/slow start
  * window to be a single segment if the destination isn't local.
  * While looking at the routing entry, we also initialize other path-dependent
  * parameters from pre-set or cached values in the routing entry.
  *
  * NOTE that resulting t_maxseg doesn't include space for TCP options or
  * IP options, e.g. IPSEC data, since length of this data may vary, and
  * thus it is calculated for every segment separately in tcp_output().
  *
  * NOTE that this routine is only called when we process an incoming
  * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
  * settings are handled in tcp_mssopt().
  *
  * Parameters:
  *   tp        - connection to update; the result is stored in tp->t_maxseg.
  *               The inpcb write lock must be held.
  *   offer     - peer's MSS: 0 means the SYN carried no MSS option,
  *               -1 means no SYN has been received yet, any other value
  *               is raised to at least V_tcp_minmss.
  *   mtuoffer  - -1 when unused; otherwise an MTU from which the offer is
  *               derived (mtuoffer - min_protoh).  offer must then be -1.
  *   metricptr - optional; receives a copy of the hostcache metrics, or is
  *               zeroed when the function returns early for lack of a route.
  *   cap       - handed through to tcp_maxmtu()/tcp_maxmtu6().
  */
 void
 tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
     struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
 {
 	int mss = 0;
 	uint32_t maxmtu = 0;
 	struct inpcb *inp = tp->t_inpcb;
 	struct hc_metrics_lite metrics;
 #ifdef INET6
 	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
 	size_t min_protoh = isipv6 ?
 			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
 			    sizeof (struct tcpiphdr);
 #else
 	 size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* A non-zero t_port adds the UDP tunneling overhead to the headers. */
 	if (tp->t_port)
 		min_protoh += V_tcp_udp_tunneling_overhead;
 	if (mtuoffer != -1) {
 		KASSERT(offer == -1, ("%s: conflict", __func__));
 		offer = mtuoffer - min_protoh;
 	}
 
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
 		maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
 		tp->t_maxseg = V_tcp_v6mssdflt;
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
 		tp->t_maxseg = V_tcp_mssdflt;
 	}
 #endif
 
 	/*
 	 * No route to sender, stay with default mss and return.
 	 */
 	if (maxmtu == 0) {
 		/*
 		 * In case we return early we need to initialize metrics
 		 * to a defined state as tcp_hc_get() would do for us
 		 * if there was no cache hit.
 		 */
 		if (metricptr != NULL)
 			bzero(metricptr, sizeof(struct hc_metrics_lite));
 		return;
 	}
 
 	/* What have we got? */
 	switch (offer) {
 		case 0:
 			/*
 			 * Offer == 0 means that there was no MSS on the SYN
 			 * segment, in this case we use tcp_mssdflt as
 			 * already assigned to t_maxseg above.
 			 */
 			offer = tp->t_maxseg;
 			break;
 
 		case -1:
 			/*
 			 * Offer == -1 means that we didn't receive SYN yet.
 			 */
 			/* FALLTHROUGH */
 
 		default:
 			/*
 			 * Prevent DoS attack with too small MSS. Round up
 			 * to at least minmss.
 			 */
 			offer = max(offer, V_tcp_minmss);
 	}
 
 	/*
 	 * rmx information is now retrieved from tcp_hostcache.
 	 */
 	tcp_hc_get(&inp->inp_inc, &metrics);
 	if (metricptr != NULL)
 		bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));
 
 	/*
 	 * If there's a discovered mtu in tcp hostcache, use it.
 	 * Else, use the link mtu.
 	 */
 	if (metrics.rmx_mtu)
 		mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
 	else {
 #ifdef INET6
 		if (isipv6) {
 			mss = maxmtu - min_protoh;
 			if (!V_path_mtu_discovery &&
 			    !in6_localaddr(&inp->in6p_faddr))
 				mss = min(mss, V_tcp_v6mssdflt);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			mss = maxmtu - min_protoh;
 			if (!V_path_mtu_discovery &&
 			    !in_localaddr(inp->inp_faddr))
 				mss = min(mss, V_tcp_mssdflt);
 		}
 #endif
 		/*
 		 * XXX - The above conditional (mss = maxmtu - min_protoh)
 		 * probably violates the TCP spec.
 		 * The problem is that, since we don't know the
 		 * other end's MSS, we are supposed to use a conservative
 		 * default.  But, if we do that, then MTU discovery will
 		 * never actually take place, because the conservative
 		 * default is much less than the MTUs typically seen
 		 * on the Internet today.  For the moment, we'll sweep
 		 * this under the carpet.
 		 *
 		 * The conservative default might not actually be a problem
 		 * if the only case this occurs is when sending an initial
 		 * SYN with options and data to a host we've never talked
 		 * to before.  Then, they will reply with an MSS value which
 		 * will get recorded and the new parameters should get
 		 * recomputed.  For Further Study.
 		 */
 	}
 	/* The path-derived mss may not exceed what the peer offered. */
 	mss = min(mss, offer);
 
 	/*
 	 * Sanity check: make sure that maxseg will be large
 	 * enough to allow some data on segments even if
 	 * all the option space is used (40 bytes).  Otherwise
 	 * funny things may happen in tcp_output.
 	 *
 	 * XXXGL: shouldn't we reserve space for IP/IPv6 options?
 	 */
 	mss = max(mss, 64);
 
 	tp->t_maxseg = mss;
 }
 
 /*
  * Compute the connection's maximum segment size and size the socket
  * buffers to match.
  *
  * Calls tcp_mss_update() (with no MTU offer) to set tp->t_maxseg and fetch
  * the hostcache metrics, then:
  *  - picks a send buffer size (hostcache rmx_sendpipe if sb_hiwat is still
  *    the V_tcp_sendspace default, else the current sb_hiwat), lowers the
  *    mss to it if the buffer is smaller, otherwise rounds the buffer up to
  *    a multiple of the mss (capped at sb_max) and reserves it if it grew;
  *  - stores max(mss, 64) back into tp->t_maxseg;
  *  - does the analogous rounding for the receive buffer;
  *  - copies the interface TSO limits into the tcpcb when the interface
  *    capabilities returned in 'cap' include CSUM_TSO.
  *
  * 'offer' is passed through to tcp_mss_update() unchanged.
  */
 void
 tcp_mss(struct tcpcb *tp, int offer)
 {
 	int mss;
 	uint32_t bufsize;
 	struct inpcb *inp;
 	struct socket *so;
 	struct hc_metrics_lite metrics;
 	struct tcp_ifcap cap;
 
 	KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
 
 	/* Start from a zeroed capability record; tcp_mss_update() gets it. */
 	bzero(&cap, sizeof(cap));
 	tcp_mss_update(tp, offer, -1, &metrics, &cap);
 
 	mss = tp->t_maxseg;
 	inp = tp->t_inpcb;
 
 	/*
 	 * If there's a pipesize, change the socket buffer to that size,
 	 * don't change if sb_hiwat is different than default (then it
 	 * has been changed on purpose with setsockopt).
 	 * Make the socket buffers an integral number of mss units;
 	 * if the mss is larger than the socket buffer, decrease the mss.
 	 */
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe)
 		bufsize = metrics.rmx_sendpipe;
 	else
 		bufsize = so->so_snd.sb_hiwat;
 	if (bufsize < mss)
 		mss = bufsize;
 	else {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_snd.sb_hiwat)
-			(void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
+			(void)sbreserve_locked(so, SO_SND, bufsize, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_snd);
 	/*
 	 * Sanity check: make sure that maxseg will be large
 	 * enough to allow some data on segments even if
 	 * all the option space is used (40 bytes).  Otherwise
 	 * funny things may happen in tcp_output.
 	 *
 	 * XXXGL: shouldn't we reserve space for IP/IPv6 options?
 	 */
 	tp->t_maxseg = max(mss, 64);
 
 	/*
 	 * Same treatment for the receive buffer.  Note that the rounding
 	 * below uses the local mss, not the clamped value just stored in
 	 * t_maxseg.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
 		bufsize = metrics.rmx_recvpipe;
 	else
 		bufsize = so->so_rcv.sb_hiwat;
 	if (bufsize > mss) {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_rcv.sb_hiwat)
-			(void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
+			(void)sbreserve_locked(so, SO_RCV, bufsize, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/* Check the interface for TSO capabilities. */
 	if (cap.ifcap & CSUM_TSO) {
 		tp->t_flags |= TF_TSO;
 		tp->t_tsomax = cap.tsomax;
 		tp->t_tsomaxsegcount = cap.tsomaxsegcount;
 		tp->t_tsomaxsegsize = cap.tsomaxsegsize;
 	}
 }
 
 /*
  * Determine the MSS option to send on an outgoing SYN.
  */
 int
 tcp_mssopt(struct in_conninfo *inc)
 {
 	size_t min_protoh;
 	uint32_t maxmtu = 0;
 	uint32_t thcmtu = 0;
 	int mss = 0;
 
 	KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
 
 	/*
 	 * Per address family: fixed header size, interface/route MTU and
 	 * the default MSS to fall back on.
 	 */
 #ifdef INET6
 	if (inc->inc_flags & INC_ISIPV6) {
 		min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		maxmtu = tcp_maxmtu6(inc, NULL);
 		mss = V_tcp_v6mssdflt;
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		min_protoh = sizeof(struct tcpiphdr);
 		maxmtu = tcp_maxmtu(inc, NULL);
 		mss = V_tcp_mssdflt;
 	}
 #endif
 #if defined(INET6) || defined(INET)
 	/* MTU remembered in the TCP hostcache; covers IPv4 and IPv6. */
 	thcmtu = tcp_hc_getmtu(inc);
 #endif
 
 	/* Neither MTU is known: advertise the family default. */
 	if (maxmtu == 0 && thcmtu == 0)
 		return (mss);
 
 	/* Exactly one is known: max() selects the non-zero one. */
 	if (maxmtu == 0 || thcmtu == 0)
 		return (max(maxmtu, thcmtu) - min_protoh);
 
 	/* Both are known: the smaller one governs. */
 	return (min(maxmtu, thcmtu) - min_protoh);
 }
 
 /*
  * Proportional Rate Reduction: recompute tp->snd_cwnd in response to an
  * ACK received while the connection is in fast or congestion recovery.
  *
  * tp: connection; the inpcb write lock must be held.  Updates
  *     sackhint.prr_delivered, possibly sackhint.recover_fs, and snd_cwnd.
  * th: header of the incoming ACK (not referenced in this function).
  * to: parsed options of the incoming ACK; only TOF_SACK is examined.
  *
  * The final snd_cwnd is never left below one maxseg.
  */
 void
 tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
 {
 	int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
 	int maxseg = tcp_maxseg(tp);
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Compute the amount of data that this ACK is indicating
 	 * (del_data) and an estimate of how many bytes are in the
 	 * network.
 	 */
 	if (((tp->t_flags & TF_SACK_PERMIT) &&
 	    (to->to_flags & TOF_SACK)) ||
 	    (IN_CONGRECOVERY(tp->t_flags) &&
 	     !IN_FASTRECOVERY(tp->t_flags))) {
 		/* Use the delivered-data figure kept in the SACK hints. */
 		del_data = tp->sackhint.delivered_data;
 		if (V_tcp_do_newsack)
 			pipe = tcp_compute_pipe(tp);
 		else
 			pipe = (tp->snd_nxt - tp->snd_fack) +
 				tp->sackhint.sack_bytes_rexmit;
 	} else {
 		/*
 		 * No SACK information: credit one segment per ACK until
 		 * the delivered count reaches the bound below, and estimate
 		 * the pipe from the outstanding data minus one maxseg per
 		 * duplicate ACK (dupack count capped to avoid overflow).
 		 */
 		if (tp->sackhint.prr_delivered < (tcprexmtthresh * maxseg +
 					     tp->snd_recover - tp->snd_una))
 			del_data = maxseg;
 		pipe = imax(0, tp->snd_max - tp->snd_una -
 			    imin(INT_MAX / 65536, tp->t_dupacks) * maxseg);
 	}
 	tp->sackhint.prr_delivered += del_data;
 	/*
 	 * Proportional Rate Reduction
 	 */
 	if (pipe >= tp->snd_ssthresh) {
 		/* recover_fs is initialized lazily, to at least 1. */
 		if (tp->sackhint.recover_fs == 0)
 			tp->sackhint.recover_fs =
 			    imax(1, tp->snd_nxt - tp->snd_una);
 		/*
 		 * Proportional part: prr_delivered * ssthresh / recover_fs
 		 * (rounded up by howmany()), less what was already sent.
 		 */
 		snd_cnt = howmany((long)tp->sackhint.prr_delivered *
 			    tp->snd_ssthresh, tp->sackhint.recover_fs) -
 			    tp->sackhint.prr_out;
 	} else {
 		/*
 		 * Pipe is below ssthresh: send enough to grow back towards
 		 * ssthresh, bounded by 'limit'.
 		 */
 		if (V_tcp_do_prr_conservative || (del_data == 0))
 			limit = tp->sackhint.prr_delivered -
 				tp->sackhint.prr_out;
 		else
 			limit = imax(tp->sackhint.prr_delivered -
 				    tp->sackhint.prr_out, del_data) +
 				    maxseg;
 		snd_cnt = imin((tp->snd_ssthresh - pipe), limit);
 	}
 	/* From here on snd_cnt is a non-negative count of whole segments. */
 	snd_cnt = imax(snd_cnt, 0) / maxseg;
 	/*
 	 * Send snd_cnt new data into the network in response to this ack.
 	 * If there is going to be a SACK retransmission, adjust snd_cwnd
 	 * accordingly.
 	 */
 	if (IN_FASTRECOVERY(tp->t_flags)) {
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (to->to_flags & TOF_SACK)) {
 			tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
 					    tp->sackhint.sack_bytes_rexmit +
 					    (snd_cnt * maxseg);
 		} else {
 			tp->snd_cwnd = (tp->snd_max - tp->snd_una) +
 					    (snd_cnt * maxseg);
 		}
 	} else if (IN_CONGRECOVERY(tp->t_flags))
 		tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg);
 	/* Always allow at least one segment. */
 	tp->snd_cwnd = imax(maxseg, tp->snd_cwnd);
 }
 
 /*
  * When a partial ack arrives, force the retransmission of the
  * next unacknowledged segment.  Do not clear tp->t_dupacks.
  * Setting snd_nxt to th->th_ack forces the retransmission timer to
  * be started again.
  *
  * tp: connection; the inpcb write lock must be held.
  * th: header of the partial ACK.  tp->snd_una must not have been
  *     advanced for this ACK yet (BYTES_THIS_ACK() relies on that).
  */
 void
 tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
 {
 	/* Saved so they can be restored after the forced transmission. */
 	tcp_seq onxt = tp->snd_nxt;
 	uint32_t ocwnd = tp->snd_cwnd;
 	u_int maxseg = tcp_maxseg(tp);
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tcp_timer_activate(tp, TT_REXMT, 0);
 	tp->t_rtttime = 0;
 	tp->snd_nxt = th->th_ack;
 	/*
 	 * Set snd_cwnd to one segment beyond acknowledged offset.
 	 * (tp->snd_una has not yet been updated when this function is called.)
 	 */
 	tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
 	/* Undo the temporary window; move snd_nxt back only if it was ahead. */
 	tp->snd_cwnd = ocwnd;
 	if (SEQ_GT(onxt, tp->snd_nxt))
 		tp->snd_nxt = onxt;
 	/*
 	 * Partial window deflation.  Relies on fact that tp->snd_una
 	 * not updated yet.
 	 */
 	if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th))
 		tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
 	else
 		tp->snd_cwnd = 0;
 	tp->snd_cwnd += maxseg;
 }
 
 int
 tcp_compute_pipe(struct tcpcb *tp)
 {
 	int pipe;
 
 	/*
 	 * Outstanding data (snd_max - snd_una), plus bytes retransmitted
 	 * from the SACK scoreboard, minus bytes the peer has SACKed.
 	 */
 	pipe = tp->snd_max - tp->snd_una +
 	    tp->sackhint.sack_bytes_rexmit -
 	    tp->sackhint.sacked_bytes;
 
 	return (pipe);
 }
 
 uint32_t
 tcp_compute_initwnd(uint32_t maxseg)
 {
 	/*
 	 * Initial Window (also used as the Restart Window), in bytes.
 	 *
 	 * In order of precedence:
 	 *  - a user specified number of segments for the initial flight
 	 *    (RFC6928 raises the default to ten segments);
 	 *  - the slightly more aggressive values of RFC3390;
 	 *  - the conservative defaults of RFC5681 Section 3.1.
 	 */
 	if (V_tcp_initcwnd_segments) {
 		return (min(V_tcp_initcwnd_segments * maxseg,
 		    max(2 * maxseg, V_tcp_initcwnd_segments * 1460)));
 	}
 
 	if (V_tcp_do_rfc3390)
 		return (min(4 * maxseg, max(2 * maxseg, 4380)));
 
 	/* Per RFC5681 Section 3.1 */
 	if (maxseg <= 1095)
 		return (4 * maxseg);
 	if (maxseg <= 2190)
 		return (3 * maxseg);
 	return (2 * maxseg);
 }
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 299f1d034717..b0dedb1a24d1 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1,2175 +1,2175 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_output.c	8.4 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_kern_tls.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/arb.h>
 #include <sys/domain.h>
 #ifdef TCP_HHOOK
 #include <sys/hhook.h>
 #endif
 #include <sys/kernel.h>
 #ifdef KERN_TLS
 #include <sys/ktls.h>
 #endif
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/qmath.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/stats.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
 #include <netinet/tcp.h>
 #define	TCPOUTFLAGS
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
 #include <netinet/tcp_fastopen.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 #include <netinet/tcp_ecn.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Per-VNET tunables used by the TCP output path.  Each VNET_DEFINE()
  * supplies the variable and its initial value; the SYSCTL_INT() that
  * follows registers it read/write under the _net_inet_tcp node with the
  * given name and description.
  */
 
 /* Path MTU discovery switch; initial value 1. */
 VNET_DEFINE(int, path_mtu_discovery) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(path_mtu_discovery), 1,
 	"Enable Path MTU Discovery");
 
 /* TCP segmentation offload switch; initial value 1. */
 VNET_DEFINE(int, tcp_do_tso) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_do_tso), 0,
 	"Enable TCP Segmentation Offload");
 
 /* Initial send socket buffer size; initial value 32 KiB. */
 VNET_DEFINE(int, tcp_sendspace) = 1024*32;
 #define	V_tcp_sendspace	VNET(tcp_sendspace)
 SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
 
 /* Automatic send buffer sizing switch; initial value 1. */
 VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_do_autosndbuf), 0,
 	"Enable automatic send buffer sizing");
 
 /* Step by which the automatic send buffer grows; initial value 8 KiB. */
 VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_autosndbuf_inc), 0,
 	"Incrementor step size of automatic send buffer");
 
 /* Upper bound for the automatic send buffer; initial value 2 MiB. */
 VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_autosndbuf_max), 0,
 	"Max size of automatic send buffer");
 
 /* Whether auto send buffer growth accounts for SO_SNDLOWAT; initially 0. */
 VNET_DEFINE(int, tcp_sendbuf_auto_lowat) = 0;
 #define	V_tcp_sendbuf_auto_lowat	VNET(tcp_sendbuf_auto_lowat)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_sendbuf_auto_lowat), 0,
 	"Modify threshold for auto send buffer growth to account for SO_SNDLOWAT");
 
 /*
  * Assert that either the retransmit or the persist timer is running
  * whenever the segment carries data (len != 0) or has SYN or FIN set.
  * Zero-length segments with neither SYN nor FIN are exempt.
  */
 #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags)			\
 	KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
 	    tcp_timer_active((tp), TT_REXMT) ||				\
 	    tcp_timer_active((tp), TT_PERSIST),				\
 	    ("neither rexmt nor persist timer is set"))
 
 /* Forward declaration; the definition follows further down in this file. */
 static void inline	cc_after_idle(struct tcpcb *tp);
 
 #ifdef TCP_HHOOK
 /*
  * Wrapper for the TCP established output helper hook.
  */
 void
 hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
     struct tcpopt *to, uint32_t len, int tso)
 {
 	struct tcp_hhook_data hhook_data;
 
 	/* No hooks registered on this hook point: nothing to set up. */
 	if (!(V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks > 0))
 		return;
 
 	/* Bundle the caller's arguments for the hook functions. */
 	hhook_data.tso = tso;
 	hhook_data.len = len;
 	hhook_data.to = to;
 	hhook_data.th = th;
 	hhook_data.tp = tp;
 
 	hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data, tp->osd);
 }
 #endif
 
 /*
  * CC wrapper hook functions
  */
 static void inline
 cc_after_idle(struct tcpcb *tp)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* The after_idle callback is optional for a CC algorithm. */
 	if (CC_ALGO(tp)->after_idle == NULL)
 		return;
 
 	CC_ALGO(tp)->after_idle(tp->ccv);
 }
 
 /*
  * Tcp output routine: figure out what should be sent and send it.
  */
 int
 tcp_default_output(struct tcpcb *tp)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
 	int32_t len;
 	uint32_t recwin, sendwin;
 	uint16_t flags;
 	int off, error = 0;	/* Keep compiler happy */
 	u_int if_hw_tsomaxsegcount = 0;
 	u_int if_hw_tsomaxsegsize = 0;
 	struct mbuf *m;
 	struct ip *ip = NULL;
 #ifdef TCPDEBUG
 	struct ipovly *ipov = NULL;
 #endif
 	struct tcphdr *th;
 	u_char opt[TCP_MAXOLEN];
 	unsigned ipoptlen, optlen, hdrlen, ulen;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	unsigned ipsec_optlen = 0;
 #endif
 	int idle, sendalot, curticks;
 	int sack_rxmit, sack_bytes_rxmt;
 	struct sackhole *p;
 	int tso, mtu;
 	struct tcpopt to;
 	struct udphdr *udp = NULL;
 	unsigned int wanted_cookie = 0;
 	unsigned int dont_sendalot = 0;
 #if 0
 	int maxburst = TCP_MAXBURST;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
 
 	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 #endif
 #ifdef KERN_TLS
 	const bool hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0;
 #else
 	const bool hw_tls = false;
 #endif
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE)
 		return (tcp_offload_output(tp));
 #endif
 
 	/*
 	 * For TFO connections in SYN_SENT or SYN_RECEIVED,
 	 * only allow the initial SYN or SYN|ACK and those sent
 	 * by the retransmit timer.
 	 */
 	if (IS_FASTOPEN(tp->t_flags) &&
 	    ((tp->t_state == TCPS_SYN_SENT) ||
 	     (tp->t_state == TCPS_SYN_RECEIVED)) &&
 	    SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
 	    (tp->snd_nxt != tp->snd_una))       /* not a retransmit */
 		return (0);
 
 	/*
 	 * Determine length of data that should be transmitted,
 	 * and flags that will be used.
 	 * If there is some data or critical controls (SYN, RST)
 	 * to send, then transmit; otherwise, investigate further.
 	 */
 	idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
 	if (idle && (((ticks - tp->t_rcvtime) >= tp->t_rxtcur) ||
 	    (tp->t_sndtime && ((ticks - tp->t_sndtime) >= tp->t_rxtcur))))
 		cc_after_idle(tp);
 	tp->t_flags &= ~TF_LASTIDLE;
 	if (idle) {
 		if (tp->t_flags & TF_MORETOCOME) {
 			tp->t_flags |= TF_LASTIDLE;
 			idle = 0;
 		}
 	}
 again:
 	/*
 	 * If we've recently taken a timeout, snd_max will be greater than
 	 * snd_nxt.  There may be SACK information that allows us to avoid
 	 * resending already delivered data.  Adjust snd_nxt accordingly.
 	 */
 	if ((tp->t_flags & TF_SACK_PERMIT) &&
 	    SEQ_LT(tp->snd_nxt, tp->snd_max))
 		tcp_sack_adjust(tp);
 	sendalot = 0;
 	tso = 0;
 	mtu = 0;
 	off = tp->snd_nxt - tp->snd_una;
 	sendwin = min(tp->snd_wnd, tp->snd_cwnd);
 
 	flags = tcp_outflags[tp->t_state];
 	/*
 	 * Send any SACK-generated retransmissions.  If we're explicitly trying
 	 * to send out new data (when sendalot is 1), bypass this function.
 	 * If we retransmit in fast recovery mode, decrement snd_cwnd, since
 	 * we're replacing a (future) new transmission with a retransmission
 	 * now, and we previously incremented snd_cwnd in tcp_input().
 	 */
 	/*
 	 * Still in sack recovery , reset rxmit flag to zero.
 	 */
 	sack_rxmit = 0;
 	sack_bytes_rxmt = 0;
 	len = 0;
 	p = NULL;
 	if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) &&
 	    (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
 		uint32_t cwin;
 
 		cwin =
 		    imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0);
 		/* Do not retransmit SACK segments beyond snd_recover */
 		if (SEQ_GT(p->end, tp->snd_recover)) {
 			/*
 			 * (At least) part of sack hole extends beyond
 			 * snd_recover. Check to see if we can rexmit data
 			 * for this hole.
 			 */
 			if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
 				/*
 				 * Can't rexmit any more data for this hole.
 				 * That data will be rexmitted in the next
 				 * sack recovery episode, when snd_recover
 				 * moves past p->rxmit.
 				 */
 				p = NULL;
 				goto after_sack_rexmit;
 			} else
 				/* Can rexmit part of the current hole */
 				len = ((int32_t)ulmin(cwin,
 						   tp->snd_recover - p->rxmit));
 		} else
 			len = ((int32_t)ulmin(cwin, p->end - p->rxmit));
 		off = p->rxmit - tp->snd_una;
 		KASSERT(off >= 0,("%s: sack block to the left of una : %d",
 		    __func__, off));
 		if (len > 0) {
 			sack_rxmit = 1;
 			sendalot = 1;
 			TCPSTAT_INC(tcps_sack_rexmits);
 			TCPSTAT_ADD(tcps_sack_rexmit_bytes,
 			    min(len, tcp_maxseg(tp)));
 		}
 	}
 after_sack_rexmit:
 	/*
 	 * Get standard flags, and add SYN or FIN if requested by 'hidden'
 	 * state flags.
 	 */
 	if (tp->t_flags & TF_NEEDFIN)
 		flags |= TH_FIN;
 	if (tp->t_flags & TF_NEEDSYN)
 		flags |= TH_SYN;
 
 	SOCKBUF_LOCK(&so->so_snd);
 	/*
 	 * If in persist timeout with window of 0, send 1 byte.
 	 * Otherwise, if window is small but nonzero
 	 * and timer expired, we will send what we can
 	 * and go to transmit state.
 	 */
 	if (tp->t_flags & TF_FORCEDATA) {
 		if (sendwin == 0) {
 			/*
 			 * If we still have some data to send, then
 			 * clear the FIN bit.  Usually this would
 			 * happen below when it realizes that we
 			 * aren't sending all the data.  However,
 			 * if we have exactly 1 byte of unsent data,
 			 * then it won't clear the FIN bit below,
 			 * and if we are in persist state, we wind
 			 * up sending the packet without recording
 			 * that we sent the FIN bit.
 			 *
 			 * We can't just blindly clear the FIN bit,
 			 * because if we don't have any more data
 			 * to send then the probe will be the FIN
 			 * itself.
 			 */
 			if (off < sbused(&so->so_snd))
 				flags &= ~TH_FIN;
 			sendwin = 1;
 		} else {
 			tcp_timer_activate(tp, TT_PERSIST, 0);
 			tp->t_rxtshift = 0;
 		}
 	}
 
 	/*
 	 * If snd_nxt == snd_max and we have transmitted a FIN, the
 	 * offset will be > 0 even if so_snd.sb_cc is 0, resulting in
 	 * a negative length.  This can also occur when TCP opens up
 	 * its congestion window while receiving additional duplicate
 	 * acks after fast-retransmit because TCP will reset snd_nxt
 	 * to snd_max after the fast-retransmit.
 	 *
 	 * In the normal retransmit-FIN-only case, however, snd_nxt will
 	 * be set to snd_una, the offset will be 0, and the length may
 	 * wind up 0.
 	 *
 	 * If sack_rxmit is true we are retransmitting from the scoreboard
 	 * in which case len is already set.
 	 */
 	if (sack_rxmit == 0) {
 		if (sack_bytes_rxmt == 0)
 			len = ((int32_t)min(sbavail(&so->so_snd), sendwin) -
 			    off);
 		else {
 			int32_t cwin;
 
                         /*
 			 * We are inside of a SACK recovery episode and are
 			 * sending new data, having retransmitted all the
 			 * data possible in the scoreboard.
 			 */
 			len = ((int32_t)min(sbavail(&so->so_snd), tp->snd_wnd) -
 			    off);
 			/*
 			 * Don't remove this (len > 0) check !
 			 * We explicitly check for len > 0 here (although it
 			 * isn't really necessary), to work around a gcc
 			 * optimization issue - to force gcc to compute
 			 * len above. Without this check, the computation
 			 * of len is bungled by the optimizer.
 			 */
 			if (len > 0) {
 				cwin = tp->snd_cwnd -
 					(tp->snd_nxt - tp->snd_recover) -
 					sack_bytes_rxmt;
 				if (cwin < 0)
 					cwin = 0;
 				len = imin(len, cwin);
 			}
 		}
 	}
 
 	/*
 	 * Lop off SYN bit if it has already been sent.  However, if this
 	 * is SYN-SENT state and if segment contains data and if we don't
 	 * know that foreign host supports TAO, suppress sending segment.
 	 */
 	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
 		if (tp->t_state != TCPS_SYN_RECEIVED)
 			flags &= ~TH_SYN;
 		/*
 		 * When sending additional segments following a TFO SYN|ACK,
 		 * do not include the SYN bit.
 		 */
 		if (IS_FASTOPEN(tp->t_flags) &&
 		    (tp->t_state == TCPS_SYN_RECEIVED))
 			flags &= ~TH_SYN;
 		off--, len++;
 	}
 
 	/*
 	 * Be careful not to send data and/or FIN on SYN segments.
 	 * This measure is needed to prevent interoperability problems
 	 * with not fully conformant TCP implementations.
 	 */
 	if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
 		len = 0;
 		flags &= ~TH_FIN;
 	}
 
 	/*
 	 * On TFO sockets, ensure no data is sent in the following cases:
 	 *
 	 *  - When retransmitting SYN|ACK on a passively-created socket
 	 *
 	 *  - When retransmitting SYN on an actively created socket
 	 *
 	 *  - When sending a zero-length cookie (cookie request) on an
 	 *    actively created socket
 	 *
 	 *  - When the socket is in the CLOSED state (RST is being sent)
 	 */
 	if (IS_FASTOPEN(tp->t_flags) &&
 	    (((flags & TH_SYN) && (tp->t_rxtshift > 0)) ||
 	     ((tp->t_state == TCPS_SYN_SENT) &&
 	      (tp->t_tfo_client_cookie_len == 0)) ||
 	     (flags & TH_RST)))
 		len = 0;
 	if (len <= 0) {
 		/*
 		 * If FIN has been sent but not acked,
 		 * but we haven't been called to retransmit,
 		 * len will be < 0.  Otherwise, window shrank
 		 * after we sent into it.  If window shrank to 0,
 		 * cancel pending retransmit, pull snd_nxt back
 		 * to (closed) window, and set the persist timer
 		 * if it isn't already going.  If the window didn't
 		 * close completely, just wait for an ACK.
 		 *
 		 * We also do a general check here to ensure that
 		 * we will set the persist timer when we have data
 		 * to send, but a 0-byte window. This makes sure
 		 * the persist timer is set even if the packet
 		 * hits one of the "goto send" lines below.
 		 */
 		len = 0;
 		if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
 			(off < (int) sbavail(&so->so_snd))) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tp->t_rxtshift = 0;
 			tp->snd_nxt = tp->snd_una;
 			if (!tcp_timer_active(tp, TT_PERSIST))
 				tcp_setpersist(tp);
 		}
 	}
 
 	/* len will be >= 0 after this point. */
 	KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
 
 	tcp_sndbuf_autoscale(tp, so, sendwin);
 
 	/*
 	 * Decide if we can use TCP Segmentation Offloading (if supported by
 	 * hardware).
 	 *
 	 * TSO may only be used if we are in a pure bulk sending state.  The
 	 * presence of TCP-MD5, SACK retransmits, SACK advertizements and
 	 * IP options prevent using TSO.  With TSO the TCP header is the same
 	 * (except for the sequence number) for all generated packets.  This
 	 * makes it impossible to transmit any options which vary per generated
 	 * segment or packet.
 	 *
 	 * IPv4 handling has a clear separation of ip options and ip header
 	 * flags while IPv6 combines both in in6p_outputopts. ip6_optlen() does
 	 * the right thing below to provide length of just ip options and thus
 	 * checking for ipoptlen is enough to decide if ip options are present.
 	 */
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/*
 	 * Pre-calculate here as we save another lookup into the darknesses
 	 * of IPsec that way and can actually decide if TSO is ok.
 	 */
 #ifdef INET6
 	if (isipv6 && IPSEC_ENABLED(ipv6))
 		ipsec_optlen = IPSEC_HDRSIZE(ipv6, tp->t_inpcb);
 #ifdef INET
 	else
 #endif
 #endif /* INET6 */
 #ifdef INET
 	if (IPSEC_ENABLED(ipv4))
 		ipsec_optlen = IPSEC_HDRSIZE(ipv4, tp->t_inpcb);
 #endif /* INET */
 #endif /* IPSEC */
 #ifdef INET6
 	if (isipv6)
 		ipoptlen = ip6_optlen(tp->t_inpcb);
 	else
 #endif
 	if (tp->t_inpcb->inp_options)
 		ipoptlen = tp->t_inpcb->inp_options->m_len -
 				offsetof(struct ipoption, ipopt_list);
 	else
 		ipoptlen = 0;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	ipoptlen += ipsec_optlen;
 #endif
 
 	if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
 	    (tp->t_port == 0) &&
 	    ((tp->t_flags & TF_SIGNATURE) == 0) &&
 	    tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
 	    ipoptlen == 0 && !(flags & TH_SYN))
 		tso = 1;
 
 	if (sack_rxmit) {
 		if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd)))
 			flags &= ~TH_FIN;
 	} else {
 		if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
 		    sbused(&so->so_snd)))
 			flags &= ~TH_FIN;
 	}
 
 	recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
 	    (long)TCP_MAXWIN << tp->rcv_scale);
 
 	/*
 	 * Sender silly window avoidance.   We transmit under the following
 	 * conditions when len is non-zero:
 	 *
 	 *	- We have a full segment (or more with TSO)
 	 *	- This is the last buffer in a write()/send() and we are
 	 *	  either idle or running NODELAY
 	 *	- we've timed out (e.g. persist timer)
 	 *	- we have more than 1/2 the maximum send window's worth of
 	 *	  data (receiver may have limited the window size)
 	 *	- we need to retransmit
 	 */
 	if (len) {
 		if (len >= tp->t_maxseg)
 			goto send;
 		/*
 		 * As the TCP header options are now
 		 * considered when setting up the initial
 		 * window, we would not send the last segment
 		 * if we skip considering the option length here.
 		 * Note: this may not work when tcp headers change
 		 * very dynamically in the future.
 		 */
 		if ((((tp->t_flags & TF_SIGNATURE) ?
 			PADTCPOLEN(TCPOLEN_SIGNATURE) : 0) +
 		    ((tp->t_flags & TF_RCVD_TSTMP) ?
 			PADTCPOLEN(TCPOLEN_TIMESTAMP) : 0) +
 		    len) >= tp->t_maxseg)
 			goto send;
 		/*
 		 * NOTE! on localhost connections an 'ack' from the remote
 		 * end may occur synchronously with the output and cause
 		 * us to flush a buffer queued with moretocome.  XXX
 		 *
 		 * note: the len + off check is almost certainly unnecessary.
 		 */
 		if (!(tp->t_flags & TF_MORETOCOME) &&	/* normal case */
 		    (idle || (tp->t_flags & TF_NODELAY)) &&
 		    (uint32_t)len + (uint32_t)off >= sbavail(&so->so_snd) &&
 		    (tp->t_flags & TF_NOPUSH) == 0) {
 			goto send;
 		}
 		if (tp->t_flags & TF_FORCEDATA)		/* typ. timeout case */
 			goto send;
 		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
 			goto send;
 		if (SEQ_LT(tp->snd_nxt, tp->snd_max))	/* retransmit case */
 			goto send;
 		if (sack_rxmit)
 			goto send;
 	}
 
 	/*
 	 * Sending of standalone window updates.
 	 *
 	 * Window updates are important when we close our window due to a
 	 * full socket buffer and are opening it again after the application
 	 * reads data from it.  Once the window has opened again and the
 	 * remote end starts to send again the ACK clock takes over and
 	 * provides the most current window information.
 	 *
 	 * We must avoid the silly window syndrome whereby every read
 	 * from the receive buffer, no matter how small, causes a window
 	 * update to be sent.  We also should avoid sending a flurry of
 	 * window updates when the socket buffer had queued a lot of data
 	 * and the application is doing small reads.
 	 *
 	 * Prevent a flurry of pointless window updates by only sending
 	 * an update when we can increase the advertised window by more
 	 * than 1/4th of the socket buffer capacity.  When the buffer is
 	 * getting full or is very small be more aggressive and send an
 	 * update whenever we can increase by two mss sized segments.
 	 * In all other situations the ACK's to new incoming data will
 	 * carry further window increases.
 	 *
 	 * Don't send an independent window update if a delayed
 	 * ACK is pending (it will get piggy-backed on it) or the
 	 * remote side already has done a half-close and won't send
 	 * more data.  Skip this if the connection is in T/TCP
 	 * half-open state.
 	 */
 	if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
 	    !(tp->t_flags & TF_DELACK) &&
 	    !TCPS_HAVERCVDFIN(tp->t_state)) {
 		/*
 		 * "adv" is the amount we could increase the window,
 		 * taking into account that we are limited by
 		 * TCP_MAXWIN << tp->rcv_scale.
 		 */
 		int32_t adv;
 		int oldwin;
 
 		adv = recwin;
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
 			oldwin = (tp->rcv_adv - tp->rcv_nxt);
 			if (adv > oldwin)
 				adv -= oldwin;
 			else
 				adv = 0;
 		} else
 			oldwin = 0;
 
 		/*
 		 * If the new window size ends up being the same as or less
 		 * than the old size when it is scaled, then don't force
 		 * a window update.
 		 */
 		if (oldwin >> tp->rcv_scale >= (adv + oldwin) >> tp->rcv_scale)
 			goto dontupdate;
 
 		if (adv >= (int32_t)(2 * tp->t_maxseg) &&
 		    (adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) ||
 		     recwin <= (so->so_rcv.sb_hiwat / 8) ||
 		     so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg ||
 		     adv >= TCP_MAXWIN << tp->rcv_scale))
 			goto send;
 		if (2 * adv >= (int32_t)so->so_rcv.sb_hiwat)
 			goto send;
 	}
 dontupdate:
 
 	/*
 	 * Send if we owe the peer an ACK, RST, SYN, or urgent data.  ACKNOW
 	 * is also a catch-all for the retransmit timer timeout case.
 	 */
 	if (tp->t_flags & TF_ACKNOW)
 		goto send;
 	if ((flags & TH_RST) ||
 	    ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
 		goto send;
 	if (SEQ_GT(tp->snd_up, tp->snd_una))
 		goto send;
 	/*
 	 * If our state indicates that FIN should be sent
 	 * and we have not yet done so, then we need to send.
 	 */
 	if (flags & TH_FIN &&
 	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
 		goto send;
 	/*
 	 * In SACK, it is possible for tcp_output to fail to send a segment
 	 * after the retransmission timer has been turned off.  Make sure
 	 * that the retransmission timer is set.
 	 */
 	if ((tp->t_flags & TF_SACK_PERMIT) &&
 	    SEQ_GT(tp->snd_max, tp->snd_una) &&
 	    !tcp_timer_active(tp, TT_REXMT) &&
 	    !tcp_timer_active(tp, TT_PERSIST)) {
 		tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 		goto just_return;
 	}
 	/*
 	 * TCP window updates are not reliable, rather a polling protocol
 	 * using ``persist'' packets is used to ensure receipt of window
 	 * updates.  The three ``states'' for the output side are:
 	 *	idle			not doing retransmits or persists
 	 *	persisting		to move a small or zero window
 	 *	(re)transmitting	and thereby not persisting
 	 *
 	 * tcp_timer_active(tp, TT_PERSIST)
 	 *	is true when we are in persist state.
 	 * (tp->t_flags & TF_FORCEDATA)
 	 *	is set when we are called to send a persist packet.
 	 * tcp_timer_active(tp, TT_REXMT)
 	 *	is set when we are retransmitting
 	 * The output side is idle when both timers are zero.
 	 *
 	 * If send window is too small, there is data to transmit, and no
 	 * retransmit or persist is pending, then go to persist state.
 	 * If nothing happens soon, send when timer expires:
 	 * if window is nonzero, transmit what we can,
 	 * otherwise force out a byte.
 	 */
 	if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) &&
 	    !tcp_timer_active(tp, TT_PERSIST)) {
 		tp->t_rxtshift = 0;
 		tcp_setpersist(tp);
 	}
 
 	/*
 	 * No reason to send a segment, just return.
 	 */
 just_return:
 	SOCKBUF_UNLOCK(&so->so_snd);
 	return (0);
 
 send:
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	if (len > 0) {
 		if (len >= tp->t_maxseg)
 			tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
 		else
 			tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
 	}
 	/*
 	 * Before ESTABLISHED, force sending of initial options
 	 * unless TCP set not to do any options.
 	 * NOTE: we assume that the IP/TCP header plus TCP options
 	 * always fit in a single mbuf, leaving room for a maximum
 	 * link header, i.e.
 	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
 	 */
 	optlen = 0;
 #ifdef INET6
 	if (isipv6)
 		hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
 	else
 #endif
 		hdrlen = sizeof (struct tcpiphdr);
 
 	if (flags & TH_SYN) {
 		tp->snd_nxt = tp->iss;
 	}
 
 	/*
 	 * Compute options for segment.
 	 * We only have to care about SYN and established connection
 	 * segments.  Options for SYN-ACK segments are handled in TCP
 	 * syncache.
 	 */
 	to.to_flags = 0;
 	if ((tp->t_flags & TF_NOOPT) == 0) {
 		/* Maximum segment size. */
 		if (flags & TH_SYN) {
 			to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
 			if (tp->t_port)
 				to.to_mss -= V_tcp_udp_tunneling_overhead;
 			to.to_flags |= TOF_MSS;
 
 			/*
 			 * On SYN or SYN|ACK transmits on TFO connections,
 			 * only include the TFO option if it is not a
 			 * retransmit, as the presence of the TFO option may
 			 * have caused the original SYN or SYN|ACK to have
 			 * been dropped by a middlebox.
 			 */
 			if (IS_FASTOPEN(tp->t_flags) &&
 			    (tp->t_rxtshift == 0)) {
 				if (tp->t_state == TCPS_SYN_RECEIVED) {
 					to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
 					to.to_tfo_cookie =
 					    (u_int8_t *)&tp->t_tfo_cookie.server;
 					to.to_flags |= TOF_FASTOPEN;
 					wanted_cookie = 1;
 				} else if (tp->t_state == TCPS_SYN_SENT) {
 					to.to_tfo_len =
 					    tp->t_tfo_client_cookie_len;
 					to.to_tfo_cookie =
 					    tp->t_tfo_cookie.client;
 					to.to_flags |= TOF_FASTOPEN;
 					wanted_cookie = 1;
 					/*
 					 * If we wind up having more data to
 					 * send with the SYN than can fit in
 					 * one segment, don't send any more
 					 * until the SYN|ACK comes back from
 					 * the other end.
 					 */
 					dont_sendalot = 1;
 				}
 			}
 		}
 		/* Window scaling. */
 		if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
 			to.to_wscale = tp->request_r_scale;
 			to.to_flags |= TOF_SCALE;
 		}
 		/* Timestamps. */
 		if ((tp->t_flags & TF_RCVD_TSTMP) ||
 		    ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
 			curticks = tcp_ts_getticks();
 			to.to_tsval = curticks + tp->ts_offset;
 			to.to_tsecr = tp->ts_recent;
 			to.to_flags |= TOF_TS;
 			if (tp->t_rxtshift == 1)
 				tp->t_badrxtwin = curticks;
 		}
 
 		/* Set receive buffer autosizing timestamp. */
 		if (tp->rfbuf_ts == 0 &&
 		    (so->so_rcv.sb_flags & SB_AUTOSIZE))
 			tp->rfbuf_ts = tcp_ts_getticks();
 
 		/* Selective ACK's. */
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (flags & TH_SYN)
 				to.to_flags |= TOF_SACKPERM;
 			else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
 			    tp->rcv_numsacks > 0) {
 				to.to_flags |= TOF_SACK;
 				to.to_nsacks = tp->rcv_numsacks;
 				to.to_sacks = (u_char *)tp->sackblks;
 			}
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		/* TCP-MD5 (RFC2385). */
 		/*
 		 * Check that TCP_MD5SIG is enabled in tcpcb to
 		 * account the size needed to set this TCP option.
 		 */
 		if (tp->t_flags & TF_SIGNATURE)
 			to.to_flags |= TOF_SIGNATURE;
 #endif /* TCP_SIGNATURE */
 
 		/* Processing the options. */
 		hdrlen += optlen = tcp_addoptions(&to, opt);
 		/*
 		 * If we wanted a TFO option to be added, but it was unable
 		 * to fit, ensure no data is sent.
 		 */
 		if (IS_FASTOPEN(tp->t_flags) && wanted_cookie &&
 		    !(to.to_flags & TOF_FASTOPEN))
 			len = 0;
 	}
 	if (tp->t_port) {
 		if (V_tcp_udp_tunneling_port == 0) {
 			/* The port was removed?? */
 			SOCKBUF_UNLOCK(&so->so_snd);
 			return (EHOSTUNREACH);
 		}
 		hdrlen += sizeof(struct udphdr);
 	}
 	/*
 	 * Adjust data length if insertion of options will
 	 * bump the packet length beyond the t_maxseg length.
 	 * Clear the FIN bit because we cut off the tail of
 	 * the segment.
 	 */
 	if (len + optlen + ipoptlen > tp->t_maxseg) {
 		flags &= ~TH_FIN;
 
 		if (tso) {
 			u_int if_hw_tsomax;
 			u_int moff;
 			int max_len;
 
 			/* extract TSO information */
 			if_hw_tsomax = tp->t_tsomax;
 			if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
 			if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
 
 			/*
 			 * Limit a TSO burst to prevent it from
 			 * overflowing or exceeding the maximum length
 			 * allowed by the network interface:
 			 */
 			KASSERT(ipoptlen == 0,
 			    ("%s: TSO can't do IP options", __func__));
 
 			/*
 			 * Check if we should limit by maximum payload
 			 * length:
 			 */
 			if (if_hw_tsomax != 0) {
 				/* compute maximum TSO length */
 				max_len = (if_hw_tsomax - hdrlen -
 				    max_linkhdr);
 				if (max_len <= 0) {
 					len = 0;
 				} else if (len > max_len) {
 					sendalot = 1;
 					len = max_len;
 				}
 			}
 
 			/*
 			 * Prevent the last segment from being
 			 * fractional unless the send sockbuf can be
 			 * emptied:
 			 */
 			max_len = (tp->t_maxseg - optlen);
 			if (((uint32_t)off + (uint32_t)len) <
 			    sbavail(&so->so_snd)) {
 				moff = len % max_len;
 				if (moff != 0) {
 					len -= moff;
 					sendalot = 1;
 				}
 			}
 
 			/*
 			 * In case there are too many small fragments
 			 * don't use TSO:
 			 */
 			if (len <= max_len) {
 				len = max_len;
 				sendalot = 1;
 				tso = 0;
 			}
 
 			/*
 			 * Send the FIN in a separate segment
 			 * after the bulk sending is done.
 			 * We don't trust the TSO implementations
 			 * to clear the FIN flag on all but the
 			 * last segment.
 			 */
 			if (tp->t_flags & TF_NEEDFIN)
 				sendalot = 1;
 		} else {
 			if (optlen + ipoptlen >= tp->t_maxseg) {
 				/*
 				 * Since we don't have enough space to put
 				 * the IP header chain and the TCP header in
 				 * one packet as required by RFC 7112, don't
 				 * send it. Also ensure that at least one
 				 * byte of the payload can be put into the
 				 * TCP segment.
 				 */
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EMSGSIZE;
 				sack_rxmit = 0;
 				goto out;
 			}
 			len = tp->t_maxseg - optlen - ipoptlen;
 			sendalot = 1;
 			if (dont_sendalot)
 				sendalot = 0;
 		}
 	} else
 		tso = 0;
 
 	KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET,
 	    ("%s: len > IP_MAXPACKET", __func__));
 
 /*#ifdef DIAGNOSTIC*/
 #ifdef INET6
 	if (max_linkhdr + hdrlen > MCLBYTES)
 #else
 	if (max_linkhdr + hdrlen > MHLEN)
 #endif
 		panic("tcphdr too big");
 /*#endif*/
 
 	/*
 	 * This KASSERT is here to catch edge cases at a well defined place.
 	 * Before, those had triggered (random) panic conditions further down.
 	 */
 	KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
 
 	/*
 	 * Grab a header mbuf, attaching a copy of data to
 	 * be transmitted, and initialize the header from
 	 * the template for sends on this connection.
 	 */
 	if (len) {
 		struct mbuf *mb;
 		struct sockbuf *msb;
 		u_int moff;
 
 		if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
 			TCPSTAT_INC(tcps_sndprobe);
 #ifdef STATS
 			if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 				stats_voi_update_abs_u32(tp->t_stats,
 				VOI_TCP_RETXPB, len);
 			else
 				stats_voi_update_abs_u64(tp->t_stats,
 				    VOI_TCP_TXPB, len);
 #endif /* STATS */
 		} else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
 			tp->t_sndrexmitpack++;
 			TCPSTAT_INC(tcps_sndrexmitpack);
 			TCPSTAT_ADD(tcps_sndrexmitbyte, len);
 #ifdef STATS
 			stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
 			    len);
 #endif /* STATS */
 		} else {
 			TCPSTAT_INC(tcps_sndpack);
 			TCPSTAT_ADD(tcps_sndbyte, len);
 #ifdef STATS
 			stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
 			    len);
 #endif /* STATS */
 		}
 #ifdef INET6
 		if (MHLEN < hdrlen + max_linkhdr)
 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		else
 #endif
 			m = m_gethdr(M_NOWAIT, MT_DATA);
 
 		if (m == NULL) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = ENOBUFS;
 			sack_rxmit = 0;
 			goto out;
 		}
 
 		m->m_data += max_linkhdr;
 		m->m_len = hdrlen;
 
 		/*
 		 * Start the m_copy functions from the closest mbuf
 		 * to the offset in the socket buffer chain.
 		 */
 		mb = sbsndptr_noadv(&so->so_snd, off, &moff);
 		if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) {
 			m_copydata(mb, moff, len,
 			    mtod(m, caddr_t) + hdrlen);
 			if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 				sbsndptr_adv(&so->so_snd, mb, len);
 			m->m_len += len;
 		} else {
 			if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 				msb = NULL;
 			else
 				msb = &so->so_snd;
 			m->m_next = tcp_m_copym(mb, moff,
 			    &len, if_hw_tsomaxsegcount,
 			    if_hw_tsomaxsegsize, msb, hw_tls);
 			if (len <= (tp->t_maxseg - optlen)) {
 				/*
 				 * Must have run out of mbufs for the copy;
 				 * shorten it to no longer need TSO.  Let's
 				 * not set sendalot since we are low on
 				 * mbufs.
 				 */
 				tso = 0;
 			}
 			if (m->m_next == NULL) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				(void) m_free(m);
 				error = ENOBUFS;
 				sack_rxmit = 0;
 				goto out;
 			}
 		}
 
 		/*
 		 * If we're sending everything we've got, set PUSH.
 		 * (This will keep happy those implementations which only
 		 * give data to the user when a buffer fills or
 		 * a PUSH comes in.)
 		 */
 		if (((uint32_t)off + (uint32_t)len == sbused(&so->so_snd)) &&
 		    !(flags & TH_SYN))
 			flags |= TH_PUSH;
 		SOCKBUF_UNLOCK(&so->so_snd);
 	} else {
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (tp->t_flags & TF_ACKNOW)
 			TCPSTAT_INC(tcps_sndacks);
 		else if (flags & (TH_SYN|TH_FIN|TH_RST))
 			TCPSTAT_INC(tcps_sndctrl);
 		else if (SEQ_GT(tp->snd_up, tp->snd_una))
 			TCPSTAT_INC(tcps_sndurg);
 		else
 			TCPSTAT_INC(tcps_sndwinup);
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			sack_rxmit = 0;
 			goto out;
 		}
 #ifdef INET6
 		if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
 		    MHLEN >= hdrlen) {
 			M_ALIGN(m, hdrlen);
 		} else
 #endif
 		m->m_data += max_linkhdr;
 		m->m_len = hdrlen;
 	}
 	SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef MAC
 	mac_inpcb_create_mbuf(tp->t_inpcb, m);
 #endif
 #ifdef INET6
 	if (isipv6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (tp->t_port) {
 			udp = (struct udphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr));
 			udp->uh_sport = htons(V_tcp_udp_tunneling_port);
 			udp->uh_dport = tp->t_port;
 			ulen = hdrlen + len - sizeof(struct ip6_hdr);
 			udp->uh_ulen = htons(ulen);
 			th = (struct tcphdr *)(udp + 1);
 		} else {
 			th = (struct tcphdr *)(ip6 + 1);
 		}
 		tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th);
 	} else
 #endif /* INET6 */
 	{
 		ip = mtod(m, struct ip *);
 #ifdef TCPDEBUG
 		ipov = (struct ipovly *)ip;
 #endif
 		if (tp->t_port) {
 			udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip));
 			udp->uh_sport = htons(V_tcp_udp_tunneling_port);
 			udp->uh_dport = tp->t_port;
 			ulen = hdrlen + len - sizeof(struct ip);
 			udp->uh_ulen = htons(ulen);
 			th = (struct tcphdr *)(udp + 1);
 		} else
 			th = (struct tcphdr *)(ip + 1);
 		tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th);
 	}
 
 	/*
 	 * Fill in fields, remembering maximum advertised
 	 * window for use in delaying messages about window sizes.
 	 * If resending a FIN, be sure not to use a new sequence number.
 	 */
 	if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
 	    tp->snd_nxt == tp->snd_max)
 		tp->snd_nxt--;
 	/*
 	 * If we are starting a connection, send ECN setup
 	 * SYN packet. If we are on a retransmit, we may
 	 * resend those bits a number of times as per
 	 * RFC 3168.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
 		flags |= tcp_ecn_output_syn_sent(tp);
 	}
 	/* Also handle parallel SYN for ECN */
 	if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
 	    (tp->t_flags2 & TF2_ECN_PERMIT)) {
 		int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
 		if ((tp->t_state == TCPS_SYN_RECEIVED) &&
 		    (tp->t_flags2 & TF2_ECN_SND_ECE))
 			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
 #ifdef INET6
 		if (isipv6) {
 			ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
 			ip6->ip6_flow |= htonl(ect << 20);
 		}
 		else
 #endif
 		{
 			ip->ip_tos &= ~IPTOS_ECN_MASK;
 			ip->ip_tos |= ect;
 		}
 	}
 
 	/*
 	 * If we are doing retransmissions, then snd_nxt will
 	 * not reflect the first unsent octet.  For ACK only
 	 * packets, we do not want the sequence number of the
 	 * retransmitted packet, we want the sequence number
 	 * of the next unsent octet.  So, if there is no data
 	 * (and no SYN or FIN), use snd_max instead of snd_nxt
 	 * when filling in ti_seq.  But if we are in persist
 	 * state, snd_max might reflect one byte beyond the
 	 * right edge of the window, so use snd_nxt in that
 	 * case, since we know we aren't doing a retransmission.
 	 * (retransmit and persist are mutually exclusive...)
 	 */
 	if (sack_rxmit == 0) {
 		if (len || (flags & (TH_SYN|TH_FIN)) ||
 		    tcp_timer_active(tp, TT_PERSIST))
 			th->th_seq = htonl(tp->snd_nxt);
 		else
 			th->th_seq = htonl(tp->snd_max);
 	} else {
 		th->th_seq = htonl(p->rxmit);
 		p->rxmit += len;
 		/*
 		 * Lost Retransmission Detection
 		 * trigger resending of a (then
 		 * still existing) hole, when
 		 * fack acks recoverypoint.
 		 */
 		if ((tp->t_flags & TF_LRD) && SEQ_GEQ(p->rxmit, p->end))
 			p->rxmit = tp->snd_recover;
 		tp->sackhint.sack_bytes_rexmit += len;
 	}
 	if (IN_RECOVERY(tp->t_flags)) {
 		/*
 		 * Account all bytes transmitted while
 		 * IN_RECOVERY, simplifying PRR and
 		 * Lost Retransmit Detection
 		 */
 		tp->sackhint.prr_out += len;
 	}
 	th->th_ack = htonl(tp->rcv_nxt);
 	if (optlen) {
 		bcopy(opt, th + 1, optlen);
 		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
 	}
 	tcp_set_flags(th, flags);
 	/*
 	 * Calculate receive window.  Don't shrink window,
 	 * but avoid silly window syndrome.
 	 * If a RST segment is sent, advertise a window of zero.
 	 */
 	if (flags & TH_RST) {
 		recwin = 0;
 	} else {
 		if (recwin < (so->so_rcv.sb_hiwat / 4) &&
 		    recwin < tp->t_maxseg)
 			recwin = 0;
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
 		    recwin < (tp->rcv_adv - tp->rcv_nxt))
 			recwin = (tp->rcv_adv - tp->rcv_nxt);
 	}
 	/*
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.  The <SYN,ACK>
 	 * case is handled in syncache.
 	 */
 	if (flags & TH_SYN)
 		th->th_win = htons((u_short)
 				(min(sbspace(&so->so_rcv), TCP_MAXWIN)));
 	else {
 		/* Avoid shrinking window with window scaling. */
 		recwin = roundup2(recwin, 1 << tp->rcv_scale);
 		th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
 	}
 
 	/*
 	 * Adjust the RXWIN0SENT flag - indicate that we have advertised
 	 * a 0 window.  This may cause the remote transmitter to stall.  This
 	 * flag tells soreceive() to disable delayed acknowledgements when
 	 * draining the buffer.  This can occur if the receiver is attempting
 	 * to read more data than can be buffered prior to transmitting on
 	 * the connection.
 	 */
 	if (th->th_win == 0) {
 		tp->t_sndzerowin++;
 		tp->t_flags |= TF_RXWIN0SENT;
 	} else
 		tp->t_flags &= ~TF_RXWIN0SENT;
 	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
 		th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
 		th->th_flags |= TH_URG;
 	} else
 		/*
 		 * If no urgent pointer to send, then we pull
 		 * the urgent pointer to the left edge of the send window
 		 * so that it doesn't drift into the send window on sequence
 		 * number wraparound.
 		 */
 		tp->snd_up = tp->snd_una;		/* drag it along */
 
 	/*
 	 * Put TCP length in extended header, and then
 	 * checksum extended header and data.
 	 */
 	m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	if (to.to_flags & TOF_SIGNATURE) {
 		/*
 		 * Calculate MD5 signature and put it into the place
 		 * determined before.
 		 * NOTE: since TCP options buffer doesn't point into
 		 * mbuf's data, calculate offset and use it.
 		 */
 		if (!TCPMD5_ENABLED() || (error = TCPMD5_OUTPUT(m, th,
 		    (u_char *)(th + 1) + (to.to_signature - opt))) != 0) {
 			/*
 			 * Do not send segment if the calculation of MD5
 			 * digest has failed.
 			 */
 			m_freem(m);
 			goto out;
 		}
 	}
 #endif
 #ifdef INET6
 	if (isipv6) {
 		/*
 		 * There is no need to fill in ip6_plen right now.
 		 * It will be filled later by ip6_output.
 		 */
 		if (tp->t_port) {
 			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
 			th->th_sum = htons(0);
 			UDPSTAT_INC(udps_opackets);
 		} else {
 			m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			th->th_sum = in6_cksum_pseudo(ip6,
 			    sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
 			    0);
 		}
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		if (tp->t_port) {
 			m->m_pkthdr.csum_flags = CSUM_UDP;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
 			   ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
 			th->th_sum = htons(0);
 			UDPSTAT_INC(udps_opackets);
 		} else {
 			m->m_pkthdr.csum_flags = CSUM_TCP;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			th->th_sum = in_pseudo(ip->ip_src.s_addr,
 			    ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
 			    IPPROTO_TCP + len + optlen));
 		}
 
 		/* IP version must be set here for ipv4/ipv6 checking later */
 		KASSERT(ip->ip_v == IPVERSION,
 		    ("%s: IP version incorrect: %d", __func__, ip->ip_v));
 	}
 #endif
 
 	/*
 	 * Enable TSO and specify the size of the segments.
 	 * The TCP pseudo header checksum is always provided.
 	 */
 	if (tso) {
 		KASSERT(len > tp->t_maxseg - optlen,
 		    ("%s: len <= tso_segsz", __func__));
 		m->m_pkthdr.csum_flags |= CSUM_TSO;
 		m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
 	}
 
 	KASSERT(len + hdrlen == m_length(m, NULL),
 	    ("%s: mbuf chain shorter than expected: %d + %u != %u",
 	    __func__, len, hdrlen, m_length(m, NULL)));
 
 #ifdef TCP_HHOOK
 	/* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */
 	hhook_run_tcp_est_out(tp, th, &to, len, tso);
 #endif
 
 #ifdef TCPDEBUG
 	/*
 	 * Trace.
 	 */
 	if (so->so_options & SO_DEBUG) {
 		u_short save = 0;
 #ifdef INET6
 		if (!isipv6)
 #endif
 		{
 			save = ipov->ih_len;
 			ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + (th->th_off << 2) */);
 		}
 		tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0);
 #ifdef INET6
 		if (!isipv6)
 #endif
 		ipov->ih_len = save;
 	}
 #endif /* TCPDEBUG */
 	TCP_PROBE3(debug__output, tp, th, m);
 
 	/* We're getting ready to send; log now. */
 	TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK,
 	    len, NULL, false);
 
 	/*
 	 * Fill in IP length and desired time to live and
 	 * send to IP level.  There should be a better way
 	 * to handle ttl and tos; we could keep them in
 	 * the template, but need a way to checksum without them.
 	 */
 	/*
 	 * m->m_pkthdr.len should have been set before checksum calculation,
 	 * because in6_cksum() needs it.
 	 */
 #ifdef INET6
 	if (isipv6) {
 		/*
 		 * we separately set hoplimit for every segment, since the
 		 * user might want to change the value via setsockopt.
 		 * Also, desired default hop limit might be changed via
 		 * Neighbor Discovery.
 		 */
 		ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
 
 		/*
 		 * Set the packet size here for the benefit of DTrace probes.
 		 * ip6_output() will set it properly; it's supposed to include
 		 * the option header lengths as well.
 		 */
 		ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
 
 		if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
 			tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 		else
 			tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 
 		if (tp->t_state == TCPS_SYN_SENT)
 			TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
 
 		TCP_PROBE5(send, NULL, tp, ip6, tp, th);
 
 #ifdef TCPPCAP
 		/* Save packet, if requested. */
 		tcp_pcap_add(th, m, &(tp->t_outpkts));
 #endif
 
 		/* TODO: IPv6 IP6TOS_ECT bit on */
 		error = ip6_output(m, tp->t_inpcb->in6p_outputopts,
 		    &tp->t_inpcb->inp_route6,
 		    ((so->so_options & SO_DONTROUTE) ?  IP_ROUTETOIF : 0),
 		    NULL, NULL, tp->t_inpcb);
 
 		if (error == EMSGSIZE && tp->t_inpcb->inp_route6.ro_nh != NULL)
 			mtu = tp->t_inpcb->inp_route6.ro_nh->nh_mtu;
 	}
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
     {
 	ip->ip_len = htons(m->m_pkthdr.len);
 #ifdef INET6
 	if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
 		ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL);
 #endif /* INET6 */
 	/*
 	 * If we do path MTU discovery, then we set DF on every packet.
 	 * This might not be the best thing to do according to RFC3390
 	 * Section 2. However the tcp hostcache mitigates the problem
 	 * so it affects only the first tcp connection with a host.
 	 *
 	 * NB: Don't set DF on small MTU/MSS to have a safe fallback.
 	 */
 	if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
 		tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 		if (tp->t_port == 0 || len < V_tcp_minmss) {
 			ip->ip_off |= htons(IP_DF);
 		}
 	} else {
 		tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 	}
 
 	if (tp->t_state == TCPS_SYN_SENT)
 		TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
 
 	TCP_PROBE5(send, NULL, tp, ip, tp, th);
 
 #ifdef TCPPCAP
 	/* Save packet, if requested. */
 	tcp_pcap_add(th, m, &(tp->t_outpkts));
 #endif
 
 	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
 	    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
 	    tp->t_inpcb);
 
 	if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_nh != NULL)
 		mtu = tp->t_inpcb->inp_route.ro_nh->nh_mtu;
     }
 #endif /* INET */
 
 out:
 	if (error == 0)
 		tcp_account_for_send(tp, len, (tp->snd_nxt != tp->snd_max), 0, hw_tls);
 	/*
 	 * In transmit state, time the transmission and arrange for
 	 * the retransmit.  In persist state, just set snd_max.
 	 */
 	if ((tp->t_flags & TF_FORCEDATA) == 0 ||
 	    !tcp_timer_active(tp, TT_PERSIST)) {
 		tcp_seq startseq = tp->snd_nxt;
 
 		/*
 		 * Advance snd_nxt over sequence space of this segment.
 		 */
 		if (flags & (TH_SYN|TH_FIN)) {
 			if (flags & TH_SYN)
 				tp->snd_nxt++;
 			if (flags & TH_FIN) {
 				tp->snd_nxt++;
 				tp->t_flags |= TF_SENTFIN;
 			}
 		}
 		if (sack_rxmit)
 			goto timer;
 		tp->snd_nxt += len;
 		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
 			tp->snd_max = tp->snd_nxt;
 			/*
 			 * Time this transmission if not a retransmission and
 			 * not currently timing anything.
 			 */
 			tp->t_sndtime = ticks;
 			if (tp->t_rtttime == 0) {
 				tp->t_rtttime = ticks;
 				tp->t_rtseq = startseq;
 				TCPSTAT_INC(tcps_segstimed);
 			}
 #ifdef STATS
 			if (!(tp->t_flags & TF_GPUTINPROG) && len) {
 				tp->t_flags |= TF_GPUTINPROG;
 				tp->gput_seq = startseq;
 				tp->gput_ack = startseq +
 				    ulmin(sbavail(&so->so_snd) - off, sendwin);
 				tp->gput_ts = tcp_ts_getticks();
 			}
 #endif /* STATS */
 		}
 
 		/*
 		 * Set retransmit timer if not currently set,
 		 * and not doing a pure ack or a keep-alive probe.
 		 * Initial value for retransmit timer is smoothed
 		 * round-trip time + 2 * round-trip time variance.
 		 * Initialize shift counter which is used for backoff
 		 * of retransmit time.
 		 */
 timer:
 		if (!tcp_timer_active(tp, TT_REXMT) &&
 		    ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
 		     (tp->snd_nxt != tp->snd_una))) {
 			if (tcp_timer_active(tp, TT_PERSIST)) {
 				tcp_timer_activate(tp, TT_PERSIST, 0);
 				tp->t_rxtshift = 0;
 			}
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 		} else if (len == 0 && sbavail(&so->so_snd) &&
 		    !tcp_timer_active(tp, TT_REXMT) &&
 		    !tcp_timer_active(tp, TT_PERSIST)) {
 			/*
 			 * Avoid a situation where we do not set persist timer
 			 * after a zero window condition. For example:
 			 * 1) A -> B: packet with enough data to fill the window
 			 * 2) B -> A: ACK for #1 + new data (0 window
 			 *    advertisement)
 			 * 3) A -> B: ACK for #2, 0 len packet
 			 *
 			 * In this case, A will not activate the persist timer,
 			 * because it chose to send a packet. Unless tcp_output
 			 * is called for some other reason (delayed ack timer,
 			 * another input packet from B, socket syscall), A will
 			 * not send zero window probes.
 			 *
 			 * So, if you send a 0-length packet, but there is data
 			 * in the socket buffer, and neither the rexmt or
 			 * persist timer is already set, then activate the
 			 * persist timer.
 			 */
 			tp->t_rxtshift = 0;
 			tcp_setpersist(tp);
 		}
 	} else {
 		/*
 		 * Persist case, update snd_max but since we are in
 		 * persist mode (no window) we do not update snd_nxt.
 		 */
 		int xlen = len;
 		if (flags & TH_SYN)
 			++xlen;
 		if (flags & TH_FIN) {
 			++xlen;
 			tp->t_flags |= TF_SENTFIN;
 		}
 		if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
 			tp->snd_max = tp->snd_nxt + xlen;
 	}
 	if ((error == 0) &&
 	    (TCPS_HAVEESTABLISHED(tp->t_state) &&
 	     (tp->t_flags & TF_SACK_PERMIT) &&
 	     tp->rcv_numsacks > 0)) {
 		    /* Clean up any DSACK's sent */
 		    tcp_clean_dsack_blocks(tp);
 	}
 	if (error) {
 		/* Record the error. */
 		TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_OUT,
 		    error, 0, NULL, false);
 
 		/*
 		 * We know that the packet was lost, so back out the
 		 * sequence number advance, if any.
 		 *
 		 * If the error is EPERM the packet got blocked by the
 		 * local firewall.  Normally we should terminate the
 		 * connection but the blocking may have been spurious
 		 * due to a firewall reconfiguration cycle.  So we treat
 		 * it like a packet loss and let the retransmit timer and
 		 * timeouts do their work over time.
 		 * XXX: It is a POLA question whether calling tcp_drop right
 		 * away would be the really correct behavior instead.
 		 */
 		if (((tp->t_flags & TF_FORCEDATA) == 0 ||
 		    !tcp_timer_active(tp, TT_PERSIST)) &&
 		    ((flags & TH_SYN) == 0) &&
 		    (error != EPERM)) {
 			if (sack_rxmit) {
 				p->rxmit -= len;
 				tp->sackhint.sack_bytes_rexmit -= len;
 				KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
 				    ("sackhint bytes rtx >= 0"));
 			} else
 				tp->snd_nxt -= len;
 		}
 		SOCKBUF_UNLOCK_ASSERT(&so->so_snd);	/* Check gotos. */
 		switch (error) {
 		case EACCES:
 		case EPERM:
 			tp->t_softerror = error;
 			return (error);
 		case ENOBUFS:
 			TCP_XMIT_TIMER_ASSERT(tp, len, flags);
 			tp->snd_cwnd = tp->t_maxseg;
 			return (0);
 		case EMSGSIZE:
 			/*
 			 * For some reason the interface we used initially
 			 * to send segments changed to another or lowered
 			 * its MTU.
 			 * If TSO was active we either got an interface
 			 * without TSO capabilities or TSO was turned off.
 			 * If we obtained mtu from ip_output() then update
 			 * it and try again.
 			 */
 			if (tso)
 				tp->t_flags &= ~TF_TSO;
 			if (mtu != 0) {
 				tcp_mss_update(tp, -1, mtu, NULL, NULL);
 				goto again;
 			}
 			return (error);
 		case EHOSTDOWN:
 		case EHOSTUNREACH:
 		case ENETDOWN:
 		case ENETUNREACH:
 			if (TCPS_HAVERCVDSYN(tp->t_state)) {
 				tp->t_softerror = error;
 				return (0);
 			}
 			/* FALLTHROUGH */
 		default:
 			return (error);
 		}
 	}
 	TCPSTAT_INC(tcps_sndtotal);
 
 	/*
 	 * Data sent (as far as we can tell).
 	 * If this advertises a larger window than any other segment,
 	 * then remember the size of the advertised window.
 	 * Any pending ACK has now been sent.
 	 */
 	if (SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
 		tp->rcv_adv = tp->rcv_nxt + recwin;
 	tp->last_ack_sent = tp->rcv_nxt;
 	tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
 	if (tcp_timer_active(tp, TT_DELACK))
 		tcp_timer_activate(tp, TT_DELACK, 0);
 #if 0
 	/*
 	 * This completely breaks TCP if newreno is turned on.  What happens
 	 * is that if delayed-acks are turned on on the receiver, this code
 	 * on the transmitter effectively destroys the TCP window, forcing
 	 * it to four packets (1.5Kx4 = 6K window).
 	 */
 	if (sendalot && --maxburst)
 		goto again;
 #endif
 	if (sendalot)
 		goto again;
 	return (0);
 }
 
 /*
  * Start or restart the persist timer for tp.  The interval is derived
  * from the smoothed RTT and its variance, scaled by the current backoff
  * entry and clamped to [tcp_persmin, tcp_persmax].  The backoff shift is
  * then advanced, saturating at TCP_MAXRXTSHIFT.  Panics if the
  * retransmit timer is still active.
  */
 void
 tcp_setpersist(struct tcpcb *tp)
 {
 	int interval;
 	int rtt_est;
 
 	rtt_est = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
 	tp->t_flags &= ~TF_PREVVALID;
 
 	/* The persist and retransmit timers must never be armed together. */
 	if (tcp_timer_active(tp, TT_REXMT))
 		panic("tcp_setpersist: retransmit pending");
 
 	/* Scale the estimate by the backoff table and clamp it. */
 	TCPT_RANGESET(interval, rtt_est * tcp_backoff[tp->t_rxtshift],
 	    tcp_persmin, tcp_persmax);
 	tcp_timer_activate(tp, TT_PERSIST, interval);
 
 	/* Back off further next time, up to the table limit. */
 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
 		tp->t_rxtshift++;
 }
 
 /*
  * Insert TCP options according to the supplied parameters to the place
  * optp in a consistent way.  Can handle unaligned destinations.
  *
  * The order of the option processing is crucial for optimal packing and
  * alignment for the scarce option space.
  *
  * The optimal order for a SYN/SYN-ACK segment is:
  *   MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
  *   Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
  *
  * The SACK options should be last.  SACK blocks consume 8*n+2 bytes.
  * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
  * At minimum we need 10 bytes (to generate 1 SACK block).  If both
  * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present,
  * we only have 10 bytes for SACK options (40 - (12 + 18)).
  *
  * Parameters:
  *   to   - options to emit; each bit set in to->to_flags selects one
  *          option.  This structure is modified: to_mss, to_tsval and
  *          to_tsecr are converted to network byte order in place,
  *          to_signature is pointed at the zero-filled signature bytes
  *          inside the output buffer, and TOF_SIGNATURE / TOF_FASTOPEN are
  *          cleared from to_flags when those options do not fit.
  *   optp - destination buffer for the encoded options.
  *
  * Returns the number of option bytes written, including NOP/EOL/padding,
  * which is always a multiple of 4.
  */
 int
 tcp_addoptions(struct tcpopt *to, u_char *optp)
 {
 	u_int32_t mask, optlen = 0;
 
 	/* Walk the flag bits from lowest to highest; that fixes the order. */
 	for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
 		if ((to->to_flags & mask) != mask)
 			continue;
 		if (optlen == TCP_MAXOLEN)
 			break;
 		/*
 		 * In each case below the alignment NOPs are emitted before
 		 * the "does it fit" check, so they remain in the buffer even
 		 * when the option itself is skipped with "continue".
 		 */
 		switch (to->to_flags & mask) {
 		case TOF_MSS:
 			while (optlen % 4) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG)
 				continue;
 			optlen += TCPOLEN_MAXSEG;
 			*optp++ = TCPOPT_MAXSEG;
 			*optp++ = TCPOLEN_MAXSEG;
 			/* Byte-swapped in place; bcopy copes with unaligned optp. */
 			to->to_mss = htons(to->to_mss);
 			bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
 			optp += sizeof(to->to_mss);
 			break;
 		case TOF_SCALE:
 			while (!optlen || optlen % 2 != 1) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW)
 				continue;
 			optlen += TCPOLEN_WINDOW;
 			*optp++ = TCPOPT_WINDOW;
 			*optp++ = TCPOLEN_WINDOW;
 			*optp++ = to->to_wscale;
 			break;
 		case TOF_SACKPERM:
 			while (optlen % 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED)
 				continue;
 			optlen += TCPOLEN_SACK_PERMITTED;
 			*optp++ = TCPOPT_SACK_PERMITTED;
 			*optp++ = TCPOLEN_SACK_PERMITTED;
 			break;
 		case TOF_TS:
 			while (!optlen || optlen % 4 != 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP)
 				continue;
 			optlen += TCPOLEN_TIMESTAMP;
 			*optp++ = TCPOPT_TIMESTAMP;
 			*optp++ = TCPOLEN_TIMESTAMP;
 			/* Both timestamp fields are byte-swapped in place. */
 			to->to_tsval = htonl(to->to_tsval);
 			to->to_tsecr = htonl(to->to_tsecr);
 			bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
 			optp += sizeof(to->to_tsval);
 			bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
 			optp += sizeof(to->to_tsecr);
 			break;
 		case TOF_SIGNATURE:
 			{
 			int siglen = TCPOLEN_SIGNATURE - 2;
 
 			while (!optlen || optlen % 4 != 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) {
 				to->to_flags &= ~TOF_SIGNATURE;
 				continue;
 			}
 			optlen += TCPOLEN_SIGNATURE;
 			*optp++ = TCPOPT_SIGNATURE;
 			*optp++ = TCPOLEN_SIGNATURE;
 			/*
 			 * Remember where the signature lives and zero-fill it;
 			 * the actual digest is not computed here.
 			 */
 			to->to_signature = optp;
 			while (siglen--)
 				 *optp++ = 0;
 			break;
 			}
 		case TOF_SACK:
 			{
 			int sackblks = 0;
 			struct sackblk *sack = (struct sackblk *)to->to_sacks;
 			tcp_seq sack_seq;
 
 			while (!optlen || optlen % 4 != 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			/* Need room for the header plus at least one block. */
 			if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK)
 				continue;
 			optlen += TCPOLEN_SACKHDR;
 			*optp++ = TCPOPT_SACK;
 			/* Emit as many blocks as requested and as still fit. */
 			sackblks = min(to->to_nsacks,
 					(TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
 			*optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
 			while (sackblks--) {
 				sack_seq = htonl(sack->start);
 				bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
 				optp += sizeof(sack_seq);
 				sack_seq = htonl(sack->end);
 				bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
 				optp += sizeof(sack_seq);
 				optlen += TCPOLEN_SACK;
 				sack++;
 			}
 			TCPSTAT_INC(tcps_sack_send_blocks);
 			break;
 			}
 		case TOF_FASTOPEN:
 			{
 			int total_len;
 
 			/* XXX is there any point to aligning this option? */
 			total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
 			if (TCP_MAXOLEN - optlen < total_len) {
 				to->to_flags &= ~TOF_FASTOPEN;
 				continue;
 			}
 			*optp++ = TCPOPT_FAST_OPEN;
 			*optp++ = total_len;
 			/* A zero-length cookie yields an empty Fast Open option. */
 			if (to->to_tfo_len > 0) {
 				bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
 				optp += to->to_tfo_len;
 			}
 			optlen += total_len;
 			break;
 			}
 		default:
 			panic("%s: unknown TCP option type", __func__);
 			break;
 		}
 	}
 
 	/* Terminate and pad TCP options to a 4 byte boundary. */
 	if (optlen % 4) {
 		optlen += TCPOLEN_EOL;
 		*optp++ = TCPOPT_EOL;
 	}
 	/*
 	 * According to RFC 793 (STD0007):
 	 *   "The content of the header beyond the End-of-Option option
 	 *    must be header padding (i.e., zero)."
 	 *   and later: "The padding is composed of zeros."
 	 */
 	while (optlen % 4) {
 		optlen += TCPOLEN_PAD;
 		*optp++ = TCPOPT_PAD;
 	}
 
 	KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
 	return (optlen);
 }
 
 /*
  * This is a copy of m_copym(), taking the TSO segment size/limit
  * constraints into account, and advancing the sndptr as it goes.
  *
  * Parameters:
  *   m        - first mbuf of the chain to copy from.
  *   off0     - byte offset into the chain at which the copy starts.
  *   plen     - in: number of bytes requested (or M_COPYALL);
  *              out: rewritten whenever the copy is cut short (end of
  *              chain, TLS session change, segment limit reached).
  *   seglimit - if non-zero, budget of segments the copy may consume.
  *   segsize  - segment size used to estimate how many segments each mbuf
  *              contributes.
  *   sb       - optional socket buffer; sb_sndptr and sb_sndptroff are
  *              advanced past mbufs that are skipped or fully consumed.
  *   hw_tls   - when true (and KERN_TLS is compiled in), stop at the first
  *              mbuf whose TLS session differs from the starting mbuf's.
  *
  * Returns the head of the new chain.  NULL is returned when an mbuf or
  * packet header cannot be allocated (the partial copy is freed), and
  * also when no bytes were requested.
  */
 struct mbuf *
 tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
     int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls)
 {
 #ifdef KERN_TLS
 	struct ktls_session *tls, *ntls;
 	struct mbuf *start __diagused;
 #endif
 	struct mbuf *n, **np;
 	struct mbuf *top;
 	int32_t off = off0;
 	int32_t len = *plen;
 	int32_t fragsize;
 	int32_t len_cp = 0;
 	int32_t *pkthdrlen;
 	uint32_t mlen, frags;
 	bool copyhdr;
 
 	KASSERT(off >= 0, ("tcp_m_copym, negative off %d", off));
 	KASSERT(len >= 0, ("tcp_m_copym, negative len %d", len));
 	/* Only duplicate the packet header when copying from the very start. */
 	if (off == 0 && m->m_flags & M_PKTHDR)
 		copyhdr = true;
 	else
 		copyhdr = false;
 	/* Skip whole mbufs that lie entirely before the requested offset. */
 	while (off > 0) {
 		KASSERT(m != NULL, ("tcp_m_copym, offset > size of mbuf chain"));
 		if (off < m->m_len)
 			break;
 		off -= m->m_len;
 		if ((sb) && (m == sb->sb_sndptr)) {
 			sb->sb_sndptroff += m->m_len;
 			sb->sb_sndptr = m->m_next;
 		}
 		m = m->m_next;
 	}
 	np = &top;
 	top = NULL;
 	pkthdrlen = NULL;
 #ifdef KERN_TLS
 	/* Remember the TLS session (if any) of the first mbuf to be copied. */
 	if (hw_tls && (m->m_flags & M_EXTPG))
 		tls = m->m_epg_tls;
 	else
 		tls = NULL;
 	start = m;
 #endif
 	while (len > 0) {
 		if (m == NULL) {
 			/* Ran off the chain: report what was actually copied. */
 			KASSERT(len == M_COPYALL,
 			    ("tcp_m_copym, length > size of mbuf chain"));
 			*plen = len_cp;
 			if (pkthdrlen != NULL)
 				*pkthdrlen = len_cp;
 			break;
 		}
 #ifdef KERN_TLS
 		if (hw_tls) {
 			if (m->m_flags & M_EXTPG)
 				ntls = m->m_epg_tls;
 			else
 				ntls = NULL;
 
 			/*
 			 * Avoid mixing TLS records with handshake
 			 * data or TLS records from different
 			 * sessions.
 			 */
 			if (tls != ntls) {
 				MPASS(m != start);
 				*plen = len_cp;
 				if (pkthdrlen != NULL)
 					*pkthdrlen = len_cp;
 				break;
 			}
 		}
 #endif
 		/* Bytes to take from this mbuf, before any segment limiting. */
 		mlen = min(len, m->m_len - off);
 		if (seglimit) {
 			/*
 			 * For M_EXTPG mbufs, add 3 segments
 			 * + 1 in case we are crossing page boundaries
 			 * + 2 in case the TLS hdr/trailer are used
 			 * It is cheaper to just add the segments
 			 * than it is to take the cache miss to look
 			 * at the mbuf ext_pgs state in detail.
 			 */
 			if (m->m_flags & M_EXTPG) {
 				fragsize = min(segsize, PAGE_SIZE);
 				frags = 3;
 			} else {
 				fragsize = segsize;
 				frags = 0;
 			}
 
 			/* Break if we really can't fit anymore. */
 			if ((frags + 1) >= seglimit) {
 				*plen =	len_cp;
 				if (pkthdrlen != NULL)
 					*pkthdrlen = len_cp;
 				break;
 			}
 
 			/*
 			 * Reduce size if you can't copy the whole
 			 * mbuf. If we can't copy the whole mbuf, also
 			 * adjust len so the loop will end after this
 			 * mbuf.
 			 */
 			if ((frags + howmany(mlen, fragsize)) >= seglimit) {
 				mlen = (seglimit - frags - 1) * fragsize;
 				len = mlen;
 				*plen = len_cp + len;
 				if (pkthdrlen != NULL)
 					*pkthdrlen = *plen;
 			}
 			/* Charge this mbuf's segments against the budget. */
 			frags += howmany(mlen, fragsize);
 			if (frags == 0)
 				frags++;
 			seglimit -= frags;
 			KASSERT(seglimit > 0,
 			    ("%s: seglimit went too low", __func__));
 		}
 		if (copyhdr)
 			n = m_gethdr(M_NOWAIT, m->m_type);
 		else
 			n = m_get(M_NOWAIT, m->m_type);
 		/* Link before the NULL check so nospace frees a consistent chain. */
 		*np = n;
 		if (n == NULL)
 			goto nospace;
 		if (copyhdr) {
 			if (!m_dup_pkthdr(n, m, M_NOWAIT))
 				goto nospace;
 			if (len == M_COPYALL)
 				n->m_pkthdr.len -= off0;
 			else
 				n->m_pkthdr.len = len;
 			/* Kept so later truncation can correct the header length. */
 			pkthdrlen = &n->m_pkthdr.len;
 			copyhdr = false;
 		}
 		n->m_len = mlen;
 		len_cp += n->m_len;
 		/*
 		 * Mbufs with external storage go through mb_dupcl();
 		 * everything else is copied byte-for-byte.
 		 */
 		if (m->m_flags & (M_EXT|M_EXTPG)) {
 			n->m_data = m->m_data + off;
 			mb_dupcl(n, m);
 		} else
 			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
 			    (u_int)n->m_len);
 
 		/*
 		 * Advance the send pointer only when this mbuf was consumed
 		 * to its end and there is a following mbuf to point at.
 		 */
 		if (sb && (sb->sb_sndptr == m) &&
 		    ((n->m_len + off) >= m->m_len) && m->m_next) {
 			sb->sb_sndptroff += m->m_len;
 			sb->sb_sndptr = m->m_next;
 		}
 		/* The offset only applies to the first mbuf copied. */
 		off = 0;
 		if (len != M_COPYALL) {
 			len -= n->m_len;
 		}
 		m = m->m_next;
 		np = &n->m_next;
 	}
 	return (top);
 nospace:
 	m_freem(top);
 	return (NULL);
 }
 
 /*
  * Grow the send socket buffer of so by one increment when the criteria
  * listed below are met.  If the reservation fails, automatic sizing is
  * switched off for this socket by clearing SB_AUTOSIZE.
  */
 void
 tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
 {
 
 	/*
 	 * Automatic sizing of send socket buffer.  Often the send buffer
 	 * size is not optimally adjusted to the actual network conditions
 	 * at hand (delay bandwidth product).  Setting the buffer size too
 	 * small limits throughput on links with high bandwidth and high
 	 * delay (eg. trans-continental/oceanic links).  Setting the
 	 * buffer size too big consumes too much real kernel memory,
 	 * especially with many connections on busy servers.
 	 *
 	 * The criteria to step up the send buffer one notch are:
 	 *  1. receive window of remote host is larger than send buffer
 	 *     (with a fudge factor of 5/4th);
 	 *  2. send buffer is filled to 7/8th with data (so we actually
 	 *     have data to make use of it);
 	 *  3. send buffer fill has not hit maximal automatic size;
 	 *  4. our send window (slow start and congestion controlled) is
 	 *     larger than sent but unacknowledged data in send buffer.
 	 *
 	 * The remote host receive window scaling factor may limit the
 	 * growing of the send buffer before it reaches its allowed
 	 * maximum.
 	 *
 	 * It scales directly with slow start or congestion window
 	 * and does at most one step per received ACK.  This fast
 	 * scaling has the drawback of growing the send buffer beyond
 	 * what is strictly necessary to make full use of a given
 	 * delay*bandwidth product.  However testing has shown this not
 	 * to be much of a problem.  At worst we are trading wasting
 	 * of available bandwidth (the non-use of it) for wasting some
 	 * socket buffer memory.
 	 *
 	 * TODO: Shrink send buffer during idle periods together
 	 * with congestion window.  Requires another timer.  Has to
 	 * wait for upcoming tcp timer rewrite.
 	 *
 	 * XXXGL: should there be used sbused() or sbavail()?
 	 */
 	if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
 		int lowat;
 
 		/* The low-water mark only counts when the sysctl enables it. */
 		lowat = V_tcp_sendbuf_auto_lowat ? so->so_snd.sb_lowat : 0;
 		if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat - lowat &&
 		    sbused(&so->so_snd) >=
 		    (so->so_snd.sb_hiwat / 8 * 7) - lowat &&
 		    sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
 		    sendwin >= (sbused(&so->so_snd) -
 		    (tp->snd_nxt - tp->snd_una))) {
-			if (!sbreserve_locked(&so->so_snd,
+			if (!sbreserve_locked(so, SO_SND,
 			    min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
-			     V_tcp_autosndbuf_max), so, curthread))
+			     V_tcp_autosndbuf_max), curthread))
 				so->so_snd.sb_flags &= ~SB_AUTOSIZE;
 		}
 	}
 }
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
index c822276185cb..def5edb98983 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -1,1977 +1,1977 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *      The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2004 The FreeBSD Foundation.  All rights reserved.
  * Copyright (c) 2004-2008 Robert N. M. Watson.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
  */
 
 /*
  *
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 
 #include "sdp.h"
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/vnet.h>
 #include <sys/sysctl.h>
 
 /* UMA zone that sdp_sock structures are allocated from and freed to. */
 uma_zone_t	sdp_zone;
 /* Read/write lock taken around updates to sdp_list and sdp_count. */
 struct rwlock	sdp_lock;
 /* List of every sdp_sock, linked through its "list" member. */
 LIST_HEAD(, sdp_sock) sdp_list;
 
 /* NOTE(review): presumably the RX completion workqueue; not used in this part of the file. */
 struct workqueue_struct *rx_comp_wq;
 
 RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
 /* Convenience wrappers for taking, dropping and asserting sdp_lock. */
 #define	SDP_LIST_WLOCK()	rw_wlock(&sdp_lock)
 #define	SDP_LIST_RLOCK()	rw_rlock(&sdp_lock)
 #define	SDP_LIST_WUNLOCK()	rw_wunlock(&sdp_lock)
 #define	SDP_LIST_RUNLOCK()	rw_runlock(&sdp_lock)
 #define	SDP_LIST_WLOCK_ASSERT()	rw_assert(&sdp_lock, RW_WLOCKED)
 #define	SDP_LIST_RLOCK_ASSERT()	rw_assert(&sdp_lock, RW_RLOCKED)
 #define	SDP_LIST_LOCK_ASSERT()	rw_assert(&sdp_lock, RW_LOCKED)
 
 MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");
 
 static void sdp_stop_keepalive_timer(struct socket *so);
 
 /*
  * SDP protocol interface to socket abstraction.
  */
 /*
  * sdp_sendspace and sdp_recvspace are the default send and receive window
  * sizes, respectively.
  */
 u_long	sdp_sendspace = 1024*32;
 u_long	sdp_recvspace = 1024*64;
 
 /* Number of sdp_socks currently on sdp_list. */
 static int sdp_count;
 
 /*
  * Tear down the RDMA CM id attached to ssk, if there is one, so that no
  * further asynchronous CMA events are delivered for a socket that is
  * going away.  Calling this on a socket without an id is a no-op.
  */
 static void
 sdp_destroy_cma(struct sdp_sock *ssk)
 {
 
 	if (ssk->id != NULL) {
 		rdma_destroy_id(ssk->id);
 		ssk->id = NULL;
 	}
 }
 
 static int
 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
 {
 	struct sockaddr_in *sin;
 	struct sockaddr_in null;
 	int error;
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
 		return (EINVAL);
 	/* rdma_bind_addr handles bind races.  */
 	SDP_WUNLOCK(ssk);
 	if (ssk->id == NULL)
 		ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
 	if (ssk->id == NULL) {
 		SDP_WLOCK(ssk);
 		return (ENOMEM);
 	}
 	if (nam == NULL) {
 		null.sin_family = AF_INET;
 		null.sin_len = sizeof(null);
 		null.sin_addr.s_addr = INADDR_ANY;
 		null.sin_port = 0;
 		bzero(&null.sin_zero, sizeof(null.sin_zero));
 		nam = (struct sockaddr *)&null;
 	}
 	error = -rdma_bind_addr(ssk->id, nam);
 	SDP_WLOCK(ssk);
 	if (error == 0) {
 		sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
 		ssk->laddr = sin->sin_addr.s_addr;
 		ssk->lport = sin->sin_port;
 	} else
 		sdp_destroy_cma(ssk);
 	return (error);
 }
 
 /*
  * Release every resource owned by ssk and free the structure itself.
  *
  * The socket must already be detached (ssk->socket == NULL) and the pcb
  * must not have been destroyed before.  The caller holds the socket write
  * lock; it is released here and the lock itself is destroyed, so ssk must
  * not be touched after this returns.
  */
 static void
 sdp_pcbfree(struct sdp_sock *ssk)
 {
 
 	KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
 	KASSERT((ssk->flags & SDP_DESTROY) == 0,
 	    ("ssk %p already destroyed", ssk));
 
 	sdp_dbg(ssk->socket, "Freeing pcb");
 	SDP_WLOCK_ASSERT(ssk);
 	/* Mark the pcb as dying before the lock is dropped. */
 	ssk->flags |= SDP_DESTROY;
 	SDP_WUNLOCK(ssk);
 	/* Unlink from the global list under the list lock. */
 	SDP_LIST_WLOCK();
 	sdp_count--;
 	LIST_REMOVE(ssk, list);
 	SDP_LIST_WUNLOCK();
 	crfree(ssk->cred);
 	/* Clear qp_active before the queue pair and rings are torn down. */
 	ssk->qp_active = 0;
 	if (ssk->qp) {
 		ib_destroy_qp(ssk->qp);
 		ssk->qp = NULL;
 	}
 	sdp_tx_ring_destroy(ssk);
 	sdp_rx_ring_destroy(ssk);
 	sdp_destroy_cma(ssk);
 	rw_destroy(&ssk->rx_ring.destroyed_lock);
 	rw_destroy(&ssk->lock);
 	uma_zfree(sdp_zone, ssk);
 }
 
 /*
  * Build a freshly allocated, zero-initialised AF_INET sockaddr holding
  * the given port and address.  The allocation is M_SONAME / M_WAITOK.
  * Shared by the getsockaddr and getpeeraddr handlers.
  */
 static struct sockaddr *
 sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
 {
 	struct sockaddr_in *sa4;
 
 	sa4 = malloc(sizeof(*sa4), M_SONAME, M_WAITOK | M_ZERO);
 	sa4->sin_len = sizeof(*sa4);
 	sa4->sin_family = AF_INET;
 	sa4->sin_port = port;
 	sa4->sin_addr = *addr_p;
 
 	return ((struct sockaddr *)sa4);
 }
 
 static int
 sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct sdp_sock *ssk;
 	struct in_addr addr;
 	in_port_t port;
 
 	ssk = sdp_sk(so);
 	SDP_RLOCK(ssk);
 	port = ssk->lport;
 	addr.s_addr = ssk->laddr;
 	SDP_RUNLOCK(ssk);
 
 	*nam = sdp_sockaddr(port, &addr);
 	return 0;
 }
 
 static int
 sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct sdp_sock *ssk;
 	struct in_addr addr;
 	in_port_t port;
 
 	ssk = sdp_sk(so);
 	SDP_RLOCK(ssk);
 	port = ssk->fport;
 	addr.s_addr = ssk->faddr;
 	SDP_RUNLOCK(ssk);
 
 	*nam = sdp_sockaddr(port, &addr);
 	return 0;
 }
 
 /*
  * Invoke notify(ssk, errno) on every attached SDP socket whose foreign
  * address equals faddr.  The global list is write-locked for the whole
  * walk and each socket is write-locked while it is examined.
  *
  * The notify callback is called with the socket write lock held.  It
  * returns the socket with the lock still held, or NULL when it has
  * already released the lock itself (as sdp_closed() does when it drops
  * the last socket reference).
  *
  * Sockets that are detached or being destroyed are skipped.  Every skip
  * path releases the socket lock; previously a socket flagged SDP_DESTROY
  * was left write-locked.
  */
 static void
 sdp_pcbnotifyall(struct in_addr faddr, int errno,
     struct sdp_sock *(*notify)(struct sdp_sock *, int))
 {
 	struct sdp_sock *ssk, *ssk_temp;
 
 	SDP_LIST_WLOCK();
 	LIST_FOREACH_SAFE(ssk, &sdp_list, list, ssk_temp) {
 		SDP_WLOCK(ssk);
 		if (ssk->faddr != faddr.s_addr || ssk->socket == NULL ||
 		    (ssk->flags & SDP_DESTROY) != 0) {
 			SDP_WUNLOCK(ssk);
 			continue;
 		}
 		/* A NULL return means notify already dropped the lock. */
 		if ((*notify)(ssk, errno) != NULL)
 			SDP_WUNLOCK(ssk);
 	}
 	SDP_LIST_WUNLOCK();
 }
 
 #if 0
 /*
  * Currently compiled out.  Calls func(ssk, arg) for every sdp_sock on
  * sdp_list, holding the list read lock for the walk and each socket's
  * write lock around its callback.
  */
 static void
 sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
 {
 	struct sdp_sock *ssk;
 
 	SDP_LIST_RLOCK();
 	LIST_FOREACH(ssk, &sdp_list, list) {
 		SDP_WLOCK(ssk);
 		func(ssk, arg);
 		SDP_WUNLOCK(ssk);
 	}
 	SDP_LIST_RUNLOCK();
 }
 #endif
 
 static void
 sdp_output_reset(struct sdp_sock *ssk)
 {
 	struct rdma_cm_id *id;
 
 	SDP_WLOCK_ASSERT(ssk);
 	if (ssk->id) {
 		id = ssk->id;
 		ssk->qp_active = 0;
 		SDP_WUNLOCK(ssk);
 		rdma_disconnect(id);
 		SDP_WLOCK(ssk);
 	}
 	ssk->state = TCPS_CLOSED;
 }
 
 /*
  * Mark an SDP socket as dropped and disconnected.  If the protocol holds
  * its own reference on the socket (SDP_SOCKREF), that reference is given
  * up: the socket write lock is released, SS_PROTOREF is cleared and
  * sofree() is called, and NULL is returned to tell the caller the lock
  * is gone.  Otherwise ssk is returned with the write lock still held.
  */
 static struct sdp_sock *
 sdp_closed(struct sdp_sock *ssk)
 {
 	struct socket *so;
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	so = ssk->socket;
 	ssk->flags |= SDP_DROPPED;
 	soisdisconnected(so);
 
 	/* No protocol reference: nothing more to do, lock stays held. */
 	if ((ssk->flags & SDP_SOCKREF) == 0)
 		return (ssk);
 
 	KASSERT(so->so_state & SS_PROTOREF,
 	    ("sdp_closed: !SS_PROTOREF"));
 	ssk->flags &= ~SDP_SOCKREF;
 	SDP_WUNLOCK(ssk);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_PROTOREF;
 	sofree(so);
 	return (NULL);
 }
 
 /*
  * Perform timer based shutdowns which can not operate in
  * callout context.
  *
  * Task handler (data is the sdp_sock) queued by sdp_2msl_timeout().  It
  * takes the socket write lock itself and guarantees the lock is released
  * on every path, either directly or by a callee that consumes it.
  */
 static void
 sdp_shutdown_task(void *data, int pending)
 {
 	struct sdp_sock *ssk;
 
 	ssk = data;
 	SDP_WLOCK(ssk);
 	/*
 	 * I don't think this can race with another call to pcbfree()
 	 * because SDP_TIMEWAIT protects it.  SDP_DESTROY may be redundant.
 	 */
 	if (ssk->flags & SDP_DESTROY)
 		panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
 		    ssk);
 	if (ssk->flags & SDP_DISCON)
 		sdp_output_reset(ssk);
 	/* We have to clear this so sdp_detach() will call pcbfree(). */
 	ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
 	/* sdp_closed() returning NULL means it already dropped the lock. */
 	if ((ssk->flags & SDP_DROPPED) == 0 &&
 	    sdp_closed(ssk) == NULL)
 		return;
 	/* Already detached: free the pcb, which also releases the lock. */
 	if (ssk->socket == NULL) {
 		sdp_pcbfree(ssk);
 		return;
 	}
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * Callout handler for expiry of the 2msl timer: hand the real work to
  * sdp_shutdown_task() via the thread taskqueue.  Nothing is queued if the
  * callout was cancelled or the socket is no longer in SDP_TIMEWAIT.  The
  * socket write lock is released before returning on every path.
  */
 static void
 sdp_2msl_timeout(void *data)
 {
 	struct sdp_sock *ssk = data;
 
 	/* An inactive callout means it was cancelled. */
 	if (callout_active(&ssk->keep2msl)) {
 		callout_deactivate(&ssk->keep2msl);
 		/* Should always be set; checked defensively. */
 		if ((ssk->flags & SDP_TIMEWAIT) != 0)
 			taskqueue_enqueue(taskqueue_thread,
 			    &ssk->shutdown_task);
 	}
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * Enter TIME_WAIT: flag the socket, report it as disconnected and arm
  * the keep2msl callout to fire sdp_2msl_timeout() after TCPTV_MSL.
  * The caller must hold the socket write lock.
  */
 static void
 sdp_2msl_wait(struct sdp_sock *ssk)
 {
 	struct socket *so;
 
 	SDP_WLOCK_ASSERT(ssk);
 	so = ssk->socket;
 	ssk->state = TCPS_TIME_WAIT;
 	ssk->flags |= SDP_TIMEWAIT;
 	soisdisconnected(so);
 	callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
 }
 
 /*
  * Callout handler: the peer never answered our disconnect request (the
  * final fin/ack from rdma_disconnect() did not arrive in time).  If the
  * socket is still waiting for it, give up, flag the connection for a
  * forced disconnect and start the 2msl wait.  The socket write lock is
  * released before returning on every path.
  */
 static void
 sdp_dreq_timeout(void *data)
 {
 	struct sdp_sock *ssk = data;
 
 	/*
 	 * Do nothing if the callout was cancelled (not active) or has been
 	 * rescheduled, probably as a different timer (pending).
 	 */
 	if (callout_active(&ssk->keep2msl) &&
 	    !callout_pending(&ssk->keep2msl)) {
 		callout_deactivate(&ssk->keep2msl);
 		if ((ssk->state == TCPS_FIN_WAIT_1 ||
 		    ssk->state == TCPS_LAST_ACK) &&
 		    (ssk->flags & SDP_DREQWAIT) != 0) {
 			ssk->flags &= ~SDP_DREQWAIT;
 			ssk->flags |= SDP_DISCON;
 			sdp_2msl_wait(ssk);
 			ssk->qp_active = 0;
 		}
 	}
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * Received the final fin/ack.  Stop waiting for the disconnect reply and
  * start the 2msl wait instead: SDP_DREQWAIT is cleared and
  * sdp_2msl_wait() re-arms the shared keep2msl callout with the 2msl
  * timeout.
  *
  * NOTE(review): sdp_2msl_wait() asserts the socket write lock, so the
  * caller is expected to hold it.
  */
 void
 sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
 {
 	sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
 	ssk->flags &= ~SDP_DREQWAIT;
 	sdp_2msl_wait(ssk);
 }
 
 /*
  * Initialise the per-socket SDP state that does not depend on a
  * connection: the keep2msl callout, the shutdown task, the RX ring and
  * (with SDP_ZCOPY) the zero-copy bookkeeping.  The sdp_sock must already
  * be reachable through sdp_sk(sk).  Always returns 0.
  */
 static int
 sdp_init_sock(struct socket *sk)
 {
 	struct sdp_sock *ssk = sdp_sk(sk);
 
 	sdp_dbg(sk, "%s\n", __func__);
 
 	/*
 	 * The callout is tied to the socket rwlock with
 	 * CALLOUT_RETURNUNLOCKED; the handlers (sdp_2msl_timeout,
 	 * sdp_dreq_timeout) release the lock themselves before returning.
 	 */
 	callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
 	TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
 #ifdef SDP_ZCOPY
 	INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
 	ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
 	ssk->tx_ring.rdma_inflight = NULL;
 #endif
 	atomic_set(&ssk->mseq_ack, 0);
 	sdp_rx_ring_init(ssk);
 	/* The TX ring buffer is not allocated here. */
 	ssk->tx_ring.buffer = NULL;
 
 	return 0;
 }
 
 /*
  * Attach SDP protocol state to a new socket: reserve default socket
  * buffer space if none has been reserved yet, enable buffer autosizing,
  * allocate and initialise the sdp_sock and put it on the global list.
  *
  * Returns 0 on success, the soreserve() error, or ENOBUFS when the
  * sdp_sock cannot be allocated.
  */
 static int
 sdp_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct sdp_sock *ssk;
 	int error;
 
 	KASSERT(sdp_sk(so) == NULL,
 	    ("sdp_attach: ssk already set on so %p", so));
 
 	/* Only reserve space when a buffer has no high-water mark yet. */
 	if (so->so_rcv.sb_hiwat == 0 || so->so_snd.sb_hiwat == 0) {
 		error = soreserve(so, sdp_sendspace, sdp_recvspace);
 		if (error != 0)
 			return (error);
 	}
 	so->so_snd.sb_flags |= SB_AUTOSIZE;
 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
 
 	ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
 	if (ssk == NULL)
 		return (ENOBUFS);
 	rw_init(&ssk->lock, "sdpsock");
 	ssk->cred = crhold(so->so_cred);
 	ssk->socket = so;
 	/* sdp_init_sock() finds the pcb through so->so_pcb. */
 	so->so_pcb = (caddr_t)ssk;
 	sdp_init_sock(so);
 	ssk->state = TCPS_CLOSED;
 	ssk->qp_active = 0;
 	ssk->flags = 0;
 	mbufq_init(&ssk->rxctlq, INT_MAX);
 
 	/* Publish the new pcb on the global list. */
 	SDP_LIST_WLOCK();
 	LIST_INSERT_HEAD(&sdp_list, ssk, list);
 	sdp_count++;
 	SDP_LIST_WUNLOCK();
 
 	return (0);
 }
 
 /*
  * Break the link between the socket and its sdp_sock.  If a timewait or
  * disconnect-wait timer is still running, the sdp_sock is left behind to
  * be freed later; otherwise it must be dropped or never have started
  * connecting, and it is freed right away.  Any other state is a bug.
  */
 static void
 sdp_detach(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 
 	SDP_WLOCK(ssk);
 	KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
 	ssk->socket->so_pcb = NULL;
 	ssk->socket = NULL;
 
 	/* A pending timer owns the pcb from here on. */
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT)) {
 		SDP_WUNLOCK(ssk);
 		return;
 	}
 	if ((ssk->flags & SDP_DROPPED) == 0 && ssk->state >= TCPS_SYN_SENT)
 		panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
 
 	/* sdp_pcbfree() releases the lock and frees ssk. */
 	sdp_pcbfree(ssk);
 }
 
 /*
  * Bind the socket to the local IPv4 address in nam.
  *
  * Returns EAFNOSUPPORT for a non-AF_INET or multicast address, EINVAL
  * for a wrongly sized sockaddr or a socket that is in timewait or has
  * been dropped, otherwise the result of sdp_pcbbind().
  */
 static int
 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct sdp_sock *ssk;
 	int error;
 
 	/* Validate the address before touching the pcb. */
 	if (sin->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		return (EAFNOSUPPORT);
 
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) == 0)
 		error = sdp_pcbbind(ssk, nam, td->td_ucred);
 	else
 		error = EINVAL;
 	SDP_WUNLOCK(ssk);
 
 	return (error);
 }
 
 /*
  * Put the socket into the listening state.  An unbound socket is first
  * bound to the wildcard address, then the generic socket layer is asked
  * whether listening is permitted, and finally the RDMA CM id is told to
  * listen (outside the socket write lock).
  *
  * Returns EINVAL for a socket in timewait or already dropped, otherwise
  * the first error met along the way, or 0.
  */
 static int
 sdp_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 	int error;
 
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		SDP_WUNLOCK(ssk);
 		return (EINVAL);
 	}
 
 	error = 0;
 	if (ssk->lport == 0)
 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
 
 	SOCK_LOCK(so);
 	if (error == 0) {
 		error = solisten_proto_check(so);
 		if (error == 0) {
 			solisten_proto(so, backlog);
 			ssk->state = TCPS_LISTEN;
 		}
 	}
 	SOCK_UNLOCK(so);
 	SDP_WUNLOCK(ssk);
 
 	/* rdma_listen() is called without the socket lock held. */
 	if (error == 0)
 		error = -rdma_listen(ssk->id, backlog);
 	return (error);
 }
 
 /*
  * Initiate a SDP connection to nam.
  *
  * Called with the socket write lock held.  An unbound socket is bound to
  * the wildcard address first.  The socket is then marked as connecting
  * and address resolution towards nam is started; the lock is dropped
  * around rdma_resolve_addr() and held again on return.
  *
  * Returns 0 when resolution was started (the state becomes
  * TCPS_SYN_SENT), the sdp_pcbbind() error, or the (negated) error from
  * rdma_resolve_addr().  The resolve error used to be discarded and 0
  * returned, so the caller was told the connect was in progress although
  * it had never been started.
  */
 static int
 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in src;
 	struct socket *so;
 	int error;
 
 	so = ssk->socket;
 
 	SDP_WLOCK_ASSERT(ssk);
 	if (ssk->lport == 0) {
 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
 		if (error)
 			return (error);
 	}
 	/* Resolve from the address/port the socket is bound to. */
 	src.sin_family = AF_INET;
 	src.sin_len = sizeof(src);
 	bzero(&src.sin_zero, sizeof(src.sin_zero));
 	src.sin_port = ssk->lport;
 	src.sin_addr.s_addr = ssk->laddr;
 	soisconnecting(so);
 	SDP_WUNLOCK(ssk);
 	error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
 	    SDP_RESOLVE_TIMEOUT);
 	SDP_WLOCK(ssk);
 	if (error == 0)
 		ssk->state = TCPS_SYN_SENT;
 
 	/* Propagate a failed resolve instead of reporting success. */
 	return (error);
 }
 
 /*
  * connect(2) entry point: validate the destination, then start the
  * connection under the pcb lock.
  *
  * Returns EINVAL for a wrongly sized sockaddr or a socket flagged
  * SDP_TIMEWAIT/SDP_DROPPED, EAFNOSUPPORT for a non-AF_INET or multicast
  * destination, the prison_remote_ip4() error if the jail check fails, and
  * otherwise the result of sdp_start_connect().
  */
 static int
 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct sdp_sock *ssk;
 	int rc;
 
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 	if (sin->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		return (EAFNOSUPPORT);
 	rc = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
 	if (rc != 0)
 		return (rc);
 
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) == 0)
 		rc = sdp_start_connect(ssk, nam, td);
 	else
 		rc = EINVAL;
 	SDP_WUNLOCK(ssk);
 	return (rc);
 }
 
 /*
  * Drop an SDP socket and record the given error on it.  A reset is sent
  * first when TCPS_HAVERCVDSYN() holds for the current state.
  *
  * Called with the ssk write lock held.  Returns whatever sdp_closed()
  * returns.
  */
 static struct sdp_sock *
 sdp_drop(struct sdp_sock *ssk, int errno)
 {
 	struct socket *so = ssk->socket;
 
 	SDP_WLOCK_ASSERT(ssk);
 	if (TCPS_HAVERCVDSYN(ssk->state))
 		sdp_output_reset(ssk);
 	/* A recorded soft error takes precedence over a plain timeout. */
 	if (errno == ETIMEDOUT && ssk->softerror)
 		so->so_error = ssk->softerror;
 	else
 		so->so_error = errno;
 	return (sdp_closed(ssk));
 }
 
 /*
  * The user closed the socket; step the connection through the shutdown
  * states:
  *  - LISTEN: tear down the CMA id (with the pcb lock dropped), then
  *    close as for CLOSED.
  *  - CLOSED: hand the pcb to sdp_closed().
  *  - SYN_SENT / SYN_RECEIVED: remember that a FIN is still owed.
  *  - ESTABLISHED: owe a FIN and move to FIN_WAIT_1.
  *  - CLOSE_WAIT: the peer already sent its FIN; move to LAST_ACK.
  * Any other state is left untouched.  Once the state is FIN_WAIT_2 or
  * later the user may return from close.
  *
  * Called with the ssk write lock held.
  */
 static void
 sdp_usrclosed(struct sdp_sock *ssk)
 {
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	switch (ssk->state) {
 	case TCPS_LISTEN:
 		ssk->state = TCPS_CLOSED;
 		/* sdp_destroy_cma() is called without the pcb lock. */
 		SDP_WUNLOCK(ssk);
 		sdp_destroy_cma(ssk);
 		SDP_WLOCK(ssk);
 		/* FALLTHROUGH */
 	case TCPS_CLOSED:
 		ssk = sdp_closed(ssk);
 		/* The socket is still open, so the pcb must survive. */
 		KASSERT(ssk != NULL,
 		    ("sdp_usrclosed: sdp_closed() returned NULL"));
 		break;
 
 	case TCPS_SYN_SENT:
 	case TCPS_SYN_RECEIVED:
 		ssk->flags |= SDP_NEEDFIN;
 		break;
 
 	case TCPS_ESTABLISHED:
 		ssk->state = TCPS_FIN_WAIT_1;
 		ssk->flags |= SDP_NEEDFIN;
 		break;
 
 	case TCPS_CLOSE_WAIT:
 		ssk->state = TCPS_LAST_ACK;
 		break;
 	}
 
 	if (ssk->state < TCPS_FIN_WAIT_2)
 		return;
 	if (ssk->state == TCPS_FIN_WAIT_2) {
 		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
 		sdp_2msl_wait(ssk);
 	} else {
 		soisdisconnected(ssk->socket);
 	}
 }
 
 /*
  * Begin the outbound half of a disconnect: arm the keep2msl callout to
  * run sdp_dreq_timeout() after SDP_FIN_WAIT_TIMEOUT, set SDP_NEEDFIN and
  * SDP_DREQWAIT, and kick the send path.
  *
  * Called with the ssk write lock held.
  */
 static void
 sdp_output_disconnect(struct sdp_sock *ssk)
 {
 
 	SDP_WLOCK_ASSERT(ssk);
 	callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT, sdp_dreq_timeout,
 	    ssk);
 	ssk->flags |= (SDP_NEEDFIN | SDP_DREQWAIT);
 	sdp_post_sends(ssk, M_NOWAIT);
 }
 
 /*
  * Initiate or continue a disconnect.
  *
  *  - Not yet established: just close the pcb.
  *  - SO_LINGER set with a zero linger time: drop the connection.
  *  - Otherwise: mark the socket disconnecting, discard unread input, run
  *    the user-close state change and then send either a reset (input was
  *    discarded) or a disconnect request.
  *
  * Called with the ssk write lock held.  Neither sdp_closed() nor
  * sdp_drop() should return NULL here, as the socket is still open.
  */
 static void
 sdp_start_disconnect(struct sdp_sock *ssk)
 {
 	struct socket *so = ssk->socket;
 	int pending;
 
 	SDP_WLOCK_ASSERT(ssk);
 	sdp_stop_keepalive_timer(so);
 
 	if (ssk->state < TCPS_ESTABLISHED) {
 		ssk = sdp_closed(ssk);
 		KASSERT(ssk != NULL,
 		    ("sdp_start_disconnect: sdp_close() returned NULL"));
 		return;
 	}
 	if ((so->so_options & SO_LINGER) != 0 && so->so_linger == 0) {
 		ssk = sdp_drop(ssk, 0);
 		KASSERT(ssk != NULL,
 		    ("sdp_start_disconnect: sdp_drop() returned NULL"));
 		return;
 	}
 
 	soisdisconnecting(so);
 	/* Remember whether input is being thrown away before flushing it. */
 	pending = sbused(&so->so_rcv);
 	sbflush(&so->so_rcv);
 	sdp_usrclosed(ssk);
 	if ((ssk->flags & SDP_DROPPED) != 0)
 		return;
 	if (pending)
 		sdp_output_reset(ssk);
 	else
 		sdp_output_disconnect(ssk);
 }
 
 /*
  * User initiated disconnect.  Returns ECONNRESET if the pcb is already
  * flagged SDP_TIMEWAIT or SDP_DROPPED, 0 otherwise.
  */
 static int
 sdp_disconnect(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 	int rc;
 
 	SDP_WLOCK(ssk);
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) != 0) {
 		rc = ECONNRESET;
 	} else {
 		sdp_start_disconnect(ssk);
 		rc = 0;
 	}
 	SDP_WUNLOCK(ssk);
 	return (rc);
 }
 
 /*
  * Accept a connection.  Essentially all the work is done at higher levels;
  * just return the address of the peer, storing through addr.
  *
  *
  * XXX This is broken XXX
  * 
  * The rationale for acquiring the sdp lock here is somewhat complicated,
  * and is described in detail in the commit log entry for r175612.  Acquiring
  * it delays an accept(2) racing with sonewconn(), which inserts the socket
  * before the address/port fields are initialized.  A better fix would
  * prevent the socket from being placed in the listen queue until all fields
  * are fully initialized.
  */
 static int
 sdp_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct sdp_sock *ssk = NULL;
 	struct in_addr addr;
 	in_port_t port;
 	int error;
 
 	if (so->so_state & SS_ISDISCONNECTED)
 		return (ECONNABORTED);
 
 	port = 0;
 	addr.s_addr = 0;
 	error = 0;
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		error = ECONNABORTED;
 		goto out;
 	}
 	port = ssk->fport;
 	addr.s_addr = ssk->faddr;
 out:
 	SDP_WUNLOCK(ssk);
 	if (error == 0)
 		*nam = sdp_sockaddr(port, &addr);
 	return error;
 }
 
 /*
  * Mark the connection as being incapable of further output and start the
  * outbound disconnect.  Returns ECONNRESET if the pcb is already flagged
  * SDP_TIMEWAIT or SDP_DROPPED, 0 otherwise.
  */
 static int
 sdp_shutdown(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 
 	SDP_WLOCK(ssk);
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) != 0) {
 		SDP_WUNLOCK(ssk);
 		return (ECONNRESET);
 	}
 	socantsendmore(so);
 	sdp_usrclosed(ssk);
 	/* sdp_usrclosed() may have dropped the pcb; re-check before sending. */
 	if ((ssk->flags & SDP_DROPPED) == 0)
 		sdp_output_disconnect(ssk);
 	SDP_WUNLOCK(ssk);
 
 	return (0);
 }
 
 /*
  * Queue the packet chain mb (which must carry a packet header) on the
  * socket buffer sb.  cnt is the caller's count of mbuf links in mb.
  *
  * Three cases:
  *  - empty queue: mb becomes the only record;
  *  - the last record is still writable and the merged packet stays below
  *    both SDP_MAX_SEND_SGES and ssk->xmit_size_goal: strip mb's SDP header
  *    and fold its data into the last record;
  *  - otherwise: link mb as a new record after the last one.
  *
  * Called with the sockbuf lock held.
  */
 static void
 sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
 {
 	struct mbuf *last, *walk;
 	int tailcnt;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	SBLASTRECORDCHK(sb);
 	KASSERT(mb->m_flags & M_PKTHDR,
 		("sdp_append: %p Missing packet header.\n", mb));
 
 	last = sb->sb_lastrecord;
 	if (last == NULL) {
 		/* Empty queue: every record pointer starts at mb. */
 		sb->sb_sndptr = mb;
 		sb->sb_mb = mb;
 		sb->sb_lastrecord = mb;
 		while (mb != NULL) {
 			sb->sb_mbtail = mb;
 			sballoc(sb, mb);
 			mb = mb->m_next;
 		}
 		return;
 	}
 
 	/* Count the links in the current last record. */
 	tailcnt = 0;
 	for (walk = last; walk->m_next; walk = walk->m_next)
 		tailcnt++;
 
 	/*
 	 * Coalesce when the combined chain still fits one SDP packet and the
 	 * last record is writable.  The last record stays where it is; mb
 	 * loses its SDP header and packet header and sbcompress() merges the
 	 * data in.
 	 */
 	if (M_WRITABLE(last) && tailcnt + cnt < SDP_MAX_SEND_SGES &&
 	    last->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
 	    ssk->xmit_size_goal) {
 		m_adj(mb, SDP_HEAD_SIZE);
 		last->m_pkthdr.len += mb->m_pkthdr.len;
 		last->m_flags |= mb->m_flags & (M_PUSH | M_URG);
 		m_demote(mb, 1, 0);
 		sbcompress(sb, mb, sb->sb_mbtail);
 		return;
 	}
 
 	/* Not compressible: close the old record and start a new one. */
 	last->m_flags |= M_PUSH;
 	last->m_nextpkt = mb;
 	sb->sb_lastrecord = mb;
 	if (sb->sb_sndptr == NULL)
 		sb->sb_sndptr = mb;
 	while (mb != NULL) {
 		sb->sb_mbtail = mb;
 		sballoc(sb, mb);
 		mb = mb->m_next;
 	}
 }
 
 /*
  * Do a send by putting data in the output queue and marking it urgent if
  * PRUS_OOB is set.  Possibly send more data.  Unlike the other pru_*()
  * routines, the mbuf chains are our responsibility: both m and control
  * must be either enqueued or freed on every path out of this function.
  * The other pru_* routines generally are caller-frees.
  *
  * This comes from sendfile, normal sends will come from sdp_sosend().
  *
  * Returns 0 on success, or:
  *   EAFNOSUPPORT / EINVAL  bad destination sockaddr, or non-empty control
  *   EMSGSIZE               chain cannot be collapsed to SDP_MAX_SEND_SGES
  *   ECONNRESET             pcb is flagged SDP_TIMEWAIT or SDP_DROPPED
  *   ENOBUFS                OOB send with the send buffer over-committed
  *   any error from an implied sdp_start_connect()
  */
 static int
 sdp_send(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	struct sdp_sock *ssk;
 	struct mbuf *n;
 	int error;
 	int cnt;
 
 	if (nam != NULL) {
 		if (nam->sa_family != AF_INET) {
 			if (control)
 				m_freem(control);
 			m_freem(m);
 			return (EAFNOSUPPORT);
 		}
 		if (nam->sa_len != sizeof(struct sockaddr_in)) {
 			if (control)
 				m_freem(control);
 			m_freem(m);
 			return (EINVAL);
 		}
 	}
 
 	error = 0;
 	ssk = sdp_sk(so);
 	KASSERT(m->m_flags & M_PKTHDR,
 	    ("sdp_send: %p no packet header", m));
 	/* Make room for the SDP header and tag the packet as data. */
 	M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
 	mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
 	/* cnt is the number of links in the chain (mbufs minus one). */
 	for (n = m, cnt = 0; n->m_next; n = n->m_next)
 		cnt++;
 	if (cnt > SDP_MAX_SEND_SGES) {
 		n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
 		if (n == NULL) {
 			/*
 			 * We own control as well as m; free both so the
 			 * control chain is not leaked on this path.
 			 */
 			if (control)
 				m_freem(control);
 			m_freem(m);
 			return (EMSGSIZE);
 		}
 		m = n;
 		for (cnt = 0; n->m_next; n = n->m_next)
 			cnt++;
 	}
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		if (control)
 			m_freem(control);
 		if (m)
 			m_freem(m);
 		error = ECONNRESET;
 		goto out;
 	}
 	if (control) {
 		/* SDP doesn't support control messages. */
 		if (control->m_len) {
 			m_freem(control);
 			if (m)
 				m_freem(m);
 			error = EINVAL;
 			goto out;
 		}
 		m_freem(control);	/* empty control, just free it */
 	}
 	if (!(flags & PRUS_OOB)) {
 		SOCKBUF_LOCK(&so->so_snd);
 		sdp_append(ssk, &so->so_snd, m, cnt);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (nam && ssk->state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected.
 			 */
 			error = sdp_start_connect(ssk, nam, td);
 			if (error)
 				goto out;
 		}
 		if (flags & PRUS_EOF) {
 			/*
 			 * Close the send side of the connection after
 			 * the data is sent.
 			 */
 			socantsendmore(so);
 			sdp_usrclosed(ssk);
 			if (!(ssk->flags & SDP_DROPPED))
 				sdp_output_disconnect(ssk);
 		} else if (!(ssk->flags & SDP_DROPPED) &&
 		    !(flags & PRUS_MORETOCOME))
 			sdp_post_sends(ssk, M_NOWAIT);
 		SDP_WUNLOCK(ssk);
 		return (0);
 	} else {
 		SOCKBUF_LOCK(&so->so_snd);
 		/* Refuse OOB data once the buffer is over-committed by 512. */
 		if (sbspace(&so->so_snd) < -512) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			m_freem(m);
 			error = ENOBUFS;
 			goto out;
 		}
 		/*
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section.
 		 * Otherwise, snd_up should be one lower.
 		 */
 		m->m_flags |= M_URG | M_PUSH;
 		sdp_append(ssk, &so->so_snd, m, cnt);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (nam && ssk->state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected.
 			 */
 			error = sdp_start_connect(ssk, nam, td);
 			if (error)
 				goto out;
 		}
 		sdp_post_sends(ssk, M_NOWAIT);
 		SDP_WUNLOCK(ssk);
 		return (0);
 	}
 out:
 	SDP_WUNLOCK(ssk);
 	return (error);
 }
 
 /*
  * Send on a socket.  If send must go all at once and message is larger than
  * send buffering, then hard error.  Lock against other senders.  If must go
  * all at once and not enough room now, then inform user that this would
  * block and do nothing.  Otherwise, if nonblocking, send as much as
  * possible.  The data to be sent is described by "uio" if nonzero, otherwise
  * by the mbuf chain "top" (which must be null if uio is not).  Data provided
  * in mbuf chain must be small enough to send all at once.
  *
  * Returns nonzero on error, timeout or signal; callers must check for short
  * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
  * on return.
  */
 static int
 sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	struct sdp_sock *ssk;
 	long space, resid;
 	int atomic;
 	int error;
 	int copy;
 
 	/*
 	 * NOTE(review): top is dereferenced whenever uio is NULL, so this
 	 * assumes callers pass exactly one of uio/top -- confirm.
 	 */
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/* A caller-supplied mbuf chain has to go out in one piece. */
 	atomic = top != NULL;
 	/* Non-empty control data is rejected; an empty one is just freed. */
 	if (control != NULL) {
 		if (control->m_len) {
 			m_freem(control);
 			if (top)
 				m_freem(top);
 			return (EINVAL);
 		}
 		m_freem(control);
 		control = NULL;
 	}
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 *
 	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
 	 * type sockets since that's an error.
 	 */
 	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
 		error = EINVAL;
 		goto out;
 	}
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 
 	ssk = sdp_sk(so);
 	/* Lock against other senders; released at the release label. */
 	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 
 restart:
 	do {
 		/* Re-validate the socket state under the send-buffer lock. */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EPIPE;
 			goto release;
 		}
 		if (so->so_error) {
 			/* Report the pending socket error once and clear it. */
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = ENOTCONN;
 			goto release;
 		}
 		space = sbspace(&so->so_snd);
 		/* Out-of-band sends get 1024 bytes of extra headroom. */
 		if (flags & MSG_OOB)
 			space += 1024;
 		/* An atomic send must fit in a single SDP packet payload. */
 		if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EMSGSIZE;
 			goto release;
 		}
 		if (space < resid &&
 		    (atomic || space < so->so_snd.sb_lowat)) {
 			if ((so->so_state & SS_NBIO) ||
 			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EWOULDBLOCK;
 				goto release;
 			}
 			/* Not enough room: wait, then redo every check above. */
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
 			goto restart;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 		/* Hand data to sdp_send() until space or data runs out. */
 		do {
 			if (uio == NULL) {
 				resid = 0;
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 			} else {
 				/*
 				 * Copy the data from userland into a mbuf
 				 * chain.  If no data is to be copied in,
 				 * a single empty mbuf is returned.
 				 */
 				copy = min(space,
 				    ssk->xmit_size_goal - SDP_HEAD_SIZE);
 				top = m_uiotombuf(uio, M_WAITOK, copy,
 				    0, M_PKTHDR |
 				    ((flags & MSG_EOR) ? M_EOR : 0));
 				if (top == NULL) {
 					/* only possible error */
 					error = EFAULT;
 					goto release;
 				}
 				/* Charge what was just copied against space. */
 				space -= resid - uio->uio_resid;
 				resid = uio->uio_resid;
 			}
 			/*
 			 * XXX all the SBS_CANTSENDMORE checks previously
 			 * done could be out of date after dropping the
 			 * socket lock.
 			 */
 			error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
 			/*
 			 * Set EOF on the last send if the user specified
 			 * MSG_EOF.
 			 */
 			    ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
 			/* If there is more to send set PRUS_MORETOCOME. */
 			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 			    top, addr, NULL, td);
 			/* sdp_send() consumed the chain, success or not. */
 			top = NULL;
 			if (error)
 				goto release;
 		} while (resid && space > 0);
 	} while (resid);
 
 release:
 	SOCK_IO_SEND_UNLOCK(so);
 out:
 	/* Free a chain that was never handed to sdp_send(). */
 	if (top != NULL)
 		m_freem(top);
 	return (error);
 }
 
 /*
  * The part of soreceive() that implements reading non-inline out-of-band
  * data from a socket.  For more complete comments, see soreceive(), from
  * which this code originated.
  *
  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
  * unable to return an mbuf chain to the caller.
  */
 static int
 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
 {
 	struct protosw *pr = so->so_proto;
 	struct mbuf *m;
 	int error;
 
 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
 
 	m = m_get(M_WAITOK, MT_DATA);
 	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
 	if (error)
 		goto bad;
 	do {
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
 	} while (uio->uio_resid && error == 0 && m);
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Optimized version of soreceive() for stream (TCP) sockets.
  */
 static int
 sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int len = 0, error = 0, flags, oresid;
 	struct sockbuf *sb;
 	struct mbuf *m, *n = NULL;
 	struct sdp_sock *ssk;
 
 	/* We only do stream sockets. */
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	/* No source address is ever returned. */
 	if (psa != NULL)
 		*psa = NULL;
 	/* A request for control data is rejected. */
 	if (controlp != NULL)
 		return (EINVAL);
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	/* Out-of-band reads take a separate path and never touch so_rcv. */
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp0 != NULL)
 		*mp0 = NULL;
 
 	sb = &so->so_rcv;
 	ssk = sdp_sk(so);
 
 	/* Prevent other readers from entering the socket. */
 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 	SOCKBUF_LOCK(sb);
 
 	/* Easy one, no space to copyout anything. */
 	if (uio->uio_resid == 0) {
 		error = EINVAL;
 		goto out;
 	}
 	/* Remember the starting residual to detect partial progress. */
 	oresid = uio->uio_resid;
 
 	/* We will never ever get anything unless we are connected. */
 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
 		/* When disconnecting there may be still some data left. */
 		if (sbavail(sb))
 			goto deliver;
 		if (!(so->so_state & SS_ISDISCONNECTED))
 			error = ENOTCONN;
 		goto out;
 	}
 
 	/* Socket buffer is empty and we shall not block. */
 	if (sbavail(sb) == 0 &&
 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
 		error = EAGAIN;
 		goto out;
 	}
 
 restart:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	/* Abort if socket has reported problems. */
 	if (so->so_error) {
 		/* Buffered data is delivered before the error is reported. */
 		if (sbavail(sb))
 			goto deliver;
 		/* Something was already copied out: return that, not the error. */
 		if (oresid > uio->uio_resid)
 			goto out;
 		error = so->so_error;
 		if (!(flags & MSG_PEEK))
 			so->so_error = 0;
 		goto out;
 	}
 
 	/* Door is closed.  Deliver what is left, if any. */
 	if (sb->sb_state & SBS_CANTRCVMORE) {
 		if (sbavail(sb))
 			goto deliver;
 		else
 			goto out;
 	}
 
 	/* Socket buffer got some data that we shall deliver now. */
 	if (sbavail(sb) && !(flags & MSG_WAITALL) &&
 	    ((so->so_state & SS_NBIO) ||
 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
 	     sbavail(sb) >= sb->sb_lowat ||
 	     sbavail(sb) >= uio->uio_resid ||
 	     sbavail(sb) >= sb->sb_hiwat) ) {
 		goto deliver;
 	}
 
 	/* On MSG_WAITALL we must wait until all data or error arrives. */
 	if ((flags & MSG_WAITALL) &&
 	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
 		goto deliver;
 
 	/*
 	 * Wait and block until (more) data comes in.
 	 * NB: Drops the sockbuf lock during wait.
 	 */
-	error = sbwait(sb);
+	error = sbwait(so, SO_RCV);
 	if (error)
 		goto out;
 	goto restart;
 
 deliver:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
 
 	/* Statistics. */
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 
 	/* Fill uio until full or current end of socket buffer is reached. */
 	len = min(uio->uio_resid, sbavail(sb));
 	if (mp0 != NULL) {
 		/* Dequeue as many mbufs as possible. */
 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
 			for (*mp0 = m = sb->sb_mb;
 			     m != NULL && m->m_len <= len;
 			     m = m->m_next) {
 				len -= m->m_len;
 				uio->uio_resid -= m->m_len;
 				sbfree(sb, m);
 				n = m;
 			}
 			sb->sb_mb = m;
 			if (sb->sb_mb == NULL)
 				SB_EMPTY_FIXUP(sb);
 			/*
 			 * n is the last mbuf handed over; the guard above
 			 * guarantees the loop ran at least once.  Cut the
 			 * chain off from what remains in the sockbuf.
 			 */
 			n->m_next = NULL;
 		}
 		/* Copy the remainder. */
 		if (len > 0) {
 			KASSERT(sb->sb_mb != NULL,
 			    ("%s: len > 0 && sb->sb_mb empty", __func__));
 
 			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
 			if (m == NULL)
 				len = 0;	/* Don't flush data from sockbuf. */
 			else
 				uio->uio_resid -= m->m_len;
 			/* Append the copy to whatever was dequeued above. */
 			if (*mp0 != NULL)
 				n->m_next = m;
 			else
 				*mp0 = m;
 			/* Nothing dequeued and the copy failed: report it. */
 			if (*mp0 == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 		}
 	} else {
 		/* NB: Must unlock socket buffer as uiomove may sleep. */
 		SOCKBUF_UNLOCK(sb);
 		error = m_mbuftouio(uio, sb->sb_mb, len);
 		SOCKBUF_LOCK(sb);
 		if (error)
 			goto out;
 	}
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 
 	/*
 	 * Remove the delivered data from the socket buffer unless we
 	 * were only peeking.
 	 */
 	if (!(flags & MSG_PEEK)) {
 		if (len > 0)
 			sbdrop_locked(sb, len);
 
 		/* Notify protocol that we drained some data. */
 		/* The sockbuf lock is released before taking the pcb lock. */
 		SOCKBUF_UNLOCK(sb);
 		SDP_WLOCK(ssk);
 		sdp_do_posts(ssk);
 		SDP_WUNLOCK(ssk);
 		SOCKBUF_LOCK(sb);
 	}
 
 	/*
 	 * For MSG_WAITALL we may have to loop again and wait for
 	 * more data to come in.
 	 */
 	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
 		goto restart;
 out:
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 	SOCKBUF_UNLOCK(sb);
 	SOCK_IO_RECV_UNLOCK(so);
 	return (error);
 }
 
 /*
  * Abort is used to tear down a connection, typically while it is sitting
  * in the accept queue.  The pcb is dropped with ECONNABORTED unless it is
  * already in timewait or dropped.
  */
 void
 sdp_abort(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 
 	SDP_WLOCK(ssk);
 	/* If we have not yet dropped, do it now. */
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) == 0)
 		sdp_drop(ssk, ECONNABORTED);
 	KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
 	    ssk, ssk->flags));
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * Close a SDP socket and initiate a friendly disconnect.
  *
  * If the pcb is still not dropped once the disconnect has been started,
  * SS_PROTOREF is set on the socket and SDP_SOCKREF on the pcb so the
  * socket layer knows the protocol is holding on to both for a while.
  */
 static void
 sdp_close(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 
 	SDP_WLOCK(ssk);
 	/* Start the disconnect unless it already happened. */
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) == 0)
 		sdp_start_disconnect(ssk);
 
 	if ((ssk->flags & SDP_DROPPED) == 0) {
 		SOCK_LOCK(so);
 		so->so_state |= SS_PROTOREF;
 		SOCK_UNLOCK(so);
 		ssk->flags |= SDP_SOCKREF;
 	}
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * User requests out-of-band data.  On success the single saved OOB byte
  * is stored in m (m_len set to 1).
  *
  * Returns:
  *   ECONNRESET   the rx ring lock could not be taken, or the pcb is
  *                flagged SDP_TIMEWAIT/SDP_DROPPED
  *   EINVAL       no OOB mark is set, SO_OOBINLINE is on, or the byte was
  *                already consumed (SDP_HADOOB)
  *   EWOULDBLOCK  no OOB byte is currently held (SDP_HAVEOOB clear)
  */
 static int
 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 	int rc = 0;
 
 	SDP_WLOCK(ssk);
 	if (!rx_ring_trylock(&ssk->rx_ring)) {
 		SDP_WUNLOCK(ssk);
 		return (ECONNRESET);
 	}
 
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) != 0) {
 		rc = ECONNRESET;
 	} else if ((so->so_oobmark == 0 &&
 	    (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 	    (so->so_options & SO_OOBINLINE) ||
 	    (ssk->oobflags & SDP_HADOOB)) {
 		rc = EINVAL;
 	} else if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
 		rc = EWOULDBLOCK;
 	} else {
 		m->m_len = 1;
 		*mtod(m, caddr_t) = ssk->iobc;
 		/* A real read flips HAVEOOB off and HADOOB on; a peek doesn't. */
 		if ((flags & MSG_PEEK) == 0)
 			ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
 	}
 
 	rx_ring_unlock(&ssk->rx_ring);
 	SDP_WUNLOCK(ssk);
 	return (rc);
 }
 
 /*
  * Record the arrival of urgent data carried by the packet mb.
  *
  * The OOB mark is placed at the last byte of mb relative to what is
  * already buffered, the socket is told OOB data exists, and both OOB state
  * flags are reset.  Unless SO_OOBINLINE is set, the final byte of the
  * packet is pulled out into ssk->iobc and the packet shortened by one.
  * Does nothing if the pcb has no socket.
  */
 void
 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
 {
 	struct socket *so = ssk->socket;
 	struct mbuf *tail;
 
 	if (so == NULL)
 		return;
 
 	so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
 	sohasoutofband(so);
 	ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
 	if ((so->so_options & SO_OOBINLINE) != 0)
 		return;
 
 	/* Find the last mbuf of the packet; the OOB byte is its last byte. */
 	tail = mb;
 	while (tail->m_next != NULL)
 		tail = tail->m_next;
 	ssk->iobc = *(mtod(tail, char *) + tail->m_len - 1);
 	ssk->oobflags |= SDP_HAVEOOB;
 	tail->m_len--;
 	mb->m_pkthdr.len--;
 }
 
 /*
  * Notify a sdp socket of an asynchronous error.
  *
  * Do not wake up user since there currently is no mechanism for
  * reporting soft errors (yet - a kqueue filter may be added).
  */
 struct sdp_sock *
 sdp_notify(struct sdp_sock *ssk, int error)
 {
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	if ((ssk->flags & SDP_TIMEWAIT) ||
 	    (ssk->flags & SDP_DROPPED))
 		return (ssk);
 
 	/*
 	 * Ignore some errors if we are hooked up.
 	 */
 	if (ssk->state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN))
 		return (ssk);
 	ssk->softerror = error;
 	return sdp_drop(ssk, error);
 }
 
 static void
 sdp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct in_addr faddr;
 
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
 	sdp_pcbnotifyall(faddr, inetctlerrmap[cmd], sdp_notify);
 }
 
 /*
  * ioctl-style control requests are not supported on SDP sockets; every
  * request fails with EOPNOTSUPP regardless of its arguments.
  */
 static int
 sdp_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
     struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * keep2msl callout handler for keepalives: post a keepalive and re-arm
  * the callout for SDP_KEEPALIVE_TIME, unless the pcb has been dropped or
  * SO_KEEPALIVE is no longer set on the socket.
  *
  * The pcb write lock is released before returning on every path except
  * the "callout canceled" one.
  * NOTE(review): that early return leaves without SDP_WUNLOCK(); whether
  * this is a leak depends on how the callout was initialised, which is not
  * visible here -- confirm.
  */
 static void
 sdp_keepalive_timeout(void *data)
 {
 	struct sdp_sock *ssk = data;
 
 	/* Callout canceled. */
 	if (!callout_active(&ssk->keep2msl))
 		return;
 
 	/* Skip if the callout was rescheduled as a different kind of timer. */
 	if (!callout_pending(&ssk->keep2msl)) {
 		callout_deactivate(&ssk->keep2msl);
 		if ((ssk->flags & SDP_DROPPED) == 0 &&
 		    (ssk->socket->so_options & SO_KEEPALIVE) != 0) {
 			sdp_post_keepalive(ssk);
 			callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
 			    sdp_keepalive_timeout, ssk);
 		}
 	}
 	SDP_WUNLOCK(ssk);
 }
 
 
 /*
  * Arm the keepalive callout for SDP_KEEPALIVE_TIME unless the keep2msl
  * callout is already pending.
  */
 void
 sdp_start_keepalive_timer(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 
 	if (callout_pending(&ssk->keep2msl))
 		return;
 	callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
 	    sdp_keepalive_timeout, ssk);
 }
 
 /*
  * Stop the keep2msl callout of the pcb attached to so.
  */
 static void
 sdp_stop_keepalive_timer(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 
 	callout_stop(&ssk->keep2msl);
 }
 
 /*
  * sdp_ctloutput() must drop the pcb lock before performing copyin on
  * socket option arguments.  When it re-acquires the lock after the copy, it
  * has to revalidate that the connection is still valid for the socket
  * option.  This macro returns ECONNRESET from the enclosing function if it
  * is not.
  */
 #define SDP_WLOCK_RECHECK(ssk) do {					\
 	SDP_WLOCK(ssk);							\
 	if ((ssk)->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {		\
 		SDP_WUNLOCK(ssk);					\
 		return (ECONNRESET);					\
 	}								\
 } while(0)
 
 /*
  * Socket option handler.
  *
  * SOL_SOCKET/SO_KEEPALIVE starts or stops the keepalive timer to match
  * the socket's current option state.  At level IPPROTO_TCP only
  * TCP_NODELAY is supported, for both get and set; any other name yields
  * ENOPROTOOPT.  Options at other levels return 0 without further action.
  * Returns ECONNRESET if the pcb is in timewait or dropped.
  */
 static int
 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	int	error, opt, optval;
 	struct sdp_sock *ssk;
 
 	error = 0;
 	ssk = sdp_sk(so);
 	if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
 		SDP_WLOCK(ssk);
 		if (so->so_options & SO_KEEPALIVE)
 			sdp_start_keepalive_timer(so);
 		else
 			sdp_stop_keepalive_timer(so);
 		SDP_WUNLOCK(ssk);
 	}
 	if (sopt->sopt_level != IPPROTO_TCP)
 		return (error);
 
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		SDP_WUNLOCK(ssk);
 		return (ECONNRESET);
 	}
 
 	/* Every arm below is responsible for releasing the pcb lock. */
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case TCP_NODELAY:
 			/* The lock is not held across the copyin. */
 			SDP_WUNLOCK(ssk);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			SDP_WLOCK_RECHECK(ssk);
 			opt = SDP_NODELAY;
 			if (optval)
 				ssk->flags |= opt;
 			else
 				ssk->flags &= ~opt;
 			sdp_do_posts(ssk);
 			SDP_WUNLOCK(ssk);
 			break;
 
 		default:
 			SDP_WUNLOCK(ssk);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case TCP_NODELAY:
 			optval = ssk->flags & SDP_NODELAY;
 			SDP_WUNLOCK(ssk);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		default:
 			SDP_WUNLOCK(ssk);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		/* Unknown direction: never leave with the lock held. */
 		SDP_WUNLOCK(ssk);
 		break;
 	}
 	return (error);
 }
 #undef SDP_WLOCK_RECHECK
 
 /* Completion-queue moderation settings; moderation is off while either is <= 0. */
 int sdp_mod_count = 0;
 int sdp_mod_usec = 0;
 
 /*
  * Apply the global moderation settings to the socket's receive CQ.  Does
  * nothing unless both sdp_mod_count and sdp_mod_usec are positive.
  */
 void
 sdp_set_default_moderation(struct sdp_sock *ssk)
 {
 	if (sdp_mod_count > 0 && sdp_mod_usec > 0)
 		ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
 }
 
 /*
  * ib_client "add" callback: allocate the per-device SDP state (protection
  * domain and FMR pool) and attach it to the device as client data.
  *
  * On failure everything allocated so far is released and no client data
  * is set; the failure is not reported to the caller.
  */
 static void
 sdp_dev_add(struct ib_device *device)
 {
 	struct ib_fmr_pool_param param;
 	struct sdp_device *sdp_dev;
 
 	sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
 
 	sdp_dev->pd = ib_alloc_pd(device, 0);
 	if (IS_ERR(sdp_dev->pd)) {
 		free(sdp_dev, M_SDP);
 		return;
 	}
 
 	memset(&param, 0, sizeof param);
 	param.max_pages_per_fmr = SDP_FMR_SIZE;
 	param.page_shift = PAGE_SHIFT;
 	param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
 	param.pool_size = SDP_FMR_POOL_SIZE;
 	param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
 	param.cache = 1;
 
 	sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param);
 	if (IS_ERR(sdp_dev->fmr_pool)) {
 		/* Undo in reverse order of construction. */
 		ib_dealloc_pd(sdp_dev->pd);
 		free(sdp_dev, M_SDP);
 		return;
 	}
 
 	ib_set_client_data(device, &sdp_client, sdp_dev);
 }
 
 /*
  * ib_client "remove" callback: reset every SDP connection that uses the
  * departing device, then release the per-device state created by
  * sdp_dev_add().  client_data is unused; the state is looked up through
  * ib_get_client_data().
  */
 static void
 sdp_dev_rem(struct ib_device *device, void *client_data)
 {
 	struct sdp_device *sdp_dev;
 	struct sdp_sock *ssk, *next;
 
 	SDP_LIST_WLOCK();
 	/*
 	 * Use the _SAFE iterator: ssk is overwritten with sdp_notify()'s
 	 * return value, which may be NULL, so the loop must not rely on ssk
 	 * to find the next element.
 	 */
 	LIST_FOREACH_SAFE(ssk, &sdp_list, list, next) {
 		if (ssk->ib_device != device)
 			continue;
 		SDP_WLOCK(ssk);
 		if ((ssk->flags & SDP_DESTROY) == 0)
 			ssk = sdp_notify(ssk, ECONNRESET);
 		/* A NULL return means there is no pcb left to unlock. */
 		if (ssk)
 			SDP_WUNLOCK(ssk);
 	}
 	SDP_LIST_WUNLOCK();
 	/*
 	 * XXX Do I need to wait between these two?
 	 */
 	sdp_dev = ib_get_client_data(device, &sdp_client);
 	if (!sdp_dev)
 		return;
 	ib_flush_fmr_pool(sdp_dev->fmr_pool);
 	ib_destroy_fmr_pool(sdp_dev->fmr_pool);
 	ib_dealloc_pd(sdp_dev->pd);
 	free(sdp_dev, M_SDP);
 }
 
 /* InfiniBand client descriptor: device add/remove callbacks for SDP. */
 struct ib_client sdp_client = {
 	.name = "sdp",
 	.add = sdp_dev_add,
 	.remove = sdp_dev_rem,
 };
 
 
 /*
  * Sysctl handler that exports the SDP connection list as a sequence of
  * struct xtcpcb records, bracketed by a leading and a trailing struct
  * xinpgen.
  *
  * With no output buffer only a size estimate is returned.  Writes are
  * refused with EPERM.  Connections the requesting credential may not see
  * are skipped silently.
  */
 static int
 sdp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct sdp_sock *ssk;
 	struct xinpgen xig;
 	int error, i, n;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request, so a size
 	 * probe gets an estimate with some slack added.
 	 */
 	if (req->oldptr == NULL) {
 		n = sdp_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	SDP_LIST_RLOCK();
 	n = sdp_count;
 	SDP_LIST_RUNLOCK();
 
 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 		+ n * sizeof(struct xtcpcb));
 	if (error != 0)
 		return (error);
 
 	/* Leading generation header. */
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = 0;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	SDP_LIST_RLOCK();
 	/* i counts records actually emitted; at most n are written. */
 	for (ssk = LIST_FIRST(&sdp_list), i = 0;
 	    ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
 		struct xtcpcb xt;
 
 		SDP_RLOCK(ssk);
 		/* Visibility check against the requesting credential. */
 		if (ssk->flags & SDP_TIMEWAIT) {
 			if (ssk->cred != NULL)
 				error = cr_cansee(req->td->td_ucred,
 				    ssk->cred);
 			else
 				error = EINVAL;	/* Skip this inp. */
 		} else if (ssk->socket)
 			error = cr_canseesocket(req->td->td_ucred,
 			    ssk->socket);
 		else
 			error = EINVAL;
 		if (error) {
 			/* Not visible: skip it without failing the request. */
 			error = 0;
 			SDP_RUNLOCK(ssk);
 			continue;
 		}
 
 		bzero(&xt, sizeof(xt));
 		xt.xt_len = sizeof xt;
 		xt.xt_inp.inp_gencnt = 0;
 		xt.xt_inp.inp_vflag = INP_IPV4;
 		memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
 		xt.xt_inp.inp_lport = ssk->lport;
 		memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
 		xt.xt_inp.inp_fport = ssk->fport;
 		xt.t_state = ssk->state;
 		if (ssk->socket != NULL)
 			sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
 		xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
 		SDP_RUNLOCK(ssk);
 
 		error = SYSCTL_OUT(req, &xt, sizeof xt);
 		if (error)
 			break;
 		i++;
 	}
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		xig.xig_gen = 0;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = sdp_count;
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	SDP_LIST_RUNLOCK();
 	return (error);
 }
 
 /* Sysctl node "sdp" under _net_inet; its number is assigned dynamically (-1). */
 SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SDP");
 
 /* Read-only "pcblist" entry under that node, served by sdp_pcblist(). */
 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
     CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
     0, 0, sdp_pcblist, "S,xtcpcb",
     "List of active SDP connections");
 
 /*
  * maxsockets_change event handler: re-apply the current maxsockets value
  * as the limit of the sdp_sock zone.  tag is unused.
  */
 static void
 sdp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(sdp_zone, maxsockets);
 }
 
 /*
  * One-time SDP initialisation, run from SYSINIT: set up the global pcb
  * list and the sdp_sock zone (capped at maxsockets and kept in step with
  * later maxsockets changes), create the receive-completion workqueue and
  * register with the InfiniBand core.
  */
 static void
 sdp_init(void *arg __unused)
 {
 
 	LIST_INIT(&sdp_list);
 
 	sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(sdp_zone, maxsockets);
 	EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
 
 	rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
 	ib_register_client(&sdp_client);
 }
 SYSINIT(sdp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, sdp_init, NULL);
 
 extern struct domain sdpdomain;
 
 /*
  * User-request table for SDP sockets: maps each socket-layer operation to
  * its SDP implementation.  Send and receive use the SDP-specific
  * sdp_sosend()/sdp_sorecv() entry points.
  */
 struct pr_usrreqs sdp_usrreqs = {
 	.pru_abort =		sdp_abort,
 	.pru_accept =		sdp_accept,
 	.pru_attach =		sdp_attach,
 	.pru_bind =		sdp_bind,
 	.pru_connect =		sdp_connect,
 	.pru_control =		sdp_control,
 	.pru_detach =		sdp_detach,
 	.pru_disconnect =	sdp_disconnect,
 	.pru_listen =		sdp_listen,
 	.pru_peeraddr =		sdp_getpeeraddr,
 	.pru_rcvoob =		sdp_rcvoob,
 	.pru_send =		sdp_send,
 	.pru_sosend =		sdp_sosend,
 	.pru_soreceive =	sdp_sorecv,
 	.pru_shutdown =		sdp_shutdown,
 	.pru_sockaddr =		sdp_getsockaddr,
 	.pru_close =		sdp_close,
 };
 
 /*
  * Protocol switch for the SDP domain.  Both entries describe the same
  * stream protocol and differ only in pr_protocol, so a SOCK_STREAM socket
  * can be requested with either IPPROTO_IP or IPPROTO_TCP.
  */
 struct protosw sdpsw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&sdpdomain,
 	.pr_protocol =		IPPROTO_IP,
 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
 	.pr_ctlinput =		sdp_ctlinput,
 	.pr_ctloutput =		sdp_ctloutput,
 	.pr_usrreqs =		&sdp_usrreqs
 },
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&sdpdomain,
 	.pr_protocol =		IPPROTO_TCP,
 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
 	.pr_ctlinput =		sdp_ctlinput,
 	.pr_ctloutput =		sdp_ctloutput,
 	.pr_usrreqs =		&sdp_usrreqs
 },
 };
 
 /*
  * The AF_INET_SDP domain.  dom_protoswNPROTOSW points one element past
  * the end of sdpsw[].
  */
 struct domain sdpdomain = {
 	.dom_family =		AF_INET_SDP,
 	.dom_name =		"SDP",
 	.dom_protosw =		sdpsw,
 	.dom_protoswNPROTOSW =	&sdpsw[sizeof(sdpsw)/sizeof(sdpsw[0])],
 };
 
 DOMAIN_SET(sdp);
 
 /*
  * Global debug verbosity knobs.
  * NOTE(review): presumably read by the SDP debug/trace macros defined
  * elsewhere -- confirm.
  */
 int sdp_debug_level = 1;
 int sdp_data_debug_level = 0;
diff --git a/sys/rpc/clnt_bck.c b/sys/rpc/clnt_bck.c
index 514905bf1cc2..810a957bb97b 100644
--- a/sys/rpc/clnt_bck.c
+++ b/sys/rpc/clnt_bck.c
@@ -1,619 +1,619 @@
 /*	$NetBSD: clnt_vc.c,v 1.4 2000/07/14 08:40:42 fvdl Exp $	*/
 
 /*-
  * Copyright (c) 2009, Sun Microsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without 
  * modification, are permitted provided that the following conditions are met:
  * - Redistributions of source code must retain the above copyright notice, 
  *   this list of conditions and the following disclaimer.
  * - Redistributions in binary form must reproduce the above copyright notice, 
  *   this list of conditions and the following disclaimer in the documentation 
  *   and/or other materials provided with the distribution.
  * - Neither the name of Sun Microsystems, Inc. nor the names of its 
  *   contributors may be used to endorse or promote products derived 
  *   from this software without specific prior written permission.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char *sccsid2 = "@(#)clnt_tcp.c 1.37 87/10/05 Copyr 1984 Sun Micro";
 static char *sccsid = "@(#)clnt_tcp.c	2.2 88/08/01 4.0 RPCSRC";
 static char sccsid3[] = "@(#)clnt_vc.c 1.19 89/03/16 Copyr 1988 Sun Micro";
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
  
 /*
  * clnt_tcp.c, Implements a TCP/IP based, client side RPC.
  *
  * Copyright (C) 1984, Sun Microsystems, Inc.
  *
  * TCP based RPC supports 'batched calls'.
  * A sequence of calls may be batched-up in a send buffer.  The rpc call
  * return immediately to the client even though the call was not necessarily
  * sent.  The batching occurs if the results' xdr routine is NULL (0) AND
  * the rpc timeout value is zero (see clnt.h, rpc).
  *
  * Clients should NOT casually batch calls that in fact return results; that is,
  * the server side should be aware that a call is batched and not produce any
  * return message.  Batched calls that produce many result messages can
  * deadlock (netlock) the client and the server....
  *
  * Now go hang yourself.
  */
 
 /*
  * This code handles the special case of an NFSv4.n backchannel for
  * callback RPCs. It is similar to clnt_vc.c, but uses the TCP
  * connection provided by the client to the server.
  */
 
 #include "opt_kern_tls.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/ktls.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 #include <sys/time.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <netinet/tcp.h>
 
 #include <rpc/rpc.h>
 #include <rpc/rpc_com.h>
 #include <rpc/krpc.h>
 #include <rpc/rpcsec_tls.h>
 
 /*
  * A control-message header followed by a cmsgcred payload.
  * NOTE(review): nothing in this file refers to this type.
  */
 struct cmessage {
         struct cmsghdr cmsg;
         struct cmsgcred cmcred;
 };
 
 /* Handlers installed in clnt_bck_ops. */
 static void clnt_bck_geterr(CLIENT *, struct rpc_err *);
 static bool_t clnt_bck_freeres(CLIENT *, xdrproc_t, void *);
 static void clnt_bck_abort(CLIENT *);
 static bool_t clnt_bck_control(CLIENT *, u_int, void *);
 static void clnt_bck_close(CLIENT *);
 static void clnt_bck_destroy(CLIENT *);
 
 /*
  * Operations vector for backchannel client handles.  No .cl_call member
  * is set here; clnt_bck_call() in this file is a separate, non-static
  * entry point that takes an additional SVCXPRT argument.
  */
 static const struct clnt_ops clnt_bck_ops = {
 	.cl_abort =	clnt_bck_abort,
 	.cl_geterr =	clnt_bck_geterr,
 	.cl_freeres =	clnt_bck_freeres,
 	.cl_close =	clnt_bck_close,
 	.cl_destroy =	clnt_bck_destroy,
 	.cl_control =	clnt_bck_control
 };
 
 /*
  * Build the CLIENT handle used to issue NFSv4.1 session backchannel
  * (callback) RPCs.  Those calls travel over a TCP connection that the
  * client originally established against the server.
  * Defaults are applied here; clnt_control() may be used to change them.
  *
  * Returns the new handle, or NULL if the call header cannot be encoded.
  */
 void *
 clnt_bck_create(
 	struct socket *so,		/* Server transport socket. */
 	const rpcprog_t prog,		/* program number */
 	const rpcvers_t vers)		/* version number */
 {
 	static uint32_t disrupt;	/* perturbs the initial XID */
 	struct rpc_msg call_msg;
 	struct timeval now;
 	struct ct_data *ct;		/* transport-private state */
 	CLIENT *cl;			/* handle returned to the caller */
 	XDR xdrs;
 
 	/* Seed the XID perturbation from the socket pointer on first use. */
 	if (disrupt == 0)
 		disrupt = (uint32_t)(long)so;
 
 	cl = (CLIENT *)mem_alloc(sizeof (*cl));
 	ct = (struct ct_data *)mem_alloc(sizeof (*ct));
 
 	/* Lock and life-cycle state. */
 	mtx_init(&ct->ct_lock, "ct->ct_lock", NULL, MTX_DEF);
 	ct->ct_threads = 0;
 	ct->ct_upcallrefs = 0;
 	ct->ct_closing = FALSE;
 	ct->ct_closed = FALSE;
 	ct->ct_closeit = FALSE;
 
 	/* tv_usec == -1 makes clnt_bck_call() use the caller's timeout. */
 	ct->ct_wait.tv_sec = -1;
 	ct->ct_wait.tv_usec = -1;
 
 	/* Fill in the constant part of the call message. */
 	getmicrotime(&now);
 	ct->ct_xid = ((uint32_t)++disrupt) ^ __RPC_GETXID(&now);
 	call_msg.rm_direction = CALL;
 	call_msg.rm_xid = ct->ct_xid;
 	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
 	call_msg.rm_call.cb_prog = (uint32_t)prog;
 	call_msg.rm_call.cb_vers = (uint32_t)vers;
 
 	/*
 	 * Encode that constant header once into ct_mcallc; every call
 	 * copies the first ct_mpos bytes from there.
 	 */
 	xdrmem_create(&xdrs, ct->ct_mcallc, MCALL_MSG_SIZE, XDR_ENCODE);
 	if (!xdr_callhdr(&xdrs, &call_msg)) {
 		/* Encoding failed: undo the allocations made above. */
 		mtx_destroy(&ct->ct_lock);
 		mem_free(ct, sizeof (struct ct_data));
 		mem_free(cl, sizeof (CLIENT));
 		return (NULL);
 	}
 	ct->ct_mpos = XDR_GETPOS(&xdrs);
 	XDR_DESTROY(&xdrs);
 
 	ct->ct_waitchan = "rpcbck";
 	ct->ct_waitflag = 0;
 	TAILQ_INIT(&ct->ct_pending);
 
 	cl->cl_refs = 1;
 	cl->cl_ops = &clnt_bck_ops;
 	cl->cl_private = ct;
 	cl->cl_auth = authnone_create();
 	return (cl);
 }
 
 /*
  * Issue one backchannel RPC on the server transport "xprt" and wait for
  * the matching reply, which clnt_bck_svccall() hands over through
  * cr->cr_mrep.
  *
  * "args" is copied with m_copym() for every attempt and is not freed
  * here.  On RPC_SUCCESS, *resultsp holds the reply body.  Error detail is
  * stored in ext->rc_err when "ext" is supplied, otherwise in ct->ct_error.
  * "utimeout" is used unless ct->ct_wait holds a default (tv_usec != -1).
  *
  * Returns an enum clnt_stat describing the outcome.
  */
 enum clnt_stat
 clnt_bck_call(
 	CLIENT		*cl,		/* client handle */
 	struct rpc_callextra *ext,	/* call metadata */
 	rpcproc_t	proc,		/* procedure number */
 	struct mbuf	*args,		/* pointer to args */
 	struct mbuf	**resultsp,	/* pointer to results */
 	struct timeval	utimeout,
 	SVCXPRT		*xprt)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
 	AUTH *auth;
 	struct rpc_err *errp;
 	enum clnt_stat stat;
 	XDR xdrs;
 	struct rpc_msg reply_msg;
 	bool_t ok;
 	int nrefreshes = 2;		/* number of times to refresh cred */
 	struct timeval timeout;
 	uint32_t xid;
 	struct mbuf *mreq = NULL, *results;
 	struct ct_request *cr;
 	int error, maxextsiz;
 #ifdef KERN_TLS
 	u_int maxlen;
 #endif
 
 	/* Allocate before taking ct_lock, since M_WAITOK may sleep. */
 	cr = malloc(sizeof(struct ct_request), M_RPC, M_WAITOK);
 
 	mtx_lock(&ct->ct_lock);
 
 	if (ct->ct_closing || ct->ct_closed) {
 		mtx_unlock(&ct->ct_lock);
 		free(cr, M_RPC);
 		return (RPC_CANTSEND);
 	}
 	ct->ct_threads++;
 
 	if (ext) {
 		auth = ext->rc_auth;
 		errp = &ext->rc_err;
 	} else {
 		auth = cl->cl_auth;
 		errp = &ct->ct_error;
 	}
 
 	cr->cr_mrep = NULL;
 	cr->cr_error = 0;
 
 	if (ct->ct_wait.tv_usec == -1)
 		timeout = utimeout;	/* use supplied timeout */
 	else
 		timeout = ct->ct_wait;	/* use default timeout */
 
 call_again:
 	mtx_assert(&ct->ct_lock, MA_OWNED);
 
 	/* Every attempt, including retries, gets a fresh XID. */
 	ct->ct_xid++;
 	xid = ct->ct_xid;
 
 	mtx_unlock(&ct->ct_lock);
 
 	/*
 	 * Leave space to pre-pend the record mark.
 	 */
 	mreq = m_gethdr(M_WAITOK, MT_DATA);
 	mreq->m_data += sizeof(uint32_t);
 	KASSERT(ct->ct_mpos + sizeof(uint32_t) <= MHLEN,
 	    ("RPC header too big"));
 	bcopy(ct->ct_mcallc, mreq->m_data, ct->ct_mpos);
 	mreq->m_len = ct->ct_mpos;
 
 	/*
 	 * The XID is the first thing in the request.
 	 */
 	*mtod(mreq, uint32_t *) = htonl(xid);
 
 	xdrmbuf_create(&xdrs, mreq, XDR_ENCODE);
 
 	errp->re_status = stat = RPC_SUCCESS;
 
 	if ((!XDR_PUTINT32(&xdrs, &proc)) ||
 	    (!AUTH_MARSHALL(auth, xid, &xdrs,
 	     m_copym(args, 0, M_COPYALL, M_WAITOK)))) {
 		errp->re_status = stat = RPC_CANTENCODEARGS;
 		mtx_lock(&ct->ct_lock);
 		goto out;
 	}
 	mreq->m_pkthdr.len = m_length(mreq, NULL);
 
 	/*
 	 * Prepend a record marker containing the packet length.
 	 */
 	M_PREPEND(mreq, sizeof(uint32_t), M_WAITOK);
 	*mtod(mreq, uint32_t *) =
 	    htonl(0x80000000 | (mreq->m_pkthdr.len - sizeof(uint32_t)));
 
 	cr->cr_xid = xid;
 	mtx_lock(&ct->ct_lock);
 	/*
 	 * Check to see if the client end has already started to close down
 	 * the connection. The svc code will have set ct_error.re_status
 	 * to RPC_CANTRECV if this is the case.
 	 * If the client starts to close down the connection after this
 	 * point, it will be detected later when cr_error is checked,
 	 * since the request is in the ct_pending queue.
 	 */
 	if (ct->ct_error.re_status == RPC_CANTRECV) {
 		if (errp != &ct->ct_error) {
 			errp->re_errno = ct->ct_error.re_errno;
 			errp->re_status = RPC_CANTRECV;
 		}
 		stat = RPC_CANTRECV;
 		goto out;
 	}
 	TAILQ_INSERT_TAIL(&ct->ct_pending, cr, cr_link);
 	mtx_unlock(&ct->ct_lock);
 
 	/* For RPC-over-TLS, copy mreq to a chain of ext_pgs. */
 	if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
 		/*
 		 * Copy the mbuf chain to a chain of
 		 * ext_pgs mbuf(s) as required by KERN_TLS.
 		 */
 		maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
 #ifdef KERN_TLS
 		if (rpctls_getinfo(&maxlen, false, false))
 			maxextsiz = min(maxextsiz, maxlen);
 #endif
 		mreq = _rpc_copym_into_ext_pgs(mreq, maxextsiz);
 	}
 	/*
 	 * sosend consumes mreq.
 	 */
 	sx_xlock(&xprt->xp_lock);
 	error = sosend(xprt->xp_socket, NULL, NULL, mreq, NULL, 0, curthread);
 	mreq = NULL;
 	if (error == EMSGSIZE) {
 		/*
 		 * Wait on the send buffer, then rebuild and resend the
 		 * request under a new XID.
 		 */
 		SOCKBUF_LOCK(&xprt->xp_socket->so_snd);
-		sbwait(&xprt->xp_socket->so_snd);
+		sbwait(xprt->xp_socket, SO_SND);
 		SOCKBUF_UNLOCK(&xprt->xp_socket->so_snd);
 		sx_xunlock(&xprt->xp_lock);
 		AUTH_VALIDATE(auth, xid, NULL, NULL);
 		mtx_lock(&ct->ct_lock);
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		goto call_again;
 	}
 	sx_xunlock(&xprt->xp_lock);
 
 	reply_msg.acpted_rply.ar_verf.oa_flavor = AUTH_NULL;
 	reply_msg.acpted_rply.ar_verf.oa_base = cr->cr_verf;
 	reply_msg.acpted_rply.ar_verf.oa_length = 0;
 	reply_msg.acpted_rply.ar_results.where = NULL;
 	reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
 
 	mtx_lock(&ct->ct_lock);
 	if (error) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		errp->re_errno = error;
 		errp->re_status = stat = RPC_CANTSEND;
 		goto out;
 	}
 
 	/*
 	 * Check to see if we got an upcall while waiting for the
 	 * lock. In both these cases, the request is still linked on
 	 * ct->ct_pending, so it is unlinked here.
 	 */
 	if (cr->cr_error) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		errp->re_errno = cr->cr_error;
 		errp->re_status = stat = RPC_CANTRECV;
 		goto out;
 	}
 	if (cr->cr_mrep) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		goto got_reply;
 	}
 
 	/*
 	 * Hack to provide rpc-based message passing
 	 */
 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		errp->re_status = stat = RPC_TIMEDOUT;
 		goto out;
 	}
 
 	/* Sleep until clnt_bck_svccall() does wakeup(cr), or until timeout. */
 	error = msleep(cr, &ct->ct_lock, ct->ct_waitflag, ct->ct_waitchan,
 	    tvtohz(&timeout));
 
 	TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 
 	if (error) {
 		/*
 		 * The sleep returned an error; the request has just been
 		 * unlinked above. Turn the error code into an
 		 * appropriate client status.
 		 */
 		errp->re_errno = error;
 		switch (error) {
 		case EINTR:
 			stat = RPC_INTR;
 			break;
 		case EWOULDBLOCK:
 			stat = RPC_TIMEDOUT;
 			break;
 		default:
 			stat = RPC_CANTRECV;
 		}
 		errp->re_status = stat;
 		goto out;
 	} else {
 		/*
 		 * We were woken up by the svc thread.  If the
 		 * upcall had a receive error, report that,
 		 * otherwise we have a reply.
 		 */
 		if (cr->cr_error) {
 			errp->re_errno = cr->cr_error;
 			errp->re_status = stat = RPC_CANTRECV;
 			goto out;
 		}
 	}
 
 got_reply:
 	/*
 	 * Now decode and validate the response. We need to drop the
 	 * lock since xdr_replymsg may end up sleeping in malloc.
 	 */
 	mtx_unlock(&ct->ct_lock);
 
 	if (ext && ext->rc_feedback)
 		ext->rc_feedback(FEEDBACK_OK, proc, ext->rc_feedback_arg);
 
 	xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE);
 	ok = xdr_replymsg(&xdrs, &reply_msg);
 	cr->cr_mrep = NULL;
 
 	if (ok) {
 		if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
 		    (reply_msg.acpted_rply.ar_stat == SUCCESS))
 			errp->re_status = stat = RPC_SUCCESS;
 		else
 			stat = _seterr_reply(&reply_msg, errp);
 
 		if (stat == RPC_SUCCESS) {
 			results = xdrmbuf_getall(&xdrs);
 			if (!AUTH_VALIDATE(auth, xid,
 			    &reply_msg.acpted_rply.ar_verf, &results)) {
 				errp->re_status = stat = RPC_AUTHERROR;
 				errp->re_why = AUTH_INVALIDRESP;
 			} else {
 				KASSERT(results,
 				    ("auth validated but no result"));
 				*resultsp = results;
 			}
 		}		/* end successful completion */
 		/*
 		 * If unsuccessful AND error is an authentication error
 		 * then refresh credentials and try again, else break
 		 */
 		else if (stat == RPC_AUTHERROR)
 			/* maybe our credentials need to be refreshed ... */
 			if (nrefreshes > 0 && AUTH_REFRESH(auth, &reply_msg)) {
 				nrefreshes--;
 				XDR_DESTROY(&xdrs);
 				mtx_lock(&ct->ct_lock);
 				goto call_again;
 			}
 			/* end of unsuccessful completion */
 		/* end of valid reply message */
 	} else
 		errp->re_status = stat = RPC_CANTDECODERES;
 	XDR_DESTROY(&xdrs);
 	mtx_lock(&ct->ct_lock);
 out:
 	mtx_assert(&ct->ct_lock, MA_OWNED);
 
 	KASSERT(stat != RPC_SUCCESS || *resultsp,
 	    ("RPC_SUCCESS without reply"));
 
 	if (mreq != NULL)
 		m_freem(mreq);
 	if (cr->cr_mrep != NULL)
 		m_freem(cr->cr_mrep);
 
 	ct->ct_threads--;
 	if (ct->ct_closing)
 		wakeup(ct);
 
 	mtx_unlock(&ct->ct_lock);
 
 	if (auth && stat != RPC_SUCCESS)
 		AUTH_VALIDATE(auth, xid, NULL, NULL);
 
 	free(cr, M_RPC);
 
 	return (stat);
 }
 
 /*
  * Copy the handle-wide error state (ct_error) into *errp.
  */
 static void
 clnt_bck_geterr(CLIENT *cl, struct rpc_err *errp)
 {
 	const struct ct_data *priv = (const struct ct_data *)cl->cl_private;
 
 	*errp = priv->ct_error;
 }
 
 /*
  * Run the caller's XDR routine in XDR_FREE mode over res_ptr and pass
  * back whatever that routine returns.  "cl" is not used.
  */
 static bool_t
 clnt_bck_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr)
 {
 	XDR free_xdrs;
 
 	/* Only x_op is set; the other XDR fields are left untouched. */
 	free_xdrs.x_op = XDR_FREE;
 	return ((*xdr_res)(&free_xdrs, res_ptr));
 }
 
 /*
  * cl_abort handler for clnt_bck_ops.  Does nothing.
  */
 /*ARGSUSED*/
 static void
 clnt_bck_abort(CLIENT *cl)
 {
 }
 
 /*
  * cl_control handler for clnt_bck_ops.  Ignores "request" and "info"
  * and always returns TRUE.
  */
 static bool_t
 clnt_bck_control(CLIENT *cl, u_int request, void *info)
 {
 
 	return (TRUE);
 }
 
 /*
  * Mark the backchannel handle closed.  Nothing is done when it is already
  * closed.  If ct_closing is set, sleep on the handle until it clears.
  * Otherwise set ct_closed and wake any thread sleeping on "ct".
  */
 static void
 clnt_bck_close(CLIENT *cl)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
 	bool_t closed_here = FALSE;
 
 	mtx_lock(&ct->ct_lock);
 	if (!ct->ct_closed) {
 		if (ct->ct_closing) {
 			/* A close is flagged as in progress; wait it out. */
 			do {
 				msleep(ct, &ct->ct_lock, 0, "rpcclose", 0);
 			} while (ct->ct_closing);
 			KASSERT(ct->ct_closed, ("client should be closed"));
 		} else {
 			ct->ct_closing = FALSE;
 			ct->ct_closed = TRUE;
 			closed_here = TRUE;
 		}
 	}
 	mtx_unlock(&ct->ct_lock);
 
 	/* The wakeup is issued only after the lock has been dropped. */
 	if (closed_here)
 		wakeup(ct);
 }
 
 /*
  * Close the handle, then release its lock, its private ct_data, any
  * non-empty cl_netid / cl_tp strings, and finally the CLIENT itself.
  */
 static void
 clnt_bck_destroy(CLIENT *cl)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
 
 	clnt_bck_close(cl);
 
 	mtx_destroy(&ct->ct_lock);
 	mem_free(ct, sizeof(*ct));
 
 	/* The strings are freed only when set and not empty. */
 	if (cl->cl_netid != NULL && cl->cl_netid[0] != '\0')
 		mem_free(cl->cl_netid, strlen(cl->cl_netid) + 1);
 	if (cl->cl_tp != NULL && cl->cl_tp[0] != '\0')
 		mem_free(cl->cl_tp, strlen(cl->cl_tp) + 1);
 
 	mem_free(cl, sizeof(*cl));
 }
 
 /*
  * Called by the svc code when a backchannel RPC reply arrives.
  * "arg" is the CLIENT, not its struct ct_data: on the server end, where
  * callback RPCs to the client are performed, xp_p2 points to the CLIENT
  * so that svc_vc_destroy() can CLNT_RELEASE() the reference count on it.
  *
  * The reply is handed to the pending request whose XID matches and that
  * waiter is woken; "mrep" is freed here if the handle is closing/closed
  * or no request matches.
  */
 void
 clnt_bck_svccall(void *arg, struct mbuf *mrep, uint32_t xid)
 {
 	CLIENT *cl = (CLIENT *)arg;
 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
 	struct ct_request *req;
 	bool_t delivered = FALSE;
 
 	mtx_lock(&ct->ct_lock);
 	if (ct->ct_closing || ct->ct_closed) {
 		mtx_unlock(&ct->ct_lock);
 		m_freem(mrep);
 		return;
 	}
 
 	ct->ct_upcallrefs++;
 
 	/* Look for the pending request this reply belongs to. */
 	TAILQ_FOREACH(req, &ct->ct_pending, cr_link) {
 		if (req->cr_xid != xid)
 			continue;
 		/*
 		 * Found it.  The reply mbuf list is left in cr_mrep, and
 		 * the XID is zeroed so that a duplicated reply will not
 		 * match this request again.
 		 */
 		req->cr_xid = 0;
 		req->cr_mrep = mrep;
 		req->cr_error = 0;
 		delivered = TRUE;
 		wakeup(req);
 		break;
 	}
 
 	if (--ct->ct_upcallrefs < 0)
 		panic("rpcvc svccall refcnt");
 	if (ct->ct_upcallrefs == 0)
 		wakeup(&ct->ct_upcallrefs);
 	mtx_unlock(&ct->ct_lock);
 
 	/* Nobody took ownership of the reply, so drop it. */
 	if (!delivered)
 		m_freem(mrep);
 }
 
diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c
index dfada2bea388..f565de06f4bd 100644
--- a/sys/rpc/clnt_vc.c
+++ b/sys/rpc/clnt_vc.c
@@ -1,1320 +1,1320 @@
 /*	$NetBSD: clnt_vc.c,v 1.4 2000/07/14 08:40:42 fvdl Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2009, Sun Microsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without 
  * modification, are permitted provided that the following conditions are met:
  * - Redistributions of source code must retain the above copyright notice, 
  *   this list of conditions and the following disclaimer.
  * - Redistributions in binary form must reproduce the above copyright notice, 
  *   this list of conditions and the following disclaimer in the documentation 
  *   and/or other materials provided with the distribution.
  * - Neither the name of Sun Microsystems, Inc. nor the names of its 
  *   contributors may be used to endorse or promote products derived 
  *   from this software without specific prior written permission.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char *sccsid2 = "@(#)clnt_tcp.c 1.37 87/10/05 Copyr 1984 Sun Micro";
 static char *sccsid = "@(#)clnt_tcp.c	2.2 88/08/01 4.0 RPCSRC";
 static char sccsid3[] = "@(#)clnt_vc.c 1.19 89/03/16 Copyr 1988 Sun Micro";
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
  
 /*
  * clnt_tcp.c, Implements a TCP/IP based, client side RPC.
  *
  * Copyright (C) 1984, Sun Microsystems, Inc.
  *
  * TCP based RPC supports 'batched calls'.
  * A sequence of calls may be batched-up in a send buffer.  The rpc call
  * return immediately to the client even though the call was not necessarily
  * sent.  The batching occurs if the results' xdr routine is NULL (0) AND
  * the rpc timeout value is zero (see clnt.h, rpc).
  *
  * Clients should NOT casually batch calls that in fact return results; that is,
  * the server side should be aware that a call is batched and not produce any
  * return message.  Batched calls that produce many result messages can
  * deadlock (netlock) the client and the server....
  *
  * Now go hang yourself.
  */
 
 #include "opt_kern_tls.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/ktls.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 #include <sys/time.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <netinet/tcp.h>
 
 #include <rpc/rpc.h>
 #include <rpc/rpc_com.h>
 #include <rpc/krpc.h>
 #include <rpc/rpcsec_tls.h>
 
 /*
  * A control-message header followed by a cmsgcred payload.
  * NOTE(review): no use of this type is visible in this part of the file.
  */
 struct cmessage {
         struct cmsghdr cmsg;
         struct cmsgcred cmcred;
 };
 
 /* File-local routines; the first seven are installed in clnt_vc_ops. */
 static enum clnt_stat clnt_vc_call(CLIENT *, struct rpc_callextra *,
     rpcproc_t, struct mbuf *, struct mbuf **, struct timeval);
 static void clnt_vc_geterr(CLIENT *, struct rpc_err *);
 static bool_t clnt_vc_freeres(CLIENT *, xdrproc_t, void *);
 static void clnt_vc_abort(CLIENT *);
 static bool_t clnt_vc_control(CLIENT *, u_int, void *);
 static void clnt_vc_close(CLIENT *);
 static void clnt_vc_destroy(CLIENT *);
 static bool_t time_not_ok(struct timeval *);
 /* Receive upcall; registered on the socket by clnt_vc_create(). */
 static int clnt_vc_soupcall(struct socket *so, void *arg, int waitflag);
 static void clnt_vc_dotlsupcall(void *data);
 
 /* Operations vector attached to every handle from clnt_vc_create(). */
 static const struct clnt_ops clnt_vc_ops = {
 	.cl_call =	clnt_vc_call,
 	.cl_abort =	clnt_vc_abort,
 	.cl_geterr =	clnt_vc_geterr,
 	.cl_freeres =	clnt_vc_freeres,
 	.cl_close =	clnt_vc_close,
 	.cl_destroy =	clnt_vc_destroy,
 	.cl_control =	clnt_vc_control
 };
 
 static void clnt_vc_upcallsdone(struct ct_data *);
 
 /*
  * Wait channel for the one-tick msleep() that clnt_vc_call() does
  * between sosend() retries.
  */
 static int	fake_wchan;
 
 /*
  * Create a client handle for a connection.
  * Default options are set, which the user can change using clnt_control()'s.
  * The rpc/vc package does buffering similar to stdio, so the client
  * must pick send and receive buffer sizes, 0 => use the default.
  * NB: The socket pointer is stored in the private area (ct_socket).
  * NB: cl->cl_auth is set to null authentication (authnone_create()).
  * Caller may wish to set this to something more useful.
  *
  * so should be an open socket.  If it is neither connected nor
  * confirming, it is connected to raddr here, sleeping until the connect
  * completes (interruptibly when intrflag is non-zero).
  *
  * Returns the new handle, or NULL on failure.  rpc_createerr is filled in
  * only on the connect failure path.
  */
 CLIENT *
 clnt_vc_create(
 	struct socket *so,		/* open socket */
 	struct sockaddr *raddr,		/* servers address */
 	const rpcprog_t prog,		/* program number */
 	const rpcvers_t vers,		/* version number */
 	size_t sendsz,			/* buffer send size */
 	size_t recvsz,			/* buffer recv size */
 	int intrflag)			/* interruptible */
 {
 	CLIENT *cl;			/* client handle */
 	struct ct_data *ct = NULL;	/* private data for the handle */
 	struct timeval now;
 	struct rpc_msg call_msg;
 	static uint32_t disrupt;	/* perturbs the initial XID */
 	struct __rpc_sockinfo si;
 	XDR xdrs;
 	int error, interrupted, one = 1, sleep_flag;
 	struct sockopt sopt;
 
 	/* Seed the XID perturbation from the address pointer on first use. */
 	if (disrupt == 0)
 		disrupt = (uint32_t)(long)raddr;
 
 	cl = (CLIENT *)mem_alloc(sizeof (*cl));
 	ct = (struct ct_data *)mem_alloc(sizeof (*ct));
 
 	mtx_init(&ct->ct_lock, "ct->ct_lock", NULL, MTX_DEF);
 	ct->ct_threads = 0;
 	ct->ct_closing = FALSE;
 	ct->ct_closed = FALSE;
 	ct->ct_upcallrefs = 0;
 	ct->ct_rcvstate = RPCRCVSTATE_NORMAL;
 
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 		error = soconnect(so, raddr, curthread);
 		SOCK_LOCK(so);
 		interrupted = 0;
 		sleep_flag = PSOCK;
 		if (intrflag != 0)
 			sleep_flag |= PCATCH;
 		/* Sleep until the connect finishes or so_error is set. */
 		while ((so->so_state & SS_ISCONNECTING)
 		    && so->so_error == 0) {
 			error = msleep(&so->so_timeo, SOCK_MTX(so),
 			    sleep_flag, "connec", 0);
 			if (error) {
 				if (error == EINTR || error == ERESTART)
 					interrupted = 1;
 				break;
 			}
 		}
 		/* No sleep error: pick up and clear the socket's error. */
 		if (error == 0) {
 			error = so->so_error;
 			so->so_error = 0;
 		}
 		SOCK_UNLOCK(so);
 		if (error) {
 			/*
 			 * NOTE(review): so_state is modified here after
 			 * SOCK_UNLOCK() -- confirm this is intentional.
 			 */
 			if (!interrupted)
 				so->so_state &= ~SS_ISCONNECTING;
 			rpc_createerr.cf_stat = RPC_SYSTEMERROR;
 			rpc_createerr.cf_error.re_errno = error;
 			goto err;
 		}
 	}
 
 	if (!__rpc_socket2sockinfo(so, &si)) {
 		goto err;
 	}
 
 	/* Request SO_KEEPALIVE; the sosetopt() result is not checked. */
 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 		bzero(&sopt, sizeof(sopt));
 		sopt.sopt_dir = SOPT_SET;
 		sopt.sopt_level = SOL_SOCKET;
 		sopt.sopt_name = SO_KEEPALIVE;
 		sopt.sopt_val = &one;
 		sopt.sopt_valsize = sizeof(one);
 		sosetopt(so, &sopt);
 	}
 
 	/* Request TCP_NODELAY for TCP; the result is again not checked. */
 	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
 		bzero(&sopt, sizeof(sopt));
 		sopt.sopt_dir = SOPT_SET;
 		sopt.sopt_level = IPPROTO_TCP;
 		sopt.sopt_name = TCP_NODELAY;
 		sopt.sopt_val = &one;
 		sopt.sopt_valsize = sizeof(one);
 		sosetopt(so, &sopt);
 	}
 
 	/*
 	 * ct_closeit stays FALSE for the rest of this function, so the
 	 * soclose() calls on the error paths below are not taken.
 	 */
 	ct->ct_closeit = FALSE;
 
 	/*
 	 * Set up private data struct
 	 */
 	ct->ct_socket = so;
 	/* tv_usec == -1 makes clnt_vc_call() use the caller's timeout. */
 	ct->ct_wait.tv_sec = -1;
 	ct->ct_wait.tv_usec = -1;
 	memcpy(&ct->ct_addr, raddr, raddr->sa_len);
 
 	/*
 	 * Initialize call message
 	 */
 	getmicrotime(&now);
 	ct->ct_xid = ((uint32_t)++disrupt) ^ __RPC_GETXID(&now);
 	call_msg.rm_xid = ct->ct_xid;
 	call_msg.rm_direction = CALL;
 	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
 	call_msg.rm_call.cb_prog = (uint32_t)prog;
 	call_msg.rm_call.cb_vers = (uint32_t)vers;
 
 	/*
 	 * pre-serialize the static part of the call msg and stash it away
 	 */
 	xdrmem_create(&xdrs, ct->ct_mcallc, MCALL_MSG_SIZE,
 	    XDR_ENCODE);
 	if (! xdr_callhdr(&xdrs, &call_msg)) {
 		if (ct->ct_closeit) {
 			soclose(ct->ct_socket);
 		}
 		goto err;
 	}
 	ct->ct_mpos = XDR_GETPOS(&xdrs);
 	XDR_DESTROY(&xdrs);
 	ct->ct_waitchan = "rpcrecv";
 	ct->ct_waitflag = 0;
 
 	/*
 	 * Size the socket buffers for this transport, then finish the
 	 * handle with authnone for authentication.
 	 */
 	sendsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)sendsz);
 	recvsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)recvsz);
 	error = soreserve(ct->ct_socket, sendsz, recvsz);
 	if (error != 0) {
 		if (ct->ct_closeit) {
 			soclose(ct->ct_socket);
 		}
 		goto err;
 	}
 	cl->cl_refs = 1;
 	cl->cl_ops = &clnt_vc_ops;
 	cl->cl_private = ct;
 	cl->cl_auth = authnone_create();
 
 	/* Register the receive upcall while holding the so_rcv lock. */
 	SOCKBUF_LOCK(&ct->ct_socket->so_rcv);
 	soupcall_set(ct->ct_socket, SO_RCV, clnt_vc_soupcall, ct);
 	SOCKBUF_UNLOCK(&ct->ct_socket->so_rcv);
 
 	ct->ct_raw = NULL;
 	ct->ct_record = NULL;
 	ct->ct_record_resid = 0;
 	ct->ct_sslrefno = 0;
 	TAILQ_INIT(&ct->ct_pending);
 	return (cl);
 
 err:
 	/* Common failure exit: undo the two allocations and the mutex. */
 	mtx_destroy(&ct->ct_lock);
 	mem_free(ct, sizeof (struct ct_data));
 	mem_free(cl, sizeof (CLIENT));
 
 	return ((CLIENT *)NULL);
 }
 
 static enum clnt_stat
 clnt_vc_call(
 	CLIENT		*cl,		/* client handle */
 	struct rpc_callextra *ext,	/* call metadata */
 	rpcproc_t	proc,		/* procedure number */
 	struct mbuf	*args,		/* pointer to args */
 	struct mbuf	**resultsp,	/* pointer to results */
 	struct timeval	utimeout)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
 	AUTH *auth;
 	struct rpc_err *errp;
 	enum clnt_stat stat;
 	XDR xdrs;
 	struct rpc_msg reply_msg;
 	bool_t ok;
 	int nrefreshes = 2;		/* number of times to refresh cred */
 	struct timeval timeout;
 	uint32_t xid;
 	struct mbuf *mreq = NULL, *results;
 	struct ct_request *cr;
 	int error, maxextsiz, trycnt;
 #ifdef KERN_TLS
 	u_int maxlen;
 #endif
 
 	cr = malloc(sizeof(struct ct_request), M_RPC, M_WAITOK);
 
 	mtx_lock(&ct->ct_lock);
 
 	if (ct->ct_closing || ct->ct_closed) {
 		mtx_unlock(&ct->ct_lock);
 		free(cr, M_RPC);
 		return (RPC_CANTSEND);
 	}
 	ct->ct_threads++;
 
 	if (ext) {
 		auth = ext->rc_auth;
 		errp = &ext->rc_err;
 	} else {
 		auth = cl->cl_auth;
 		errp = &ct->ct_error;
 	}
 
 	cr->cr_mrep = NULL;
 	cr->cr_error = 0;
 
 	if (ct->ct_wait.tv_usec == -1) {
 		timeout = utimeout;	/* use supplied timeout */
 	} else {
 		timeout = ct->ct_wait;	/* use default timeout */
 	}
 
 	/*
 	 * After 15sec of looping, allow it to return RPC_CANTSEND, which will
 	 * cause the clnt_reconnect layer to create a new TCP connection.
 	 */
 	trycnt = 15 * hz;
 call_again:
 	mtx_assert(&ct->ct_lock, MA_OWNED);
 	if (ct->ct_closing || ct->ct_closed) {
 		ct->ct_threads--;
 		wakeup(ct);
 		mtx_unlock(&ct->ct_lock);
 		free(cr, M_RPC);
 		return (RPC_CANTSEND);
 	}
 
 	ct->ct_xid++;
 	xid = ct->ct_xid;
 
 	mtx_unlock(&ct->ct_lock);
 
 	/*
 	 * Leave space to pre-pend the record mark.
 	 */
 	mreq = m_gethdr(M_WAITOK, MT_DATA);
 	mreq->m_data += sizeof(uint32_t);
 	KASSERT(ct->ct_mpos + sizeof(uint32_t) <= MHLEN,
 	    ("RPC header too big"));
 	bcopy(ct->ct_mcallc, mreq->m_data, ct->ct_mpos);
 	mreq->m_len = ct->ct_mpos;
 
 	/*
 	 * The XID is the first thing in the request.
 	 */
 	*mtod(mreq, uint32_t *) = htonl(xid);
 
 	xdrmbuf_create(&xdrs, mreq, XDR_ENCODE);
 
 	errp->re_status = stat = RPC_SUCCESS;
 
 	if ((! XDR_PUTINT32(&xdrs, &proc)) ||
 	    (! AUTH_MARSHALL(auth, xid, &xdrs,
 		m_copym(args, 0, M_COPYALL, M_WAITOK)))) {
 		errp->re_status = stat = RPC_CANTENCODEARGS;
 		mtx_lock(&ct->ct_lock);
 		goto out;
 	}
 	mreq->m_pkthdr.len = m_length(mreq, NULL);
 
 	/*
 	 * Prepend a record marker containing the packet length.
 	 */
 	M_PREPEND(mreq, sizeof(uint32_t), M_WAITOK);
 	*mtod(mreq, uint32_t *) =
 		htonl(0x80000000 | (mreq->m_pkthdr.len - sizeof(uint32_t)));
 
 	cr->cr_xid = xid;
 	mtx_lock(&ct->ct_lock);
 	/*
 	 * Check to see if the other end has already started to close down
 	 * the connection. The upcall will have set ct_error.re_status
 	 * to RPC_CANTRECV if this is the case.
 	 * If the other end starts to close down the connection after this
 	 * point, it will be detected later when cr_error is checked,
 	 * since the request is in the ct_pending queue.
 	 */
 	if (ct->ct_error.re_status == RPC_CANTRECV) {
 		if (errp != &ct->ct_error) {
 			errp->re_errno = ct->ct_error.re_errno;
 			errp->re_status = RPC_CANTRECV;
 		}
 		stat = RPC_CANTRECV;
 		goto out;
 	}
 
 	/* For TLS, wait for an upcall to be done, as required. */
 	while ((ct->ct_rcvstate & (RPCRCVSTATE_NORMAL |
 	    RPCRCVSTATE_NONAPPDATA)) == 0)
 		msleep(&ct->ct_rcvstate, &ct->ct_lock, 0, "rpcrcvst", hz);
 
 	TAILQ_INSERT_TAIL(&ct->ct_pending, cr, cr_link);
 	mtx_unlock(&ct->ct_lock);
 
 	if (ct->ct_sslrefno != 0) {
 		/*
 		 * Copy the mbuf chain to a chain of ext_pgs mbuf(s)
 		 * as required by KERN_TLS.
 		 */
 		maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
 #ifdef KERN_TLS
 		if (rpctls_getinfo(&maxlen, false, false))
 			maxextsiz = min(maxextsiz, maxlen);
 #endif
 		mreq = _rpc_copym_into_ext_pgs(mreq, maxextsiz);
 	}
 	/*
 	 * sosend consumes mreq.
 	 */
 	error = sosend(ct->ct_socket, NULL, NULL, mreq, NULL, 0, curthread);
 	mreq = NULL;
 	if (error == EMSGSIZE || (error == ERESTART &&
 	    (ct->ct_waitflag & PCATCH) == 0 && trycnt-- > 0)) {
 		SOCKBUF_LOCK(&ct->ct_socket->so_snd);
-		sbwait(&ct->ct_socket->so_snd);
+		sbwait(ct->ct_socket, SO_SND);
 		SOCKBUF_UNLOCK(&ct->ct_socket->so_snd);
 		AUTH_VALIDATE(auth, xid, NULL, NULL);
 		mtx_lock(&ct->ct_lock);
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		/* Sleep for 1 clock tick before trying the sosend() again. */
 		msleep(&fake_wchan, &ct->ct_lock, 0, "rpclpsnd", 1);
 		goto call_again;
 	}
 
 	reply_msg.acpted_rply.ar_verf.oa_flavor = AUTH_NULL;
 	reply_msg.acpted_rply.ar_verf.oa_base = cr->cr_verf;
 	reply_msg.acpted_rply.ar_verf.oa_length = 0;
 	reply_msg.acpted_rply.ar_results.where = NULL;
 	reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
 
 	mtx_lock(&ct->ct_lock);
 	if (error) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		errp->re_errno = error;
 		errp->re_status = stat = RPC_CANTSEND;
 		goto out;
 	}
 
 	/*
 	 * Check to see if we got an upcall while waiting for the
 	 * lock. In both these cases, the request has been removed
 	 * from ct->ct_pending.
 	 */
 	if (cr->cr_error) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		errp->re_errno = cr->cr_error;
 		errp->re_status = stat = RPC_CANTRECV;
 		goto out;
 	}
 	if (cr->cr_mrep) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		goto got_reply;
 	}
 
 	/*
 	 * Hack to provide rpc-based message passing
 	 */
 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
 		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		errp->re_status = stat = RPC_TIMEDOUT;
 		goto out;
 	}
 
 	error = msleep(cr, &ct->ct_lock, ct->ct_waitflag, ct->ct_waitchan,
 	    tvtohz(&timeout));
 
 	TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 
 	if (error) {
 		/*
 		 * The sleep returned an error so our request is still
 		 * on the list. Turn the error code into an
 		 * appropriate client status.
 		 */
 		errp->re_errno = error;
 		switch (error) {
 		case EINTR:
 			stat = RPC_INTR;
 			break;
 		case EWOULDBLOCK:
 			stat = RPC_TIMEDOUT;
 			break;
 		default:
 			stat = RPC_CANTRECV;
 		}
 		errp->re_status = stat;
 		goto out;
 	} else {
 		/*
 		 * We were woken up by the upcall.  If the
 		 * upcall had a receive error, report that,
 		 * otherwise we have a reply.
 		 */
 		if (cr->cr_error) {
 			errp->re_errno = cr->cr_error;
 			errp->re_status = stat = RPC_CANTRECV;
 			goto out;
 		}
 	}
 
 got_reply:
 	/*
 	 * Now decode and validate the response. We need to drop the
 	 * lock since xdr_replymsg may end up sleeping in malloc.
 	 */
 	mtx_unlock(&ct->ct_lock);
 
 	if (ext && ext->rc_feedback)
 		ext->rc_feedback(FEEDBACK_OK, proc, ext->rc_feedback_arg);
 
 	xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE);
 	ok = xdr_replymsg(&xdrs, &reply_msg);
 	cr->cr_mrep = NULL;
 
 	if (ok) {
 		if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
 		    (reply_msg.acpted_rply.ar_stat == SUCCESS))
 			errp->re_status = stat = RPC_SUCCESS;
 		else
 			stat = _seterr_reply(&reply_msg, errp);
 
 		if (stat == RPC_SUCCESS) {
 			results = xdrmbuf_getall(&xdrs);
 			if (!AUTH_VALIDATE(auth, xid,
 				&reply_msg.acpted_rply.ar_verf,
 				&results)) {
 				errp->re_status = stat = RPC_AUTHERROR;
 				errp->re_why = AUTH_INVALIDRESP;
 			} else {
 				KASSERT(results,
 				    ("auth validated but no result"));
 				*resultsp = results;
 			}
 		}		/* end successful completion */
 		/*
 		 * If unsuccessful AND error is an authentication error
 		 * then refresh credentials and try again, else break
 		 */
 		else if (stat == RPC_AUTHERROR)
 			/* maybe our credentials need to be refreshed ... */
 			if (nrefreshes > 0 &&
 			    AUTH_REFRESH(auth, &reply_msg)) {
 				nrefreshes--;
 				XDR_DESTROY(&xdrs);
 				mtx_lock(&ct->ct_lock);
 				goto call_again;
 			}
 		/* end of unsuccessful completion */
 	}	/* end of valid reply message */
 	else {
 		errp->re_status = stat = RPC_CANTDECODERES;
 	}
 	XDR_DESTROY(&xdrs);
 	mtx_lock(&ct->ct_lock);
 out:
 	mtx_assert(&ct->ct_lock, MA_OWNED);
 
 	KASSERT(stat != RPC_SUCCESS || *resultsp,
 	    ("RPC_SUCCESS without reply"));
 
 	if (mreq)
 		m_freem(mreq);
 	if (cr->cr_mrep)
 		m_freem(cr->cr_mrep);
 
 	ct->ct_threads--;
 	if (ct->ct_closing)
 		wakeup(ct);
 		
 	mtx_unlock(&ct->ct_lock);
 
 	if (auth && stat != RPC_SUCCESS)
 		AUTH_VALIDATE(auth, xid, NULL, NULL);
 
 	free(cr, M_RPC);
 
 	return (stat);
 }
 
 /*
  * Copy the error state kept in the handle's private data (ct_error) into
  * the caller-supplied *errp.  ct_lock is not taken here.
  */
 static void
 clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
 {
 	struct ct_data *priv;
 
 	priv = (struct ct_data *)cl->cl_private;
 	*errp = priv->ct_error;
 }
 
 /*
  * Run the caller-supplied XDR routine over res_ptr using a stream whose
  * operation is XDR_FREE, and hand back whatever that routine returns.
  * Only the x_op field of the stream is set up; cl is not used.
  */
 static bool_t
 clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr)
 {
 	XDR free_stream;
 
 	free_stream.x_op = XDR_FREE;
 	return (xdr_res(&free_stream, res_ptr));
 }
 
 /*
  * Abort entry point for this transport.  It does nothing and ignores cl.
  */
 /*ARGSUSED*/
 static void
 clnt_vc_abort(CLIENT *cl)
 {
 
 	(void)cl;
 }
 
 /*
  * Query or change a property of the client handle.  'request' selects the
  * property and 'info' points at the value to read or the buffer to fill.
  * ct_lock is held while the handle is examined or modified.
  *
  * Returns TRUE when the request was carried out.  FALSE is returned when
  * 'info' is NULL for a request that needs it, when CLSET_TIMEOUT is given
  * a value rejected by time_not_ok(), for CLSET_SVC_ADDR, and for any
  * request that is not handled below.
  */
 static bool_t
 clnt_vc_control(CLIENT *cl, u_int request, void *info)
 {
 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
 	SVCXPRT *bcxprt;
 	uint64_t *tlsargs;
 	uint32_t *hdrword;
 	bool_t rv = TRUE;
 	int error;
 	static u_int thrdnum = 0;
 
 	mtx_lock(&ct->ct_lock);
 
 	/* The two close-policy requests are the only ones that ignore info. */
 	if (request == CLSET_FD_CLOSE || request == CLSET_FD_NCLOSE) {
 		ct->ct_closeit = (request == CLSET_FD_CLOSE) ? TRUE : FALSE;
 		goto done;
 	}
 
 	/* Everything else reads from or writes through info. */
 	if (info == NULL) {
 		rv = FALSE;
 		goto done;
 	}
 
 	switch (request) {
 	case CLSET_TIMEOUT:
 		if (time_not_ok((struct timeval *)info))
 			rv = FALSE;
 		else
 			ct->ct_wait = *(struct timeval *)info;
 		break;
 
 	case CLGET_TIMEOUT:
 		*(struct timeval *)info = ct->ct_wait;
 		break;
 
 	case CLGET_SERVER_ADDR:
 	case CLGET_SVC_ADDR:
 		/*
 		 * Both requests copy out the stored sockaddr.  For
 		 * CLGET_SVC_ADDR this is slightly different semantics to
 		 * userland - we use sockaddr instead of netbuf.
 		 */
 		memcpy(info, &ct->ct_addr, ct->ct_addr.ss_len);
 		break;
 
 	case CLSET_SVC_ADDR:		/* set to new address */
 		rv = FALSE;
 		break;
 
 	case CLGET_XID:
 		*(uint32_t *)info = ct->ct_xid;
 		break;
 
 	case CLSET_XID:
 		/*
 		 * This will set the xid of the NEXT call; store one less
 		 * because clnt_vc_call() increments once.
 		 */
 		ct->ct_xid = *(uint32_t *)info - 1;
 		break;
 
 	case CLGET_VERS:
 	case CLSET_VERS:
 		/*
 		 * This RELIES on the information that, in the call body,
 		 * the version number field is the fifth field from the
 		 * beginning of the RPC header. MUST be changed if the
 		 * call_struct is changed
 		 */
 		hdrword = (uint32_t *)(void *)(ct->ct_mcallc +
 		    4 * BYTES_PER_XDR_UNIT);
 		if (request == CLGET_VERS)
 			*(uint32_t *)info = ntohl(*hdrword);
 		else
 			*hdrword = htonl(*(uint32_t *)info);
 		break;
 
 	case CLGET_PROG:
 	case CLSET_PROG:
 		/*
 		 * This RELIES on the information that, in the call body,
 		 * the program number field is the fourth field from the
 		 * beginning of the RPC header. MUST be changed if the
 		 * call_struct is changed
 		 */
 		hdrword = (uint32_t *)(void *)(ct->ct_mcallc +
 		    3 * BYTES_PER_XDR_UNIT);
 		if (request == CLGET_PROG)
 			*(uint32_t *)info = ntohl(*hdrword);
 		else
 			*hdrword = htonl(*(uint32_t *)info);
 		break;
 
 	case CLSET_WAITCHAN:
 		ct->ct_waitchan = (const char *)info;
 		break;
 
 	case CLGET_WAITCHAN:
 		*(const char **)info = ct->ct_waitchan;
 		break;
 
 	case CLSET_INTERRUPTIBLE:
 		ct->ct_waitflag = (*(int *)info != 0) ? PCATCH : 0;
 		break;
 
 	case CLGET_INTERRUPTIBLE:
 		*(int *)info = (ct->ct_waitflag != 0) ? TRUE : FALSE;
 		break;
 
 	case CLSET_BACKCHANNEL:
 		/* Only the first backchannel transport offered is adopted. */
 		bcxprt = (SVCXPRT *)info;
 		if (ct->ct_backchannelxprt != NULL)
 			break;
 		bcxprt->xp_p2 = ct;
 		if (ct->ct_sslrefno != 0)
 			bcxprt->xp_tls = RPCTLS_FLAGS_HANDSHAKE;
 		ct->ct_backchannelxprt = bcxprt;
 		break;
 
 	case CLSET_TLS:
 		/* info points at three consecutive 64-bit values. */
 		tlsargs = (uint64_t *)info;
 		ct->ct_sslsec = tlsargs[0];
 		ct->ct_sslusec = tlsargs[1];
 		ct->ct_sslrefno = tlsargs[2];
 		if (ct->ct_sslrefno == RPCTLS_REFNO_HANDSHAKE)
 			break;
 		/*
 		 * Drop the lock before creating the kthread that handles
 		 * upcalls, then return without touching the lock again.
 		 */
 		mtx_unlock(&ct->ct_lock);
 		error = kthread_add(clnt_vc_dotlsupcall, ct,
 		    NULL, NULL, 0, 0, "krpctls%u", thrdnum++);
 		if (error != 0)
 			panic("Can't add KRPC thread error %d", error);
 		return (TRUE);
 
 	case CLSET_BLOCKRCV:
 		/* Swap between the NORMAL and TLSHANDSHAKE receive states. */
 		if (*(int *)info != 0) {
 			ct->ct_rcvstate &= ~RPCRCVSTATE_NORMAL;
 			ct->ct_rcvstate |= RPCRCVSTATE_TLSHANDSHAKE;
 		} else {
 			ct->ct_rcvstate &= ~RPCRCVSTATE_TLSHANDSHAKE;
 			ct->ct_rcvstate |= RPCRCVSTATE_NORMAL;
 		}
 		break;
 
 	default:
 		rv = FALSE;
 		break;
 	}
 
 done:
 	mtx_unlock(&ct->ct_lock);
 	return (rv);
 }
 
 /*
  * Close the client handle.
  *
  * If the handle is already closed this returns at once.  If another
  * thread is in the middle of closing it, this waits for ct_closing to
  * clear and then returns.  Otherwise, when a socket is attached, the
  * receive upcall is removed (waiting for running upcalls to finish),
  * every pending request is failed with ESHUTDOWN and woken, and the
  * function waits for ct_threads to reach zero.  Finally the handle is
  * marked closed and sleepers on &ct->ct_sslrefno and on ct are woken.
  */
 static void
 clnt_vc_close(CLIENT *cl)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
 	struct ct_request *req;
 
 	mtx_lock(&ct->ct_lock);
 
 	/* Nothing left to do. */
 	if (ct->ct_closed)
 		goto unlock_out;
 
 	/* Somebody else is closing; wait until they are done. */
 	if (ct->ct_closing) {
 		do {
 			msleep(ct, &ct->ct_lock, 0, "rpcclose", 0);
 		} while (ct->ct_closing);
 		KASSERT(ct->ct_closed, ("client should be closed"));
 		goto unlock_out;
 	}
 
 	if (ct->ct_socket != NULL) {
 		ct->ct_closing = TRUE;
 		mtx_unlock(&ct->ct_lock);
 
 		/* Detach the receive upcall and drain running upcalls. */
 		SOCKBUF_LOCK(&ct->ct_socket->so_rcv);
 		if (ct->ct_socket->so_rcv.sb_upcall != NULL) {
 			soupcall_clear(ct->ct_socket, SO_RCV);
 			clnt_vc_upcallsdone(ct);
 		}
 		SOCKBUF_UNLOCK(&ct->ct_socket->so_rcv);
 
 		/*
 		 * Abort any pending requests and wait until everyone
 		 * has finished with clnt_vc_call.
 		 */
 		mtx_lock(&ct->ct_lock);
 		TAILQ_FOREACH(req, &ct->ct_pending, cr_link) {
 			req->cr_xid = 0;
 			req->cr_error = ESHUTDOWN;
 			wakeup(req);
 		}
 
 		while (ct->ct_threads != 0)
 			msleep(ct, &ct->ct_lock, 0, "rpcclose", 0);
 	}
 
 	ct->ct_closing = FALSE;
 	ct->ct_closed = TRUE;
 	wakeup(&ct->ct_sslrefno);
 	mtx_unlock(&ct->ct_lock);
 	wakeup(ct);
 	return;
 
 unlock_out:
 	mtx_unlock(&ct->ct_lock);
 }
 
 /*
  * Tear down a client handle and free everything hanging off it.
  *
  * The handle is closed first.  Any backchannel transport is detached
  * (its xp_p2 back pointer is cleared under xp_lock), the function waits
  * for the TLS upcall kthread to clear RPCRCVSTATE_UPCALLTHREAD, and the
  * lock is destroyed.  The socket is only disposed of when ct_closeit is
  * set.  Last, the buffered mbufs, the private data, the netid/tp strings
  * and the CLIENT itself are freed.
  */
 static void
 clnt_vc_destroy(CLIENT *cl)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
 	struct socket *so = NULL;
 	SVCXPRT *bcxprt;
 	uint32_t reterr;
 
 	clnt_vc_close(cl);
 
 	mtx_lock(&ct->ct_lock);
 	bcxprt = ct->ct_backchannelxprt;
 	ct->ct_backchannelxprt = NULL;
 	if (bcxprt != NULL) {
 		mtx_unlock(&ct->ct_lock);	/* To avoid a LOR. */
 		sx_xlock(&bcxprt->xp_lock);
 		mtx_lock(&ct->ct_lock);
 		bcxprt->xp_p2 = NULL;
 		sx_xunlock(&bcxprt->xp_lock);
 	}
 
 	/* Only take charge of the socket when asked to close it. */
 	if (ct->ct_socket != NULL && ct->ct_closeit)
 		so = ct->ct_socket;
 
 	/* Wait for the upcall kthread to terminate. */
 	while ((ct->ct_rcvstate & RPCRCVSTATE_UPCALLTHREAD) != 0)
 		msleep(&ct->ct_sslrefno, &ct->ct_lock, 0,
 		    "clntvccl", hz);
 	mtx_unlock(&ct->ct_lock);
 
 	mtx_destroy(&ct->ct_lock);
 
 	if (so != NULL && ct->ct_sslrefno != 0) {
 		/*
 		 * If the TLS handshake is in progress, the upcall
 		 * will fail, but the socket should be closed by the
 		 * daemon, since the connect upcall has just failed.
 		 */
 		if (ct->ct_sslrefno != RPCTLS_REFNO_HANDSHAKE) {
 			/*
 			 * If the upcall fails, the socket has
 			 * probably been closed via the rpctlscd
 			 * daemon having crashed or been
 			 * restarted, so ignore return stat.
 			 */
 			rpctls_cl_disconnect(ct->ct_sslsec,
 			    ct->ct_sslusec, ct->ct_sslrefno,
 			    &reterr);
 		}
 		/* Must sorele() to get rid of reference. */
 		CURVNET_SET(so->so_vnet);
 		sorele(so);
 		CURVNET_RESTORE();
 	} else if (so != NULL) {
 		soshutdown(so, SHUT_WR);
 		soclose(so);
 	}
 
 	m_freem(ct->ct_record);
 	m_freem(ct->ct_raw);
 	mem_free(ct, sizeof(struct ct_data));
 	if (cl->cl_netid && cl->cl_netid[0])
 		mem_free(cl->cl_netid, strlen(cl->cl_netid) +1);
 	if (cl->cl_tp && cl->cl_tp[0])
 		mem_free(cl->cl_tp, strlen(cl->cl_tp) +1);
 	mem_free(cl, sizeof(CLIENT));
 }
 
 /*
  * Make sure that the time is not garbage.  The result is non-zero ("not
  * ok") when either field is negative, when tv_sec is above 100000000 or
  * when tv_usec is above 1000000; otherwise it is zero.
  * Note this is different from time_not_ok in clnt_dg.c
  */
 static bool_t
 time_not_ok(struct timeval *t)
 {
 	const int sec_bad = (t->tv_sec < 0 || t->tv_sec > 100000000);
 	const int usec_bad = (t->tv_usec < 0 || t->tv_usec > 1000000);
 
 	return (sec_bad || usec_bad);
 }
 
 /*
  * Receive upcall for the stream client's socket.
  *
  * Outline:
  *  1. If ct_rcvstate has neither RPCRCVSTATE_NORMAL nor
  *     RPCRCVSTATE_NONAPPDATA set, optionally note that a socket upcall
  *     is needed later and return.
  *  2. If another instance is already running (ct_upcallrefs > 0), return.
  *  3. Drain the socket with soreceive() and append the data to ct_raw.
  *  4. Walk ct_raw, stripping record marks and assembling ct_record.  A
  *     completed record whose direction word is CALL is queued on the
  *     backchannel transport (or dropped if there is none); any other
  *     record is matched by XID against ct_pending and handed to the
  *     waiting request, or dropped when no request matches.
  *  5. If an error was recorded, store it in ct_error and fail every
  *     pending request with it.
  *
  * The receive sockbuf lock is held on entry and on return; it is dropped
  * only around soreceive().  The function always returns SU_OK.  The
  * waitflag argument is not used.
  */
 int
 clnt_vc_soupcall(struct socket *so, void *arg, int waitflag)
 {
 	struct ct_data *ct = (struct ct_data *) arg;
 	struct uio uio;
 	struct mbuf *m, *m2, **ctrlp;
 	struct ct_request *cr;
 	int error, rcvflag, foundreq;
 	uint32_t xid_plus_direction[2], header;
 	SVCXPRT *xprt;
 	struct cf_conn *cd;
 	u_int rawlen;
 	struct cmsghdr *cmsg;
 	struct tls_get_record tgr;
 
 	/*
 	 * RPC-over-TLS needs to block reception during
 	 * upcalls since the upcall will be doing I/O on
 	 * the socket via openssl library calls.
 	 */
 	mtx_lock(&ct->ct_lock);
 	if ((ct->ct_rcvstate & (RPCRCVSTATE_NORMAL |
 	    RPCRCVSTATE_NONAPPDATA)) == 0) {
 		/* Mark that a socket upcall needs to be done. */
 		if ((ct->ct_rcvstate & (RPCRCVSTATE_UPCALLNEEDED |
 		    RPCRCVSTATE_UPCALLINPROG)) != 0)
 			ct->ct_rcvstate |= RPCRCVSTATE_SOUPCALLNEEDED;
 		mtx_unlock(&ct->ct_lock);
 		return (SU_OK);
 	}
 	mtx_unlock(&ct->ct_lock);
 
 	/*
 	 * If another thread is already here, it must be in
 	 * soreceive(), so just return to avoid races with it.
 	 * ct_upcallrefs is protected by the SOCKBUF_LOCK(),
 	 * which is held in this function, except when
 	 * soreceive() is called.
 	 */
 	if (ct->ct_upcallrefs > 0)
 		return (SU_OK);
 	ct->ct_upcallrefs++;
 
 	/*
 	 * Read as much as possible off the socket and link it
 	 * onto ct_raw.
 	 */
 	for (;;) {
 		uio.uio_resid = 1000000000;
 		uio.uio_td = curthread;
 		m2 = m = NULL;
 		rcvflag = MSG_DONTWAIT | MSG_SOCALLBCK;
 		/*
 		 * In the NORMAL state on a TLS connection, ask for
 		 * application data only and pass no control mbuf pointer;
 		 * otherwise collect control data in m2.
 		 */
 		if (ct->ct_sslrefno != 0 && (ct->ct_rcvstate &
 		    RPCRCVSTATE_NORMAL) != 0) {
 			rcvflag |= MSG_TLSAPPDATA;
 			ctrlp = NULL;
 		} else
 			ctrlp = &m2;
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		error = soreceive(so, NULL, &uio, &m, ctrlp, &rcvflag);
 		SOCKBUF_LOCK(&so->so_rcv);
 
 		if (error == EWOULDBLOCK) {
 			/*
 			 * We must re-test for readability after
 			 * taking the lock to protect us in the case
 			 * where a new packet arrives on the socket
 			 * after our call to soreceive fails with
 			 * EWOULDBLOCK.
 			 */
 			error = 0;
 			if (!soreadable(so))
 				break;
 			continue;
 		}
 		if (error == 0 && m == NULL) {
 			/*
 			 * We must have got EOF trying
 			 * to read from the stream.
 			 */
 			error = ECONNRESET;
 		}
 
 		/*
 		 * A return of ENXIO indicates that there is a
 		 * non-application data record at the head of the
 		 * socket's receive queue, for TLS connections.
 		 * This record needs to be handled in userland
 		 * via an SSL_read() call, so do an upcall to the daemon.
 		 *
 		 * NOTE(review): only RPCRCVSTATE_UPCALLNEEDED is set below,
 		 * and error still holds ENXIO when the break is taken, so
 		 * the "error != 0" block after the record-processing loop
 		 * also fails every pending request with ENXIO.  Confirm
 		 * that both are intended.
 		 */
 		if (ct->ct_sslrefno != 0 && error == ENXIO) {
 			/* Disable reception, marking an upcall needed. */
 			mtx_lock(&ct->ct_lock);
 			ct->ct_rcvstate |= RPCRCVSTATE_UPCALLNEEDED;
 			/*
 			 * If an upcall is needed, wake up the kthread
 			 * that runs clnt_vc_dotlsupcall().
 			 */
 			wakeup(&ct->ct_sslrefno);
 			mtx_unlock(&ct->ct_lock);
 			break;
 		}
 		if (error != 0)
 			break;
 
 		/* Process any record header(s). */
 		if (m2 != NULL) {
 			cmsg = mtod(m2, struct cmsghdr *);
 			if (cmsg->cmsg_type == TLS_GET_RECORD &&
 			    cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) {
 				memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
 				/*
 				 * This should have been handled by
 				 * setting RPCRCVSTATE_UPCALLNEEDED in
 				 * ct_rcvstate but if not, all we can do
 				 * is toss it away.
 				 */
 				if (tgr.tls_type != TLS_RLTYPE_APP) {
 					m_freem(m);
 					m_free(m2);
 					/* Return to the NORMAL receive state. */
 					mtx_lock(&ct->ct_lock);
 					ct->ct_rcvstate &=
 					    ~RPCRCVSTATE_NONAPPDATA;
 					ct->ct_rcvstate |= RPCRCVSTATE_NORMAL;
 					mtx_unlock(&ct->ct_lock);
 					continue;
 				}
 			}
 			m_free(m2);
 		}
 
 		/* Append the newly received chain to ct_raw. */
 		if (ct->ct_raw != NULL)
 			m_last(ct->ct_raw)->m_next = m;
 		else
 			ct->ct_raw = m;
 	}
 	rawlen = m_length(ct->ct_raw, NULL);
 
 	/* Now, process as much of ct_raw as possible. */
 	for (;;) {
 		/*
 		 * If ct_record_resid is zero, we are waiting for a
 		 * record mark.
 		 */
 		if (ct->ct_record_resid == 0) {
 			if (rawlen < sizeof(uint32_t))
 				break;
 			m_copydata(ct->ct_raw, 0, sizeof(uint32_t),
 			    (char *)&header);
 			header = ntohl(header);
 			/*
 			 * Low 31 bits: number of bytes that follow.
 			 * Top bit: stored in ct_record_eor.
 			 */
 			ct->ct_record_resid = header & 0x7fffffff;
 			ct->ct_record_eor = ((header & 0x80000000) != 0);
 			m_adj(ct->ct_raw, sizeof(uint32_t));
 			rawlen -= sizeof(uint32_t);
 		} else {
 			/*
 			 * Move as much of the record as possible to
 			 * ct_record.
 			 */
 			if (rawlen == 0)
 				break;
 			if (rawlen <= ct->ct_record_resid) {
 				/* All of ct_raw belongs to this record. */
 				if (ct->ct_record != NULL)
 					m_last(ct->ct_record)->m_next =
 					    ct->ct_raw;
 				else
 					ct->ct_record = ct->ct_raw;
 				ct->ct_raw = NULL;
 				ct->ct_record_resid -= rawlen;
 				rawlen = 0;
 			} else {
 				/*
 				 * ct_raw holds more than this record needs:
 				 * split it and keep the remainder in ct_raw.
 				 * If the split fails, stop processing for now.
 				 */
 				m = m_split(ct->ct_raw, ct->ct_record_resid,
 				    M_NOWAIT);
 				if (m == NULL)
 					break;
 				if (ct->ct_record != NULL)
 					m_last(ct->ct_record)->m_next =
 					    ct->ct_raw;
 				else
 					ct->ct_record = ct->ct_raw;
 				rawlen -= ct->ct_record_resid;
 				ct->ct_record_resid = 0;
 				ct->ct_raw = m;
 			}
 			if (ct->ct_record_resid > 0)
 				break;
 
 			/*
 			 * If we have the entire record, see if we can
 			 * match it to a request.
 			 */
 			if (ct->ct_record_eor) {
 				/*
 				 * The XID is in the first uint32_t of
 				 * the reply and the message direction
 				 * is the second one.
 				 */
 				if (ct->ct_record->m_len <
 				    sizeof(xid_plus_direction) &&
 				    m_length(ct->ct_record, NULL) <
 				    sizeof(xid_plus_direction)) {
 					/*
 					 * What to do now?
 					 * The data in the TCP stream is
 					 * corrupted such that there is no
 					 * valid RPC message to parse.
 					 * I think it best to close this
 					 * connection and allow
 					 * clnt_reconnect_call() to try
 					 * and establish a new one.
 					 */
 					printf("clnt_vc_soupcall: "
 					    "connection data corrupted\n");
 					error = ECONNRESET;
 					goto wakeup_all;
 				}
 				m_copydata(ct->ct_record, 0,
 				    sizeof(xid_plus_direction),
 				    (char *)xid_plus_direction);
 				xid_plus_direction[0] =
 				    ntohl(xid_plus_direction[0]);
 				xid_plus_direction[1] =
 				    ntohl(xid_plus_direction[1]);
 				/* Check message direction. */
 				if (xid_plus_direction[1] == CALL) {
 					/* This is a backchannel request. */
 					mtx_lock(&ct->ct_lock);
 					xprt = ct->ct_backchannelxprt;
 					if (xprt == NULL) {
 						mtx_unlock(&ct->ct_lock);
 						/* Just throw it away. */
 						m_freem(ct->ct_record);
 						ct->ct_record = NULL;
 					} else {
 						cd = (struct cf_conn *)
 						    xprt->xp_p1;
 						m2 = cd->mreq;
 						/*
 						 * The requests are chained
 						 * in the m_nextpkt list.
 						 */
 						while (m2 != NULL &&
 						    m2->m_nextpkt != NULL)
 							/* Find end of list. */
 							m2 = m2->m_nextpkt;
 						if (m2 != NULL)
 							m2->m_nextpkt =
 							    ct->ct_record;
 						else
 							cd->mreq =
 							    ct->ct_record;
 						ct->ct_record->m_nextpkt =
 						    NULL;
 						ct->ct_record = NULL;
 						xprt_active(xprt);
 						mtx_unlock(&ct->ct_lock);
 					}
 				} else {
 					mtx_lock(&ct->ct_lock);
 					foundreq = 0;
 					TAILQ_FOREACH(cr, &ct->ct_pending,
 					    cr_link) {
 						if (cr->cr_xid ==
 						    xid_plus_direction[0]) {
 							/*
 							 * This one
 							 * matches. We leave
 							 * the reply mbuf in
 							 * cr->cr_mrep. Set
 							 * the XID to zero so
 							 * that we will ignore
 							 * any duplicated
 							 * replies.
 							 */
 							cr->cr_xid = 0;
 							cr->cr_mrep =
 							    ct->ct_record;
 							cr->cr_error = 0;
 							foundreq = 1;
 							wakeup(cr);
 							break;
 						}
 					}
 					mtx_unlock(&ct->ct_lock);
 
 					/* No request wants this reply: drop it. */
 					if (!foundreq)
 						m_freem(ct->ct_record);
 					ct->ct_record = NULL;
 				}
 			}
 		}
 	}
 
 	if (error != 0) {
 	wakeup_all:
 		/*
 		 * This socket is broken, so mark that it cannot
 		 * receive and fail all RPCs waiting for a reply
 		 * on it, so that they will be retried on a new
 		 * TCP connection created by clnt_reconnect_X().
 		 */
 		mtx_lock(&ct->ct_lock);
 		ct->ct_error.re_status = RPC_CANTRECV;
 		ct->ct_error.re_errno = error;
 		TAILQ_FOREACH(cr, &ct->ct_pending, cr_link) {
 			cr->cr_error = error;
 			wakeup(cr);
 		}
 		mtx_unlock(&ct->ct_lock);
 	}
 
 	/* Leave the upcall and wake anyone waiting for upcalls to drain. */
 	ct->ct_upcallrefs--;
 	if (ct->ct_upcallrefs < 0)
 		panic("rpcvc upcall refcnt");
 	if (ct->ct_upcallrefs == 0)
 		wakeup(&ct->ct_upcallrefs);
 	return (SU_OK);
 }
 
 /*
  * Wait for all upcalls in progress to complete, i.e. sleep until
  * ct_upcallrefs is no longer positive.  The caller must hold the receive
  * sockbuf lock; its mutex is passed to msleep() for the sleep.
  */
 static void
 clnt_vc_upcallsdone(struct ct_data *ct)
 {
 
 	SOCKBUF_LOCK_ASSERT(&ct->ct_socket->so_rcv);
 
 	for (;;) {
 		if (ct->ct_upcallrefs <= 0)
 			break;
 		(void) msleep(&ct->ct_upcallrefs,
 		    SOCKBUF_MTX(&ct->ct_socket->so_rcv), 0, "rpcvcup", 0);
 	}
 }
 
 /*
  * Do a TLS upcall to the rpctlscd daemon, as required.
  * This function runs as a kthread.
  */
 static void
 clnt_vc_dotlsupcall(void *data)
 {
 	struct ct_data *ct = (struct ct_data *)data;
 	enum clnt_stat ret;
 	uint32_t reterr;
 
 	mtx_lock(&ct->ct_lock);
 	ct->ct_rcvstate |= RPCRCVSTATE_UPCALLTHREAD;
 	while (!ct->ct_closed) {
 		if ((ct->ct_rcvstate & RPCRCVSTATE_UPCALLNEEDED) != 0) {
 			ct->ct_rcvstate &= ~RPCRCVSTATE_UPCALLNEEDED;
 			ct->ct_rcvstate |= RPCRCVSTATE_UPCALLINPROG;
 			if (ct->ct_sslrefno != 0 && ct->ct_sslrefno !=
 			    RPCTLS_REFNO_HANDSHAKE) {
 				mtx_unlock(&ct->ct_lock);
 				ret = rpctls_cl_handlerecord(ct->ct_sslsec,
 				    ct->ct_sslusec, ct->ct_sslrefno, &reterr);
 				mtx_lock(&ct->ct_lock);
 			}
 			ct->ct_rcvstate &= ~RPCRCVSTATE_UPCALLINPROG;
 			if (ret == RPC_SUCCESS && reterr == RPCTLSERR_OK)
 				ct->ct_rcvstate |= RPCRCVSTATE_NORMAL;
 			else
 				ct->ct_rcvstate |= RPCRCVSTATE_NONAPPDATA;
 			wakeup(&ct->ct_rcvstate);
 		}
 		if ((ct->ct_rcvstate & RPCRCVSTATE_SOUPCALLNEEDED) != 0) {
 			ct->ct_rcvstate &= ~RPCRCVSTATE_SOUPCALLNEEDED;
 			mtx_unlock(&ct->ct_lock);
 			SOCKBUF_LOCK(&ct->ct_socket->so_rcv);
 			clnt_vc_soupcall(ct->ct_socket, ct, M_NOWAIT);
 			SOCKBUF_UNLOCK(&ct->ct_socket->so_rcv);
 			mtx_lock(&ct->ct_lock);
 		}
 		msleep(&ct->ct_sslrefno, &ct->ct_lock, 0, "clntvcdu", hz);
 	}
 	ct->ct_rcvstate &= ~RPCRCVSTATE_UPCALLTHREAD;
 	wakeup(&ct->ct_sslrefno);
 	mtx_unlock(&ct->ct_lock);
 	kthread_exit();
 }
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index 372f04eba54c..2484407d557c 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -1,264 +1,264 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
  *
  * $FreeBSD$
  */
 #ifndef _SYS_SOCKBUF_H_
 #define _SYS_SOCKBUF_H_
 
 /*
  * Constants for sb_flags field of struct sockbuf/xsockbuf.
  */
 #define	SB_TLS_RX	0x01		/* using KTLS on RX */
 #define	SB_TLS_RX_RUNNING 0x02		/* KTLS RX operation running */
 #define	SB_WAIT		0x04		/* someone is waiting for data/space */
 #define	SB_SEL		0x08		/* someone is selecting */
 #define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
 #define	SB_UPCALL	0x20		/* someone wants an upcall */
 #define	SB_NOINTR	0x40		/* operations not interruptible */
 #define	SB_AIO		0x80		/* AIO operations queued */
 #define	SB_KNOTE	0x100		/* kernel note attached */
 #define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
 #define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
 #define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
 #define	SB_STOP		0x1000		/* backpressure indicator */
 #define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
 #define	SB_TLS_IFNET	0x4000		/* has used / is using ifnet KTLS */
 
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
 #define	SBS_RCVATMARK		0x0040	/* at mark on input */
 
 #if defined(_KERNEL) || defined(_WANT_SOCKET)
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
 #include <sys/_task.h>
 
 #define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */
 
 struct ktls_session;
 struct mbuf;
 struct sockaddr;
 struct socket;
 struct thread;
 struct selinfo;
 
 /*
  * Variables for socket buffering.
  *
  * One struct sockbuf holds the state of a single socket buffer: the lock
  * pointer, the mbuf chain and cursors into it, byte and mbuf accounting
  * with their limits, the upcall hook, and the TLS and AIO bookkeeping.
  *
  * Locking key to struct sockbuf:
  * (a) locked by SOCKBUF_LOCK().
  *
  * NOTE(review): sb_tls_info is annotated "(a + b)" below, but this key
  * defines no (b) entry - confirm what (b) refers to.  sb_mtx, sb_sel and
  * sb_aiotask carry no locking annotation.
  */
 struct sockbuf {
 	struct	mtx *sb_mtx;		/* sockbuf lock */
 	struct	selinfo *sb_sel;	/* process selecting read/write */
 	short	sb_state;	/* (a) socket state on sockbuf */
 	short	sb_flags;	/* (a) flags, see above */
 	struct	mbuf *sb_mb;	/* (a) the mbuf chain */
 	struct	mbuf *sb_mbtail; /* (a) the last mbuf in the chain */
 	struct	mbuf *sb_lastrecord;	/* (a) first mbuf of last
 					 * record in socket buffer */
 	struct	mbuf *sb_sndptr; /* (a) pointer into mbuf chain */
 	struct	mbuf *sb_fnrdy;	/* (a) pointer to first not ready buffer */
 	u_int	sb_sndptroff;	/* (a) byte offset of ptr into chain */
 	u_int	sb_acc;		/* (a) available chars in buffer */
 	u_int	sb_ccc;		/* (a) claimed chars in buffer */
 	u_int	sb_hiwat;	/* (a) max actual char count */
 	u_int	sb_mbcnt;	/* (a) chars of mbufs used */
 	u_int   sb_mcnt;        /* (a) number of mbufs in buffer */
 	u_int   sb_ccnt;        /* (a) number of clusters in buffer */
 	u_int	sb_mbmax;	/* (a) max chars of mbufs to use */
 	u_int	sb_ctl;		/* (a) non-data chars in buffer */
 	u_int	sb_tlscc;	/* (a) TLS chain characters */
 	u_int	sb_tlsdcc;	/* (a) TLS characters being decrypted */
 	int	sb_lowat;	/* (a) low water mark */
 	sbintime_t	sb_timeo;	/* (a) timeout for read/write */
 	struct	mbuf *sb_mtls;	/* (a) TLS mbuf chain */
 	struct	mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */
 	/* Upcall hook and the opaque argument stored alongside it. */
 	int	(*sb_upcall)(struct socket *, void *, int); /* (a) */
 	void	*sb_upcallarg;	/* (a) */
 	uint64_t sb_tls_seqno;	/* (a) TLS seqno */
 	struct	ktls_session *sb_tls_info; /* (a + b) TLS state */
 	TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */
 	struct	task sb_aiotask; /* AIO task */
 };
 
 #endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
 #ifdef _KERNEL
 
+/*
+ * 'which' values for KPIs that operate on one buffer of a socket.  Such
+ * KPIs take the socket plus one of these values instead of a pointer to
+ * the buffer, e.g. sbwait(so, SO_SND) where callers formerly passed
+ * &so->so_snd.
+ */
+typedef enum { SO_RCV, SO_SND } sb_which;
+
 /*
  * Per-socket buffer mutex used to protect most fields in the socket buffer.
  * These make use of the mutex pointer embedded in struct sockbuf, which
  * currently just references mutexes in the containing socket.  The
  * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
  * these locking macros.
  */
 #define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
-#define	SOCKBUF_LOCK_INIT(_sb, _name) \
-	mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
-#define	SOCKBUF_LOCK_DESTROY(_sb)	mtx_destroy(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
 #define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
 
 /*
  * Socket buffer private mbuf(9) flags.
  */
 #define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
 #define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
 #define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
 
 void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
 int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
 	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
 void	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control, int flags);
 void	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control, int flags);
 void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
 void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
 void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
 	sbcreatecontrol(caddr_t p, int size, int type, int level);
 struct mbuf *
 	sbcreatecontrol_how(void *p, int size, int type, int level,
 	    int wait);
-void	sbdestroy(struct sockbuf *sb, struct socket *so);
+void	sbdestroy(struct socket *, sb_which);
 void	sbdrop(struct sockbuf *sb, int len);
 void	sbdrop_locked(struct sockbuf *sb, int len);
 struct mbuf *
 	sbcut_locked(struct sockbuf *sb, int len);
 void	sbdroprecord(struct sockbuf *sb);
 void	sbdroprecord_locked(struct sockbuf *sb);
 void	sbflush(struct sockbuf *sb);
 void	sbflush_locked(struct sockbuf *sb);
-void	sbrelease(struct sockbuf *sb, struct socket *so);
-void	sbrelease_locked(struct sockbuf *sb, struct socket *so);
+void	sbrelease(struct socket *, sb_which);
+void	sbrelease_locked(struct socket *, sb_which);
 int	sbsetopt(struct socket *so, int cmd, u_long cc);
-int	sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool	sbreserve_locked(struct socket *so, sb_which which, u_long cc,
 	    struct thread *td);
 void	sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
 struct mbuf *
 	sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
 struct mbuf *
 	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
-int	sbwait(struct sockbuf *sb);
+int	sbwait(struct socket *, sb_which);
 void	sballoc(struct sockbuf *, struct mbuf *);
 void	sbfree(struct sockbuf *, struct mbuf *);
 void	sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
 void	sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
 int	sbready(struct sockbuf *, struct mbuf *, int);
 
 /*
  * Return how much data is available to be taken out of socket
  * buffer right now.
  */
 static inline u_int
 sbavail(struct sockbuf *sb)
 {
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 	return (sb->sb_acc);
 }
 
 /*
  * Return how much data sits there in the socket buffer
  * It might be that some data is not yet ready to be read.
  */
 static inline u_int
 sbused(struct sockbuf *sb)
 {
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 	return (sb->sb_ccc);
 }
 
 /*
  * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
  * This is problematical if the fields are unsigned, as the space might
  * still be negative (ccc > hiwat or mbcnt > mbmax).
  *
  * Returns 0 when SB_STOP is set.  Otherwise returns the smaller of the
  * remaining byte allowance (sb_hiwat - sb_ccc) and the remaining mbuf
  * allowance (sb_mbmax - sb_mbcnt), each computed into an int.
  */
 static inline long
 sbspace(struct sockbuf *sb)
 {
 	int bytes_left, mbufs_left;	/* size should match sockbuf fields */
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 
 	if ((sb->sb_flags & SB_STOP) != 0)
 		return (0);
 
 	bytes_left = sb->sb_hiwat - sb->sb_ccc;
 	mbufs_left = sb->sb_mbmax - sb->sb_mbcnt;
 
 	if (bytes_left < mbufs_left)
 		return (bytes_left);
 	return (mbufs_left);
 }
 
 /*
  * If the buffer's mbuf chain is empty (sb_mb == NULL), also clear the
  * sb_mbtail and sb_lastrecord pointers.  Expands to a single statement;
  * the argument is evaluated more than once.
  */
 #define SB_EMPTY_FIXUP(sb) do {						\
 	if ((sb)->sb_mb == NULL) {					\
 		(sb)->sb_mbtail = NULL;					\
 		(sb)->sb_lastrecord = NULL;				\
 	}								\
 } while (/*CONSTCOND*/0)
 
 #ifdef SOCKBUF_DEBUG
 void	sblastrecordchk(struct sockbuf *, const char *, int);
 void	sblastmbufchk(struct sockbuf *, const char *, int);
 void	sbcheck(struct sockbuf *, const char *, int);
 #define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
 #define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
 #define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
 #else
 #define	SBLASTRECORDCHK(sb)	do {} while (0)
 #define	SBLASTMBUFCHK(sb)	do {} while (0)
 #define	SBCHECK(sb)		do {} while (0)
 #endif /* SOCKBUF_DEBUG */
 
 #endif /* _KERNEL */
 
 #endif /* _SYS_SOCKBUF_H_ */
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index fe6faa842bda..05eefd7e4fd4 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -1,604 +1,601 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SOCKETVAR_H_
 #define _SYS_SOCKETVAR_H_
 
 /*
  * Socket generation count type.  Also used in xinpcb, xtcpcb, xunpcb.
  */
 typedef uint64_t so_gen_t;
 
 #if defined(_KERNEL) || defined(_WANT_SOCKET)
 #include <sys/queue.h>			/* for TAILQ macros */
 #include <sys/selinfo.h>		/* for struct selinfo */
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/osd.h>
 #include <sys/_sx.h>
 #include <sys/sockbuf.h>
 #ifdef _KERNEL
 #include <sys/caprights.h>
 #include <sys/sockopt.h>
 #endif
 
 struct vnet;
 
 /*
  * Kernel structure per socket.
  * Contains send and receive buffer queues,
  * handle on protocol and pointer to protocol
  * private data and error information.
  */
 typedef	int so_upcall_t(struct socket *, void *, int);
 typedef	void so_dtor_t(struct socket *);
 
 struct socket;
 
 enum socket_qstate {
 	SQ_NONE = 0,
 	SQ_INCOMP = 0x0800,	/* on sol_incomp */
 	SQ_COMP = 0x1000,	/* on sol_comp */
 };
 
 /*-
  * Locking key to struct socket:
  * (a) constant after allocation, no locking required.
  * (b) locked by SOCK_LOCK(so).
- * (cr) locked by SOCK_RECVBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_rcv).
- * (cs) locked by SOCK_SENDBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_snd).
+ * (cr) locked by SOCK_RECVBUF_LOCK(so).
+ * (cs) locked by SOCK_SENDBUF_LOCK(so).
  * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
  * (f) not locked since integer reads/writes are atomic.
  * (g) used only as a sleep/wakeup address, no value.
  * (h) locked by global mutex so_global_mtx.
  * (k) locked by KTLS workqueue mutex
  */
 TAILQ_HEAD(accept_queue, socket);
 struct socket {
 	struct mtx	so_lock;
 	volatile u_int	so_count;	/* (b / refcount) */
 	struct selinfo	so_rdsel;	/* (b/cr) for so_rcv/so_comp */
 	struct selinfo	so_wrsel;	/* (b/cs) for so_snd */
 	int	so_options;		/* (b) from socket call, see socket.h */
 	short	so_type;		/* (a) generic type, see socket.h */
 	short	so_state;		/* (b) internal state flags SS_* */
 	void	*so_pcb;		/* protocol control block */
 	struct	vnet *so_vnet;		/* (a) network stack instance */
 	struct	protosw *so_proto;	/* (a) protocol handle */
 	short	so_linger;		/* time to linger close(2) */
 	short	so_timeo;		/* (g) connection timeout */
 	u_short	so_error;		/* (f) error affecting connection */
 	u_short so_rerror;		/* (f) error affecting connection */
 	struct	sigio *so_sigio;	/* [sg] information for async I/O or
 					   out of band data (SIGURG) */
 	struct	ucred *so_cred;		/* (a) user credentials */
 	struct	label *so_label;	/* (b) MAC label for socket */
 	/* NB: generation count must not be first. */
 	so_gen_t so_gencnt;		/* (h) generation count */
 	void	*so_emuldata;		/* (b) private data for emulators */
 	so_dtor_t *so_dtor;		/* (b) optional destructor */
 	struct	osd	osd;		/* Object Specific extensions */
 	/*
 	 * so_fibnum, so_user_cookie and friends can be used to attach
 	 * some user-specified metadata to a socket, which then can be
 	 * used by the kernel for various actions.
 	 * so_user_cookie is used by ipfw/dummynet.
 	 */
 	int so_fibnum;		/* routing domain for this socket */
 	uint32_t so_user_cookie;
 
 	int so_ts_clock;	/* type of the clock used for timestamps */
 	uint32_t so_max_pacing_rate;	/* (f) TX rate limit in bytes/s */
 
 	/*
 	 * Mutexes to prevent interleaving of socket I/O.  These have to be
 	 * outside of the socket buffers in order to interlock with listen(2).
 	 */
 	struct sx so_snd_sx __aligned(CACHE_LINE_SIZE);
 	struct mtx so_snd_mtx;
 
 	struct sx so_rcv_sx __aligned(CACHE_LINE_SIZE);
 	struct mtx so_rcv_mtx;
 
 	union {
 		/* Regular (data flow) socket. */
 		struct {
 			/* (cr, cs) Receive and send buffers. */
 			struct sockbuf		so_rcv, so_snd;
 
 			/* (e) Our place on accept queue. */
 			TAILQ_ENTRY(socket)	so_list;
 			struct socket		*so_listen;	/* (b) */
 			enum socket_qstate so_qstate;		/* (b) */
 			/* (b) cached MAC label for peer */
 			struct	label		*so_peerlabel;
 			u_long	so_oobmark;	/* chars to oob mark */
 
 			/* (k) Our place on KTLS RX work queue. */
 			STAILQ_ENTRY(socket)	so_ktls_rx_list;
 		};
 		/*
 		 * Listening socket, where accepts occur, is so_listen in all
 		 * subsidiary sockets.  If so_listen is NULL, socket is not
 		 * related to an accept.  For a listening socket itself
 		 * sol_incomp queues partially completed connections, while
 		 * sol_comp is a queue of connections ready to be accepted.
 		 * If a connection is aborted and it has so_listen set, then
 		 * it has to be pulled out of either sol_incomp or sol_comp.
 		 * We allow connections to queue up based on current queue
 		 * lengths and limit on number of queued connections for this
 		 * socket.
 		 */
 		struct {
 			/* (e) queue of partial unaccepted connections */
 			struct accept_queue	sol_incomp;
 			/* (e) queue of complete unaccepted connections */
 			struct accept_queue	sol_comp;
 			u_int	sol_qlen;    /* (e) sol_comp length */
 			u_int	sol_incqlen; /* (e) sol_incomp length */
 			u_int	sol_qlimit;  /* (e) queue limit */
 
 			/* accept_filter(9) optional data */
 			struct	accept_filter	*sol_accept_filter;
 			void	*sol_accept_filter_arg;	/* saved filter args */
 			char	*sol_accept_filter_str;	/* saved user args */
 
 			/* Optional upcall, for kernel socket. */
 			so_upcall_t	*sol_upcall;	/* (e) */
 			void		*sol_upcallarg;	/* (e) */
 
 			/* Socket buffer parameters, to be copied to
 			 * dataflow sockets, accepted from this one. */
 			int		sol_sbrcv_lowat;
 			int		sol_sbsnd_lowat;
 			u_int		sol_sbrcv_hiwat;
 			u_int		sol_sbsnd_hiwat;
 			short		sol_sbrcv_flags;
 			short		sol_sbsnd_flags;
 			sbintime_t	sol_sbrcv_timeo;
 			sbintime_t	sol_sbsnd_timeo;
 
 			/* Information tracking listen queue overflows. */
 			struct timeval	sol_lastover;	/* (e) */
 			int		sol_overcount;	/* (e) */
 		};
 	};
 };
 #endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
 
 /*
  * Socket state bits.
  *
  * Historically, these bits were all kept in the so_state field.
  * They are now split into separate, lock-specific fields.
  * so_state maintains basic socket state protected by the socket lock.
  * so_qstate holds information about the socket accept queues.
  * Each socket buffer also has a state field holding information
  * relevant to that socket buffer (can't send, rcv).
  * Many fields will be read without locks to improve performance and avoid
  * lock order issues.  However, this approach must be used with caution.
  */
 #define	SS_NOFDREF		0x0001	/* no file table ref any more */
 #define	SS_ISCONNECTED		0x0002	/* socket connected to a peer */
 #define	SS_ISCONNECTING		0x0004	/* in process of connecting to peer */
 #define	SS_ISDISCONNECTING	0x0008	/* in process of disconnecting */
 #define	SS_NBIO			0x0100	/* non-blocking ops */
 #define	SS_ASYNC		0x0200	/* async i/o notify */
 #define	SS_ISCONFIRMING		0x0400	/* deciding to accept connection req */
 #define	SS_ISDISCONNECTED	0x2000	/* socket disconnected from peer */
 
 /*
  * Protocols can mark a socket as SS_PROTOREF to indicate that, following
  * pru_detach, they still want the socket to persist, and will free it
  * themselves when they are done.  Protocols should only ever call sofree()
  * following setting this flag in pru_detach(), and never otherwise, as
  * sofree() bypasses socket reference counting.
  */
 #define	SS_PROTOREF		0x4000	/* strong protocol reference */
 
 #ifdef _KERNEL
 
 #define	SOCK_MTX(so)		(&(so)->so_lock)
 #define	SOCK_LOCK(so)		mtx_lock(&(so)->so_lock)
 #define	SOCK_OWNED(so)		mtx_owned(&(so)->so_lock)
 #define	SOCK_UNLOCK(so)		mtx_unlock(&(so)->so_lock)
 #define	SOCK_LOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_OWNED)
 #define	SOCK_UNLOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_NOTOWNED)
 
 #define	SOLISTENING(sol)	(((sol)->so_options & SO_ACCEPTCONN) != 0)
 #define	SOLISTEN_LOCK(sol)	do {					\
 	mtx_lock(&(sol)->so_lock);					\
 	KASSERT(SOLISTENING(sol),					\
 	    ("%s: %p not listening", __func__, (sol)));			\
 } while (0)
 #define	SOLISTEN_TRYLOCK(sol)	mtx_trylock(&(sol)->so_lock)
 #define	SOLISTEN_UNLOCK(sol)	do {					\
 	KASSERT(SOLISTENING(sol),					\
 	    ("%s: %p not listening", __func__, (sol)));			\
 	mtx_unlock(&(sol)->so_lock);					\
 } while (0)
 #define	SOLISTEN_LOCK_ASSERT(sol)	do {				\
 	mtx_assert(&(sol)->so_lock, MA_OWNED);				\
 	KASSERT(SOLISTENING(sol),					\
 	    ("%s: %p not listening", __func__, (sol)));			\
 } while (0)
 
 /*
- * Socket buffer locks.  These manipulate the same mutexes as SOCKBUF_LOCK()
- * and related macros.
+ * Socket buffer locks.  These are strongly preferred over the SOCKBUF_LOCK(sb)
+ * macros, as we are moving towards protocol-specific socket buffers.
  */
 #define	SOCK_RECVBUF_MTX(so)						\
 	(&(so)->so_rcv_mtx)
 #define	SOCK_RECVBUF_LOCK(so)						\
 	mtx_lock(SOCK_RECVBUF_MTX(so))
 #define	SOCK_RECVBUF_UNLOCK(so)						\
 	mtx_unlock(SOCK_RECVBUF_MTX(so))
 #define	SOCK_RECVBUF_LOCK_ASSERT(so)					\
 	mtx_assert(SOCK_RECVBUF_MTX(so), MA_OWNED)
 #define	SOCK_RECVBUF_UNLOCK_ASSERT(so)					\
 	mtx_assert(SOCK_RECVBUF_MTX(so), MA_NOTOWNED)
 
 #define	SOCK_SENDBUF_MTX(so)						\
 	(&(so)->so_snd_mtx)
 #define	SOCK_SENDBUF_LOCK(so)						\
 	mtx_lock(SOCK_SENDBUF_MTX(so))
 #define	SOCK_SENDBUF_UNLOCK(so)						\
 	mtx_unlock(SOCK_SENDBUF_MTX(so))
 #define	SOCK_SENDBUF_LOCK_ASSERT(so)					\
 	mtx_assert(SOCK_SENDBUF_MTX(so), MA_OWNED)
 #define	SOCK_SENDBUF_UNLOCK_ASSERT(so)					\
 	mtx_assert(SOCK_SENDBUF_MTX(so), MA_NOTOWNED)
 
-/* 'which' values for socket buffer events and upcalls. */
-typedef enum { SO_RCV, SO_SND } sb_which;
+#define	SOCK_BUF_LOCK(so, which)					\
+	mtx_lock(soeventmtx((so), (which)))
+#define	SOCK_BUF_UNLOCK(so, which)					\
+	mtx_unlock(soeventmtx((so), (which)))
+#define	SOCK_BUF_LOCK_ASSERT(so, which)					\
+	mtx_assert(soeventmtx((so), (which)), MA_OWNED)
+#define	SOCK_BUF_UNLOCK_ASSERT(so, which)				\
+	mtx_assert(soeventmtx((so), (which)), MA_NOTOWNED)
+
+static inline struct sockbuf *
+sobuf(struct socket *so, const sb_which which)
+{
+	return (which == SO_RCV ? &so->so_rcv : &so->so_snd);
+}
+
+static inline struct mtx *
+soeventmtx(struct socket *so, const sb_which which)
+{
+	return (which == SO_RCV ? SOCK_RECVBUF_MTX(so) : SOCK_SENDBUF_MTX(so));
+}
 
 /*
  * Macros for sockets and socket buffering.
  */
 
 /*
  * Flags to soiolock().
  */
 #define	SBL_WAIT	0x00000001	/* Wait if not immediately available. */
 #define	SBL_NOINTR	0x00000002	/* Force non-interruptible sleep. */
 #define	SBL_VALID	(SBL_WAIT | SBL_NOINTR)
 
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
 #define	SOCK_IO_SEND_LOCK(so, flags)					\
 	soiolock((so), &(so)->so_snd_sx, (flags))
 #define	SOCK_IO_SEND_UNLOCK(so)						\
 	soiounlock(&(so)->so_snd_sx)
 #define	SOCK_IO_SEND_OWNED(so)	sx_xlocked(&(so)->so_snd_sx)
 #define	SOCK_IO_RECV_LOCK(so, flags)					\
 	soiolock((so), &(so)->so_rcv_sx, (flags))
 #define	SOCK_IO_RECV_UNLOCK(so)						\
 	soiounlock(&(so)->so_rcv_sx)
 #define	SOCK_IO_RECV_OWNED(so)	sx_xlocked(&(so)->so_rcv_sx)
 
-/*
- * Do we need to notify the other side when I/O is possible?
- */
-#define	sb_notify(sb)	(((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \
-    SB_UPCALL | SB_AIO | SB_KNOTE)) != 0)
-
 /* do we have to send all at once on a socket? */
 #define	sosendallatonce(so) \
     ((so)->so_proto->pr_flags & PR_ATOMIC)
 
 /* can we read something from so? */
 #define	soreadabledata(so) \
 	(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
 	(so)->so_error || (so)->so_rerror)
 #define	soreadable(so) \
 	(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
 
 /* can we write something to so? */
 #define	sowriteable(so) \
     ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
 	(((so)->so_state&SS_ISCONNECTED) || \
 	  ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \
      ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \
      (so)->so_error)
 
 /*
  * soref()/sorele() ref-count the socket structure.
  * soref() may be called without owning socket lock, but in that case a
  * caller must own something that holds socket, and so_count must be not 0.
  * Note that you must still explicitly close the socket, but the last ref
  * count will free the structure.
  */
 #define	soref(so)	refcount_acquire(&(so)->so_count)
 #define	sorele(so) do {							\
 	SOCK_UNLOCK_ASSERT(so);						\
 	if (!refcount_release_if_not_last(&(so)->so_count)) {		\
 		SOCK_LOCK(so);						\
 		sorele_locked(so);					\
 	}								\
 } while (0)
 
 /*
  * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to
  * avoid a non-atomic test-and-wakeup.  However, sowakeup is
  * responsible for releasing the lock if it is called.  We unlock only
  * if we don't call into sowakeup.  If any code is introduced that
  * directly invokes the underlying sowakeup() primitives, it must
  * maintain the same semantics.
  */
-#define	sorwakeup_locked(so) do {					\
-	SOCKBUF_LOCK_ASSERT(&(so)->so_rcv);				\
-	if (sb_notify(&(so)->so_rcv))					\
-		sowakeup((so), &(so)->so_rcv);	 			\
-	else								\
-		SOCKBUF_UNLOCK(&(so)->so_rcv);				\
-} while (0)
-
 #define	sorwakeup(so) do {						\
-	SOCKBUF_LOCK(&(so)->so_rcv);					\
+	SOCK_RECVBUF_LOCK(so);						\
 	sorwakeup_locked(so);						\
 } while (0)
 
-#define	sowwakeup_locked(so) do {					\
-	SOCKBUF_LOCK_ASSERT(&(so)->so_snd);				\
-	if (sb_notify(&(so)->so_snd))					\
-		sowakeup((so), &(so)->so_snd); 				\
-	else								\
-		SOCKBUF_UNLOCK(&(so)->so_snd);				\
-} while (0)
-
 #define	sowwakeup(so) do {						\
-	SOCKBUF_LOCK(&(so)->so_snd);					\
+	SOCK_SENDBUF_LOCK(so);						\
 	sowwakeup_locked(so);						\
 } while (0)
 
 struct accept_filter {
 	char	accf_name[16];
 	int	(*accf_callback)
 		(struct socket *so, void *arg, int waitflag);
 	void *	(*accf_create)
 		(struct socket *so, char *arg);
 	void	(*accf_destroy)
 		(struct socket *so);
 	SLIST_ENTRY(accept_filter) accf_next;
 };
 
 #define	ACCEPT_FILTER_DEFINE(modname, filtname, cb, create, destroy, ver) \
 	static struct accept_filter modname##_filter = {		\
 		.accf_name = filtname,					\
 		.accf_callback = cb,					\
 		.accf_create = create,					\
 		.accf_destroy = destroy,				\
 	};								\
 	static moduledata_t modname##_mod = {				\
 		.name = __XSTRING(modname),				\
 		.evhand = accept_filt_generic_mod_event,		\
 		.priv = &modname##_filter,				\
 	};								\
 	DECLARE_MODULE(modname, modname##_mod, SI_SUB_DRIVERS,		\
 	    SI_ORDER_MIDDLE);						\
 	MODULE_VERSION(modname, ver)
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_ACCF);
 MALLOC_DECLARE(M_PCB);
 MALLOC_DECLARE(M_SONAME);
 #endif
 
 /*
  * Socket specific helper hook point identifiers
  * Do not leave holes in the sequence, hook registration is a loop.
  */
 #define HHOOK_SOCKET_OPT		0
 #define HHOOK_SOCKET_CREATE		1
 #define HHOOK_SOCKET_RCV 		2
 #define HHOOK_SOCKET_SND		3
 #define HHOOK_FILT_SOREAD		4
 #define HHOOK_FILT_SOWRITE		5
 #define HHOOK_SOCKET_CLOSE		6
 #define HHOOK_SOCKET_LAST		HHOOK_SOCKET_CLOSE
 
 struct socket_hhook_data {
 	struct socket	*so;
 	struct mbuf	*m;
 	void		*hctx;		/* hook point specific data*/
 	int		status;
 };
 
 extern int	maxsockets;
 extern u_long	sb_max;
 extern so_gen_t so_gencnt;
 
 struct file;
 struct filecaps;
 struct filedesc;
 struct mbuf;
 struct sockaddr;
 struct ucred;
 struct uio;
 
 /* Return values for socket upcalls. */
 #define	SU_OK		0
 #define	SU_ISCONNECTED	1
 
 /*
  * From uipc_socket and friends
  */
 int	getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr,
 	    size_t len);
 int	getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
 	    struct file **fpp, u_int *fflagp, struct filecaps *havecaps);
 void	soabort(struct socket *so);
 int	soaccept(struct socket *so, struct sockaddr **nam);
 void	soaio_enqueue(struct task *task);
 void	soaio_rcv(void *context, int pending);
 void	soaio_snd(void *context, int pending);
 int	socheckuid(struct socket *so, uid_t uid);
 int	sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
 int	sobindat(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	soclose(struct socket *so);
 int	soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
 int	soconnectat(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	soconnect2(struct socket *so1, struct socket *so2);
 int	socreate(int dom, struct socket **aso, int type, int proto,
 	    struct ucred *cred, struct thread *td);
 int	sodisconnect(struct socket *so);
 void	sodtor_set(struct socket *, so_dtor_t *);
 struct	sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
 void	sofree(struct socket *so);
 void	sohasoutofband(struct socket *so);
 int	solisten(struct socket *so, int backlog, struct thread *td);
 void	solisten_proto(struct socket *so, int backlog);
 void	solisten_proto_abort(struct socket *so);
 int	solisten_proto_check(struct socket *so);
 int	solisten_dequeue(struct socket *, struct socket **, int);
 struct socket *
 	sonewconn(struct socket *head, int connstatus);
 struct socket *
 	sopeeloff(struct socket *);
 int	sopoll(struct socket *so, int events, struct ucred *active_cred,
 	    struct thread *td);
 int	sopoll_generic(struct socket *so, int events,
 	    struct ucred *active_cred, struct thread *td);
 int	soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
 	    struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
 int	soreceive_stream(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 int	soreceive_dgram(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 int	soreceive_generic(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 void	sorele_locked(struct socket *so);
 int	soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
 void	sorflush(struct socket *so);
 int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    struct mbuf *top, struct mbuf *control, int flags,
 	    struct thread *td);
 int	sosend_dgram(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
 	    int flags, struct thread *td);
 int	sosend_generic(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
 	    int flags, struct thread *td);
 int	soshutdown(struct socket *so, int how);
 void	soupcall_clear(struct socket *, sb_which);
 void	soupcall_set(struct socket *, sb_which, so_upcall_t, void *);
 void	solisten_upcall_set(struct socket *, so_upcall_t, void *);
-void	sowakeup(struct socket *so, struct sockbuf *sb);
-void	sowakeup_aio(struct socket *so, struct sockbuf *sb);
+void	sorwakeup_locked(struct socket *so);
+void	sowwakeup_locked(struct socket *so);
+void	sowakeup_aio(struct socket *so, sb_which which);
 void	solisten_wakeup(struct socket *);
 int	selsocket(struct socket *so, int events, struct timeval *tv,
 	    struct thread *td);
 void	soisconnected(struct socket *so);
 void	soisconnecting(struct socket *so);
 void	soisdisconnected(struct socket *so);
 void	soisdisconnecting(struct socket *so);
 void	socantrcvmore(struct socket *so);
 void	socantrcvmore_locked(struct socket *so);
 void	socantsendmore(struct socket *so);
 void	socantsendmore_locked(struct socket *so);
 void	soroverflow(struct socket *so);
 void	soroverflow_locked(struct socket *so);
 int	soiolock(struct socket *so, struct sx *sx, int flags);
 void	soiounlock(struct sx *sx);
 
 /*
  * Accept filter functions (duh).
  */
 int	accept_filt_add(struct accept_filter *filt);
 int	accept_filt_del(char *name);
 struct	accept_filter *accept_filt_get(char *name);
 #ifdef ACCEPT_FILTER_MOD
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet_accf);
 #endif
 int	accept_filt_generic_mod_event(module_t mod, int event, void *data);
 #endif
 
 #endif /* _KERNEL */
 
 /*
  * Structure to export socket from kernel to utilities, via sysctl(3).
  */
 struct xsocket {
 	ksize_t		xso_len;	/* length of this structure */
 	kvaddr_t	xso_so;		/* kernel address of struct socket */
 	kvaddr_t	so_pcb;		/* kernel address of struct inpcb */
 	uint64_t	so_oobmark;
 	int64_t		so_spare64[8];
 	int32_t		xso_protocol;
 	int32_t		xso_family;
 	uint32_t	so_qlen;
 	uint32_t	so_incqlen;
 	uint32_t	so_qlimit;
 	pid_t		so_pgid;
 	uid_t		so_uid;
 	int32_t		so_spare32[8];
 	int16_t		so_type;
 	int16_t		so_options;
 	int16_t		so_linger;
 	int16_t		so_state;
 	int16_t		so_timeo;
 	uint16_t	so_error;
 	struct xsockbuf {
 		uint32_t	sb_cc;
 		uint32_t	sb_hiwat;
 		uint32_t	sb_mbcnt;
 		uint32_t	sb_mcnt;
 		uint32_t	sb_ccnt;
 		uint32_t	sb_mbmax;
 		int32_t		sb_lowat;
 		int32_t		sb_timeo;
 		int16_t		sb_flags;
 	} so_rcv, so_snd;
 };
 
 #ifdef _KERNEL
 void	sotoxsocket(struct socket *so, struct xsocket *xso);
 void	sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
 #endif
 
 /*
  * Socket buffer state bits.  Exported via libprocstat(3).
  */
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
 #define	SBS_RCVATMARK		0x0040	/* at mark on input */
 
 #endif /* !_SYS_SOCKETVAR_H_ */