diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c index 936199fd18f1..eb540406a271 100644 --- a/sys/dev/cxgbe/tom/t4_tls.c +++ b/sys/dev/cxgbe/tom/t4_tls.c @@ -1,1257 +1,1257 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017-2018 Chelsio Communications, Inc. * All rights reserved. * Written by: John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_kern_tls.h" #include __FBSDID("$FreeBSD$"); #ifdef KERN_TLS #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_tcb.h" #include "crypto/t4_crypto.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* * The TCP sequence number of a CPL_TLS_DATA mbuf is saved here while * the mbuf is in the ulp_pdu_reclaimq. */ #define tls_tcp_seq PH_loc.thirtytwo[0] static void t4_set_tls_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) { struct adapter *sc = td_adapter(toep->td); t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, word, mask, val, 0, 0); } /* TLS and DTLS common routines */ bool can_tls_offload(struct adapter *sc) { return (sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS); } int tls_tx_key(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; return (tls_ofld->tx_key_addr >= 0); } /* Set TLS Key-Id in TCB */ static void t4_set_tls_keyid(struct toepcb *toep, unsigned int key_id) { t4_set_tls_tcb_field(toep, W_TCB_RX_TLS_KEY_TAG, V_TCB_RX_TLS_KEY_TAG(M_TCB_RX_TLS_BUF_TAG), V_TCB_RX_TLS_KEY_TAG(key_id)); } /* Clear TF_RX_QUIESCE to re-enable receive. */ static void t4_clear_rx_quiesce(struct toepcb *toep) { t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0); } static void tls_clr_ofld_mode(struct toepcb *toep) { tls_stop_handshake_timer(toep); KASSERT(toep->tls.rx_key_addr == -1, ("%s: tid %d has RX key", __func__, toep->tid)); /* Switch to plain TOE mode. 
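 * Clear TF_TLS_ENABLE and reset the ULP type so the connection continues as a plain offloaded TCP connection.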
*/ t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)), V_TCB_ULP_RAW(V_TF_TLS_ENABLE(0))); t4_set_tls_tcb_field(toep, W_TCB_ULP_TYPE, V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), V_TCB_ULP_TYPE(ULP_MODE_NONE)); t4_clear_rx_quiesce(toep); toep->flags &= ~(TPF_FORCE_CREDITS | TPF_TLS_ESTABLISHED); toep->params.ulp_mode = ULP_MODE_NONE; } /* TLS/DTLS content type for CPL SFO */ static inline unsigned char tls_content_type(unsigned char content_type) { /* * XXX: Shouldn't this map CONTENT_TYPE_APP_DATA to DATA and * default to "CUSTOM" for all other types including * heartbeat? */ switch (content_type) { case CONTENT_TYPE_CCS: return CPL_TX_TLS_SFO_TYPE_CCS; case CONTENT_TYPE_ALERT: return CPL_TX_TLS_SFO_TYPE_ALERT; case CONTENT_TYPE_HANDSHAKE: return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; case CONTENT_TYPE_HEARTBEAT: return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; } return CPL_TX_TLS_SFO_TYPE_DATA; } /* TLS Key memory management */ static void clear_tls_keyid(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; struct adapter *sc = td_adapter(toep->td); if (tls_ofld->rx_key_addr >= 0) { t4_free_tls_keyid(sc, tls_ofld->rx_key_addr); tls_ofld->rx_key_addr = -1; } if (tls_ofld->tx_key_addr >= 0) { t4_free_tls_keyid(sc, tls_ofld->tx_key_addr); tls_ofld->tx_key_addr = -1; } } static int get_tp_plen_max(struct ktls_session *tls) { int plen = ((min(3*4096, TP_TX_PG_SZ))/1448) * 1448; return (tls->params.max_frame_len <= 8192 ? plen : FC_TP_PLEN_MAX); } /* Send request to get the key-id */ static int tls_program_key_id(struct toepcb *toep, struct ktls_session *tls, int direction) { struct tls_ofld_info *tls_ofld = &toep->tls; struct adapter *sc = td_adapter(toep->td); struct ofld_tx_sdesc *txsd; int keyid; struct wrqe *wr; struct tls_key_req *kwr; struct tls_keyctx *kctx; #ifdef INVARIANTS int kwrlen, kctxlen, len; kwrlen = sizeof(*kwr); kctxlen = roundup2(sizeof(*kctx), 32); len = roundup2(kwrlen + kctxlen, 16); MPASS(TLS_KEY_WR_SZ == len); #endif if (toep->txsd_avail == 0) return (EAGAIN); if ((keyid = t4_alloc_tls_keyid(sc)) < 0) { return (ENOSPC); } wr = alloc_wrqe(TLS_KEY_WR_SZ, &toep->ofld_txq->wrq); if (wr == NULL) { t4_free_tls_keyid(sc, keyid); return (ENOMEM); } kwr = wrtod(wr); memset(kwr, 0, TLS_KEY_WR_SZ); t4_write_tlskey_wr(tls, direction, toep->tid, F_FW_WR_COMPL, keyid, kwr); kctx = (struct tls_keyctx *)(kwr + 1); if (direction == KTLS_TX) tls_ofld->tx_key_addr = keyid; else tls_ofld->rx_key_addr = keyid; t4_tls_key_ctx(tls, direction, kctx); txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = DIV_ROUND_UP(TLS_KEY_WR_SZ, 16); txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); return (0); } /* * In some cases a client connection can hang without sending the * ServerHelloDone message from the NIC to the host. Send a dummy * RX_DATA_ACK with RX_MODULATE to unstick the connection. */ static void tls_send_handshake_ack(void *arg) { struct toepcb *toep = arg; struct tls_ofld_info *tls_ofld = &toep->tls; struct adapter *sc = td_adapter(toep->td); /* Bail without rescheduling if the connection has closed. */ if ((toep->flags & (TPF_FIN_SENT | TPF_ABORT_SHUTDOWN)) != 0) return; /* * If this connection has timed out without receiving more * data, downgrade to plain TOE mode and don't re-arm the * timer. 
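 * A tls_rx_timeout of zero disables this downgrade; the timer is then simply re-armed.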
*/ if (sc->tt.tls_rx_timeout != 0) { struct inpcb *inp; struct tcpcb *tp; inp = toep->inp; tp = intotcpcb(inp); if ((ticks - tp->t_rcvtime) >= sc->tt.tls_rx_timeout) { CTR2(KTR_CXGBE, "%s: tid %d clr_ofld_mode", __func__, toep->tid); tls_clr_ofld_mode(toep); return; } } /* * XXX: Does not have the t4_get_tcb() checks to refine the * workaround. */ callout_schedule(&tls_ofld->handshake_timer, TLS_SRV_HELLO_RD_TM * hz); CTR2(KTR_CXGBE, "%s: tid %d sending RX_DATA_ACK", __func__, toep->tid); send_rx_modulate(sc, toep); } static void tls_start_handshake_timer(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; INP_WLOCK_ASSERT(toep->inp); callout_reset(&tls_ofld->handshake_timer, TLS_SRV_HELLO_BKOFF_TM * hz, tls_send_handshake_ack, toep); } void tls_stop_handshake_timer(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; INP_WLOCK_ASSERT(toep->inp); callout_stop(&tls_ofld->handshake_timer); } int tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction) { struct adapter *sc = td_adapter(toep->td); int error, explicit_iv_size, key_offset, mac_first; if (!can_tls_offload(td_adapter(toep->td))) return (EINVAL); switch (ulp_mode(toep)) { case ULP_MODE_TLS: break; case ULP_MODE_NONE: case ULP_MODE_TCPDDP: if (direction != KTLS_TX) return (EINVAL); break; default: return (EINVAL); } switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: /* XXX: Explicitly ignore any provided IV. */ switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: error = EINVAL; goto clr_ofld; } switch (tls->params.auth_algorithm) { case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: break; default: error = EPROTONOSUPPORT; goto clr_ofld; } explicit_iv_size = AES_BLOCK_LEN; mac_first = 1; break; case CRYPTO_AES_NIST_GCM_16: if (tls->params.iv_len != SALT_SIZE) { error = EINVAL; goto clr_ofld; } switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: error = EINVAL; goto clr_ofld; } explicit_iv_size = 8; mac_first = 0; break; default: error = EPROTONOSUPPORT; goto clr_ofld; } /* Only TLS 1.1 and TLS 1.2 are currently supported. */ if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE || tls->params.tls_vminor < TLS_MINOR_VER_ONE || tls->params.tls_vminor > TLS_MINOR_VER_TWO) { error = EPROTONOSUPPORT; goto clr_ofld; } /* Bail if we already have a key. 
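 * Rekeying is not supported, so a second key for the same direction is rejected.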
*/ if (direction == KTLS_TX) { if (toep->tls.tx_key_addr != -1) return (EOPNOTSUPP); } else { if (toep->tls.rx_key_addr != -1) return (EOPNOTSUPP); } error = tls_program_key_id(toep, tls, direction); if (error) { if (direction == KTLS_RX) goto clr_ofld; return (error); } if (direction == KTLS_TX) { toep->tls.scmd0.seqno_numivs = (V_SCMD_SEQ_NO_CTRL(3) | V_SCMD_PROTO_VERSION(t4_tls_proto_ver(tls)) | V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) | V_SCMD_CIPH_MODE(t4_tls_cipher_mode(tls)) | V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) | V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) | V_SCMD_IV_SIZE(explicit_iv_size / 2)); toep->tls.scmd0.ivgen_hdrlen = (V_SCMD_IV_GEN_CTRL(1) | V_SCMD_KEY_CTX_INLINE(0) | V_SCMD_TLS_FRAG_ENABLE(1)); if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) toep->tls.iv_len = 8; else toep->tls.iv_len = AES_BLOCK_LEN; toep->tls.frag_size = tls->params.max_frame_len; toep->tls.fcplenmax = get_tp_plen_max(tls); toep->tls.expn_per_ulp = tls->params.tls_hlen + tls->params.tls_tlen; toep->tls.pdus_per_ulp = 1; toep->tls.adjusted_plen = toep->tls.expn_per_ulp + tls->params.max_frame_len; toep->tls.tx_key_info_size = t4_tls_key_info_size(tls); } else { /* Stop timer on handshake completion */ tls_stop_handshake_timer(toep); toep->flags &= ~TPF_FORCE_CREDITS; toep->flags |= TPF_TLS_RECEIVE; toep->tls.rx_version = tls->params.tls_vmajor << 8 | tls->params.tls_vminor; /* * RX key tags are an index into the key portion of MA * memory stored as an offset from the base address in * units of 64 bytes. */ key_offset = toep->tls.rx_key_addr - sc->vres.key.start; t4_set_tls_keyid(toep, key_offset / 64); t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW), V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | V_TF_TLS_CONTROL(1) | V_TF_TLS_ACTIVE(1) | V_TF_TLS_ENABLE(1)))); t4_set_tls_tcb_field(toep, W_TCB_TLS_SEQ, V_TCB_TLS_SEQ(M_TCB_TLS_SEQ), V_TCB_TLS_SEQ(0)); t4_clear_rx_quiesce(toep); } return (0); clr_ofld: if (ulp_mode(toep) == ULP_MODE_TLS) { CTR2(KTR_CXGBE, "%s: tid %d clr_ofld_mode", __func__, toep->tid); tls_clr_ofld_mode(toep); } return (error); } void tls_init_toep(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; tls_ofld->rx_key_addr = -1; tls_ofld->tx_key_addr = -1; } void tls_establish(struct toepcb *toep) { /* * Enable PDU extraction. * * XXX: Supposedly this should be done by the firmware when * the ULP_MODE FLOWC parameter is set in send_flowc_wr(), but * in practice this seems to be required. 
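 * Setting TF_TLS_ENABLE in TCB_ULP_RAW is what turns on PDU extraction for received data.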
*/ CTR2(KTR_CXGBE, "%s: tid %d setting TLS_ENABLE", __func__, toep->tid); t4_set_tls_tcb_field(toep, W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW), V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1))); toep->flags |= TPF_FORCE_CREDITS | TPF_TLS_ESTABLISHED; callout_init_rw(&toep->tls.handshake_timer, &toep->inp->inp_lock, 0); tls_start_handshake_timer(toep); } void tls_detach(struct toepcb *toep) { if (toep->flags & TPF_TLS_ESTABLISHED) { tls_stop_handshake_timer(toep); toep->flags &= ~TPF_TLS_ESTABLISHED; } } void tls_uninit_toep(struct toepcb *toep) { MPASS((toep->flags & TPF_TLS_ESTABLISHED) == 0); clear_tls_keyid(toep); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TLSTX_CREDITS(toep) \ (howmany(sizeof(struct fw_tlstx_data_wr) + \ sizeof(struct cpl_tx_tls_sfo) + sizeof(struct ulptx_idata) + \ sizeof(struct ulptx_sc_memrd) + \ AES_BLOCK_LEN + 1, 16)) static void write_tlstx_wr(struct fw_tlstx_data_wr *txwr, struct toepcb *toep, unsigned int immdlen, unsigned int plen, unsigned int expn, unsigned int pdus, uint8_t credits, int shove, int imm_ivs) { struct tls_ofld_info *tls_ofld = &toep->tls; unsigned int len = plen + expn; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_TLSTX_DATA_WR) | V_FW_TLSTX_DATA_WR_COMPL(1) | V_FW_TLSTX_DATA_WR_IMMDLEN(immdlen)); txwr->flowid_len16 = htobe32(V_FW_TLSTX_DATA_WR_FLOWID(toep->tid) | V_FW_TLSTX_DATA_WR_LEN16(credits)); txwr->plen = htobe32(len); txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ULP_MODE_TLS) | V_TX_URG(0) | /* F_T6_TX_FORCE | */ V_TX_SHOVE(shove)); txwr->ctxloc_to_exp = htobe32(V_FW_TLSTX_DATA_WR_NUMIVS(pdus) | V_FW_TLSTX_DATA_WR_EXP(expn) | V_FW_TLSTX_DATA_WR_CTXLOC(TLS_SFO_WR_CONTEXTLOC_DDR) | V_FW_TLSTX_DATA_WR_IVDSGL(!imm_ivs) | V_FW_TLSTX_DATA_WR_KEYSIZE(tls_ofld->tx_key_info_size >> 4)); txwr->mfs = htobe16(tls_ofld->frag_size); txwr->adjustedplen_pkd = htobe16( V_FW_TLSTX_DATA_WR_ADJUSTEDPLEN(tls_ofld->adjusted_plen)); txwr->expinplenmax_pkd = htobe16( V_FW_TLSTX_DATA_WR_EXPINPLENMAX(tls_ofld->expn_per_ulp)); txwr->pdusinplenmax_pkd = V_FW_TLSTX_DATA_WR_PDUSINPLENMAX(tls_ofld->pdus_per_ulp); } static void write_tlstx_cpl(struct cpl_tx_tls_sfo *cpl, struct toepcb *toep, struct tls_hdr *tls_hdr, unsigned int plen, unsigned int pdus) { struct tls_ofld_info *tls_ofld = &toep->tls; int data_type, seglen; if (plen < tls_ofld->frag_size) seglen = plen; else seglen = tls_ofld->frag_size; data_type = tls_content_type(tls_hdr->type); cpl->op_to_seg_len = htobe32(V_CPL_TX_TLS_SFO_OPCODE(CPL_TX_TLS_SFO) | V_CPL_TX_TLS_SFO_DATA_TYPE(data_type) | V_CPL_TX_TLS_SFO_CPL_LEN(2) | V_CPL_TX_TLS_SFO_SEG_LEN(seglen)); cpl->pld_len = htobe32(plen); if (data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) cpl->type_protover = htobe32( V_CPL_TX_TLS_SFO_TYPE(tls_hdr->type)); cpl->seqno_numivs = htobe32(tls_ofld->scmd0.seqno_numivs | V_SCMD_NUM_IVS(pdus)); cpl->ivgen_hdrlen = htobe32(tls_ofld->scmd0.ivgen_hdrlen); cpl->scmd1 = htobe64(tls_ofld->tx_seq_no); tls_ofld->tx_seq_no += pdus; } static int count_ext_pgs_segs(struct mbuf *m) { vm_paddr_t nextpa; u_int i, nsegs; MPASS(m->m_epg_npgs > 0); nsegs = 1; nextpa = m->m_epg_pa[0] + PAGE_SIZE; for (i = 1; i < m->m_epg_npgs; i++) { if (nextpa != m->m_epg_pa[i]) nsegs++; nextpa = m->m_epg_pa[i] + PAGE_SIZE; } return (nsegs); } static void write_ktlstx_sgl(void *dst, struct mbuf *m, int nsegs) { struct ulptx_sgl *usgl = dst; vm_paddr_t pa; uint32_t len; int i, j; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); /* Figure out the first S/G length. 
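 * Physically contiguous pages are coalesced into a single DSGL entry.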
*/ pa = m->m_epg_pa[0] + m->m_epg_1st_off; usgl->addr0 = htobe64(pa); len = m_epg_pagelen(m, 0, m->m_epg_1st_off); pa += len; for (i = 1; i < m->m_epg_npgs; i++) { if (m->m_epg_pa[i] != pa) break; len += m_epg_pagelen(m, i, 0); pa += m_epg_pagelen(m, i, 0); } usgl->len0 = htobe32(len); #ifdef INVARIANTS nsegs--; #endif j = -1; for (; i < m->m_epg_npgs; i++) { if (j == -1 || m->m_epg_pa[i] != pa) { if (j >= 0) usgl->sge[j / 2].len[j & 1] = htobe32(len); j++; #ifdef INVARIANTS nsegs--; #endif pa = m->m_epg_pa[i]; usgl->sge[j / 2].addr[j & 1] = htobe64(pa); len = m_epg_pagelen(m, i, 0); pa += len; } else { len += m_epg_pagelen(m, i, 0); pa += m_epg_pagelen(m, i, 0); } } if (j >= 0) { usgl->sge[j / 2].len[j & 1] = htobe32(len); if ((j & 1) == 0) usgl->sge[j / 2].len[1] = htobe32(0); } KASSERT(nsegs == 0, ("%s: nsegs %d, m %p", __func__, nsegs, m)); } /* * Similar to t4_push_frames() but handles sockets that contain TLS * record mbufs. */ void t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop) { struct tls_hdr *thdr; struct fw_tlstx_data_wr *txwr; struct cpl_tx_tls_sfo *cpl; struct ulptx_idata *idata; struct ulptx_sc_memrd *memrd; struct wrqe *wr; struct mbuf *m; u_int nsegs, credits, wr_len; u_int expn_size; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tls_size, tx_credits, shove, sowwakeup; struct ofld_tx_sdesc *txsd; char *buf; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(ulp_mode(toep) == ULP_MODE_NONE || ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS, ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep)); KASSERT(tls_tx_key(toep), ("%s: TX key not set for toep %p", __func__, toep)); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", __func__, toep->tid, toep->flags, tp->t_flags); #endif if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; #ifdef RATELIMIT if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; } #endif /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } txsd = &toep->txsd[toep->txsd_pidx]; for (;;) { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { sbdrop_locked(sb, drop); drop = 0; } m = sb->sb_sndptr != NULL ? sb->sb_sndptr->m_next : sb->sb_mb; /* * Send a FIN if requested, but only if there's no * more data to send. */ if (m == NULL && toep->flags & TPF_SEND_FIN) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); t4_close_conn(sc, toep); return; } /* * If there is no ready data to send, wait until more * data arrives. */ if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d no ready data to send", __func__, toep->tid); #endif return; } KASSERT(m->m_flags & M_EXTPG, ("%s: mbuf %p is not NOMAP", __func__, m)); KASSERT(m->m_epg_tls != NULL, ("%s: mbuf %p doesn't have TLS session", __func__, m)); /* Calculate WR length. 
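 * The WR carries the FW_TLSTX_DATA_WR header, a CPL_TX_TLS_SFO, a ULP_TX_SC_NOOP, a ULP_TX_SC_MEMRD pointing at the key context, room for the explicit IV, and finally the DSGL for the record payload.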
*/ wr_len = sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo) + sizeof(struct ulptx_idata) + sizeof(struct ulptx_sc_memrd); /* Explicit IVs for AES-CBC and AES-GCM are <= 16. */ MPASS(toep->tls.iv_len <= AES_BLOCK_LEN); wr_len += AES_BLOCK_LEN; /* Account for SGL in work request length. */ nsegs = count_ext_pgs_segs(m); wr_len += sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; /* Not enough credits for this work request. */ if (howmany(wr_len, 16) > tx_credits) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d mbuf %p requires %d credits, but only %d available", __func__, toep->tid, m, howmany(wr_len, 16), tx_credits); #endif toep->flags |= TPF_TX_SUSPENDED; return; } /* Shove if there is no additional data pending. */ shove = ((m->m_next == NULL || (m->m_next->m_flags & M_NOTAVAIL) != 0)) && (tp->t_flags & TF_MORETOCOME) == 0; if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && sbused(sb) >= sb->sb_hiwat * 7 / 8) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(so, SO_SND, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ } if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } thdr = (struct tls_hdr *)&m->m_epg_hdr; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x", __func__, toep->tid, m->m_epg_seqno, thdr->type, m->m_len); #endif txwr = wrtod(wr); cpl = (struct cpl_tx_tls_sfo *)(txwr + 1); memset(txwr, 0, roundup2(wr_len, 16)); credits = howmany(wr_len, 16); expn_size = m->m_epg_hdrlen + m->m_epg_trllen; tls_size = m->m_len - expn_size; write_tlstx_wr(txwr, toep, 0, tls_size, expn_size, 1, credits, shove, 1); toep->tls.tx_seq_no = m->m_epg_seqno; write_tlstx_cpl(cpl, toep, thdr, tls_size, 1); idata = (struct ulptx_idata *)(cpl + 1); idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); idata->len = htobe32(0); memrd = (struct ulptx_sc_memrd *)(idata + 1); memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) | V_ULP_TX_SC_MORE(1) | V_ULPTX_LEN16(toep->tls.tx_key_info_size >> 4)); memrd->addr = htobe32(toep->tls.tx_key_addr >> 5); /* Copy IV. */ buf = (char *)(memrd + 1); memcpy(buf, thdr + 1, toep->tls.iv_len); buf += AES_BLOCK_LEN; write_ktlstx_sgl(buf, m, nsegs); KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; tp->snd_nxt += m->m_len; tp->snd_max += m->m_len; SOCKBUF_LOCK(sb); sb->sb_sndptr = m; SOCKBUF_UNLOCK(sb); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TLSTX_CREDITS(toep)) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = m->m_len; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; counter_u64_add(toep->ofld_txq->tx_toe_tls_records, 1); counter_u64_add(toep->ofld_txq->tx_toe_tls_octets, m->m_len); t4_l2t_send(sc, wr, toep->l2te); } } /* * For TLS data we place received mbufs received via CPL_TLS_DATA into * an mbufq in the TLS offload state. 
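 * (Each queued mbuf records its TCP sequence number in m_pkthdr.tls_tcp_seq while it waits.)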
When CPL_RX_TLS_CMP is * received, the completed PDUs are placed into the socket receive * buffer. * * The TLS code reuses the ulp_pdu_reclaimq to hold the pending mbufs. */ static int do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_tls_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; int len; /* XXX: Should this match do_rx_data instead? */ KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; toep->ofld_rxq->rx_toe_tls_octets += len; KASSERT(len == G_CPL_TLS_DATA_LENGTH(be32toh(cpl->length_pkd)), ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } /* Save TCP sequence number. */ m->m_pkthdr.tls_tcp_seq = be32toh(cpl->seq); if (mbufq_enqueue(&toep->ulp_pdu_reclaimq, m)) { #ifdef INVARIANTS panic("Failed to queue TLS data packet"); #else printf("%s: Failed to queue TLS data packet\n", __func__); INP_WUNLOCK(inp); m_freem(m); return (0); #endif } tp = intotcpcb(inp); tp->t_rcvtime = ticks; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %u len %d seq %u", __func__, tid, len, be32toh(cpl->seq)); #endif INP_WUNLOCK(inp); return (0); } static int do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_tls_cmp *cpl = mtod(m, const void *); struct tlsrx_hdr_pkt *tls_hdr_pkt; unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct mbuf *tls_data; struct tls_get_record *tgr; struct mbuf *control; int pdu_length, rx_credits; #if defined(KTR) || defined(INVARIANTS) int len; #endif KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); #if defined(KTR) || defined(INVARIANTS) len = m->m_pkthdr.len; #endif toep->ofld_rxq->rx_toe_tls_records++; KASSERT(len == G_CPL_RX_TLS_CMP_LENGTH(be32toh(cpl->pdulength_length)), ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } pdu_length = G_CPL_RX_TLS_CMP_PDULENGTH(be32toh(cpl->pdulength_length)); so = inp_inpcbtosocket(inp); tp = intotcpcb(inp); #ifdef VERBOSE_TRACES CTR6(KTR_CXGBE, "%s: tid %u PDU len %d len %d seq %u, rcv_nxt %u", __func__, tid, pdu_length, len, be32toh(cpl->seq), tp->rcv_nxt); #endif tp->rcv_nxt += pdu_length; KASSERT(tp->rcv_wnd >= pdu_length, ("%s: negative window size", __func__)); tp->rcv_wnd -= pdu_length; /* XXX: Not sure what to do about urgent data. */ /* * The payload of this CPL is the TLS header followed by * additional fields. 
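 * They are laid out as a struct tlsrx_hdr_pkt at the start of the mbuf.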
*/ KASSERT(m->m_len >= sizeof(*tls_hdr_pkt), ("%s: payload too small", __func__)); tls_hdr_pkt = mtod(m, void *); tls_data = mbufq_dequeue(&toep->ulp_pdu_reclaimq); if (tls_data != NULL) { KASSERT(be32toh(cpl->seq) == tls_data->m_pkthdr.tls_tcp_seq, ("%s: sequence mismatch", __func__)); } /* Report decryption errors as EBADMSG. */ if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != 0) { m_freem(m); m_freem(tls_data); CURVNET_SET(toep->vnet); so->so_error = EBADMSG; sorwakeup(so); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } /* Allocate the control message mbuf. */ control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD, - IPPROTO_TCP); + IPPROTO_TCP, M_NOWAIT); if (control == NULL) { m_freem(m); m_freem(tls_data); CURVNET_SET(toep->vnet); so->so_error = ENOBUFS; sorwakeup(so); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } tgr = (struct tls_get_record *) CMSG_DATA(mtod(control, struct cmsghdr *)); memset(tgr, 0, sizeof(*tgr)); tgr->tls_type = tls_hdr_pkt->type; tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8; tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff; m_freem(m); if (tls_data != NULL) { m_last(tls_data)->m_flags |= M_EOR; tgr->tls_length = htobe16(tls_data->m_pkthdr.len); } else tgr->tls_length = 0; m = tls_data; sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { struct epoch_tracker et; CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, pdu_length); m_freem(m); m_freem(control); SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } /* * Not all of the bytes on the wire are included in the socket buffer * (e.g. the MAC of the TLS record). However, those bytes are included * in the TCP sequence space. */ /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && m->m_pkthdr.len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(so, SO_RCV, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } sbappendcontrol_locked(sb, m, control, 0); rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %u rx_credits %u rcv_wnd %u", __func__, tid, rx_credits, tp->rcv_wnd); #endif if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) { rx_credits = send_rx_credits(sc, toep, rx_credits); tp->rcv_wnd += rx_credits; tp->rcv_adv += rx_credits; } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } void do_rx_data_tls(const struct cpl_rx_data *cpl, struct toepcb *toep, struct mbuf *m) { struct inpcb *inp = toep->inp; struct tls_ofld_info *tls_ofld = &toep->tls; struct tls_hdr *hdr; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; int len, rx_credits; len = m->m_pkthdr.len; INP_WLOCK_ASSERT(inp); so = inp_inpcbtosocket(inp); tp = intotcpcb(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); CURVNET_SET(toep->vnet); tp->rcv_nxt += len; KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; /* Do we have a full TLS header? 
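 * Every path below drops the record and posts an error on the socket; the checks only classify why.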
*/ if (len < sizeof(*hdr)) { CTR3(KTR_CXGBE, "%s: tid %u len %d: too short for a TLS header", __func__, toep->tid, len); so->so_error = EMSGSIZE; goto out; } hdr = mtod(m, struct tls_hdr *); /* Is the header valid? */ if (be16toh(hdr->version) != tls_ofld->rx_version) { CTR3(KTR_CXGBE, "%s: tid %u invalid version %04x", __func__, toep->tid, be16toh(hdr->version)); so->so_error = EINVAL; goto out; } if (be16toh(hdr->length) < sizeof(*hdr)) { CTR3(KTR_CXGBE, "%s: tid %u invalid length %u", __func__, toep->tid, be16toh(hdr->length)); so->so_error = EBADMSG; goto out; } /* Did we get a truncated record? */ if (len < be16toh(hdr->length)) { CTR4(KTR_CXGBE, "%s: tid %u truncated TLS record (%d vs %u)", __func__, toep->tid, len, be16toh(hdr->length)); so->so_error = EMSGSIZE; goto out; } /* Is the header type unknown? */ switch (hdr->type) { case CONTENT_TYPE_CCS: case CONTENT_TYPE_ALERT: case CONTENT_TYPE_APP_DATA: case CONTENT_TYPE_HANDSHAKE: break; default: CTR3(KTR_CXGBE, "%s: tid %u invalid TLS record type %u", __func__, toep->tid, hdr->type); so->so_error = EBADMSG; goto out; } /* * Just punt. Although this could fall back to software * decryption, this case should never really happen. */ CTR4(KTR_CXGBE, "%s: tid %u dropping TLS record type %u, length %u", __func__, toep->tid, hdr->type, be16toh(hdr->length)); so->so_error = EBADMSG; out: /* * This connection is going to die anyway, so probably don't * need to bother with returning credits. */ rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %u rx_credits %u rcv_wnd %u", __func__, toep->tid, rx_credits, tp->rcv_wnd); #endif if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) { rx_credits = send_rx_credits(toep->vi->adapter, toep, rx_credits); tp->rcv_wnd += rx_credits; tp->rcv_adv += rx_credits; } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); CURVNET_RESTORE(); m_freem(m); } void t4_tls_mod_load(void) { t4_register_cpl_handler(CPL_TLS_DATA, do_tls_data); t4_register_cpl_handler(CPL_RX_TLS_CMP, do_rx_tls_cmp); } void t4_tls_mod_unload(void) { t4_register_cpl_handler(CPL_TLS_DATA, NULL); t4_register_cpl_handler(CPL_RX_TLS_CMP, NULL); } #endif /* TCP_OFFLOAD */ #endif /* KERN_TLS */ diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c index 7b99c460e8de..ea20532a4d8c 100644 --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -1,2890 +1,2890 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014-2019 Netflix Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_ratelimit.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) #include #endif #include #include #include #ifdef RSS #include #include #endif #include #include #if defined(INET) || defined(INET6) #include #include #endif #include #ifdef TCP_OFFLOAD #include #endif #include #include #include #include #include #include #include struct ktls_wq { struct mtx mtx; STAILQ_HEAD(, mbuf) m_head; STAILQ_HEAD(, socket) so_head; bool running; int lastallocfail; } __aligned(CACHE_LINE_SIZE); struct ktls_alloc_thread { uint64_t wakeups; uint64_t allocs; struct thread *td; int running; }; struct ktls_domain_info { int count; int cpu[MAXCPU]; struct ktls_alloc_thread alloc_td; }; struct ktls_domain_info ktls_domains[MAXMEMDOM]; static struct ktls_wq *ktls_wq; static struct proc *ktls_proc; static uma_zone_t ktls_session_zone; static uma_zone_t ktls_buffer_zone; static uint16_t ktls_cpuid_lookup[MAXCPU]; static int ktls_init_state; static struct sx ktls_init_lock; SX_SYSINIT(ktls_init_lock, &ktls_init_lock, "ktls init"); SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Kernel TLS offload"); SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Kernel TLS offload stats"); #ifdef RSS static int ktls_bind_threads = 1; #else static int ktls_bind_threads; #endif SYSCTL_INT(_kern_ipc_tls, OID_AUTO, bind_threads, CTLFLAG_RDTUN, &ktls_bind_threads, 0, "Bind crypto threads to cores (1) or cores and domains (2) at boot"); static u_int ktls_maxlen = 16384; SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, maxlen, CTLFLAG_RDTUN, &ktls_maxlen, 0, "Maximum TLS record size"); static int ktls_number_threads; SYSCTL_INT(_kern_ipc_tls_stats, OID_AUTO, threads, CTLFLAG_RD, &ktls_number_threads, 0, "Number of TLS threads in thread-pool"); unsigned int ktls_ifnet_max_rexmit_pct = 2; SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, ifnet_max_rexmit_pct, CTLFLAG_RWTUN, &ktls_ifnet_max_rexmit_pct, 2, "Max percent bytes retransmitted before ifnet TLS is disabled"); static bool ktls_offload_enable; SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, enable, CTLFLAG_RWTUN, &ktls_offload_enable, 0, "Enable support for kernel TLS offload"); static bool ktls_cbc_enable = true; SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, cbc_enable, CTLFLAG_RWTUN, &ktls_cbc_enable, 1, "Enable Support of AES-CBC crypto for kernel TLS"); static bool ktls_sw_buffer_cache = true; SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, sw_buffer_cache, CTLFLAG_RDTUN, &ktls_sw_buffer_cache, 1, "Enable caching of output buffers for SW encryption"); static int ktls_max_alloc = 128; SYSCTL_INT(_kern_ipc_tls, OID_AUTO, max_alloc, CTLFLAG_RWTUN, &ktls_max_alloc, 128, "Max number of 16k buffers to allocate in thread context"); static 
COUNTER_U64_DEFINE_EARLY(ktls_tasks_active); SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD, &ktls_tasks_active, "Number of active tasks"); static COUNTER_U64_DEFINE_EARLY(ktls_cnt_tx_pending); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_tx_pending, CTLFLAG_RD, &ktls_cnt_tx_pending, "Number of TLS 1.0 records waiting for earlier TLS records"); static COUNTER_U64_DEFINE_EARLY(ktls_cnt_tx_queued); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_tx_inqueue, CTLFLAG_RD, &ktls_cnt_tx_queued, "Number of TLS records in queue to tasks for SW encryption"); static COUNTER_U64_DEFINE_EARLY(ktls_cnt_rx_queued); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_rx_inqueue, CTLFLAG_RD, &ktls_cnt_rx_queued, "Number of TLS sockets in queue to tasks for SW decryption"); static COUNTER_U64_DEFINE_EARLY(ktls_offload_total); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, offload_total, CTLFLAG_RD, &ktls_offload_total, "Total successful TLS setups (parameters set)"); static COUNTER_U64_DEFINE_EARLY(ktls_offload_enable_calls); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, enable_calls, CTLFLAG_RD, &ktls_offload_enable_calls, "Total number of TLS enable calls made"); static COUNTER_U64_DEFINE_EARLY(ktls_offload_active); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, active, CTLFLAG_RD, &ktls_offload_active, "Total Active TLS sessions"); static COUNTER_U64_DEFINE_EARLY(ktls_offload_corrupted_records); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, corrupted_records, CTLFLAG_RD, &ktls_offload_corrupted_records, "Total corrupted TLS records received"); static COUNTER_U64_DEFINE_EARLY(ktls_offload_failed_crypto); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, failed_crypto, CTLFLAG_RD, &ktls_offload_failed_crypto, "Total TLS crypto failures"); static COUNTER_U64_DEFINE_EARLY(ktls_switch_to_ifnet); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_ifnet, CTLFLAG_RD, &ktls_switch_to_ifnet, "TLS sessions switched from SW to ifnet"); static COUNTER_U64_DEFINE_EARLY(ktls_switch_to_sw); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_sw, CTLFLAG_RD, &ktls_switch_to_sw, "TLS sessions switched from ifnet to SW"); static COUNTER_U64_DEFINE_EARLY(ktls_switch_failed); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_failed, CTLFLAG_RD, &ktls_switch_failed, "TLS sessions unable to switch between SW and ifnet"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_fail); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_failed, CTLFLAG_RD, &ktls_ifnet_disable_fail, "TLS sessions unable to switch to SW from ifnet"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_ok); SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_ok, CTLFLAG_RD, &ktls_ifnet_disable_ok, "TLS sessions able to switch to SW from ifnet"); SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Software TLS session stats"); SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Hardware (ifnet) TLS session stats"); #ifdef TCP_OFFLOAD SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, toe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "TOE TLS session stats"); #endif static COUNTER_U64_DEFINE_EARLY(ktls_sw_cbc); SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, cbc, CTLFLAG_RD, &ktls_sw_cbc, "Active number of software TLS sessions using AES-CBC"); static COUNTER_U64_DEFINE_EARLY(ktls_sw_gcm); SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, gcm, CTLFLAG_RD, &ktls_sw_gcm, "Active number of software TLS sessions using AES-GCM"); static 
COUNTER_U64_DEFINE_EARLY(ktls_sw_chacha20); SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, chacha20, CTLFLAG_RD, &ktls_sw_chacha20, "Active number of software TLS sessions using Chacha20-Poly1305"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_cbc); SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, cbc, CTLFLAG_RD, &ktls_ifnet_cbc, "Active number of ifnet TLS sessions using AES-CBC"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_gcm); SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, gcm, CTLFLAG_RD, &ktls_ifnet_gcm, "Active number of ifnet TLS sessions using AES-GCM"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_chacha20); SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, chacha20, CTLFLAG_RD, &ktls_ifnet_chacha20, "Active number of ifnet TLS sessions using Chacha20-Poly1305"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_reset); SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset, CTLFLAG_RD, &ktls_ifnet_reset, "TLS sessions updated to a new ifnet send tag"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_reset_dropped); SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_dropped, CTLFLAG_RD, &ktls_ifnet_reset_dropped, "TLS sessions dropped after failing to update ifnet send tag"); static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_reset_failed); SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_failed, CTLFLAG_RD, &ktls_ifnet_reset_failed, "TLS sessions that failed to allocate a new ifnet send tag"); static int ktls_ifnet_permitted; SYSCTL_UINT(_kern_ipc_tls_ifnet, OID_AUTO, permitted, CTLFLAG_RWTUN, &ktls_ifnet_permitted, 1, "Whether to permit hardware (ifnet) TLS sessions"); #ifdef TCP_OFFLOAD static COUNTER_U64_DEFINE_EARLY(ktls_toe_cbc); SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, cbc, CTLFLAG_RD, &ktls_toe_cbc, "Active number of TOE TLS sessions using AES-CBC"); static COUNTER_U64_DEFINE_EARLY(ktls_toe_gcm); SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, gcm, CTLFLAG_RD, &ktls_toe_gcm, "Active number of TOE TLS sessions using AES-GCM"); static COUNTER_U64_DEFINE_EARLY(ktls_toe_chacha20); SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, chacha20, CTLFLAG_RD, &ktls_toe_chacha20, "Active number of TOE TLS sessions using Chacha20-Poly1305"); #endif static MALLOC_DEFINE(M_KTLS, "ktls", "Kernel TLS"); static void ktls_cleanup(struct ktls_session *tls); #if defined(INET) || defined(INET6) static void ktls_reset_send_tag(void *context, int pending); #endif static void ktls_work_thread(void *ctx); static void ktls_alloc_thread(void *ctx); #if defined(INET) || defined(INET6) static u_int ktls_get_cpu(struct socket *so) { struct inpcb *inp; #ifdef NUMA struct ktls_domain_info *di; #endif u_int cpuid; inp = sotoinpcb(so); #ifdef RSS cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid != NETISR_CPUID_NONE) return (cpuid); #endif /* * Just use the flowid to shard connections in a repeatable * fashion. Note that TLS 1.0 sessions rely on the * serialization provided by having the same connection use * the same queue. 
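 * With NUMA binding (ktls_bind_threads > 1) the flowid selects a worker CPU within the connection's memory domain.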
*/ #ifdef NUMA if (ktls_bind_threads > 1 && inp->inp_numa_domain != M_NODOM) { di = &ktls_domains[inp->inp_numa_domain]; cpuid = di->cpu[inp->inp_flowid % di->count]; } else #endif cpuid = ktls_cpuid_lookup[inp->inp_flowid % ktls_number_threads]; return (cpuid); } #endif static int ktls_buffer_import(void *arg, void **store, int count, int domain, int flags) { vm_page_t m; int i, req; KASSERT((ktls_maxlen & PAGE_MASK) == 0, ("%s: ktls max length %d is not page size-aligned", __func__, ktls_maxlen)); req = VM_ALLOC_WIRED | VM_ALLOC_NODUMP | malloc2vm_flags(flags); for (i = 0; i < count; i++) { m = vm_page_alloc_noobj_contig_domain(domain, req, atop(ktls_maxlen), 0, ~0ul, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if (m == NULL) break; store[i] = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); } return (i); } static void ktls_buffer_release(void *arg __unused, void **store, int count) { vm_page_t m; int i, j; for (i = 0; i < count; i++) { m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)store[i])); for (j = 0; j < atop(ktls_maxlen); j++) { (void)vm_page_unwire_noq(m + j); vm_page_free(m + j); } } } static void ktls_free_mext_contig(struct mbuf *m) { M_ASSERTEXTPG(m); uma_zfree(ktls_buffer_zone, (void *)PHYS_TO_DMAP(m->m_epg_pa[0])); } static int ktls_init(void) { struct thread *td; struct pcpu *pc; int count, domain, error, i; ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS, M_WAITOK | M_ZERO); ktls_session_zone = uma_zcreate("ktls_session", sizeof(struct ktls_session), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); if (ktls_sw_buffer_cache) { ktls_buffer_zone = uma_zcache_create("ktls_buffers", roundup2(ktls_maxlen, PAGE_SIZE), NULL, NULL, NULL, NULL, ktls_buffer_import, ktls_buffer_release, NULL, UMA_ZONE_FIRSTTOUCH); } /* * Initialize the workqueues to run the TLS work. We create a * work queue for each CPU. */ CPU_FOREACH(i) { STAILQ_INIT(&ktls_wq[i].m_head); STAILQ_INIT(&ktls_wq[i].so_head); mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF); if (ktls_bind_threads > 1) { pc = pcpu_find(i); domain = pc->pc_domain; count = ktls_domains[domain].count; ktls_domains[domain].cpu[count] = i; ktls_domains[domain].count++; } ktls_cpuid_lookup[ktls_number_threads] = i; ktls_number_threads++; } /* * If we somehow have an empty domain, fall back to choosing * among all KTLS threads. */ if (ktls_bind_threads > 1) { for (i = 0; i < vm_ndomains; i++) { if (ktls_domains[i].count == 0) { ktls_bind_threads = 1; break; } } } /* Start kthreads for each workqueue. */ CPU_FOREACH(i) { error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i], &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i); if (error) { printf("Can't add KTLS thread %d error %d\n", i, error); return (error); } } /* * Start an allocation thread per-domain to perform blocking allocations * of 16k physically contiguous TLS crypto destination buffers. 
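 * Domains with no memory or no CPUs are skipped.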
*/ if (ktls_sw_buffer_cache) { for (domain = 0; domain < vm_ndomains; domain++) { if (VM_DOMAIN_EMPTY(domain)) continue; if (CPU_EMPTY(&cpuset_domain[domain])) continue; error = kproc_kthread_add(ktls_alloc_thread, &ktls_domains[domain], &ktls_proc, &ktls_domains[domain].alloc_td.td, 0, 0, "KTLS", "alloc_%d", domain); if (error) { printf("Can't add KTLS alloc thread %d error %d\n", domain, error); return (error); } } } if (bootverbose) printf("KTLS: Initialized %d threads\n", ktls_number_threads); return (0); } static int ktls_start_kthreads(void) { int error, state; start: state = atomic_load_acq_int(&ktls_init_state); if (__predict_true(state > 0)) return (0); if (state < 0) return (ENXIO); sx_xlock(&ktls_init_lock); if (ktls_init_state != 0) { sx_xunlock(&ktls_init_lock); goto start; } error = ktls_init(); if (error == 0) state = 1; else state = -1; atomic_store_rel_int(&ktls_init_state, state); sx_xunlock(&ktls_init_lock); return (error); } #if defined(INET) || defined(INET6) static int ktls_create_session(struct socket *so, struct tls_enable *en, struct ktls_session **tlsp) { struct ktls_session *tls; int error; /* Only TLS 1.0 - 1.3 are supported. */ if (en->tls_vmajor != TLS_MAJOR_VER_ONE) return (EINVAL); if (en->tls_vminor < TLS_MINOR_VER_ZERO || en->tls_vminor > TLS_MINOR_VER_THREE) return (EINVAL); if (en->auth_key_len < 0 || en->auth_key_len > TLS_MAX_PARAM_SIZE) return (EINVAL); if (en->cipher_key_len < 0 || en->cipher_key_len > TLS_MAX_PARAM_SIZE) return (EINVAL); if (en->iv_len < 0 || en->iv_len > sizeof(tls->params.iv)) return (EINVAL); /* All supported algorithms require a cipher key. */ if (en->cipher_key_len == 0) return (EINVAL); /* No flags are currently supported. */ if (en->flags != 0) return (EINVAL); /* Common checks for supported algorithms. */ switch (en->cipher_algorithm) { case CRYPTO_AES_NIST_GCM_16: /* * auth_algorithm isn't used, but permit GMAC values * for compatibility. */ switch (en->auth_algorithm) { case 0: #ifdef COMPAT_FREEBSD12 /* XXX: Really 13.0-current COMPAT. */ case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: #endif break; default: return (EINVAL); } if (en->auth_key_len != 0) return (EINVAL); switch (en->tls_vminor) { case TLS_MINOR_VER_TWO: if (en->iv_len != TLS_AEAD_GCM_LEN) return (EINVAL); break; case TLS_MINOR_VER_THREE: if (en->iv_len != TLS_1_3_GCM_IV_LEN) return (EINVAL); break; default: return (EINVAL); } break; case CRYPTO_AES_CBC: switch (en->auth_algorithm) { case CRYPTO_SHA1_HMAC: break; case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: if (en->tls_vminor != TLS_MINOR_VER_TWO) return (EINVAL); break; default: return (EINVAL); } if (en->auth_key_len == 0) return (EINVAL); /* * TLS 1.0 requires an implicit IV. TLS 1.1 and 1.2 * use explicit IVs. */ switch (en->tls_vminor) { case TLS_MINOR_VER_ZERO: if (en->iv_len != TLS_CBC_IMPLICIT_IV_LEN) return (EINVAL); break; case TLS_MINOR_VER_ONE: case TLS_MINOR_VER_TWO: /* Ignore any supplied IV. 
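 * TLS 1.1 and 1.2 CBC carry an explicit IV in each record, so no implicit IV needs to be stored.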
*/ en->iv_len = 0; break; default: return (EINVAL); } break; case CRYPTO_CHACHA20_POLY1305: if (en->auth_algorithm != 0 || en->auth_key_len != 0) return (EINVAL); if (en->tls_vminor != TLS_MINOR_VER_TWO && en->tls_vminor != TLS_MINOR_VER_THREE) return (EINVAL); if (en->iv_len != TLS_CHACHA20_IV_LEN) return (EINVAL); break; default: return (EINVAL); } error = ktls_start_kthreads(); if (error != 0) return (error); tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO); counter_u64_add(ktls_offload_active, 1); refcount_init(&tls->refcount, 1); TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls); tls->wq_index = ktls_get_cpu(so); tls->params.cipher_algorithm = en->cipher_algorithm; tls->params.auth_algorithm = en->auth_algorithm; tls->params.tls_vmajor = en->tls_vmajor; tls->params.tls_vminor = en->tls_vminor; tls->params.flags = en->flags; tls->params.max_frame_len = min(TLS_MAX_MSG_SIZE_V10_2, ktls_maxlen); /* Set the header and trailer lengths. */ tls->params.tls_hlen = sizeof(struct tls_record_layer); switch (en->cipher_algorithm) { case CRYPTO_AES_NIST_GCM_16: /* * TLS 1.2 uses a 4 byte implicit IV with an explicit 8 byte * nonce. TLS 1.3 uses a 12 byte implicit IV. */ if (en->tls_vminor < TLS_MINOR_VER_THREE) tls->params.tls_hlen += sizeof(uint64_t); tls->params.tls_tlen = AES_GMAC_HASH_LEN; tls->params.tls_bs = 1; break; case CRYPTO_AES_CBC: switch (en->auth_algorithm) { case CRYPTO_SHA1_HMAC: if (en->tls_vminor == TLS_MINOR_VER_ZERO) { /* Implicit IV, no nonce. */ tls->sequential_records = true; tls->next_seqno = be64dec(en->rec_seq); STAILQ_INIT(&tls->pending_records); } else { tls->params.tls_hlen += AES_BLOCK_LEN; } tls->params.tls_tlen = AES_BLOCK_LEN + SHA1_HASH_LEN; break; case CRYPTO_SHA2_256_HMAC: tls->params.tls_hlen += AES_BLOCK_LEN; tls->params.tls_tlen = AES_BLOCK_LEN + SHA2_256_HASH_LEN; break; case CRYPTO_SHA2_384_HMAC: tls->params.tls_hlen += AES_BLOCK_LEN; tls->params.tls_tlen = AES_BLOCK_LEN + SHA2_384_HASH_LEN; break; default: panic("invalid hmac"); } tls->params.tls_bs = AES_BLOCK_LEN; break; case CRYPTO_CHACHA20_POLY1305: /* * Chacha20 uses a 12 byte implicit IV. */ tls->params.tls_tlen = POLY1305_HASH_LEN; tls->params.tls_bs = 1; break; default: panic("invalid cipher"); } /* * TLS 1.3 includes optional padding which we do not support, * and also puts the "real" record type at the end of the * encrypted data. */ if (en->tls_vminor == TLS_MINOR_VER_THREE) tls->params.tls_tlen += sizeof(uint8_t); KASSERT(tls->params.tls_hlen <= MBUF_PEXT_HDR_LEN, ("TLS header length too long: %d", tls->params.tls_hlen)); KASSERT(tls->params.tls_tlen <= MBUF_PEXT_TRAIL_LEN, ("TLS trailer length too long: %d", tls->params.tls_tlen)); if (en->auth_key_len != 0) { tls->params.auth_key_len = en->auth_key_len; tls->params.auth_key = malloc(en->auth_key_len, M_KTLS, M_WAITOK); error = copyin(en->auth_key, tls->params.auth_key, en->auth_key_len); if (error) goto out; } tls->params.cipher_key_len = en->cipher_key_len; tls->params.cipher_key = malloc(en->cipher_key_len, M_KTLS, M_WAITOK); error = copyin(en->cipher_key, tls->params.cipher_key, en->cipher_key_len); if (error) goto out; /* * This holds the implicit portion of the nonce for AEAD * ciphers and the initial implicit IV for TLS 1.0. The * explicit portions of the IV are generated in ktls_frame(). */ if (en->iv_len != 0) { tls->params.iv_len = en->iv_len; error = copyin(en->iv, tls->params.iv, en->iv_len); if (error) goto out; /* * For TLS 1.2 with GCM, generate an 8-byte nonce as a * counter to generate unique explicit IVs. 
* * Store this counter in the last 8 bytes of the IV * array so that it is 8-byte aligned. */ if (en->cipher_algorithm == CRYPTO_AES_NIST_GCM_16 && en->tls_vminor == TLS_MINOR_VER_TWO) arc4rand(tls->params.iv + 8, sizeof(uint64_t), 0); } *tlsp = tls; return (0); out: ktls_cleanup(tls); return (error); } static struct ktls_session * ktls_clone_session(struct ktls_session *tls) { struct ktls_session *tls_new; tls_new = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO); counter_u64_add(ktls_offload_active, 1); refcount_init(&tls_new->refcount, 1); TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_tag, tls_new); /* Copy fields from existing session. */ tls_new->params = tls->params; tls_new->wq_index = tls->wq_index; /* Deep copy keys. */ if (tls_new->params.auth_key != NULL) { tls_new->params.auth_key = malloc(tls->params.auth_key_len, M_KTLS, M_WAITOK); memcpy(tls_new->params.auth_key, tls->params.auth_key, tls->params.auth_key_len); } tls_new->params.cipher_key = malloc(tls->params.cipher_key_len, M_KTLS, M_WAITOK); memcpy(tls_new->params.cipher_key, tls->params.cipher_key, tls->params.cipher_key_len); return (tls_new); } #endif static void ktls_cleanup(struct ktls_session *tls) { counter_u64_add(ktls_offload_active, -1); switch (tls->mode) { case TCP_TLS_MODE_SW: switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_sw_cbc, -1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_sw_gcm, -1); break; case CRYPTO_CHACHA20_POLY1305: counter_u64_add(ktls_sw_chacha20, -1); break; } break; case TCP_TLS_MODE_IFNET: switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_ifnet_cbc, -1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_ifnet_gcm, -1); break; case CRYPTO_CHACHA20_POLY1305: counter_u64_add(ktls_ifnet_chacha20, -1); break; } if (tls->snd_tag != NULL) m_snd_tag_rele(tls->snd_tag); break; #ifdef TCP_OFFLOAD case TCP_TLS_MODE_TOE: switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_toe_cbc, -1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_toe_gcm, -1); break; case CRYPTO_CHACHA20_POLY1305: counter_u64_add(ktls_toe_chacha20, -1); break; } break; #endif } if (tls->ocf_session != NULL) ktls_ocf_free(tls); if (tls->params.auth_key != NULL) { zfree(tls->params.auth_key, M_KTLS); tls->params.auth_key = NULL; tls->params.auth_key_len = 0; } if (tls->params.cipher_key != NULL) { zfree(tls->params.cipher_key, M_KTLS); tls->params.cipher_key = NULL; tls->params.cipher_key_len = 0; } explicit_bzero(tls->params.iv, sizeof(tls->params.iv)); } #if defined(INET) || defined(INET6) #ifdef TCP_OFFLOAD static int ktls_try_toe(struct socket *so, struct ktls_session *tls, int direction) { struct inpcb *inp; struct tcpcb *tp; int error; inp = so->so_pcb; INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); return (ECONNRESET); } if (inp->inp_socket == NULL) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); if (!(tp->t_flags & TF_TOE)) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } error = tcp_offload_alloc_tls_session(tp, tls, direction); INP_WUNLOCK(inp); if (error == 0) { tls->mode = TCP_TLS_MODE_TOE; switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_toe_cbc, 1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_toe_gcm, 1); break; case CRYPTO_CHACHA20_POLY1305: counter_u64_add(ktls_toe_chacha20, 1); break; } } return (error); } #endif /* * Common code used when first enabling ifnet TLS on a connection or * when 
allocating a new ifnet TLS session due to a routing change. * This function allocates a new TLS send tag on whatever interface * the connection is currently routed over. */ static int ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force, struct m_snd_tag **mstp) { union if_snd_tag_alloc_params params; struct ifnet *ifp; struct nhop_object *nh; struct tcpcb *tp; int error; INP_RLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_RUNLOCK(inp); return (ECONNRESET); } if (inp->inp_socket == NULL) { INP_RUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); /* * Check administrative controls on ifnet TLS to determine if * ifnet TLS should be denied. * * - Always permit 'force' requests. * - ktls_ifnet_permitted == 0: always deny. */ if (!force && ktls_ifnet_permitted == 0) { INP_RUNLOCK(inp); return (ENXIO); } /* * XXX: Use the cached route in the inpcb to find the * interface. This should perhaps instead use * rtalloc1_fib(dst, 0, 0, fibnum). Since KTLS is only * enabled after a connection has completed key negotiation in * userland, the cached route will be present in practice. */ nh = inp->inp_route.ro_nh; if (nh == NULL) { INP_RUNLOCK(inp); return (ENXIO); } ifp = nh->nh_ifp; if_ref(ifp); /* * Allocate a TLS + ratelimit tag if the connection has an * existing pacing rate. */ if (tp->t_pacing_rate != -1 && (ifp->if_capenable & IFCAP_TXTLS_RTLMT) != 0) { params.hdr.type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT; params.tls_rate_limit.inp = inp; params.tls_rate_limit.tls = tls; params.tls_rate_limit.max_rate = tp->t_pacing_rate; } else { params.hdr.type = IF_SND_TAG_TYPE_TLS; params.tls.inp = inp; params.tls.tls = tls; } params.hdr.flowid = inp->inp_flowid; params.hdr.flowtype = inp->inp_flowtype; params.hdr.numa_domain = inp->inp_numa_domain; INP_RUNLOCK(inp); if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) { error = EOPNOTSUPP; goto out; } if (inp->inp_vflag & INP_IPV6) { if ((ifp->if_capenable & IFCAP_TXTLS6) == 0) { error = EOPNOTSUPP; goto out; } } else { if ((ifp->if_capenable & IFCAP_TXTLS4) == 0) { error = EOPNOTSUPP; goto out; } } error = m_snd_tag_alloc(ifp, ¶ms, mstp); out: if_rele(ifp); return (error); } static int ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force) { struct m_snd_tag *mst; int error; error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst); if (error == 0) { tls->mode = TCP_TLS_MODE_IFNET; tls->snd_tag = mst; switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_ifnet_cbc, 1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_ifnet_gcm, 1); break; case CRYPTO_CHACHA20_POLY1305: counter_u64_add(ktls_ifnet_chacha20, 1); break; } } return (error); } static void ktls_use_sw(struct ktls_session *tls) { tls->mode = TCP_TLS_MODE_SW; switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_sw_cbc, 1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_sw_gcm, 1); break; case CRYPTO_CHACHA20_POLY1305: counter_u64_add(ktls_sw_chacha20, 1); break; } } static int ktls_try_sw(struct socket *so, struct ktls_session *tls, int direction) { int error; error = ktls_ocf_try(so, tls, direction); if (error) return (error); ktls_use_sw(tls); return (0); } /* * KTLS RX stores data in the socket buffer as a list of TLS records, * where each record is stored as a control message containg the TLS * header followed by data mbufs containing the decrypted data. This * is different from KTLS TX which always uses an mb_ext_pgs mbuf for * both encrypted and decrypted data. 
TLS records decrypted by a NIC * should be queued to the socket buffer as records, but encrypted * data which needs to be decrypted by software arrives as a stream of * regular mbufs which need to be converted. In addition, there may * already be pending encrypted data in the socket buffer when KTLS RX * is enabled. * * To manage not-yet-decrypted data for KTLS RX, the following scheme * is used: * * - A single chain of NOTREADY mbufs is hung off of sb_mtls. * * - ktls_check_rx checks this chain of mbufs reading the TLS header * from the first mbuf. Once all of the data for that TLS record is * queued, the socket is queued to a worker thread. * * - The worker thread calls ktls_decrypt to decrypt TLS records in * the TLS chain. Each TLS record is detached from the TLS chain, * decrypted, and inserted into the regular socket buffer chain as * record starting with a control message holding the TLS header and * a chain of mbufs holding the encrypted data. */ static void sb_mark_notready(struct sockbuf *sb) { struct mbuf *m; m = sb->sb_mb; sb->sb_mtls = m; sb->sb_mb = NULL; sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; for (; m != NULL; m = m->m_next) { KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt != NULL", __func__)); KASSERT((m->m_flags & M_NOTAVAIL) == 0, ("%s: mbuf not avail", __func__)); KASSERT(sb->sb_acc >= m->m_len, ("%s: sb_acc < m->m_len", __func__)); m->m_flags |= M_NOTREADY; sb->sb_acc -= m->m_len; sb->sb_tlscc += m->m_len; sb->sb_mtlstail = m; } KASSERT(sb->sb_acc == 0 && sb->sb_tlscc == sb->sb_ccc, ("%s: acc %u tlscc %u ccc %u", __func__, sb->sb_acc, sb->sb_tlscc, sb->sb_ccc)); } /* * Return information about the pending TLS data in a socket * buffer. On return, 'seqno' is set to the sequence number * of the next TLS record to be received, 'resid' is set to * the amount of bytes still needed for the last pending * record. The function returns 'false' if the last pending * record contains a partial TLS header. In that case, 'resid' * is the number of bytes needed to complete the TLS header. */ bool ktls_pending_rx_info(struct sockbuf *sb, uint64_t *seqnop, size_t *residp) { struct tls_record_layer hdr; struct mbuf *m; uint64_t seqno; size_t resid; u_int offset, record_len; SOCKBUF_LOCK_ASSERT(sb); MPASS(sb->sb_flags & SB_TLS_RX); seqno = sb->sb_tls_seqno; resid = sb->sb_tlscc; m = sb->sb_mtls; offset = 0; if (resid == 0) { *seqnop = seqno; *residp = 0; return (true); } for (;;) { seqno++; if (resid < sizeof(hdr)) { *seqnop = seqno; *residp = sizeof(hdr) - resid; return (false); } m_copydata(m, offset, sizeof(hdr), (void *)&hdr); record_len = sizeof(hdr) + ntohs(hdr.tls_length); if (resid <= record_len) { *seqnop = seqno; *residp = record_len - resid; return (true); } resid -= record_len; while (record_len != 0) { if (m->m_len - offset > record_len) { offset += record_len; break; } record_len -= (m->m_len - offset); offset = 0; m = m->m_next; } } } int ktls_enable_rx(struct socket *so, struct tls_enable *en) { struct ktls_session *tls; int error; if (!ktls_offload_enable) return (ENOTSUP); if (SOLISTENING(so)) return (EINVAL); counter_u64_add(ktls_offload_enable_calls, 1); /* * This should always be true since only the TCP socket option * invokes this function. */ if (so->so_proto->pr_protocol != IPPROTO_TCP) return (EINVAL); /* * XXX: Don't overwrite existing sessions. We should permit * this to support rekeying in the future. 
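
[Editor's note: the record walk performed by ktls_pending_rx_info() can be illustrated on a flat byte buffer instead of an mbuf chain. The standalone sketch below is not kernel code; pending_rx_info() and TLS_HDR_LEN are hypothetical names, but the seqno/resid bookkeeping mirrors the kernel routine.]

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define TLS_HDR_LEN 5   /* type, version (2 bytes), length (2 bytes, big endian) */

/*
 * Walk 'len' buffered bytes of concatenated TLS records the way
 * ktls_pending_rx_info() walks sb_mtls.  Returns false if the buffer
 * ends inside a record header; *residp is then the bytes needed to
 * complete that header, otherwise the bytes needed to complete the
 * last pending record (0 if it is already complete).
 */
static bool
pending_rx_info(const uint8_t *buf, size_t len, uint64_t start_seqno,
    uint64_t *seqnop, size_t *residp)
{
    uint64_t seqno = start_seqno;
    size_t resid = len, off = 0, reclen;

    if (resid == 0) {
        *seqnop = seqno;
        *residp = 0;
        return (true);
    }
    for (;;) {
        seqno++;
        if (resid < TLS_HDR_LEN) {
            *seqnop = seqno;
            *residp = TLS_HDR_LEN - resid;
            return (false);
        }
        reclen = TLS_HDR_LEN + ((size_t)buf[off + 3] << 8 | buf[off + 4]);
        if (resid <= reclen) {
            *seqnop = seqno;
            *residp = reclen - resid;
            return (true);
        }
        resid -= reclen;
        off += reclen;
    }
}

int main(void) {
    /* One complete 4-byte application-data record plus 3 bytes of the
     * next record's header. */
    uint8_t buf[] = { 23, 3, 3, 0, 4, 1, 2, 3, 4, 23, 3, 3 };
    uint64_t seqno;
    size_t resid;
    bool hdr_ok = pending_rx_info(buf, sizeof(buf), 100, &seqno, &resid);

    /* Prints: seqno 102 resid 2 header-complete 0 */
    printf("seqno %llu resid %zu header-complete %d\n",
        (unsigned long long)seqno, resid, hdr_ok);
    return (0);
}
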
*/ if (so->so_rcv.sb_tls_info != NULL) return (EALREADY); if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable) return (ENOTSUP); error = ktls_create_session(so, en, &tls); if (error) return (error); error = ktls_ocf_try(so, tls, KTLS_RX); if (error) { ktls_cleanup(tls); return (error); } /* Mark the socket as using TLS offload. */ SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_tls_seqno = be64dec(en->rec_seq); so->so_rcv.sb_tls_info = tls; so->so_rcv.sb_flags |= SB_TLS_RX; /* Mark existing data as not ready until it can be decrypted. */ sb_mark_notready(&so->so_rcv); ktls_check_rx(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); #ifdef TCP_OFFLOAD error = ktls_try_toe(so, tls, KTLS_RX); if (error) #endif ktls_use_sw(tls); counter_u64_add(ktls_offload_total, 1); return (0); } int ktls_enable_tx(struct socket *so, struct tls_enable *en) { struct ktls_session *tls; struct inpcb *inp; int error; if (!ktls_offload_enable) return (ENOTSUP); if (SOLISTENING(so)) return (EINVAL); counter_u64_add(ktls_offload_enable_calls, 1); /* * This should always be true since only the TCP socket option * invokes this function. */ if (so->so_proto->pr_protocol != IPPROTO_TCP) return (EINVAL); /* * XXX: Don't overwrite existing sessions. We should permit * this to support rekeying in the future. */ if (so->so_snd.sb_tls_info != NULL) return (EALREADY); if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable) return (ENOTSUP); /* TLS requires ext pgs */ if (mb_use_ext_pgs == 0) return (ENXIO); error = ktls_create_session(so, en, &tls); if (error) return (error); /* Prefer TOE -> ifnet TLS -> software TLS. */ #ifdef TCP_OFFLOAD error = ktls_try_toe(so, tls, KTLS_TX); if (error) #endif error = ktls_try_ifnet(so, tls, false); if (error) error = ktls_try_sw(so, tls, KTLS_TX); if (error) { ktls_cleanup(tls); return (error); } error = SOCK_IO_SEND_LOCK(so, SBL_WAIT); if (error) { ktls_cleanup(tls); return (error); } /* * Write lock the INP when setting sb_tls_info so that * routines in tcp_ratelimit.c can read sb_tls_info while * holding the INP lock. */ inp = so->so_pcb; INP_WLOCK(inp); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_tls_seqno = be64dec(en->rec_seq); so->so_snd.sb_tls_info = tls; if (tls->mode != TCP_TLS_MODE_SW) so->so_snd.sb_flags |= SB_TLS_IFNET; SOCKBUF_UNLOCK(&so->so_snd); INP_WUNLOCK(inp); SOCK_IO_SEND_UNLOCK(so); counter_u64_add(ktls_offload_total, 1); return (0); } int ktls_get_rx_mode(struct socket *so, int *modep) { struct ktls_session *tls; struct inpcb *inp __diagused; if (SOLISTENING(so)) return (EINVAL); inp = so->so_pcb; INP_WLOCK_ASSERT(inp); SOCK_RECVBUF_LOCK(so); tls = so->so_rcv.sb_tls_info; if (tls == NULL) *modep = TCP_TLS_MODE_NONE; else *modep = tls->mode; SOCK_RECVBUF_UNLOCK(so); return (0); } /* * ktls_get_rx_sequence - get the next TCP- and TLS- sequence number. * * This function gets information about the next TCP- and TLS- * sequence number to be processed by the TLS receive worker * thread. The information is extracted from the given "inpcb" * structure. The values are stored in host endian format at the two * given output pointer locations. The TCP sequence number points to * the beginning of the TLS header. * * This function returns zero on success, else a non-zero error code * is returned. 
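
[Editor's note: the TX enable path below prefers TOE, then ifnet (NIC) TLS, then software. A minimal userspace sketch of that fallback order, with stub try_*() functions standing in for ktls_try_toe()/ktls_try_ifnet()/ktls_try_sw(); the stubs and their return values are invented for illustration.]

#include <errno.h>
#include <stdio.h>

/* Stubs simulating which backends are available on this system. */
static int try_toe(void)   { return (EOPNOTSUPP); }     /* no TOE here */
static int try_ifnet(void) { return (ENXIO); }          /* NIC TLS denied */
static int try_sw(void)    { return (0); }              /* software works */

int main(void) {
    const char *mode = "none";
    int error;

    error = try_toe();
    if (error == 0)
        mode = "TOE";
    if (error != 0 && (error = try_ifnet()) == 0)
        mode = "ifnet";
    if (error != 0 && (error = try_sw()) == 0)
        mode = "software";
    printf("selected %s TLS (error %d)\n", mode, error);    /* software, 0 */
    return (0);
}
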
*/ int ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq) { struct socket *so; struct tcpcb *tp; INP_RLOCK(inp); so = inp->inp_socket; if (__predict_false(so == NULL)) { INP_RUNLOCK(inp); return (EINVAL); } if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_RUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); MPASS(tp != NULL); SOCKBUF_LOCK(&so->so_rcv); *tcpseq = tp->rcv_nxt - so->so_rcv.sb_tlscc; *tlsseq = so->so_rcv.sb_tls_seqno; SOCKBUF_UNLOCK(&so->so_rcv); INP_RUNLOCK(inp); return (0); } int ktls_get_tx_mode(struct socket *so, int *modep) { struct ktls_session *tls; struct inpcb *inp __diagused; if (SOLISTENING(so)) return (EINVAL); inp = so->so_pcb; INP_WLOCK_ASSERT(inp); SOCK_SENDBUF_LOCK(so); tls = so->so_snd.sb_tls_info; if (tls == NULL) *modep = TCP_TLS_MODE_NONE; else *modep = tls->mode; SOCK_SENDBUF_UNLOCK(so); return (0); } /* * Switch between SW and ifnet TLS sessions as requested. */ int ktls_set_tx_mode(struct socket *so, int mode) { struct ktls_session *tls, *tls_new; struct inpcb *inp; int error; if (SOLISTENING(so)) return (EINVAL); switch (mode) { case TCP_TLS_MODE_SW: case TCP_TLS_MODE_IFNET: break; default: return (EINVAL); } inp = so->so_pcb; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK(&so->so_snd); tls = so->so_snd.sb_tls_info; if (tls == NULL) { SOCKBUF_UNLOCK(&so->so_snd); return (0); } if (tls->mode == mode) { SOCKBUF_UNLOCK(&so->so_snd); return (0); } tls = ktls_hold(tls); SOCKBUF_UNLOCK(&so->so_snd); INP_WUNLOCK(inp); tls_new = ktls_clone_session(tls); if (mode == TCP_TLS_MODE_IFNET) error = ktls_try_ifnet(so, tls_new, true); else error = ktls_try_sw(so, tls_new, KTLS_TX); if (error) { counter_u64_add(ktls_switch_failed, 1); ktls_free(tls_new); ktls_free(tls); INP_WLOCK(inp); return (error); } error = SOCK_IO_SEND_LOCK(so, SBL_WAIT); if (error) { counter_u64_add(ktls_switch_failed, 1); ktls_free(tls_new); ktls_free(tls); INP_WLOCK(inp); return (error); } /* * If we raced with another session change, keep the existing * session. */ if (tls != so->so_snd.sb_tls_info) { counter_u64_add(ktls_switch_failed, 1); SOCK_IO_SEND_UNLOCK(so); ktls_free(tls_new); ktls_free(tls); INP_WLOCK(inp); return (EBUSY); } INP_WLOCK(inp); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_tls_info = tls_new; if (tls_new->mode != TCP_TLS_MODE_SW) so->so_snd.sb_flags |= SB_TLS_IFNET; SOCKBUF_UNLOCK(&so->so_snd); SOCK_IO_SEND_UNLOCK(so); /* * Drop two references on 'tls'. The first is for the * ktls_hold() above. The second drops the reference from the * socket buffer. */ KASSERT(tls->refcount >= 2, ("too few references on old session")); ktls_free(tls); ktls_free(tls); if (mode == TCP_TLS_MODE_IFNET) counter_u64_add(ktls_switch_to_ifnet, 1); else counter_u64_add(ktls_switch_to_sw, 1); return (0); } /* * Try to allocate a new TLS send tag. This task is scheduled when * ip_output detects a route change while trying to transmit a packet * holding a TLS record. If a new tag is allocated, replace the tag * in the TLS session. Subsequent packets on the connection will use * the new tag. If a new tag cannot be allocated, drop the * connection. */ static void ktls_reset_send_tag(void *context, int pending) { struct epoch_tracker et; struct ktls_session *tls; struct m_snd_tag *old, *new; struct inpcb *inp; struct tcpcb *tp; int error; MPASS(pending == 1); tls = context; inp = tls->inp; /* * Free the old tag first before allocating a new one. * ip[6]_output_send() will treat a NULL send tag the same as * an ifp mismatch and drop packets until a new tag is * allocated. 
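
[Editor's note: the arithmetic in ktls_get_rx_sequence() is simple but worth a worked example: the TCP sequence number of the next unprocessed TLS header is rcv_nxt minus the not-yet-decrypted bytes still queued in the socket buffer (sb_tlscc). Values below are made up.]

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t rcv_nxt = 4096;    /* next expected TCP sequence number */
    uint32_t sb_tlscc = 300;    /* queued, not-yet-decrypted TLS bytes */
    uint32_t tcpseq = rcv_nxt - sb_tlscc;

    /* The pending TLS header begins 300 bytes before rcv_nxt. */
    printf("TLS header starts at TCP seq %u\n", tcpseq);    /* 3796 */
    return (0);
}
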
* * Write-lock the INP when changing tls->snd_tag since * ip[6]_output_send() holds a read-lock when reading the * pointer. */ INP_WLOCK(inp); old = tls->snd_tag; tls->snd_tag = NULL; INP_WUNLOCK(inp); if (old != NULL) m_snd_tag_rele(old); error = ktls_alloc_snd_tag(inp, tls, true, &new); if (error == 0) { INP_WLOCK(inp); tls->snd_tag = new; mtx_pool_lock(mtxpool_sleep, tls); tls->reset_pending = false; mtx_pool_unlock(mtxpool_sleep, tls); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); counter_u64_add(ktls_ifnet_reset, 1); /* * XXX: Should we kick tcp_output explicitly now that * the send tag is fixed or just rely on timers? */ } else { NET_EPOCH_ENTER(et); INP_WLOCK(inp); if (!in_pcbrele_wlocked(inp)) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); CURVNET_SET(tp->t_vnet); tp = tcp_drop(tp, ECONNABORTED); CURVNET_RESTORE(); if (tp != NULL) INP_WUNLOCK(inp); counter_u64_add(ktls_ifnet_reset_dropped, 1); } else INP_WUNLOCK(inp); } NET_EPOCH_EXIT(et); counter_u64_add(ktls_ifnet_reset_failed, 1); /* * Leave reset_pending true to avoid future tasks while * the socket goes away. */ } ktls_free(tls); } int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls) { if (inp == NULL) return (ENOBUFS); INP_LOCK_ASSERT(inp); /* * See if we should schedule a task to update the send tag for * this session. */ mtx_pool_lock(mtxpool_sleep, tls); if (!tls->reset_pending) { (void) ktls_hold(tls); in_pcbref(inp); tls->inp = inp; tls->reset_pending = true; taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task); } mtx_pool_unlock(mtxpool_sleep, tls); return (ENOBUFS); } #ifdef RATELIMIT int ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate) { union if_snd_tag_modify_params params = { .rate_limit.max_rate = max_pacing_rate, .rate_limit.flags = M_NOWAIT, }; struct m_snd_tag *mst; /* Can't get to the inp, but it should be locked. */ /* INP_LOCK_ASSERT(inp); */ MPASS(tls->mode == TCP_TLS_MODE_IFNET); if (tls->snd_tag == NULL) { /* * Resetting send tag, ignore this change. The * pending reset may or may not see this updated rate * in the tcpcb. If it doesn't, we will just lose * this rate change. */ return (0); } MPASS(tls->snd_tag != NULL); MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT); mst = tls->snd_tag; return (mst->sw->snd_tag_modify(mst, ¶ms)); } #endif #endif void ktls_destroy(struct ktls_session *tls) { if (tls->sequential_records) { struct mbuf *m, *n; int page_count; STAILQ_FOREACH_SAFE(m, &tls->pending_records, m_epg_stailq, n) { page_count = m->m_epg_enc_cnt; while (page_count > 0) { KASSERT(page_count >= m->m_epg_nrdy, ("%s: too few pages", __func__)); page_count -= m->m_epg_nrdy; m = m_free(m); } } } ktls_cleanup(tls); uma_zfree(ktls_session_zone, tls); } void ktls_seq(struct sockbuf *sb, struct mbuf *m) { for (; m != NULL; m = m->m_next) { KASSERT((m->m_flags & M_EXTPG) != 0, ("ktls_seq: mapped mbuf %p", m)); m->m_epg_seqno = sb->sb_tls_seqno; sb->sb_tls_seqno++; } } /* * Add TLS framing (headers and trailers) to a chain of mbufs. Each * mbuf in the chain must be an unmapped mbuf. The payload of the * mbuf must be populated with the payload of each TLS record. * * The record_type argument specifies the TLS record type used when * populating the TLS header. * * The enq_count argument on return is set to the number of pages of * payload data for this entire chain that need to be encrypted via SW * encryption. 
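
[Editor's note: ktls_output_eagain() above uses a schedule-once pattern, where a pending flag checked and set under a mutex decides whether to enqueue the reset task. A minimal userspace model of that pattern follows; a pthread mutex replaces mtx_pool_lock() and a printf replaces taskqueue_enqueue(), and all names are illustrative.]

/* Build with: cc -o once once.c -lpthread */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct session {
    pthread_mutex_t mtx;
    bool reset_pending;
};

/* The first caller to see reset_pending == false "enqueues" the task;
 * later callers are no-ops until the task clears the flag again. */
static void
schedule_reset(struct session *s)
{
    bool enqueue = false;

    pthread_mutex_lock(&s->mtx);
    if (!s->reset_pending) {
        s->reset_pending = true;
        enqueue = true;
    }
    pthread_mutex_unlock(&s->mtx);
    printf(enqueue ? "reset task enqueued\n" : "reset already pending\n");
}

int main(void) {
    struct session s = { PTHREAD_MUTEX_INITIALIZER, false };

    schedule_reset(&s);     /* enqueues */
    schedule_reset(&s);     /* no-op */
    return (0);
}
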
The returned value should be passed to ktls_enqueue * when scheduling encryption of this chain of mbufs. To handle the * special case of empty fragments for TLS 1.0 sessions, an empty * fragment counts as one page. */ void ktls_frame(struct mbuf *top, struct ktls_session *tls, int *enq_cnt, uint8_t record_type) { struct tls_record_layer *tlshdr; struct mbuf *m; uint64_t *noncep; uint16_t tls_len; int maxlen __diagused; maxlen = tls->params.max_frame_len; *enq_cnt = 0; for (m = top; m != NULL; m = m->m_next) { /* * All mbufs in the chain should be TLS records whose * payload does not exceed the maximum frame length. * * Empty TLS 1.0 records are permitted when using CBC. */ KASSERT(m->m_len <= maxlen && m->m_len >= 0 && (m->m_len > 0 || ktls_permit_empty_frames(tls)), ("ktls_frame: m %p len %d", m, m->m_len)); /* * TLS frames require unmapped mbufs to store session * info. */ KASSERT((m->m_flags & M_EXTPG) != 0, ("ktls_frame: mapped mbuf %p (top = %p)", m, top)); tls_len = m->m_len; /* Save a reference to the session. */ m->m_epg_tls = ktls_hold(tls); m->m_epg_hdrlen = tls->params.tls_hlen; m->m_epg_trllen = tls->params.tls_tlen; if (tls->params.cipher_algorithm == CRYPTO_AES_CBC) { int bs, delta; /* * AES-CBC pads messages to a multiple of the * block size. Note that the padding is * applied after the digest and the encryption * is done on the "plaintext || mac || padding". * At least one byte of padding is always * present. * * Compute the final trailer length assuming * at most one block of padding. * tls->params.tls_tlen is the maximum * possible trailer length (padding + digest). * delta holds the number of excess padding * bytes if the maximum were used. Those * extra bytes are removed. */ bs = tls->params.tls_bs; delta = (tls_len + tls->params.tls_tlen) & (bs - 1); m->m_epg_trllen -= delta; } m->m_len += m->m_epg_hdrlen + m->m_epg_trllen; /* Populate the TLS header. */ tlshdr = (void *)m->m_epg_hdr; tlshdr->tls_vmajor = tls->params.tls_vmajor; /* * TLS 1.3 masquarades as TLS 1.2 with a record type * of TLS_RLTYPE_APP. */ if (tls->params.tls_vminor == TLS_MINOR_VER_THREE && tls->params.tls_vmajor == TLS_MAJOR_VER_ONE) { tlshdr->tls_vminor = TLS_MINOR_VER_TWO; tlshdr->tls_type = TLS_RLTYPE_APP; /* save the real record type for later */ m->m_epg_record_type = record_type; m->m_epg_trail[0] = record_type; } else { tlshdr->tls_vminor = tls->params.tls_vminor; tlshdr->tls_type = record_type; } tlshdr->tls_length = htons(m->m_len - sizeof(*tlshdr)); /* * Store nonces / explicit IVs after the end of the * TLS header. * * For GCM with TLS 1.2, an 8 byte nonce is copied * from the end of the IV. The nonce is then * incremented for use by the next record. * * For CBC, a random nonce is inserted for TLS 1.1+. */ if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16 && tls->params.tls_vminor == TLS_MINOR_VER_TWO) { noncep = (uint64_t *)(tls->params.iv + 8); be64enc(tlshdr + 1, *noncep); (*noncep)++; } else if (tls->params.cipher_algorithm == CRYPTO_AES_CBC && tls->params.tls_vminor >= TLS_MINOR_VER_ONE) arc4rand(tlshdr + 1, AES_BLOCK_LEN, 0); /* * When using SW encryption, mark the mbuf not ready. * It will be marked ready via sbready() after the * record has been encrypted. * * When using ifnet TLS, unencrypted TLS records are * sent down the stack to the NIC. */ if (tls->mode == TCP_TLS_MODE_SW) { m->m_flags |= M_NOTREADY; if (__predict_false(tls_len == 0)) { /* TLS 1.0 empty fragment. 
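
[Editor's note: two computations in ktls_frame() lend themselves to small standalone demos. First, the per-record explicit nonce for TLS 1.2 GCM: a 64-bit counter kept in the last 8 bytes of the implicit IV is emitted big-endian and then incremented. The sketch uses memcpy instead of the kernel's aligned pointer access, and be64enc_sketch() stands in for be64enc().]

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Big-endian encode, like the kernel's be64enc(). */
static void
be64enc_sketch(uint8_t *p, uint64_t v)
{
    for (int i = 7; i >= 0; i--) {
        p[i] = v & 0xff;
        v >>= 8;
    }
}

/* Emit the 8-byte explicit nonce for the next TLS 1.2 GCM record and
 * advance the counter kept in the tail of the implicit IV. */
static void
next_explicit_nonce(uint8_t iv[16], uint8_t nonce[8])
{
    uint64_t ctr;

    memcpy(&ctr, iv + 8, sizeof(ctr));
    be64enc_sketch(nonce, ctr);
    ctr++;
    memcpy(iv + 8, &ctr, sizeof(ctr));
}

int main(void) {
    uint8_t iv[16] = { 0 }, nonce[8];

    for (int i = 0; i < 3; i++) {
        next_explicit_nonce(iv, nonce);
        for (int j = 0; j < 8; j++)
            printf("%02x", nonce[j]);
        printf("\n");       /* ...00, ...01, ...02 */
    }
    return (0);
}

[Editor's note: second, the AES-CBC trailer trimming: tls_tlen is the worst-case trailer (MAC plus a full block of padding) and the excess padding for a given payload length is subtracted. The MAC and block sizes below are assumptions (HMAC-SHA256 with AES), chosen only to show that the resulting record body stays block-aligned.]

#include <stdio.h>

int main(void) {
    int bs = 16;                    /* AES block size (assumed) */
    int mac_len = 32;               /* HMAC-SHA256 digest (assumed) */
    int tls_tlen = mac_len + bs;    /* worst case: MAC + full pad block */

    for (int tls_len = 100; tls_len <= 103; tls_len++) {
        int delta = (tls_len + tls_tlen) & (bs - 1);
        int trailer = tls_tlen - delta;

        /* payload + trailer is always a multiple of bs (144 here). */
        printf("payload %d -> trailer %d, body %d\n",
            tls_len, trailer, tls_len + trailer);
    }
    return (0);
}
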
*/ m->m_epg_nrdy = 1; } else m->m_epg_nrdy = m->m_epg_npgs; *enq_cnt += m->m_epg_nrdy; } } } bool ktls_permit_empty_frames(struct ktls_session *tls) { return (tls->params.cipher_algorithm == CRYPTO_AES_CBC && tls->params.tls_vminor == TLS_MINOR_VER_ZERO); } void ktls_check_rx(struct sockbuf *sb) { struct tls_record_layer hdr; struct ktls_wq *wq; struct socket *so; bool running; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_flags & SB_TLS_RX, ("%s: sockbuf %p isn't TLS RX", __func__, sb)); so = __containerof(sb, struct socket, so_rcv); if (sb->sb_flags & SB_TLS_RX_RUNNING) return; /* Is there enough queued for a TLS header? */ if (sb->sb_tlscc < sizeof(hdr)) { if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc != 0) so->so_error = EMSGSIZE; return; } m_copydata(sb->sb_mtls, 0, sizeof(hdr), (void *)&hdr); /* Is the entire record queued? */ if (sb->sb_tlscc < sizeof(hdr) + ntohs(hdr.tls_length)) { if ((sb->sb_state & SBS_CANTRCVMORE) != 0) so->so_error = EMSGSIZE; return; } sb->sb_flags |= SB_TLS_RX_RUNNING; soref(so); wq = &ktls_wq[so->so_rcv.sb_tls_info->wq_index]; mtx_lock(&wq->mtx); STAILQ_INSERT_TAIL(&wq->so_head, so, so_ktls_rx_list); running = wq->running; mtx_unlock(&wq->mtx); if (!running) wakeup(wq); counter_u64_add(ktls_cnt_rx_queued, 1); } static struct mbuf * ktls_detach_record(struct sockbuf *sb, int len) { struct mbuf *m, *n, *top; int remain; SOCKBUF_LOCK_ASSERT(sb); MPASS(len <= sb->sb_tlscc); /* * If TLS chain is the exact size of the record, * just grab the whole record. */ top = sb->sb_mtls; if (sb->sb_tlscc == len) { sb->sb_mtls = NULL; sb->sb_mtlstail = NULL; goto out; } /* * While it would be nice to use m_split() here, we need * to know exactly what m_split() allocates to update the * accounting, so do it inline instead. */ remain = len; for (m = top; remain > m->m_len; m = m->m_next) remain -= m->m_len; /* Easy case: don't have to split 'm'. */ if (remain == m->m_len) { sb->sb_mtls = m->m_next; if (sb->sb_mtls == NULL) sb->sb_mtlstail = NULL; m->m_next = NULL; goto out; } /* * Need to allocate an mbuf to hold the remainder of 'm'. Try * with M_NOWAIT first. */ n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { /* * Use M_WAITOK with socket buffer unlocked. If * 'sb_mtls' changes while the lock is dropped, return * NULL to force the caller to retry. */ SOCKBUF_UNLOCK(sb); n = m_get(M_WAITOK, MT_DATA); SOCKBUF_LOCK(sb); if (sb->sb_mtls != top) { m_free(n); return (NULL); } } n->m_flags |= M_NOTREADY; /* Store remainder in 'n'. */ n->m_len = m->m_len - remain; if (m->m_flags & M_EXT) { n->m_data = m->m_data + remain; mb_dupcl(n, m); } else { bcopy(mtod(m, caddr_t) + remain, mtod(n, caddr_t), n->m_len); } /* Trim 'm' and update accounting. */ m->m_len -= n->m_len; sb->sb_tlscc -= n->m_len; sb->sb_ccc -= n->m_len; /* Account for 'n'. */ sballoc_ktls_rx(sb, n); /* Insert 'n' into the TLS chain. */ sb->sb_mtls = n; n->m_next = m->m_next; if (sb->sb_mtlstail == m) sb->sb_mtlstail = n; /* Detach the record from the TLS chain. */ m->m_next = NULL; out: MPASS(m_length(top, NULL) == len); for (m = top; m != NULL; m = m->m_next) sbfree_ktls_rx(sb, m); sb->sb_tlsdcc = len; sb->sb_ccc += len; SBCHECK(sb); return (top); } /* * Determine the length of the trailing zero padding and find the real * record type in the byte before the padding. * * Walking the mbuf chain backwards is clumsy, so another option would * be to scan forwards remembering the last non-zero byte before the * trailer. However, it would be expensive to scan the entire record. 
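
[Editor's note: the gate ktls_check_rx() applies before handing a socket to a worker, shown on a flat buffer instead of the sb_mtls chain: first a full 5-byte header must be buffered, then the whole record. record_complete() is a hypothetical helper written for illustration.]

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define TLS_HDR_LEN 5

/* True once a complete TLS record is buffered: the header must be
 * present, then header plus the big-endian length field of payload. */
static bool
record_complete(const uint8_t *buf, size_t buffered)
{
    size_t reclen;

    if (buffered < TLS_HDR_LEN)
        return (false);
    reclen = TLS_HDR_LEN + ((size_t)buf[3] << 8 | buf[4]);
    return (buffered >= reclen);
}

int main(void) {
    uint8_t rec[] = { 23, 3, 3, 0, 4, 'd', 'a', 't', 'a' };

    printf("%d %d %d\n",
        record_complete(rec, 3),    /* 0: header incomplete */
        record_complete(rec, 7),    /* 0: record incomplete */
        record_complete(rec, 9));   /* 1: full record queued */
    return (0);
}
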
* Instead, find the last non-zero byte of each mbuf in the chain * keeping track of the relative offset of that nonzero byte. * * trail_len is the size of the MAC/tag on input and is set to the * size of the full trailer including padding and the record type on * return. */ static int tls13_find_record_type(struct ktls_session *tls, struct mbuf *m, int tls_len, int *trailer_len, uint8_t *record_typep) { char *cp; u_int digest_start, last_offset, m_len, offset; uint8_t record_type; digest_start = tls_len - *trailer_len; last_offset = 0; offset = 0; for (; m != NULL && offset < digest_start; offset += m->m_len, m = m->m_next) { /* Don't look for padding in the tag. */ m_len = min(digest_start - offset, m->m_len); cp = mtod(m, char *); /* Find last non-zero byte in this mbuf. */ while (m_len > 0 && cp[m_len - 1] == 0) m_len--; if (m_len > 0) { record_type = cp[m_len - 1]; last_offset = offset + m_len; } } if (last_offset < tls->params.tls_hlen) return (EBADMSG); *record_typep = record_type; *trailer_len = tls_len - last_offset + 1; return (0); } static void ktls_decrypt(struct socket *so) { char tls_header[MBUF_PEXT_HDR_LEN]; struct ktls_session *tls; struct sockbuf *sb; struct tls_record_layer *hdr; struct tls_get_record tgr; struct mbuf *control, *data, *m; uint64_t seqno; int error, remain, tls_len, trail_len; bool tls13; uint8_t vminor, record_type; hdr = (struct tls_record_layer *)tls_header; sb = &so->so_rcv; SOCKBUF_LOCK(sb); KASSERT(sb->sb_flags & SB_TLS_RX_RUNNING, ("%s: socket %p not running", __func__, so)); tls = sb->sb_tls_info; MPASS(tls != NULL); tls13 = (tls->params.tls_vminor == TLS_MINOR_VER_THREE); if (tls13) vminor = TLS_MINOR_VER_TWO; else vminor = tls->params.tls_vminor; for (;;) { /* Is there enough queued for a TLS header? */ if (sb->sb_tlscc < tls->params.tls_hlen) break; m_copydata(sb->sb_mtls, 0, tls->params.tls_hlen, tls_header); tls_len = sizeof(*hdr) + ntohs(hdr->tls_length); if (hdr->tls_vmajor != tls->params.tls_vmajor || hdr->tls_vminor != vminor) error = EINVAL; else if (tls13 && hdr->tls_type != TLS_RLTYPE_APP) error = EINVAL; else if (tls_len < tls->params.tls_hlen || tls_len > tls->params.tls_hlen + TLS_MAX_MSG_SIZE_V10_2 + tls->params.tls_tlen) error = EMSGSIZE; else error = 0; if (__predict_false(error != 0)) { /* * We have a corrupted record and are likely * out of sync. The connection isn't * recoverable at this point, so abort it. */ SOCKBUF_UNLOCK(sb); counter_u64_add(ktls_offload_corrupted_records, 1); CURVNET_SET(so->so_vnet); so->so_proto->pr_usrreqs->pru_abort(so); so->so_error = error; CURVNET_RESTORE(); goto deref; } /* Is the entire record queued? */ if (sb->sb_tlscc < tls_len) break; /* * Split out the portion of the mbuf chain containing * this TLS record. */ data = ktls_detach_record(sb, tls_len); if (data == NULL) continue; MPASS(sb->sb_tlsdcc == tls_len); seqno = sb->sb_tls_seqno; sb->sb_tls_seqno++; SBCHECK(sb); SOCKBUF_UNLOCK(sb); error = ktls_ocf_decrypt(tls, hdr, data, seqno, &trail_len); if (error == 0) { if (tls13) error = tls13_find_record_type(tls, data, tls_len, &trail_len, &record_type); else record_type = hdr->tls_type; } if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); SOCKBUF_LOCK(sb); if (sb->sb_tlsdcc == 0) { /* * sbcut/drop/flush discarded these * mbufs. */ m_freem(data); break; } /* * Drop this TLS record's data, but keep * decrypting subsequent records. 
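
[Editor's note: the backwards padding scan can be tried out in userspace on a flat buffer holding inner plaintext plus tag. Unlike the kernel routine, the sketch has no TLS header in front of the data and so omits the check that rejects a record type landing inside the header; find_record_type() below is an illustrative reimplementation, not the kernel function.]

#include <stdint.h>
#include <stdio.h>

/*
 * TLS 1.3 inner plaintext: content || real type byte || zero padding,
 * followed by the AEAD tag.  Scan backwards from just before the tag
 * for the last non-zero byte; that byte is the record type, and
 * everything from it onward belongs to the trailer.
 */
static int
find_record_type(const uint8_t *rec, int tls_len, int *trailer_len,
    uint8_t *typep)
{
    int last = tls_len - *trailer_len;      /* skip the tag */

    while (last > 0 && rec[last - 1] == 0)
        last--;
    if (last == 0)
        return (-1);                        /* nothing but padding */
    *typep = rec[last - 1];
    *trailer_len = tls_len - last + 1;      /* type + padding + tag */
    return (0);
}

int main(void) {
    /* 4 data bytes, type 23, 3 bytes of padding, 16-byte tag (zeroed). */
    uint8_t rec[4 + 1 + 3 + 16] = { 1, 2, 3, 4, 23 };
    int trailer = 16;
    uint8_t type;

    if (find_record_type(rec, sizeof(rec), &trailer, &type) == 0)
        printf("type %u, trailer %d\n", type, trailer);     /* 23, 20 */
    return (0);
}
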
*/ sb->sb_ccc -= tls_len; sb->sb_tlsdcc = 0; CURVNET_SET(so->so_vnet); so->so_error = EBADMSG; sorwakeup_locked(so); CURVNET_RESTORE(); m_freem(data); SOCKBUF_LOCK(sb); continue; } /* Allocate the control mbuf. */ memset(&tgr, 0, sizeof(tgr)); tgr.tls_type = record_type; tgr.tls_vmajor = hdr->tls_vmajor; tgr.tls_vminor = hdr->tls_vminor; tgr.tls_length = htobe16(tls_len - tls->params.tls_hlen - trail_len); - control = sbcreatecontrol_how(&tgr, sizeof(tgr), + control = sbcreatecontrol(&tgr, sizeof(tgr), TLS_GET_RECORD, IPPROTO_TCP, M_WAITOK); SOCKBUF_LOCK(sb); if (sb->sb_tlsdcc == 0) { /* sbcut/drop/flush discarded these mbufs. */ MPASS(sb->sb_tlscc == 0); m_freem(data); m_freem(control); break; } /* * Clear the 'dcc' accounting in preparation for * adding the decrypted record. */ sb->sb_ccc -= tls_len; sb->sb_tlsdcc = 0; SBCHECK(sb); /* If there is no payload, drop all of the data. */ if (tgr.tls_length == htobe16(0)) { m_freem(data); data = NULL; } else { /* Trim header. */ remain = tls->params.tls_hlen; while (remain > 0) { if (data->m_len > remain) { data->m_data += remain; data->m_len -= remain; break; } remain -= data->m_len; data = m_free(data); } /* Trim trailer and clear M_NOTREADY. */ remain = be16toh(tgr.tls_length); m = data; for (m = data; remain > m->m_len; m = m->m_next) { m->m_flags &= ~M_NOTREADY; remain -= m->m_len; } m->m_len = remain; m_freem(m->m_next); m->m_next = NULL; m->m_flags &= ~M_NOTREADY; /* Set EOR on the final mbuf. */ m->m_flags |= M_EOR; } sbappendcontrol_locked(sb, data, control, 0); } sb->sb_flags &= ~SB_TLS_RX_RUNNING; if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc > 0) so->so_error = EMSGSIZE; sorwakeup_locked(so); deref: SOCKBUF_UNLOCK_ASSERT(sb); CURVNET_SET(so->so_vnet); sorele(so); CURVNET_RESTORE(); } void ktls_enqueue_to_free(struct mbuf *m) { struct ktls_wq *wq; bool running; /* Mark it for freeing. */ m->m_epg_flags |= EPG_FLAG_2FREE; wq = &ktls_wq[m->m_epg_tls->wq_index]; mtx_lock(&wq->mtx); STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq); running = wq->running; mtx_unlock(&wq->mtx); if (!running) wakeup(wq); } static void * ktls_buffer_alloc(struct ktls_wq *wq, struct mbuf *m) { void *buf; int domain, running; if (m->m_epg_npgs <= 2) return (NULL); if (ktls_buffer_zone == NULL) return (NULL); if ((u_int)(ticks - wq->lastallocfail) < hz) { /* * Rate-limit allocation attempts after a failure. * ktls_buffer_import() will acquire a per-domain mutex to check * the free page queues and may fail consistently if memory is * fragmented. */ return (NULL); } buf = uma_zalloc(ktls_buffer_zone, M_NOWAIT | M_NORECLAIM); if (buf == NULL) { domain = PCPU_GET(domain); wq->lastallocfail = ticks; /* * Note that this check is "racy", but the races are * harmless, and are either a spurious wakeup if * multiple threads fail allocations before the alloc * thread wakes, or waiting an extra second in case we * see an old value of running == true. 
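
[Editor's note: the tick-based back-off in ktls_buffer_alloc() is a small idiom worth isolating: after a failure, further attempts are skipped until roughly a second of ticks has elapsed, with unsigned subtraction coping with counter wrap. In the sketch 'ticks' and 'hz' are plain parameters; in the kernel they are globals, and struct wq_sketch is an invented stand-in.]

#include <stdbool.h>
#include <stdio.h>

struct wq_sketch {
    unsigned int lastallocfail;     /* tick count of the last failure */
};

/* Skip allocation attempts until about a second of ticks has elapsed
 * since the last failure. */
static bool
should_try_alloc(const struct wq_sketch *wq, unsigned int ticks,
    unsigned int hz)
{
    return ((unsigned int)(ticks - wq->lastallocfail) >= hz);
}

int main(void) {
    struct wq_sketch wq = { .lastallocfail = 500 };
    unsigned int hz = 1000;

    printf("%d\n", should_try_alloc(&wq, 900, hz));     /* 0: too soon */
    printf("%d\n", should_try_alloc(&wq, 1600, hz));    /* 1: retry now */
    return (0);
}
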
*/ if (!VM_DOMAIN_EMPTY(domain)) { running = atomic_load_int(&ktls_domains[domain].alloc_td.running); if (!running) wakeup(&ktls_domains[domain].alloc_td); } } return (buf); } static int ktls_encrypt_record(struct ktls_wq *wq, struct mbuf *m, struct ktls_session *tls, struct ktls_ocf_encrypt_state *state) { vm_page_t pg; int error, i, len, off; KASSERT((m->m_flags & (M_EXTPG | M_NOTREADY)) == (M_EXTPG | M_NOTREADY), ("%p not unready & nomap mbuf\n", m)); KASSERT(ptoa(m->m_epg_npgs) <= ktls_maxlen, ("page count %d larger than maximum frame length %d", m->m_epg_npgs, ktls_maxlen)); /* Anonymous mbufs are encrypted in place. */ if ((m->m_epg_flags & EPG_FLAG_ANON) != 0) return (ktls_ocf_encrypt(state, tls, m, NULL, 0)); /* * For file-backed mbufs (from sendfile), anonymous wired * pages are allocated and used as the encryption destination. */ if ((state->cbuf = ktls_buffer_alloc(wq, m)) != NULL) { len = ptoa(m->m_epg_npgs - 1) + m->m_epg_last_len - m->m_epg_1st_off; state->dst_iov[0].iov_base = (char *)state->cbuf + m->m_epg_1st_off; state->dst_iov[0].iov_len = len; state->parray[0] = DMAP_TO_PHYS((vm_offset_t)state->cbuf); i = 1; } else { off = m->m_epg_1st_off; for (i = 0; i < m->m_epg_npgs; i++, off = 0) { pg = vm_page_alloc_noobj(VM_ALLOC_NODUMP | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); len = m_epg_pagelen(m, i, off); state->parray[i] = VM_PAGE_TO_PHYS(pg); state->dst_iov[i].iov_base = (char *)PHYS_TO_DMAP(state->parray[i]) + off; state->dst_iov[i].iov_len = len; } } KASSERT(i + 1 <= nitems(state->dst_iov), ("dst_iov is too small")); state->dst_iov[i].iov_base = m->m_epg_trail; state->dst_iov[i].iov_len = m->m_epg_trllen; error = ktls_ocf_encrypt(state, tls, m, state->dst_iov, i + 1); if (__predict_false(error != 0)) { /* Free the anonymous pages. */ if (state->cbuf != NULL) uma_zfree(ktls_buffer_zone, state->cbuf); else { for (i = 0; i < m->m_epg_npgs; i++) { pg = PHYS_TO_VM_PAGE(state->parray[i]); (void)vm_page_unwire_noq(pg); vm_page_free(pg); } } } return (error); } /* Number of TLS records in a batch passed to ktls_enqueue(). */ static u_int ktls_batched_records(struct mbuf *m) { int page_count, records; records = 0; page_count = m->m_epg_enc_cnt; while (page_count > 0) { records++; page_count -= m->m_epg_nrdy; m = m->m_next; } KASSERT(page_count == 0, ("%s: mismatched page count", __func__)); return (records); } void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count) { struct ktls_session *tls; struct ktls_wq *wq; int queued; bool running; KASSERT(((m->m_flags & (M_EXTPG | M_NOTREADY)) == (M_EXTPG | M_NOTREADY)), ("ktls_enqueue: %p not unready & nomap mbuf\n", m)); KASSERT(page_count != 0, ("enqueueing TLS mbuf with zero page count")); KASSERT(m->m_epg_tls->mode == TCP_TLS_MODE_SW, ("!SW TLS mbuf")); m->m_epg_enc_cnt = page_count; /* * Save a pointer to the socket. The caller is responsible * for taking an additional reference via soref(). */ m->m_epg_so = so; queued = 1; tls = m->m_epg_tls; wq = &ktls_wq[tls->wq_index]; mtx_lock(&wq->mtx); if (__predict_false(tls->sequential_records)) { /* * For TLS 1.0, records must be encrypted * sequentially. For a given connection, all records * queued to the associated work queue are processed * sequentially. However, sendfile(2) might complete * I/O requests spanning multiple TLS records out of * order. Here we ensure TLS records are enqueued to * the work queue in FIFO order. * * tls->next_seqno holds the sequence number of the * next TLS record that should be enqueued to the work * queue. 
If this next record is not tls->next_seqno, * it must be a future record, so insert it, sorted by * TLS sequence number, into tls->pending_records and * return. * * If this TLS record matches tls->next_seqno, place * it in the work queue and then check * tls->pending_records to see if any * previously-queued records are now ready for * encryption. */ if (m->m_epg_seqno != tls->next_seqno) { struct mbuf *n, *p; p = NULL; STAILQ_FOREACH(n, &tls->pending_records, m_epg_stailq) { if (n->m_epg_seqno > m->m_epg_seqno) break; p = n; } if (n == NULL) STAILQ_INSERT_TAIL(&tls->pending_records, m, m_epg_stailq); else if (p == NULL) STAILQ_INSERT_HEAD(&tls->pending_records, m, m_epg_stailq); else STAILQ_INSERT_AFTER(&tls->pending_records, p, m, m_epg_stailq); mtx_unlock(&wq->mtx); counter_u64_add(ktls_cnt_tx_pending, 1); return; } tls->next_seqno += ktls_batched_records(m); STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq); while (!STAILQ_EMPTY(&tls->pending_records)) { struct mbuf *n; n = STAILQ_FIRST(&tls->pending_records); if (n->m_epg_seqno != tls->next_seqno) break; queued++; STAILQ_REMOVE_HEAD(&tls->pending_records, m_epg_stailq); tls->next_seqno += ktls_batched_records(n); STAILQ_INSERT_TAIL(&wq->m_head, n, m_epg_stailq); } counter_u64_add(ktls_cnt_tx_pending, -(queued - 1)); } else STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq); running = wq->running; mtx_unlock(&wq->mtx); if (!running) wakeup(wq); counter_u64_add(ktls_cnt_tx_queued, queued); } /* * Once a file-backed mbuf (from sendfile) has been encrypted, free * the pages from the file and replace them with the anonymous pages * allocated in ktls_encrypt_record(). */ static void ktls_finish_nonanon(struct mbuf *m, struct ktls_ocf_encrypt_state *state) { int i; MPASS((m->m_epg_flags & EPG_FLAG_ANON) == 0); /* Free the old pages. */ m->m_ext.ext_free(m); /* Replace them with the new pages. */ if (state->cbuf != NULL) { for (i = 0; i < m->m_epg_npgs; i++) m->m_epg_pa[i] = state->parray[0] + ptoa(i); /* Contig pages should go back to the cache. */ m->m_ext.ext_free = ktls_free_mext_contig; } else { for (i = 0; i < m->m_epg_npgs; i++) m->m_epg_pa[i] = state->parray[i]; /* Use the basic free routine. */ m->m_ext.ext_free = mb_free_mext_pgs; } /* Pages are now writable. */ m->m_epg_flags |= EPG_FLAG_ANON; } static __noinline void ktls_encrypt(struct ktls_wq *wq, struct mbuf *top) { struct ktls_ocf_encrypt_state state; struct ktls_session *tls; struct socket *so; struct mbuf *m; int error, npages, total_pages; so = top->m_epg_so; tls = top->m_epg_tls; KASSERT(tls != NULL, ("tls = NULL, top = %p\n", top)); KASSERT(so != NULL, ("so = NULL, top = %p\n", top)); #ifdef INVARIANTS top->m_epg_so = NULL; #endif total_pages = top->m_epg_enc_cnt; npages = 0; /* * Encrypt the TLS records in the chain of mbufs starting with * 'top'. 'total_pages' gives us a total count of pages and is * used to know when we have finished encrypting the TLS * records originally queued with 'top'. * * NB: These mbufs are queued in the socket buffer and * 'm_next' is traversing the mbufs in the socket buffer. The * socket buffer lock is not held while traversing this chain. * Since the mbufs are all marked M_NOTREADY their 'm_next' * pointers should be stable. However, the 'm_next' of the * last mbuf encrypted is not necessarily NULL. It can point * to other mbufs appended while 'top' was on the TLS work * queue. * * Each mbuf holds an entire TLS record. 
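
[Editor's note: the sequential-record handling described here can be modelled with a tiny reorder buffer. The sketch below assumes one TLS record per enqueue (the kernel advances next_seqno by ktls_batched_records(), since a chain may carry several records) and uses a sorted array where the kernel uses the pending_records STAILQ; dispatching is a printf rather than an insert onto the work queue.]

#include <stdint.h>
#include <stdio.h>

#define MAXPEND 8

static uint64_t next_seqno = 100;       /* next record expected in order */
static uint64_t pending[MAXPEND];       /* parked out-of-order records */
static int npending;

static void
enqueue(uint64_t seqno)
{
    int i, j;

    if (seqno != next_seqno) {
        /* Park it, keeping 'pending' sorted by sequence number. */
        for (i = 0; i < npending && pending[i] < seqno; i++)
            ;
        for (j = npending; j > i; j--)
            pending[j] = pending[j - 1];
        pending[i] = seqno;
        npending++;
        return;
    }
    printf("dispatch %llu\n", (unsigned long long)seqno);
    next_seqno++;
    /* Release any parked records that are now contiguous. */
    while (npending > 0 && pending[0] == next_seqno) {
        printf("dispatch %llu\n", (unsigned long long)pending[0]);
        next_seqno++;
        for (i = 1; i < npending; i++)
            pending[i - 1] = pending[i];
        npending--;
    }
}

int main(void) {
    enqueue(102);   /* parked */
    enqueue(101);   /* parked */
    enqueue(100);   /* dispatches 100, 101, 102 */
    return (0);
}
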
*/ error = 0; for (m = top; npages != total_pages; m = m->m_next) { KASSERT(m->m_epg_tls == tls, ("different TLS sessions in a single mbuf chain: %p vs %p", tls, m->m_epg_tls)); KASSERT(npages + m->m_epg_npgs <= total_pages, ("page count mismatch: top %p, total_pages %d, m %p", top, total_pages, m)); error = ktls_encrypt_record(wq, m, tls, &state); if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); break; } if ((m->m_epg_flags & EPG_FLAG_ANON) == 0) ktls_finish_nonanon(m, &state); npages += m->m_epg_nrdy; /* * Drop a reference to the session now that it is no * longer needed. Existing code depends on encrypted * records having no associated session vs * yet-to-be-encrypted records having an associated * session. */ m->m_epg_tls = NULL; ktls_free(tls); } CURVNET_SET(so->so_vnet); if (error == 0) { (void)(*so->so_proto->pr_usrreqs->pru_ready)(so, top, npages); } else { so->so_proto->pr_usrreqs->pru_abort(so); so->so_error = EIO; mb_free_notready(top, total_pages); } sorele(so); CURVNET_RESTORE(); } void ktls_encrypt_cb(struct ktls_ocf_encrypt_state *state, int error) { struct ktls_session *tls; struct socket *so; struct mbuf *m; int npages; m = state->m; if ((m->m_epg_flags & EPG_FLAG_ANON) == 0) ktls_finish_nonanon(m, state); so = state->so; free(state, M_KTLS); /* * Drop a reference to the session now that it is no longer * needed. Existing code depends on encrypted records having * no associated session vs yet-to-be-encrypted records having * an associated session. */ tls = m->m_epg_tls; m->m_epg_tls = NULL; ktls_free(tls); if (error != 0) counter_u64_add(ktls_offload_failed_crypto, 1); CURVNET_SET(so->so_vnet); npages = m->m_epg_nrdy; if (error == 0) { (void)(*so->so_proto->pr_usrreqs->pru_ready)(so, m, npages); } else { so->so_proto->pr_usrreqs->pru_abort(so); so->so_error = EIO; mb_free_notready(m, npages); } sorele(so); CURVNET_RESTORE(); } /* * Similar to ktls_encrypt, but used with asynchronous OCF backends * (coprocessors) where encryption does not use host CPU resources and * it can be beneficial to queue more requests than CPUs. 
*/ static __noinline void ktls_encrypt_async(struct ktls_wq *wq, struct mbuf *top) { struct ktls_ocf_encrypt_state *state; struct ktls_session *tls; struct socket *so; struct mbuf *m, *n; int error, mpages, npages, total_pages; so = top->m_epg_so; tls = top->m_epg_tls; KASSERT(tls != NULL, ("tls = NULL, top = %p\n", top)); KASSERT(so != NULL, ("so = NULL, top = %p\n", top)); #ifdef INVARIANTS top->m_epg_so = NULL; #endif total_pages = top->m_epg_enc_cnt; npages = 0; error = 0; for (m = top; npages != total_pages; m = n) { KASSERT(m->m_epg_tls == tls, ("different TLS sessions in a single mbuf chain: %p vs %p", tls, m->m_epg_tls)); KASSERT(npages + m->m_epg_npgs <= total_pages, ("page count mismatch: top %p, total_pages %d, m %p", top, total_pages, m)); state = malloc(sizeof(*state), M_KTLS, M_WAITOK | M_ZERO); soref(so); state->so = so; state->m = m; mpages = m->m_epg_nrdy; n = m->m_next; error = ktls_encrypt_record(wq, m, tls, state); if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); free(state, M_KTLS); CURVNET_SET(so->so_vnet); sorele(so); CURVNET_RESTORE(); break; } npages += mpages; } CURVNET_SET(so->so_vnet); if (error != 0) { so->so_proto->pr_usrreqs->pru_abort(so); so->so_error = EIO; mb_free_notready(m, total_pages - npages); } sorele(so); CURVNET_RESTORE(); } static int ktls_bind_domain(int domain) { int error; error = cpuset_setthread(curthread->td_tid, &cpuset_domain[domain]); if (error != 0) return (error); curthread->td_domain.dr_policy = DOMAINSET_PREF(domain); return (0); } static void ktls_alloc_thread(void *ctx) { struct ktls_domain_info *ktls_domain = ctx; struct ktls_alloc_thread *sc = &ktls_domain->alloc_td; void **buf; struct sysctl_oid *oid; char name[80]; int domain, error, i, nbufs; domain = ktls_domain - ktls_domains; if (bootverbose) printf("Starting KTLS alloc thread for domain %d\n", domain); error = ktls_bind_domain(domain); if (error) printf("Unable to bind KTLS alloc thread for domain %d: error %d\n", domain, error); snprintf(name, sizeof(name), "domain%d", domain); oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_ipc_tls), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "allocs", CTLFLAG_RD, &sc->allocs, 0, "buffers allocated"); SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "wakeups", CTLFLAG_RD, &sc->wakeups, 0, "thread wakeups"); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "running", CTLFLAG_RD, &sc->running, 0, "thread running"); buf = NULL; nbufs = 0; for (;;) { atomic_store_int(&sc->running, 0); tsleep(sc, PZERO | PNOLOCK, "-", 0); atomic_store_int(&sc->running, 1); sc->wakeups++; if (nbufs != ktls_max_alloc) { free(buf, M_KTLS); nbufs = atomic_load_int(&ktls_max_alloc); buf = malloc(sizeof(void *) * nbufs, M_KTLS, M_WAITOK | M_ZERO); } /* * Below we allocate nbufs with different allocation * flags than we use when allocating normally during * encryption in the ktls worker thread. We specify * M_NORECLAIM in the worker thread. However, we omit * that flag here and add M_WAITOK so that the VM * system is permitted to perform expensive work to * defragment memory. We do this here, as it does not * matter if this thread blocks. If we block a ktls * worker thread, we risk developing backlogs of * buffers to be encrypted, leading to surges of * traffic and potential NIC output drops. 
*/ for (i = 0; i < nbufs; i++) { buf[i] = uma_zalloc(ktls_buffer_zone, M_WAITOK); sc->allocs++; } for (i = 0; i < nbufs; i++) { uma_zfree(ktls_buffer_zone, buf[i]); buf[i] = NULL; } } } static void ktls_work_thread(void *ctx) { struct ktls_wq *wq = ctx; struct mbuf *m, *n; struct socket *so, *son; STAILQ_HEAD(, mbuf) local_m_head; STAILQ_HEAD(, socket) local_so_head; int cpu; cpu = wq - ktls_wq; if (bootverbose) printf("Starting KTLS worker thread for CPU %d\n", cpu); /* * Bind to a core. If ktls_bind_threads is > 1, then * we bind to the NUMA domain instead. */ if (ktls_bind_threads) { int error; if (ktls_bind_threads > 1) { struct pcpu *pc = pcpu_find(cpu); error = ktls_bind_domain(pc->pc_domain); } else { cpuset_t mask; CPU_SETOF(cpu, &mask); error = cpuset_setthread(curthread->td_tid, &mask); } if (error) printf("Unable to bind KTLS worker thread for CPU %d: error %d\n", cpu, error); } #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) fpu_kern_thread(0); #endif for (;;) { mtx_lock(&wq->mtx); while (STAILQ_EMPTY(&wq->m_head) && STAILQ_EMPTY(&wq->so_head)) { wq->running = false; mtx_sleep(wq, &wq->mtx, 0, "-", 0); wq->running = true; } STAILQ_INIT(&local_m_head); STAILQ_CONCAT(&local_m_head, &wq->m_head); STAILQ_INIT(&local_so_head); STAILQ_CONCAT(&local_so_head, &wq->so_head); mtx_unlock(&wq->mtx); STAILQ_FOREACH_SAFE(m, &local_m_head, m_epg_stailq, n) { if (m->m_epg_flags & EPG_FLAG_2FREE) { ktls_free(m->m_epg_tls); m_free_raw(m); } else { if (m->m_epg_tls->sync_dispatch) ktls_encrypt(wq, m); else ktls_encrypt_async(wq, m); counter_u64_add(ktls_cnt_tx_queued, -1); } } STAILQ_FOREACH_SAFE(so, &local_so_head, so_ktls_rx_list, son) { ktls_decrypt(so); counter_u64_add(ktls_cnt_rx_queued, -1); } } } #if defined(INET) || defined(INET6) static void ktls_disable_ifnet_help(void *context, int pending __unused) { struct ktls_session *tls; struct inpcb *inp; struct tcpcb *tp; struct socket *so; int err; tls = context; inp = tls->inp; if (inp == NULL) return; INP_WLOCK(inp); so = inp->inp_socket; MPASS(so != NULL); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { goto out; } if (so->so_snd.sb_tls_info != NULL) err = ktls_set_tx_mode(so, TCP_TLS_MODE_SW); else err = ENXIO; if (err == 0) { counter_u64_add(ktls_ifnet_disable_ok, 1); /* ktls_set_tx_mode() drops inp wlock, so recheck flags */ if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0 && (tp = intotcpcb(inp)) != NULL && tp->t_fb->tfb_hwtls_change != NULL) (*tp->t_fb->tfb_hwtls_change)(tp, 0); } else { counter_u64_add(ktls_ifnet_disable_fail, 1); } out: sorele(so); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); ktls_free(tls); } /* * Called when re-transmits are becoming a substantial portion of the * sends on this connection. When this happens, we transition the * connection to software TLS. This is needed because most inline TLS * NICs keep crypto state only for in-order transmits. This means * that to handle a TCP rexmit (which is out-of-order), the NIC must * re-DMA the entire TLS record up to and including the current * segment. This means that when re-transmitting the last ~1448 byte * segment of a 16KB TLS record, we could wind up re-DMA'ing an order * of magnitude more data than we are sending. This can cause the * PCIe link to saturate well before the network, which can cause * output drops, and a general loss of capacity. 
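
[Editor's note: the worker loop below uses the usual drain-to-a-local-list pattern: concatenate the shared queue onto a local head while holding the lock, then process the entries without it. A compact userspace rendition, assuming the BSD <sys/queue.h> macros and a pthread mutex; it is single-threaded, so the mutex only marks where the critical section would be.]

/* Build with: cc -o drain drain.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <sys/queue.h>

struct job {
    STAILQ_ENTRY(job) link;
    int id;
};
STAILQ_HEAD(jobq, job);

static struct jobq wq_head = STAILQ_HEAD_INITIALIZER(wq_head);
static pthread_mutex_t wq_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Move the whole shared queue to a local list under the lock, then
 * process the entries without holding it. */
static void
drain(void)
{
    struct jobq local = STAILQ_HEAD_INITIALIZER(local);
    struct job *j;

    pthread_mutex_lock(&wq_mtx);
    STAILQ_CONCAT(&local, &wq_head);    /* wq_head is now empty */
    pthread_mutex_unlock(&wq_mtx);

    STAILQ_FOREACH(j, &local, link)
        printf("processing job %d\n", j->id);
}

int main(void) {
    struct job a = { .id = 1 }, b = { .id = 2 };

    pthread_mutex_lock(&wq_mtx);
    STAILQ_INSERT_TAIL(&wq_head, &a, link);
    STAILQ_INSERT_TAIL(&wq_head, &b, link);
    pthread_mutex_unlock(&wq_mtx);
    drain();
    return (0);
}
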
*/ void ktls_disable_ifnet(void *arg) { struct tcpcb *tp; struct inpcb *inp; struct socket *so; struct ktls_session *tls; tp = arg; inp = tp->t_inpcb; INP_WLOCK_ASSERT(inp); so = inp->inp_socket; SOCK_LOCK(so); tls = so->so_snd.sb_tls_info; if (tls->disable_ifnet_pending) { SOCK_UNLOCK(so); return; } /* * note that disable_ifnet_pending is never cleared; disabling * ifnet can only be done once per session, so we never want * to do it again */ (void)ktls_hold(tls); in_pcbref(inp); soref(so); tls->disable_ifnet_pending = true; tls->inp = inp; SOCK_UNLOCK(so); TASK_INIT(&tls->disable_ifnet_task, 0, ktls_disable_ifnet_help, tls); (void)taskqueue_enqueue(taskqueue_thread, &tls->disable_ifnet_task); } #endif diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 421fa5da37d9..884562510e61 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -1,1832 +1,1825 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_kern_tls.h" #include "opt_param.h" #include #include /* for aio_swake proto */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Function pointer set by the AIO routines so that the socket buffer code * can call back into the AIO module if it is loaded. */ void (*aio_swake)(struct socket *, struct sockbuf *); /* * Primitive routines for operating on socket buffers */ u_long sb_max = SB_MAX; u_long sb_max_adj = (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ #ifdef KERN_TLS static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); #endif static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); static void sbflush_internal(struct sockbuf *sb); /* * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. 
*/ static void sbm_clrprotoflags(struct mbuf *m, int flags) { int mask; mask = ~M_PROTOFLAGS; if (flags & PRUS_NOTREADY) mask |= M_NOTREADY; while (m) { m->m_flags &= mask; m = m->m_next; } } /* * Compress M_NOTREADY mbufs after they have been readied by sbready(). * * sbcompress() skips M_NOTREADY mbufs since the data is not available to * be copied at the time of sbcompress(). This function combines small * mbufs similar to sbcompress() once mbufs are ready. 'm0' is the first * mbuf sbready() marked ready, and 'end' is the first mbuf still not * ready. */ static void sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end) { struct mbuf *m, *n; int ext_size; SOCKBUF_LOCK_ASSERT(sb); if ((sb->sb_flags & SB_NOCOALESCE) != 0) return; for (m = m0; m != end; m = m->m_next) { MPASS((m->m_flags & M_NOTREADY) == 0); /* * NB: In sbcompress(), 'n' is the last mbuf in the * socket buffer and 'm' is the new mbuf being copied * into the trailing space of 'n'. Here, the roles * are reversed and 'n' is the next mbuf after 'm' * that is being copied into the trailing space of * 'm'. */ n = m->m_next; #ifdef KERN_TLS /* Try to coalesce adjacent ktls mbuf hdr/trailers. */ if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && (m->m_flags & M_EXTPG) && (n->m_flags & M_EXTPG) && !mbuf_has_tls_session(m) && !mbuf_has_tls_session(n)) { int hdr_len, trail_len; hdr_len = n->m_epg_hdrlen; trail_len = m->m_epg_trllen; if (trail_len != 0 && hdr_len != 0 && trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) { /* copy n's header to m's trailer */ memcpy(&m->m_epg_trail[trail_len], n->m_epg_hdr, hdr_len); m->m_epg_trllen += hdr_len; m->m_len += hdr_len; n->m_epg_hdrlen = 0; n->m_len -= hdr_len; } } #endif /* Compress small unmapped mbufs into plain mbufs. */ if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN && !mbuf_has_tls_session(m)) { ext_size = m->m_ext.ext_size; if (mb_unmapped_compress(m) == 0) { sb->sb_mbcnt -= ext_size; sb->sb_ccnt -= 1; } } while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && M_WRITABLE(m) && (m->m_flags & M_EXTPG) == 0 && !mbuf_has_tls_session(n) && !mbuf_has_tls_session(m) && n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ n->m_len <= M_TRAILINGSPACE(m) && m->m_type == n->m_type) { KASSERT(sb->sb_lastrecord != n, ("%s: merging start of record (%p) into previous mbuf (%p)", __func__, n, m)); m_copydata(n, 0, n->m_len, mtodo(m, m->m_len)); m->m_len += n->m_len; m->m_next = n->m_next; m->m_flags |= n->m_flags & M_EOR; if (sb->sb_mbtail == n) sb->sb_mbtail = m; sb->sb_mbcnt -= MSIZE; sb->sb_mcnt -= 1; if (n->m_flags & M_EXT) { sb->sb_mbcnt -= n->m_ext.ext_size; sb->sb_ccnt -= 1; } m_free(n); n = m->m_next; } } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); } /* * Mark ready "count" units of I/O starting with "m". Most mbufs * count as a single unit of I/O except for M_EXTPG mbufs which * are backed by multiple pages. */ int sbready(struct sockbuf *sb, struct mbuf *m0, int count) { struct mbuf *m; u_int blocker; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); KASSERT(count > 0, ("%s: invalid count %d", __func__, count)); m = m0; blocker = (sb->sb_fnrdy == m) ? 
M_BLOCKED : 0; while (count > 0) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); if ((m->m_flags & M_EXTPG) != 0 && m->m_epg_npgs != 0) { if (count < m->m_epg_nrdy) { m->m_epg_nrdy -= count; count = 0; break; } count -= m->m_epg_nrdy; m->m_epg_nrdy = 0; } else count--; m->m_flags &= ~(M_NOTREADY | blocker); if (blocker) sb->sb_acc += m->m_len; m = m->m_next; } /* * If the first mbuf is still not fully ready because only * some of its backing pages were readied, no further progress * can be made. */ if (m0 == m) { MPASS(m->m_flags & M_NOTREADY); return (EINPROGRESS); } if (!blocker) { sbready_compress(sb, m0, m); return (EINPROGRESS); } /* This one was blocking all the queue. */ for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { KASSERT(m->m_flags & M_BLOCKED, ("%s: m %p !M_BLOCKED", __func__, m)); m->m_flags &= ~M_BLOCKED; sb->sb_acc += m->m_len; } sb->sb_fnrdy = m; sbready_compress(sb, m0, m); return (0); } /* * Adjust sockbuf state reflecting allocation of m. */ void sballoc(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) { if (m->m_flags & M_NOTREADY) sb->sb_fnrdy = m; else sb->sb_acc += m->m_len; } else m->m_flags |= M_BLOCKED; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl += m->m_len; sb->sb_mbcnt += MSIZE; sb->sb_mcnt += 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt += m->m_ext.ext_size; sb->sb_ccnt += 1; } } /* * Adjust sockbuf state reflecting freeing of m. */ void sbfree(struct sockbuf *sb, struct mbuf *m) { #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ SOCKBUF_LOCK_ASSERT(sb); #endif sb->sb_ccc -= m->m_len; if (!(m->m_flags & M_NOTAVAIL)) sb->sb_acc -= m->m_len; if (m == sb->sb_fnrdy) { struct mbuf *n; KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); n = m->m_next; while (n != NULL && !(n->m_flags & M_NOTREADY)) { n->m_flags &= ~M_BLOCKED; sb->sb_acc += n->m_len; n = n->m_next; } sb->sb_fnrdy = n; } if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= m->m_len; sb->sb_mbcnt -= MSIZE; sb->sb_mcnt -= 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt -= m->m_ext.ext_size; sb->sb_ccnt -= 1; } if (sb->sb_sndptr == m) { sb->sb_sndptr = NULL; sb->sb_sndptroff = 0; } if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= m->m_len; } #ifdef KERN_TLS /* * Similar to sballoc/sbfree but does not adjust state associated with * the sb_mb chain such as sb_fnrdy or sb_sndptr*. Also assumes mbufs * are not ready. */ void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_ccc += m->m_len; sb->sb_tlscc += m->m_len; sb->sb_mbcnt += MSIZE; sb->sb_mcnt += 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt += m->m_ext.ext_size; sb->sb_ccnt += 1; } } void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m) { #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ SOCKBUF_LOCK_ASSERT(sb); #endif sb->sb_ccc -= m->m_len; sb->sb_tlscc -= m->m_len; sb->sb_mbcnt -= MSIZE; sb->sb_mcnt -= 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt -= m->m_ext.ext_size; sb->sb_ccnt -= 1; } } #endif /* * Socantsendmore indicates that no more data will be sent on the socket; it * would normally be applied to a socket when the user informs the system * that no more data is to be sent, by the protocol code (in case * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be * received, and will normally be applied to the socket by a protocol when it * detects that the peer will send no more data. 
Data queued for reading in * the socket may yet be read. */ void socantsendmore_locked(struct socket *so) { SOCK_SENDBUF_LOCK_ASSERT(so); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantsendmore(struct socket *so) { SOCK_SENDBUF_LOCK(so); socantsendmore_locked(so); SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantrcvmore_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); so->so_rcv.sb_state |= SBS_CANTRCVMORE; #ifdef KERN_TLS if (so->so_rcv.sb_flags & SB_TLS_RX) ktls_check_rx(&so->so_rcv); #endif sorwakeup_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void socantrcvmore(struct socket *so) { SOCK_RECVBUF_LOCK(so); socantrcvmore_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); if (so->so_options & SO_RERROR) { so->so_rerror = ENOBUFS; sorwakeup_locked(so); } else SOCK_RECVBUF_UNLOCK(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow(struct socket *so) { SOCK_RECVBUF_LOCK(so); soroverflow_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(struct socket *so, sb_which which) { struct sockbuf *sb; SOCK_BUF_LOCK_ASSERT(so, which); sb = sobuf(so, which); sb->sb_flags |= SB_WAIT; return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which), (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0)); } /* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. * * Called with the socket buffer lock held; will release the lock by the end * of the function. This allows the caller to acquire the socket buffer lock * while testing for the need for various sorts of wakeup and hold it through * to the point where it's no longer required. We currently hold the lock * through calls out to other subsystems (with the exception of kqueue), and * then release it to avoid lock order issues. It's not clear that's * correct. */ static __always_inline void sowakeup(struct socket *so, const sb_which which) { struct sockbuf *sb; int ret; SOCK_BUF_LOCK_ASSERT(so, which); sb = sobuf(so, which); selwakeuppri(sb->sb_sel, PSOCK); if (!SEL_WAITING(sb->sb_sel)) sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_acc); } KNOTE_LOCKED(&sb->sb_sel->si_note, 0); if (sb->sb_upcall != NULL) { ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); if (ret == SU_ISCONNECTED) { KASSERT(sb == &so->so_rcv, ("SO_SND upcall returned SU_ISCONNECTED")); soupcall_clear(so, SO_RCV); } } else ret = SU_OK; if (sb->sb_flags & SB_AIO) sowakeup_aio(so, which); SOCK_BUF_UNLOCK(so, which); if (ret == SU_ISCONNECTED) soisconnected(so); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); SOCK_BUF_UNLOCK_ASSERT(so, which); } /* * Do we need to notify the other side when I/O is possible? */ static __always_inline bool sb_notify(const struct sockbuf *sb) { return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | SB_UPCALL | SB_AIO | SB_KNOTE)) != 0); } void sorwakeup_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); if (sb_notify(&so->so_rcv)) sowakeup(so, SO_RCV); else SOCK_RECVBUF_UNLOCK(so); } void sowwakeup_locked(struct socket *so) { SOCK_SENDBUF_LOCK_ASSERT(so); if (sb_notify(&so->so_snd)) sowakeup(so, SO_SND); else SOCK_SENDBUF_UNLOCK(so); } /* * Socket buffer (struct sockbuf) utility routines. 
* * Each socket contains two socket buffers: one for sending data and one for * receiving data. Each buffer contains a queue of mbufs, information about * the number of mbufs and amount of data in the queue, and other fields * allowing select() statements and notification on data availability to be * implemented. * * Data stored in a socket buffer is maintained as a list of records. Each * record is a list of mbufs chained together with the m_next field. Records * are chained together with the m_nextpkt field. The upper level routine * soreceive() expects the following conventions to be observed when placing * information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's name, * then a record containing that name must be present before any * associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really just * additional data associated with the message), and there are ``rights'' * to be received, then a record containing this data should be present * (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by a data * record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space should * be released by calling sbrelease() when the socket is destroyed. */ int soreserve(struct socket *so, u_long sndcc, u_long rcvcc) { struct thread *td = curthread; SOCK_SENDBUF_LOCK(so); SOCK_RECVBUF_LOCK(so); if (sbreserve_locked(so, SO_SND, sndcc, td) == 0) goto bad; if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; SOCK_RECVBUF_UNLOCK(so); SOCK_SENDBUF_UNLOCK(so); return (0); bad2: sbrelease_locked(so, SO_SND); bad: SOCK_RECVBUF_UNLOCK(so); SOCK_SENDBUF_UNLOCK(so); return (ENOBUFS); } static int sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) { int error = 0; u_long tmp_sb_max = sb_max; error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); if (error || !req->newptr) return (error); if (tmp_sb_max < MSIZE + MCLBYTES) return (EINVAL); sb_max = tmp_sb_max; sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); return (0); } /* * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't * become limiting if buffering efficiency is near the normal case. */ bool sbreserve_locked(struct socket *so, sb_which which, u_long cc, struct thread *td) { struct sockbuf *sb = sobuf(so, which); rlim_t sbsize_limit; SOCK_BUF_LOCK_ASSERT(so, which); /* * When a thread is passed, we take into account the thread's socket * buffer size limit. The caller will generally pass curthread, but * in the TCP input path, NULL will be passed to indicate that no * appropriate thread resource limits are available. In that case, * we don't apply a process limit. 
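
[Editor's note: a stripped-down userspace model of the two-level structure this comment describes, with records linked through m_nextpkt and the mbufs of each record through m_next. struct mbuf_sketch is invented and keeps only the fields needed to walk the layout.]

#include <stdio.h>

struct mbuf_sketch {
    struct mbuf_sketch *m_next;     /* next mbuf within the same record */
    struct mbuf_sketch *m_nextpkt;  /* first mbuf of the next record */
    int m_len;
};

static int
record_bytes(const struct mbuf_sketch *m)
{
    int len = 0;

    for (; m != NULL; m = m->m_next)
        len += m->m_len;
    return (len);
}

int main(void) {
    struct mbuf_sketch b = { NULL, NULL, 30 };
    struct mbuf_sketch a = { &b, NULL, 70 };    /* record 1: 70 + 30 bytes */
    struct mbuf_sketch c = { NULL, NULL, 40 };  /* record 2: 40 bytes */
    int n = 1;

    a.m_nextpkt = &c;
    for (struct mbuf_sketch *rec = &a; rec != NULL; rec = rec->m_nextpkt)
        printf("record %d: %d bytes\n", n++, record_bytes(rec));
    return (0);
}
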
*/ if (cc > sb_max_adj) return (false); if (td != NULL) { sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); } else sbsize_limit = RLIM_INFINITY; if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, sbsize_limit)) return (false); sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (true); } int sbsetopt(struct socket *so, int cmd, u_long cc) { struct sockbuf *sb; sb_which wh; short *flags; u_int *hiwat, *lowat; int error; sb = NULL; SOCK_LOCK(so); if (SOLISTENING(so)) { switch (cmd) { case SO_SNDLOWAT: case SO_SNDBUF: lowat = &so->sol_sbsnd_lowat; hiwat = &so->sol_sbsnd_hiwat; flags = &so->sol_sbsnd_flags; break; case SO_RCVLOWAT: case SO_RCVBUF: lowat = &so->sol_sbrcv_lowat; hiwat = &so->sol_sbrcv_hiwat; flags = &so->sol_sbrcv_flags; break; } } else { switch (cmd) { case SO_SNDLOWAT: case SO_SNDBUF: sb = &so->so_snd; wh = SO_SND; break; case SO_RCVLOWAT: case SO_RCVBUF: sb = &so->so_rcv; wh = SO_RCV; break; } flags = &sb->sb_flags; hiwat = &sb->sb_hiwat; lowat = &sb->sb_lowat; SOCK_BUF_LOCK(so, wh); } error = 0; switch (cmd) { case SO_SNDBUF: case SO_RCVBUF: if (SOLISTENING(so)) { if (cc > sb_max_adj) { error = ENOBUFS; break; } *hiwat = cc; if (*lowat > *hiwat) *lowat = *hiwat; } else { if (!sbreserve_locked(so, wh, cc, curthread)) error = ENOBUFS; } if (error == 0) *flags &= ~SB_AUTOSIZE; break; case SO_SNDLOWAT: case SO_RCVLOWAT: /* * Make sure the low-water is never greater than the * high-water. */ *lowat = (cc > *hiwat) ? *hiwat : cc; break; } if (!SOLISTENING(so)) SOCK_BUF_UNLOCK(so, wh); SOCK_UNLOCK(so); return (error); } /* * Free mbufs held by a socket, and reserved mbuf space. */ static void sbrelease_internal(struct socket *so, sb_which which) { struct sockbuf *sb = sobuf(so, which); sbflush_internal(sb); (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); sb->sb_mbmax = 0; } void sbrelease_locked(struct socket *so, sb_which which) { SOCK_BUF_LOCK_ASSERT(so, which); sbrelease_internal(so, which); } void sbrelease(struct socket *so, sb_which which) { SOCK_BUF_LOCK(so, which); sbrelease_locked(so, which); SOCK_BUF_UNLOCK(so, which); } void sbdestroy(struct socket *so, sb_which which) { #ifdef KERN_TLS struct sockbuf *sb = sobuf(so, which); if (sb->sb_tls_info != NULL) ktls_free(sb->sb_tls_info); sb->sb_tls_info = NULL; #endif sbrelease_internal(so, which); } /* * Routines to add and remove data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to append * new mbufs to a socket buffer, after checking that adequate space is * available, comparing the function sbspace() with the amount of data to be * added. sbappendrecord() differs from sbappend() in that data supplied is * treated as the beginning of a new record. To place a sender's address, * optional access rights, and data in a socket receive buffer, * sbappendaddr() should be used. To place access rights and data in a * socket receive buffer, sbappendrights() should be used. In either case, * the new data begins a new record. Note that unlike sbappend() and * sbappendrecord(), these routines check for the caller that there will be * enough space to store the data. Each fails if there is not enough space, * or if it cannot find mbufs to store additional information in. * * Reliable protocols may use the socket send buffer to hold data awaiting * acknowledgement. 
Data is normally copied from a socket send buffer in a * protocol with m_copy for output to a peer, and then removing the data from * the socket buffer with sbdrop() or sbdroprecord() when the data is * acknowledged by the peer. */ #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; if (m != sb->sb_lastrecord) { printf("%s: sb_mb %p sb_lastrecord %p last %p\n", __func__, sb->sb_mb, sb->sb_lastrecord, m); printf("packet chain:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) printf("\t%p\n", m); panic("%s from %s:%u", __func__, file, line); } } void sblastmbufchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mbtail) { printf("%s: sb_mb %p sb_mbtail %p last %p\n", __func__, sb->sb_mb, sb->sb_mbtail, m); printf("packet tree:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) printf("%p ", n); printf("\n"); } panic("%s from %s:%u", __func__, file, line); } #ifdef KERN_TLS m = sb->sb_mtls; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mtlstail) { printf("%s: sb_mtls %p sb_mtlstail %p last %p\n", __func__, sb->sb_mtls, sb->sb_mtlstail, m); printf("TLS packet tree:\n"); printf("\t"); for (m = sb->sb_mtls; m != NULL; m = m->m_next) { printf("%p ", m); } printf("\n"); panic("%s from %s:%u", __func__, file, line); } #endif } #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ (sb)->sb_mb = (m0); \ (sb)->sb_lastrecord = (m0); \ } while (/*CONSTCOND*/0) /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) { struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); if (m == NULL) return; sbm_clrprotoflags(m, flags); SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * XXX Would like to simply use sb_mbtail here, but * XXX I need to verify that I won't miss an EOR that * XXX way. */ if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * If this is the first record in the socket buffer, * it's also the last record. */ sb->sb_lastrecord = m; } } sbcompress(sb, m, n); SBLASTRECORDCHK(sb); } /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappend_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } #ifdef KERN_TLS /* * Append an mbuf containing encrypted TLS data. The data * is marked M_NOTREADY until it has been decrypted and * stored as a TLS record. 
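 *
 * A rough sketch of the receive path this serves, assuming the usual
 * KTLS RX flow described in the routines below:
 *
 *	TCP input
 *	 -> sbappendstream_locked()	(SB_TLS_RX set on the sockbuf)
 *	  -> sbappend_ktls_rx()		encrypted data chained on sb_mtls
 *	   -> ktls_check_rx()		decryption gets scheduled
 *	      ... ktls_decrypt() ...	decrypted records are then appended
 *					as records via sbappendrecord()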
*/ static void sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m) { struct mbuf *n; SBLASTMBUFCHK(sb); /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, 0); for (n = m; n != NULL; n = n->m_next) n->m_flags |= M_NOTREADY; sbcompress_ktls_rx(sb, m, sb->sb_mtlstail); ktls_check_rx(sb); } #endif /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); #ifdef KERN_TLS /* * Decrypted TLS records are appended as records via * sbappendrecord(). TCP passes encrypted TLS records to this * function which must be scheduled for decryption. */ if (sb->sb_flags & SB_TLS_RX) { sbappend_ktls_rx(sb, m); return; } #endif KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); SBLASTMBUFCHK(sb); #ifdef KERN_TLS if (sb->sb_tls_info != NULL) ktls_seq(sb, m); #endif /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); sbcompress(sb, m, sb->sb_mbtail); sb->sb_lastrecord = sb->sb_mb; SBLASTRECORDCHK(sb); } /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappendstream_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } #ifdef SOCKBUF_DEBUG void sbcheck(struct sockbuf *sb, const char *file, int line) { struct mbuf *m, *n, *fnrdy; u_long acc, ccc, mbcnt; #ifdef KERN_TLS u_long tlscc; #endif SOCKBUF_LOCK_ASSERT(sb); acc = ccc = mbcnt = 0; fnrdy = NULL; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { if (m->m_len == 0) { printf("sb %p empty mbuf %p\n", sb, m); goto fail; } if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { if (m != sb->sb_fnrdy) { printf("sb %p: fnrdy %p != m %p\n", sb, sb->sb_fnrdy, m); goto fail; } fnrdy = m; } if (fnrdy) { if (!(m->m_flags & M_NOTAVAIL)) { printf("sb %p: fnrdy %p, m %p is avail\n", sb, sb->sb_fnrdy, m); goto fail; } } else acc += m->m_len; ccc += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } #ifdef KERN_TLS /* * Account for mbufs "detached" by ktls_detach_record() while * they are decrypted by ktls_decrypt(). tlsdcc gives a count * of the detached bytes that are included in ccc. The mbufs * and clusters are not included in the socket buffer * accounting. 
*/ ccc += sb->sb_tlsdcc; tlscc = 0; for (m = sb->sb_mtls; m; m = m->m_next) { if (m->m_nextpkt != NULL) { printf("sb %p TLS mbuf %p with nextpkt\n", sb, m); goto fail; } if ((m->m_flags & M_NOTREADY) == 0) { printf("sb %p TLS mbuf %p ready\n", sb, m); goto fail; } tlscc += m->m_len; ccc += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } if (sb->sb_tlscc != tlscc) { printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, sb->sb_tlsdcc); goto fail; } #endif if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); #ifdef KERN_TLS printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, sb->sb_tlsdcc); #endif goto fail; } return; fail: panic("%s from %s:%u", __func__, file, line); } #endif /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); if (m0 == NULL) return; m_clrprotoflags(m0); /* * Put the first mbuf on the queue. Note this permits zero length * records. */ sballoc(sb, m0); SBLASTRECORDCHK(sb); SBLINKRECORD(sb, m0); sb->sb_mbtail = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } /* always call sbcompress() so it can do SBLASTMBUFCHK() */ sbcompress(sb, m, m0); } /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord(struct sockbuf *sb, struct mbuf *m0) { SOCKBUF_LOCK(sb); sbappendrecord_locked(sb, m0); SOCKBUF_UNLOCK(sb); } /* Helper routine that appends data, control, and address to a sockbuf. */ static int sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) { struct mbuf *m, *n, *nlast; #if MSIZE <= 256 if (asa->sa_len > MLEN) return (0); #endif m = m_get(M_NOWAIT, MT_SONAME); if (m == NULL) return (0); m->m_len = asa->sa_len; bcopy(asa, mtod(m, caddr_t), asa->sa_len); if (m0) { M_ASSERT_NO_SND_TAG(m0); m_clrprotoflags(m0); m_tag_delete_chain(m0, NULL); /* * Clear some persistent info from pkthdr. * We don't use m_demote(), because some netgraph consumers * expect M_PKTHDR presence. */ m0->m_pkthdr.rcvif = NULL; m0->m_pkthdr.flowid = 0; m0->m_pkthdr.csum_flags = 0; m0->m_pkthdr.fibnum = 0; m0->m_pkthdr.rsstype = 0; } if (ctrl_last) ctrl_last->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); sballoc(sb, n); nlast = n; SBLINKRECORD(sb, m); sb->sb_mbtail = nlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); return (1); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; int space = asa->sa_len; SOCKBUF_LOCK_ASSERT(sb); if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr_locked"); if (m0) space += m0->m_pkthdr.len; space += m_length(control, &ctrl_last); if (space > sbspace(sb)) return (0); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. 
If present, m0 must include a packet header * with total length. Returns 0 if insufficient mbufs. Does not validate space * on the receiving sockbuf. */ int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; SOCKBUF_LOCK_ASSERT(sb); ctrl_last = (control == NULL) ? NULL : m_last(control); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendaddr_locked(sb, asa, m0, control); SOCKBUF_UNLOCK(sb); return (retval); } void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags) { struct mbuf *m, *mlast; sbm_clrprotoflags(m0, flags); m_last(control)->m_next = m0; SBLASTRECORDCHK(sb); for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); sballoc(sb, m); mlast = m; SBLINKRECORD(sb, control); sb->sb_mbtail = mlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); } void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags) { SOCKBUF_LOCK(sb); sbappendcontrol_locked(sb, m0, control, flags); SOCKBUF_UNLOCK(sb); } /* * Append the data in mbuf chain (m) into the socket buffer sb following mbuf * (n). If (n) is NULL, the buffer is presumed empty. * * When the data is compressed, mbufs in the chain may be handled in one of * three ways: * * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no * record boundary, and no change in data type). * * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into * an mbuf already in the socket buffer. This can occur if an * appropriate mbuf exists, there is room, both mbufs are not marked as * not ready, and no merging of data types will occur. * * (3) The mbuf may be appended to the end of the existing mbuf chain. * * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as * end-of-record. 
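 *
 * As an illustration of case (2), when the existing tail mbuf 'n' is
 * writable and has room for the incoming mbuf 'm', the data is simply
 * folded in, roughly:
 *
 *	m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
 *	n->m_len += m->m_len;
 *	m = m_free(m);
 *
 * so no additional mbuf is linked into the buffer for that data.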
*/ void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { int eor = 0; struct mbuf *o; SOCKBUF_LOCK_ASSERT(sb); while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { if (sb->sb_lastrecord == m) sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } if (n && (n->m_flags & M_EOR) == 0 && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(m->m_flags & M_NOTREADY) && !(n->m_flags & (M_NOTREADY | M_EXTPG)) && !mbuf_has_tls_session(m) && !mbuf_has_tls_session(n) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); n->m_len += m->m_len; sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) sb->sb_acc += m->m_len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) /* XXX: Probably don't need.*/ sb->sb_ctl += m->m_len; m = m_free(m); continue; } if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) && (m->m_flags & M_NOTREADY) == 0 && !mbuf_has_tls_session(m)) (void)mb_unmapped_compress(m); if (n) n->m_next = m; else sb->sb_mb = m; sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); n->m_flags |= eor; } SBLASTMBUFCHK(sb); } #ifdef KERN_TLS /* * A version of sbcompress() for encrypted TLS RX mbufs. These mbufs * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also * a bit simpler (no EOR markers, always MT_DATA, etc.). */ static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { SOCKBUF_LOCK_ASSERT(sb); while (m) { KASSERT((m->m_flags & M_EOR) == 0, ("TLS RX mbuf %p with EOR", m)); KASSERT(m->m_type == MT_DATA, ("TLS RX mbuf %p is not MT_DATA", m)); KASSERT((m->m_flags & M_NOTREADY) != 0, ("TLS RX mbuf %p ready", m)); KASSERT((m->m_flags & M_EXTPG) == 0, ("TLS RX mbuf %p unmapped", m)); if (m->m_len == 0) { m = m_free(m); continue; } /* * Even though both 'n' and 'm' are NOTREADY, it's ok * to coalesce the data. */ if (n && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(n->m_flags & (M_EXTPG)) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n)) { m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); n->m_len += m->m_len; sb->sb_ccc += m->m_len; sb->sb_tlscc += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mtls = m; sb->sb_mtlstail = m; sballoc_ktls_rx(sb, m); n = m; m = m->m_next; n->m_next = NULL; } SBLASTMBUFCHK(sb); } #endif /* * Free all mbufs in a sockbuf. Check that all resources are reclaimed. */ static void sbflush_internal(struct sockbuf *sb) { while (sb->sb_mbcnt || sb->sb_tlsdcc) { /* * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: * we would loop forever. Panic instead. */ if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) break; m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); } KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, ("%s: ccc %u mb %p mbcnt %u", __func__, sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); } void sbflush_locked(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sbflush_internal(sb); } void sbflush(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbflush_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Cut data from (the front of) a sockbuf. 
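 *
 * The locked wrappers below show the expected calling pattern; for
 * example, sbdrop() amounts to:
 *
 *	SOCKBUF_LOCK(sb);
 *	mfree = sbcut_internal(sb, len);
 *	SOCKBUF_UNLOCK(sb);
 *	m_freem(mfree);
 *
 * while sbcut_locked() instead hands the cut chain back to the caller.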
*/ static struct mbuf * sbcut_internal(struct sockbuf *sb, int len) { struct mbuf *m, *next, *mfree; bool is_tls; KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", __func__, len)); KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", __func__, len, sb->sb_ccc)); next = (m = sb->sb_mb) ? m->m_nextpkt : 0; is_tls = false; mfree = NULL; while (len > 0) { if (m == NULL) { #ifdef KERN_TLS if (next == NULL && !is_tls) { if (sb->sb_tlsdcc != 0) { MPASS(len >= sb->sb_tlsdcc); len -= sb->sb_tlsdcc; sb->sb_ccc -= sb->sb_tlsdcc; sb->sb_tlsdcc = 0; if (len == 0) break; } next = sb->sb_mtls; is_tls = true; } #endif KASSERT(next, ("%s: no next, len %d", __func__, len)); m = next; next = m->m_nextpkt; } if (m->m_len > len) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p M_NOTAVAIL", __func__, m)); m->m_len -= len; m->m_data += len; sb->sb_ccc -= len; sb->sb_acc -= len; if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= len; break; } len -= m->m_len; #ifdef KERN_TLS if (is_tls) sbfree_ktls_rx(sb, m); else #endif sbfree(sb, m); /* * Do not put M_NOTREADY buffers to the free list, they * are referenced from outside. */ if (m->m_flags & M_NOTREADY && !is_tls) m = m->m_next; else { struct mbuf *n; n = m->m_next; m->m_next = mfree; mfree = m; m = n; } } /* * Free any zero-length mbufs from the buffer. * For SOCK_DGRAM sockets such mbufs represent empty records. * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, * when sosend_generic() needs to send only control data. */ while (m && m->m_len == 0) { struct mbuf *n; sbfree(sb, m); n = m->m_next; m->m_next = mfree; mfree = m; m = n; } #ifdef KERN_TLS if (is_tls) { sb->sb_mb = NULL; sb->sb_mtls = m; if (m == NULL) sb->sb_mtlstail = NULL; } else #endif if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; /* * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure * sb_lastrecord is up-to-date if we dropped part of the last record. */ m = sb->sb_mb; if (m == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (m->m_nextpkt == NULL) { sb->sb_lastrecord = m; } return (mfree); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); m_freem(sbcut_internal(sb, len)); } /* * Drop data from (the front of) a sockbuf, * and return it to caller. */ struct mbuf * sbcut_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); return (sbcut_internal(sb, len)); } void sbdrop(struct sockbuf *sb, int len) { struct mbuf *mfree; SOCKBUF_LOCK(sb); mfree = sbcut_internal(sb, len); SOCKBUF_UNLOCK(sb); m_freem(mfree); } struct mbuf * sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff) { struct mbuf *m; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { *moff = off; if (sb->sb_sndptr == NULL) { sb->sb_sndptr = sb->sb_mb; sb->sb_sndptroff = 0; } return (sb->sb_mb); } else { m = sb->sb_sndptr; off -= sb->sb_sndptroff; } *moff = off; return (m); } void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len) { /* * A small copy was done, advance forward the sb_sbsndptr to cover * it. 
*/ struct mbuf *m; if (mb != sb->sb_sndptr) { /* Did not copyout at the same mbuf */ return; } m = mb; while (m && (len > 0)) { if (len >= m->m_len) { len -= m->m_len; if (m->m_next) { sb->sb_sndptroff += m->m_len; sb->sb_sndptr = m->m_next; } m = m->m_next; } else { len = 0; } } } /* * Return the first mbuf and the mbuf data offset for the provided * send offset without changing the "sb_sndptroff" field. */ struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) { struct mbuf *m; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); /* * If the "off" is below the stored offset, which happens on * retransmits, just use "sb_mb": */ if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { m = sb->sb_mb; } else { m = sb->sb_sndptr; off -= sb->sb_sndptroff; } while (off > 0 && m != NULL) { if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } *moff = off; return (m); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord_locked(struct sockbuf *sb) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); m = m_free(m); } while (m); } SB_EMPTY_FIXUP(sb); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbdroprecord_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Create a "control" mbuf containing the specified data with the specified * type for presentation on a socket buffer. */ struct mbuf * -sbcreatecontrol_how(void *p, int size, int type, int level, int wait) +sbcreatecontrol(void *p, int size, int type, int level, int wait) { struct cmsghdr *cp; struct mbuf *m; MBUF_CHECKSLEEP(wait); if (CMSG_SPACE((u_int)size) > MCLBYTES) return ((struct mbuf *) NULL); if (CMSG_SPACE((u_int)size) > MLEN) m = m_getcl(wait, MT_CONTROL, 0); else m = m_get(wait, MT_CONTROL); if (m == NULL) return ((struct mbuf *) NULL); cp = mtod(m, struct cmsghdr *); m->m_len = 0; KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), ("sbcreatecontrol: short mbuf")); /* * Don't leave the padding between the msg header and the * cmsg data and the padding after the cmsg data un-initialized. */ bzero(cp, CMSG_SPACE((u_int)size)); if (p != NULL) (void)memcpy(CMSG_DATA(cp), p, size); m->m_len = CMSG_SPACE(size); cp->cmsg_len = CMSG_LEN(size); cp->cmsg_level = level; cp->cmsg_type = type; return (m); } -struct mbuf * -sbcreatecontrol(caddr_t p, int size, int type, int level) -{ - - return (sbcreatecontrol_how(p, size, type, level, M_NOWAIT)); -} - /* * This does the same for socket buffers that sotoxsocket does for sockets: * generate an user-format data structure describing the socket buffer. Note * that the xsockbuf structure, since it is always embedded in a socket, does * not include a self pointer nor a length. We make this entry point public * in case some other mechanism needs it. */ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mcnt = sb->sb_mcnt; xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. 
*/ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, ""); SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, "Socket buffer size waste factor"); diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index e89d7567fcda..f08b972c6de6 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,2974 +1,2974 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All Rights Reserved. * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved. * Copyright (c) 2018 Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 */ /* * UNIX Domain (Local) Sockets * * This is an implementation of UNIX (local) domain sockets. Each socket has * an associated struct unpcb (UNIX protocol control block). Stream sockets * may be connected to 0 or 1 other socket. Datagram sockets may be * connected to 0, 1, or many other sockets. Sockets may be created and * connected in pairs (socketpair(2)), or bound/connected to using the file * system name space. For most purposes, only the receive socket buffer is * used, as sending on one socket delivers directly to the receive socket * buffer of a second socket. * * The implementation is substantially complicated by the fact that * "ancillary data", such as file descriptors or credentials, may be passed * across UNIX domain sockets. The potential for passing UNIX domain sockets * over other UNIX domain sockets requires the implementation of a simple * garbage collector to find and tear down cycles of disconnected sockets. 
* * TODO: * RDM * rethink name space problems * need a proper out-of-band */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include MALLOC_DECLARE(M_FILECAPS); /* * See unpcb.h for the locking key. */ static uma_zone_t unp_zone; static unp_gen_t unp_gencnt; /* (l) */ static u_int unp_count; /* (l) Count of local sockets. */ static ino_t unp_ino; /* Prototype for fake inode numbers. */ static int unp_rights; /* (g) File descriptors in flight. */ static struct unp_head unp_shead; /* (l) List of stream sockets. */ static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ static struct unp_head unp_sphead; /* (l) List of seqpacket sockets. */ struct unp_defer { SLIST_ENTRY(unp_defer) ud_link; struct file *ud_fp; }; static SLIST_HEAD(, unp_defer) unp_defers; static int unp_defers_count; static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; /* * Garbage collection of cyclic file descriptor/socket references occurs * asynchronously in a taskqueue context in order to avoid recursion and * reentrance in the UNIX domain socket, file descriptor, and socket layer * code. See unp_gc() for a full description. */ static struct timeout_task unp_gc_task; /* * The close of unix domain sockets attached as SCM_RIGHTS is * postponed to the taskqueue, to avoid arbitrary recursion depth. * The attached sockets might have another sockets attached. */ static struct task unp_defer_task; /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering for * stream sockets, although the total for sender and receiver is actually * only PIPSIZ. * * Datagram sockets really use the sendspace as the maximum datagram size, * and don't really want to reserve the sendspace. Their recvspace should be * large enough for at least one max-size datagram plus address. 
*/ #ifndef PIPSIZ #define PIPSIZ 8192 #endif static u_long unpst_sendspace = PIPSIZ; static u_long unpst_recvspace = PIPSIZ; static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ static u_long unpdg_recvspace = 16*1024; /* support 8KB syslog msgs */ static u_long unpsp_sendspace = PIPSIZ; /* really max datagram size */ static u_long unpsp_recvspace = PIPSIZ; static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Local domain"); static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SOCK_STREAM"); static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SOCK_DGRAM"); static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SOCK_SEQPACKET"); SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, &unpst_sendspace, 0, "Default stream send space."); SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, &unpst_recvspace, 0, "Default stream receive space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, &unpdg_sendspace, 0, "Default datagram send space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, &unpdg_recvspace, 0, "Default datagram receive space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW, &unpsp_sendspace, 0, "Default seqpacket send space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW, &unpsp_recvspace, 0, "Default seqpacket receive space."); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "File descriptors in flight."); SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD, &unp_defers_count, 0, "File descriptors deferred to taskqueue for close."); /* * Locking and synchronization: * * Several types of locks exist in the local domain socket implementation: * - a global linkage lock * - a global connection list lock * - the mtxpool lock * - per-unpcb mutexes * * The linkage lock protects the global socket lists, the generation number * counter and garbage collector state. * * The connection list lock protects the list of referring sockets in a datagram * socket PCB. This lock is also overloaded to protect a global list of * sockets whose buffers contain socket references in the form of SCM_RIGHTS * messages. To avoid recursion, such references are released by a dedicated * thread. * * The mtxpool lock protects the vnode from being modified while referenced. * Lock ordering rules require that it be acquired before any PCB locks. * * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the * unpcb. This includes the unp_conn field, which either links two connected * PCBs together (for connected socket types) or points at the destination * socket (for connectionless socket types). The operations of creating or * destroying a connection therefore involve locking multiple PCBs. To avoid * lock order reversals, in some cases this involves dropping a PCB lock and * using a reference counter to maintain liveness. * * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, * allocated in pru_attach() and freed in pru_detach(). The validity of that * pointer is an invariant, so no lock is required to dereference the so_pcb * pointer if a valid socket reference is held by the caller. In practice, * this is always true during operations performed on a socket. Each unpcb * has a back-pointer to its socket, unp_socket, which will be stable under * the same circumstances. 
* * This pointer may only be safely dereferenced as long as a valid reference * to the unpcb is held. Typically, this reference will be from the socket, * or from another unpcb when the referring unpcb's lock is held (in order * that the reference not be invalidated during use). For example, to follow * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee * that detach is not run clearing unp_socket. * * Blocking with UNIX domain sockets is a tricky issue: unlike most network * protocols, bind() is a non-atomic operation, and connect() requires * potential sleeping in the protocol, due to potentially waiting on local or * distributed file systems. We try to separate "lookup" operations, which * may sleep, and the IPC operations themselves, which typically can occur * with relative atomicity as locks can be held over the entire operation. * * Another tricky issue is simultaneous multi-threaded or multi-process * access to a single UNIX domain socket. These are handled by the flags * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or * binding, both of which involve dropping UNIX domain socket locks in order * to perform namei() and other file system operations. */ static struct rwlock unp_link_rwlock; static struct mtx unp_defers_lock; #define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \ "unp_link_rwlock") #define UNP_LINK_LOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_LOCKED) #define UNP_LINK_UNLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_UNLOCKED) #define UNP_LINK_RLOCK() rw_rlock(&unp_link_rwlock) #define UNP_LINK_RUNLOCK() rw_runlock(&unp_link_rwlock) #define UNP_LINK_WLOCK() rw_wlock(&unp_link_rwlock) #define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock) #define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_WLOCKED) #define UNP_LINK_WOWNED() rw_wowned(&unp_link_rwlock) #define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \ "unp_defer", NULL, MTX_DEF) #define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock) #define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock) #define UNP_REF_LIST_LOCK() UNP_DEFERRED_LOCK(); #define UNP_REF_LIST_UNLOCK() UNP_DEFERRED_UNLOCK(); #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp", "unp", \ MTX_DUPOK|MTX_DEF) #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) #define UNP_PCB_LOCKPTR(unp) (&(unp)->unp_mtx) #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) #define UNP_PCB_TRYLOCK(unp) mtx_trylock(&(unp)->unp_mtx) #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) #define UNP_PCB_OWNED(unp) mtx_owned(&(unp)->unp_mtx) #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) #define UNP_PCB_UNLOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED) static int uipc_connect2(struct socket *, struct socket *); static int uipc_ctloutput(struct socket *, struct sockopt *); static int unp_connect(struct socket *, struct sockaddr *, struct thread *); static int unp_connectat(int, struct socket *, struct sockaddr *, struct thread *); static void unp_connect2(struct socket *so, struct socket *so2, int); static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); static void unp_dispose(struct socket *so); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int)); static void unp_discard(struct file *); static void unp_freerights(struct filedescent **, int); static int unp_internalize(struct mbuf **, struct 
thread *); static void unp_internalize_fp(struct file *); static int unp_externalize(struct mbuf *, struct mbuf **, int); static int unp_externalize_fp(struct file *); static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *, int); static void unp_process_defers(void * __unused, int); static void unp_pcb_hold(struct unpcb *unp) { u_int old __unused; old = refcount_acquire(&unp->unp_refcount); KASSERT(old > 0, ("%s: unpcb %p has no references", __func__, unp)); } static __result_use_check bool unp_pcb_rele(struct unpcb *unp) { bool ret; UNP_PCB_LOCK_ASSERT(unp); if ((ret = refcount_release(&unp->unp_refcount))) { UNP_PCB_UNLOCK(unp); UNP_PCB_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } return (ret); } static void unp_pcb_rele_notlast(struct unpcb *unp) { bool ret __unused; ret = refcount_release(&unp->unp_refcount); KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp)); } static void unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2) { UNP_PCB_UNLOCK_ASSERT(unp); UNP_PCB_UNLOCK_ASSERT(unp2); if (unp == unp2) { UNP_PCB_LOCK(unp); } else if ((uintptr_t)unp2 > (uintptr_t)unp) { UNP_PCB_LOCK(unp); UNP_PCB_LOCK(unp2); } else { UNP_PCB_LOCK(unp2); UNP_PCB_LOCK(unp); } } static void unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2) { UNP_PCB_UNLOCK(unp); if (unp != unp2) UNP_PCB_UNLOCK(unp2); } /* * Try to lock the connected peer of an already locked socket. In some cases * this requires that we unlock the current socket. The pairbusy counter is * used to block concurrent connection attempts while the lock is dropped. The * caller must be careful to revalidate PCB state. */ static struct unpcb * unp_pcb_lock_peer(struct unpcb *unp) { struct unpcb *unp2; UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; if (unp2 == NULL) return (NULL); if (__predict_false(unp == unp2)) return (unp); UNP_PCB_UNLOCK_ASSERT(unp2); if (__predict_true(UNP_PCB_TRYLOCK(unp2))) return (unp2); if ((uintptr_t)unp2 > (uintptr_t)unp) { UNP_PCB_LOCK(unp2); return (unp2); } unp->unp_pairbusy++; unp_pcb_hold(unp2); UNP_PCB_UNLOCK(unp); UNP_PCB_LOCK(unp2); UNP_PCB_LOCK(unp); KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL, ("%s: socket %p was reconnected", __func__, unp)); if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) { unp->unp_flags &= ~UNP_WAITING; wakeup(unp); } if (unp_pcb_rele(unp2)) { /* unp2 is unlocked. */ return (NULL); } if (unp->unp_conn == NULL) { UNP_PCB_UNLOCK(unp2); return (NULL); } return (unp2); } /* * Definitions of protocols supported in the LOCAL domain. */ static struct domain localdomain; static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream; static struct pr_usrreqs uipc_usrreqs_seqpacket; static struct protosw localsw[] = { { .pr_type = SOCK_STREAM, .pr_domain = &localdomain, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS| PR_CAPATTACH, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_stream }, { .pr_type = SOCK_DGRAM, .pr_domain = &localdomain, .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS|PR_CAPATTACH, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_dgram }, { .pr_type = SOCK_SEQPACKET, .pr_domain = &localdomain, /* * XXXRW: For now, PR_ADDR because soreceive will bump into them * due to our use of sbappendaddr. A new sbappend variants is needed * that supports both atomic record writes and control data. 
*/ .pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED| PR_WANTRCVD|PR_RIGHTS|PR_CAPATTACH, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_seqpacket, }, }; static struct domain localdomain = { .dom_family = AF_LOCAL, .dom_name = "local", .dom_externalize = unp_externalize, .dom_dispose = unp_dispose, .dom_protosw = localsw, .dom_protoswNPROTOSW = &localsw[nitems(localsw)] }; DOMAIN_SET(local); static void uipc_abort(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); UNP_PCB_UNLOCK_ASSERT(unp); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { unp_pcb_hold(unp2); UNP_PCB_UNLOCK(unp); unp_drop(unp2); } else UNP_PCB_UNLOCK(unp); } static int uipc_accept(struct socket *so, struct sockaddr **nam) { struct unpcb *unp, *unp2; const struct sockaddr *sa; /* * Pass back name of connected socket, if it was bound and we are * still connected (our peer may have closed already!). */ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_PCB_LOCK(unp); unp2 = unp_pcb_lock_peer(unp); if (unp2 != NULL && unp2->unp_addr != NULL) sa = (struct sockaddr *)unp2->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); if (unp2 != NULL) unp_pcb_unlock_pair(unp, unp2); else UNP_PCB_UNLOCK(unp); return (0); } static int uipc_attach(struct socket *so, int proto, struct thread *td) { u_long sendspace, recvspace; struct unpcb *unp; int error; bool locked; KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { case SOCK_STREAM: sendspace = unpst_sendspace; recvspace = unpst_recvspace; break; case SOCK_DGRAM: sendspace = unpdg_sendspace; recvspace = unpdg_recvspace; break; case SOCK_SEQPACKET: sendspace = unpsp_sendspace; recvspace = unpsp_recvspace; break; default: panic("uipc_attach"); } error = soreserve(so, sendspace, recvspace); if (error) return (error); } unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO); if (unp == NULL) return (ENOBUFS); LIST_INIT(&unp->unp_refs); UNP_PCB_LOCK_INIT(unp); unp->unp_socket = so; so->so_pcb = unp; refcount_init(&unp->unp_refcount, 1); if ((locked = UNP_LINK_WOWNED()) == false) UNP_LINK_WLOCK(); unp->unp_gencnt = ++unp_gencnt; unp->unp_ino = ++unp_ino; unp_count++; switch (so->so_type) { case SOCK_STREAM: LIST_INSERT_HEAD(&unp_shead, unp, unp_link); break; case SOCK_DGRAM: LIST_INSERT_HEAD(&unp_dhead, unp, unp_link); break; case SOCK_SEQPACKET: LIST_INSERT_HEAD(&unp_sphead, unp, unp_link); break; default: panic("uipc_attach"); } if (locked == false) UNP_LINK_WUNLOCK(); return (0); } static int uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vattr vattr; int error, namelen; struct nameidata nd; struct unpcb *unp; struct vnode *vp; struct mount *mp; cap_rights_t rights; char *buf; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); if (soun->sun_len > sizeof(struct sockaddr_un)) return (EINVAL); namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); if (namelen <= 0) return (EINVAL); /* * We don't allow simultaneous bind() calls on a single UNIX domain * socket, so flag in-progress operations, and return an error if an * operation is already in progress. * * Historically, we have not allowed a socket to be rebound, so this * also returns an error. 
Not allowing re-binding simplifies the * implementation and avoids a great many possible failure modes. */ UNP_PCB_LOCK(unp); if (unp->unp_vnode != NULL) { UNP_PCB_UNLOCK(unp); return (EINVAL); } if (unp->unp_flags & UNP_BINDING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } unp->unp_flags |= UNP_BINDING; UNP_PCB_UNLOCK(unp); buf = malloc(namelen + 1, M_TEMP, M_WAITOK); bcopy(soun->sun_path, buf, namelen); buf[namelen] = 0; restart: NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE, UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT)); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) goto error; vp = nd.ni_vp; if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE_PNBUF(&nd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULL) { vrele(vp); error = EADDRINUSE; goto error; } error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error) goto error; goto restart; } VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask); #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (error == 0) error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE_PNBUF(&nd); if (error) { VOP_VPUT_PAIR(nd.ni_dvp, NULL, true); vn_finished_write(mp); if (error == ERELOOKUP) goto restart; goto error; } vp = nd.ni_vp; ASSERT_VOP_ELOCKED(vp, "uipc_bind"); soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); UNP_PCB_LOCK(unp); VOP_UNP_BIND(vp, unp); unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); vref(vp); VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); vn_finished_write(mp); free(buf, M_TEMP); return (0); error: UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); free(buf, M_TEMP); return (error); } static int uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (uipc_bindat(AT_FDCWD, so, nam, td)); } static int uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); error = unp_connect(so, nam, td); return (error); } static int uipc_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connectat: td != curthread")); error = unp_connectat(fd, so, nam, td); return (error); } static void uipc_close(struct socket *so) { struct unpcb *unp, *unp2; struct vnode *vp = NULL; struct mtx *vplock; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); vplock = NULL; if ((vp = unp->unp_vnode) != NULL) { vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); } UNP_PCB_LOCK(unp); if (vp && unp->unp_vnode == NULL) { mtx_unlock(vplock); vp = NULL; } if (vp != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) unp_disconnect(unp, unp2); else UNP_PCB_UNLOCK(unp); if (vp) { mtx_unlock(vplock); vrele(vp); } } static int uipc_connect2(struct socket *so1, struct socket *so2) { struct unpcb *unp, *unp2; if (so1->so_type != so2->so_type) return (EPROTOTYPE); unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); unp2 = so2->so_pcb; KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); unp_pcb_lock_pair(unp, unp2); unp_connect2(so1, so2, PRU_CONNECT2); unp_pcb_unlock_pair(unp, unp2); return (0); } static void uipc_detach(struct socket *so) { struct unpcb *unp, *unp2; struct mtx *vplock; 
struct vnode *vp; int local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); vp = NULL; vplock = NULL; UNP_LINK_WLOCK(); LIST_REMOVE(unp, unp_link); if (unp->unp_gcflag & UNPGC_DEAD) LIST_REMOVE(unp, unp_dead); unp->unp_gencnt = ++unp_gencnt; --unp_count; UNP_LINK_WUNLOCK(); UNP_PCB_UNLOCK_ASSERT(unp); restart: if ((vp = unp->unp_vnode) != NULL) { vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); } UNP_PCB_LOCK(unp); if (unp->unp_vnode != vp && unp->unp_vnode != NULL) { if (vplock) mtx_unlock(vplock); UNP_PCB_UNLOCK(unp); goto restart; } if ((vp = unp->unp_vnode) != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) unp_disconnect(unp, unp2); else UNP_PCB_UNLOCK(unp); UNP_REF_LIST_LOCK(); while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); unp_pcb_hold(ref); UNP_REF_LIST_UNLOCK(); MPASS(ref != unp); UNP_PCB_UNLOCK_ASSERT(ref); unp_drop(ref); UNP_REF_LIST_LOCK(); } UNP_REF_LIST_UNLOCK(); UNP_PCB_LOCK(unp); local_unp_rights = unp_rights; unp->unp_socket->so_pcb = NULL; unp->unp_socket = NULL; free(unp->unp_addr, M_SONAME); unp->unp_addr = NULL; if (!unp_pcb_rele(unp)) UNP_PCB_UNLOCK(unp); if (vp) { mtx_unlock(vplock); vrele(vp); } if (local_unp_rights) taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); } static int uipc_disconnect(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); UNP_PCB_LOCK(unp); if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) unp_disconnect(unp, unp2); else UNP_PCB_UNLOCK(unp); return (0); } static int uipc_listen(struct socket *so, int backlog, struct thread *td) { struct unpcb *unp; int error; MPASS(so->so_type != SOCK_DGRAM); /* * Synchronize with concurrent connection attempts. */ error = 0; unp = sotounpcb(so); UNP_PCB_LOCK(unp); if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0) error = EINVAL; else if (unp->unp_vnode == NULL) error = EDESTADDRREQ; if (error != 0) { UNP_PCB_UNLOCK(unp); return (error); } SOCK_LOCK(so); error = solisten_proto_check(so); if (error == 0) { cru2xt(td, &unp->unp_peercred); solisten_proto(so, backlog); } SOCK_UNLOCK(so); UNP_PCB_UNLOCK(unp); return (error); } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp, *unp2; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_LINK_RLOCK(); /* * XXX: It seems that this test always fails even when connection is * established. So, this else clause is added as workaround to * return PF_LOCAL sockaddr. */ unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); if (unp2->unp_addr != NULL) sa = (struct sockaddr *) unp2->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp2); } else { sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); } UNP_LINK_RUNLOCK(); return (0); } static int uipc_rcvd(struct socket *so, int flags) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); /* * Adjust backpressure on sender and wakeup any waiting to write. 
* * The unp lock is acquired to maintain the validity of the unp_conn * pointer; no lock on unp2 is required as unp2->unp_socket will be * static as long as we don't permit unp2 to disconnect from unp, * which is prevented by the lock on unp. We cache values from * so_rcv to avoid holding the so_rcv lock over the entire * transaction on the remote so_snd. */ SOCKBUF_LOCK(&so->so_rcv); mbcnt = so->so_rcv.sb_mbcnt; sbcc = sbavail(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); /* * There is a benign race condition at this point. If we're planning to * clear SB_STOP, but uipc_send is called on the connected socket at * this instant, it might add data to the sockbuf and set SB_STOP. Then * we would erroneously clear SB_STOP below, even though the sockbuf is * full. The race is benign because the only ill effect is to allow the * sockbuf to exceed its size limit, and the size limits are not * strictly guaranteed anyway. */ UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 == NULL) { UNP_PCB_UNLOCK(unp); return (0); } so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_snd); if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax) so2->so_snd.sb_flags &= ~SB_STOP; sowwakeup_locked(so2); UNP_PCB_UNLOCK(unp); return (0); } static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; int error; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); error = 0; if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; } if (control != NULL && (error = unp_internalize(&control, td))) goto release; unp2 = NULL; switch (so->so_type) { case SOCK_DGRAM: { const struct sockaddr *from; if (nam != NULL) { error = unp_connect(so, nam, td); if (error != 0) break; } UNP_PCB_LOCK(unp); /* * Because connect() and send() are non-atomic in a sendto() * with a target address, it's possible that the socket will * have disconnected before the send() can run. In that case * return the slightly counter-intuitive but otherwise * correct error that the socket is not connected. */ unp2 = unp_pcb_lock_peer(unp); if (unp2 == NULL) { UNP_PCB_UNLOCK(unp); error = ENOTCONN; break; } if (unp2->unp_flags & UNP_WANTCRED_MASK) control = unp_addsockcred(td, control, unp2->unp_flags); if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { sorwakeup_locked(so2); m = NULL; control = NULL; } else { soroverflow_locked(so2); error = (so->so_state & SS_NBIO) ? 
EAGAIN : ENOBUFS; } if (nam != NULL) unp_disconnect(unp, unp2); else unp_pcb_unlock_pair(unp, unp2); break; } case SOCK_SEQPACKET: case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { error = unp_connect(so, nam, td); if (error != 0) break; } else { error = ENOTCONN; break; } } UNP_PCB_LOCK(unp); if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) { UNP_PCB_UNLOCK(unp); error = ENOTCONN; break; } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) { unp_pcb_unlock_pair(unp, unp2); error = EPIPE; break; } UNP_PCB_UNLOCK(unp); if ((so2 = unp2->unp_socket) == NULL) { UNP_PCB_UNLOCK(unp2); error = ENOTCONN; break; } SOCKBUF_LOCK(&so2->so_rcv); if (unp2->unp_flags & UNP_WANTCRED_MASK) { /* * Credentials are passed only once on SOCK_STREAM and * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS). */ control = unp_addsockcred(td, control, unp2->unp_flags); unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT; } /* * Send to paired receive port and wake up readers. Don't * check for space available in the receive buffer if we're * attaching ancillary data; Unix domain sockets only check * for space in the sending sockbuf, and that check is * performed one level up the stack. At that level we cannot * precisely account for the amount of buffer space used * (e.g., because control messages are not yet internalized). */ switch (so->so_type) { case SOCK_STREAM: if (control != NULL) { sbappendcontrol_locked(&so2->so_rcv, m, control, flags); control = NULL; } else sbappend_locked(&so2->so_rcv, m, flags); break; case SOCK_SEQPACKET: if (sbappendaddr_nospacecheck_locked(&so2->so_rcv, &sun_noname, m, control)) control = NULL; break; } mbcnt = so2->so_rcv.sb_mbcnt; sbcc = sbavail(&so2->so_rcv); if (sbcc) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); /* * The PCB lock on unp2 protects the SB_STOP flag. Without it, * it would be possible for uipc_rcvd to be called at this * point, drain the receiving sockbuf, clear SB_STOP, and then * we would set SB_STOP below. That could lead to an empty * sockbuf having SB_STOP set */ SOCKBUF_LOCK(&so->so_snd); if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax) so->so_snd.sb_flags |= SB_STOP; SOCKBUF_UNLOCK(&so->so_snd); UNP_PCB_UNLOCK(unp2); m = NULL; break; } /* * PRUS_EOF is equivalent to pru_send followed by pru_shutdown. */ if (flags & PRUS_EOF) { UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); } if (control != NULL && error != 0) unp_scan(control, unp_freerights); release: if (control != NULL) m_freem(control); /* * In case of PRUS_NOTREADY, uipc_ready() is responsible * for freeing memory. 
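 * (When the mbufs were successfully appended they are sitting in the
 * peer's receive buffer still marked M_NOTREADY; uipc_ready() later
 * marks them ready via sbready() and wakes the reader.)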
*/ if (m != NULL && (flags & PRUS_NOTREADY) == 0) m_freem(m); return (error); } static bool uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp) { struct mbuf *mb, *n; struct sockbuf *sb; SOCK_LOCK(so); if (SOLISTENING(so)) { SOCK_UNLOCK(so); return (false); } mb = NULL; sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (sb->sb_fnrdy != NULL) { for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) { if (mb == m) { *errorp = sbready(sb, m, count); break; } mb = mb->m_next; if (mb == NULL) { mb = n; if (mb != NULL) n = mb->m_nextpkt; } } } SOCKBUF_UNLOCK(sb); SOCK_UNLOCK(so); return (mb != NULL); } static int uipc_ready(struct socket *so, struct mbuf *m, int count) { struct unpcb *unp, *unp2; struct socket *so2; int error, i; unp = sotounpcb(so); KASSERT(so->so_type == SOCK_STREAM, ("%s: unexpected socket type for %p", __func__, so)); UNP_PCB_LOCK(unp); if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) { UNP_PCB_UNLOCK(unp); so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); if ((error = sbready(&so2->so_rcv, m, count)) == 0) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); UNP_PCB_UNLOCK(unp2); return (error); } UNP_PCB_UNLOCK(unp); /* * The receiving socket has been disconnected, but may still be valid. * In this case, the now-ready mbufs are still present in its socket * buffer, so perform an exhaustive search before giving up and freeing * the mbufs. */ UNP_LINK_RLOCK(); LIST_FOREACH(unp, &unp_shead, unp_link) { if (uipc_ready_scan(unp->unp_socket, m, count, &error)) break; } UNP_LINK_RUNLOCK(); if (unp == NULL) { for (i = 0; i < count; i++) m = m_free(m); error = ECONNRESET; } return (error); } static int uipc_sense(struct socket *so, struct stat *sb) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); sb->st_blksize = so->so_snd.sb_hiwat; sb->st_dev = NODEV; sb->st_ino = unp->unp_ino; return (0); } static int uipc_shutdown(struct socket *so) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); return (0); } static int uipc_sockaddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) sa = (struct sockaddr *) unp->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp); return (0); } static struct pr_usrreqs uipc_usrreqs_dgram = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_peeraddr = uipc_peeraddr, .pru_send = uipc_send, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_dgram, .pru_close = uipc_close, }; static struct pr_usrreqs uipc_usrreqs_seqpacket = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_sense = uipc_sense, 
.pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_generic, /* XXX: or...? */ .pru_close = uipc_close, }; static struct pr_usrreqs uipc_usrreqs_stream = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_ready = uipc_ready, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_generic, .pru_close = uipc_close, }; static int uipc_ctloutput(struct socket *so, struct sockopt *sopt) { struct unpcb *unp; struct xucred xu; int error, optval; if (sopt->sopt_level != SOL_LOCAL) return (EINVAL); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case LOCAL_PEERCRED: UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_HAVEPC) xu = unp->unp_peercred; else { if (so->so_type == SOCK_STREAM) error = ENOTCONN; else error = EINVAL; } UNP_PCB_UNLOCK(unp); if (error == 0) error = sooptcopyout(sopt, &xu, sizeof(xu)); break; case LOCAL_CREDS: /* Unlocked read. */ optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case LOCAL_CREDS_PERSISTENT: /* Unlocked read. */ optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case LOCAL_CONNWAIT: /* Unlocked read. */ optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EOPNOTSUPP; break; } break; case SOPT_SET: switch (sopt->sopt_name) { case LOCAL_CREDS: case LOCAL_CREDS_PERSISTENT: case LOCAL_CONNWAIT: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; #define OPTSET(bit, exclusive) do { \ UNP_PCB_LOCK(unp); \ if (optval) { \ if ((unp->unp_flags & (exclusive)) != 0) { \ UNP_PCB_UNLOCK(unp); \ error = EINVAL; \ break; \ } \ unp->unp_flags |= (bit); \ } else \ unp->unp_flags &= ~(bit); \ UNP_PCB_UNLOCK(unp); \ } while (0) switch (sopt->sopt_name) { case LOCAL_CREDS: OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS); break; case LOCAL_CREDS_PERSISTENT: OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT); break; case LOCAL_CONNWAIT: OPTSET(UNP_CONNWAIT, 0); break; default: break; } break; #undef OPTSET default: error = ENOPROTOOPT; break; } break; default: error = EOPNOTSUPP; break; } return (error); } static int unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (unp_connectat(AT_FDCWD, so, nam, td)); } static int unp_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { struct mtx *vplock; struct sockaddr_un *soun; struct vnode *vp; struct socket *so2; struct unpcb *unp, *unp2, *unp3; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; cap_rights_t rights; int error, len; bool connreq; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); if (nam->sa_len > sizeof(struct sockaddr_un)) return (EINVAL); len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return (EINVAL); soun = (struct sockaddr_un *)nam; bcopy(soun->sun_path, buf, len); buf[len] = 0; error = 0; unp = sotounpcb(so); UNP_PCB_LOCK(unp); for (;;) { /* * Wait for 
connection state to stabilize. If a connection * already exists, give up. For datagram sockets, which permit * multiple consecutive connect(2) calls, upper layers are * responsible for disconnecting in advance of a subsequent * connect(2), but this is not synchronized with PCB connection * state. * * Also make sure that no threads are currently attempting to * lock the peer socket, to ensure that unp_conn cannot * transition between two valid sockets while locks are dropped. */ if (SOLISTENING(so)) error = EOPNOTSUPP; else if (unp->unp_conn != NULL) error = EISCONN; else if ((unp->unp_flags & UNP_CONNECTING) != 0) { error = EALREADY; } if (error != 0) { UNP_PCB_UNLOCK(unp); return (error); } if (unp->unp_pairbusy > 0) { unp->unp_flags |= UNP_WAITING; mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0); continue; } break; } unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0; if (connreq) sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); else sa = NULL; NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT)); error = namei(&nd); if (error) vp = NULL; else vp = nd.ni_vp; ASSERT_VOP_LOCKED(vp, "unp_connect"); NDFREE_NOTHING(&nd); if (error) goto bad; if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); if (error) goto bad; #endif error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); if (error) goto bad; unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp2); if (unp2 == NULL) { error = ECONNREFUSED; goto bad2; } so2 = unp2->unp_socket; if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad2; } if (connreq) { if (SOLISTENING(so2)) { CURVNET_SET(so2->so_vnet); so2 = sonewconn(so2, 0); CURVNET_RESTORE(); } else so2 = NULL; if (so2 == NULL) { error = ECONNREFUSED; goto bad2; } unp3 = sotounpcb(so2); unp_pcb_lock_pair(unp2, unp3); if (unp2->unp_addr != NULL) { bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); unp3->unp_addr = (struct sockaddr_un *) sa; sa = NULL; } unp_copy_peercred(td, unp3, unp, unp2); UNP_PCB_UNLOCK(unp2); unp2 = unp3; /* * It is safe to block on the PCB lock here since unp2 is * nascent and cannot be connected to any other sockets. */ UNP_PCB_LOCK(unp); #ifdef MAC mac_socketpeer_set_from_socket(so, so2); mac_socketpeer_set_from_socket(so2, so); #endif } else { unp_pcb_lock_pair(unp, unp2); } KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 && sotounpcb(so2) == unp2, ("%s: unp2 %p so2 %p", __func__, unp2, so2)); unp_connect2(so, so2, PRU_CONNECT); KASSERT((unp->unp_flags & UNP_CONNECTING) != 0, ("%s: unp %p has UNP_CONNECTING clear", __func__, unp)); unp->unp_flags &= ~UNP_CONNECTING; unp_pcb_unlock_pair(unp, unp2); bad2: mtx_unlock(vplock); bad: if (vp != NULL) { vput(vp); } free(sa, M_SONAME); if (__predict_false(error)) { UNP_PCB_LOCK(unp); KASSERT((unp->unp_flags & UNP_CONNECTING) != 0, ("%s: unp %p has UNP_CONNECTING clear", __func__, unp)); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); } return (error); } /* * Set socket peer credentials at connection time. * * The client's PCB credentials are copied from its process structure. The * server's PCB credentials are copied from the socket on which it called * listen(2). uipc_listen cached that process's credentials at the time. 
*/ void unp_copy_peercred(struct thread *td, struct unpcb *client_unp, struct unpcb *server_unp, struct unpcb *listen_unp) { cru2xt(td, &client_unp->unp_peercred); client_unp->unp_flags |= UNP_HAVEPC; memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred, sizeof(server_unp->unp_peercred)); server_unp->unp_flags |= UNP_HAVEPC; client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK); } static void unp_connect2(struct socket *so, struct socket *so2, int req) { struct unpcb *unp; struct unpcb *unp2; MPASS(so2->so_type == so->so_type); unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); KASSERT(unp->unp_conn == NULL, ("%s: socket %p is already connected", __func__, unp)); unp->unp_conn = unp2; unp_pcb_hold(unp2); unp_pcb_hold(unp); switch (so->so_type) { case SOCK_DGRAM: UNP_REF_LIST_LOCK(); LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); UNP_REF_LIST_UNLOCK(); soisconnected(so); break; case SOCK_STREAM: case SOCK_SEQPACKET: KASSERT(unp2->unp_conn == NULL, ("%s: socket %p is already connected", __func__, unp2)); unp2->unp_conn = unp; if (req == PRU_CONNECT && ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) soisconnecting(so); else soisconnected(so); soisconnected(so2); break; default: panic("unp_connect2"); } } static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2) { struct socket *so, *so2; #ifdef INVARIANTS struct unpcb *unptmp; #endif UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); KASSERT(unp->unp_conn == unp2, ("%s: unpcb %p is not connected to %p", __func__, unp, unp2)); unp->unp_conn = NULL; so = unp->unp_socket; so2 = unp2->unp_socket; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: UNP_REF_LIST_LOCK(); #ifdef INVARIANTS LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) { if (unptmp == unp) break; } KASSERT(unptmp != NULL, ("%s: %p not found in reflist of %p", __func__, unp, unp2)); #endif LIST_REMOVE(unp, unp_reflink); UNP_REF_LIST_UNLOCK(); if (so) { SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); } break; case SOCK_STREAM: case SOCK_SEQPACKET: if (so) soisdisconnected(so); MPASS(unp2->unp_conn == unp); unp2->unp_conn = NULL; if (so2) soisdisconnected(so2); break; } if (unp == unp2) { unp_pcb_rele_notlast(unp); if (!unp_pcb_rele(unp)) UNP_PCB_UNLOCK(unp); } else { if (!unp_pcb_rele(unp)) UNP_PCB_UNLOCK(unp); if (!unp_pcb_rele(unp2)) UNP_PCB_UNLOCK(unp2); } } /* * unp_pcblist() walks the global list of struct unpcb's to generate a * pointer list, bumping the refcount on each unpcb. It then copies them out * sequentially, validating the generation number on each to see if it has * been detached. All of this is necessary because copyout() may sleep on * disk I/O. */ static int unp_pcblist(SYSCTL_HANDLER_ARGS) { struct unpcb *unp, **unp_list; unp_gen_t gencnt; struct xunpgen *xug; struct unp_head *head; struct xunpcb *xu; u_int i; int error, n; switch ((intptr_t)arg1) { case SOCK_STREAM: head = &unp_shead; break; case SOCK_DGRAM: head = &unp_dhead; break; case SOCK_SEQPACKET: head = &unp_sphead; break; default: panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1); } /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
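 * For a size-only probe (oldptr == NULL) we therefore just return a
 * generous estimate: two generation headers plus the current socket
 * count padded by roughly 12% (n + n/8), so the real copy-out still
 * fits if sockets are created in the meantime.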
*/ if (req->oldptr == NULL) { n = unp_count; req->oldidx = 2 * (sizeof *xug) + (n + n/8) * sizeof(struct xunpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. */ xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO); UNP_LINK_RLOCK(); gencnt = unp_gencnt; n = unp_count; UNP_LINK_RUNLOCK(); xug->xug_len = sizeof *xug; xug->xug_count = n; xug->xug_gen = gencnt; xug->xug_sogen = so_gencnt; error = SYSCTL_OUT(req, xug, sizeof *xug); if (error) { free(xug, M_TEMP); return (error); } unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); UNP_LINK_RLOCK(); for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { UNP_PCB_LOCK(unp); if (unp->unp_gencnt <= gencnt) { if (cr_cansee(req->td->td_ucred, unp->unp_socket->so_cred)) { UNP_PCB_UNLOCK(unp); continue; } unp_list[i++] = unp; unp_pcb_hold(unp); } UNP_PCB_UNLOCK(unp); } UNP_LINK_RUNLOCK(); n = i; /* In case we lost some during malloc. */ error = 0; xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); for (i = 0; i < n; i++) { unp = unp_list[i]; UNP_PCB_LOCK(unp); if (unp_pcb_rele(unp)) continue; if (unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; xu->xu_unpp = (uintptr_t)unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. */ if (unp->unp_addr != NULL) bcopy(unp->unp_addr, &xu->xu_addr, unp->unp_addr->sun_len); else bzero(&xu->xu_addr, sizeof(xu->xu_addr)); if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) bcopy(unp->unp_conn->unp_addr, &xu->xu_caddr, unp->unp_conn->unp_addr->sun_len); else bzero(&xu->xu_caddr, sizeof(xu->xu_caddr)); xu->unp_vnode = (uintptr_t)unp->unp_vnode; xu->unp_conn = (uintptr_t)unp->unp_conn; xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs); xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink); xu->unp_gencnt = unp->unp_gencnt; sotoxsocket(unp->unp_socket, &xu->xu_socket); UNP_PCB_UNLOCK(unp); error = SYSCTL_OUT(req, xu, sizeof *xu); } else { UNP_PCB_UNLOCK(unp); } } free(xu, M_TEMP); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. 
*/ xug->xug_gen = unp_gencnt; xug->xug_sogen = so_gencnt; xug->xug_count = unp_count; error = SYSCTL_OUT(req, xug, sizeof *xug); } free(unp_list, M_TEMP); free(xug, M_TEMP); return (error); } SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", "List of active local datagram sockets"); SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", "List of active local seqpacket sockets"); static void unp_shutdown(struct unpcb *unp) { struct unpcb *unp2; struct socket *so; UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; if ((unp->unp_socket->so_type == SOCK_STREAM || (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) { so = unp2->unp_socket; if (so != NULL) socantrcvmore(so); } } static void unp_drop(struct unpcb *unp) { struct socket *so; struct unpcb *unp2; /* * Regardless of whether the socket's peer dropped the connection * with this socket by aborting or disconnecting, POSIX requires * that ECONNRESET is returned. */ UNP_PCB_LOCK(unp); so = unp->unp_socket; if (so) so->so_error = ECONNRESET; if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) { /* Last reference dropped in unp_disconnect(). */ unp_pcb_rele_notlast(unp); unp_disconnect(unp, unp2); } else if (!unp_pcb_rele(unp)) { UNP_PCB_UNLOCK(unp); } } static void unp_freerights(struct filedescent **fdep, int fdcount) { struct file *fp; int i; KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount)); for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; filecaps_free(&fdep[i]->fde_caps); unp_discard(fp); } free(fdep[0], M_FILECAPS); } static int unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags) { struct thread *td = curthread; /* XXX */ struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int *fdp; struct filedesc *fdesc = td->td_proc->p_fd; struct filedescent **fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; u_int newlen; UNP_LINK_UNLOCK_ASSERT(); error = 0; if (controlp != NULL) /* controlp == NULL => free control messages */ *controlp = NULL; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { newfds = datalen / sizeof(*fdep); if (newfds == 0) goto next; fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { unp_freerights(fdep, newfds); goto next; } FILEDESC_XLOCK(fdesc); /* * Now change each pointer to an fd in the global * table to an integer that is the index to the local * fd table entry that we set up to point to the * global one we are transferring. 
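 * In other words, each struct file pointer carried in the SCM_RIGHTS
 * payload is replaced by a freshly allocated descriptor in the
 * receiving process (fdallocn() + _finstall()), so the receiver only
 * ever sees ordinary small integers.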
*/ newlen = newfds * sizeof(int); *controlp = sbcreatecontrol(NULL, newlen, - SCM_RIGHTS, SOL_SOCKET); + SCM_RIGHTS, SOL_SOCKET, M_NOWAIT); if (*controlp == NULL) { FILEDESC_XUNLOCK(fdesc); error = E2BIG; unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); if (fdallocn(td, 0, fdp, newfds) != 0) { FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; unp_freerights(fdep, newfds); m_freem(*controlp); *controlp = NULL; goto next; } for (i = 0; i < newfds; i++, fdp++) { _finstall(fdesc, fdep[i]->fde_file, *fdp, (flags & MSG_CMSG_CLOEXEC) != 0 ? O_CLOEXEC : 0, &fdep[i]->fde_caps); unp_externalize_fp(fdep[i]->fde_file); } /* * The new type indicates that the mbuf data refers to * kernel resources that may need to be released before * the mbuf is freed. */ m_chtype(*controlp, MT_EXTCONTROL); FILEDESC_XUNLOCK(fdesc); free(fdep[0], M_FILECAPS); } else { /* We can just copy anything else across. */ if (error || controlp == NULL) goto next; *controlp = sbcreatecontrol(NULL, datalen, - cm->cmsg_type, cm->cmsg_level); + cm->cmsg_type, cm->cmsg_level, M_NOWAIT); if (*controlp == NULL) { error = ENOBUFS; goto next; } bcopy(data, CMSG_DATA(mtod(*controlp, struct cmsghdr *)), datalen); } controlp = &(*controlp)->m_next; next: if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m_freem(control); return (error); } static void unp_zone_change(void *tag) { uma_zone_set_max(unp_zone, maxsockets); } #ifdef INVARIANTS static void unp_zdtor(void *mem, int size __unused, void *arg __unused) { struct unpcb *unp; unp = mem; KASSERT(LIST_EMPTY(&unp->unp_refs), ("%s: unpcb %p has lingering refs", __func__, unp)); KASSERT(unp->unp_socket == NULL, ("%s: unpcb %p has socket backpointer", __func__, unp)); KASSERT(unp->unp_vnode == NULL, ("%s: unpcb %p has vnode references", __func__, unp)); KASSERT(unp->unp_conn == NULL, ("%s: unpcb %p is still connected", __func__, unp)); KASSERT(unp->unp_addr == NULL, ("%s: unpcb %p has leaked addr", __func__, unp)); } #endif static void unp_init(void *arg __unused) { uma_dtor dtor; #ifdef INVARIANTS dtor = unp_zdtor; #else dtor = NULL; #endif unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(unp_zone, maxsockets); uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, NULL, EVENTHANDLER_PRI_ANY); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); LIST_INIT(&unp_sphead); SLIST_INIT(&unp_defers); TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL); TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL); UNP_LINK_LOCK_INIT(); UNP_DEFERRED_LOCK_INIT(); } SYSINIT(unp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, unp_init, NULL); static void unp_internalize_cleanup_rights(struct mbuf *control) { struct cmsghdr *cp; struct mbuf *m; void *data; socklen_t datalen; for (m = control; m != NULL; m = m->m_next) { cp = mtod(m, struct cmsghdr *); if (cp->cmsg_level != SOL_SOCKET || cp->cmsg_type != SCM_RIGHTS) continue; data = CMSG_DATA(cp); datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data; unp_freerights(data, datalen / sizeof(struct filedesc *)); } } static int unp_internalize(struct mbuf **controlp, struct thread *td) { struct mbuf *control, **initial_controlp; struct proc *p; struct filedesc *fdesc; struct bintime *bt; struct cmsghdr *cm; struct cmsgcred *cmcred; struct filedescent *fde, **fdep, *fdev; 
struct file *fp; struct timeval *tv; struct timespec *ts; void *data; socklen_t clen, datalen; int i, j, error, *fdp, oldfds; u_int newlen; UNP_LINK_UNLOCK_ASSERT(); p = td->td_proc; fdesc = p->p_fd; error = 0; control = *controlp; clen = control->m_len; *controlp = NULL; initial_controlp = controlp; for (cm = mtod(control, struct cmsghdr *); cm != NULL;) { if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) { error = EINVAL; goto out; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; switch (cm->cmsg_type) { /* * Fill in credential information. */ case SCM_CREDS: - *controlp = sbcreatecontrol_how(NULL, sizeof(*cmcred), + *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), SCM_CREDS, SOL_SOCKET, M_WAITOK); cmcred = (struct cmsgcred *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); cmcred->cmcred_pid = p->p_pid; cmcred->cmcred_uid = td->td_ucred->cr_ruid; cmcred->cmcred_gid = td->td_ucred->cr_rgid; cmcred->cmcred_euid = td->td_ucred->cr_uid; cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); for (i = 0; i < cmcred->cmcred_ngroups; i++) cmcred->cmcred_groups[i] = td->td_ucred->cr_groups[i]; break; case SCM_RIGHTS: oldfds = datalen / sizeof (int); if (oldfds == 0) break; /* * Check that all the FDs passed in refer to legal * files. If not, reject the entire operation. */ fdp = data; FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++, fdp++) { fp = fget_noref(fdesc, *fdp); if (fp == NULL) { FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } } /* * Now replace the integer FDs with pointers to the * file structure and capability rights. */ newlen = oldfds * sizeof(fdep[0]); - *controlp = sbcreatecontrol_how(NULL, newlen, + *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET, M_WAITOK); fdp = data; for (i = 0; i < oldfds; i++, fdp++) { if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) { fdp = data; for (j = 0; j < i; j++, fdp++) { fdrop(fdesc->fd_ofiles[*fdp]. 
fde_file, td); } FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } } fdp = data; fdep = (struct filedescent **) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS, M_WAITOK); for (i = 0; i < oldfds; i++, fdev++, fdp++) { fde = &fdesc->fd_ofiles[*fdp]; fdep[i] = fdev; fdep[i]->fde_file = fde->fde_file; filecaps_copy(&fde->fde_caps, &fdep[i]->fde_caps, true); unp_internalize_fp(fdep[i]->fde_file); } FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: - *controlp = sbcreatecontrol_how(NULL, sizeof(*tv), + *controlp = sbcreatecontrol(NULL, sizeof(*tv), SCM_TIMESTAMP, SOL_SOCKET, M_WAITOK); tv = (struct timeval *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); microtime(tv); break; case SCM_BINTIME: - *controlp = sbcreatecontrol_how(NULL, sizeof(*bt), + *controlp = sbcreatecontrol(NULL, sizeof(*bt), SCM_BINTIME, SOL_SOCKET, M_WAITOK); bt = (struct bintime *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); bintime(bt); break; case SCM_REALTIME: - *controlp = sbcreatecontrol_how(NULL, sizeof(*ts), + *controlp = sbcreatecontrol(NULL, sizeof(*ts), SCM_REALTIME, SOL_SOCKET, M_WAITOK); ts = (struct timespec *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); nanotime(ts); break; case SCM_MONOTONIC: - *controlp = sbcreatecontrol_how(NULL, sizeof(*ts), + *controlp = sbcreatecontrol(NULL, sizeof(*ts), SCM_MONOTONIC, SOL_SOCKET, M_WAITOK); ts = (struct timespec *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); nanouptime(ts); break; default: error = EINVAL; goto out; } if (*controlp != NULL) controlp = &(*controlp)->m_next; if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } out: if (error != 0 && initial_controlp != NULL) unp_internalize_cleanup_rights(*initial_controlp); m_freem(control); return (error); } static struct mbuf * unp_addsockcred(struct thread *td, struct mbuf *control, int mode) { struct mbuf *m, *n, *n_prev; const struct cmsghdr *cm; int ngroups, i, cmsgtype; size_t ctrlsz; ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); if (mode & UNP_WANTCRED_ALWAYS) { ctrlsz = SOCKCRED2SIZE(ngroups); cmsgtype = SCM_CREDS2; } else { ctrlsz = SOCKCREDSIZE(ngroups); cmsgtype = SCM_CREDS; } - m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET); + m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET, M_NOWAIT); if (m == NULL) return (control); if (mode & UNP_WANTCRED_ALWAYS) { struct sockcred2 *sc; sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *)); sc->sc_version = 0; sc->sc_pid = td->td_proc->p_pid; sc->sc_uid = td->td_ucred->cr_ruid; sc->sc_euid = td->td_ucred->cr_uid; sc->sc_gid = td->td_ucred->cr_rgid; sc->sc_egid = td->td_ucred->cr_gid; sc->sc_ngroups = ngroups; for (i = 0; i < sc->sc_ngroups; i++) sc->sc_groups[i] = td->td_ucred->cr_groups[i]; } else { struct sockcred *sc; sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *)); sc->sc_uid = td->td_ucred->cr_ruid; sc->sc_euid = td->td_ucred->cr_uid; sc->sc_gid = td->td_ucred->cr_rgid; sc->sc_egid = td->td_ucred->cr_gid; sc->sc_ngroups = ngroups; for (i = 0; i < sc->sc_ngroups; i++) sc->sc_groups[i] = td->td_ucred->cr_groups[i]; } /* * Unlink SCM_CREDS control messages (struct cmsgcred), since just * created SCM_CREDS control message (struct sockcred) has another * format. 
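 * Leaving both in place would hand the receiver two SCM_CREDS messages
 * with incompatible layouts and no way to tell them apart.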
*/ if (control != NULL && cmsgtype == SCM_CREDS) for (n = control, n_prev = NULL; n != NULL;) { cm = mtod(n, struct cmsghdr *); if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_CREDS) { if (n_prev == NULL) control = n->m_next; else n_prev->m_next = n->m_next; n = m_free(n); } else { n_prev = n; n = n->m_next; } } /* Prepend it to the head. */ m->m_next = control; return (m); } static struct unpcb * fptounp(struct file *fp) { struct socket *so; if (fp->f_type != DTYPE_SOCKET) return (NULL); if ((so = fp->f_data) == NULL) return (NULL); if (so->so_proto->pr_domain != &localdomain) return (NULL); return sotounpcb(so); } static void unp_discard(struct file *fp) { struct unp_defer *dr; if (unp_externalize_fp(fp)) { dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK); dr->ud_fp = fp; UNP_DEFERRED_LOCK(); SLIST_INSERT_HEAD(&unp_defers, dr, ud_link); UNP_DEFERRED_UNLOCK(); atomic_add_int(&unp_defers_count, 1); taskqueue_enqueue(taskqueue_thread, &unp_defer_task); } else closef_nothread(fp); } static void unp_process_defers(void *arg __unused, int pending) { struct unp_defer *dr; SLIST_HEAD(, unp_defer) drl; int count; SLIST_INIT(&drl); for (;;) { UNP_DEFERRED_LOCK(); if (SLIST_FIRST(&unp_defers) == NULL) { UNP_DEFERRED_UNLOCK(); break; } SLIST_SWAP(&unp_defers, &drl, unp_defer); UNP_DEFERRED_UNLOCK(); count = 0; while ((dr = SLIST_FIRST(&drl)) != NULL) { SLIST_REMOVE_HEAD(&drl, ud_link); closef_nothread(dr->ud_fp); free(dr, M_TEMP); count++; } atomic_add_int(&unp_defers_count, -count); } } static void unp_internalize_fp(struct file *fp) { struct unpcb *unp; UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) { unp->unp_file = fp; unp->unp_msgcount++; } unp_rights++; UNP_LINK_WUNLOCK(); } static int unp_externalize_fp(struct file *fp) { struct unpcb *unp; int ret; UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) { unp->unp_msgcount--; ret = 1; } else ret = 0; unp_rights--; UNP_LINK_WUNLOCK(); return (ret); } /* * unp_defer indicates whether additional work has been defered for a future * pass through unp_gc(). It is thread local and does not require explicit * synchronization. */ static int unp_marked; static void unp_remove_dead_ref(struct filedescent **fdep, int fdcount) { struct unpcb *unp; struct file *fp; int i; /* * This function can only be called from the gc task. */ KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0, ("%s: not on gc callout", __func__)); UNP_LINK_LOCK_ASSERT(); for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; if ((unp = fptounp(fp)) == NULL) continue; if ((unp->unp_gcflag & UNPGC_DEAD) == 0) continue; unp->unp_gcrefs--; } } static void unp_restore_undead_ref(struct filedescent **fdep, int fdcount) { struct unpcb *unp; struct file *fp; int i; /* * This function can only be called from the gc task. */ KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0, ("%s: not on gc callout", __func__)); UNP_LINK_LOCK_ASSERT(); for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; if ((unp = fptounp(fp)) == NULL) continue; if ((unp->unp_gcflag & UNPGC_DEAD) == 0) continue; unp->unp_gcrefs++; unp_marked++; } } static void unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int)) { struct socket *so, *soa; so = unp->unp_socket; SOCK_LOCK(so); if (SOLISTENING(so)) { /* * Mark all sockets in our accept queue. 
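 * Connections that completed the handshake but were never accept(2)ed
 * may still hold SCM_RIGHTS in their receive buffers, so they are
 * scanned as well.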
*/ TAILQ_FOREACH(soa, &so->sol_comp, so_list) { if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) continue; SOCKBUF_LOCK(&soa->so_rcv); unp_scan(soa->so_rcv.sb_mb, op); SOCKBUF_UNLOCK(&soa->so_rcv); } } else { /* * Mark all sockets we reference with RIGHTS. */ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) { SOCKBUF_LOCK(&so->so_rcv); unp_scan(so->so_rcv.sb_mb, op); SOCKBUF_UNLOCK(&so->so_rcv); } } SOCK_UNLOCK(so); } static int unp_recycled; SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "Number of unreachable sockets claimed by the garbage collector."); static int unp_taskcount; SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "Number of times the garbage collector has run."); SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, "Number of active local sockets."); static void unp_gc(__unused void *arg, int pending) { struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead, NULL }; struct unp_head **head; struct unp_head unp_deadhead; /* List of potentially-dead sockets. */ struct file *f, **unref; struct unpcb *unp, *unptmp; int i, total, unp_unreachable; LIST_INIT(&unp_deadhead); unp_taskcount++; UNP_LINK_RLOCK(); /* * First determine which sockets may be in cycles. */ unp_unreachable = 0; for (head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) { KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0, ("%s: unp %p has unexpected gc flags 0x%x", __func__, unp, (unsigned int)unp->unp_gcflag)); f = unp->unp_file; /* * Check for an unreachable socket potentially in a * cycle. It must be in a queue as indicated by * msgcount, and this must equal the file reference * count. Note that when msgcount is 0 the file is * NULL. */ if (f != NULL && unp->unp_msgcount != 0 && refcount_load(&f->f_count) == unp->unp_msgcount) { LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead); unp->unp_gcflag |= UNPGC_DEAD; unp->unp_gcrefs = unp->unp_msgcount; unp_unreachable++; } } /* * Scan all sockets previously marked as potentially being in a cycle * and remove the references each socket holds on any UNPGC_DEAD * sockets in its queue. After this step, all remaining references on * sockets marked UNPGC_DEAD should not be part of any cycle. */ LIST_FOREACH(unp, &unp_deadhead, unp_dead) unp_gc_scan(unp, unp_remove_dead_ref); /* * If a socket still has a non-negative refcount, it cannot be in a * cycle. In this case increment refcount of all children iteratively. * Stop the scan once we do a complete loop without discovering * a new reachable socket. */ do { unp_marked = 0; LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp) if (unp->unp_gcrefs > 0) { unp->unp_gcflag &= ~UNPGC_DEAD; LIST_REMOVE(unp, unp_dead); KASSERT(unp_unreachable > 0, ("%s: unp_unreachable underflow.", __func__)); unp_unreachable--; unp_gc_scan(unp, unp_restore_undead_ref); } } while (unp_marked); UNP_LINK_RUNLOCK(); if (unp_unreachable == 0) return; /* * Allocate space for a local array of dead unpcbs. * TODO: can this path be simplified by instead using the local * dead list at unp_deadhead, after taking out references * on the file object and/or unpcb and dropping the link lock? */ unref = malloc(unp_unreachable * sizeof(struct file *), M_TEMP, M_WAITOK); /* * Iterate looking for sockets which have been specifically marked * as unreachable and store them locally. 
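 * The msgcount == f_count check is repeated here because the link lock
 * was dropped across the allocation above; fhold() then keeps each file
 * alive until it is flushed and dropped below.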
*/ UNP_LINK_RLOCK(); total = 0; LIST_FOREACH(unp, &unp_deadhead, unp_dead) { KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0, ("%s: unp %p not marked UNPGC_DEAD", __func__, unp)); unp->unp_gcflag &= ~UNPGC_DEAD; f = unp->unp_file; if (unp->unp_msgcount == 0 || f == NULL || refcount_load(&f->f_count) != unp->unp_msgcount || !fhold(f)) continue; unref[total++] = f; KASSERT(total <= unp_unreachable, ("%s: incorrect unreachable count.", __func__)); } UNP_LINK_RUNLOCK(); /* * Now flush all sockets, free'ing rights. This will free the * struct files associated with these sockets but leave each socket * with one remaining ref. */ for (i = 0; i < total; i++) { struct socket *so; so = unref[i]->f_data; CURVNET_SET(so->so_vnet); sorflush(so); CURVNET_RESTORE(); } /* * And finally release the sockets so they can be reclaimed. */ for (i = 0; i < total; i++) fdrop(unref[i], NULL); unp_recycled += total; free(unref, M_TEMP); } /* * Synchronize against unp_gc, which can trip over data as we are freeing it. */ static void unp_dispose(struct socket *so) { struct sockbuf *sb = &so->so_rcv; struct unpcb *unp; struct mbuf *m; MPASS(!SOLISTENING(so)); unp = sotounpcb(so); UNP_LINK_WLOCK(); unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS; UNP_LINK_WUNLOCK(); /* * Grab our special mbufs before calling sbrelease(). */ SOCK_RECVBUF_LOCK(so); m = sbcut_locked(sb, sb->sb_ccc); KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, ("%s: ccc %u mb %p mbcnt %u", __func__, sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); sbrelease_locked(so, SO_RCV); SOCK_RECVBUF_UNLOCK(so); if (SOCK_IO_RECV_OWNED(so)) SOCK_IO_RECV_UNLOCK(so); if (m != NULL) { unp_scan(m, unp_freerights); m_freem(m); } } static void unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int)) { struct mbuf *m; struct cmsghdr *cm; void *data; socklen_t clen, datalen; while (m0 != NULL) { for (m = m0; m; m = m->m_next) { if (m->m_type != MT_CONTROL) continue; cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_len > clen) break; data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { (*op)(data, datalen / sizeof(struct filedescent *)); } if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } } m0 = m0->m_nextpkt; } } /* * A helper function called by VFS before socket-type vnode reclamation. * For an active vnode it clears unp_vnode pointer and decrements unp_vnode * use count. */ void vfs_unp_reclaim(struct vnode *vp) { struct unpcb *unp; int active; struct mtx *vplock; ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim"); KASSERT(vp->v_type == VSOCK, ("vfs_unp_reclaim: vp->v_type != VSOCK")); active = 0; vplock = mtx_pool_find(mtxpool_sleep, vp); mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp); if (unp == NULL) goto done; UNP_PCB_LOCK(unp); if (unp->unp_vnode == vp) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; active = 1; } UNP_PCB_UNLOCK(unp); done: mtx_unlock(vplock); if (active) vunref(vp); } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_unpflags(int unp_flags) { int comma; comma = 0; if (unp_flags & UNP_HAVEPC) { db_printf("%sUNP_HAVEPC", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_WANTCRED_ALWAYS) { db_printf("%sUNP_WANTCRED_ALWAYS", comma ? 
", " : ""); comma = 1; } if (unp_flags & UNP_WANTCRED_ONESHOT) { db_printf("%sUNP_WANTCRED_ONESHOT", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNWAIT) { db_printf("%sUNP_CONNWAIT", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNECTING) { db_printf("%sUNP_CONNECTING", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_BINDING) { db_printf("%sUNP_BINDING", comma ? ", " : ""); comma = 1; } } static void db_print_xucred(int indent, struct xucred *xu) { int comma, i; db_print_indent(indent); db_printf("cr_version: %u cr_uid: %u cr_pid: %d cr_ngroups: %d\n", xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups); db_print_indent(indent); db_printf("cr_groups: "); comma = 0; for (i = 0; i < xu->cr_ngroups; i++) { db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]); comma = 1; } db_printf("\n"); } static void db_print_unprefs(int indent, struct unp_head *uh) { struct unpcb *unp; int counter; counter = 0; LIST_FOREACH(unp, uh, unp_reflink) { if (counter % 4 == 0) db_print_indent(indent); db_printf("%p ", unp); if (counter % 4 == 3) db_printf("\n"); counter++; } if (counter != 0 && counter % 4 != 0) db_printf("\n"); } DB_SHOW_COMMAND(unpcb, db_show_unpcb) { struct unpcb *unp; if (!have_addr) { db_printf("usage: show unpcb \n"); return; } unp = (struct unpcb *)addr; db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, unp->unp_vnode); db_printf("unp_ino: %ju unp_conn: %p\n", (uintmax_t)unp->unp_ino, unp->unp_conn); db_printf("unp_refs:\n"); db_print_unprefs(2, &unp->unp_refs); /* XXXRW: Would be nice to print the full address, if any. */ db_printf("unp_addr: %p\n", unp->unp_addr); db_printf("unp_gencnt: %llu\n", (unsigned long long)unp->unp_gencnt); db_printf("unp_flags: %x (", unp->unp_flags); db_print_unpflags(unp->unp_flags); db_printf(")\n"); db_printf("unp_peercred:\n"); db_print_xucred(2, &unp->unp_peercred); db_printf("unp_refcount: %u\n", unp->unp_refcount); } #endif diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c index 59bacf8c965b..03894464a69a 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c @@ -1,1681 +1,1681 @@ /* * ng_btsocket_hci_raw.c */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001-2002 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: ng_btsocket_hci_raw.c,v 1.14 2003/09/14 23:29:06 max Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* MALLOC define */ #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_HCI_RAW, "netgraph_btsocks_hci_raw", "Netgraph Bluetooth raw HCI sockets"); #else #define M_NETGRAPH_BTSOCKET_HCI_RAW M_NETGRAPH #endif /* NG_SEPARATE_MALLOC */ /* Netgraph node methods */ static ng_constructor_t ng_btsocket_hci_raw_node_constructor; static ng_rcvmsg_t ng_btsocket_hci_raw_node_rcvmsg; static ng_shutdown_t ng_btsocket_hci_raw_node_shutdown; static ng_newhook_t ng_btsocket_hci_raw_node_newhook; static ng_connect_t ng_btsocket_hci_raw_node_connect; static ng_rcvdata_t ng_btsocket_hci_raw_node_rcvdata; static ng_disconnect_t ng_btsocket_hci_raw_node_disconnect; static void ng_btsocket_hci_raw_input (void *, int); static void ng_btsocket_hci_raw_output(node_p, hook_p, void *, int); static void ng_btsocket_hci_raw_savctl(ng_btsocket_hci_raw_pcb_p, struct mbuf **, struct mbuf *); static int ng_btsocket_hci_raw_filter(ng_btsocket_hci_raw_pcb_p, struct mbuf *, int); #define ng_btsocket_hci_raw_wakeup_input_task() \ taskqueue_enqueue(taskqueue_swi, &ng_btsocket_hci_raw_task) /* Security filter */ struct ng_btsocket_hci_raw_sec_filter { bitstr_t bit_decl(events, 0xff); bitstr_t bit_decl(commands[0x3f], 0x3ff); }; /* Netgraph type descriptor */ static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_BTSOCKET_HCI_RAW_NODE_TYPE, .constructor = ng_btsocket_hci_raw_node_constructor, .rcvmsg = ng_btsocket_hci_raw_node_rcvmsg, .shutdown = ng_btsocket_hci_raw_node_shutdown, .newhook = ng_btsocket_hci_raw_node_newhook, .connect = ng_btsocket_hci_raw_node_connect, .rcvdata = ng_btsocket_hci_raw_node_rcvdata, .disconnect = ng_btsocket_hci_raw_node_disconnect, }; /* Globals */ static u_int32_t ng_btsocket_hci_raw_debug_level; static u_int32_t ng_btsocket_hci_raw_ioctl_timeout; static node_p ng_btsocket_hci_raw_node; static struct ng_bt_itemq ng_btsocket_hci_raw_queue; static struct mtx ng_btsocket_hci_raw_queue_mtx; static struct task ng_btsocket_hci_raw_task; static LIST_HEAD(, ng_btsocket_hci_raw_pcb) ng_btsocket_hci_raw_sockets; static struct mtx ng_btsocket_hci_raw_sockets_mtx; static u_int32_t ng_btsocket_hci_raw_token; static struct mtx ng_btsocket_hci_raw_token_mtx; static struct ng_btsocket_hci_raw_sec_filter *ng_btsocket_hci_raw_sec_filter; static struct timeval ng_btsocket_hci_raw_lasttime; static int ng_btsocket_hci_raw_curpps; /* Sysctl tree */ SYSCTL_DECL(_net_bluetooth_hci_sockets); static SYSCTL_NODE(_net_bluetooth_hci_sockets, OID_AUTO, raw, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Bluetooth raw HCI sockets family"); SYSCTL_UINT(_net_bluetooth_hci_sockets_raw, OID_AUTO, debug_level, CTLFLAG_RW, &ng_btsocket_hci_raw_debug_level, 
NG_BTSOCKET_WARN_LEVEL, "Bluetooth raw HCI sockets debug level"); SYSCTL_UINT(_net_bluetooth_hci_sockets_raw, OID_AUTO, ioctl_timeout, CTLFLAG_RW, &ng_btsocket_hci_raw_ioctl_timeout, 5, "Bluetooth raw HCI sockets ioctl timeout"); SYSCTL_UINT(_net_bluetooth_hci_sockets_raw, OID_AUTO, queue_len, CTLFLAG_RD, &ng_btsocket_hci_raw_queue.len, 0, "Bluetooth raw HCI sockets input queue length"); SYSCTL_UINT(_net_bluetooth_hci_sockets_raw, OID_AUTO, queue_maxlen, CTLFLAG_RD, &ng_btsocket_hci_raw_queue.maxlen, 0, "Bluetooth raw HCI sockets input queue max. length"); SYSCTL_UINT(_net_bluetooth_hci_sockets_raw, OID_AUTO, queue_drops, CTLFLAG_RD, &ng_btsocket_hci_raw_queue.drops, 0, "Bluetooth raw HCI sockets input queue drops"); /* Debug */ #define NG_BTSOCKET_HCI_RAW_INFO \ if (ng_btsocket_hci_raw_debug_level >= NG_BTSOCKET_INFO_LEVEL && \ ppsratecheck(&ng_btsocket_hci_raw_lasttime, &ng_btsocket_hci_raw_curpps, 1)) \ printf #define NG_BTSOCKET_HCI_RAW_WARN \ if (ng_btsocket_hci_raw_debug_level >= NG_BTSOCKET_WARN_LEVEL && \ ppsratecheck(&ng_btsocket_hci_raw_lasttime, &ng_btsocket_hci_raw_curpps, 1)) \ printf #define NG_BTSOCKET_HCI_RAW_ERR \ if (ng_btsocket_hci_raw_debug_level >= NG_BTSOCKET_ERR_LEVEL && \ ppsratecheck(&ng_btsocket_hci_raw_lasttime, &ng_btsocket_hci_raw_curpps, 1)) \ printf #define NG_BTSOCKET_HCI_RAW_ALERT \ if (ng_btsocket_hci_raw_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \ ppsratecheck(&ng_btsocket_hci_raw_lasttime, &ng_btsocket_hci_raw_curpps, 1)) \ printf /**************************************************************************** **************************************************************************** ** Netgraph specific **************************************************************************** ****************************************************************************/ /* * Netgraph node constructor. Do not allow to create node of this type. */ static int ng_btsocket_hci_raw_node_constructor(node_p node) { return (EINVAL); } /* ng_btsocket_hci_raw_node_constructor */ /* * Netgraph node destructor. Just let old node go and create new fresh one. */ static int ng_btsocket_hci_raw_node_shutdown(node_p node) { int error = 0; NG_NODE_UNREF(node); error = ng_make_node_common(&typestruct, &ng_btsocket_hci_raw_node); if (error != 0) { NG_BTSOCKET_HCI_RAW_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_hci_raw_node = NULL; return (ENOMEM); } error = ng_name_node(ng_btsocket_hci_raw_node, NG_BTSOCKET_HCI_RAW_NODE_TYPE); if (error != 0) { NG_BTSOCKET_HCI_RAW_ALERT( "%s: Could not name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_hci_raw_node); ng_btsocket_hci_raw_node = NULL; return (EINVAL); } return (0); } /* ng_btsocket_hci_raw_node_shutdown */ /* * Create new hook. Just say "yes" */ static int ng_btsocket_hci_raw_node_newhook(node_p node, hook_p hook, char const *name) { return (0); } /* ng_btsocket_hci_raw_node_newhook */ /* * Connect hook. Just say "yes" */ static int ng_btsocket_hci_raw_node_connect(hook_p hook) { return (0); } /* ng_btsocket_hci_raw_node_connect */ /* * Disconnect hook */ static int ng_btsocket_hci_raw_node_disconnect(hook_p hook) { return (0); } /* ng_btsocket_hci_raw_node_disconnect */ /* * Receive control message. * Make sure it is a message from HCI node and it is a response. * Enqueue item and schedule input task. 
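 * Responses carrying a zero token are dropped silently, since no socket
 * can be waiting on them.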
*/ static int ng_btsocket_hci_raw_node_rcvmsg(node_p node, item_p item, hook_p lasthook) { struct ng_mesg *msg = NGI_MSG(item); /* item still has message */ int error = 0; /* * Check for empty sockets list creates LOR when both sender and * receiver device are connected to the same host, so remove it * for now */ if (msg != NULL && (msg->header.typecookie == NGM_HCI_COOKIE || msg->header.typecookie == NGM_GENERIC_COOKIE) && msg->header.flags & NGF_RESP) { if (msg->header.token == 0) { NG_FREE_ITEM(item); return (0); } mtx_lock(&ng_btsocket_hci_raw_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_hci_raw_queue)) { NG_BTSOCKET_HCI_RAW_ERR( "%s: Input queue is full\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_hci_raw_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_hci_raw_queue, item); error = ng_btsocket_hci_raw_wakeup_input_task(); } mtx_unlock(&ng_btsocket_hci_raw_queue_mtx); } else { NG_FREE_ITEM(item); error = EINVAL; } return (error); } /* ng_btsocket_hci_raw_node_rcvmsg */ /* * Receive packet from the one of our hook. * Prepend every packet with sockaddr_hci and record sender's node name. * Enqueue item and schedule input task. */ static int ng_btsocket_hci_raw_node_rcvdata(hook_p hook, item_p item) { struct mbuf *nam = NULL; int error; /* * Check for empty sockets list creates LOR when both sender and * receiver device are connected to the same host, so remove it * for now */ MGET(nam, M_NOWAIT, MT_SONAME); if (nam != NULL) { struct sockaddr_hci *sa = mtod(nam, struct sockaddr_hci *); nam->m_len = sizeof(struct sockaddr_hci); sa->hci_len = sizeof(*sa); sa->hci_family = AF_BLUETOOTH; strlcpy(sa->hci_node, NG_PEER_NODE_NAME(hook), sizeof(sa->hci_node)); NGI_GET_M(item, nam->m_next); NGI_M(item) = nam; mtx_lock(&ng_btsocket_hci_raw_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_hci_raw_queue)) { NG_BTSOCKET_HCI_RAW_ERR( "%s: Input queue is full\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_hci_raw_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_hci_raw_queue, item); error = ng_btsocket_hci_raw_wakeup_input_task(); } mtx_unlock(&ng_btsocket_hci_raw_queue_mtx); } else { NG_BTSOCKET_HCI_RAW_ERR( "%s: Failed to allocate address mbuf\n", __func__); NG_FREE_ITEM(item); error = ENOBUFS; } return (error); } /* ng_btsocket_hci_raw_node_rcvdata */ /**************************************************************************** **************************************************************************** ** Sockets specific **************************************************************************** ****************************************************************************/ /* * Get next token. We need token to avoid theoretical race where process * submits ioctl() message then interrupts ioctl() and re-submits another * ioctl() on the same socket *before* first ioctl() complete. 
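 * A token value of zero is reserved to mean "no reply expected", so the
 * counter wraps from zero straight to one.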
*/ static void ng_btsocket_hci_raw_get_token(u_int32_t *token) { mtx_lock(&ng_btsocket_hci_raw_token_mtx); if (++ ng_btsocket_hci_raw_token == 0) ng_btsocket_hci_raw_token = 1; *token = ng_btsocket_hci_raw_token; mtx_unlock(&ng_btsocket_hci_raw_token_mtx); } /* ng_btsocket_hci_raw_get_token */ /* * Send Netgraph message to the node - do not expect reply */ static int ng_btsocket_hci_raw_send_ngmsg(char *path, int cmd, void *arg, int arglen) { struct ng_mesg *msg = NULL; int error = 0; NG_MKMESSAGE(msg, NGM_HCI_COOKIE, cmd, arglen, M_NOWAIT); if (msg == NULL) return (ENOMEM); if (arg != NULL && arglen > 0) bcopy(arg, msg->data, arglen); NG_SEND_MSG_PATH(error, ng_btsocket_hci_raw_node, msg, path, 0); return (error); } /* ng_btsocket_hci_raw_send_ngmsg */ /* * Send Netgraph message to the node (no data) and wait for reply */ static int ng_btsocket_hci_raw_send_sync_ngmsg(ng_btsocket_hci_raw_pcb_p pcb, char *path, int cmd, void *rsp, int rsplen) { struct ng_mesg *msg = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); NG_MKMESSAGE(msg, NGM_HCI_COOKIE, cmd, 0, M_NOWAIT); if (msg == NULL) return (ENOMEM); ng_btsocket_hci_raw_get_token(&msg->header.token); pcb->token = msg->header.token; pcb->msg = NULL; NG_SEND_MSG_PATH(error, ng_btsocket_hci_raw_node, msg, path, 0); if (error != 0) { pcb->token = 0; return (error); } error = msleep(&pcb->msg, &pcb->pcb_mtx, PZERO|PCATCH, "hcictl", ng_btsocket_hci_raw_ioctl_timeout * hz); pcb->token = 0; if (error != 0) return (error); if (pcb->msg != NULL && pcb->msg->header.cmd == cmd) bcopy(pcb->msg->data, rsp, rsplen); else error = EINVAL; NG_FREE_MSG(pcb->msg); /* checks for != NULL */ return (0); } /* ng_btsocket_hci_raw_send_sync_ngmsg */ /* * Create control information for the packet */ static void ng_btsocket_hci_raw_savctl(ng_btsocket_hci_raw_pcb_p pcb, struct mbuf **ctl, struct mbuf *m) { int dir; struct timeval tv; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_HCI_RAW_DIRECTION) { dir = (m->m_flags & M_PROTO1)? 1 : 0; - *ctl = sbcreatecontrol((caddr_t) &dir, sizeof(dir), - SCM_HCI_RAW_DIRECTION, SOL_HCI_RAW); + *ctl = sbcreatecontrol(&dir, sizeof(dir), + SCM_HCI_RAW_DIRECTION, SOL_HCI_RAW, M_NOWAIT); if (*ctl != NULL) ctl = &((*ctl)->m_next); } if (pcb->so->so_options & SO_TIMESTAMP) { microtime(&tv); - *ctl = sbcreatecontrol((caddr_t) &tv, sizeof(tv), - SCM_TIMESTAMP, SOL_SOCKET); + *ctl = sbcreatecontrol(&tv, sizeof(tv), SCM_TIMESTAMP, + SOL_SOCKET, M_NOWAIT); if (*ctl != NULL) ctl = &((*ctl)->m_next); } } /* ng_btsocket_hci_raw_savctl */ /* * Raw HCI sockets data input routine */ static void ng_btsocket_hci_raw_data_input(struct mbuf *nam) { ng_btsocket_hci_raw_pcb_p pcb = NULL; struct mbuf *m0 = NULL, *m = NULL; struct sockaddr_hci *sa = NULL; m0 = nam->m_next; nam->m_next = NULL; KASSERT((nam->m_type == MT_SONAME), ("%s: m_type=%d\n", __func__, nam->m_type)); KASSERT((m0->m_flags & M_PKTHDR), ("%s: m_flags=%#x\n", __func__, m0->m_flags)); sa = mtod(nam, struct sockaddr_hci *); mtx_lock(&ng_btsocket_hci_raw_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_hci_raw_sockets, next) { mtx_lock(&pcb->pcb_mtx); /* * If socket was bound then check address and * make sure it matches. */ if (pcb->addr.hci_node[0] != 0 && strcmp(sa->hci_node, pcb->addr.hci_node) != 0) goto next; /* * Check packet against filters * XXX do we have to call m_pullup() here? */ if (ng_btsocket_hci_raw_filter(pcb, m0, 1) != 0) goto next; /* * Make a copy of the packet, append to the socket's * receive queue and wakeup socket. 
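 * Each matching socket gets its own copy of the frame via m_dup(),
 * since more than one raw socket may be bound to the same HCI node.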
sbappendaddr() * will check if socket has enough buffer space. */ m = m_dup(m0, M_NOWAIT); if (m != NULL) { struct mbuf *ctl = NULL; ng_btsocket_hci_raw_savctl(pcb, &ctl, m); if (sbappendaddr(&pcb->so->so_rcv, (struct sockaddr *) sa, m, ctl)) sorwakeup(pcb->so); else { NG_BTSOCKET_HCI_RAW_INFO( "%s: sbappendaddr() failed\n", __func__); NG_FREE_M(m); NG_FREE_M(ctl); soroverflow(pcb->so); } } next: mtx_unlock(&pcb->pcb_mtx); } mtx_unlock(&ng_btsocket_hci_raw_sockets_mtx); NG_FREE_M(nam); NG_FREE_M(m0); } /* ng_btsocket_hci_raw_data_input */ /* * Raw HCI sockets message input routine */ static void ng_btsocket_hci_raw_msg_input(struct ng_mesg *msg) { ng_btsocket_hci_raw_pcb_p pcb = NULL; mtx_lock(&ng_btsocket_hci_raw_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_hci_raw_sockets, next) { mtx_lock(&pcb->pcb_mtx); if (msg->header.token == pcb->token) { pcb->msg = msg; wakeup(&pcb->msg); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_hci_raw_sockets_mtx); return; } mtx_unlock(&pcb->pcb_mtx); } mtx_unlock(&ng_btsocket_hci_raw_sockets_mtx); NG_FREE_MSG(msg); /* checks for != NULL */ } /* ng_btsocket_hci_raw_msg_input */ /* * Raw HCI sockets input routines */ static void ng_btsocket_hci_raw_input(void *context, int pending) { item_p item = NULL; for (;;) { mtx_lock(&ng_btsocket_hci_raw_queue_mtx); NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_hci_raw_queue, item); mtx_unlock(&ng_btsocket_hci_raw_queue_mtx); if (item == NULL) break; switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: { struct mbuf *m = NULL; NGI_GET_M(item, m); ng_btsocket_hci_raw_data_input(m); } break; case NGQF_MESG: { struct ng_mesg *msg = NULL; NGI_GET_MSG(item, msg); ng_btsocket_hci_raw_msg_input(msg); } break; default: KASSERT(0, ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE))); break; } NG_FREE_ITEM(item); } } /* ng_btsocket_hci_raw_input */ /* * Raw HCI sockets output routine */ static void ng_btsocket_hci_raw_output(node_p node, hook_p hook, void *arg1, int arg2) { struct mbuf *nam = (struct mbuf *) arg1, *m = NULL; struct sockaddr_hci *sa = NULL; int error; m = nam->m_next; nam->m_next = NULL; KASSERT((nam->m_type == MT_SONAME), ("%s: m_type=%d\n", __func__, nam->m_type)); KASSERT((m->m_flags & M_PKTHDR), ("%s: m_flags=%#x\n", __func__, m->m_flags)); sa = mtod(nam, struct sockaddr_hci *); /* * Find downstream hook * XXX For now access node hook list directly. Should be safe because * we used ng_send_fn() and we should have exclusive lock on the node. */ LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) { if (hook == NULL || NG_HOOK_NOT_VALID(hook) || NG_NODE_NOT_VALID(NG_PEER_NODE(hook))) continue; if (strcmp(sa->hci_node, NG_PEER_NODE_NAME(hook)) == 0) { NG_SEND_DATA_ONLY(error, hook, m); /* sets m to NULL */ break; } } NG_FREE_M(nam); /* check for != NULL */ NG_FREE_M(m); } /* ng_btsocket_hci_raw_output */ /* * Check frame against security and socket filters. * d (direction bit) == 1 means incoming frame. 
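 * Returns zero when the frame may be delivered and an errno (EPERM or
 * EINVAL) when it must be dropped.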
*/ static int ng_btsocket_hci_raw_filter(ng_btsocket_hci_raw_pcb_p pcb, struct mbuf *m, int d) { int type, event, opcode; mtx_assert(&pcb->pcb_mtx, MA_OWNED); switch ((type = *mtod(m, u_int8_t *))) { case NG_HCI_CMD_PKT: if (!(pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED)) { opcode = le16toh(mtod(m, ng_hci_cmd_pkt_t *)->opcode); if (!bit_test( ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF(opcode) - 1], NG_HCI_OCF(opcode) - 1)) return (EPERM); } if (d && !bit_test(pcb->filter.packet_mask, NG_HCI_CMD_PKT - 1)) return (EPERM); break; case NG_HCI_ACL_DATA_PKT: case NG_HCI_SCO_DATA_PKT: if (!(pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) || !bit_test(pcb->filter.packet_mask, type - 1) || !d) return (EPERM); break; case NG_HCI_EVENT_PKT: if (!d) return (EINVAL); event = mtod(m, ng_hci_event_pkt_t *)->event - 1; if (!(pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED)) if (!bit_test(ng_btsocket_hci_raw_sec_filter->events, event)) return (EPERM); if (!bit_test(pcb->filter.event_mask, event)) return (EPERM); break; default: return (EINVAL); } return (0); } /* ng_btsocket_hci_raw_filter */ /* * Initialize everything */ static void ng_btsocket_hci_raw_init(void *arg __unused) { bitstr_t *f = NULL; int error = 0; ng_btsocket_hci_raw_node = NULL; ng_btsocket_hci_raw_debug_level = NG_BTSOCKET_WARN_LEVEL; ng_btsocket_hci_raw_ioctl_timeout = 5; /* Register Netgraph node type */ error = ng_newtype(&typestruct); if (error != 0) { NG_BTSOCKET_HCI_RAW_ALERT( "%s: Could not register Netgraph node type, error=%d\n", __func__, error); return; } /* Create Netgrapg node */ error = ng_make_node_common(&typestruct, &ng_btsocket_hci_raw_node); if (error != 0) { NG_BTSOCKET_HCI_RAW_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_hci_raw_node = NULL; return; } error = ng_name_node(ng_btsocket_hci_raw_node, NG_BTSOCKET_HCI_RAW_NODE_TYPE); if (error != 0) { NG_BTSOCKET_HCI_RAW_ALERT( "%s: Could not name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_hci_raw_node); ng_btsocket_hci_raw_node = NULL; return; } /* Create input queue */ NG_BT_ITEMQ_INIT(&ng_btsocket_hci_raw_queue, 300); mtx_init(&ng_btsocket_hci_raw_queue_mtx, "btsocks_hci_raw_queue_mtx", NULL, MTX_DEF); TASK_INIT(&ng_btsocket_hci_raw_task, 0, ng_btsocket_hci_raw_input, NULL); /* Create list of sockets */ LIST_INIT(&ng_btsocket_hci_raw_sockets); mtx_init(&ng_btsocket_hci_raw_sockets_mtx, "btsocks_hci_raw_sockets_mtx", NULL, MTX_DEF); /* Tokens */ ng_btsocket_hci_raw_token = 0; mtx_init(&ng_btsocket_hci_raw_token_mtx, "btsocks_hci_raw_token_mtx", NULL, MTX_DEF); /* * Security filter * XXX never free()ed */ ng_btsocket_hci_raw_sec_filter = malloc(sizeof(struct ng_btsocket_hci_raw_sec_filter), M_NETGRAPH_BTSOCKET_HCI_RAW, M_NOWAIT|M_ZERO); if (ng_btsocket_hci_raw_sec_filter == NULL) { printf("%s: Could not allocate security filter!\n", __func__); return; } /* * XXX How paranoid can we get? * * Initialize security filter. If bit is set in the mask then * unprivileged socket is allowed to send (receive) this command * (event). 
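/*
 * Illustrative sketch (not part of this change): widening the per-socket
 * filter consulted by ng_btsocket_hci_raw_filter() above.  A freshly
 * attached socket only passes Command_Complete and Command_Status events
 * (see ng_btsocket_hci_raw_attach() below); a reader that also wants
 * inquiry results has to extend event_mask itself.  The structure and
 * option names follow the code above; the exact userland header is an
 * assumption.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <bitstring.h>
#include <bluetooth.h>

static int
allow_inquiry_results(int s)		/* raw HCI socket */
{
	struct ng_btsocket_hci_raw_filter f;
	socklen_t len = sizeof(f);

	/* Fetch the current filter, extend it, write it back. */
	if (getsockopt(s, SOL_HCI_RAW, SO_HCI_RAW_FILTER, &f, &len) < 0)
		return (-1);
	bit_set(f.event_mask, NG_HCI_EVENT_INQUIRY_RESULT - 1);
	bit_set(f.event_mask, NG_HCI_EVENT_INQUIRY_COMPL - 1);
	return (setsockopt(s, SOL_HCI_RAW, SO_HCI_RAW_FILTER, &f, sizeof(f)));
}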
*/ /* Enable all events */ memset(&ng_btsocket_hci_raw_sec_filter->events, 0xff, sizeof(ng_btsocket_hci_raw_sec_filter->events)/ sizeof(ng_btsocket_hci_raw_sec_filter->events[0])); /* Disable some critical events */ f = ng_btsocket_hci_raw_sec_filter->events; bit_clear(f, NG_HCI_EVENT_RETURN_LINK_KEYS - 1); bit_clear(f, NG_HCI_EVENT_LINK_KEY_NOTIFICATION - 1); bit_clear(f, NG_HCI_EVENT_VENDOR - 1); /* Commands - Link control */ f = ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF_LINK_CONTROL-1]; bit_set(f, NG_HCI_OCF_INQUIRY - 1); bit_set(f, NG_HCI_OCF_INQUIRY_CANCEL - 1); bit_set(f, NG_HCI_OCF_PERIODIC_INQUIRY - 1); bit_set(f, NG_HCI_OCF_EXIT_PERIODIC_INQUIRY - 1); bit_set(f, NG_HCI_OCF_REMOTE_NAME_REQ - 1); bit_set(f, NG_HCI_OCF_READ_REMOTE_FEATURES - 1); bit_set(f, NG_HCI_OCF_READ_REMOTE_VER_INFO - 1); bit_set(f, NG_HCI_OCF_READ_CLOCK_OFFSET - 1); /* Commands - Link policy */ f = ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF_LINK_POLICY-1]; bit_set(f, NG_HCI_OCF_ROLE_DISCOVERY - 1); bit_set(f, NG_HCI_OCF_READ_LINK_POLICY_SETTINGS - 1); /* Commands - Host controller and baseband */ f = ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF_HC_BASEBAND-1]; bit_set(f, NG_HCI_OCF_READ_PIN_TYPE - 1); bit_set(f, NG_HCI_OCF_READ_LOCAL_NAME - 1); bit_set(f, NG_HCI_OCF_READ_CON_ACCEPT_TIMO - 1); bit_set(f, NG_HCI_OCF_READ_PAGE_TIMO - 1); bit_set(f, NG_HCI_OCF_READ_SCAN_ENABLE - 1); bit_set(f, NG_HCI_OCF_READ_PAGE_SCAN_ACTIVITY - 1); bit_set(f, NG_HCI_OCF_READ_INQUIRY_SCAN_ACTIVITY - 1); bit_set(f, NG_HCI_OCF_READ_AUTH_ENABLE - 1); bit_set(f, NG_HCI_OCF_READ_ENCRYPTION_MODE - 1); bit_set(f, NG_HCI_OCF_READ_UNIT_CLASS - 1); bit_set(f, NG_HCI_OCF_READ_VOICE_SETTINGS - 1); bit_set(f, NG_HCI_OCF_READ_AUTO_FLUSH_TIMO - 1); bit_set(f, NG_HCI_OCF_READ_NUM_BROADCAST_RETRANS - 1); bit_set(f, NG_HCI_OCF_READ_HOLD_MODE_ACTIVITY - 1); bit_set(f, NG_HCI_OCF_READ_XMIT_LEVEL - 1); bit_set(f, NG_HCI_OCF_READ_SCO_FLOW_CONTROL - 1); bit_set(f, NG_HCI_OCF_READ_LINK_SUPERVISION_TIMO - 1); bit_set(f, NG_HCI_OCF_READ_SUPPORTED_IAC_NUM - 1); bit_set(f, NG_HCI_OCF_READ_IAC_LAP - 1); bit_set(f, NG_HCI_OCF_READ_PAGE_SCAN_PERIOD - 1); bit_set(f, NG_HCI_OCF_READ_PAGE_SCAN - 1); bit_set(f, NG_HCI_OCF_READ_LE_HOST_SUPPORTED -1); /* Commands - Informational */ f = ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF_INFO - 1]; bit_set(f, NG_HCI_OCF_READ_LOCAL_VER - 1); bit_set(f, NG_HCI_OCF_READ_LOCAL_FEATURES - 1); bit_set(f, NG_HCI_OCF_READ_BUFFER_SIZE - 1); bit_set(f, NG_HCI_OCF_READ_COUNTRY_CODE - 1); bit_set(f, NG_HCI_OCF_READ_BDADDR - 1); /* Commands - Status */ f = ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF_STATUS - 1]; bit_set(f, NG_HCI_OCF_READ_FAILED_CONTACT_CNTR - 1); bit_set(f, NG_HCI_OCF_GET_LINK_QUALITY - 1); bit_set(f, NG_HCI_OCF_READ_RSSI - 1); /* Commands - Testing */ f = ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF_TESTING - 1]; bit_set(f, NG_HCI_OCF_READ_LOOPBACK_MODE - 1); /*Commands - LE*/ f = ng_btsocket_hci_raw_sec_filter->commands[NG_HCI_OGF_LE -1]; bit_set(f, NG_HCI_OCF_LE_SET_SCAN_ENABLE - 1); bit_set(f, NG_HCI_OCF_LE_SET_SCAN_PARAMETERS - 1); bit_set(f, NG_HCI_OCF_LE_READ_LOCAL_SUPPORTED_FEATURES - 1); bit_set(f, NG_HCI_OCF_LE_READ_BUFFER_SIZE - 1); bit_set(f, NG_HCI_OCF_LE_READ_WHITE_LIST_SIZE - 1); } /* ng_btsocket_hci_raw_init */ SYSINIT(ng_btsocket_hci_raw_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ng_btsocket_hci_raw_init, NULL); /* * Abort connection on socket */ void ng_btsocket_hci_raw_abort(struct socket *so) { } /* ng_btsocket_hci_raw_abort */ void 
ng_btsocket_hci_raw_close(struct socket *so) { } /* ng_btsocket_hci_raw_close */ /* * Create new raw HCI socket */ int ng_btsocket_hci_raw_attach(struct socket *so, int proto, struct thread *td) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); int error = 0; if (pcb != NULL) return (EISCONN); if (ng_btsocket_hci_raw_node == NULL) return (EPROTONOSUPPORT); if (proto != BLUETOOTH_PROTO_HCI) return (EPROTONOSUPPORT); if (so->so_type != SOCK_RAW) return (ESOCKTNOSUPPORT); error = soreserve(so, NG_BTSOCKET_HCI_RAW_SENDSPACE, NG_BTSOCKET_HCI_RAW_RECVSPACE); if (error != 0) return (error); pcb = malloc(sizeof(*pcb), M_NETGRAPH_BTSOCKET_HCI_RAW, M_NOWAIT|M_ZERO); if (pcb == NULL) return (ENOMEM); so->so_pcb = (caddr_t) pcb; pcb->so = so; if (priv_check(td, PRIV_NETBLUETOOTH_RAW) == 0) pcb->flags |= NG_BTSOCKET_HCI_RAW_PRIVILEGED; /* * Set default socket filter. By default socket only accepts HCI * Command_Complete and Command_Status event packets. */ bit_set(pcb->filter.event_mask, NG_HCI_EVENT_COMMAND_COMPL - 1); bit_set(pcb->filter.event_mask, NG_HCI_EVENT_COMMAND_STATUS - 1); mtx_init(&pcb->pcb_mtx, "btsocks_hci_raw_pcb_mtx", NULL, MTX_DEF); mtx_lock(&ng_btsocket_hci_raw_sockets_mtx); LIST_INSERT_HEAD(&ng_btsocket_hci_raw_sockets, pcb, next); mtx_unlock(&ng_btsocket_hci_raw_sockets_mtx); return (0); } /* ng_btsocket_hci_raw_attach */ /* * Bind raw HCI socket */ int ng_btsocket_hci_raw_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); struct sockaddr_hci *sa = (struct sockaddr_hci *) nam; if (pcb == NULL) return (EINVAL); if (ng_btsocket_hci_raw_node == NULL) return (EINVAL); if (sa == NULL) return (EINVAL); if (sa->hci_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->hci_len != sizeof(*sa)) return (EINVAL); if (sa->hci_node[0] == 0) return (EINVAL); mtx_lock(&pcb->pcb_mtx); bcopy(sa, &pcb->addr, sizeof(pcb->addr)); mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_hci_raw_bind */ /* * Connect raw HCI socket */ int ng_btsocket_hci_raw_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); struct sockaddr_hci *sa = (struct sockaddr_hci *) nam; if (pcb == NULL) return (EINVAL); if (ng_btsocket_hci_raw_node == NULL) return (EINVAL); if (sa == NULL) return (EINVAL); if (sa->hci_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->hci_len != sizeof(*sa)) return (EINVAL); if (sa->hci_node[0] == 0) return (EDESTADDRREQ); mtx_lock(&pcb->pcb_mtx); if (bcmp(sa, &pcb->addr, sizeof(pcb->addr)) != 0) { mtx_unlock(&pcb->pcb_mtx); return (EADDRNOTAVAIL); } soisconnected(so); mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_hci_raw_connect */ /* * Process ioctl on socket */ int ng_btsocket_hci_raw_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); char path[NG_NODESIZ + 1]; struct ng_mesg *msg = NULL; int error = 0; if (pcb == NULL) return (EINVAL); if (ng_btsocket_hci_raw_node == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); /* Check if we have device name */ if (pcb->addr.hci_node[0] == 0) { mtx_unlock(&pcb->pcb_mtx); return (EHOSTUNREACH); } /* Check if we have pending ioctl() */ if (pcb->token != 0) { mtx_unlock(&pcb->pcb_mtx); return (EBUSY); } snprintf(path, sizeof(path), "%s:", pcb->addr.hci_node); switch (cmd) { case SIOC_HCI_RAW_NODE_GET_STATE: { struct ng_btsocket_hci_raw_node_state *p = (struct ng_btsocket_hci_raw_node_state *) data; error = 
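/*
 * Illustrative sketch (not part of this change): the userland view of the
 * attach/bind path above and the SIOC_HCI_RAW_NODE_GET_BDADDR ioctl case
 * that follows.  The device name and the need to link against
 * libbluetooth (for bt_ntoa()) are assumptions.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <bluetooth.h>

static int
print_bdaddr(const char *node)		/* e.g. "ubt0hci" */
{
	struct sockaddr_hci sa;
	struct ng_btsocket_hci_raw_node_bdaddr r;
	char buf[32];
	int s;

	s = socket(PF_BLUETOOTH, SOCK_RAW, BLUETOOTH_PROTO_HCI);
	if (s < 0)
		return (-1);

	memset(&sa, 0, sizeof(sa));
	sa.hci_len = sizeof(sa);
	sa.hci_family = AF_BLUETOOTH;
	strlcpy(sa.hci_node, node, sizeof(sa.hci_node));

	/* bind() records the node name; the ioctl path requires it. */
	if (bind(s, (struct sockaddr *)&sa, sizeof(sa)) < 0 ||
	    ioctl(s, SIOC_HCI_RAW_NODE_GET_BDADDR, &r) < 0) {
		close(s);
		return (-1);
	}
	printf("%s: %s\n", node, bt_ntoa(&r.bdaddr, buf));
	close(s);
	return (0);
}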
ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_STATE, &p->state, sizeof(p->state)); } break; case SIOC_HCI_RAW_NODE_INIT: if (pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) error = ng_btsocket_hci_raw_send_ngmsg(path, NGM_HCI_NODE_INIT, NULL, 0); else error = EPERM; break; case SIOC_HCI_RAW_NODE_GET_DEBUG: { struct ng_btsocket_hci_raw_node_debug *p = (struct ng_btsocket_hci_raw_node_debug *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_DEBUG, &p->debug, sizeof(p->debug)); } break; case SIOC_HCI_RAW_NODE_SET_DEBUG: { struct ng_btsocket_hci_raw_node_debug *p = (struct ng_btsocket_hci_raw_node_debug *) data; if (pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) error = ng_btsocket_hci_raw_send_ngmsg(path, NGM_HCI_NODE_SET_DEBUG, &p->debug, sizeof(p->debug)); else error = EPERM; } break; case SIOC_HCI_RAW_NODE_GET_BUFFER: { struct ng_btsocket_hci_raw_node_buffer *p = (struct ng_btsocket_hci_raw_node_buffer *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_BUFFER, &p->buffer, sizeof(p->buffer)); } break; case SIOC_HCI_RAW_NODE_GET_BDADDR: { struct ng_btsocket_hci_raw_node_bdaddr *p = (struct ng_btsocket_hci_raw_node_bdaddr *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_BDADDR, &p->bdaddr, sizeof(p->bdaddr)); } break; case SIOC_HCI_RAW_NODE_GET_FEATURES: { struct ng_btsocket_hci_raw_node_features *p = (struct ng_btsocket_hci_raw_node_features *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_FEATURES, &p->features, sizeof(p->features)); } break; case SIOC_HCI_RAW_NODE_GET_STAT: { struct ng_btsocket_hci_raw_node_stat *p = (struct ng_btsocket_hci_raw_node_stat *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_STAT, &p->stat, sizeof(p->stat)); } break; case SIOC_HCI_RAW_NODE_RESET_STAT: if (pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) error = ng_btsocket_hci_raw_send_ngmsg(path, NGM_HCI_NODE_RESET_STAT, NULL, 0); else error = EPERM; break; case SIOC_HCI_RAW_NODE_FLUSH_NEIGHBOR_CACHE: if (pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) error = ng_btsocket_hci_raw_send_ngmsg(path, NGM_HCI_NODE_FLUSH_NEIGHBOR_CACHE, NULL, 0); else error = EPERM; break; case SIOC_HCI_RAW_NODE_GET_NEIGHBOR_CACHE: { struct ng_btsocket_hci_raw_node_neighbor_cache *p = (struct ng_btsocket_hci_raw_node_neighbor_cache *) data; ng_hci_node_get_neighbor_cache_ep *p1 = NULL; ng_hci_node_neighbor_cache_entry_ep *p2 = NULL; if (p->num_entries <= 0 || p->num_entries > NG_HCI_MAX_NEIGHBOR_NUM || p->entries == NULL) { mtx_unlock(&pcb->pcb_mtx); return (EINVAL); } NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_NODE_GET_NEIGHBOR_CACHE, 0, M_NOWAIT); if (msg == NULL) { mtx_unlock(&pcb->pcb_mtx); return (ENOMEM); } ng_btsocket_hci_raw_get_token(&msg->header.token); pcb->token = msg->header.token; pcb->msg = NULL; NG_SEND_MSG_PATH(error, ng_btsocket_hci_raw_node, msg, path, 0); if (error != 0) { pcb->token = 0; mtx_unlock(&pcb->pcb_mtx); return (error); } error = msleep(&pcb->msg, &pcb->pcb_mtx, PZERO|PCATCH, "hcictl", ng_btsocket_hci_raw_ioctl_timeout * hz); pcb->token = 0; if (error != 0) { mtx_unlock(&pcb->pcb_mtx); return (error); } msg = pcb->msg; pcb->msg = NULL; mtx_unlock(&pcb->pcb_mtx); if (msg != NULL && msg->header.cmd == NGM_HCI_NODE_GET_NEIGHBOR_CACHE) { /* Return data back to user space */ p1 = (ng_hci_node_get_neighbor_cache_ep *)(msg->data); p2 = (ng_hci_node_neighbor_cache_entry_ep *)(p1 + 1); p->num_entries = min(p->num_entries, p1->num_entries); if 
(p->num_entries > 0) error = copyout((caddr_t) p2, (caddr_t) p->entries, p->num_entries * sizeof(*p2)); } else error = EINVAL; NG_FREE_MSG(msg); /* checks for != NULL */ return (error); } /* NOTREACHED */ case SIOC_HCI_RAW_NODE_GET_CON_LIST: { struct ng_btsocket_hci_raw_con_list *p = (struct ng_btsocket_hci_raw_con_list *) data; ng_hci_node_con_list_ep *p1 = NULL; ng_hci_node_con_ep *p2 = NULL; if (p->num_connections == 0 || p->num_connections > NG_HCI_MAX_CON_NUM || p->connections == NULL) { mtx_unlock(&pcb->pcb_mtx); return (EINVAL); } NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_NODE_GET_CON_LIST, 0, M_NOWAIT); if (msg == NULL) { mtx_unlock(&pcb->pcb_mtx); return (ENOMEM); } ng_btsocket_hci_raw_get_token(&msg->header.token); pcb->token = msg->header.token; pcb->msg = NULL; NG_SEND_MSG_PATH(error, ng_btsocket_hci_raw_node, msg, path, 0); if (error != 0) { pcb->token = 0; mtx_unlock(&pcb->pcb_mtx); return (error); } error = msleep(&pcb->msg, &pcb->pcb_mtx, PZERO|PCATCH, "hcictl", ng_btsocket_hci_raw_ioctl_timeout * hz); pcb->token = 0; if (error != 0) { mtx_unlock(&pcb->pcb_mtx); return (error); } msg = pcb->msg; pcb->msg = NULL; mtx_unlock(&pcb->pcb_mtx); if (msg != NULL && msg->header.cmd == NGM_HCI_NODE_GET_CON_LIST) { /* Return data back to user space */ p1 = (ng_hci_node_con_list_ep *)(msg->data); p2 = (ng_hci_node_con_ep *)(p1 + 1); p->num_connections = min(p->num_connections, p1->num_connections); if (p->num_connections > 0) error = copyout((caddr_t) p2, (caddr_t) p->connections, p->num_connections * sizeof(*p2)); } else error = EINVAL; NG_FREE_MSG(msg); /* checks for != NULL */ return (error); } /* NOTREACHED */ case SIOC_HCI_RAW_NODE_GET_LINK_POLICY_MASK: { struct ng_btsocket_hci_raw_node_link_policy_mask *p = (struct ng_btsocket_hci_raw_node_link_policy_mask *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_LINK_POLICY_SETTINGS_MASK, &p->policy_mask, sizeof(p->policy_mask)); } break; case SIOC_HCI_RAW_NODE_SET_LINK_POLICY_MASK: { struct ng_btsocket_hci_raw_node_link_policy_mask *p = (struct ng_btsocket_hci_raw_node_link_policy_mask *) data; if (pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) error = ng_btsocket_hci_raw_send_ngmsg(path, NGM_HCI_NODE_SET_LINK_POLICY_SETTINGS_MASK, &p->policy_mask, sizeof(p->policy_mask)); else error = EPERM; } break; case SIOC_HCI_RAW_NODE_GET_PACKET_MASK: { struct ng_btsocket_hci_raw_node_packet_mask *p = (struct ng_btsocket_hci_raw_node_packet_mask *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_PACKET_MASK, &p->packet_mask, sizeof(p->packet_mask)); } break; case SIOC_HCI_RAW_NODE_SET_PACKET_MASK: { struct ng_btsocket_hci_raw_node_packet_mask *p = (struct ng_btsocket_hci_raw_node_packet_mask *) data; if (pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) error = ng_btsocket_hci_raw_send_ngmsg(path, NGM_HCI_NODE_SET_PACKET_MASK, &p->packet_mask, sizeof(p->packet_mask)); else error = EPERM; } break; case SIOC_HCI_RAW_NODE_GET_ROLE_SWITCH: { struct ng_btsocket_hci_raw_node_role_switch *p = (struct ng_btsocket_hci_raw_node_role_switch *) data; error = ng_btsocket_hci_raw_send_sync_ngmsg(pcb, path, NGM_HCI_NODE_GET_ROLE_SWITCH, &p->role_switch, sizeof(p->role_switch)); } break; case SIOC_HCI_RAW_NODE_SET_ROLE_SWITCH: { struct ng_btsocket_hci_raw_node_role_switch *p = (struct ng_btsocket_hci_raw_node_role_switch *) data; if (pcb->flags & NG_BTSOCKET_HCI_RAW_PRIVILEGED) error = ng_btsocket_hci_raw_send_ngmsg(path, NGM_HCI_NODE_SET_ROLE_SWITCH, &p->role_switch, sizeof(p->role_switch)); else error = 
EPERM; } break; case SIOC_HCI_RAW_NODE_LIST_NAMES: { struct ng_btsocket_hci_raw_node_list_names *nl = (struct ng_btsocket_hci_raw_node_list_names *) data; struct nodeinfo *ni = nl->names; if (nl->num_names == 0) { mtx_unlock(&pcb->pcb_mtx); return (EINVAL); } NG_MKMESSAGE(msg, NGM_GENERIC_COOKIE, NGM_LISTNAMES, 0, M_NOWAIT); if (msg == NULL) { mtx_unlock(&pcb->pcb_mtx); return (ENOMEM); } ng_btsocket_hci_raw_get_token(&msg->header.token); pcb->token = msg->header.token; pcb->msg = NULL; NG_SEND_MSG_PATH(error, ng_btsocket_hci_raw_node, msg, ".:", 0); if (error != 0) { pcb->token = 0; mtx_unlock(&pcb->pcb_mtx); return (error); } error = msleep(&pcb->msg, &pcb->pcb_mtx, PZERO|PCATCH, "hcictl", ng_btsocket_hci_raw_ioctl_timeout * hz); pcb->token = 0; if (error != 0) { mtx_unlock(&pcb->pcb_mtx); return (error); } msg = pcb->msg; pcb->msg = NULL; mtx_unlock(&pcb->pcb_mtx); if (msg != NULL && msg->header.cmd == NGM_LISTNAMES) { /* Return data back to user space */ struct namelist *nl1 = (struct namelist *) msg->data; struct nodeinfo *ni1 = &nl1->nodeinfo[0]; while (nl->num_names > 0 && nl1->numnames > 0) { if (strcmp(ni1->type, NG_HCI_NODE_TYPE) == 0) { error = copyout((caddr_t) ni1, (caddr_t) ni, sizeof(*ni)); if (error != 0) break; nl->num_names --; ni ++; } nl1->numnames --; ni1 ++; } nl->num_names = ni - nl->names; } else error = EINVAL; NG_FREE_MSG(msg); /* checks for != NULL */ return (error); } /* NOTREACHED */ default: error = EINVAL; break; } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_hci_raw_control */ /* * Process getsockopt/setsockopt system calls */ int ng_btsocket_hci_raw_ctloutput(struct socket *so, struct sockopt *sopt) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); struct ng_btsocket_hci_raw_filter filter; int error = 0, dir; if (pcb == NULL) return (EINVAL); if (ng_btsocket_hci_raw_node == NULL) return (EINVAL); if (sopt->sopt_level != SOL_HCI_RAW) return (0); mtx_lock(&pcb->pcb_mtx); switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case SO_HCI_RAW_FILTER: error = sooptcopyout(sopt, &pcb->filter, sizeof(pcb->filter)); break; case SO_HCI_RAW_DIRECTION: dir = (pcb->flags & NG_BTSOCKET_HCI_RAW_DIRECTION)?1:0; error = sooptcopyout(sopt, &dir, sizeof(dir)); break; default: error = EINVAL; break; } break; case SOPT_SET: switch (sopt->sopt_name) { case SO_HCI_RAW_FILTER: error = sooptcopyin(sopt, &filter, sizeof(filter), sizeof(filter)); if (error == 0) bcopy(&filter, &pcb->filter, sizeof(pcb->filter)); break; case SO_HCI_RAW_DIRECTION: error = sooptcopyin(sopt, &dir, sizeof(dir), sizeof(dir)); if (error != 0) break; if (dir) pcb->flags |= NG_BTSOCKET_HCI_RAW_DIRECTION; else pcb->flags &= ~NG_BTSOCKET_HCI_RAW_DIRECTION; break; default: error = EINVAL; break; } break; default: error = EINVAL; break; } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_hci_raw_ctloutput */ /* * Detach raw HCI socket */ void ng_btsocket_hci_raw_detach(struct socket *so) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); KASSERT(pcb != NULL, ("ng_btsocket_hci_raw_detach: pcb == NULL")); if (ng_btsocket_hci_raw_node == NULL) return; mtx_lock(&ng_btsocket_hci_raw_sockets_mtx); mtx_lock(&pcb->pcb_mtx); LIST_REMOVE(pcb, next); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_hci_raw_sockets_mtx); mtx_destroy(&pcb->pcb_mtx); bzero(pcb, sizeof(*pcb)); free(pcb, M_NETGRAPH_BTSOCKET_HCI_RAW); so->so_pcb = NULL; } /* ng_btsocket_hci_raw_detach */ /* * Disconnect raw HCI socket */ int ng_btsocket_hci_raw_disconnect(struct socket *so) { 
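/*
 * Illustrative sketch (not part of this change): consuming the
 * SIOC_HCI_RAW_NODE_LIST_NAMES ioctl handled above.  The caller supplies
 * the nodeinfo array and the kernel trims num_names to the number of HCI
 * nodes it copied out; the socket must already be bound to an HCI node
 * (see the earlier sketch).  The array size and header names are
 * assumptions.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <bluetooth.h>
#include <netgraph/ng_message.h>

#define MAX_NODES	16		/* arbitrary for the example */

static void
list_hci_nodes(int s)			/* bound raw HCI socket */
{
	struct ng_btsocket_hci_raw_node_list_names nl;
	struct nodeinfo names[MAX_NODES];
	u_int32_t i;

	nl.num_names = MAX_NODES;
	nl.names = names;
	if (ioctl(s, SIOC_HCI_RAW_NODE_LIST_NAMES, &nl) < 0)
		return;
	for (i = 0; i < nl.num_names; i++)
		printf("%s (type %s)\n", names[i].name, names[i].type);
}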
ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); if (pcb == NULL) return (EINVAL); if (ng_btsocket_hci_raw_node == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); soisdisconnected(so); mtx_unlock(&pcb->pcb_mtx); return (0); } /* ng_btsocket_hci_raw_disconnect */ /* * Get socket peer's address */ int ng_btsocket_hci_raw_peeraddr(struct socket *so, struct sockaddr **nam) { return (ng_btsocket_hci_raw_sockaddr(so, nam)); } /* ng_btsocket_hci_raw_peeraddr */ /* * Send data */ int ng_btsocket_hci_raw_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *sa, struct mbuf *control, struct thread *td) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); struct mbuf *nam = NULL; int error = 0; if (ng_btsocket_hci_raw_node == NULL) { error = ENETDOWN; goto drop; } if (pcb == NULL) { error = EINVAL; goto drop; } if (control != NULL) { error = EINVAL; goto drop; } if (m->m_pkthdr.len < sizeof(ng_hci_cmd_pkt_t) || m->m_pkthdr.len > sizeof(ng_hci_cmd_pkt_t) + NG_HCI_CMD_PKT_SIZE) { error = EMSGSIZE; goto drop; } if (m->m_len < sizeof(ng_hci_cmd_pkt_t)) { if ((m = m_pullup(m, sizeof(ng_hci_cmd_pkt_t))) == NULL) { error = ENOBUFS; goto drop; } } if (*mtod(m, u_int8_t *) != NG_HCI_CMD_PKT) { error = ENOTSUP; goto drop; } mtx_lock(&pcb->pcb_mtx); error = ng_btsocket_hci_raw_filter(pcb, m, 0); if (error != 0) { mtx_unlock(&pcb->pcb_mtx); goto drop; } if (sa == NULL) { if (pcb->addr.hci_node[0] == 0) { mtx_unlock(&pcb->pcb_mtx); error = EDESTADDRREQ; goto drop; } sa = (struct sockaddr *) &pcb->addr; } MGET(nam, M_NOWAIT, MT_SONAME); if (nam == NULL) { mtx_unlock(&pcb->pcb_mtx); error = ENOBUFS; goto drop; } nam->m_len = sizeof(struct sockaddr_hci); bcopy(sa,mtod(nam, struct sockaddr_hci *),sizeof(struct sockaddr_hci)); nam->m_next = m; m = NULL; mtx_unlock(&pcb->pcb_mtx); return (ng_send_fn(ng_btsocket_hci_raw_node, NULL, ng_btsocket_hci_raw_output, nam, 0)); drop: NG_FREE_M(control); /* NG_FREE_M checks for != NULL */ NG_FREE_M(nam); NG_FREE_M(m); return (error); } /* ng_btsocket_hci_raw_send */ /* * Get socket address */ int ng_btsocket_hci_raw_sockaddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_hci_raw_pcb_p pcb = so2hci_raw_pcb(so); struct sockaddr_hci sa; if (pcb == NULL) return (EINVAL); if (ng_btsocket_hci_raw_node == NULL) return (EINVAL); bzero(&sa, sizeof(sa)); sa.hci_len = sizeof(sa); sa.hci_family = AF_BLUETOOTH; mtx_lock(&pcb->pcb_mtx); strlcpy(sa.hci_node, pcb->addr.hci_node, sizeof(sa.hci_node)); mtx_unlock(&pcb->pcb_mtx); *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); } /* ng_btsocket_hci_raw_sockaddr */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 7c365b2e51b8..e1029920c95f 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1,1437 +1,1437 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CTASSERT CTASSERT(sizeof(struct ip) == 20); #endif /* IP reassembly functions are defined in ip_reass.c. */ extern void ipreass_init(void); extern void ipreass_drain(void); extern void ipreass_slowtimo(void); #ifdef VIMAGE extern void ipreass_destroy(void); #endif VNET_DEFINE(int, rsvp_on); VNET_DEFINE(int, ipforwarding); SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipforwarding), 0, "Enable IP forwarding between interfaces"); /* * Respond with an ICMP host redirect when we forward a packet out of * the same interface on which it was received. See RFC 792. */ VNET_DEFINE(int, ipsendredirects) = 1; SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); VNET_DEFINE_STATIC(bool, ip_strong_es) = false; #define V_ip_strong_es VNET(ip_strong_es) SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false, "Packet's IP destination address must match address on arrival interface"); VNET_DEFINE_STATIC(bool, ip_sav) = true; #define V_ip_sav VNET(ip_sav) SYSCTL_BOOL(_net_inet_ip, OID_AUTO, source_address_validation, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_sav), true, "Drop incoming packets with source address that is a local address"); VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ static struct netisr_handler ip_nh = { .nh_name = "ip", .nh_handler = ip_input, .nh_proto = NETISR_IP, #ifdef RSS .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, #else .nh_policy = NETISR_POLICY_FLOW, #endif }; #ifdef RSS /* * Directly dispatched frames are currently assumed * to have a flowid already calculated. * * It should likely have something that assert it * actually has valid flow details. 
*/ static struct netisr_handler ip_direct_nh = { .nh_name = "ip_direct", .nh_handler = ip_direct_input, .nh_proto = NETISR_IP_DIRECT, .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, }; #endif extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ /* Make sure it is safe to use hashinit(9) on CK_LIST. */ CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr))); #ifdef IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0, "Default MTU"); #endif #ifdef IPSTEALTH VNET_DEFINE(int, ipstealth); SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipstealth), 0, "IP stealth mode, no TTL decrementation on forwarding"); #endif /* * IP statistics are stored in the "array" of counter(9)s. */ VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); VNET_PCPUSTAT_SYSINIT(ipstat); SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipstat); #endif /* VIMAGE */ /* * Kernel module interface for updating ipstat. The argument is an index * into ipstat treated as an array. */ void kmod_ipstat_inc(int statnum) { counter_u64_add(VNET(ipstat)[statnum], 1); } void kmod_ipstat_dec(int statnum) { counter_u64_add(VNET(ipstat)[statnum], -1); } static int sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", "Maximum size of the IP input queue"); static int sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_nh); return (0); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_netinet_intr_queue_drops, "I", "Number of packets dropped from the IP input queue"); #ifdef RSS static int sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_direct_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_direct_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", "Maximum size of the IP direct input queue"); static int sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_direct_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_direct_nh); return (0); 
} SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", "Number of packets dropped from the IP direct input queue"); #endif /* RSS */ /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. */ static void ip_vnet_init(void *arg __unused) { struct pfil_head_args args; CK_STAILQ_INIT(&V_in_ifaddrhead); V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); /* Initialize IP reassembly queue. */ ipreass_init(); /* Initialize packet filter hooks. */ args.pa_version = PFIL_VERSION; args.pa_flags = PFIL_IN | PFIL_OUT; args.pa_type = PFIL_TYPE_IP4; args.pa_headname = PFIL_INET_NAME; V_inet_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, &V_ipsec_hhh_in[HHOOK_IPSEC_INET], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register input helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, &V_ipsec_hhh_out[HHOOK_IPSEC_INET], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register output helper hook\n", __func__); #ifdef VIMAGE netisr_register_vnet(&ip_nh); #ifdef RSS netisr_register_vnet(&ip_direct_nh); #endif #endif } VNET_SYSINIT(ip_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, ip_vnet_init, NULL); static void ip_init(const void *unused __unused) { struct protosw *pr; pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); KASSERT(pr, ("%s: PF_INET not found", __func__)); /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ for (int i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; /* * Cycle through IP protocols and put them into the appropriate place * in ip_protox[]. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ if (pr->pr_protocol < IPPROTO_MAX) ip_protox[pr->pr_protocol] = pr - inetsw; } netisr_register(&ip_nh); #ifdef RSS netisr_register(&ip_direct_nh); #endif } SYSINIT(ip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_init, NULL); #ifdef VIMAGE static void ip_destroy(void *unused __unused) { int error; #ifdef RSS netisr_unregister_vnet(&ip_direct_nh); #endif netisr_unregister_vnet(&ip_nh); pfil_head_unregister(V_inet_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " "error %d returned\n", __func__, error); } error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister output helper hook " "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " "error %d returned\n", __func__, error); } /* Remove the IPv4 addresses from all interfaces. */ in_ifscrub_all(); /* Make sure the IPv4 routes are gone as well. */ rib_flush_routes_family(AF_INET); /* Destroy IP reassembly queue. */ ipreass_destroy(); /* Cleanup in_ifaddr hash table; should be empty. */ hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); } VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); #endif #ifdef RSS /* * IP direct input routine. * * This is called when reinjecting completed fragments where * all of the previous checking and book-keeping has been done. 
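/*
 * Illustrative sketch (not part of this change): a minimal userland model
 * of the ip_protox[] dispatch that ip_init() sets up above.  Every
 * protocol number defaults to the catch-all "raw" slot; registered
 * protocols overwrite their own entry, mirroring ipproto_register() and
 * ipproto_unregister() further down.  All names here are invented for the
 * example.
 */
#include <stdio.h>

#define PROTO_MAX	256
#define SLOT_RAW	0		/* index of the catch-all handler */

typedef void (*proto_input_t)(int proto);

static void raw_input_ex(int proto) { printf("raw handler, proto %d\n", proto); }
static void udp_input_ex(int proto) { printf("udp handler, proto %d\n", proto); }

static proto_input_t handlers[] = { raw_input_ex, udp_input_ex };
static unsigned char protox[PROTO_MAX];	/* per-protocol index into handlers[] */

int
main(void)
{
	/* Default everything to the raw handler, as ip_init() does. */
	for (int i = 0; i < PROTO_MAX; i++)
		protox[i] = SLOT_RAW;
	protox[17] = 1;			/* "register" UDP */

	handlers[protox[17]](17);	/* dispatches to udp_input_ex() */
	handlers[protox[89]](89);	/* unregistered -> raw_input_ex() */
	return (0);
}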
*/ void ip_direct_input(struct mbuf *m) { struct ip *ip; int hlen; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv4)) { if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) return; } #endif /* IPSEC */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; } #endif /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(struct mbuf *m) { struct ip *ip = NULL; struct in_ifaddr *ia = NULL; struct ifaddr *ifa; struct ifnet *ifp; int hlen = 0; uint16_t sum, ip_len; int dchg = 0; /* dest changed after fw */ struct in_addr odst; /* original dst address */ bool strong_es; M_ASSERTPKTHDR(m); NET_EPOCH_ASSERT(); if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; /* Set up some basics that will be used later. */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; ip_len = ntohs(ip->ip_len); goto ours; } IPSTAT_INC(ips_total); if (__predict_false(m->m_pkthdr.len < sizeof(struct ip))) goto tooshort; if (m->m_len < sizeof(struct ip)) { m = m_pullup(m, sizeof(struct ip)); if (__predict_false(m == NULL)) { IPSTAT_INC(ips_toosmall); return; } } ip = mtod(m, struct ip *); if (__predict_false(ip->ip_v != IPVERSION)) { IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (__predict_false(hlen < sizeof(struct ip))) { /* minimum header length */ IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { m = m_pullup(m, hlen); if (__predict_false(m == NULL)) { IPSTAT_INC(ips_badhlen); return; } ip = mtod(m, struct ip *); } IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); /* IN_LOOPBACK must not appear on the wire - RFC1122 */ ifp = m->m_pkthdr.rcvif; if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { if ((ifp->if_flags & IFF_LOOPBACK) == 0) { IPSTAT_INC(ips_badaddr); goto bad; } } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (__predict_false(sum)) { IPSTAT_INC(ips_badsum); goto bad; } #ifdef ALTQ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) /* packet is dropped by traffic conditioner */ return; #endif ip_len = ntohs(ip->ip_len); if (__predict_false(ip_len < hlen)) { IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (__predict_false(m->m_pkthdr.len < ip_len)) { tooshort: IPSTAT_INC(ips_tooshort); goto bad; } if (m->m_pkthdr.len > ip_len) { if (m->m_len == m->m_pkthdr.len) { m->m_len = ip_len; m->m_pkthdr.len = ip_len; } else m_adj(m, ip_len - m->m_pkthdr.len); } /* * Try to forward the packet, but if we fail continue. * ip_tryforward() may generate redirects these days. * XXX the logic below falling through to normal processing * if redirects are required should be revisited as well. * ip_tryforward() does inbound and outbound packet firewall * processing. If firewall has decided that destination becomes * our local address, it sets M_FASTFWD_OURS flag. In this * case skip another inbound firewall processing and update * ip pointer. 
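/*
 * Illustrative sketch (not part of this change): the 16-bit one's
 * complement sum that in_cksum_hdr()/in_cksum() perform on the header
 * validated in ip_input() above, written as a portable userland function.
 * Over a valid header (checksum field included) the folded sum is 0xffff,
 * so the function returns 0; with the checksum field zeroed it returns
 * the value to store.
 */
#include <stddef.h>
#include <stdint.h>

static uint16_t
ipv4_header_cksum(const void *hdr, size_t hlen)	/* hlen in bytes, even */
{
	const uint16_t *p = hdr;
	uint32_t sum = 0;

	for (size_t i = 0; i < hlen / 2; i++)
		sum += p[i];
	/* Fold carries back into the low 16 bits. */
	while (sum > 0xffff)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}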
*/ if (V_ipforwarding != 0 #if defined(IPSEC) || defined(IPSEC_SUPPORT) && (!IPSEC_ENABLED(ipv4) || IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) #endif ) { /* * ip_dooptions() was run so we can ignore the source route (or * any IP options case) case for redirects in ip_tryforward(). */ if ((m = ip_tryforward(m)) == NULL) return; if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ip = mtod(m, struct ip *); goto ours; } } #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. */ if (IPSEC_ENABLED(ipv4) && IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) goto passin; #endif /* * Run through list of hooks for input packets. * * NB: Beware of the destination address changing (e.g. * by NAT rewriting). When this happens, tell * ip_forward to do the right thing. */ /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED_IN(V_inet_pfil_head)) goto passin; odst = ip->ip_dst; if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != PFIL_PASS) return; if (m == NULL) /* consumed by filter */ return; ip = mtod(m, struct ip *); dchg = (odst.s_addr != ip->ip_dst.s_addr); if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; goto ours; } if (m->m_flags & M_IP_NEXTHOP) { if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { /* * Directly ship the packet on. This allows * forwarding packets originally destined to us * to some other directly connected host. */ ip_forward(m, 1); return; } } passin: /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message * to be sent and the original packet to be freed). */ if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) return; /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no * matter if it is destined to another node, or whether it is * a multicast one, RSVP wants it! and prevents it from being forwarded * anywhere else. Also checks if the rsvp daemon is running before * grabbing the packet. */ if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on) goto ours; /* * Check our list of addresses, to see if the packet is for us. * If we don't have any addresses, assume any unicast packet * we receive might be for us (and let the upper layers deal * with it). */ if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && (m->m_flags & (M_MCAST|M_BCAST)) == 0) goto ours; /* * Enable a consistency check between the destination address * and the arrival interface for a unicast packet (the RFC 1122 * strong ES model) with a list of additional predicates: * - if IP forwarding is disabled * - the packet is not locally generated * - the packet is not subject to 'ipfw fwd' * - Interface is not running CARP. If the packet got here, we already * checked it with carp_iamatch() and carp_forus(). */ strong_es = V_ip_strong_es && (V_ipforwarding == 0) && ((ifp->if_flags & IFF_LOOPBACK) == 0) && ifp->if_carp == NULL && (dchg == 0); /* * Check for exact addresses in the hash bucket. */ CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr) continue; /* * net.inet.ip.rfc1122_strong_es: the address matches, verify * that the packet arrived via the correct interface. */ if (__predict_false(strong_es && ia->ia_ifp != ifp)) { IPSTAT_INC(ips_badaddr); goto bad; } /* * net.inet.ip.source_address_validation: drop incoming * packets that pretend to be ours. 
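/*
 * Illustrative sketch (not part of this change): inspecting from userland
 * the two knobs referenced above.  net.inet.ip.rfc1122_strong_es and
 * net.inet.ip.source_address_validation are boolean sysctls, so a
 * one-byte bool is read; that size is the only assumption.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
	bool strong_es, sav;
	size_t len;

	len = sizeof(strong_es);
	if (sysctlbyname("net.inet.ip.rfc1122_strong_es", &strong_es, &len,
	    NULL, 0) == 0)
		printf("strong ES model: %s\n", strong_es ? "on" : "off");

	len = sizeof(sav);
	if (sysctlbyname("net.inet.ip.source_address_validation", &sav, &len,
	    NULL, 0) == 0)
		printf("source address validation: %s\n", sav ? "on" : "off");
	return (0);
}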
*/ if (V_ip_sav && !(ifp->if_flags & IFF_LOOPBACK) && __predict_false(in_localip_fib(ip->ip_src, ifp->if_fib))) { IPSTAT_INC(ips_badaddr); goto bad; } counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); goto ours; } /* * Check for broadcast addresses. * * Only accept broadcast packets that arrive via the matching * interface. Reception of forwarded directed broadcasts would * be handled via ip_forward() and ether_output() with the loopback * into the stack for SIMPLEX interfaces handled by ether_output(). */ if (ifp->if_flags & IFF_BROADCAST) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == ip->ip_dst.s_addr) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); goto ours; } #ifdef BOOTP_COMPAT if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); goto ours; } #endif } ia = NULL; } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { /* * RFC 3927 2.7: Do not forward multicast packets from * IN_LINKLOCAL. */ if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. */ if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } /* * The process-level routing daemon needs to receive * all multicast IGMP packets, whether or not this * host belongs to their destination groups. */ if (ip->ip_p == IPPROTO_IGMP) { goto ours; } IPSTAT_INC(ips_forward); } /* * Assume the packet is for us, to avoid prematurely taking * a lock on the in_multi hash. Protocols must perform * their own filtering and update statistics accordingly. */ goto ours; } if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) goto ours; if (ip->ip_dst.s_addr == INADDR_ANY) goto ours; /* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } /* * Not for us; forward if possible and desirable. */ if (V_ipforwarding == 0) { IPSTAT_INC(ips_cantforward); m_freem(m); } else { ip_forward(m, dchg); } return; ours: #ifdef IPSTEALTH /* * IPSTEALTH: Process non-routing options only * if the packet is destined for us. */ if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) return; #endif /* IPSTEALTH */ /* * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf. */ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { /* XXXGL: shouldn't we save & set m_flags? */ m = ip_reass(m); if (m == NULL) return; ip = mtod(m, struct ip *); /* Get the header length of the reassembled packet */ hlen = ip->ip_hl << 2; } #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv4)) { if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) return; } #endif /* IPSEC */ /* * Switch out to protocol's input routine. */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; bad: m_freem(m); } /* * IP timer processing; * if a timer expires on a reassembly * queue, discard it. 
*/ void ip_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_slowtimo(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } void ip_drain(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_drain(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * The protocol to be inserted into ip_protox[] must be already registered * in inetsw[], either statically or through pf_proto_register(). */ int ipproto_register(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ return (EEXIST); /* Find the protocol position in inetsw[] and set the index. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol == ipproto) { ip_protox[pr->pr_protocol] = pr - inetsw; return (0); } } return (EPROTONOSUPPORT); } int ipproto_unregister(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ return (ENOENT); /* Reset the protocol slot to IPPROTO_RAW. */ ip_protox[ipproto] = pr - inetsw; return (0); } u_char inetctlerrmap[PRC_NCMDS] = { 0, 0, 0, 0, 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, EMSGSIZE, EHOSTUNREACH, 0, 0, 0, 0, EHOSTUNREACH, 0, ENOPROTOOPT, ECONNREFUSED }; /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful * icmp message because icmp doesn't have a large enough repertoire * of codes and types. * * If not forwarding, just drop the packet. This could be confusing * if ipforwarding was zero but some routing protocol was advancing * us as a gateway to somewhere. However, we must let the routing * protocol deal with that. * * The srcrt parameter indicates whether the packet is being forwarded * via a source route. */ void ip_forward(struct mbuf *m, int srcrt) { struct ip *ip = mtod(m, struct ip *); struct in_ifaddr *ia; struct mbuf *mcopy; struct sockaddr_in *sin; struct in_addr dest; struct route ro; uint32_t flowid; int error, type = 0, code = 0, mtu = 0; NET_EPOCH_ASSERT(); if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } if ( #ifdef IPSTEALTH V_ipstealth == 0 && #endif ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); return; } bzero(&ro, sizeof(ro)); sin = (struct sockaddr_in *)&ro.ro_dst; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; flowid = m->m_pkthdr.flowid; ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid); if (ro.ro_nh != NULL) { ia = ifatoia(ro.ro_nh->nh_ifa); } else ia = NULL; /* * Save the IP header and at most 8 bytes of the payload, * in case we need to generate an ICMP message to the src. 
* * XXX this can be optimized a lot by saving the data in a local * buffer on the stack (72 bytes at most), and only allocating the * mbuf if really necessary. The vast majority of the packets * are forwarded without having to send an ICMP back (either * because unnecessary, or because rate limited), so we are * really we are wasting a lot of work here. * * We don't use m_copym() because it might return a reference * to a shared cluster. Both this function and ip_output() * assume exclusive access to the IP header in `m', so any * data in a cluster may change before we reach icmp_error(). */ mcopy = m_gethdr(M_NOWAIT, m->m_type); if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { /* * It's probably ok if the pkthdr dup fails (because * the deep copy of the tag chain failed), but for now * be conservative and just discard the copy since * code below may some day want the tags. */ m_free(mcopy); mcopy = NULL; } if (mcopy != NULL) { mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } #ifdef IPSTEALTH if (V_ipstealth == 0) #endif ip->ip_ttl -= IPTTLDEC; #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv4)) { if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { /* mbuf consumed by IPsec */ RO_NHFREE(&ro); m_freem(mcopy); if (error != EINPROGRESS) IPSTAT_INC(ips_cantforward); return; } /* No IPsec processing required */ } #endif /* IPSEC */ /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a default route * or a route modified by a redirect. */ dest.s_addr = 0; if (!srcrt && V_ipsendredirects && ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { struct nhop_object *nh; nh = ro.ro_nh; if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) { struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa); u_long src = ntohl(ip->ip_src.s_addr); if (nh_ia != NULL && (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { /* Router requirements says to only send host redirects */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; if (nh->nh_flags & NHF_GATEWAY) { if (nh->gw_sa.sa_family == AF_INET) dest.s_addr = nh->gw4_sa.sin_addr.s_addr; else /* Do not redirect in case gw is AF_INET6 */ type = 0; } else dest.s_addr = ip->ip_dst.s_addr; } } } error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); if (error == EMSGSIZE && ro.ro_nh) mtu = ro.ro_nh->nh_mtu; RO_NHFREE(&ro); if (error) IPSTAT_INC(ips_cantforward); else { IPSTAT_INC(ips_forward); if (type) IPSTAT_INC(ips_redirectsent); else { if (mcopy) m_freem(mcopy); return; } } if (mcopy == NULL) return; switch (error) { case 0: /* forwarded, but need redirect */ /* type, code set above */ break; case ENETUNREACH: case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; break; case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; /* * If the MTU was set before make sure we are below the * interface MTU. * If the MTU wasn't set before use the interface mtu or * fall back to the next smaller mtu step compared to the * current packet size. 
*/ if (mtu != 0) { if (ia != NULL) mtu = min(mtu, ia->ia_ifp->if_mtu); } else { if (ia != NULL) mtu = ia->ia_ifp->if_mtu; else mtu = ip_next_mtu(ntohs(ip->ip_len), 0); } IPSTAT_INC(ips_cantfrag); break; case ENOBUFS: case EACCES: /* ipfw denied packet */ m_freem(mcopy); return; } icmp_error(mcopy, type, code, dest.s_addr, mtu); } #define CHECK_SO_CT(sp, ct) \ (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0) void ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, struct mbuf *m) { bool stamped; stamped = false; if ((inp->inp_socket->so_options & SO_BINTIME) || CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { struct bintime boottimebin, bt; struct timespec ts1; if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &ts1); timespec2bintime(&ts1, &bt); getboottimebin(&boottimebin); bintime_add(&bt, &boottimebin); } else { bintime(&bt); } - *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), - SCM_BINTIME, SOL_SOCKET); + *mp = sbcreatecontrol(&bt, sizeof(bt), SCM_BINTIME, + SOL_SOCKET, M_NOWAIT); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } } if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { struct bintime boottimebin, bt1; struct timespec ts1; struct timeval tv; if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &ts1); timespec2bintime(&ts1, &bt1); getboottimebin(&boottimebin); bintime_add(&bt1, &boottimebin); bintime2timeval(&bt1, &tv); } else { microtime(&tv); } - *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), - SCM_TIMESTAMP, SOL_SOCKET); + *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP, + SOL_SOCKET, M_NOWAIT); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { struct bintime boottimebin; struct timespec ts, ts1; if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &ts); getboottimebin(&boottimebin); bintime2timespec(&boottimebin, &ts1); timespecadd(&ts, &ts1, &ts); } else { nanotime(&ts); } - *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), - SCM_REALTIME, SOL_SOCKET); + *mp = sbcreatecontrol(&ts, sizeof(ts), SCM_REALTIME, + SOL_SOCKET, M_NOWAIT); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { struct timespec ts; if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) mbuf_tstmp2timespec(m, &ts); else nanouptime(&ts); - *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), - SCM_MONOTONIC, SOL_SOCKET); + *mp = sbcreatecontrol(&ts, sizeof(ts), SCM_MONOTONIC, + SOL_SOCKET, M_NOWAIT); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } } if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { struct sock_timestamp_info sti; bzero(&sti, sizeof(sti)); sti.st_info_flags = ST_INFO_HW; if ((m->m_flags & M_TSTMP_HPREC) != 0) sti.st_info_flags |= ST_INFO_HW_HPREC; - *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, - SOL_SOCKET); + *mp = sbcreatecontrol(&sti, sizeof(sti), SCM_TIME_INFO, + SOL_SOCKET, M_NOWAIT); if (*mp != NULL) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVDSTADDR) { - *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, - sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); + *mp = sbcreatecontrol(&ip->ip_dst, sizeof(struct in_addr), + IP_RECVDSTADDR, IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTTL) { - *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, - sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 
+ *mp = sbcreatecontrol(&ip->ip_ttl, sizeof(u_char), IP_RECVTTL, + IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } #ifdef notyet /* XXX * Moving these out of udp_input() made them even more broken * than they already were. */ /* options were tossed already */ if (inp->inp_flags & INP_RECVOPTS) { - *mp = sbcreatecontrol((caddr_t)opts_deleted_above, - sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); + *mp = sbcreatecontrol(opts_deleted_above, + sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } /* ip_srcroute doesn't do what we want here, need to fix */ if (inp->inp_flags & INP_RECVRETOPTS) { - *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), - sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); + *mp = sbcreatecontrol(ip_srcroute(m), sizeof(struct in_addr), + IP_RECVRETOPTS, IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } #endif if (inp->inp_flags & INP_RECVIF) { struct ifnet *ifp; struct sdlbuf { struct sockaddr_dl sdl; u_char pad[32]; } sdlbuf; struct sockaddr_dl *sdp; struct sockaddr_dl *sdl2 = &sdlbuf.sdl; if ((ifp = m->m_pkthdr.rcvif)) { sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; /* * Change our mind and don't try copy. */ if (sdp->sdl_family != AF_LINK || sdp->sdl_len > sizeof(sdlbuf)) { goto makedummy; } bcopy(sdp, sdl2, sdp->sdl_len); } else { makedummy: sdl2->sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]); sdl2->sdl_family = AF_LINK; sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } - *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, - IP_RECVIF, IPPROTO_IP); + *mp = sbcreatecontrol(sdl2, sdl2->sdl_len, IP_RECVIF, + IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTOS) { - *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, - sizeof(u_char), IP_RECVTOS, IPPROTO_IP); + *mp = sbcreatecontrol(&ip->ip_tos, sizeof(u_char), IP_RECVTOS, + IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); /* * XXX should handle the failure of one or the * other - don't populate both? */ - *mp = sbcreatecontrol((caddr_t) &flowid, - sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); + *mp = sbcreatecontrol(&flowid, sizeof(uint32_t), IP_FLOWID, + IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; - *mp = sbcreatecontrol((caddr_t) &flow_type, - sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); + *mp = sbcreatecontrol(&flow_type, sizeof(uint32_t), + IP_FLOWTYPE, IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } #ifdef RSS if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { - *mp = sbcreatecontrol((caddr_t) &rss_bucketid, - sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); + *mp = sbcreatecontrol(&rss_bucketid, sizeof(uint32_t), + IP_RSSBUCKETID, IPPROTO_IP, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } } #endif } /* * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on * locking. This code remains in ip_input.c as ip_mroute.c is optionally * compiled. 
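/*
 * Illustrative sketch (not part of this change): a UDP receiver consuming
 * the ancillary data that ip_savecontrol() builds above.  It enables
 * SO_TIMESTAMP, IP_RECVDSTADDR and IP_RECVTTL, then walks the cmsg chain;
 * the buffer sizes are assumptions, the cmsg levels and types follow the
 * code above.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
recv_with_cmsgs(int s)			/* bound UDP socket */
{
	char data[2048], cbuf[512], astr[INET_ADDRSTRLEN];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr mh;
	struct cmsghdr *cm;
	int on = 1;

	setsockopt(s, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));
	setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR, &on, sizeof(on));
	setsockopt(s, IPPROTO_IP, IP_RECVTTL, &on, sizeof(on));

	memset(&mh, 0, sizeof(mh));
	mh.msg_iov = &iov;
	mh.msg_iovlen = 1;
	mh.msg_control = cbuf;
	mh.msg_controllen = sizeof(cbuf);
	if (recvmsg(s, &mh, 0) < 0)
		return;

	for (cm = CMSG_FIRSTHDR(&mh); cm != NULL; cm = CMSG_NXTHDR(&mh, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMP) {
			struct timeval tv;

			memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
			printf("rx time %jd.%06ld\n", (intmax_t)tv.tv_sec,
			    (long)tv.tv_usec);
		} else if (cm->cmsg_level == IPPROTO_IP &&
		    cm->cmsg_type == IP_RECVDSTADDR) {
			struct in_addr dst;

			memcpy(&dst, CMSG_DATA(cm), sizeof(dst));
			printf("dst %s\n", inet_ntop(AF_INET, &dst, astr,
			    sizeof(astr)));
		} else if (cm->cmsg_level == IPPROTO_IP &&
		    cm->cmsg_type == IP_RECVTTL) {
			u_char ttl;

			memcpy(&ttl, CMSG_DATA(cm), sizeof(ttl));
			printf("ttl %u\n", ttl);
		}
	}
}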
*/ VNET_DEFINE_STATIC(int, ip_rsvp_on); VNET_DEFINE(struct socket *, ip_rsvpd); #define V_ip_rsvp_on VNET(ip_rsvp_on) int ip_rsvp_init(struct socket *so) { if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; if (V_ip_rsvpd != NULL) return EADDRINUSE; V_ip_rsvpd = so; /* * This may seem silly, but we need to be sure we don't over-increment * the RSVP counter, in case something slips up. */ if (!V_ip_rsvp_on) { V_ip_rsvp_on = 1; V_rsvp_on++; } return 0; } int ip_rsvp_done(void) { V_ip_rsvpd = NULL; /* * This may seem silly, but we need to be sure we don't over-decrement * the RSVP counter, in case something slips up. */ if (V_ip_rsvp_on) { V_ip_rsvp_on = 0; V_rsvp_on--; } return 0; } int rsvp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m; m = *mp; *mp = NULL; if (rsvp_input_p) { /* call the real one if loaded */ *mp = m; rsvp_input_p(mp, offp, proto); return (IPPROTO_DONE); } /* Can still get packets with rsvp_on = 0 if there is a local member * of the group to which the RSVP packet is addressed. But in this * case we want to throw the packet away. */ if (!V_rsvp_on) { m_freem(m); return (IPPROTO_DONE); } if (V_ip_rsvpd != NULL) { *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); } /* Drop the packet */ m_freem(m); return (IPPROTO_DONE); } diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index f35ba81f3936..3b3bb71339d4 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,1768 +1,1769 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * Copyright (c) 2008 Robert N. M. Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include /* * UDP and UDP-Lite protocols implementation. * Per RFC 768, August, 1980. * Per RFC 3828, July, 2004. */ /* * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums * removes the only data integrity mechanism for packets and malformed * packets that would otherwise be discarded due to bad checksums, and may * cause problems (especially for NFS data blocks). */ VNET_DEFINE(int, udp_cksum) = 1; SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_cksum), 0, "compute udp checksum"); VNET_DEFINE(int, udp_log_in_vain) = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_log_in_vain), 0, "Log all incoming UDP packets"); VNET_DEFINE(int, udp_blackhole) = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_blackhole), 0, "Do not send port unreachables for refused connects"); VNET_DEFINE(bool, udp_blackhole_local) = false; SYSCTL_BOOL(_net_inet_udp, OID_AUTO, blackhole_local, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_blackhole_local), false, "Enforce net.inet.udp.blackhole for locally originated packets"); u_long udp_sendspace = 9216; /* really max datagram size */ SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); u_long udp_recvspace = 40 * (1024 + #ifdef INET6 sizeof(struct sockaddr_in6) #else sizeof(struct sockaddr_in) #endif ); /* 40 1K datagrams */ SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); VNET_DEFINE(struct inpcbinfo, udbinfo); VNET_DEFINE(struct inpcbinfo, ulitecbinfo); VNET_DEFINE_STATIC(uma_zone_t, udpcb_zone); #define V_udpcb_zone VNET(udpcb_zone) #ifndef UDBHASHSIZE #define UDBHASHSIZE 128 #endif VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */ VNET_PCPUSTAT_SYSINIT(udpstat); SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(udpstat); #endif /* VIMAGE */ #ifdef INET static void udp_detach(struct socket *so); static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *, int); #endif INPCBSTORAGE_DEFINE(udpcbstor, "udpinp", "udp_inpcb", "udp", "udphash"); INPCBSTORAGE_DEFINE(udplitecbstor, "udpliteinp", "udplite_inpcb", "udplite", "udplitehash"); static void udp_init(void *arg __unused) { /* * For now default to 2-tuple UDP hashing - until the fragment * reassembly code can also update the flowid. * * Once we can calculate the flowid that way and re-establish * a 4-tuple, flip this to 4-tuple. 
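These knobs are ordinary sysctls, so besides sysctl(8) they can be read or written programmatically with sysctlbyname(3). A small illustrative example for net.inet.udp.blackhole (the same pattern works for the other integer tunables above):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

/*
 * Sketch: read and then enable net.inet.udp.blackhole.  Writing the value
 * requires sufficient privilege; reading does not.
 */
int
main(void)
{
	int cur, on = 1;
	size_t len = sizeof(cur);

	if (sysctlbyname("net.inet.udp.blackhole", &cur, &len, NULL, 0) == -1)
		return (1);
	printf("udp.blackhole = %d\n", cur);
	if (sysctlbyname("net.inet.udp.blackhole", NULL, NULL, &on,
	    sizeof(on)) == -1)
		return (1);
	return (0);
}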
*/ in_pcbinfo_init(&V_udbinfo, &udpcbstor, UDBHASHSIZE, UDBHASHSIZE); V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_udpcb_zone, maxsockets); uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached"); /* Additional pcbinfo for UDP-Lite */ in_pcbinfo_init(&V_ulitecbinfo, &udplitecbstor, UDBHASHSIZE, UDBHASHSIZE); } VNET_SYSINIT(udp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, udp_init, NULL); /* * Kernel module interface for updating udpstat. The argument is an index * into udpstat treated as an array of u_long. While this encodes the * general layout of udpstat into the caller, it doesn't encode its location, * so that future changes to add, for example, per-CPU stats support won't * cause binary compatibility problems for kernel modules. */ void kmod_udpstat_inc(int statnum) { counter_u64_add(VNET(udpstat)[statnum], 1); } int udp_newudpcb(struct inpcb *inp) { struct udpcb *up; up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO); if (up == NULL) return (ENOBUFS); inp->inp_ppcb = up; return (0); } void udp_discardcb(struct udpcb *up) { uma_zfree(V_udpcb_zone, up); } #ifdef VIMAGE static void udp_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_udbinfo); uma_zdestroy(V_udpcb_zone); } VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL); static void udplite_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ulitecbinfo); } VNET_SYSUNINIT(udplite, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udplite_destroy, NULL); #endif #ifdef INET /* * Subroutine of udp_input(), which appends the provided mbuf chain to the * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that * contains the source address. If the socket ends up being an IPv6 socket, * udp_append() will convert to a sockaddr_in6 before passing the address * into the socket code. * * In the normal case udp_append() will return 0, indicating that you * must unlock the inp. However if a tunneling protocol is in place we increment * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we * then decrement the reference count. If the inp_rele returns 1, indicating the * inp is gone, we return that to the caller to tell them *not* to unlock * the inp. In the case of multi-cast this will cause the distribution * to stop (though most tunneling protocols known currently do *not* use * multicast). */ static int udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, struct sockaddr_in *udp_in) { struct sockaddr *append_sa; struct socket *so; struct mbuf *tmpopts, *opts = NULL; #ifdef INET6 struct sockaddr_in6 udp_in6; #endif struct udpcb *up; bool filtered; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0], up->u_tun_ctx); INP_RLOCK(inp); if (filtered) return (in_pcbrele_rlocked(inp)); } off += sizeof(struct udphdr); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv4) && IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) { m_freem(n); return (0); } if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */ if (IPSEC_ENABLED(ipv4) && UDPENCAP_INPUT(n, off, AF_INET) != 0) return (0); /* Consumed. 
*/ } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif /* MAC */ if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { #ifdef INET6 if (inp->inp_vflag & INP_IPV6) (void)ip6_savecontrol_v4(inp, n, &opts, NULL); else #endif /* INET6 */ ip_savecontrol(inp, &opts, ip, n); } if ((inp->inp_vflag & INP_IPV4) && (inp->inp_flags2 & INP_ORIGDSTADDR)) { - tmpopts = sbcreatecontrol((caddr_t)&udp_in[1], - sizeof(struct sockaddr_in), IP_ORIGDSTADDR, IPPROTO_IP); + tmpopts = sbcreatecontrol(&udp_in[1], + sizeof(struct sockaddr_in), IP_ORIGDSTADDR, IPPROTO_IP, + M_NOWAIT); if (tmpopts) { if (opts) { tmpopts->m_next = opts; opts = tmpopts; } else opts = tmpopts; } } #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { bzero(&udp_in6, sizeof(udp_in6)); udp_in6.sin6_len = sizeof(udp_in6); udp_in6.sin6_family = AF_INET6; in6_sin_2_v4mapsin6(&udp_in[0], &udp_in6); append_sa = (struct sockaddr *)&udp_in6; } else #endif /* INET6 */ append_sa = (struct sockaddr *)&udp_in[0]; m_adj(n, off); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) { soroverflow_locked(so); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } static bool udp_multi_match(const struct inpcb *inp, void *v) { struct ip *ip = v; struct udphdr *uh = (struct udphdr *)(ip + 1); if (inp->inp_lport != uh->uh_dport) return (false); #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) return (false); #endif if (inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) return (false); if (inp->inp_faddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != ip->ip_src.s_addr) return (false); if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) return (false); return (true); } static int udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in) { struct ip *ip = mtod(m, struct ip *); struct inpcb_iterator inpi = INP_ITERATOR(udp_get_inpcbinfo(proto), INPLOOKUP_RLOCKPCB, udp_multi_match, ip); #ifdef KDTRACE_HOOKS struct udphdr *uh = (struct udphdr *)(ip + 1); #endif struct inpcb *inp; struct mbuf *n; int appends = 0; MPASS(ip->ip_hl == sizeof(struct ip) >> 2); while ((inp = inp_next(&inpi)) != NULL) { /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. 
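The IP_ORIGDSTADDR record appended here is what a transparent UDP proxy uses to recover the original (pre-rewrite) destination of each datagram. A sketch of the consuming side; the setsockopt(2) name is assumed to be IP_ORIGDSTADDR, so check netinet/in.h on the target branch for the exact spelling:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>

/*
 * Sketch: enable delivery of the original destination and pull it out of a
 * received message.  The cmsg payload is a full struct sockaddr_in,
 * matching the sbcreatecontrol() call above (udp_in[1] is the destination).
 */
static int
enable_origdstaddr(int s)
{
	int on = 1;

	return (setsockopt(s, IPPROTO_IP, IP_ORIGDSTADDR, &on, sizeof(on)));
}

static int
get_origdstaddr(struct msghdr *msg, struct sockaddr_in *odst)
{
	struct cmsghdr *cm;

	for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) {
		if (cm->cmsg_level == IPPROTO_IP &&
		    cm->cmsg_type == IP_ORIGDSTADDR) {
			memcpy(odst, CMSG_DATA(cm), sizeof(*odst));
			return (0);
		}
	}
	return (-1);
}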
[RFC3678] */ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct ip_moptions *imo; struct sockaddr_in group; int blocked; imo = inp->inp_moptions; if (imo == NULL) continue; bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(imo, m->m_pkthdr.rcvif, (struct sockaddr *)&group, (struct sockaddr *)&udp_in[0]); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IPSTAT_INC(ips_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); continue; } } if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { if (proto == IPPROTO_UDPLITE) UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh); else UDP_PROBE(receive, NULL, inp, ip, inp, uh); if (udp_append(inp, ip, n, sizeof(struct ip), udp_in)) { INP_RUNLOCK(inp); break; } else appends++; } /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((inp->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) { INP_RUNLOCK(inp); break; } } if (appends == 0) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datgram.) */ UDPSTAT_INC(udps_noport); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) UDPSTAT_INC(udps_noportmcast); else UDPSTAT_INC(udps_noportbcast); } m_freem(m); return (IPPROTO_DONE); } int udp_input(struct mbuf **mp, int *offp, int proto) { struct ip *ip; struct udphdr *uh; struct ifnet *ifp; struct inpcb *inp; uint16_t len, ip_len; struct inpcbinfo *pcbinfo; struct sockaddr_in udp_in[2]; struct mbuf *m; struct m_tag *fwd_tag; int cscov_partial, iphlen; m = *mp; iphlen = *offp; ifp = m->m_pkthdr.rcvif; *mp = NULL; UDPSTAT_INC(udps_ipackets); /* * Strip IP options, if any; should skip this, make available to * user, and use on returned packets, but we don't yet have a way to * check the checksum with options still present. */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ if (m->m_len < iphlen + sizeof(struct udphdr)) { if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) { UDPSTAT_INC(udps_hdrops); return (IPPROTO_DONE); } } ip = mtod(m, struct ip *); uh = (struct udphdr *)((caddr_t)ip + iphlen); cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0; /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; /* * Construct sockaddr format source address. Stuff source address * and datagram in user buffer. */ bzero(&udp_in[0], sizeof(struct sockaddr_in) * 2); udp_in[0].sin_len = sizeof(struct sockaddr_in); udp_in[0].sin_family = AF_INET; udp_in[0].sin_port = uh->uh_sport; udp_in[0].sin_addr = ip->ip_src; udp_in[1].sin_len = sizeof(struct sockaddr_in); udp_in[1].sin_family = AF_INET; udp_in[1].sin_port = uh->uh_dport; udp_in[1].sin_addr = ip->ip_dst; /* * Make mbuf data length reflect UDP length. If not enough data to * reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); ip_len = ntohs(ip->ip_len) - iphlen; if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) { /* Zero means checksum over the complete packet. 
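The SO_REUSEPORT/SO_REUSEADDR heuristic above is why several processes that want the same multicast stream must set one of those options before bind(2); without it, udp_multi_input() stops after the first matching PCB. A minimal receiver sketch (group 239.1.1.1 and port 5000 are just placeholders):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>

/*
 * Sketch: join a multicast group so that several processes on the same
 * host can all receive the group's datagrams.  SO_REUSEPORT must be set
 * before bind().
 */
static int
mcast_listener(void)
{
	struct sockaddr_in sin;
	struct ip_mreq mreq;
	int on = 1, s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(sin);
	sin.sin_port = htons(5000);
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	bind(s, (struct sockaddr *)&sin, sizeof(sin));

	mreq.imr_multiaddr.s_addr = inet_addr("239.1.1.1");
	mreq.imr_interface.s_addr = htonl(INADDR_ANY);
	setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
	return (s);
}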
*/ if (len == 0) len = ip_len; cscov_partial = 0; } if (ip_len != len) { if (len > ip_len || len < sizeof(struct udphdr)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (proto == IPPROTO_UDP) m_adj(m, len - ip_len); } /* * Checksum extended UDP header and data. */ if (uh->uh_sum) { u_short uh_sum; if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + proto)); uh_sum ^= 0xffff; } else { char b[offsetof(struct ipovly, ih_src)]; struct ipovly *ipov = (struct ipovly *)ip; bcopy(ipov, b, sizeof(b)); bzero(ipov, sizeof(ipov->ih_x1)); ipov->ih_len = (proto == IPPROTO_UDP) ? uh->uh_ulen : htons(ip_len); uh_sum = in_cksum(m, len + sizeof (struct ip)); bcopy(b, ipov, sizeof(b)); } if (uh_sum) { UDPSTAT_INC(udps_badsum); m_freem(m); return (IPPROTO_DONE); } } else { if (proto == IPPROTO_UDP) { UDPSTAT_INC(udps_nosum); } else { /* UDPLite requires a checksum */ /* XXX: What is the right UDPLite MIB counter here? */ m_freem(m); return (IPPROTO_DONE); } } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, ifp)) return (udp_multi_input(m, proto, udp_in)); pcbinfo = udp_get_inpcbinfo(proto); /* * Locate pcb for datagram. * * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in *next_hop; next_hop = (struct sockaddr_in *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in_pcblookup(pcbinfo, ip->ip_src, uh->uh_sport, next_hop->sin_addr, next_hop->sin_port ? htons(next_hop->sin_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } else inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp, m); if (inp == NULL) { if (V_udp_log_in_vain) { char src[INET_ADDRSTRLEN]; char dst[INET_ADDRSTRLEN]; log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport), inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport)); } if (proto == IPPROTO_UDPLITE) UDPLITE_PROBE(receive, NULL, NULL, ip, NULL, uh); else UDP_PROBE(receive, NULL, NULL, ip, NULL, uh); UDPSTAT_INC(udps_noport); if (m->m_flags & (M_BCAST | M_MCAST)) { UDPSTAT_INC(udps_noportbcast); goto badunlocked; } if (V_udp_blackhole && (V_udp_blackhole_local || !in_localip(ip->ip_src))) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto badunlocked; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); return (IPPROTO_DONE); } /* * Check the minimum TTL for socket. 
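For reference, in_cksum() and in_pseudo() together implement the 16-bit ones'-complement Internet checksum of RFC 768/RFC 1071: the sum covers a pseudo-header (source and destination address, protocol, UDP length) plus the UDP header and payload, and for UDP-Lite only the first "coverage" bytes. A plain C sketch of the core sum (the kernel versions are optimized per architecture):

#include <stddef.h>
#include <stdint.h>

/*
 * Sketch of the RFC 1071 ones'-complement checksum over a buffer already in
 * network byte order.  A UDP checksum runs this over the pseudo-header, the
 * UDP header (with uh_sum temporarily zero) and the payload.
 */
static uint16_t
rfc1071_cksum(const void *buf, size_t len)
{
	const uint16_t *p = buf;
	uint32_t sum = 0;

	while (len > 1) {
		sum += *p++;
		len -= 2;
	}
	if (len == 1) {
		uint16_t last = 0;

		*(uint8_t *)&last = *(const uint8_t *)p; /* zero-pad odd byte */
		sum += last;
	}
	while (sum >> 16)			/* fold the carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}

A computed checksum of zero is transmitted as 0xffff so that "no checksum" stays distinguishable, which is exactly what the ui_sum handling later in udp_output() does.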
*/ INP_RLOCK_ASSERT(inp); if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { if (proto == IPPROTO_UDPLITE) UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh); else UDP_PROBE(receive, NULL, inp, ip, inp, uh); INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } if (cscov_partial) { struct udpcb *up; up = intoudpcb(inp); if (up->u_rxcslen == 0 || up->u_rxcslen > len) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } if (proto == IPPROTO_UDPLITE) UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh); else UDP_PROBE(receive, NULL, inp, ip, inp, uh); if (udp_append(inp, ip, m, iphlen, udp_in) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badunlocked: m_freem(m); return (IPPROTO_DONE); } #endif /* INET */ /* * Notify a udp user of an asynchronous error; just wake up so that they can * collect error status. */ struct inpcb * udp_notify(struct inpcb *inp, int errno) { INP_WLOCK_ASSERT(inp); if ((errno == EHOSTUNREACH || errno == ENETUNREACH || errno == EHOSTDOWN) && inp->inp_route.ro_nh) { NH_FREE(inp->inp_route.ro_nh); inp->inp_route.ro_nh = (struct nhop_object *)NULL; } inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); sowwakeup(inp->inp_socket); return (inp); } #ifdef INET static void udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip, struct inpcbinfo *pcbinfo) { struct ip *ip = vip; struct udphdr *uh; struct in_addr faddr; struct inpcb *inp; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ in_pcbnotifyall(pcbinfo, faddr, EHOSTDOWN, udp_notify); return; } /* * Hostdead is ugly because it goes linearly through all PCBs. * * XXX: We never get this from ICMP, otherwise it makes an excellent * DoS attack on machines with many connections. 
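The inp_ip_minttl test above backs the IP_MINTTL socket option, the usual building block for the Generalized TTL Security Mechanism (RFC 5082): peers send with TTL 255, so anything that has crossed a router arrives with a lower TTL and is dropped here before it reaches the socket buffer. Enabling it from userland:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

/*
 * Sketch: accept only datagrams that arrive with TTL 255, i.e. that were
 * sent by a direct neighbor.  Anything below the threshold is freed by the
 * inp_ip_minttl check in udp_input() above.
 */
static int
require_neighbor_ttl(int s)
{
	int minttl = 255;

	return (setsockopt(s, IPPROTO_IP, IP_MINTTL, &minttl, sizeof(minttl)));
}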
*/ if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip != NULL) { uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL) { INP_WLOCK_ASSERT(inp); if (inp->inp_socket != NULL) { udp_notify(inp, inetctlerrmap[cmd]); } INP_WUNLOCK(inp); } else { inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { struct udpcb *up; void *ctx; udp_tun_icmp_t func; up = intoudpcb(inp); ctx = up->u_tun_ctx; func = up->u_icmp_func; INP_RUNLOCK(inp); if (func != NULL) (*func)(cmd, sa, vip, ctx); } } } else in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd], udp_notify); } void udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo)); } void udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip) { return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo)); } #endif /* INET */ static int udp_pcblist(SYSCTL_HANDLER_ARGS) { struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo, INPLOOKUP_RLOCKPCB); struct xinpgen xig; struct inpcb *inp; int error; if (req->newptr != 0) return (EPERM); if (req->oldptr == 0) { int n; n = V_udbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if ((error = sysctl_wire_old_buffer(req, 0)) != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = V_udbinfo.ipi_count; xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); while ((inp = inp_next(&inpi)) != NULL) { if (inp->inp_gencnt <= xig.xig_gen && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); error = SYSCTL_OUT(req, &xi, sizeof xi); if (error) { INP_RUNLOCK(inp); break; } } } if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. 
*/ xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_udbinfo.ipi_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } return (error); } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); #ifdef INET static int udp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct epoch_tracker et; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); NET_EPOCH_ENTER(et); inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); NET_EPOCH_EXIT(et); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 0, 0, udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); #endif /* INET */ int udp_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; struct udpcb *up; int isudplite, error, optval; error = 0; isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); if (sopt->sopt_level != so->so_proto->pr_protocol) { #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) { INP_WUNLOCK(inp); error = ip6_ctloutput(so, sopt); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { INP_WUNLOCK(inp); error = ip_ctloutput(so, sopt); } #endif return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { #if defined(IPSEC) || defined(IPSEC_SUPPORT) #ifdef INET case UDP_ENCAP: if (!IPSEC_ENABLED(ipv4)) { INP_WUNLOCK(inp); return (ENOPROTOOPT); } error = UDPENCAP_PCBCTL(inp, sopt); break; #endif /* INET */ #endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) break; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); if ((optval != 0 && optval < 8) || (optval > 65535)) { INP_WUNLOCK(inp); error = EINVAL; break; } if (sopt->sopt_name == UDPLITE_SEND_CSCOV) up->u_txcslen = optval; else up->u_rxcslen = optval; INP_WUNLOCK(inp); break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { #if defined(IPSEC) || defined(IPSEC_SUPPORT) #ifdef INET case UDP_ENCAP: if (!IPSEC_ENABLED(ipv4)) { INP_WUNLOCK(inp); return (ENOPROTOOPT); } error = UDPENCAP_PCBCTL(inp, sopt); break; #endif /* INET */ #endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); if (sopt->sopt_name == UDPLITE_SEND_CSCOV) optval = up->u_txcslen; else optval = up->u_rxcslen; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: 
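The UDPLITE_SEND_CSCOV/UDPLITE_RECV_CSCOV handling above enforces the RFC 3828 rule that a nonzero checksum coverage must be at least 8 bytes (the UDP-Lite header itself) and no more than 65535, with 0 meaning "cover the whole datagram". A userland sketch, assuming the option names come from netinet/udplite.h:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/udplite.h>	/* UDPLITE_SEND_CSCOV, UDPLITE_RECV_CSCOV */

/*
 * Sketch: open a UDP-Lite socket, checksum only the first 20 bytes of each
 * outgoing datagram, and require at least 20 bytes of coverage on receive.
 * Values 1..7 are rejected with EINVAL by the validation above.
 */
static int
udplite_partial_cksum(void)
{
	int s, cscov = 20;

	s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
	setsockopt(s, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV, &cscov,
	    sizeof(cscov));
	setsockopt(s, IPPROTO_UDPLITE, UDPLITE_RECV_CSCOV, &cscov,
	    sizeof(cscov));
	return (s);
}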
INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; } return (error); } #ifdef INET #ifdef INET6 /* The logic here is derived from ip6_setpktopt(). See comments there. */ static int udp_v4mapped_pktinfo(struct cmsghdr *cm, struct sockaddr_in * src, struct inpcb *inp, int flags) { struct ifnet *ifp; struct in6_pktinfo *pktinfo; struct in_addr ia; if ((flags & PRUS_IPV6) == 0) return (0); if (cm->cmsg_level != IPPROTO_IPV6) return (0); if (cm->cmsg_type != IPV6_2292PKTINFO && cm->cmsg_type != IPV6_PKTINFO) return (0); if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) return (EINVAL); pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm); if (!IN6_IS_ADDR_V4MAPPED(&pktinfo->ipi6_addr) && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) return (EINVAL); /* Validate the interface index if specified. */ if (pktinfo->ipi6_ifindex) { struct epoch_tracker et; NET_EPOCH_ENTER(et); ifp = ifnet_byindex(pktinfo->ipi6_ifindex); NET_EPOCH_EXIT(et); /* XXXGL: unsafe ifp */ if (ifp == NULL) return (ENXIO); } else ifp = NULL; if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ia.s_addr = pktinfo->ipi6_addr.s6_addr32[3]; if (in_ifhasaddr(ifp, ia) == 0) return (EADDRNOTAVAIL); } bzero(src, sizeof(*src)); src->sin_family = AF_INET; src->sin_len = sizeof(*src); src->sin_port = inp->inp_lport; src->sin_addr.s_addr = pktinfo->ipi6_addr.s6_addr32[3]; return (0); } #endif static int udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td, int flags) { struct udpiphdr *ui; int len = m->m_pkthdr.len; struct in_addr faddr, laddr; struct cmsghdr *cm; struct inpcbinfo *pcbinfo; struct sockaddr_in *sin, src; struct epoch_tracker et; int cscov_partial = 0; int error = 0; int ipflags = 0; u_short fport, lport; u_char tos; uint8_t pr; uint16_t cscov = 0; uint32_t flowid = 0; uint8_t flowtype = M_HASHTYPE_NONE; if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { if (control) m_freem(control); m_freem(m); return (EMSGSIZE); } src.sin_family = 0; sin = (struct sockaddr_in *)addr; /* * udp_output() may need to temporarily bind or connect the current * inpcb. As such, we don't know up front whether we will need the * pcbinfo lock or not. Do any work to decide what is needed up * front before acquiring any locks. * * We will need network epoch in either case, to safely lookup into * pcb hash. */ if (sin == NULL || (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) INP_WLOCK(inp); else INP_RLOCK(inp); NET_EPOCH_ENTER(et); tos = inp->inp_ip_tos; if (control != NULL) { /* * XXX: Currently, we assume all the optional information is * stored in a single mbuf. 
*/ if (control->m_next) { m_freem(control); error = EINVAL; goto release; } for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) { error = EINVAL; break; } #ifdef INET6 error = udp_v4mapped_pktinfo(cm, &src, inp, flags); if (error != 0) break; #endif if (cm->cmsg_level != IPPROTO_IP) continue; switch (cm->cmsg_type) { case IP_SENDSRCADDR: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) { error = EINVAL; break; } bzero(&src, sizeof(src)); src.sin_family = AF_INET; src.sin_len = sizeof(src); src.sin_port = inp->inp_lport; src.sin_addr = *(struct in_addr *)CMSG_DATA(cm); break; case IP_TOS: if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) { error = EINVAL; break; } tos = *(u_char *)CMSG_DATA(cm); break; case IP_FLOWID: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } flowid = *(uint32_t *) CMSG_DATA(cm); break; case IP_FLOWTYPE: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } flowtype = *(uint32_t *) CMSG_DATA(cm); break; #ifdef RSS case IP_RSSBUCKETID: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } /* This is just a placeholder for now */ break; #endif /* RSS */ default: error = ENOPROTOOPT; break; } if (error) break; } m_freem(control); control = NULL; } if (error) goto release; pr = inp->inp_socket->so_proto->pr_protocol; pcbinfo = udp_get_inpcbinfo(pr); /* * If the IP_SENDSRCADDR control message was specified, override the * source address for this datagram. Its use is invalidated if the * address thus specified is incomplete or clobbers other inpcbs. */ laddr = inp->inp_laddr; lport = inp->inp_lport; if (src.sin_family == AF_INET) { if ((lport == 0) || (laddr.s_addr == INADDR_ANY && src.sin_addr.s_addr == INADDR_ANY)) { error = EINVAL; goto release; } INP_HASH_WLOCK(pcbinfo); error = in_pcbbind_setup(inp, (struct sockaddr *)&src, &laddr.s_addr, &lport, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error) goto release; } /* * If a UDP socket has been connected, then a local address/port will * have been selected and bound. * * If a UDP socket has not been connected to, then an explicit * destination address must be used, in which case a local * address/port may not have been selected and bound. */ if (sin != NULL) { INP_LOCK_ASSERT(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; } /* * Jail may rewrite the destination address, so let it do * that before we use it. */ error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error) goto release; /* * If a local address or port hasn't yet been selected, or if * the destination address needs to be rewritten due to using * a special INADDR_ constant, invoke in_pcbconnect_setup() * to do the heavy lifting. Once a port is selected, we * commit the binding back to the socket; we also commit the * binding of the address if in jail. * * If we already have a valid binding and we're not * requesting a destination address rewrite, use a fast path. */ if (inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_lport == 0 || sin->sin_addr.s_addr == INADDR_ANY || sin->sin_addr.s_addr == INADDR_BROADCAST) { INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport, &faddr.s_addr, &fport, NULL, td->td_ucred); if (error) { INP_HASH_WUNLOCK(pcbinfo); goto release; } /* * XXXRW: Why not commit the port if the address is * !INADDR_ANY? 
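These are the same ancillary types a sender supplies to sendmsg(2); the parsing above requires each option to be an exactly-sized record inside a single control mbuf. A sending-side sketch (note that the IP_SENDSRCADDR path later in udp_output() returns EINVAL unless the socket is already bound to a local port):

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>

/*
 * Sketch: send one datagram with an explicit source address
 * (IP_SENDSRCADDR) and TOS byte (IP_TOS) supplied as ancillary data,
 * matching the cmsg parsing in udp_output() above.
 */
static ssize_t
send_with_src_and_tos(int s, struct sockaddr_in *dst, struct in_addr src,
    u_char tos, void *buf, size_t len)
{
	union {
		char buf[CMSG_SPACE(sizeof(struct in_addr)) +
		    CMSG_SPACE(sizeof(u_char))];
		struct cmsghdr align;
	} cbuf;
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_name = dst, .msg_namelen = sizeof(*dst),
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf.buf, .msg_controllen = sizeof(cbuf.buf),
	};
	struct cmsghdr *cm;

	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = IPPROTO_IP;
	cm->cmsg_type = IP_SENDSRCADDR;
	cm->cmsg_len = CMSG_LEN(sizeof(src));
	memcpy(CMSG_DATA(cm), &src, sizeof(src));

	cm = CMSG_NXTHDR(&msg, cm);
	cm->cmsg_level = IPPROTO_IP;
	cm->cmsg_type = IP_TOS;
	cm->cmsg_len = CMSG_LEN(sizeof(tos));
	memcpy(CMSG_DATA(cm), &tos, sizeof(tos));

	return (sendmsg(s, &msg, 0));
}

If a fixed TOS is wanted for every datagram, a one-time setsockopt(IPPROTO_IP, IP_TOS, ...) is simpler; the per-message cmsg only overrides the inp_ip_tos default read at the top of udp_output().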
*/ /* Commit the local port if newly assigned. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { INP_WLOCK_ASSERT(inp); /* * Remember addr if jailed, to prevent * rebinding. */ if (prison_flag(td->td_ucred, PR_IP4)) inp->inp_laddr = laddr; inp->inp_lport = lport; error = in_pcbinshash(inp); INP_HASH_WUNLOCK(pcbinfo); if (error != 0) { inp->inp_lport = 0; error = EAGAIN; goto release; } inp->inp_flags |= INP_ANONPORT; } else INP_HASH_WUNLOCK(pcbinfo); } else { faddr = sin->sin_addr; fport = sin->sin_port; } } else { INP_LOCK_ASSERT(inp); faddr = inp->inp_faddr; fport = inp->inp_fport; if (faddr.s_addr == INADDR_ANY) { error = ENOTCONN; goto release; } } /* * Calculate data length and get a mbuf for UDP, IP, and possible * link-layer headers. Immediate slide the data pointer back forward * since we won't use that space at this layer. */ M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } m->m_data += max_linkhdr; m->m_len -= max_linkhdr; m->m_pkthdr.len -= max_linkhdr; /* * Fill in mbuf with extended UDP header and addresses and length put * into network format. */ ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ ui->ui_v = IPVERSION << 4; ui->ui_pr = pr; ui->ui_src = laddr; ui->ui_dst = faddr; ui->ui_sport = lport; ui->ui_dport = fport; ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); if (pr == IPPROTO_UDPLITE) { struct udpcb *up; uint16_t plen; up = intoudpcb(inp); cscov = up->u_txcslen; plen = (u_short)len + sizeof(struct udphdr); if (cscov >= plen) cscov = 0; ui->ui_len = htons(plen); ui->ui_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 0 : 1; } /* * Set the Don't Fragment bit in the IP header. */ if (inp->inp_flags & INP_DONTFRAG) { struct ip *ip; ip = (struct ip *)&ui->ui_i; ip->ip_off |= htons(IP_DF); } if (inp->inp_socket->so_options & SO_DONTROUTE) ipflags |= IP_ROUTETOIF; if (inp->inp_socket->so_options & SO_BROADCAST) ipflags |= IP_ALLOWBROADCAST; if (inp->inp_flags & INP_ONESBCAST) ipflags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif /* * Set up checksum and output datagram. */ ui->ui_sum = 0; if (pr == IPPROTO_UDPLITE) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; if (cscov_partial) { if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0) ui->ui_sum = 0xffff; } else { if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0) ui->ui_sum = 0xffff; } } else if (V_udp_cksum) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, htons((u_short)len + sizeof(struct udphdr) + pr)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len); ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ ((struct ip *)ui)->ip_tos = tos; /* XXX */ UDPSTAT_INC(udps_opackets); /* * Setup flowid / RSS information for outbound socket. * * Once the UDP code decides to set a flowid some other way, * this allows the flowid to be overridden by userland. 
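The INP_DONTFRAG branch above is reached through the IP_DONTFRAG socket option; with DF set, a datagram larger than the path MTU is rejected by ip_output() with EMSGSIZE instead of being fragmented, which is what path-MTU probing relies on. A short sketch:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

/*
 * Sketch: set DF on everything this UDP socket sends, so oversized
 * datagrams fail with EMSGSIZE rather than being fragmented.
 */
static int
set_dontfrag(int s)
{
	int on = 1;

	return (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, &on, sizeof(on)));
}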
*/ if (flowtype != M_HASHTYPE_NONE) { m->m_pkthdr.flowid = flowid; M_HASHTYPE_SET(m, flowtype); } #if defined(ROUTE_MPATH) || defined(RSS) else if (CALC_FLOWID_OUTBOUND_SENDTO) { uint32_t hash_val, hash_type; hash_val = fib4_calc_packet_hash(laddr, faddr, lport, fport, pr, &hash_type); m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } /* * Don't override with the inp cached flowid value. * * Depending upon the kind of send being done, the inp * flowid/flowtype values may actually not be appropriate * for this particular socket send. * * We should either leave the flowid at zero (which is what is * currently done) or set it to some software generated * hash value based on the packet contents. */ ipflags |= IP_NODEFAULTFLOWID; #endif /* RSS */ if (pr == IPPROTO_UDPLITE) UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); else UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); error = ip_output(m, inp->inp_options, INP_WLOCKED(inp) ? &inp->inp_route : NULL, ipflags, inp->inp_moptions, inp); INP_UNLOCK(inp); NET_EPOCH_EXIT(et); return (error); release: INP_UNLOCK(inp); NET_EPOCH_EXIT(et); m_freem(m); return (error); } static void udp_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_abort: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp_attach(struct socket *so, int proto, struct thread *td) { static uint32_t udp_flowid; struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp_attach: inp != NULL")); error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); error = in_pcballoc(so, pcbinfo); if (error) return (error); inp = sotoinpcb(so); inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = V_ip_defttl; inp->inp_flowid = atomic_fetchadd_int(&udp_flowid, 1); inp->inp_flowtype = M_HASHTYPE_OPAQUE; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); return (error); } INP_WUNLOCK(inp); return (0); } #endif /* INET */ int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx) { struct inpcb *inp; struct udpcb *up; KASSERT(so->so_type == SOCK_DGRAM, ("udp_set_kernel_tunneling: !dgram")); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL")); INP_WLOCK(inp); up = intoudpcb(inp); if ((f != NULL || i != NULL) && ((up->u_tun_func != NULL) || (up->u_icmp_func != NULL))) { INP_WUNLOCK(inp); return (EBUSY); } up->u_tun_func = f; up->u_icmp_func = i; up->u_tun_ctx = ctx; INP_WUNLOCK(inp); return (0); } #ifdef INET static int udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in *sinp; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_bind: inp == NULL")); sinp = (struct sockaddr_in *)nam; if (nam->sa_family != AF_INET) { /* * Preserve compatibility with old programs. 
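udp_set_kernel_tunneling() is the hook in-kernel consumers (vxlan(4), for example) use to intercept datagrams before socket delivery; udp_append() above calls the registered function and, when it returns true, skips the socket buffer entirely. A hedged kernel-side sketch; the callback signature is paraphrased from that call site, so the authoritative udp_tun_func_t/udp_tun_icmp_t typedefs in netinet/udp_var.h should be checked:

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/udp_var.h>

struct mytun_softc;	/* hypothetical per-tunnel state */

/*
 * Sketch only: claim every datagram arriving on the socket.  Returning true
 * tells udp_append() the callback consumed (or kept) the mbuf; returning
 * false lets normal socket delivery continue.  'off' is the offset of the
 * UDP header within the mbuf chain.
 */
static bool
mytun_input(struct mbuf *m, int off, struct inpcb *inp,
    const struct sockaddr *sa, void *ctx)
{
	struct mytun_softc *sc = ctx;

	(void)sc;
	m_freem(m);		/* a real consumer would decapsulate here */
	return (true);
}

static int
mytun_attach(struct socket *so, struct mytun_softc *sc)
{

	/* No ICMP callback in this sketch; pass one to observe errors. */
	return (udp_set_kernel_tunneling(so, mytun_input, NULL, sc));
}

A consumer that also cares about ICMP errors passes a second callback instead of NULL; udp_common_ctlinput() above forwards unreachable notifications to it with the registered context pointer.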
*/ if (nam->sa_family != AF_UNSPEC || nam->sa_len < offsetof(struct sockaddr_in, sin_zero) || sinp->sin_addr.s_addr != INADDR_ANY) return (EAFNOSUPPORT); nam->sa_family = AF_INET; } if (nam->sa_len != sizeof(struct sockaddr_in)) return (EINVAL); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_close: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in *sin; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_connect: inp == NULL")); sin = (struct sockaddr_in *)nam; if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_len != sizeof(*sin)) return (EINVAL); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_WUNLOCK(inp); return (EISCONN); } error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error != 0) { INP_WUNLOCK(inp); return (error); } NET_EPOCH_ENTER(et); INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, nam, td->td_ucred, true); INP_HASH_WUNLOCK(pcbinfo); NET_EPOCH_EXIT(et); if (error == 0) soisconnected(so); INP_WUNLOCK(inp); return (error); } static void udp_detach(struct socket *so) { struct inpcb *inp; struct udpcb *up; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("udp_detach: not disconnected")); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); inp->inp_ppcb = NULL; in_pcbdetach(inp); in_pcbfree(inp); udp_discardcb(up); } static int udp_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr == INADDR_ANY) { INP_WUNLOCK(inp); return (ENOTCONN); } INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); INP_WUNLOCK(inp); return (0); } static int udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_send: inp == NULL")); if (addr != NULL) { error = 0; if (addr->sa_family != AF_INET) error = EAFNOSUPPORT; else if (addr->sa_len != sizeof(struct sockaddr_in)) error = EINVAL; if (__predict_false(error != 0)) { m_freem(control); m_freem(m); return (error); } } return (udp_output(inp, m, addr, control, td, flags)); } #endif /* INET */ int udp_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } #ifdef INET struct pr_usrreqs udp_usrreqs = { .pru_abort = udp_abort, .pru_attach = udp_attach, .pru_bind = udp_bind, .pru_connect = udp_connect, .pru_control 
= in_control, .pru_detach = udp_detach, .pru_disconnect = udp_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = udp_send, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_shutdown = udp_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp_close, }; #endif /* INET */ diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 3a13d2a9dc74..9835dc495f09 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -1,1751 +1,1750 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; u_char ip6_protox[IPPROTO_MAX]; VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl); VNET_DEFINE(u_long, in6_ifaddrhmask); static struct netisr_handler ip6_nh = { .nh_name = "ip6", .nh_handler = ip6_input, .nh_proto = NETISR_IPV6, #ifdef RSS .nh_m2cpuid = rss_soft_m2cpuid_v6, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, #else .nh_policy = NETISR_POLICY_FLOW, #endif }; static int sysctl_netinet6_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip6_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip6_nh, qlimit)); } SYSCTL_DECL(_net_inet6_ip6); SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRQMAXLEN, intr_queue_maxlen, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_netinet6_intr_queue_maxlen, "I", "Maximum size of the IPv6 input queue"); VNET_DEFINE_STATIC(bool, ip6_sav) = true; #define V_ip6_sav VNET(ip6_sav) SYSCTL_BOOL(_net_inet6_ip6, OID_AUTO, source_address_validation, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sav), true, "Drop incoming packets with source address that is a local address"); #ifdef RSS static struct netisr_handler ip6_direct_nh = { .nh_name = "ip6_direct", .nh_handler = ip6_direct_input, .nh_proto = NETISR_IPV6_DIRECT, .nh_m2cpuid = rss_soft_m2cpuid_v6, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, }; static int sysctl_netinet6_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip6_direct_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip6_direct_nh, qlimit)); } SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_netinet6_intr_direct_queue_maxlen, "I", "Maximum size of the IPv6 direct input queue"); #endif VNET_DEFINE(pfil_head_t, inet6_pfil_head); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); VNET_PCPUSTAT_SYSINIT(ip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ip6stat); #endif /* VIMAGE */ struct rmlock in6_ifaddr_lock; RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); static int ip6_hopopts_input(u_int32_t 
*, u_int32_t *, struct mbuf **, int *); /* * IP6 initialization: fill in IP6 protocol switch table. * All protocols not implemented in kernel go to raw IP6 protocol handler. */ static void ip6_vnet_init(void *arg __unused) { struct pfil_head_args args; TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", &V_ip6_auto_linklocal); TUNABLE_INT_FETCH("net.inet6.ip6.accept_rtadv", &V_ip6_accept_rtadv); TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr); CK_STAILQ_INIT(&V_in6_ifaddrhead); V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR, &V_in6_ifaddrhmask); /* Initialize packet filter hooks. */ args.pa_version = PFIL_VERSION; args.pa_flags = PFIL_IN | PFIL_OUT; args.pa_type = PFIL_TYPE_IP6; args.pa_headname = PFIL_INET6_NAME; V_inet6_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register input helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET6, &V_ipsec_hhh_out[HHOOK_IPSEC_INET6], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register output helper hook\n", __func__); scope6_init(); addrsel_policy_init(); nd6_init(); frag6_init(); V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; /* Skip global initialization stuff for non-default instances. */ #ifdef VIMAGE netisr_register_vnet(&ip6_nh); #ifdef RSS netisr_register_vnet(&ip6_direct_nh); #endif #endif } VNET_SYSINIT(ip6_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, ip6_vnet_init, NULL); static void ip6_init(void *arg __unused) { struct protosw *pr; pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); KASSERT(pr, ("%s: PF_INET6 not found", __func__)); /* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */ for (int i = 0; i < IPPROTO_MAX; i++) ip6_protox[i] = pr - inet6sw; /* * Cycle through IP protocols and put them into the appropriate place * in ip6_protox[]. */ for (pr = inet6domain.dom_protosw; pr < inet6domain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ if (pr->pr_protocol < IPPROTO_MAX) ip6_protox[pr->pr_protocol] = pr - inet6sw; } netisr_register(&ip6_nh); #ifdef RSS netisr_register(&ip6_direct_nh); #endif } SYSINIT(ip6_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_init, NULL); /* * The protocol to be inserted into ip6_protox[] must be already registered * in inet6sw[], either statically or through pf_proto_register(). */ int ip6proto_register(short ip6proto) { struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */ return (EEXIST); /* * Find the protocol position in inet6sw[] and set the index. */ for (pr = inet6domain.dom_protosw; pr < inet6domain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol == ip6proto) { ip6_protox[pr->pr_protocol] = pr - inet6sw; return (0); } } return (EPROTONOSUPPORT); } int ip6proto_unregister(short ip6proto) { struct protosw *pr; /* Sanity checks. 
*/ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */ return (ENOENT); /* Reset the protocol slot to IPPROTO_RAW. */ ip6_protox[ip6proto] = pr - inet6sw; return (0); } #ifdef VIMAGE static void ip6_destroy(void *unused __unused) { struct ifaddr *ifa, *nifa; struct ifnet *ifp; int error; #ifdef RSS netisr_unregister_vnet(&ip6_direct_nh); #endif netisr_unregister_vnet(&ip6_nh); pfil_head_unregister(V_inet6_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET6: " "error %d returned\n", __func__, error); } error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister output helper hook " "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: " "error %d returned\n", __func__, error); } /* Cleanup addresses. */ IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* Cannot lock here - lock recursion. */ /* IF_ADDR_LOCK(ifp); */ CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } /* IF_ADDR_UNLOCK(ifp); */ in6_ifdetach_destroy(ifp); mld_domifdetach(ifp); } IFNET_RUNLOCK(); /* Make sure any routes are gone as well. */ rib_flush_routes_family(AF_INET6); frag6_destroy(); nd6_destroy(); in6_ifattach_destroy(); hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask); } VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); #endif static int ip6_input_hbh(struct mbuf **mp, uint32_t *plen, uint32_t *rtalert, int *off, int *nxt, int *ours) { struct mbuf *m; struct ip6_hdr *ip6; struct ip6_hbh *hbh; if (ip6_hopopts_input(plen, rtalert, mp, off)) { #if 0 /*touches NULL pointer*/ in6_ifstat_inc((*mp)->m_pkthdr.rcvif, ifs6_in_discard); #endif goto out; /* m have already been freed */ } /* adjust pointer */ m = *mp; ip6 = mtod(m, struct ip6_hdr *); /* * if the payload length field is 0 and the next header field * indicates Hop-by-Hop Options header, then a Jumbo Payload * option MUST be included. */ if (ip6->ip6_plen == 0 && *plen == 0) { /* * Note that if a valid jumbo payload option is * contained, ip6_hopopts_input() must set a valid * (non-zero) payload length to the variable plen. */ IP6STAT_INC(ip6s_badoptions); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); goto out; } /* ip6_hopopts_input() ensures that mbuf is contiguous */ hbh = (struct ip6_hbh *)(ip6 + 1); *nxt = hbh->ip6h_nxt; /* * If we are acting as a router and the packet contains a * router alert option, see if we know the option value. * Currently, we only support the option value for MLD, in which * case we should pass the packet to the multicast routing * daemon. */ if (*rtalert != ~0) { switch (*rtalert) { case IP6OPT_RTALERT_MLD: if (V_ip6_forwarding) *ours = 1; break; default: /* * RFC2711 requires unrecognized values must be * silently ignored. */ break; } } return (0); out: return (1); } #ifdef RSS /* * IPv6 direct input routine. 
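ip6proto_register() and ip6proto_unregister() only flip the ip6_protox[] dispatch slot; as the comments above note, the protosw entry itself must already be present in inet6sw[] or have been added with pf_proto_register(). A hedged sketch of how a loadable module might pair the two calls (IPPROTO_EXAMPLE and the module name are placeholders):

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet6/ip6_var.h>	/* assumed home of ip6proto_register() */

#define	IPPROTO_EXAMPLE	254	/* hypothetical; use an assigned number */

static int
example6_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		/* The inet6sw[] entry for IPPROTO_EXAMPLE must already exist. */
		return (ip6proto_register(IPPROTO_EXAMPLE));
	case MOD_UNLOAD:
		return (ip6proto_unregister(IPPROTO_EXAMPLE));
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t example6_mod = {
	"example6",
	example6_modevent,
	NULL
};

DECLARE_MODULE(example6, example6_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);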
* * This is called when reinjecting completed fragments where * all of the previous checking and book-keeping has been done. */ void ip6_direct_input(struct mbuf *m) { int off, nxt; int nest; struct m_tag *mtag; struct ip6_direct_ctx *ip6dc; mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL); KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!")); ip6dc = (struct ip6_direct_ctx *)(mtag + 1); nxt = ip6dc->ip6dc_nxt; off = ip6dc->ip6dc_off; nest = 0; m_tag_delete(m, mtag); while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); goto bad; } #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv6)) { if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) return; } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: m_freem(m); } #endif void ip6_input(struct mbuf *m) { struct in6_addr odst; struct ip6_hdr *ip6; struct in6_ifaddr *ia; struct ifnet *rcvif; u_int32_t plen; u_int32_t rtalert = ~0; int off = sizeof(struct ip6_hdr), nest; int nxt, ours = 0; int srcrt = 0; /* * Drop the packet if IPv6 operation is disabled on the interface. */ rcvif = m->m_pkthdr.rcvif; if ((ND_IFINFO(rcvif)->flags & ND6_IFF_IFDISABLED)) goto bad; #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * should the inner packet be considered authentic? * see comment in ah4_input(). * NB: m cannot be NULL when passed to the input routine */ m->m_flags &= ~M_AUTHIPHDR; m->m_flags &= ~M_AUTHIPDGM; #endif /* IPSEC */ if (m->m_flags & M_FASTFWD_OURS) { /* * Firewall changed destination to local. */ ip6 = mtod(m, struct ip6_hdr *); goto passin; } /* * mbuf statistics */ if (m->m_flags & M_EXT) { if (m->m_next) IP6STAT_INC(ip6s_mext2m); else IP6STAT_INC(ip6s_mext1); } else { if (m->m_next) { if (m->m_flags & M_LOOP) { IP6STAT_INC(ip6s_m2m[V_loif->if_index]); } else if (rcvif->if_index < IP6S_M2MMAX) IP6STAT_INC(ip6s_m2m[rcvif->if_index]); else IP6STAT_INC(ip6s_m2m[0]); } else IP6STAT_INC(ip6s_m1); } in6_ifstat_inc(rcvif, ifs6_in_receive); IP6STAT_INC(ip6s_total); /* * L2 bridge code and some other code can return mbuf chain * that does not conform to KAME requirement. too bad. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? */ if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; if (m->m_pkthdr.len > MHLEN) n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) goto bad; m_move_pkthdr(n, m); m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = n->m_pkthdr.len; m_freem(m); m = n; } if (m->m_len < sizeof(struct ip6_hdr)) { if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(rcvif, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(rcvif, ifs6_in_hdrerr); goto bad; } IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]); IP_PROBE(receive, NULL, NULL, ip6, rcvif, NULL, ip6); /* * Check against address spoofing/corruption. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { /* * XXX: "badscope" is not very suitable for a multicast source. 
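 *
 * Both a multicast source and an unspecified (::) destination are
 * dropped here and counted as ip6s_badscope / ifs6_in_addrerr.  The
 * checks that follow extend the same idea: an interface-local
 * multicast destination is only legal when the packet was looped
 * back (M_LOOP), a multicast scope value of 0 is reserved by
 * RFC 4291 2.7 and must be silently discarded, and v4-mapped
 * source or destination addresses arriving on the wire are rejected
 * a little further below.
 *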
*/ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && !(m->m_flags & M_LOOP)) { /* * In this case, the packet should come from the loopback * interface. However, we cannot just check the if_flags, * because ip6_mloopback() passes the "actual" interface * as the outgoing/incoming interface. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && IPV6_ADDR_MC_SCOPE(&ip6->ip6_dst) == 0) { /* * RFC4291 2.7: * Nodes must not originate a packet to a multicast address * whose scop field contains the reserved value 0; if such * a packet is received, it must be silently dropped. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } #ifdef ALTQ if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) { /* packet is dropped by traffic conditioner */ return; } #endif /* * The following check is not documented in specs. A malicious * party may be able to use IPv4 mapped addr to confuse tcp/udp stack * and bypass security checks (act as if it was from 127.0.0.1 by using * IPv6 src ::ffff:127.0.0.1). Be cautious. * * We have supported IPv6-only kernels for a few years and this issue * has not come up. The world seems to move mostly towards not using * v4mapped on the wire, so it makes sense for us to keep rejecting * any such packets. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } #if 0 /* * Reject packets with IPv4 compatible addresses (auto tunnel). * * The code forbids auto tunnel relay case in RFC1933 (the check is * stronger than RFC1933). We may want to re-enable it if mech-xx * is revised to forbid relaying case. */ if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } #endif /* * Try to forward the packet, but if we fail continue. * ip6_tryforward() does not generate redirects, so fall * through to normal processing if redirects are required. * ip6_tryforward() does inbound and outbound packet firewall * processing. If firewall has decided that destination becomes * our local address, it sets M_FASTFWD_OURS flag. In this * case skip another inbound firewall processing and update * ip6 pointer. */ if (V_ip6_forwarding != 0 && V_ip6_sendredirects == 0 #if defined(IPSEC) || defined(IPSEC_SUPPORT) && (!IPSEC_ENABLED(ipv6) || IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0) #endif ) { if ((m = ip6_tryforward(m)) == NULL) return; if (m->m_flags & M_FASTFWD_OURS) { ip6 = mtod(m, struct ip6_hdr *); goto passin; } } #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. */ if (IPSEC_ENABLED(ipv6) && IPSEC_CAPS(ipv6, m, IPSEC_CAP_BYPASS_FILTER) != 0) goto passin; #endif /* * Run through list of hooks for input packets. * * NB: Beware of the destination address changing * (e.g. by NAT rewriting). When this happens, * tell ip6_forward to do the right thing. */ /* Jump over all PFIL processing if hooks are not active. 
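 *
 * When hooks are present, the destination is snapshotted first so a
 * rewrite by a hook (e.g. NAT) can be detected afterwards; srcrt then
 * tells ip6_forward() that the address changed.  In outline, this is
 * what the code below does:
 *
 *	odst = ip6->ip6_dst;
 *	if (pfil_run_hooks(V_inet6_pfil_head, &m, m->m_pkthdr.rcvif,
 *	    PFIL_IN, NULL) != PFIL_PASS)
 *		return;
 *	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
 *
 * A hook may also attach a PACKET_TAG_IPFORWARD tag together with
 * M_IP6_NEXTHOP, in which case the packet is handed straight to
 * ip6_forward() below.
 *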
*/ if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; odst = ip6->ip6_dst; if (pfil_run_hooks(V_inet6_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) != PFIL_PASS) return; ip6 = mtod(m, struct ip6_hdr *); srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); if ((m->m_flags & (M_IP6_NEXTHOP | M_FASTFWD_OURS)) == M_IP6_NEXTHOP && m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { /* * Directly ship the packet on. This allows forwarding * packets originally destined to us to some other directly * connected host. */ ip6_forward(m, 1); return; } passin: /* * Disambiguate address scope zones (if there is ambiguity). * We first make sure that the original source or destination address * is not in our internal form for scoped addresses. Such addresses * are not necessarily invalid spec-wise, but we cannot accept them due * to the usage conflict. * in6_setscope() then also checks and rejects the cases where src or * dst are the loopback address and the receiving interface * is not loopback. */ if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); /* XXX */ goto bad; } if (in6_setscope(&ip6->ip6_src, rcvif, NULL) || in6_setscope(&ip6->ip6_dst, rcvif, NULL)) { IP6STAT_INC(ip6s_badscope); goto bad; } if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ours = 1; goto hbhcheck; } /* * Multicast check. Assume packet is for us to avoid * prematurely taking locks. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ours = 1; in6_ifstat_inc(rcvif, ifs6_in_mcast); goto hbhcheck; } /* * Unicast check * XXX: For now we keep link-local IPv6 addresses with embedded * scope zone id, therefore we use zero zoneid here. */ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); if (ia != NULL) { if (ia->ia6_flags & IN6_IFF_NOTREADY) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; /* address is not ready, so discard the packet. */ nd6log((LOG_INFO, "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst))); goto bad; } if (V_ip6_sav && !(rcvif->if_flags & IFF_LOOPBACK) && __predict_false(in6_localip_fib(&ip6->ip6_src, rcvif->if_fib))) { IP6STAT_INC(ip6s_badscope); /* XXX */ goto bad; } /* Count the packet in the ip address stats */ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); ours = 1; goto hbhcheck; } /* * Now there is no reason to process the packet if it's not our own * and we're not a router. */ if (!V_ip6_forwarding) { IP6STAT_INC(ip6s_cantforward); goto bad; } hbhcheck: /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). * If a JumboPayload option is included, plen will also be modified. */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { if (ip6_input_hbh(&m, &plen, &rtalert, &off, &nxt, &ours) != 0) return; } else nxt = ip6->ip6_nxt; /* * Use mbuf flags to propagate Router Alert option to * ICMPv6 layer, as hop-by-hop options have been stripped. */ if (rtalert != ~0) m->m_flags |= M_RTALERT_MLD; /* * Check that the amount of data in the buffers * is as at least much as the IPv6 header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. 
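 *
 * The expected total is sizeof(struct ip6_hdr) + plen, where plen may
 * already have been replaced by a Jumbo Payload value in
 * ip6_input_hbh().  Shorter packets are dropped and counted as
 * ip6s_tooshort; longer ones are trimmed, either by shrinking m_len
 * directly when the chain is a single mbuf or with m_adj() otherwise,
 * so upper layers never see trailing padding (e.g. from a minimum
 * sized Ethernet frame).
 *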
*/ if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(rcvif, ifs6_in_truncated); goto bad; } if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { if (m->m_len == m->m_pkthdr.len) { m->m_len = sizeof(struct ip6_hdr) + plen; m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; } else m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); } /* * Forward if desirable. */ if (V_ip6_mrouter && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip6_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. * * XXX TODO: Check hlim and multicast scope here to avoid * unnecessarily calling into ip6_mforward(). */ if (ip6_mforward && ip6_mforward(ip6, rcvif, m)) { IP6STAT_INC(ip6s_cantforward); goto bad; } } else if (!ours) { ip6_forward(m, srcrt); return; } /* * Tell launch routine the next header */ IP6STAT_INC(ip6s_delivered); in6_ifstat_inc(rcvif, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(rcvif, ifs6_in_truncated); goto bad; } #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv6)) { if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) return; } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: in6_ifstat_inc(rcvif, ifs6_in_discard); if (m != NULL) m_freem(m); } /* * Hop-by-Hop options header processing. If a valid jumbo payload option is * included, the real payload length will be stored in plenp. * * rtalertp - XXX: should be stored more smart way */ static int ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp, struct mbuf **mp, int *offp) { struct mbuf *m = *mp; int off = *offp, hbhlen; struct ip6_hbh *hbh; /* validation of the length of the header */ if (m->m_len < off + sizeof(*hbh)) { m = m_pullup(m, off + sizeof(*hbh)); if (m == NULL) { IP6STAT_INC(ip6s_exthdrtoolong); *mp = NULL; return (-1); } } hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; if (m->m_len < off + hbhlen) { m = m_pullup(m, off + hbhlen); if (m == NULL) { IP6STAT_INC(ip6s_exthdrtoolong); *mp = NULL; return (-1); } } hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); off += hbhlen; hbhlen -= sizeof(struct ip6_hbh); if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), hbhlen, rtalertp, plenp) < 0) { *mp = NULL; return (-1); } *offp = off; *mp = m; return (0); } /* * Search header for all Hop-by-hop options and process each option. * This function is separate from ip6_hopopts_input() in order to * handle a case where the sending node itself process its hop-by-hop * options header. In such a case, the function is called from ip6_output(). * * The function assumes that hbh header is located right after the IPv6 header * (RFC2460 p7), opthead is pointer into data content in m, and opthead to * opthead + hbhlen is located in contiguous memory region. 
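 *
 * The option area is a sequence of TLVs: a Pad1 option is a single
 * type octet, every other option is type, length, data and consumes
 * opt[1] + 2 octets.  A rough sketch of the walk performed below,
 * with error handling omitted:
 *
 *	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
 *		if (*opt == IP6OPT_PAD1)
 *			optlen = 1;
 *		else
 *			optlen = *(opt + 1) + 2;
 *		validate known options, hand unknown ones to
 *		ip6_unknown_opt()
 *	}
 *
 * Router Alert and Jumbo Payload receive extra validation; in
 * particular a Jumbo Payload option is only accepted when ip6_plen is
 * 0 and the carried length exceeds IPV6_MAXPACKET (65535).
 *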
*/ int ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen, u_int32_t *rtalertp, u_int32_t *plenp) { struct ip6_hdr *ip6; int optlen = 0; u_int8_t *opt = opthead; u_int16_t rtalert_val; u_int32_t jumboplen; const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh); for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) { switch (*opt) { case IP6OPT_PAD1: optlen = 1; break; case IP6OPT_PADN: if (hbhlen < IP6OPT_MINLEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } optlen = *(opt + 1) + 2; break; case IP6OPT_ROUTER_ALERT: /* XXX may need check for alignment */ if (hbhlen < IP6OPT_RTALERT_LEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) { /* XXX stat */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); return (-1); } optlen = IP6OPT_RTALERT_LEN; bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2); *rtalertp = ntohs(rtalert_val); break; case IP6OPT_JUMBO: /* XXX may need check for alignment */ if (hbhlen < IP6OPT_JUMBO_LEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) { /* XXX stat */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); return (-1); } optlen = IP6OPT_JUMBO_LEN; /* * IPv6 packets that have non 0 payload length * must not contain a jumbo payload option. */ ip6 = mtod(m, struct ip6_hdr *); if (ip6->ip6_plen) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt - opthead); return (-1); } /* * We may see jumbolen in unaligned location, so * we'd need to perform bcopy(). */ bcopy(opt + 2, &jumboplen, sizeof(jumboplen)); jumboplen = (u_int32_t)htonl(jumboplen); #if 1 /* * if there are multiple jumbo payload options, * *plenp will be non-zero and the packet will be * rejected. * the behavior may need some debate in ipngwg - * multiple options does not make sense, however, * there's no explicit mention in specification. */ if (*plenp != 0) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); return (-1); } #endif /* * jumbo payload length must be larger than 65535. */ if (jumboplen <= IPV6_MAXPACKET) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); return (-1); } *plenp = jumboplen; break; default: /* unknown option */ if (hbhlen < IP6OPT_MINLEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } optlen = ip6_unknown_opt(opt, m, erroff + opt - opthead); if (optlen == -1) return (-1); optlen += 2; break; } } return (0); bad: m_freem(m); return (-1); } /* * Unknown option processing. * The third argument `off' is the offset from the IPv6 header to the option, * which is necessary if the IPv6 header the and option header and IPv6 header * is not contiguous in order to return an ICMPv6 error. 
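 *
 * The required action is encoded in the two high-order bits of the
 * option type (IP6OPT_TYPE()): skip the option, silently discard the
 * packet, discard and always send an ICMPv6 Parameter Problem, or
 * discard and send the error only if the destination was not a
 * multicast address.  Here `off' is the offset of the offending octet
 * measured from the start of the IPv6 header, which is what
 * icmp6_error() needs to fill in the pointer field of the Parameter
 * Problem message.
 *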
*/ int ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off) { struct ip6_hdr *ip6; switch (IP6OPT_TYPE(*optp)) { case IP6OPT_TYPE_SKIP: /* ignore the option */ return ((int)*(optp + 1)); case IP6OPT_TYPE_DISCARD: /* silently discard */ m_freem(m); return (-1); case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return (-1); case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ IP6STAT_INC(ip6s_badoptions); ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || (m->m_flags & (M_BCAST|M_MCAST))) m_freem(m); else icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return (-1); } m_freem(m); /* XXX: NOTREACHED */ return (-1); } /* * Create the "control" list for this pcb. * These functions will not modify mbuf chain at all. * * The routine will be called from upper layer handlers like tcp6_input(). * Thus the routine assumes that the caller (tcp6_input) have already * called m_pullup() and all the extension headers are located in the * very first mbuf on the mbuf chain. * * ip6_savecontrol_v4 will handle those options that are possible to be * set on a v4-mapped socket. * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those * options and handle the v6-only ones itself. */ struct mbuf ** ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, int *v4only) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); #ifdef SO_TIMESTAMP if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { union { struct timeval tv; struct bintime bt; struct timespec ts; } t; struct bintime boottimebin, bt1; struct timespec ts1; bool stamped; stamped = false; switch (inp->inp_socket->so_ts_clock) { case SO_TS_REALTIME_MICRO: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &ts1); timespec2bintime(&ts1, &bt1); getboottimebin(&boottimebin); bintime_add(&bt1, &boottimebin); bintime2timeval(&bt1, &t.tv); } else { microtime(&t.tv); } - *mp = sbcreatecontrol((caddr_t) &t.tv, sizeof(t.tv), - SCM_TIMESTAMP, SOL_SOCKET); + *mp = sbcreatecontrol(&t.tv, sizeof(t.tv), + SCM_TIMESTAMP, SOL_SOCKET, M_NOWAIT); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } break; case SO_TS_BINTIME: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &ts1); timespec2bintime(&ts1, &t.bt); getboottimebin(&boottimebin); bintime_add(&t.bt, &boottimebin); } else { bintime(&t.bt); } - *mp = sbcreatecontrol((caddr_t)&t.bt, sizeof(t.bt), - SCM_BINTIME, SOL_SOCKET); + *mp = sbcreatecontrol(&t.bt, sizeof(t.bt), SCM_BINTIME, + SOL_SOCKET, M_NOWAIT); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } break; case SO_TS_REALTIME: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { mbuf_tstmp2timespec(m, &t.ts); getboottimebin(&boottimebin); bintime2timespec(&boottimebin, &ts1); timespecadd(&t.ts, &ts1, &t.ts); } else { nanotime(&t.ts); } - *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), - SCM_REALTIME, SOL_SOCKET); + *mp = sbcreatecontrol(&t.ts, sizeof(t.ts), + SCM_REALTIME, SOL_SOCKET, M_NOWAIT); if (*mp != NULL) { mp = &(*mp)->m_next; stamped = true; } break; case SO_TS_MONOTONIC: if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) mbuf_tstmp2timespec(m, &t.ts); else nanouptime(&t.ts); - *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), - SCM_MONOTONIC, SOL_SOCKET); + *mp = sbcreatecontrol(&t.ts, sizeof(t.ts), + SCM_MONOTONIC, SOL_SOCKET, M_NOWAIT); if (*mp 
!= NULL) { mp = &(*mp)->m_next; stamped = true; } break; default: panic("unknown (corrupted) so_ts_clock"); } if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { struct sock_timestamp_info sti; bzero(&sti, sizeof(sti)); sti.st_info_flags = ST_INFO_HW; if ((m->m_flags & M_TSTMP_HPREC) != 0) sti.st_info_flags |= ST_INFO_HW_HPREC; - *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), - SCM_TIME_INFO, SOL_SOCKET); + *mp = sbcreatecontrol(&sti, sizeof(sti), SCM_TIME_INFO, + SOL_SOCKET, M_NOWAIT); if (*mp != NULL) mp = &(*mp)->m_next; } } #endif #define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) /* RFC 2292 sec. 5 */ if ((inp->inp_flags & IN6P_PKTINFO) != 0) { struct in6_pktinfo pi6; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); pi6.ipi6_addr.s6_addr32[0] = 0; pi6.ipi6_addr.s6_addr32[1] = 0; pi6.ipi6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; pi6.ipi6_addr.s6_addr32[3] = ip->ip_dst.s_addr; #else /* We won't hit this code */ bzero(&pi6.ipi6_addr, sizeof(struct in6_addr)); #endif } else { bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); in6_clearscope(&pi6.ipi6_addr); /* XXX */ } pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; - *mp = sbcreatecontrol((caddr_t) &pi6, - sizeof(struct in6_pktinfo), - IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6); + *mp = sbcreatecontrol(&pi6, sizeof(struct in6_pktinfo), + IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6, + M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) { int hlim; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); hlim = ip->ip_ttl; #else /* We won't hit this code */ hlim = 0; #endif } else { hlim = ip6->ip6_hlim & 0xff; } - *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), + *mp = sbcreatecontrol(&hlim, sizeof(int), IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), - IPPROTO_IPV6); + IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } if ((inp->inp_flags & IN6P_TCLASS) != 0) { int tclass; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); tclass = ip->ip_tos; #else /* We won't hit this code */ tclass = 0; #endif } else { u_int32_t flowinfo; flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK); flowinfo >>= 20; tclass = flowinfo & 0xff; } - *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(int), - IPV6_TCLASS, IPPROTO_IPV6); + *mp = sbcreatecontrol(&tclass, sizeof(int), IPV6_TCLASS, + IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } if (v4only != NULL) { if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { *v4only = 1; } else { *v4only = 0; } } return (mp); } void ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp) { struct ip6_hdr *ip6; int v4only = 0; mp = ip6_savecontrol_v4(inp, m, mp, &v4only); if (v4only) return; ip6 = mtod(m, struct ip6_hdr *); /* * IPV6_HOPOPTS socket option. Recall that we required super-user * privilege for the option (see ip6_ctloutput), but it might be too * strict, since there might be some hop-by-hop options which can be * returned to normal user. * See also RFC 2292 section 6 (or RFC 3542 section 8). */ if ((inp->inp_flags & IN6P_HOPOPTS) != 0) { /* * Check if a hop-by-hop options header is contatined in the * received packet, and if so, store the options as ancillary * data. 
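 * The copy is made with the five-argument sbcreatecontrol() used
 * throughout this change, roughly
 *
 *	*mp = sbcreatecontrol(hbh, hbhlen, IPV6_HOPOPTS,
 *	    IPPROTO_IPV6, M_NOWAIT);
 *
 * (the real call selects IPV6_2292HOPOPTS for RFC 2292 sockets via
 * IS2292()), so an mbuf shortage merely skips the HOPOPTS item, as
 * guarded by the if (*mp) check, instead of sleeping in the input
 * path.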
Note that a hop-by-hop options header must be * just after the IPv6 header, which is assured through the * IPv6 input processing. */ if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; int hbhlen; hbh = (struct ip6_hbh *)(ip6 + 1); hbhlen = (hbh->ip6h_len + 1) << 3; /* * XXX: We copy the whole header even if a * jumbo payload option is included, the option which * is to be removed before returning according to * RFC2292. * Note: this constraint is removed in RFC3542 */ - *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, + *mp = sbcreatecontrol(hbh, hbhlen, IS2292(inp, IPV6_2292HOPOPTS, IPV6_HOPOPTS), - IPPROTO_IPV6); + IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } } if ((inp->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); /* * Search for destination options headers or routing * header(s) through the header chain, and stores each * header as ancillary data. * Note that the order of the headers remains in * the chain of ancillary data. */ while (1) { /* is explicit loop prevention necessary? */ struct ip6_ext *ip6e = NULL; int elen; /* * if it is not an extension header, don't try to * pull it from the chain. */ switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: goto loopend; } if (off + sizeof(*ip6e) > m->m_len) goto loopend; ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (off + elen > m->m_len) goto loopend; switch (nxt) { case IPPROTO_DSTOPTS: if (!(inp->inp_flags & IN6P_DSTOPTS)) break; - *mp = sbcreatecontrol((caddr_t)ip6e, elen, - IS2292(inp, - IPV6_2292DSTOPTS, IPV6_DSTOPTS), - IPPROTO_IPV6); + *mp = sbcreatecontrol(ip6e, elen, + IS2292(inp, IPV6_2292DSTOPTS, IPV6_DSTOPTS), + IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_ROUTING: if (!(inp->inp_flags & IN6P_RTHDR)) break; - *mp = sbcreatecontrol((caddr_t)ip6e, elen, + *mp = sbcreatecontrol(ip6e, elen, IS2292(inp, IPV6_2292RTHDR, IPV6_RTHDR), - IPPROTO_IPV6); + IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: /* * other cases have been filtered in the above. * none will visit this case. here we supply * the code just in case (nxt overwritten or * other cases). */ goto loopend; } /* proceed with the next header. */ off += elen; nxt = ip6e->ip6e_nxt; ip6e = NULL; } loopend: ; } if (inp->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); /* * XXX should handle the failure of one or the * other - don't populate both? 
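 * With M_NOWAIT each sbcreatecontrol() below can fail independently,
 * so a receiver that asked for the flow information may in principle
 * see IPV6_FLOWID without IPV6_FLOWTYPE, or vice versa, under memory
 * pressure.  Consumers should treat the two cmsgs as optional and
 * match them by cmsg_type; a userland sketch (variable names are
 * illustrative only):
 *
 *	for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *	    cm = CMSG_NXTHDR(&msg, cm)) {
 *		if (cm->cmsg_level != IPPROTO_IPV6)
 *			continue;
 *		if (cm->cmsg_type == IPV6_FLOWID)
 *			memcpy(&flowid, CMSG_DATA(cm), sizeof(flowid));
 *		else if (cm->cmsg_type == IPV6_FLOWTYPE)
 *			memcpy(&flowtype, CMSG_DATA(cm), sizeof(flowtype));
 *	}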
*/ - *mp = sbcreatecontrol((caddr_t) &flowid, - sizeof(uint32_t), IPV6_FLOWID, IPPROTO_IPV6); + *mp = sbcreatecontrol(&flowid, sizeof(uint32_t), IPV6_FLOWID, + IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; - *mp = sbcreatecontrol((caddr_t) &flow_type, - sizeof(uint32_t), IPV6_FLOWTYPE, IPPROTO_IPV6); + *mp = sbcreatecontrol(&flow_type, sizeof(uint32_t), + IPV6_FLOWTYPE, IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } #ifdef RSS if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { - *mp = sbcreatecontrol((caddr_t) &rss_bucketid, - sizeof(uint32_t), IPV6_RSSBUCKETID, IPPROTO_IPV6); + *mp = sbcreatecontrol(&rss_bucketid, sizeof(uint32_t), + IPV6_RSSBUCKETID, IPPROTO_IPV6, M_NOWAIT); if (*mp) mp = &(*mp)->m_next; } } #endif } #undef IS2292 void ip6_notify_pmtu(struct inpcb *inp, struct sockaddr_in6 *dst, u_int32_t mtu) { struct socket *so; struct mbuf *m_mtu; struct ip6_mtuinfo mtuctl; KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); /* * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. * NOTE: we notify disconnected sockets, because some udp * applications keep sending sockets disconnected. * NOTE: our implementation doesn't notify connected sockets that has * foreign address that is different than given destination addresses * (this is permitted by RFC 3542). */ if ((inp->inp_flags & IN6P_MTU) == 0 || ( !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &dst->sin6_addr))) return; mtuctl.ip6m_mtu = mtu; mtuctl.ip6m_addr = *dst; if (sa6_recoverscope(&mtuctl.ip6m_addr)) return; - if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl), - IPV6_PATHMTU, IPPROTO_IPV6)) == NULL) + if ((m_mtu = sbcreatecontrol(&mtuctl, sizeof(mtuctl), IPV6_PATHMTU, + IPPROTO_IPV6, M_NOWAIT)) == NULL) return; so = inp->inp_socket; if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu) == 0) { soroverflow(so); m_freem(m_mtu); /* XXX: should count statistics */ } else sorwakeup(so); } /* * Get pointer to the previous header followed by the header * currently processed. */ int ip6_get_prevhdr(const struct mbuf *m, int off) { struct ip6_ext ip6e; struct ip6_hdr *ip6; int len, nlen, nxt; if (off == sizeof(struct ip6_hdr)) return (offsetof(struct ip6_hdr, ip6_nxt)); if (off < sizeof(struct ip6_hdr)) panic("%s: off < sizeof(struct ip6_hdr)", __func__); ip6 = mtod(m, struct ip6_hdr *); nxt = ip6->ip6_nxt; len = sizeof(struct ip6_hdr); nlen = 0; while (len < off) { m_copydata(m, len, sizeof(ip6e), (caddr_t)&ip6e); switch (nxt) { case IPPROTO_FRAGMENT: nlen = sizeof(struct ip6_frag); break; case IPPROTO_AH: nlen = (ip6e.ip6e_len + 2) << 2; break; default: nlen = (ip6e.ip6e_len + 1) << 3; } len += nlen; nxt = ip6e.ip6e_nxt; } return (len - nlen); } /* * get next header offset. m will be retained. 
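 *
 * The per-header size rules applied below: a fragment header is a
 * fixed sizeof(struct ip6_frag) and parsing stops unless this is the
 * first fragment (offset 0); an AH header advertises its length in
 * 32-bit words excluding the first two, hence (len + 2) << 2 bytes;
 * the remaining extension headers use 8-octet units excluding the
 * first, hence (len + 1) << 3 bytes.  ESP, IPComp and IPPROTO_NONE
 * terminate the walk since their payload cannot be parsed here.
 *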
*/ int ip6_nexthdr(const struct mbuf *m, int off, int proto, int *nxtp) { struct ip6_hdr ip6; struct ip6_ext ip6e; struct ip6_frag fh; /* just in case */ if (m == NULL) panic("ip6_nexthdr: m == NULL"); if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off) return -1; switch (proto) { case IPPROTO_IPV6: if (m->m_pkthdr.len < off + sizeof(ip6)) return -1; m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6); if (nxtp) *nxtp = ip6.ip6_nxt; off += sizeof(ip6); return off; case IPPROTO_FRAGMENT: /* * terminate parsing if it is not the first fragment, * it does not make sense to parse through it. */ if (m->m_pkthdr.len < off + sizeof(fh)) return -1; m_copydata(m, off, sizeof(fh), (caddr_t)&fh); /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ if (fh.ip6f_offlg & IP6F_OFF_MASK) return -1; if (nxtp) *nxtp = fh.ip6f_nxt; off += sizeof(struct ip6_frag); return off; case IPPROTO_AH: if (m->m_pkthdr.len < off + sizeof(ip6e)) return -1; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 2) << 2; return off; case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: if (m->m_pkthdr.len < off + sizeof(ip6e)) return -1; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 1) << 3; return off; case IPPROTO_NONE: case IPPROTO_ESP: case IPPROTO_IPCOMP: /* give up */ return -1; default: return -1; } /* NOTREACHED */ } /* * get offset for the last header in the chain. m will be kept untainted. */ int ip6_lasthdr(const struct mbuf *m, int off, int proto, int *nxtp) { int newoff; int nxt; if (!nxtp) { nxt = -1; nxtp = &nxt; } while (1) { newoff = ip6_nexthdr(m, off, proto, nxtp); if (newoff < 0) return off; else if (newoff < off) return -1; /* invalid */ else if (newoff == off) return newoff; off = newoff; proto = *nxtp; } } /* * System control for IP6 */ u_char inet6ctlerrmap[PRC_NCMDS] = { 0, 0, 0, 0, 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, EMSGSIZE, EHOSTUNREACH, 0, 0, 0, 0, EHOSTUNREACH, 0, ENOPROTOOPT, ECONNREFUSED }; diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 20b78009854e..9b3bce7db7bc 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -1,1359 +1,1360 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DEFINE(int, zero_checksum_port) = 0; #define V_zero_checksum_port VNET(zero_checksum_port) SYSCTL_INT(_net_inet6_udp6, OID_AUTO, rfc6935_port, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(zero_checksum_port), 0, "Zero UDP checksum allowed for traffic to/from this port."); /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static void udp6_detach(struct socket *so); static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts = NULL, *tmp_opts; struct udpcb *up; bool filtered; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. 
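 *
 * If a tunneling callback (u_tun_func, registered through
 * udp_set_kernel_tunneling()) is present, the inpcb is referenced and
 * unlocked before the callback runs and re-locked afterwards; when
 * the callback reports that it consumed the datagram, the reference
 * is dropped and its result is returned.  The pattern, roughly:
 *
 *	in_pcbref(inp);
 *	INP_RUNLOCK(inp);
 *	filtered = (*up->u_tun_func)(n, off, inp, sa, up->u_tun_ctx);
 *	INP_RLOCK(inp);
 *	if (filtered)
 *		return (in_pcbrele_rlocked(inp));
 *
 * The IPsec policy check, MAC check and control-data generation below
 * only run when the datagram is still ours to deliver.
 *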
*/ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&fromsa[0], up->u_tun_ctx); INP_RLOCK(inp); if (filtered) return (in_pcbrele_rlocked(inp)); } #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv6)) { if (IPSEC_CHECK_POLICY(ipv6, n, inp) != 0) { m_freem(n); return (0); } } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif opts = NULL; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); if ((inp->inp_vflag & INP_IPV6) && (inp->inp_flags2 & INP_ORIGDSTADDR)) { - tmp_opts = sbcreatecontrol((caddr_t)&fromsa[1], - sizeof(struct sockaddr_in6), IPV6_ORIGDSTADDR, IPPROTO_IPV6); + tmp_opts = sbcreatecontrol(&fromsa[1], + sizeof(struct sockaddr_in6), IPV6_ORIGDSTADDR, + IPPROTO_IPV6, M_NOWAIT); if (tmp_opts) { if (opts) { tmp_opts->m_next = opts; opts = tmp_opts; } else opts = tmp_opts; } } m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&fromsa[0], n, opts) == 0) { soroverflow_locked(so); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } struct udp6_multi_match_ctx { struct ip6_hdr *ip6; struct udphdr *uh; }; static bool udp6_multi_match(const struct inpcb *inp, void *v) { struct udp6_multi_match_ctx *ctx = v; if ((inp->inp_vflag & INP_IPV6) == 0) return(false); if (inp->inp_lport != ctx->uh->uh_dport) return(false); if (inp->inp_fport != 0 && inp->inp_fport != ctx->uh->uh_sport) return(false); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ctx->ip6->ip6_dst)) return (false); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ctx->ip6->ip6_src) || inp->inp_fport != ctx->uh->uh_sport)) return (false); return (true); } static int udp6_multi_input(struct mbuf *m, int off, int proto, struct sockaddr_in6 *fromsa) { struct udp6_multi_match_ctx ctx; struct inpcb_iterator inpi = INP_ITERATOR(udp_get_inpcbinfo(proto), INPLOOKUP_RLOCKPCB, udp6_multi_match, &ctx); struct inpcb *inp; struct ip6_moptions *imo; struct mbuf *n; int appends = 0; /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet will not match laddr. To * handle this situation, matching is relaxed if the * receiving interface is the same as one specified in the * socket and if the destination multicast address matches * one of the multicast groups specified in the socket. */ /* * KAME note: traditionally we dropped udpiphdr from mbuf * here. We need udphdr for IPsec processing so we do that * later. */ ctx.ip6 = mtod(m, struct ip6_hdr *); ctx.uh = (struct udphdr *)((char *)ctx.ip6 + off); while ((inp = inp_next(&inpi)) != NULL) { INP_RLOCK_ASSERT(inp); /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is (supposed to be) held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. 
[RFC3678] */ if ((imo = inp->in6p_moptions) != NULL) { struct sockaddr_in6 mcaddr; int blocked; bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ctx.ip6->ip6_dst; blocked = im6o_mc_filter(imo, m->m_pkthdr.rcvif, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa[0]); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IP6STAT_INC(ip6s_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); continue; } } if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { if (proto == IPPROTO_UDPLITE) UDPLITE_PROBE(receive, NULL, inp, ctx.ip6, inp, ctx.uh); else UDP_PROBE(receive, NULL, inp, ctx.ip6, inp, ctx.uh); if (udp6_append(inp, n, off, fromsa)) { INP_RUNLOCK(inp); break; } else appends++; } /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((inp->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) { INP_RUNLOCK(inp); break; } } m_freem(m); if (appends == 0) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datgram.) */ UDPSTAT_INC(udps_noport); UDPSTAT_INC(udps_noportmcast); } return (IPPROTO_DONE); } int udp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6; struct udphdr *uh; struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; int off = *offp; int cscov_partial; int plen, ulen; struct sockaddr_in6 fromsa[2]; struct m_tag *fwd_tag; uint16_t uh_sum; uint8_t nxt; NET_EPOCH_ASSERT(); if (m->m_len < off + sizeof(struct udphdr)) { m = m_pullup(m, off + sizeof(struct udphdr)); if (m == NULL) { IP6STAT_INC(ip6s_exthdrtoolong); *mp = NULL; return (IPPROTO_DONE); } } ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); UDPSTAT_INC(udps_ipackets); /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); nxt = proto; cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0; if (nxt == IPPROTO_UDPLITE) { /* Zero means checksum over the complete packet. */ if (ulen == 0) ulen = plen; if (ulen == plen) cscov_partial = 0; if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } if (uh->uh_sum == 0) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } } else { if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (uh->uh_sum == 0) { UDPSTAT_INC(udps_nosum); /* * dport 0 was rejected earlier so this is OK even if * zero_checksum_port is 0 (which is its default value). 
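 *
 * This implements the RFC 6935 relaxation: a UDP/IPv6 datagram with a
 * zero checksum is normally invalid, but it is accepted when the
 * destination port matches the net.inet6.udp6.rfc6935_port sysctl,
 * which a tunneling deployment may point at its tunnel port.  The
 * sysctl defaults to 0 and, since a destination port of 0 was already
 * rejected above, no packet matches by default.  All other datagrams
 * go through full verification below, using the hardware-provided
 * CSUM_DATA_VALID_IPV6 result when available and in6_cksum_partial()
 * otherwise.
 *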
*/ if (ntohs(uh->uh_dport) == V_zero_checksum_port) goto skip_checksum; else goto badunlocked; } } if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); goto badunlocked; } skip_checksum: /* * Construct sockaddr format source address. */ init_sin6(&fromsa[0], m, 0); fromsa[0].sin6_port = uh->uh_sport; init_sin6(&fromsa[1], m, 1); fromsa[1].sin6_port = uh->uh_dport; pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { *mp = NULL; return (udp6_multi_input(m, off, proto, fromsa)); } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in6 *next_hop6; next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src, uh->uh_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? htons(next_hop6->sin6_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP6_NEXTHOP; } else inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp == NULL) { if (V_udp_log_in_vain) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", ip6_sprintf(ip6bufd, &ip6->ip6_dst), ntohs(uh->uh_dport), ip6_sprintf(ip6bufs, &ip6->ip6_src), ntohs(uh->uh_sport)); } if (nxt == IPPROTO_UDPLITE) UDPLITE_PROBE(receive, NULL, NULL, ip6, NULL, uh); else UDP_PROBE(receive, NULL, NULL, ip6, NULL, uh); UDPSTAT_INC(udps_noport); if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); UDPSTAT_INC(udps_noportmcast); goto badunlocked; } if (V_udp_blackhole && (V_udp_blackhole_local || !in6_localaddr(&ip6->ip6_src))) goto badunlocked; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); *mp = NULL; return (IPPROTO_DONE); } INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); if (cscov_partial) { if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) { INP_RUNLOCK(inp); m_freem(m); *mp = NULL; return (IPPROTO_DONE); } } if (nxt == IPPROTO_UDPLITE) UDPLITE_PROBE(receive, NULL, inp, ip6, inp, uh); else UDP_PROBE(receive, NULL, inp, ip6, inp, uh); if (udp6_append(inp, m, off, fromsa) == 0) INP_RUNLOCK(inp); *mp = NULL; return (IPPROTO_DONE); badunlocked: m_freem(m); *mp = NULL; return (IPPROTO_DONE); } static void udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d, struct inpcbinfo *pcbinfo) { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct 
udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* Check if we can safely examine src and dst ports. */ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); if (!PRC_IS_REDIRECT(cmd)) { /* Check to see if its tunneled */ struct inpcb *inp; inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst, uh.uh_dport, &ip6->ip6_src, uh.uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp != NULL) { struct udpcb *up; up = intoudpcb(inp); if (up->u_icmp_func) { /* Yes it is. */ INP_RUNLOCK(inp); (*up->u_icmp_func)(cmd, (struct sockaddr *)ip6cp->ip6c_src, d, up->u_tun_ctx); return; } else { /* Can't find it. */ INP_RUNLOCK(inp); } } } (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void)in6_pcbnotify(pcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } void udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo)); } void udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo)); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct epoch_tracker et; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } NET_EPOCH_ENTER(et); inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); NET_EPOCH_EXIT(et); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_output(struct socket *so, int flags_arg, struct mbuf *m, struct sockaddr *addr6, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct ip6_pktopts *optp, opt; struct sockaddr_in6 *sin6, tmp; struct epoch_tracker et; int cscov_partial, error, flags, hlen, scope_ambiguous; u_int32_t ulen, plen; uint16_t cscov; u_short fport; uint8_t nxt; /* 
addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; /* * In contrast to to IPv4 we do not validate the max. packet length * here due to IPv6 Jumbograms (RFC2675). */ scope_ambiguous = 0; if (sin6) { /* Protect *addr6 from overwrites. */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as it should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) { if (control) m_freem(control); m_freem(m); return (error); } } inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); /* * In the following cases we want a write lock on the inp for either * local operations or for possible route cache updates in the IPv6 * output path: * - on connected sockets (sin6 is NULL) for route cache updates, * - when we are not bound to an address and source port (it is * in6_pcbsetport() which will require the write lock). * * We check the inp fields before actually locking the inp, so * here exists a race, and we may WLOCK the inp and end with already * bound one by other thread. This is fine. */ if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0)) INP_WLOCK(inp); else INP_RLOCK(inp); nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; if (sin6 == NULL) hasv4addr = (inp->inp_vflag & INP_IPV4); else hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; if (hasv4addr) { struct pr_usrreqs *pru; /* * XXXRW: We release UDP-layer locks before calling * udp_send() in order to avoid recursion. However, * this does mean there is a short window where inp's * fields are unstable. Could this lead to a * potential race in which the factors causing us to * select the UDPv4 output routine are invalidated? */ INP_UNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock((struct sockaddr *)sin6); pru = inetsw[ip_protox[nxt]].pr_usrreqs; /* addr will just be freed in sendit(). */ return ((*pru->pru_send)(so, flags_arg | PRUS_IPV6, m, (struct sockaddr *)sin6, control, td)); } } else #endif if (sin6 && IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { /* * Given this is either an IPv6-only socket or no INET is * supported we will fail the send if the given destination * address is a v4mapped address. * * XXXGL: do we leak m and control? */ INP_UNLOCK(inp); return (EINVAL); } NET_EPOCH_ENTER(et); if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) { goto release; } optp = &opt; } else optp = inp->in6p_outputopts; if (sin6) { /* * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. */ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } /* * Given we handle the v4mapped case in the INET block above * assert here that it must not happen anymore. 
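 *
 * From here the sendto()-style path fills in what connect() would
 * otherwise have cached: in6_selectsrc_socket() picks the source
 * address (a read lock is sufficient), and if the socket has no local
 * port yet in6_pcbsetport() assigns one under the pcbinfo hash write
 * lock, which is why the unbound case took INP_WLOCK() at the top of
 * this function.  A failed port assignment undoes any implicit source
 * address binding before bailing out.
 *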
*/ KASSERT(!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr), ("%s: sin6(%p)->sin6_addr is v4mapped which we " "should have handled.", __func__, sin6)); /* This only requires read-locking. */ error = in6_selectsrc_socket(sin6, optp, inp, td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; laddr = &in6a; if (inp->inp_lport == 0) { struct inpcbinfo *pcbinfo; INP_WLOCK_ASSERT(inp); pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); INP_HASH_WLOCK(pcbinfo); error = in6_pcbsetport(laddr, inp, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; goto release; } } faddr = &sin6->sin6_addr; fport = sin6->sin6_port; /* allow 0 port */ } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } ulen = m->m_pkthdr.len; plen = sizeof(struct udphdr) + ulen; hlen = sizeof(struct ip6_hdr); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ cscov = cscov_partial = 0; udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (nxt == IPPROTO_UDPLITE) { struct udpcb *up; up = intoudpcb(inp); cscov = up->u_txcslen; if (cscov >= plen) cscov = 0; udp6->uh_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 0 : 1; } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_plen = htons((u_short)plen); ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif if (cscov_partial) { if ((udp6->uh_sum = in6_cksum_partial(m, nxt, sizeof(struct ip6_hdr), plen, cscov)) == 0) udp6->uh_sum = 0xffff; } else { udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } flags = 0; #if defined(ROUTE_MPATH) || defined(RSS) if (CALC_FLOWID_OUTBOUND_SENDTO) { uint32_t hash_type, hash_val; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; hash_val = fib6_calc_packet_hash(laddr, faddr, inp->inp_lport, fport, pr, &hash_type); m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } /* do not use inp flowid */ flags |= IP_NODEFAULTFLOWID; #endif UDPSTAT_INC(udps_opackets); if (nxt == IPPROTO_UDPLITE) UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6); else UDP_PROBE(send, NULL, inp, ip6, inp, udp6); error = ip6_output(m, optp, INP_WLOCKED(inp) ? 
&inp->inp_route6 : NULL, flags, inp->in6p_moptions, NULL, inp); INP_UNLOCK(inp); NET_EPOCH_EXIT(et); if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); release: INP_UNLOCK(inp); NET_EPOCH_EXIT(et); if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } m_freem(m); return (error); } static void udp6_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_abort)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); } error = in_pcballoc(so, pcbinfo); if (error) return (error); inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. 
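 *
 * In other words a freshly attached UDP6 socket is dual-stack unless
 * IPV6_V6ONLY is set: INP_IPV6 is always on, INP_IPV4 is added so a
 * later bind to a wildcard or v4-mapped address can reuse the inpcb,
 * in6p_hops = -1 requests the kernel default hop limit, and
 * inp_ip_ttl is seeded from V_ip_defttl for the mapped case described
 * above.
 *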
*/ inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); return (error); } INP_WUNLOCK(inp); return (0); } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; u_char vflagsav; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); if (nam->sa_family != AF_INET6) return (EAFNOSUPPORT); if (nam->sa_len != sizeof(struct sockaddr_in6)) return (EINVAL); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); vflagsav = inp->inp_vflag; inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; #ifdef INET else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); goto out; } #endif } error = in6_pcbbind(inp, nam, td->td_ucred); #ifdef INET out: #endif if (error != 0) inp->inp_vflag = vflagsav; INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp6_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_close: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_disconnect)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { #ifdef INET struct epoch_tracker et; #endif struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in6 *sin6; int error; u_char vflagsav; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); sin6 = (struct sockaddr_in6 *)nam; if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_len != sizeof(*sin6)) return (EINVAL); /* * XXXRW: Need to clarify locking of v4/v6 flags. */ INP_WLOCK(inp); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV4) == 0) { error = EAFNOSUPPORT; goto out; } if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto out; } in6_sin6_2_sin(&sin, sin6); error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; vflagsav = inp->inp_vflag; inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; NET_EPOCH_ENTER(et); INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred, true); INP_HASH_WUNLOCK(pcbinfo); NET_EPOCH_EXIT(et); /* * If connect succeeds, mark socket as connected. If * connect fails and socket is unbound, reset inp_vflag * field. 
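[Editor's note] In the #ifdef INET branch of udp6_connect() above, a destination such as ::ffff:192.0.2.1 is detected with IN6_IS_ADDR_V4MAPPED(), converted to a sockaddr_in by in6_sin6_2_sin(), and connected through the IPv4 path via in_pcbconnect(), flipping the PCB from INP_IPV6 to INP_IPV4. A hedged userspace sketch of a call that would exercise that path (illustrative, not from this change; the helper name is hypothetical):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

/*
 * Illustrative sketch: connect an AF_INET6 UDP socket to an
 * IPv4-mapped destination.  In the kernel this lands in the
 * udp6_connect() branch that converts the address with
 * in6_sin6_2_sin() and uses the IPv4 connect path.
 */
static int
connect_v4mapped(int s)
{
	struct sockaddr_in6 dst;

	memset(&dst, 0, sizeof(dst));
	dst.sin6_len = sizeof(dst);
	dst.sin6_family = AF_INET6;
	dst.sin6_port = htons(9);		/* discard service */
	(void)inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr);

	/* Per the code above, this fails with EINVAL if IPV6_V6ONLY is set. */
	return (connect(s, (struct sockaddr *)&dst, sizeof(dst)));
}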
*/ if (error == 0) soisconnected(so); else if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) inp->inp_vflag = vflagsav; goto out; } else { if ((inp->inp_vflag & INP_IPV6) == 0) { error = EAFNOSUPPORT; goto out; } } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = EISCONN; goto out; } error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; vflagsav = inp->inp_vflag; inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; INP_HASH_WLOCK(pcbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); /* * If connect succeeds, mark socket as connected. If * connect fails and socket is unbound, reset inp_vflag * field. */ if (error == 0) soisconnected(so); else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) inp->inp_vflag = vflagsav; out: INP_WUNLOCK(inp); return (error); } static void udp6_detach(struct socket *so) { struct inpcb *inp; struct udpcb *up; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); in_pcbdetach(inp); in_pcbfree(inp); udp_discardcb(up); } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (void)(*pru->pru_disconnect)(so); return (0); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_WUNLOCK(inp); return (ENOTCONN); } INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); INP_WUNLOCK(inp); return (0); } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { int error; if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } return (udp6_output(so, flags, m, addr, control, td)); bad: if (control) m_freem(control); m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { .pru_abort = udp6_abort, .pru_attach = udp6_attach, .pru_bind = udp6_bind, .pru_connect = udp6_connect, .pru_control = in6_control, .pru_detach = udp6_detach, .pru_disconnect = udp6_disconnect, .pru_peeraddr = in6_mapped_peeraddr, .pru_send = udp6_send, .pru_shutdown = udp_shutdown, .pru_sockaddr = in6_mapped_sockaddr, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp6_close }; diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 2484407d557c..efa87b1f2378 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -1,264 +1,261 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 * * $FreeBSD$ */ #ifndef _SYS_SOCKBUF_H_ #define _SYS_SOCKBUF_H_ /* * Constants for sb_flags field of struct sockbuf/xsockbuf. */ #define SB_TLS_RX 0x01 /* using KTLS on RX */ #define SB_TLS_RX_RUNNING 0x02 /* KTLS RX operation running */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ #define SB_NOINTR 0x40 /* operations not interruptible */ #define SB_AIO 0x80 /* AIO operations queued */ #define SB_KNOTE 0x100 /* kernel note attached */ #define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ #define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */ #define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ #define SB_STOP 0x1000 /* backpressure indicator */ #define SB_AIO_RUNNING 0x2000 /* AIO operation running */ #define SB_TLS_IFNET 0x4000 /* has used / is using ifnet KTLS */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ #if defined(_KERNEL) || defined(_WANT_SOCKET) #include #include #include #include #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ struct ktls_session; struct mbuf; struct sockaddr; struct socket; struct thread; struct selinfo; /* * Variables for socket buffering. * * Locking key to struct sockbuf: * (a) locked by SOCKBUF_LOCK(). 
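[Editor's note] Each field tagged "(a)" in the structure below is protected by the sockbuf mutex; the SOCKBUF_LOCK()/SOCKBUF_LOCK_ASSERT() macros defined after the structure are the usual way to take and document that lock. A kernel-style sketch of the convention, with hypothetical helper names, assuming only the definitions in this header:

/*
 * Hypothetical helpers, for illustration only: reads of an "(a)"
 * field must hold the sockbuf lock, which the locked variant
 * documents with SOCKBUF_LOCK_ASSERT().
 */
static u_int
sb_queued_bytes_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	return (sb->sb_ccc);		/* (a) claimed chars in buffer */
}

static u_int
sb_queued_bytes(struct sockbuf *sb)
{
	u_int n;

	SOCKBUF_LOCK(sb);
	n = sb_queued_bytes_locked(sb);
	SOCKBUF_UNLOCK(sb);
	return (n);
}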
*/ struct sockbuf { struct mtx *sb_mtx; /* sockbuf lock */ struct selinfo *sb_sel; /* process selecting read/write */ short sb_state; /* (a) socket state on sockbuf */ short sb_flags; /* (a) flags, see above */ struct mbuf *sb_mb; /* (a) the mbuf chain */ struct mbuf *sb_mbtail; /* (a) the last mbuf in the chain */ struct mbuf *sb_lastrecord; /* (a) first mbuf of last * record in socket buffer */ struct mbuf *sb_sndptr; /* (a) pointer into mbuf chain */ struct mbuf *sb_fnrdy; /* (a) pointer to first not ready buffer */ u_int sb_sndptroff; /* (a) byte offset of ptr into chain */ u_int sb_acc; /* (a) available chars in buffer */ u_int sb_ccc; /* (a) claimed chars in buffer */ u_int sb_hiwat; /* (a) max actual char count */ u_int sb_mbcnt; /* (a) chars of mbufs used */ u_int sb_mcnt; /* (a) number of mbufs in buffer */ u_int sb_ccnt; /* (a) number of clusters in buffer */ u_int sb_mbmax; /* (a) max chars of mbufs to use */ u_int sb_ctl; /* (a) non-data chars in buffer */ u_int sb_tlscc; /* (a) TLS chain characters */ u_int sb_tlsdcc; /* (a) TLS characters being decrypted */ int sb_lowat; /* (a) low water mark */ sbintime_t sb_timeo; /* (a) timeout for read/write */ struct mbuf *sb_mtls; /* (a) TLS mbuf chain */ struct mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */ int (*sb_upcall)(struct socket *, void *, int); /* (a) */ void *sb_upcallarg; /* (a) */ uint64_t sb_tls_seqno; /* (a) TLS seqno */ struct ktls_session *sb_tls_info; /* (a + b) TLS state */ TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */ struct task sb_aiotask; /* AIO task */ }; #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ #ifdef _KERNEL /* 'which' values for KPIs that operate on one buffer of a socket. */ typedef enum { SO_RCV, SO_SND } sb_which; /* * Per-socket buffer mutex used to protect most fields in the socket buffer. * These make use of the mutex pointer embedded in struct sockbuf, which * currently just references mutexes in the containing socket. The * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with * these locking macros. */ #define SOCKBUF_MTX(_sb) ((_sb)->sb_mtx) #define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb)) #define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb)) #define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb)) #define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED) #define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED) /* * Socket buffer private mbuf(9) flags. 
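[Editor's note] The private mbuf flags declared next mark data that has been appended to a socket buffer but whose contents are not yet populated (for example, pending async I/O or KTLS work); such mbufs are counted in sb_ccc but not sb_acc, and sbready() later marks them ready. A kernel-style sketch of what "not available" means when walking a chain (hypothetical helper, for illustration only):

/*
 * Hypothetical helper: count the bytes in a socket-buffer mbuf chain
 * that are not yet available, i.e. either M_NOTREADY (data not
 * populated yet) or M_BLOCKED (queued behind a not-ready mbuf).
 * This mirrors the sb_ccc vs. sb_acc distinction in struct sockbuf
 * above.
 */
static u_int
sb_notavail_bytes(const struct mbuf *m)
{
	u_int n = 0;

	for (; m != NULL; m = m->m_next)
		if (m->m_flags & M_NOTAVAIL)
			n += m->m_len;
	return (n);
}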
*/ #define M_NOTREADY M_PROTO1 /* m_data not populated yet */ #define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */ #define M_NOTAVAIL (M_NOTREADY | M_BLOCKED) void sbappend(struct sockbuf *sb, struct mbuf *m, int flags); void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags); int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags); void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags); void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * - sbcreatecontrol(caddr_t p, int size, int type, int level); -struct mbuf * - sbcreatecontrol_how(void *p, int size, int type, int level, - int wait); + sbcreatecontrol(void *p, int size, int type, int level, int wait); void sbdestroy(struct socket *, sb_which); void sbdrop(struct sockbuf *sb, int len); void sbdrop_locked(struct sockbuf *sb, int len); struct mbuf * sbcut_locked(struct sockbuf *sb, int len); void sbdroprecord(struct sockbuf *sb); void sbdroprecord_locked(struct sockbuf *sb); void sbflush(struct sockbuf *sb); void sbflush_locked(struct sockbuf *sb); void sbrelease(struct socket *, sb_which); void sbrelease_locked(struct socket *, sb_which); int sbsetopt(struct socket *so, int cmd, u_long cc); bool sbreserve_locked(struct socket *so, sb_which which, u_long cc, struct thread *td); void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len); struct mbuf * sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff); struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff); int sbwait(struct socket *, sb_which); void sballoc(struct sockbuf *, struct mbuf *); void sbfree(struct sockbuf *, struct mbuf *); void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m); void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m); int sbready(struct sockbuf *, struct mbuf *, int); /* * Return how much data is available to be taken out of socket * buffer right now. */ static inline u_int sbavail(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_acc); } /* * Return how much data sits there in the socket buffer * It might be that some data is not yet ready to be read. */ static inline u_int sbused(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_ccc); } /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might * still be negative (ccc > hiwat or mbcnt > mbmax). */ static inline long sbspace(struct sockbuf *sb) { int bleft, mleft; /* size should match sockbuf fields */ #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif if (sb->sb_flags & SB_STOP) return(0); bleft = sb->sb_hiwat - sb->sb_ccc; mleft = sb->sb_mbmax - sb->sb_mbcnt; return ((bleft < mleft) ? 
bleft : mleft); } #define SB_EMPTY_FIXUP(sb) do { \ if ((sb)->sb_mb == NULL) { \ (sb)->sb_mbtail = NULL; \ (sb)->sb_lastrecord = NULL; \ } \ } while (/*CONSTCOND*/0) #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *, const char *, int); void sblastmbufchk(struct sockbuf *, const char *, int); void sbcheck(struct sockbuf *, const char *, int); #define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__) #define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__) #define SBCHECK(sb) sbcheck((sb), __FILE__, __LINE__) #else #define SBLASTRECORDCHK(sb) do {} while (0) #define SBLASTMBUFCHK(sb) do {} while (0) #define SBCHECK(sb) do {} while (0) #endif /* SOCKBUF_DEBUG */ #endif /* _KERNEL */ #endif /* _SYS_SOCKBUF_H_ */
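[Editor's note] The only functional change in this header is the prototype hunk above: the old sbcreatecontrol()/sbcreatecontrol_how() pair is collapsed into a single sbcreatecontrol() that takes a void * payload and an explicit mbuf wait flag. A sketch of what a call site would look like with the new signature; the M_NOWAIT usage is the presumed convention carried over from sbcreatecontrol_how(), and the helper name is hypothetical:

/*
 * Illustrative call site for the consolidated API: build an
 * SCM_TIMESTAMP control mbuf without sleeping, as a datagram input
 * path would.  A NULL return means the control mbuf could not be
 * allocated and the ancillary data is simply dropped.
 */
static struct mbuf *
make_timestamp_control(void)
{
	struct timeval tv;

	microtime(&tv);
	return (sbcreatecontrol(&tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET,
	    M_NOWAIT));
}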