diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 2586c107ceaf..a54b93812c55 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -1,3585 +1,3576 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1991, 1993, 1995
  *	The Regents of the University of California.
  * Copyright (c) 2007-2009 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2021-2022 Gleb Smirnoff <glebius@FreeBSD.org>
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 #include "opt_ddb.h"
 #include "opt_ipsec.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ratelimit.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/hash.h>
 #include <sys/systm.h>
 #include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/eventhandler.h>
 #include <sys/domain.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/smp.h>
 #include <sys/smr.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/refcount.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/uma.h>
 #include <vm/vm.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_private.h>
 #include <net/if_types.h>
 #include <net/if_llatbl.h>
 #include <net/route.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_pcb_var.h>
 #include <netinet/tcp.h>
 #ifdef INET
 #include <netinet/in_var.h>
 #include <netinet/in_fib.h>
 #endif
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 #include <net/route/nhop.h>
 #endif
 
 #include <netipsec/ipsec_support.h>
 
 #include <security/mac/mac_framework.h>
 
 #define	INPCBLBGROUP_SIZMIN	8
 #define	INPCBLBGROUP_SIZMAX	256
 
 #define	INP_FREED	0x00000200	/* Went through in_pcbfree(). */
 #define	INP_INLBGROUP	0x01000000	/* Inserted into inpcblbgroup. */
 
 /*
  * These configure the range of local port addresses assigned to
  * "unspecified" outgoing connections/packets/whatever.
  */
 VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1;	/* 1023 */
 VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART;	/* 600 */
 VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST;	/* 10000 */
 VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST;	/* 65535 */
 VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO;	/* 49152 */
 VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO;	/* 65535 */
 
 /*
  * Reserved ports accessible only to root. There are significant
  * security considerations that must be accounted for when changing these,
  * but the security benefits can be great. Please be careful.
  */
 VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1;	/* 1023 */
 VNET_DEFINE(int, ipport_reservedlow);
 
 /* Enable random ephemeral port allocation by default. */
 VNET_DEFINE(int, ipport_randomized) = 1;
 
 #ifdef INET
 static struct inpcb	*in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
 			    struct in_addr faddr, u_int fport_arg,
 			    struct in_addr laddr, u_int lport_arg,
 			    int lookupflags, uint8_t numa_domain);
 
 #define RANGECHK(var, min, max) \
 	if ((var) < (min)) { (var) = (min); } \
 	else if ((var) > (max)) { (var) = (max); }
 
 static int
 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	error = sysctl_handle_int(oidp, arg1, arg2, req);
 	if (error == 0) {
 		RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
 		RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
 		RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
 		RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
 		RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
 		RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
 	}
 	return (error);
 }
 
 #undef RANGECHK
 
 static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IP Ports");
 
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(ipport_lowfirstauto), 0, &sysctl_net_ipport_check, "I",
     "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(ipport_lowlastauto), 0, &sysctl_net_ipport_check, "I",
     "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(ipport_firstauto), 0, &sysctl_net_ipport_check, "I",
     "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(ipport_lastauto), 0, &sysctl_net_ipport_check, "I",
     "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(ipport_hifirstauto), 0, &sysctl_net_ipport_check, "I",
     "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(ipport_hilastauto), 0, &sysctl_net_ipport_check, "I",
     "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
 	CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
 	&VNET_NAME(ipport_reservedhigh), 0, "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
 	CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
 
 #ifdef RATELIMIT
 counter_u64_t rate_limit_new;
 counter_u64_t rate_limit_chg;
 counter_u64_t rate_limit_active;
 counter_u64_t rate_limit_alloc_fail;
 counter_u64_t rate_limit_set_ok;
 
 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, rl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "IP Rate Limiting");
 SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, active, CTLFLAG_RD,
     &rate_limit_active, "Active rate limited connections");
 SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, alloc_fail, CTLFLAG_RD,
    &rate_limit_alloc_fail, "Rate limited connection failures");
 SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, set_ok, CTLFLAG_RD,
    &rate_limit_set_ok, "Rate limited setting succeeded");
 SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, newrl, CTLFLAG_RD,
    &rate_limit_new, "Total Rate limit new attempts");
 SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, chgrl, CTLFLAG_RD,
    &rate_limit_chg, "Total Rate limited change attempts");
 #endif /* RATELIMIT */
 
 #endif /* INET */
 
 VNET_DEFINE(uint32_t, in_pcbhashseed);
 static void
 in_pcbhashseed_init(void)
 {
 
 	V_in_pcbhashseed = arc4random();
 }
 VNET_SYSINIT(in_pcbhashseed_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
     in_pcbhashseed_init, 0);
 
 static void in_pcbremhash(struct inpcb *);
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
  *
  * NOTE: It is assumed that most of these functions will be called with
  * the pcbinfo lock held, and often, the inpcb lock held, as these utility
  * functions often modify hash chains or addresses in pcbs.
  */
 
 static struct inpcblbgroup *
 in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, struct ucred *cred,
     u_char vflag, uint16_t port, const union in_dependaddr *addr, int size,
     uint8_t numa_domain)
 {
 	struct inpcblbgroup *grp;
 	size_t bytes;
 
 	bytes = __offsetof(struct inpcblbgroup, il_inp[size]);
 	grp = malloc(bytes, M_PCB, M_ZERO | M_NOWAIT);
 	if (grp == NULL)
 		return (NULL);
 	grp->il_cred = crhold(cred);
 	grp->il_vflag = vflag;
 	grp->il_lport = port;
 	grp->il_numa_domain = numa_domain;
 	grp->il_dependladdr = *addr;
 	grp->il_inpsiz = size;
 	CK_LIST_INSERT_HEAD(hdr, grp, il_list);
 	return (grp);
 }
 
 static void
 in_pcblbgroup_free_deferred(epoch_context_t ctx)
 {
 	struct inpcblbgroup *grp;
 
 	grp = __containerof(ctx, struct inpcblbgroup, il_epoch_ctx);
 	crfree(grp->il_cred);
 	free(grp, M_PCB);
 }
 
 static void
 in_pcblbgroup_free(struct inpcblbgroup *grp)
 {
 
 	CK_LIST_REMOVE(grp, il_list);
 	NET_EPOCH_CALL(in_pcblbgroup_free_deferred, &grp->il_epoch_ctx);
 }
 
 static struct inpcblbgroup *
 in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
     struct inpcblbgroup *old_grp, int size)
 {
 	struct inpcblbgroup *grp;
 	int i;
 
 	grp = in_pcblbgroup_alloc(hdr, old_grp->il_cred, old_grp->il_vflag,
 	    old_grp->il_lport, &old_grp->il_dependladdr, size,
 	    old_grp->il_numa_domain);
 	if (grp == NULL)
 		return (NULL);
 
 	KASSERT(old_grp->il_inpcnt < grp->il_inpsiz,
 	    ("invalid new local group size %d and old local group count %d",
 	     grp->il_inpsiz, old_grp->il_inpcnt));
 
 	for (i = 0; i < old_grp->il_inpcnt; ++i)
 		grp->il_inp[i] = old_grp->il_inp[i];
 	grp->il_inpcnt = old_grp->il_inpcnt;
 	in_pcblbgroup_free(old_grp);
 	return (grp);
 }
 
 /*
  * PCB at index 'i' is removed from the group. Pull up the ones below il_inp[i]
  * and shrink group if possible.
  */
 static void
 in_pcblbgroup_reorder(struct inpcblbgrouphead *hdr, struct inpcblbgroup **grpp,
     int i)
 {
 	struct inpcblbgroup *grp, *new_grp;
 
 	grp = *grpp;
 	for (; i + 1 < grp->il_inpcnt; ++i)
 		grp->il_inp[i] = grp->il_inp[i + 1];
 	grp->il_inpcnt--;
 
 	if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN &&
 	    grp->il_inpcnt <= grp->il_inpsiz / 4) {
 		/* Shrink this group. */
 		new_grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2);
 		if (new_grp != NULL)
 			*grpp = new_grp;
 	}
 }
 
 /*
  * Add PCB to load balance group for SO_REUSEPORT_LB option.
  */
 static int
 in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain)
 {
 	const static struct timeval interval = { 60, 0 };
 	static struct timeval lastprint;
 	struct inpcbinfo *pcbinfo;
 	struct inpcblbgrouphead *hdr;
 	struct inpcblbgroup *grp;
 	uint32_t idx;
 
 	pcbinfo = inp->inp_pcbinfo;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 #ifdef INET6
 	/*
 	 * Don't allow IPv4 mapped INET6 wild socket.
 	 */
 	if ((inp->inp_vflag & INP_IPV4) &&
 	    inp->inp_laddr.s_addr == INADDR_ANY &&
 	    INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) {
 		return (0);
 	}
 #endif
 
 	idx = INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask);
 	hdr = &pcbinfo->ipi_lbgrouphashbase[idx];
 	CK_LIST_FOREACH(grp, hdr, il_list) {
 		if (grp->il_cred->cr_prison == inp->inp_cred->cr_prison &&
 		    grp->il_vflag == inp->inp_vflag &&
 		    grp->il_lport == inp->inp_lport &&
 		    grp->il_numa_domain == numa_domain &&
 		    memcmp(&grp->il_dependladdr,
 		    &inp->inp_inc.inc_ie.ie_dependladdr,
 		    sizeof(grp->il_dependladdr)) == 0) {
 			break;
 		}
 	}
 	if (grp == NULL) {
 		/* Create new load balance group. */
 		grp = in_pcblbgroup_alloc(hdr, inp->inp_cred, inp->inp_vflag,
 		    inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
 		    INPCBLBGROUP_SIZMIN, numa_domain);
 		if (grp == NULL)
 			return (ENOBUFS);
 	} else if (grp->il_inpcnt == grp->il_inpsiz) {
 		if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
 			if (ratecheck(&lastprint, &interval))
 				printf("lb group port %d, limit reached\n",
 				    ntohs(grp->il_lport));
 			return (0);
 		}
 
 		/* Expand this local group. */
 		grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
 		if (grp == NULL)
 			return (ENOBUFS);
 	}
 
 	KASSERT(grp->il_inpcnt < grp->il_inpsiz,
 	    ("invalid local group size %d and count %d", grp->il_inpsiz,
 	    grp->il_inpcnt));
 
 	grp->il_inp[grp->il_inpcnt] = inp;
 	grp->il_inpcnt++;
 	inp->inp_flags |= INP_INLBGROUP;
 	return (0);
 }
 
 /*
  * Remove PCB from load balance group.
  */
 static void
 in_pcbremlbgrouphash(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcblbgrouphead *hdr;
 	struct inpcblbgroup *grp;
 	int i;
 
 	pcbinfo = inp->inp_pcbinfo;
 
 	INP_WLOCK_ASSERT(inp);
 	MPASS(inp->inp_flags & INP_INLBGROUP);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	hdr = &pcbinfo->ipi_lbgrouphashbase[
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
 	CK_LIST_FOREACH(grp, hdr, il_list) {
 		for (i = 0; i < grp->il_inpcnt; ++i) {
 			if (grp->il_inp[i] != inp)
 				continue;
 
 			if (grp->il_inpcnt == 1) {
 				/* We are the last, free this local group. */
 				in_pcblbgroup_free(grp);
 			} else {
 				/* Pull up inpcbs, shrink group if possible. */
 				in_pcblbgroup_reorder(hdr, &grp, i);
 			}
 			inp->inp_flags &= ~INP_INLBGROUP;
 			return;
 		}
 	}
 	KASSERT(0, ("%s: did not find %p", __func__, inp));
 }
 
 int
 in_pcblbgroup_numa(struct inpcb *inp, int arg)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcblbgrouphead *hdr;
 	struct inpcblbgroup *grp;
 	int err, i;
 	uint8_t numa_domain;
 
 	switch (arg) {
 	case TCP_REUSPORT_LB_NUMA_NODOM:
 		numa_domain = M_NODOM;
 		break;
 	case TCP_REUSPORT_LB_NUMA_CURDOM:
 		numa_domain = PCPU_GET(domain);
 		break;
 	default:
 		if (arg < 0 || arg >= vm_ndomains)
 			return (EINVAL);
 		numa_domain = arg;
 	}
 
 	err = 0;
 	pcbinfo = inp->inp_pcbinfo;
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	hdr = &pcbinfo->ipi_lbgrouphashbase[
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
 	CK_LIST_FOREACH(grp, hdr, il_list) {
 		for (i = 0; i < grp->il_inpcnt; ++i) {
 			if (grp->il_inp[i] != inp)
 				continue;
 
 			if (grp->il_numa_domain == numa_domain) {
 				goto abort_with_hash_wlock;
 			}
 
 			/* Remove it from the old group. */
 			in_pcbremlbgrouphash(inp);
 
 			/* Add it to the new group based on numa domain. */
 			in_pcbinslbgrouphash(inp, numa_domain);
 			goto abort_with_hash_wlock;
 		}
 	}
 	err = ENOENT;
 abort_with_hash_wlock:
 	INP_HASH_WUNLOCK(pcbinfo);
 	return (err);
 }
 
 /* Make sure it is safe to use hashinit(9) on CK_LIST. */
 CTASSERT(sizeof(struct inpcbhead) == sizeof(LIST_HEAD(, inpcb)));
 
 /*
  * Initialize an inpcbinfo - a per-VNET instance of connections db.
  */
 void
 in_pcbinfo_init(struct inpcbinfo *pcbinfo, struct inpcbstorage *pcbstor,
     u_int hash_nelements, u_int porthash_nelements)
 {
 
 	mtx_init(&pcbinfo->ipi_lock, pcbstor->ips_infolock_name, NULL, MTX_DEF);
 	mtx_init(&pcbinfo->ipi_hash_lock, pcbstor->ips_hashlock_name,
 	    NULL, MTX_DEF);
 #ifdef VIMAGE
 	pcbinfo->ipi_vnet = curvnet;
 #endif
 	CK_LIST_INIT(&pcbinfo->ipi_listhead);
 	pcbinfo->ipi_count = 0;
 	pcbinfo->ipi_hash_exact = hashinit(hash_nelements, M_PCB,
 	    &pcbinfo->ipi_hashmask);
 	pcbinfo->ipi_hash_wild = hashinit(hash_nelements, M_PCB,
 	    &pcbinfo->ipi_hashmask);
 	porthash_nelements = imin(porthash_nelements, IPPORT_MAX + 1);
 	pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
 	    &pcbinfo->ipi_porthashmask);
 	pcbinfo->ipi_lbgrouphashbase = hashinit(porthash_nelements, M_PCB,
 	    &pcbinfo->ipi_lbgrouphashmask);
 	pcbinfo->ipi_zone = pcbstor->ips_zone;
 	pcbinfo->ipi_portzone = pcbstor->ips_portzone;
 	pcbinfo->ipi_smr = uma_zone_get_smr(pcbinfo->ipi_zone);
 }
 
 /*
  * Destroy an inpcbinfo.
  */
 void
 in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
 {
 
 	KASSERT(pcbinfo->ipi_count == 0,
 	    ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
 
 	hashdestroy(pcbinfo->ipi_hash_exact, M_PCB, pcbinfo->ipi_hashmask);
 	hashdestroy(pcbinfo->ipi_hash_wild, M_PCB, pcbinfo->ipi_hashmask);
 	hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
 	    pcbinfo->ipi_porthashmask);
 	hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
 	    pcbinfo->ipi_lbgrouphashmask);
 	mtx_destroy(&pcbinfo->ipi_hash_lock);
 	mtx_destroy(&pcbinfo->ipi_lock);
 }
 
 /*
  * Initialize a pcbstorage - per protocol zones to allocate inpcbs.
  */
 static void inpcb_fini(void *, int);
 void
 in_pcbstorage_init(void *arg)
 {
 	struct inpcbstorage *pcbstor = arg;
 
 	pcbstor->ips_zone = uma_zcreate(pcbstor->ips_zone_name,
 	    pcbstor->ips_size, NULL, NULL, pcbstor->ips_pcbinit,
 	    inpcb_fini, UMA_ALIGN_CACHE, UMA_ZONE_SMR);
 	pcbstor->ips_portzone = uma_zcreate(pcbstor->ips_portzone_name,
 	    sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_zone_set_smr(pcbstor->ips_portzone,
 	    uma_zone_get_smr(pcbstor->ips_zone));
 }
 
 /*
  * Destroy a pcbstorage - used by unloadable protocols.
  */
 void
 in_pcbstorage_destroy(void *arg)
 {
 	struct inpcbstorage *pcbstor = arg;
 
 	uma_zdestroy(pcbstor->ips_zone);
 	uma_zdestroy(pcbstor->ips_portzone);
 }
 
 /*
  * Allocate a PCB and associate it with the socket.
  * On success return with the PCB locked.
  */
 int
 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
 {
 	struct inpcb *inp;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT) || defined(MAC)
 	int error;
 #endif
 
 	inp = uma_zalloc_smr(pcbinfo->ipi_zone, M_NOWAIT);
 	if (inp == NULL)
 		return (ENOBUFS);
 	bzero(&inp->inp_start_zero, inp_zero_size);
 #ifdef NUMA
 	inp->inp_numa_domain = M_NODOM;
 #endif
 	inp->inp_pcbinfo = pcbinfo;
 	inp->inp_socket = so;
 	inp->inp_cred = crhold(so->so_cred);
 	inp->inp_inc.inc_fibnum = so->so_fibnum;
 #ifdef MAC
 	error = mac_inpcb_init(inp, M_NOWAIT);
 	if (error != 0)
 		goto out;
 	mac_inpcb_create(so, inp);
 #endif
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	error = ipsec_init_pcbpolicy(inp);
 	if (error != 0) {
 #ifdef MAC
 		mac_inpcb_destroy(inp);
 #endif
 		goto out;
 	}
 #endif /*IPSEC*/
 #ifdef INET6
 	if (INP_SOCKAF(so) == AF_INET6) {
 		inp->inp_vflag |= INP_IPV6PROTO | INP_IPV6;
 		if (V_ip6_v6only)
 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
 #ifdef INET
 		else
 			inp->inp_vflag |= INP_IPV4;
 #endif
 		if (V_ip6_auto_flowlabel)
 			inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 		inp->in6p_hops = -1;	/* use kernel default */
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 		inp->inp_vflag |= INP_IPV4;
 #endif
 	inp->inp_smr = SMR_SEQ_INVALID;
 
 	/*
 	 * Routes in inpcb's can cache L2 as well; they are guaranteed
 	 * to be cleaned up.
 	 */
 	inp->inp_route.ro_flags = RT_LLE_CACHE;
 	refcount_init(&inp->inp_refcount, 1);   /* Reference from socket. */
 	INP_WLOCK(inp);
 	INP_INFO_WLOCK(pcbinfo);
 	pcbinfo->ipi_count++;
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	CK_LIST_INSERT_HEAD(&pcbinfo->ipi_listhead, inp, inp_list);
 	INP_INFO_WUNLOCK(pcbinfo);
 	so->so_pcb = inp;
 
 	return (0);
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT) || defined(MAC)
 out:
 	uma_zfree_smr(pcbinfo->ipi_zone, inp);
 	return (error);
 #endif
 }
 
 #ifdef INET
 int
 in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred)
 {
 	int anonport, error;
 
 	KASSERT(sin == NULL || sin->sin_family == AF_INET,
 	    ("%s: invalid address family for %p", __func__, sin));
 	KASSERT(sin == NULL || sin->sin_len == sizeof(struct sockaddr_in),
 	    ("%s: invalid address length for %p", __func__, sin));
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
 	anonport = sin == NULL || sin->sin_port == 0;
 	error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr,
 	    &inp->inp_lport, cred);
 	if (error)
 		return (error);
 	if (in_pcbinshash(inp) != 0) {
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		inp->inp_lport = 0;
 		return (EAGAIN);
 	}
 	if (anonport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 #endif
 
 #if defined(INET) || defined(INET6)
 /*
  * Assign a local port like in_pcb_lport(), but also used with connect()
  * and a foreign address and port.  If fsa is non-NULL, choose a local port
  * that is unused with those, otherwise one that is completely unused.
  * lsa can be NULL for IPv6.
  */
 int
 in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp,
     struct sockaddr *fsa, u_short fport, struct ucred *cred, int lookupflags)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *tmpinp;
 	unsigned short *lastport;
 	int count, error;
 	u_short aux, first, last, lport;
 #ifdef INET
 	struct in_addr laddr, faddr;
 #endif
 #ifdef INET6
 	struct in6_addr *laddr6, *faddr6;
 #endif
 
 	pcbinfo = inp->inp_pcbinfo;
 
 	/*
 	 * Because no actual state changes occur here, a global write lock on
 	 * the pcbinfo isn't required.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if (inp->inp_flags & INP_HIGHPORT) {
 		first = V_ipport_hifirstauto;	/* sysctl */
 		last  = V_ipport_hilastauto;
 		lastport = &pcbinfo->ipi_lasthi;
 	} else if (inp->inp_flags & INP_LOWPORT) {
 		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT);
 		if (error)
 			return (error);
 		first = V_ipport_lowfirstauto;	/* 1023 */
 		last  = V_ipport_lowlastauto;	/* 600 */
 		lastport = &pcbinfo->ipi_lastlow;
 	} else {
 		first = V_ipport_firstauto;	/* sysctl */
 		last  = V_ipport_lastauto;
 		lastport = &pcbinfo->ipi_lastport;
 	}
 
 	/*
 	 * Instead of having two loops further down counting up or down
 	 * make sure that first is always <= last and go with only one
 	 * code path implementing all logic.
 	 */
 	if (first > last) {
 		aux = first;
 		first = last;
 		last = aux;
 	}
 
 #ifdef INET
 	laddr.s_addr = INADDR_ANY;	/* used by INET6+INET below too */
 	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) {
 		if (lsa != NULL)
 			laddr = ((struct sockaddr_in *)lsa)->sin_addr;
 		if (fsa != NULL)
 			faddr = ((struct sockaddr_in *)fsa)->sin_addr;
 	}
 #endif
 #ifdef INET6
 	laddr6 = NULL;
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		if (lsa != NULL)
 			laddr6 = &((struct sockaddr_in6 *)lsa)->sin6_addr;
 		if (fsa != NULL)
 			faddr6 = &((struct sockaddr_in6 *)fsa)->sin6_addr;
 	}
 #endif
 
 	tmpinp = NULL;
 	lport = *lportp;
 
 	if (V_ipport_randomized)
 		*lastport = first + (arc4random() % (last - first));
 
 	count = last - first;
 
 	do {
 		if (count-- < 0)	/* completely used? */
 			return (EADDRNOTAVAIL);
 		++*lastport;
 		if (*lastport < first || *lastport > last)
 			*lastport = first;
 		lport = htons(*lastport);
 
 		if (fsa != NULL) {
 #ifdef INET
 			if (lsa->sa_family == AF_INET) {
 				tmpinp = in_pcblookup_hash_locked(pcbinfo,
 				    faddr, fport, laddr, lport, lookupflags,
 				    M_NODOM);
 			}
 #endif
 #ifdef INET6
 			if (lsa->sa_family == AF_INET6) {
 				tmpinp = in6_pcblookup_hash_locked(pcbinfo,
 				    faddr6, fport, laddr6, lport, lookupflags,
 				    M_NODOM);
 			}
 #endif
 		} else {
 #ifdef INET6
 			if ((inp->inp_vflag & INP_IPV6) != 0) {
 				tmpinp = in6_pcblookup_local(pcbinfo,
 				    &inp->in6p_laddr, lport, lookupflags, cred);
 #ifdef INET
 				if (tmpinp == NULL &&
 				    (inp->inp_vflag & INP_IPV4))
 					tmpinp = in_pcblookup_local(pcbinfo,
 					    laddr, lport, lookupflags, cred);
 #endif
 			}
 #endif
 #if defined(INET) && defined(INET6)
 			else
 #endif
 #ifdef INET
 				tmpinp = in_pcblookup_local(pcbinfo, laddr,
 				    lport, lookupflags, cred);
 #endif
 		}
 	} while (tmpinp != NULL);
 
 	*lportp = lport;
 
 	return (0);
 }
 
 /*
  * Select a local port (number) to use.
  */
 int
 in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
     struct ucred *cred, int lookupflags)
 {
 	struct sockaddr_in laddr;
 
 	if (laddrp) {
 		bzero(&laddr, sizeof(laddr));
 		laddr.sin_family = AF_INET;
 		laddr.sin_addr = *laddrp;
 	}
 	return (in_pcb_lport_dest(inp, laddrp ? (struct sockaddr *) &laddr :
 	    NULL, lportp, NULL, 0, cred, lookupflags));
 }
 #endif /* INET || INET6 */
 
 #ifdef INET
 /*
  * Set up a bind operation on a PCB, performing port allocation
  * as required, but do not actually modify the PCB. Callers can
  * either complete the bind by setting inp_laddr/inp_lport and
  * calling in_pcbinshash(), or they can just use the resulting
  * port and address to authorise the sending of a once-off packet.
  *
  * On error, the values of *laddrp and *lportp are not changed.
  */
 int
 in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
     u_short *lportp, struct ucred *cred)
 {
 	struct socket *so = inp->inp_socket;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct in_addr laddr;
 	u_short lport = 0;
 	int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error;
 
 	/*
 	 * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
 	 * so that we don't have to add to the (already messy) code below.
 	 */
 	int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
 
 	/*
 	 * No state changes, so read locks are sufficient here.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	laddr.s_addr = *laddrp;
 	if (sin != NULL && laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 	if (sin == NULL) {
 		if ((error = prison_local_ip4(cred, &laddr)) != 0)
 			return (error);
 	} else {
 		KASSERT(sin->sin_family == AF_INET,
 		    ("%s: invalid family for address %p", __func__, sin));
 		KASSERT(sin->sin_len == sizeof(*sin),
 		    ("%s: invalid length for address %p", __func__, sin));
 
 		error = prison_local_ip4(cred, &sin->sin_addr);
 		if (error)
 			return (error);
 		if (sin->sin_port != *lportp) {
 			/* Don't allow the port to change. */
 			if (*lportp != 0)
 				return (EINVAL);
 			lport = sin->sin_port;
 		}
 		/* NB: lport is left as 0 if the port isn't being changed. */
 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 			/*
 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
 			 * allow complete duplication of binding if
 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
 			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 			/*
 			 * XXX: How to deal with SO_REUSEPORT_LB here?
 			 * Treat same as SO_REUSEPORT for now.
 			 */
 			if ((so->so_options &
 			    (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
 				reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
 			sin->sin_port = 0;		/* yech... */
 			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
 			/*
 			 * Is the address a local IP address?
 			 * If INP_BINDANY is set, then the socket may be bound
 			 * to any endpoint address, local or not.
 			 */
 			if ((inp->inp_flags & INP_BINDANY) == 0 &&
 			    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
 				return (EADDRNOTAVAIL);
 		}
 		laddr = sin->sin_addr;
 		if (lport) {
 			struct inpcb *t;
 
 			/* GROSS */
 			if (ntohs(lport) <= V_ipport_reservedhigh &&
 			    ntohs(lport) >= V_ipport_reservedlow &&
 			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT))
 				return (EACCES);
 			if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
 			    priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) {
 				t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 				    lport, INPLOOKUP_WILDCARD, cred);
 	/*
 	 * XXX
 	 * This entire block sorely needs a rewrite.
 	 */
 				if (t != NULL &&
 				    (so->so_type != SOCK_STREAM ||
 				     ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 				     (t->inp_socket->so_options & SO_REUSEPORT) ||
 				     (t->inp_socket->so_options & SO_REUSEPORT_LB) == 0) &&
 				    (inp->inp_cred->cr_uid !=
 				     t->inp_cred->cr_uid))
 					return (EADDRINUSE);
 			}
 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 			    lport, lookupflags, cred);
 			if (t != NULL && (reuseport & t->inp_socket->so_options) == 0 &&
 			    (reuseport_lb & t->inp_socket->so_options) == 0) {
 #ifdef INET6
 				if (ntohl(sin->sin_addr.s_addr) !=
 				    INADDR_ANY ||
 				    ntohl(t->inp_laddr.s_addr) !=
 				    INADDR_ANY ||
 				    (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
 				    (t->inp_vflag & INP_IPV6PROTO) == 0)
 #endif
 						return (EADDRINUSE);
 			}
 		}
 	}
 	if (*lportp != 0)
 		lport = *lportp;
 	if (lport == 0) {
 		error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags);
 		if (error != 0)
 			return (error);
 	}
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	return (0);
 }
 
 /*
  * Connect from a socket to a specified address.
  * Both address and port must be specified in argument sin.
  * If don't have a local address for this socket yet,
  * then pick one.
  */
 int
 in_pcbconnect(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred,
     bool rehash __unused)
 {
 	u_short lport, fport;
 	in_addr_t laddr, faddr;
 	int anonport, error;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 	KASSERT(in_nullhost(inp->inp_faddr),
 	    ("%s: inp is already connected", __func__));
 
 	lport = inp->inp_lport;
 	laddr = inp->inp_laddr.s_addr;
 	anonport = (lport == 0);
 	error = in_pcbconnect_setup(inp, sin, &laddr, &lport, &faddr, &fport,
 	    cred);
 	if (error)
 		return (error);
 
 	inp->inp_faddr.s_addr = faddr;
 	inp->inp_fport = fport;
 
 	/* Do the initial binding of the local address if required. */
 	if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
 		inp->inp_lport = lport;
 		inp->inp_laddr.s_addr = laddr;
 		if (in_pcbinshash(inp) != 0) {
 			inp->inp_laddr.s_addr = inp->inp_faddr.s_addr =
 			    INADDR_ANY;
 			inp->inp_lport = inp->inp_fport = 0;
 			return (EAGAIN);
 		}
 	} else {
 		inp->inp_lport = lport;
 		inp->inp_laddr.s_addr = laddr;
 		if ((inp->inp_flags & INP_INHASHLIST) != 0)
 			in_pcbrehash(inp);
 		else
 			in_pcbinshash(inp);
 	}
 
 	if (anonport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 
 /*
  * Do proper source address selection on an unbound socket in case
  * of connect. Take jails into account as well.
  */
 int
 in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
     struct ucred *cred)
 {
 	struct ifaddr *ifa;
 	struct sockaddr *sa;
 	struct sockaddr_in *sin, dst;
 	struct nhop_object *nh;
 	int error;
 
 	NET_EPOCH_ASSERT();
 	KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
 
 	/*
 	 * Bypass source address selection and use the primary jail IP
 	 * if requested.
 	 */
 	if (!prison_saddrsel_ip4(cred, laddr))
 		return (0);
 
 	error = 0;
 
 	nh = NULL;
 	bzero(&dst, sizeof(dst));
 	sin = &dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(struct sockaddr_in);
 	sin->sin_addr.s_addr = faddr->s_addr;
 
 	/*
 	 * If route is known our src addr is taken from the i/f,
 	 * else punt.
 	 *
 	 * Find out route to destination.
 	 */
 	if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
 		nh = fib4_lookup(inp->inp_inc.inc_fibnum, *faddr,
 		    0, NHR_NONE, 0);
 
 	/*
 	 * If we found a route, use the address corresponding to
 	 * the outgoing interface.
 	 *
 	 * Otherwise assume faddr is reachable on a directly connected
 	 * network and try to find a corresponding interface to take
 	 * the source address from.
 	 */
 	if (nh == NULL || nh->nh_ifp == NULL) {
 		struct in_ifaddr *ia;
 		struct ifnet *ifp;
 
 		ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin,
 					inp->inp_socket->so_fibnum));
 		if (ia == NULL) {
 			ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0,
 						inp->inp_socket->so_fibnum));
 		}
 		if (ia == NULL) {
 			error = ENETUNREACH;
 			goto done;
 		}
 
 		if (!prison_flag(cred, PR_IP4)) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		ifp = ia->ia_ifp;
 		ia = NULL;
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)sa;
 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 				ia = (struct in_ifaddr *)ifa;
 				break;
 			}
 		}
 		if (ia != NULL) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 	/*
 	 * If the outgoing interface on the route found is not
 	 * a loopback interface, use the address from that interface.
 	 * In case of jails do those three steps:
 	 * 1. check if the interface address belongs to the jail. If so use it.
 	 * 2. check if we have any address on the outgoing interface
 	 *    belonging to this jail. If so use it.
 	 * 3. as a last resort return the 'default' jail address.
 	 */
 	if ((nh->nh_ifp->if_flags & IFF_LOOPBACK) == 0) {
 		struct in_ifaddr *ia;
 		struct ifnet *ifp;
 
 		/* If not jailed, use the default returned. */
 		if (!prison_flag(cred, PR_IP4)) {
 			ia = (struct in_ifaddr *)nh->nh_ifa;
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/* Jailed. */
 		/* 1. Check if the iface address belongs to the jail. */
 		sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr;
 		if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 			ia = (struct in_ifaddr *)nh->nh_ifa;
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/*
 		 * 2. Check if we have any address on the outgoing interface
 		 *    belonging to this jail.
 		 */
 		ia = NULL;
 		ifp = nh->nh_ifp;
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)sa;
 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 				ia = (struct in_ifaddr *)ifa;
 				break;
 			}
 		}
 		if (ia != NULL) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 	/*
 	 * The outgoing interface is marked with 'loopback net', so a route
 	 * to ourselves is here.
 	 * Try to find the interface of the destination address and then
 	 * take the address from there. That interface is not necessarily
 	 * a loopback interface.
 	 * In case of jails, check that it is an address of the jail
 	 * and if we cannot find, fall back to the 'default' jail address.
 	 */
 	if ((nh->nh_ifp->if_flags & IFF_LOOPBACK) != 0) {
 		struct in_ifaddr *ia;
 
 		ia = ifatoia(ifa_ifwithdstaddr(sintosa(&dst),
 					inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithnet(sintosa(&dst), 0,
 						inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithaddr(sintosa(&dst)));
 
 		if (!prison_flag(cred, PR_IP4)) {
 			if (ia == NULL) {
 				error = ENETUNREACH;
 				goto done;
 			}
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/* Jailed. */
 		if (ia != NULL) {
 			struct ifnet *ifp;
 
 			ifp = ia->ia_ifp;
 			ia = NULL;
 			CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 				sa = ifa->ifa_addr;
 				if (sa->sa_family != AF_INET)
 					continue;
 				sin = (struct sockaddr_in *)sa;
 				if (prison_check_ip4(cred,
 				    &sin->sin_addr) == 0) {
 					ia = (struct in_ifaddr *)ifa;
 					break;
 				}
 			}
 			if (ia != NULL) {
 				laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 				goto done;
 			}
 		}
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 done:
 	if (error == 0 && laddr->s_addr == INADDR_ANY)
 		return (EHOSTUNREACH);
 	return (error);
 }
 
 /*
  * Set up for a connect from a socket to the specified address.
  * On entry, *laddrp and *lportp should contain the current local
  * address and port for the PCB; these are updated to the values
  * that should be placed in inp_laddr and inp_lport to complete
  * the connect.
  *
  * On success, *faddrp and *fportp will be set to the remote address
  * and port. These are not updated in the error case.
  */
 int
 in_pcbconnect_setup(struct inpcb *inp, struct sockaddr_in *sin,
     in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
     struct ucred *cred)
 {
 	struct in_ifaddr *ia;
 	struct in_addr laddr, faddr;
 	u_short lport, fport;
 	int error;
 
 	KASSERT(sin->sin_family == AF_INET,
 	    ("%s: invalid address family for %p", __func__, sin));
 	KASSERT(sin->sin_len == sizeof(*sin),
 	    ("%s: invalid address length for %p", __func__, sin));
 
 	/*
 	 * Because a global state change doesn't actually occur here, a read
 	 * lock is sufficient.
 	 */
 	NET_EPOCH_ASSERT();
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
 
 	if (sin->sin_port == 0)
 		return (EADDRNOTAVAIL);
 	laddr.s_addr = *laddrp;
 	lport = *lportp;
 	faddr = sin->sin_addr;
 	fport = sin->sin_port;
 #ifdef ROUTE_MPATH
 	if (CALC_FLOWID_OUTBOUND) {
 		uint32_t hash_val, hash_type;
 
 		hash_val = fib4_calc_software_hash(laddr, faddr, 0, fport,
 		    inp->inp_socket->so_proto->pr_protocol, &hash_type);
 
 		inp->inp_flowid = hash_val;
 		inp->inp_flowtype = hash_type;
 	}
 #endif
 	if (!CK_STAILQ_EMPTY(&V_in_ifaddrhead)) {
 		/*
 		 * If the destination address is INADDR_ANY,
 		 * use the primary local address.
 		 * If the supplied address is INADDR_BROADCAST,
 		 * and the primary interface supports broadcast,
 		 * choose the broadcast address for that interface.
 		 */
 		if (faddr.s_addr == INADDR_ANY) {
 			faddr =
 			    IA_SIN(CK_STAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
 			if ((error = prison_get_ip4(cred, &faddr)) != 0)
 				return (error);
 		} else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
 			if (CK_STAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
 			    IFF_BROADCAST)
 				faddr = satosin(&CK_STAILQ_FIRST(
 				    &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
 		}
 	}
 	if (laddr.s_addr == INADDR_ANY) {
 		error = in_pcbladdr(inp, &faddr, &laddr, cred);
 		/*
 		 * If the destination address is multicast and an outgoing
 		 * interface has been set as a multicast option, prefer the
 		 * address of that interface as our source address.
 		 */
 		if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
 		    inp->inp_moptions != NULL) {
 			struct ip_moptions *imo;
 			struct ifnet *ifp;
 
 			imo = inp->inp_moptions;
 			if (imo->imo_multicast_ifp != NULL) {
 				ifp = imo->imo_multicast_ifp;
 				CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 					if (ia->ia_ifp == ifp &&
 					    prison_check_ip4(cred,
 					    &ia->ia_addr.sin_addr) == 0)
 						break;
 				}
 				if (ia == NULL)
 					error = EADDRNOTAVAIL;
 				else {
 					laddr = ia->ia_addr.sin_addr;
 					error = 0;
 				}
 			}
 		}
 		if (error)
 			return (error);
 	}
 
 	if (lport != 0) {
 		if (in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr,
 		    fport, laddr, lport, 0, M_NODOM) != NULL)
 			return (EADDRINUSE);
 	} else {
 		struct sockaddr_in lsin, fsin;
 
 		bzero(&lsin, sizeof(lsin));
 		bzero(&fsin, sizeof(fsin));
 		lsin.sin_family = AF_INET;
 		lsin.sin_addr = laddr;
 		fsin.sin_family = AF_INET;
 		fsin.sin_addr = faddr;
 		error = in_pcb_lport_dest(inp, (struct sockaddr *) &lsin,
 		    &lport, (struct sockaddr *)& fsin, fport, cred,
 		    INPLOOKUP_WILDCARD);
 		if (error)
 			return (error);
 	}
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	*faddrp = faddr.s_addr;
 	*fportp = fport;
 	return (0);
 }
 
 void
 in_pcbdisconnect(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 	KASSERT(inp->inp_smr == SMR_SEQ_INVALID,
 	    ("%s: inp %p was already disconnected", __func__, inp));
 
 	in_pcbremhash_locked(inp);
 
 	/* See the comment in in_pcbinshash(). */
 	inp->inp_smr = smr_advance(inp->inp_pcbinfo->ipi_smr);
 	inp->inp_laddr.s_addr = INADDR_ANY;
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	inp->inp_fport = 0;
 }
 #endif /* INET */
 
-/*
- * in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
- * For most protocols, this will be invoked immediately prior to calling
- * in_pcbfree().  However, with TCP the inpcb may significantly outlive the
- * socket, in which case in_pcbfree() is deferred.
- */
-void
-in_pcbdetach(struct inpcb *inp)
-{
-
-	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
-
-#ifdef RATELIMIT
-	if (inp->inp_snd_tag != NULL)
-		in_pcbdetach_txrtlmt(inp);
-#endif
-	inp->inp_socket->so_pcb = NULL;
-	inp->inp_socket = NULL;
-}
-
 /*
  * inpcb hash lookups are protected by SMR section.
  *
  * Once desired pcb has been found, switching from SMR section to a pcb
  * lock is performed with inp_smr_lock(). We can not use INP_(W|R)LOCK
  * here because SMR is a critical section.
  * In 99%+ cases inp_smr_lock() would obtain the lock immediately.
  */
 void
 inp_lock(struct inpcb *inp, const inp_lookup_t lock)
 {
 
 	lock == INPLOOKUP_RLOCKPCB ?
 	    rw_rlock(&inp->inp_lock) : rw_wlock(&inp->inp_lock);
 }
 
 void
 inp_unlock(struct inpcb *inp, const inp_lookup_t lock)
 {
 
 	lock == INPLOOKUP_RLOCKPCB ?
 	    rw_runlock(&inp->inp_lock) : rw_wunlock(&inp->inp_lock);
 }
 
 int
 inp_trylock(struct inpcb *inp, const inp_lookup_t lock)
 {
 
 	return (lock == INPLOOKUP_RLOCKPCB ?
 	    rw_try_rlock(&inp->inp_lock) : rw_try_wlock(&inp->inp_lock));
 }
 
 static inline bool
 _inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock, const int ignflags)
 {
 
 	MPASS(lock == INPLOOKUP_RLOCKPCB || lock == INPLOOKUP_WLOCKPCB);
 	SMR_ASSERT_ENTERED(inp->inp_pcbinfo->ipi_smr);
 
 	if (__predict_true(inp_trylock(inp, lock))) {
 		if (__predict_false(inp->inp_flags & ignflags)) {
 			smr_exit(inp->inp_pcbinfo->ipi_smr);
 			inp_unlock(inp, lock);
 			return (false);
 		}
 		smr_exit(inp->inp_pcbinfo->ipi_smr);
 		return (true);
 	}
 
 	if (__predict_true(refcount_acquire_if_not_zero(&inp->inp_refcount))) {
 		smr_exit(inp->inp_pcbinfo->ipi_smr);
 		inp_lock(inp, lock);
 		if (__predict_false(in_pcbrele(inp, lock)))
 			return (false);
 		/*
 		 * inp acquired through refcount & lock for sure didn't went
 		 * through uma_zfree().  However, it may have already went
 		 * through in_pcbfree() and has another reference, that
 		 * prevented its release by our in_pcbrele().
 		 */
 		if (__predict_false(inp->inp_flags & ignflags)) {
 			inp_unlock(inp, lock);
 			return (false);
 		}
 		return (true);
 	} else {
 		smr_exit(inp->inp_pcbinfo->ipi_smr);
 		return (false);
 	}
 }
 
 bool
 inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock)
 {
 
 	/*
 	 * in_pcblookup() family of functions ignore not only freed entries,
 	 * that may be found due to lockless access to the hash, but dropped
 	 * entries, too.
 	 */
 	return (_inp_smr_lock(inp, lock, INP_FREED | INP_DROPPED));
 }
 
 /*
  * inp_next() - inpcb hash/list traversal iterator
  *
  * Requires initialized struct inpcb_iterator for context.
  * The structure can be initialized with INP_ITERATOR() or INP_ALL_ITERATOR().
  *
  * - Iterator can have either write-lock or read-lock semantics, that can not
  *   be changed later.
  * - Iterator can iterate either over all pcbs list (INP_ALL_LIST), or through
  *   a single hash slot.  Note: only rip_input() does the latter.
  * - Iterator may have optional bool matching function.  The matching function
  *   will be executed for each inpcb in the SMR context, so it can not acquire
  *   locks and can safely access only immutable fields of inpcb.
  *
  * A fresh initialized iterator has NULL inpcb in its context and that
  * means that inp_next() call would return the very first inpcb on the list
  * locked with desired semantic.  In all following calls the context pointer
  * shall hold the current inpcb pointer.  The KPI user is not supposed to
  * unlock the current inpcb!  Upon end of traversal inp_next() will return NULL
  * and write NULL to its context.  After end of traversal an iterator can be
  * reused.
  *
  * List traversals have the following features/constraints:
  * - New entries won't be seen, as they are always added to the head of a list.
  * - Removed entries won't stop traversal as long as they are not added to
  *   a different list. This is violated by in_pcbrehash().
  */
 #define	II_LIST_FIRST(ipi, hash)					\
 		(((hash) == INP_ALL_LIST) ?				\
 		    CK_LIST_FIRST(&(ipi)->ipi_listhead) :		\
 		    CK_LIST_FIRST(&(ipi)->ipi_hash_exact[(hash)]))
 #define	II_LIST_NEXT(inp, hash)						\
 		(((hash) == INP_ALL_LIST) ?				\
 		    CK_LIST_NEXT((inp), inp_list) :			\
 		    CK_LIST_NEXT((inp), inp_hash_exact))
 #define	II_LOCK_ASSERT(inp, lock)					\
 		rw_assert(&(inp)->inp_lock,				\
 		    (lock) == INPLOOKUP_RLOCKPCB ?  RA_RLOCKED : RA_WLOCKED )
 struct inpcb *
 inp_next(struct inpcb_iterator *ii)
 {
 	const struct inpcbinfo *ipi = ii->ipi;
 	inp_match_t *match = ii->match;
 	void *ctx = ii->ctx;
 	inp_lookup_t lock = ii->lock;
 	int hash = ii->hash;
 	struct inpcb *inp;
 
 	if (ii->inp == NULL) {		/* First call. */
 		smr_enter(ipi->ipi_smr);
 		/* This is unrolled CK_LIST_FOREACH(). */
 		for (inp = II_LIST_FIRST(ipi, hash);
 		    inp != NULL;
 		    inp = II_LIST_NEXT(inp, hash)) {
 			if (match != NULL && (match)(inp, ctx) == false)
 				continue;
 			if (__predict_true(_inp_smr_lock(inp, lock, INP_FREED)))
 				break;
 			else {
 				smr_enter(ipi->ipi_smr);
 				MPASS(inp != II_LIST_FIRST(ipi, hash));
 				inp = II_LIST_FIRST(ipi, hash);
 				if (inp == NULL)
 					break;
 			}
 		}
 
 		if (inp == NULL)
 			smr_exit(ipi->ipi_smr);
 		else
 			ii->inp = inp;
 
 		return (inp);
 	}
 
 	/* Not a first call. */
 	smr_enter(ipi->ipi_smr);
 restart:
 	inp = ii->inp;
 	II_LOCK_ASSERT(inp, lock);
 next:
 	inp = II_LIST_NEXT(inp, hash);
 	if (inp == NULL) {
 		smr_exit(ipi->ipi_smr);
 		goto found;
 	}
 
 	if (match != NULL && (match)(inp, ctx) == false)
 		goto next;
 
 	if (__predict_true(inp_trylock(inp, lock))) {
 		if (__predict_false(inp->inp_flags & INP_FREED)) {
 			/*
 			 * Entries are never inserted in middle of a list, thus
 			 * as long as we are in SMR, we can continue traversal.
 			 * Jump to 'restart' should yield in the same result,
 			 * but could produce unnecessary looping.  Could this
 			 * looping be unbound?
 			 */
 			inp_unlock(inp, lock);
 			goto next;
 		} else {
 			smr_exit(ipi->ipi_smr);
 			goto found;
 		}
 	}
 
 	/*
 	 * Can't obtain lock immediately, thus going hard.  Once we exit the
 	 * SMR section we can no longer jump to 'next', and our only stable
 	 * anchoring point is ii->inp, which we keep locked for this case, so
 	 * we jump to 'restart'.
 	 */
 	if (__predict_true(refcount_acquire_if_not_zero(&inp->inp_refcount))) {
 		smr_exit(ipi->ipi_smr);
 		inp_lock(inp, lock);
 		if (__predict_false(in_pcbrele(inp, lock))) {
 			smr_enter(ipi->ipi_smr);
 			goto restart;
 		}
 		/*
 		 * See comment in inp_smr_lock().
 		 */
 		if (__predict_false(inp->inp_flags & INP_FREED)) {
 			inp_unlock(inp, lock);
 			smr_enter(ipi->ipi_smr);
 			goto restart;
 		}
 	} else
 		goto next;
 
 found:
 	inp_unlock(ii->inp, lock);
 	ii->inp = inp;
 
 	return (ii->inp);
 }
 
 /*
  * in_pcbref() bumps the reference count on an inpcb in order to maintain
  * stability of an inpcb pointer despite the inpcb lock being released or
  * SMR section exited.
  *
  * To free a reference later in_pcbrele_(r|w)locked() must be performed.
  */
 void
 in_pcbref(struct inpcb *inp)
 {
 	u_int old __diagused;
 
 	old = refcount_acquire(&inp->inp_refcount);
 	KASSERT(old > 0, ("%s: refcount 0", __func__));
 }
 
 /*
  * Drop a refcount on an inpcb elevated using in_pcbref(), potentially
  * freeing the pcb, if the reference was very last.
  */
 bool
 in_pcbrele_rlocked(struct inpcb *inp)
 {
 
 	INP_RLOCK_ASSERT(inp);
 
 	if (!refcount_release(&inp->inp_refcount))
 		return (false);
 
 	MPASS(inp->inp_flags & INP_FREED);
 	MPASS(inp->inp_socket == NULL);
 	crfree(inp->inp_cred);
 #ifdef INVARIANTS
 	inp->inp_cred = NULL;
 #endif
 	INP_RUNLOCK(inp);
 	uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
 	return (true);
 }
 
 bool
 in_pcbrele_wlocked(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 
 	if (!refcount_release(&inp->inp_refcount))
 		return (false);
 
 	MPASS(inp->inp_flags & INP_FREED);
 	MPASS(inp->inp_socket == NULL);
 	crfree(inp->inp_cred);
 #ifdef INVARIANTS
 	inp->inp_cred = NULL;
 #endif
 	INP_WUNLOCK(inp);
 	uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
 	return (true);
 }
 
 bool
 in_pcbrele(struct inpcb *inp, const inp_lookup_t lock)
 {
 
 	return (lock == INPLOOKUP_RLOCKPCB ?
 	    in_pcbrele_rlocked(inp) : in_pcbrele_wlocked(inp));
 }
 
 /*
  * Unconditionally schedule an inpcb to be freed by decrementing its
  * reference count, which should occur only after the inpcb has been detached
  * from its socket.  If another thread holds a temporary reference (acquired
  * using in_pcbref()) then the free is deferred until that reference is
  * released using in_pcbrele_(r|w)locked(), but the inpcb is still unlocked.
  *  Almost all work, including removal from global lists, is done in this
  * context, where the pcbinfo lock is held.
  */
 void
 in_pcbfree(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 #ifdef INET
 	struct ip_moptions *imo;
 #endif
 #ifdef INET6
 	struct ip6_moptions *im6o;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
-	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
 	KASSERT((inp->inp_flags & INP_FREED) == 0,
 	    ("%s: called twice for pcb %p", __func__, inp));
 
-	inp->inp_flags |= INP_FREED;
+	/*
+	 * in_pcblookup_local() and in6_pcblookup_local() may return an inpcb
+	 * from the hash without acquiring inpcb lock, they rely on the hash
+	 * lock, thus in_pcbremhash() should be the first action.
+	 */
+	if (inp->inp_flags & INP_INHASHLIST)
+		in_pcbremhash(inp);
 	INP_INFO_WLOCK(pcbinfo);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	pcbinfo->ipi_count--;
 	CK_LIST_REMOVE(inp, inp_list);
 	INP_INFO_WUNLOCK(pcbinfo);
 
-	if (inp->inp_flags & INP_INHASHLIST)
-		in_pcbremhash(inp);
+#ifdef RATELIMIT
+	if (inp->inp_snd_tag != NULL)
+		in_pcbdetach_txrtlmt(inp);
+#endif
+	inp->inp_flags |= INP_FREED;
+	inp->inp_socket->so_pcb = NULL;
+	inp->inp_socket = NULL;
 
 	RO_INVALIDATE_CACHE(&inp->inp_route);
 #ifdef MAC
 	mac_inpcb_destroy(inp);
 #endif
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	if (inp->inp_sp != NULL)
 		ipsec_delete_pcbpolicy(inp);
 #endif
 #ifdef INET
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 	imo = inp->inp_moptions;
 #endif
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6PROTO) {
 		ip6_freepcbopts(inp->in6p_outputopts);
 		im6o = inp->in6p_moptions;
 	} else
 		im6o = NULL;
 #endif
 
 	if (__predict_false(in_pcbrele_wlocked(inp) == false)) {
 		INP_WUNLOCK(inp);
 	}
 #ifdef INET6
 	ip6_freemoptions(im6o);
 #endif
 #ifdef INET
 	inp_freemoptions(imo);
 #endif
 }
 
 /*
  * Different protocols initialize their inpcbs differently - giving
  * different name to the lock.  But they all are disposed the same.
  */
 static void
 inpcb_fini(void *mem, int size)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_DESTROY(inp);
 }
 
 /*
  * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
  * port reservation, and preventing it from being returned by inpcb lookups.
  *
  * It is used by TCP to mark an inpcb as unused and avoid future packet
  * delivery or event notification when a socket remains open but TCP has
  * closed.  This might occur as a result of a shutdown()-initiated TCP close
  * or a RST on the wire, and allows the port binding to be reused while still
  * maintaining the invariant that so_pcb always points to a valid inpcb until
  * in_pcbdetach().
  *
  * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
  * in_pcbnotifyall() and in_pcbpurgeif0()?
  */
 void
 in_pcbdrop(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 #ifdef INVARIANTS
 	if (inp->inp_socket != NULL && inp->inp_ppcb != NULL)
 		MPASS(inp->inp_refcount > 1);
 #endif
 
 	inp->inp_flags |= INP_DROPPED;
 	if (inp->inp_flags & INP_INHASHLIST)
 		in_pcbremhash(inp);
 }
 
 #ifdef INET
 /*
  * Common routines to return the socket addresses associated with inpcbs.
  */
 struct sockaddr *
 in_sockaddr(in_port_t port, struct in_addr *addr_p)
 {
 	struct sockaddr_in *sin;
 
 	sin = malloc(sizeof *sin, M_SONAME,
 		M_WAITOK | M_ZERO);
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = *addr_p;
 	sin->sin_port = port;
 
 	return (struct sockaddr *)sin;
 }
 
 int
 in_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_lport;
 	addr = inp->inp_laddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 int
 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_fport;
 	addr = inp->inp_faddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 void
 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
     struct inpcb *(*notify)(struct inpcb *, int))
 {
 	struct inpcb *inp, *inp_temp;
 
 	INP_INFO_WLOCK(pcbinfo);
 	CK_LIST_FOREACH_SAFE(inp, &pcbinfo->ipi_listhead, inp_list, inp_temp) {
 		INP_WLOCK(inp);
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV4) == 0) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 #endif
 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
 		    inp->inp_socket == NULL) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 		if ((*notify)(inp, errno))
 			INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 static bool
 inp_v4_multi_match(const struct inpcb *inp, void *v __unused)
 {
 
 	if ((inp->inp_vflag & INP_IPV4) && inp->inp_moptions != NULL)
 		return (true);
 	else
 		return (false);
 }
 
 void
 in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
 {
 	struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_WLOCKPCB,
 	    inp_v4_multi_match, NULL);
 	struct inpcb *inp;
 	struct in_multi *inm;
 	struct in_mfilter *imf;
 	struct ip_moptions *imo;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	while ((inp = inp_next(&inpi)) != NULL) {
 		INP_WLOCK_ASSERT(inp);
 
 		imo = inp->inp_moptions;
 		/*
 		 * Unselect the outgoing interface if it is being
 		 * detached.
 		 */
 		if (imo->imo_multicast_ifp == ifp)
 			imo->imo_multicast_ifp = NULL;
 
 		/*
 		 * Drop multicast group membership if we joined
 		 * through the interface being detached.
 		 *
 		 * XXX This can all be deferred to an epoch_call
 		 */
 restart:
 		IP_MFILTER_FOREACH(imf, &imo->imo_head) {
 			if ((inm = imf->imf_inm) == NULL)
 				continue;
 			if (inm->inm_ifp != ifp)
 				continue;
 			ip_mfilter_remove(&imo->imo_head, imf);
 			in_leavegroup_locked(inm, NULL);
 			ip_mfilter_free(imf);
 			goto restart;
 		}
 	}
 }
 
 /*
  * Lookup a PCB based on the local address and port.  Caller must hold the
  * hash lock.  No inpcb locks or references are acquired.
  */
 #define INP_LOOKUP_MAPPED_PCB_COST	3
 struct inpcb *
 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
     u_short lport, int lookupflags, struct ucred *cred)
 {
 	struct inpcb *inp;
 #ifdef INET6
 	int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
 #else
 	int matchwild = 3;
 #endif
 	int wildcard;
 
 	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
 		struct inpcbhead *head;
 		/*
 		 * Look for an unconnected (wildcard foreign addr) PCB that
 		 * matches the local address and port we're looking for.
 		 */
 		head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport,
 		    pcbinfo->ipi_hashmask)];
 		CK_LIST_FOREACH(inp, head, inp_hash_wild) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
 			    inp->inp_laddr.s_addr == laddr.s_addr &&
 			    inp->inp_lport == lport) {
 				/*
 				 * Found?
 				 */
 				if (prison_equal_ip4(cred->cr_prison,
 				    inp->inp_cred->cr_prison))
 					return (inp);
 			}
 		}
 		/*
 		 * Not found.
 		 */
 		return (NULL);
 	} else {
 		struct inpcbporthead *porthash;
 		struct inpcbport *phd;
 		struct inpcb *match = NULL;
 		/*
 		 * Best fit PCB lookup.
 		 *
 		 * First see if this local port is in use by looking on the
 		 * port hash list.
 		 */
 		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
 		    pcbinfo->ipi_porthashmask)];
 		CK_LIST_FOREACH(phd, porthash, phd_hash) {
 			if (phd->phd_port == lport)
 				break;
 		}
 		if (phd != NULL) {
 			/*
 			 * Port is in use by one or more PCBs. Look for best
 			 * fit.
 			 */
 			CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 				wildcard = 0;
 				if (!prison_equal_ip4(inp->inp_cred->cr_prison,
 				    cred->cr_prison))
 					continue;
 #ifdef INET6
 				/* XXX inp locking */
 				if ((inp->inp_vflag & INP_IPV4) == 0)
 					continue;
 				/*
 				 * We never select the PCB that has
 				 * INP_IPV6 flag and is bound to :: if
 				 * we have another PCB which is bound
 				 * to 0.0.0.0.  If a PCB has the
 				 * INP_IPV6 flag, then we set its cost
 				 * higher than IPv4 only PCBs.
 				 *
 				 * Note that the case only happens
 				 * when a socket is bound to ::, under
 				 * the condition that the use of the
 				 * mapped address is allowed.
 				 */
 				if ((inp->inp_vflag & INP_IPV6) != 0)
 					wildcard += INP_LOOKUP_MAPPED_PCB_COST;
 #endif
 				if (inp->inp_faddr.s_addr != INADDR_ANY)
 					wildcard++;
 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
 					if (laddr.s_addr == INADDR_ANY)
 						wildcard++;
 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
 						continue;
 				} else {
 					if (laddr.s_addr != INADDR_ANY)
 						wildcard++;
 				}
 				if (wildcard < matchwild) {
 					match = inp;
 					matchwild = wildcard;
 					if (matchwild == 0)
 						break;
 				}
 			}
 		}
 		return (match);
 	}
 }
 #undef INP_LOOKUP_MAPPED_PCB_COST
 
 static bool
 in_pcblookup_lb_numa_match(const struct inpcblbgroup *grp, int domain)
 {
 	return (domain == M_NODOM || domain == grp->il_numa_domain);
 }
 
 static struct inpcb *
 in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
     const struct in_addr *faddr, uint16_t fport, const struct in_addr *laddr,
     uint16_t lport, int domain)
 {
 	const struct inpcblbgrouphead *hdr;
 	struct inpcblbgroup *grp;
 	struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild;
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	hdr = &pcbinfo->ipi_lbgrouphashbase[
 	    INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
 
 	/*
 	 * Search for an LB group match based on the following criteria:
 	 * - prefer jailed groups to non-jailed groups
 	 * - prefer exact source address matches to wildcard matches
 	 * - prefer groups bound to the specified NUMA domain
 	 */
 	jail_exact = jail_wild = local_exact = local_wild = NULL;
 	CK_LIST_FOREACH(grp, hdr, il_list) {
 		bool injail;
 
 #ifdef INET6
 		if (!(grp->il_vflag & INP_IPV4))
 			continue;
 #endif
 		if (grp->il_lport != lport)
 			continue;
 
 		injail = prison_flag(grp->il_cred, PR_IP4) != 0;
 		if (injail && prison_check_ip4_locked(grp->il_cred->cr_prison,
 		    laddr) != 0)
 			continue;
 
 		if (grp->il_laddr.s_addr == laddr->s_addr) {
 			if (injail) {
 				jail_exact = grp;
 				if (in_pcblookup_lb_numa_match(grp, domain))
 					/* This is a perfect match. */
 					goto out;
 			} else if (local_exact == NULL ||
 			    in_pcblookup_lb_numa_match(grp, domain)) {
 				local_exact = grp;
 			}
 		} else if (grp->il_laddr.s_addr == INADDR_ANY) {
 			if (injail) {
 				if (jail_wild == NULL ||
 				    in_pcblookup_lb_numa_match(grp, domain))
 					jail_wild = grp;
 			} else if (local_wild == NULL ||
 			    in_pcblookup_lb_numa_match(grp, domain)) {
 				local_wild = grp;
 			}
 		}
 	}
 
 	if (jail_exact != NULL)
 		grp = jail_exact;
 	else if (jail_wild != NULL)
 		grp = jail_wild;
 	else if (local_exact != NULL)
 		grp = local_exact;
 	else
 		grp = local_wild;
 	if (grp == NULL)
 		return (NULL);
 out:
 	return (grp->il_inp[INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) %
 	    grp->il_inpcnt]);
 }
 
 static bool
 in_pcblookup_exact_match(const struct inpcb *inp, struct in_addr faddr,
     u_short fport, struct in_addr laddr, u_short lport)
 {
 #ifdef INET6
 	/* XXX inp locking */
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (false);
 #endif
 	if (inp->inp_faddr.s_addr == faddr.s_addr &&
 	    inp->inp_laddr.s_addr == laddr.s_addr &&
 	    inp->inp_fport == fport &&
 	    inp->inp_lport == lport)
 		return (true);
 	return (false);
 }
 
 static struct inpcb *
 in_pcblookup_hash_exact(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_short fport, struct in_addr laddr, u_short lport)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp;
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	head = &pcbinfo->ipi_hash_exact[INP_PCBHASH(&faddr, lport, fport,
 	    pcbinfo->ipi_hashmask)];
 	CK_LIST_FOREACH(inp, head, inp_hash_exact) {
 		if (in_pcblookup_exact_match(inp, faddr, fport, laddr, lport))
 			return (inp);
 	}
 	return (NULL);
 }
 
 typedef enum {
 	INPLOOKUP_MATCH_NONE = 0,
 	INPLOOKUP_MATCH_WILD = 1,
 	INPLOOKUP_MATCH_LADDR = 2,
 } inp_lookup_match_t;
 
 static inp_lookup_match_t
 in_pcblookup_wild_match(const struct inpcb *inp, struct in_addr laddr,
     u_short lport)
 {
 #ifdef INET6
 	/* XXX inp locking */
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (INPLOOKUP_MATCH_NONE);
 #endif
 	if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport)
 		return (INPLOOKUP_MATCH_NONE);
 	if (inp->inp_laddr.s_addr == INADDR_ANY)
 		return (INPLOOKUP_MATCH_WILD);
 	if (inp->inp_laddr.s_addr == laddr.s_addr)
 		return (INPLOOKUP_MATCH_LADDR);
 	return (INPLOOKUP_MATCH_NONE);
 }
 
 #define	INP_LOOKUP_AGAIN	((struct inpcb *)(uintptr_t)-1)
 
 static struct inpcb *
 in_pcblookup_hash_wild_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_short fport, struct in_addr laddr, u_short lport,
     const inp_lookup_t lockflags)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp;
 
 	KASSERT(SMR_ENTERED(pcbinfo->ipi_smr),
 	    ("%s: not in SMR read section", __func__));
 
 	head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport,
 	    pcbinfo->ipi_hashmask)];
 	CK_LIST_FOREACH(inp, head, inp_hash_wild) {
 		inp_lookup_match_t match;
 
 		match = in_pcblookup_wild_match(inp, laddr, lport);
 		if (match == INPLOOKUP_MATCH_NONE)
 			continue;
 
 		if (__predict_true(inp_smr_lock(inp, lockflags))) {
 			match = in_pcblookup_wild_match(inp, laddr, lport);
 			if (match != INPLOOKUP_MATCH_NONE &&
 			    prison_check_ip4_locked(inp->inp_cred->cr_prison,
 			    &laddr) == 0)
 				return (inp);
 			inp_unlock(inp, lockflags);
 		}
 
 		/*
 		 * The matching socket disappeared out from under us.  Fall back
 		 * to a serialized lookup.
 		 */
 		return (INP_LOOKUP_AGAIN);
 	}
 	return (NULL);
 }
 
 static struct inpcb *
 in_pcblookup_hash_wild_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_short fport, struct in_addr laddr, u_short lport)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *local_wild, *local_exact, *jail_wild;
 #ifdef INET6
 	struct inpcb *local_wild_mapped;
 #endif
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	/*
 	 * Order of socket selection - we always prefer jails.
 	 *      1. jailed, non-wild.
 	 *      2. jailed, wild.
 	 *      3. non-jailed, non-wild.
 	 *      4. non-jailed, wild.
 	 */
 	head = &pcbinfo->ipi_hash_wild[INP_PCBHASH_WILD(lport,
 	    pcbinfo->ipi_hashmask)];
 	local_wild = local_exact = jail_wild = NULL;
 #ifdef INET6
 	local_wild_mapped = NULL;
 #endif
 	CK_LIST_FOREACH(inp, head, inp_hash_wild) {
 		inp_lookup_match_t match;
 		bool injail;
 
 		match = in_pcblookup_wild_match(inp, laddr, lport);
 		if (match == INPLOOKUP_MATCH_NONE)
 			continue;
 
 		injail = prison_flag(inp->inp_cred, PR_IP4) != 0;
 		if (injail) {
 			if (prison_check_ip4_locked(inp->inp_cred->cr_prison,
 			    &laddr) != 0)
 				continue;
 		} else {
 			if (local_exact != NULL)
 				continue;
 		}
 
 		if (match == INPLOOKUP_MATCH_LADDR) {
 			if (injail)
 				return (inp);
 			local_exact = inp;
 		} else {
 #ifdef INET6
 			/* XXX inp locking, NULL check */
 			if (inp->inp_vflag & INP_IPV6PROTO)
 				local_wild_mapped = inp;
 			else
 #endif
 				if (injail)
 					jail_wild = inp;
 				else
 					local_wild = inp;
 		}
 	}
 	if (jail_wild != NULL)
 		return (jail_wild);
 	if (local_exact != NULL)
 		return (local_exact);
 	if (local_wild != NULL)
 		return (local_wild);
 #ifdef INET6
 	if (local_wild_mapped != NULL)
 		return (local_wild_mapped);
 #endif
 	return (NULL);
 }
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation assumes
  * that the caller has either locked the hash list, which usually happens
  * for bind(2) operations, or is in SMR section, which happens when sorting
  * out incoming packets.
  */
 static struct inpcb *
 in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
     uint8_t numa_domain)
 {
 	struct inpcb *inp;
 	const u_short fport = fport_arg, lport = lport_arg;
 
 	KASSERT((lookupflags & ~INPLOOKUP_WILDCARD) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT(faddr.s_addr != INADDR_ANY,
 	    ("%s: invalid foreign address", __func__));
 	KASSERT(laddr.s_addr != INADDR_ANY,
 	    ("%s: invalid local address", __func__));
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	inp = in_pcblookup_hash_exact(pcbinfo, faddr, fport, laddr, lport);
 	if (inp != NULL)
 		return (inp);
 
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		inp = in_pcblookup_lbgroup(pcbinfo, &faddr, fport,
 		    &laddr, lport, numa_domain);
 		if (inp == NULL) {
 			inp = in_pcblookup_hash_wild_locked(pcbinfo, faddr,
 			    fport, laddr, lport);
 		}
 	}
 
 	return (inp);
 }
 
 static struct inpcb *
 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
     uint8_t numa_domain)
 {
 	struct inpcb *inp;
 	const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK;
 
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 	INP_HASH_WLOCK(pcbinfo);
 	inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags & ~INPLOOKUP_LOCKMASK, numa_domain);
 	if (inp != NULL && !inp_trylock(inp, lockflags)) {
 		in_pcbref(inp);
 		INP_HASH_WUNLOCK(pcbinfo);
 		inp_lock(inp, lockflags);
 		if (in_pcbrele(inp, lockflags))
 			/* XXX-MJ or retry until we get a negative match? */
 			inp = NULL;
 	} else {
 		INP_HASH_WUNLOCK(pcbinfo);
 	}
 	return (inp);
 }
 
 static struct inpcb *
 in_pcblookup_hash_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
     uint8_t numa_domain)
 {
 	struct inpcb *inp;
 	const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK;
 	const u_short fport = fport_arg, lport = lport_arg;
 
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 	smr_enter(pcbinfo->ipi_smr);
 	inp = in_pcblookup_hash_exact(pcbinfo, faddr, fport, laddr, lport);
 	if (inp != NULL) {
 		if (__predict_true(inp_smr_lock(inp, lockflags))) {
 			/*
 			 * Revalidate the 4-tuple, the socket could have been
 			 * disconnected.
 			 */
 			if (__predict_true(in_pcblookup_exact_match(inp,
 			    faddr, fport, laddr, lport)))
 				return (inp);
 			inp_unlock(inp, lockflags);
 		}
 
 		/*
 		 * We failed to lock the inpcb, or its connection state changed
 		 * out from under us.  Fall back to a precise search.
 		 */
 		return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 		    lookupflags, numa_domain));
 	}
 
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		inp = in_pcblookup_lbgroup(pcbinfo, &faddr, fport,
 		    &laddr, lport, numa_domain);
 		if (inp != NULL) {
 			if (__predict_true(inp_smr_lock(inp, lockflags))) {
 				if (__predict_true(in_pcblookup_wild_match(inp,
 				    laddr, lport) != INPLOOKUP_MATCH_NONE))
 					return (inp);
 				inp_unlock(inp, lockflags);
 			}
 			inp = INP_LOOKUP_AGAIN;
 		} else {
 			inp = in_pcblookup_hash_wild_smr(pcbinfo, faddr, fport,
 			    laddr, lport, lockflags);
 		}
 		if (inp == INP_LOOKUP_AGAIN) {
 			return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr,
 			    lport, lookupflags, numa_domain));
 		}
 	}
 
 	if (inp == NULL)
 		smr_exit(pcbinfo->ipi_smr);
 
 	return (inp);
 }
 
 /*
  * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
  * from which a pre-calculated hash value may be extracted.
  */
 struct inpcb *
 in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
     struct in_addr laddr, u_int lport, int lookupflags,
     struct ifnet *ifp __unused)
 {
 	return (in_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, M_NODOM));
 }
 
 struct inpcb *
 in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
     struct ifnet *ifp __unused, struct mbuf *m)
 {
 	return (in_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, m->m_pkthdr.numa_domain));
 }
 #endif /* INET */
 
 static bool
 in_pcbjailed(const struct inpcb *inp, unsigned int flag)
 {
 	return (prison_flag(inp->inp_cred, flag) != 0);
 }
 
 /*
  * Insert the PCB into a hash chain using ordering rules which ensure that
  * in_pcblookup_hash_wild_*() always encounter the highest-ranking PCB first.
  *
  * Specifically, keep jailed PCBs in front of non-jailed PCBs, and keep PCBs
  * with exact local addresses ahead of wildcard PCBs.  Unbound v4-mapped v6 PCBs
  * always appear last no matter whether they are jailed.
  */
 static void
 _in_pcbinshash_wild(struct inpcbhead *pcbhash, struct inpcb *inp)
 {
 	struct inpcb *last;
 	bool bound, injail;
 
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	last = NULL;
 	bound = inp->inp_laddr.s_addr != INADDR_ANY;
 	if (!bound && (inp->inp_vflag & INP_IPV6PROTO) != 0) {
 		CK_LIST_FOREACH(last, pcbhash, inp_hash_wild) {
 			if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
 				CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
 				return;
 			}
 		}
 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
 		return;
 	}
 
 	injail = in_pcbjailed(inp, PR_IP4);
 	if (!injail) {
 		CK_LIST_FOREACH(last, pcbhash, inp_hash_wild) {
 			if (!in_pcbjailed(last, PR_IP4))
 				break;
 			if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
 				CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
 				return;
 			}
 		}
 	} else if (!CK_LIST_EMPTY(pcbhash) &&
 	    !in_pcbjailed(CK_LIST_FIRST(pcbhash), PR_IP4)) {
 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
 		return;
 	}
 	if (!bound) {
 		CK_LIST_FOREACH_FROM(last, pcbhash, inp_hash_wild) {
 			if (last->inp_laddr.s_addr == INADDR_ANY)
 				break;
 			if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
 				CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
 				return;
 			}
 		}
 	}
 	if (last == NULL)
 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
 	else
 		CK_LIST_INSERT_BEFORE(last, inp, inp_hash_wild);
 }
 
 #ifdef INET6
 /*
  * See the comment above _in_pcbinshash_wild().
  */
 static void
 _in6_pcbinshash_wild(struct inpcbhead *pcbhash, struct inpcb *inp)
 {
 	struct inpcb *last;
 	bool bound, injail;
 
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	last = NULL;
 	bound = !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr);
 	injail = in_pcbjailed(inp, PR_IP6);
 	if (!injail) {
 		CK_LIST_FOREACH(last, pcbhash, inp_hash_wild) {
 			if (!in_pcbjailed(last, PR_IP6))
 				break;
 			if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
 				CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
 				return;
 			}
 		}
 	} else if (!CK_LIST_EMPTY(pcbhash) &&
 	    !in_pcbjailed(CK_LIST_FIRST(pcbhash), PR_IP6)) {
 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
 		return;
 	}
 	if (!bound) {
 		CK_LIST_FOREACH_FROM(last, pcbhash, inp_hash_wild) {
 			if (IN6_IS_ADDR_UNSPECIFIED(&last->in6p_laddr))
 				break;
 			if (CK_LIST_NEXT(last, inp_hash_wild) == NULL) {
 				CK_LIST_INSERT_AFTER(last, inp, inp_hash_wild);
 				return;
 			}
 		}
 	}
 	if (last == NULL)
 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_wild);
 	else
 		CK_LIST_INSERT_BEFORE(last, inp, inp_hash_wild);
 }
 #endif
 
 /*
  * Insert PCB onto various hash lists.
  */
 int
 in_pcbinshash(struct inpcb *inp)
 {
 	struct inpcbhead *pcbhash;
 	struct inpcbporthead *pcbporthash;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbport *phd;
 	uint32_t hash;
 	bool connected;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 	KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
 	    ("in_pcbinshash: INP_INHASHLIST"));
 
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		hash = INP6_PCBHASH(&inp->in6p_faddr, inp->inp_lport,
 		    inp->inp_fport, pcbinfo->ipi_hashmask);
 		connected = !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr);
 	} else
 #endif
 	{
 		hash = INP_PCBHASH(&inp->inp_faddr, inp->inp_lport,
 		    inp->inp_fport, pcbinfo->ipi_hashmask);
 		connected = !in_nullhost(inp->inp_faddr);
 	}
 
 	if (connected)
 		pcbhash = &pcbinfo->ipi_hash_exact[hash];
 	else
 		pcbhash = &pcbinfo->ipi_hash_wild[hash];
 
 	pcbporthash = &pcbinfo->ipi_porthashbase[
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
 
 	/*
 	 * Add entry to load balance group.
 	 * Only do this if SO_REUSEPORT_LB is set.
 	 */
 	if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
 		int error = in_pcbinslbgrouphash(inp, M_NODOM);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Go through port list and look for a head for this lport.
 	 */
 	CK_LIST_FOREACH(phd, pcbporthash, phd_hash) {
 		if (phd->phd_port == inp->inp_lport)
 			break;
 	}
 
 	/*
 	 * If none exists, malloc one and tack it on.
 	 */
 	if (phd == NULL) {
 		phd = uma_zalloc_smr(pcbinfo->ipi_portzone, M_NOWAIT);
 		if (phd == NULL) {
 			if ((inp->inp_flags & INP_INLBGROUP) != 0)
 				in_pcbremlbgrouphash(inp);
 			return (ENOMEM);
 		}
 		phd->phd_port = inp->inp_lport;
 		CK_LIST_INIT(&phd->phd_pcblist);
 		CK_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
 	}
 	inp->inp_phd = phd;
 	CK_LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
 
 	/*
 	 * The PCB may have been disconnected in the past.  Before we can safely
 	 * make it visible in the hash table, we must wait for all readers which
 	 * may be traversing this PCB to finish.
 	 */
 	if (inp->inp_smr != SMR_SEQ_INVALID) {
 		smr_wait(pcbinfo->ipi_smr, inp->inp_smr);
 		inp->inp_smr = SMR_SEQ_INVALID;
 	}
 
 	if (connected)
 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_exact);
 	else {
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV6) != 0)
 			_in6_pcbinshash_wild(pcbhash, inp);
 		else
 #endif
 			_in_pcbinshash_wild(pcbhash, inp);
 	}
 	inp->inp_flags |= INP_INHASHLIST;
 
 	return (0);
 }
 
 void
 in_pcbremhash_locked(struct inpcb *inp)
 {
 	struct inpcbport *phd = inp->inp_phd;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 	MPASS(inp->inp_flags & INP_INHASHLIST);
 
 	if ((inp->inp_flags & INP_INLBGROUP) != 0)
 		in_pcbremlbgrouphash(inp);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
 			CK_LIST_REMOVE(inp, inp_hash_wild);
 		else
 			CK_LIST_REMOVE(inp, inp_hash_exact);
 	} else
 #endif
 	{
 		if (in_nullhost(inp->inp_faddr))
 			CK_LIST_REMOVE(inp, inp_hash_wild);
 		else
 			CK_LIST_REMOVE(inp, inp_hash_exact);
 	}
 	CK_LIST_REMOVE(inp, inp_portlist);
 	if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
 		CK_LIST_REMOVE(phd, phd_hash);
 		uma_zfree_smr(inp->inp_pcbinfo->ipi_portzone, phd);
 	}
 	inp->inp_flags &= ~INP_INHASHLIST;
 }
 
 static void
 in_pcbremhash(struct inpcb *inp)
 {
 	INP_HASH_WLOCK(inp->inp_pcbinfo);
 	in_pcbremhash_locked(inp);
 	INP_HASH_WUNLOCK(inp->inp_pcbinfo);
 }
 
 /*
  * Move PCB to the proper hash bucket when { faddr, fport } have  been
  * changed. NOTE: This does not handle the case of the lport changing (the
  * hashed port list would have to be updated as well), so the lport must
  * not change after in_pcbinshash() has been called.
  */
 void
 in_pcbrehash(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbhead *head;
 	uint32_t hash;
 	bool connected;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 	KASSERT(inp->inp_flags & INP_INHASHLIST,
 	    ("%s: !INP_INHASHLIST", __func__));
 	KASSERT(inp->inp_smr == SMR_SEQ_INVALID,
 	    ("%s: inp was disconnected", __func__));
 
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		hash = INP6_PCBHASH(&inp->in6p_faddr, inp->inp_lport,
 		    inp->inp_fport, pcbinfo->ipi_hashmask);
 		connected = !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr);
 	} else
 #endif
 	{
 		hash = INP_PCBHASH(&inp->inp_faddr, inp->inp_lport,
 		    inp->inp_fport, pcbinfo->ipi_hashmask);
 		connected = !in_nullhost(inp->inp_faddr);
 	}
 
 	/*
 	 * When rehashing, the caller must ensure that either the new or the old
 	 * foreign address was unspecified.
 	 */
 	if (connected)
 		CK_LIST_REMOVE(inp, inp_hash_wild);
 	else
 		CK_LIST_REMOVE(inp, inp_hash_exact);
 
 	if (connected) {
 		head = &pcbinfo->ipi_hash_exact[hash];
 		CK_LIST_INSERT_HEAD(head, inp, inp_hash_exact);
 	} else {
 		head = &pcbinfo->ipi_hash_wild[hash];
 		CK_LIST_INSERT_HEAD(head, inp, inp_hash_wild);
 	}
 }
 
 /*
  * Check for alternatives when higher level complains
  * about service problems.  For now, invalidate cached
  * routing information.  If the route was created dynamically
  * (by a redirect), time to try a default gateway again.
  */
 void
 in_losing(struct inpcb *inp)
 {
 
 	RO_INVALIDATE_CACHE(&inp->inp_route);
 	return;
 }
 
 /*
  * A set label operation has occurred at the socket layer, propagate the
  * label change into the in_pcb for the socket.
  */
 void
 in_pcbsosetlabel(struct socket *so)
 {
 #ifdef MAC
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
 
 	INP_WLOCK(inp);
 	SOCK_LOCK(so);
 	mac_inpcb_sosetlabel(so, inp);
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 #endif
 }
 
 void
 inp_wlock(struct inpcb *inp)
 {
 
 	INP_WLOCK(inp);
 }
 
 void
 inp_wunlock(struct inpcb *inp)
 {
 
 	INP_WUNLOCK(inp);
 }
 
 void
 inp_rlock(struct inpcb *inp)
 {
 
 	INP_RLOCK(inp);
 }
 
 void
 inp_runlock(struct inpcb *inp)
 {
 
 	INP_RUNLOCK(inp);
 }
 
 #ifdef INVARIANT_SUPPORT
 void
 inp_lock_assert(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 }
 
 void
 inp_unlock_assert(struct inpcb *inp)
 {
 
 	INP_UNLOCK_ASSERT(inp);
 }
 #endif
 
 void
 inp_apply_all(struct inpcbinfo *pcbinfo,
     void (*func)(struct inpcb *, void *), void *arg)
 {
 	struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,
 	    INPLOOKUP_WLOCKPCB);
 	struct inpcb *inp;
 
 	while ((inp = inp_next(&inpi)) != NULL)
 		func(inp, arg);
 }
 
 struct socket *
 inp_inpcbtosocket(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	return (inp->inp_socket);
 }
 
 struct tcpcb *
 inp_inpcbtotcpcb(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	return ((struct tcpcb *)inp->inp_ppcb);
 }
 
 int
 inp_ip_tos_get(const struct inpcb *inp)
 {
 
 	return (inp->inp_ip_tos);
 }
 
 void
 inp_ip_tos_set(struct inpcb *inp, int val)
 {
 
 	inp->inp_ip_tos = val;
 }
 
 void
 inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
     uint32_t *faddr, uint16_t *fp)
 {
 
 	INP_LOCK_ASSERT(inp);
 	*laddr = inp->inp_laddr.s_addr;
 	*faddr = inp->inp_faddr.s_addr;
 	*lp = inp->inp_lport;
 	*fp = inp->inp_fport;
 }
 
 struct inpcb *
 so_sotoinpcb(struct socket *so)
 {
 
 	return (sotoinpcb(so));
 }
 
 /*
  * Create an external-format (``xinpcb'') structure using the information in
  * the kernel-format in_pcb structure pointed to by inp.  This is done to
  * reduce the spew of irrelevant information over this interface, to isolate
  * user code from changes in the kernel structure, and potentially to provide
  * information-hiding if we decide that some of this information should be
  * hidden from users.
  */
 void
 in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi)
 {
 
 	bzero(xi, sizeof(*xi));
 	xi->xi_len = sizeof(struct xinpcb);
 	if (inp->inp_socket)
 		sotoxsocket(inp->inp_socket, &xi->xi_socket);
 	bcopy(&inp->inp_inc, &xi->inp_inc, sizeof(struct in_conninfo));
 	xi->inp_gencnt = inp->inp_gencnt;
 	xi->inp_ppcb = (uintptr_t)inp->inp_ppcb;
 	xi->inp_flow = inp->inp_flow;
 	xi->inp_flowid = inp->inp_flowid;
 	xi->inp_flowtype = inp->inp_flowtype;
 	xi->inp_flags = inp->inp_flags;
 	xi->inp_flags2 = inp->inp_flags2;
 	xi->in6p_cksum = inp->in6p_cksum;
 	xi->in6p_hops = inp->in6p_hops;
 	xi->inp_ip_tos = inp->inp_ip_tos;
 	xi->inp_vflag = inp->inp_vflag;
 	xi->inp_ip_ttl = inp->inp_ip_ttl;
 	xi->inp_ip_p = inp->inp_ip_p;
 	xi->inp_ip_minttl = inp->inp_ip_minttl;
 }
 
 int
 sysctl_setsockopt(SYSCTL_HANDLER_ARGS, struct inpcbinfo *pcbinfo,
     int (*ctloutput_set)(struct inpcb *, struct sockopt *))
 {
 	struct sockopt sopt;
 	struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,
 	    INPLOOKUP_WLOCKPCB);
 	struct inpcb *inp;
 	struct sockopt_parameters *params;
 	struct socket *so;
 	int error;
 	char buf[1024];
 
 	if (req->oldptr != NULL || req->oldlen != 0)
 		return (EINVAL);
 	if (req->newptr == NULL)
 		return (EPERM);
 	if (req->newlen > sizeof(buf))
 		return (ENOMEM);
 	error = SYSCTL_IN(req, buf, req->newlen);
 	if (error != 0)
 		return (error);
 	if (req->newlen < sizeof(struct sockopt_parameters))
 		return (EINVAL);
 	params = (struct sockopt_parameters *)buf;
 	sopt.sopt_level = params->sop_level;
 	sopt.sopt_name = params->sop_optname;
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_val = params->sop_optval;
 	sopt.sopt_valsize = req->newlen - sizeof(struct sockopt_parameters);
 	sopt.sopt_td = NULL;
 #ifdef INET6
 	if (params->sop_inc.inc_flags & INC_ISIPV6) {
 		if (IN6_IS_SCOPE_LINKLOCAL(&params->sop_inc.inc6_laddr))
 			params->sop_inc.inc6_laddr.s6_addr16[1] =
 			    htons(params->sop_inc.inc6_zoneid & 0xffff);
 		if (IN6_IS_SCOPE_LINKLOCAL(&params->sop_inc.inc6_faddr))
 			params->sop_inc.inc6_faddr.s6_addr16[1] =
 			    htons(params->sop_inc.inc6_zoneid & 0xffff);
 	}
 #endif
 	if (params->sop_inc.inc_lport != htons(0)) {
 		if (params->sop_inc.inc_fport == htons(0))
 			inpi.hash = INP_PCBHASH_WILD(params->sop_inc.inc_lport,
 			    pcbinfo->ipi_hashmask);
 		else
 #ifdef INET6
 			if (params->sop_inc.inc_flags & INC_ISIPV6)
 				inpi.hash = INP6_PCBHASH(
 				    &params->sop_inc.inc6_faddr,
 				    params->sop_inc.inc_lport,
 				    params->sop_inc.inc_fport,
 				    pcbinfo->ipi_hashmask);
 			else
 #endif
 				inpi.hash = INP_PCBHASH(
 				    &params->sop_inc.inc_faddr,
 				    params->sop_inc.inc_lport,
 				    params->sop_inc.inc_fport,
 				    pcbinfo->ipi_hashmask);
 	}
 	while ((inp = inp_next(&inpi)) != NULL)
 		if (inp->inp_gencnt == params->sop_id) {
 			if (inp->inp_flags & INP_DROPPED) {
 				INP_WUNLOCK(inp);
 				return (ECONNRESET);
 			}
 			so = inp->inp_socket;
 			KASSERT(so != NULL, ("inp_socket == NULL"));
 			soref(so);
 			error = (*ctloutput_set)(inp, &sopt);
 			sorele(so);
 			break;
 		}
 	if (inp == NULL)
 		error = ESRCH;
 	return (error);
 }
 
 #ifdef DDB
 static void
 db_print_indent(int indent)
 {
 	int i;
 
 	for (i = 0; i < indent; i++)
 		db_printf(" ");
 }
 
 static void
 db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
 {
 	char faddr_str[48], laddr_str[48];
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inc);
 
 	indent += 2;
 
 #ifdef INET6
 	if (inc->inc_flags & INC_ISIPV6) {
 		/* IPv6. */
 		ip6_sprintf(laddr_str, &inc->inc6_laddr);
 		ip6_sprintf(faddr_str, &inc->inc6_faddr);
 	} else
 #endif
 	{
 		/* IPv4. */
 		inet_ntoa_r(inc->inc_laddr, laddr_str);
 		inet_ntoa_r(inc->inc_faddr, faddr_str);
 	}
 	db_print_indent(indent);
 	db_printf("inc_laddr %s   inc_lport %u\n", laddr_str,
 	    ntohs(inc->inc_lport));
 	db_print_indent(indent);
 	db_printf("inc_faddr %s   inc_fport %u\n", faddr_str,
 	    ntohs(inc->inc_fport));
 }
 
 static void
 db_print_inpflags(int inp_flags)
 {
 	int comma;
 
 	comma = 0;
 	if (inp_flags & INP_RECVOPTS) {
 		db_printf("%sINP_RECVOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVRETOPTS) {
 		db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVDSTADDR) {
 		db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_ORIGDSTADDR) {
 		db_printf("%sINP_ORIGDSTADDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_HDRINCL) {
 		db_printf("%sINP_HDRINCL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_HIGHPORT) {
 		db_printf("%sINP_HIGHPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_LOWPORT) {
 		db_printf("%sINP_LOWPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_ANONPORT) {
 		db_printf("%sINP_ANONPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVIF) {
 		db_printf("%sINP_RECVIF", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_MTUDISC) {
 		db_printf("%sINP_MTUDISC", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVTTL) {
 		db_printf("%sINP_RECVTTL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_DONTFRAG) {
 		db_printf("%sINP_DONTFRAG", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVTOS) {
 		db_printf("%sINP_RECVTOS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_IPV6_V6ONLY) {
 		db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_PKTINFO) {
 		db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_HOPLIMIT) {
 		db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_HOPOPTS) {
 		db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_DSTOPTS) {
 		db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_RTHDR) {
 		db_printf("%sIN6P_RTHDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_RTHDRDSTOPTS) {
 		db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_TCLASS) {
 		db_printf("%sIN6P_TCLASS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_AUTOFLOWLABEL) {
 		db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_ONESBCAST) {
 		db_printf("%sINP_ONESBCAST", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_flags & INP_DROPPED) {
 		db_printf("%sINP_DROPPED", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_flags & INP_SOCKREF) {
 		db_printf("%sINP_SOCKREF", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_flags & IN6P_RFC2292) {
 		db_printf("%sIN6P_RFC2292", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_MTU) {
 		db_printf("IN6P_MTU%s", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_inpvflag(u_char inp_vflag)
 {
 	int comma;
 
 	comma = 0;
 	if (inp_vflag & INP_IPV4) {
 		db_printf("%sINP_IPV4", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_IPV6) {
 		db_printf("%sINP_IPV6", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_IPV6PROTO) {
 		db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
 		comma  = 1;
 	}
 }
 
 static void
 db_print_inpcb(struct inpcb *inp, const char *name, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inp);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("inp_flow: 0x%x\n", inp->inp_flow);
 
 	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
 
 	db_print_indent(indent);
 	db_printf("inp_ppcb: %p   inp_pcbinfo: %p   inp_socket: %p\n",
 	    inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
 
 	db_print_indent(indent);
 	db_printf("inp_label: %p   inp_flags: 0x%x (",
 	   inp->inp_label, inp->inp_flags);
 	db_print_inpflags(inp->inp_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_sp: %p   inp_vflag: 0x%x (", inp->inp_sp,
 	    inp->inp_vflag);
 	db_print_inpvflag(inp->inp_vflag);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_ip_ttl: %d   inp_ip_p: %d   inp_ip_minttl: %d\n",
 	    inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
 
 	db_print_indent(indent);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		db_printf("in6p_options: %p   in6p_outputopts: %p   "
 		    "in6p_moptions: %p\n", inp->in6p_options,
 		    inp->in6p_outputopts, inp->in6p_moptions);
 		db_printf("in6p_icmp6filt: %p   in6p_cksum %d   "
 		    "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
 		    inp->in6p_hops);
 	} else
 #endif
 	{
 		db_printf("inp_ip_tos: %d   inp_ip_options: %p   "
 		    "inp_ip_moptions: %p\n", inp->inp_ip_tos,
 		    inp->inp_options, inp->inp_moptions);
 	}
 
 	db_print_indent(indent);
 	db_printf("inp_phd: %p   inp_gencnt: %ju\n", inp->inp_phd,
 	    (uintmax_t)inp->inp_gencnt);
 }
 
 DB_SHOW_COMMAND(inpcb, db_show_inpcb)
 {
 	struct inpcb *inp;
 
 	if (!have_addr) {
 		db_printf("usage: show inpcb <addr>\n");
 		return;
 	}
 	inp = (struct inpcb *)addr;
 
 	db_print_inpcb(inp, "inpcb", 0);
 }
 #endif /* DDB */
 
 #ifdef RATELIMIT
 /*
  * Modify TX rate limit based on the existing "inp->inp_snd_tag",
  * if any.
  */
 int
 in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate)
 {
 	union if_snd_tag_modify_params params = {
 		.rate_limit.max_rate = max_pacing_rate,
 		.rate_limit.flags = M_NOWAIT,
 	};
 	struct m_snd_tag *mst;
 	int error;
 
 	mst = inp->inp_snd_tag;
 	if (mst == NULL)
 		return (EINVAL);
 
 	if (mst->sw->snd_tag_modify == NULL) {
 		error = EOPNOTSUPP;
 	} else {
 		error = mst->sw->snd_tag_modify(mst, &params);
 	}
 	return (error);
 }
 
 /*
  * Query existing TX rate limit based on the existing
  * "inp->inp_snd_tag", if any.
  */
 int
 in_pcbquery_txrtlmt(struct inpcb *inp, uint32_t *p_max_pacing_rate)
 {
 	union if_snd_tag_query_params params = { };
 	struct m_snd_tag *mst;
 	int error;
 
 	mst = inp->inp_snd_tag;
 	if (mst == NULL)
 		return (EINVAL);
 
 	if (mst->sw->snd_tag_query == NULL) {
 		error = EOPNOTSUPP;
 	} else {
 		error = mst->sw->snd_tag_query(mst, &params);
 		if (error == 0 && p_max_pacing_rate != NULL)
 			*p_max_pacing_rate = params.rate_limit.max_rate;
 	}
 	return (error);
 }
 
 /*
  * Query existing TX queue level based on the existing
  * "inp->inp_snd_tag", if any.
  */
 int
 in_pcbquery_txrlevel(struct inpcb *inp, uint32_t *p_txqueue_level)
 {
 	union if_snd_tag_query_params params = { };
 	struct m_snd_tag *mst;
 	int error;
 
 	mst = inp->inp_snd_tag;
 	if (mst == NULL)
 		return (EINVAL);
 
 	if (mst->sw->snd_tag_query == NULL)
 		return (EOPNOTSUPP);
 
 	error = mst->sw->snd_tag_query(mst, &params);
 	if (error == 0 && p_txqueue_level != NULL)
 		*p_txqueue_level = params.rate_limit.queue_level;
 	return (error);
 }
 
 /*
  * Allocate a new TX rate limit send tag from the network interface
  * given by the "ifp" argument and save it in "inp->inp_snd_tag":
  */
 int
 in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
     uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate, struct m_snd_tag **st)
 
 {
 	union if_snd_tag_alloc_params params = {
 		.rate_limit.hdr.type = (max_pacing_rate == -1U) ?
 		    IF_SND_TAG_TYPE_UNLIMITED : IF_SND_TAG_TYPE_RATE_LIMIT,
 		.rate_limit.hdr.flowid = flowid,
 		.rate_limit.hdr.flowtype = flowtype,
 		.rate_limit.hdr.numa_domain = inp->inp_numa_domain,
 		.rate_limit.max_rate = max_pacing_rate,
 		.rate_limit.flags = M_NOWAIT,
 	};
 	int error;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * If there is already a send tag, or the INP is being torn
 	 * down, allocating a new send tag is not allowed. Else send
 	 * tags may leak.
 	 */
 	if (*st != NULL || (inp->inp_flags & INP_DROPPED) != 0)
 		return (EINVAL);
 
 	error = m_snd_tag_alloc(ifp, &params, st);
 #ifdef INET
 	if (error == 0) {
 		counter_u64_add(rate_limit_set_ok, 1);
 		counter_u64_add(rate_limit_active, 1);
 	} else if (error != EOPNOTSUPP)
 		  counter_u64_add(rate_limit_alloc_fail, 1);
 #endif
 	return (error);
 }
 
 void
 in_pcbdetach_tag(struct m_snd_tag *mst)
 {
 
 	m_snd_tag_rele(mst);
 #ifdef INET
 	counter_u64_add(rate_limit_active, -1);
 #endif
 }
 
 /*
  * Free an existing TX rate limit tag based on the "inp->inp_snd_tag",
  * if any:
  */
 void
 in_pcbdetach_txrtlmt(struct inpcb *inp)
 {
 	struct m_snd_tag *mst;
 
 	INP_WLOCK_ASSERT(inp);
 
 	mst = inp->inp_snd_tag;
 	inp->inp_snd_tag = NULL;
 
 	if (mst == NULL)
 		return;
 
 	m_snd_tag_rele(mst);
 #ifdef INET
 	counter_u64_add(rate_limit_active, -1);
 #endif
 }
 
 int
 in_pcboutput_txrtlmt_locked(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb, uint32_t max_pacing_rate)
 {
 	int error;
 
 	/*
 	 * If the existing send tag is for the wrong interface due to
 	 * a route change, first drop the existing tag.  Set the
 	 * CHANGED flag so that we will keep trying to allocate a new
 	 * tag if we fail to allocate one this time.
 	 */
 	if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
 		in_pcbdetach_txrtlmt(inp);
 		inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
 	}
 
 	/*
 	 * NOTE: When attaching to a network interface a reference is
 	 * made to ensure the network interface doesn't go away until
 	 * all ratelimit connections are gone. The network interface
 	 * pointers compared below represent valid network interfaces,
 	 * except when comparing towards NULL.
 	 */
 	if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
 		error = 0;
 	} else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
 		if (inp->inp_snd_tag != NULL)
 			in_pcbdetach_txrtlmt(inp);
 		error = 0;
 	} else if (inp->inp_snd_tag == NULL) {
 		/*
 		 * In order to utilize packet pacing with RSS, we need
 		 * to wait until there is a valid RSS hash before we
 		 * can proceed:
 		 */
 		if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
 			error = EAGAIN;
 		} else {
 			error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
 			    mb->m_pkthdr.flowid, max_pacing_rate, &inp->inp_snd_tag);
 		}
 	} else {
 		error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
 	}
 	if (error == 0 || error == EOPNOTSUPP)
 		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
 
 	return (error);
 }
 
 /*
  * This function should be called when the INP_RATE_LIMIT_CHANGED flag
  * is set in the fast path and will attach/detach/modify the TX rate
  * limit send tag based on the socket's so_max_pacing_rate value.
  */
 void
 in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
 {
 	struct socket *socket;
 	uint32_t max_pacing_rate;
 	bool did_upgrade;
 
 	if (inp == NULL)
 		return;
 
 	socket = inp->inp_socket;
 	if (socket == NULL)
 		return;
 
 	if (!INP_WLOCKED(inp)) {
 		/*
 		 * NOTE: If the write locking fails, we need to bail
 		 * out and use the non-ratelimited ring for the
 		 * transmit until there is a new chance to get the
 		 * write lock.
 		 */
 		if (!INP_TRY_UPGRADE(inp))
 			return;
 		did_upgrade = 1;
 	} else {
 		did_upgrade = 0;
 	}
 
 	/*
 	 * NOTE: The so_max_pacing_rate value is read unlocked,
 	 * because atomic updates are not required since the variable
 	 * is checked at every mbuf we send. It is assumed that the
 	 * variable read itself will be atomic.
 	 */
 	max_pacing_rate = socket->so_max_pacing_rate;
 
 	in_pcboutput_txrtlmt_locked(inp, ifp, mb, max_pacing_rate);
 
 	if (did_upgrade)
 		INP_DOWNGRADE(inp);
 }
 
 /*
  * Track route changes for TX rate limiting.
  */
 void
 in_pcboutput_eagain(struct inpcb *inp)
 {
 	bool did_upgrade;
 
 	if (inp == NULL)
 		return;
 
 	if (inp->inp_snd_tag == NULL)
 		return;
 
 	if (!INP_WLOCKED(inp)) {
 		/*
 		 * NOTE: If the write locking fails, we need to bail
 		 * out and use the non-ratelimited ring for the
 		 * transmit until there is a new chance to get the
 		 * write lock.
 		 */
 		if (!INP_TRY_UPGRADE(inp))
 			return;
 		did_upgrade = 1;
 	} else {
 		did_upgrade = 0;
 	}
 
 	/* detach rate limiting */
 	in_pcbdetach_txrtlmt(inp);
 
 	/* make sure new mbuf send tag allocation is made */
 	inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
 
 	if (did_upgrade)
 		INP_DOWNGRADE(inp);
 }
 
 #ifdef INET
 static void
 rl_init(void *st)
 {
 	rate_limit_new = counter_u64_alloc(M_WAITOK);
 	rate_limit_chg = counter_u64_alloc(M_WAITOK);
 	rate_limit_active = counter_u64_alloc(M_WAITOK);
 	rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK);
 	rate_limit_set_ok = counter_u64_alloc(M_WAITOK);
 }
 
 SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL);
 #endif
 #endif /* RATELIMIT */
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 19d281937b52..4844bbee3b54 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -1,749 +1,748 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
  */
 
 #ifndef _NETINET_IN_PCB_H_
 #define _NETINET_IN_PCB_H_
 
 #include <sys/queue.h>
 #include <sys/epoch.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_rwlock.h>
 #include <sys/_smr.h>
 #include <net/route.h>
 
 #ifdef _KERNEL
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <net/vnet.h>
 #include <vm/uma.h>
 #endif
 #include <sys/ck.h>
 
 /*
  * struct inpcb is the common protocol control block structure used in most
  * IP transport protocols.
  *
  * Pointers to local and foreign host table entries, local and foreign socket
  * numbers, and pointers up (to a socket structure) and down (to a
  * protocol-specific control block) are stored here.
  */
 CK_LIST_HEAD(inpcbhead, inpcb);
 CK_LIST_HEAD(inpcbporthead, inpcbport);
 CK_LIST_HEAD(inpcblbgrouphead, inpcblbgroup);
 typedef	uint64_t	inp_gen_t;
 
 /*
  * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
  * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
  * the following structure.  This requires padding always be zeroed out,
  * which is done right after inpcb allocation and stays through its lifetime.
  */
 struct in_addr_4in6 {
 	u_int32_t	ia46_pad32[3];
 	struct	in_addr	ia46_addr4;
 };
 
 union in_dependaddr {
 	struct in_addr_4in6 id46_addr;
 	struct in6_addr	id6_addr;
 };
 
 /*
  * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553.  in_conninfo has
  * some extra padding to accomplish this.
  * NOTE 2: tcp_syncache.c uses first 5 32-bit words, which identify fport,
  * lport, faddr to generate hash, so these fields shouldn't be moved.
  */
 struct in_endpoints {
 	u_int16_t	ie_fport;		/* foreign port */
 	u_int16_t	ie_lport;		/* local port */
 	/* protocol dependent part, local and foreign addr */
 	union in_dependaddr ie_dependfaddr;	/* foreign host table entry */
 	union in_dependaddr ie_dependladdr;	/* local host table entry */
 #define	ie_faddr	ie_dependfaddr.id46_addr.ia46_addr4
 #define	ie_laddr	ie_dependladdr.id46_addr.ia46_addr4
 #define	ie6_faddr	ie_dependfaddr.id6_addr
 #define	ie6_laddr	ie_dependladdr.id6_addr
 	u_int32_t	ie6_zoneid;		/* scope zone id */
 };
 
 /*
  * XXX The defines for inc_* are hacks and should be changed to direct
  * references.
  */
 struct in_conninfo {
 	u_int8_t	inc_flags;
 	u_int8_t	inc_len;
 	u_int16_t	inc_fibnum;	/* XXX was pad, 16 bits is plenty */
 	/* protocol dependent part */
 	struct	in_endpoints inc_ie;
 };
 
 /*
  * Flags for inc_flags.
  */
 #define	INC_ISIPV6	0x01
 #define	INC_IPV6MINMTU	0x02
 
 #define	inc_fport	inc_ie.ie_fport
 #define	inc_lport	inc_ie.ie_lport
 #define	inc_faddr	inc_ie.ie_faddr
 #define	inc_laddr	inc_ie.ie_laddr
 #define	inc6_faddr	inc_ie.ie6_faddr
 #define	inc6_laddr	inc_ie.ie6_laddr
 #define	inc6_zoneid	inc_ie.ie6_zoneid
 
 #if defined(_KERNEL) || defined(_WANT_INPCB)
 /*
  * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
  * IPv6 sockets.  In the case of TCP and UDP, further per-connection state is
  * hung off of inp_ppcb most of the time.  Almost all fields of struct inpcb
  * are static after creation or protected by a per-inpcb rwlock, inp_lock.
  *
  * A inpcb database is indexed by addresses/ports hash as well as list of
  * all pcbs that belong to a certain proto. Database lookups or list traversals
  * are be performed inside SMR section. Once desired PCB is found its own
  * lock is to be obtained and SMR section exited.
  *
  * Key:
  * (c) - Constant after initialization
  * (e) - Protected by the SMR section
  * (i) - Protected by the inpcb lock
  * (p) - Protected by the pcbinfo lock for the inpcb
  * (h) - Protected by the pcbhash lock for the inpcb
  * (s) - Protected by another subsystem's locks
  * (x) - Undefined locking
  *
  * A few other notes:
  *
  * When a read lock is held, stability of the field is guaranteed; to write
  * to a field, a write lock must generally be held.
  *
  * netinet/netinet6-layer code should not assume that the inp_socket pointer
  * is safe to dereference without inp_lock being held, there may be
  * close(2)-related races.
  *
  * The inp_vflag field is overloaded, and would otherwise ideally be (c).
  */
 struct icmp6_filter;
 struct inpcbpolicy;
 struct m_snd_tag;
 struct inpcb {
 	/* Cache line #1 (amd64) */
 	CK_LIST_ENTRY(inpcb) inp_hash_exact;	/* hash table linkage */
 	CK_LIST_ENTRY(inpcb) inp_hash_wild;	/* hash table linkage */
 	struct rwlock	inp_lock;
 	/* Cache line #2 (amd64) */
 #define	inp_start_zero	inp_refcount
 #define	inp_zero_size	(sizeof(struct inpcb) - \
 			    offsetof(struct inpcb, inp_start_zero))
 	u_int	inp_refcount;		/* (i) refcount */
 	int	inp_flags;		/* (i) generic IP/datagram flags */
 	int	inp_flags2;		/* (i) generic IP/datagram flags #2*/
 	uint8_t inp_numa_domain;	/* numa domain */
 	void	*inp_ppcb;		/* (i) pointer to per-protocol pcb */
 	struct	socket *inp_socket;	/* (i) back pointer to socket */
 	struct	inpcbinfo *inp_pcbinfo;	/* (c) PCB list info */
 	struct	ucred	*inp_cred;	/* (c) cache of socket cred */
 	u_int32_t inp_flow;		/* (i) IPv6 flow information */
 	u_char	inp_vflag;		/* (i) IP version flag (v4/v6) */
 	u_char	inp_ip_ttl;		/* (i) time to live proto */
 	u_char	inp_ip_p;		/* (c) protocol proto */
 	u_char	inp_ip_minttl;		/* (i) minimum TTL or drop */
 	uint32_t inp_flowid;		/* (x) flow id / queue id */
 	smr_seq_t inp_smr;		/* (i) sequence number at disconnect */
 	struct m_snd_tag *inp_snd_tag;	/* (i) send tag for outgoing mbufs */
 	uint32_t inp_flowtype;		/* (x) M_HASHTYPE value */
 
 	/* Local and foreign ports, local and foreign addr. */
 	struct	in_conninfo inp_inc;	/* (i,h) list for PCB's local port */
 
 	/* MAC and IPSEC policy information. */
 	struct	label *inp_label;	/* (i) MAC label */
 	struct	inpcbpolicy *inp_sp;    /* (s) for IPSEC */
 
 	/* Protocol-dependent part; options. */
 	struct {
 		u_char	inp_ip_tos;		/* (i) type of service proto */
 		struct mbuf		*inp_options;	/* (i) IP options */
 		struct ip_moptions	*inp_moptions;	/* (i) mcast options */
 	};
 	struct {
 		/* (i) IP options */
 		struct mbuf		*in6p_options;
 		/* (i) IP6 options for outgoing packets */
 		struct ip6_pktopts	*in6p_outputopts;
 		/* (i) IP multicast options */
 		struct ip6_moptions	*in6p_moptions;
 		/* (i) ICMPv6 code type filter */
 		struct icmp6_filter	*in6p_icmp6filt;
 		/* (i) IPV6_CHECKSUM setsockopt */
 		int	in6p_cksum;
 		short	in6p_hops;
 	};
 	CK_LIST_ENTRY(inpcb) inp_portlist;	/* (r:e/w:h) port list */
 	struct	inpcbport *inp_phd;	/* (r:e/w:h) head of this list */
 	inp_gen_t	inp_gencnt;	/* (c) generation count */
 	void		*spare_ptr;	/* Spare pointer. */
 	rt_gen_t	inp_rt_cookie;	/* generation for route entry */
 	union {				/* cached L3 information */
 		struct route inp_route;
 		struct route_in6 inp_route6;
 	};
 	CK_LIST_ENTRY(inpcb) inp_list;	/* (r:e/w:p) all PCBs for proto */
 };
 #endif	/* _KERNEL */
 
 #define	inp_fport	inp_inc.inc_fport
 #define	inp_lport	inp_inc.inc_lport
 #define	inp_faddr	inp_inc.inc_faddr
 #define	inp_laddr	inp_inc.inc_laddr
 
 #define	in6p_faddr	inp_inc.inc6_faddr
 #define	in6p_laddr	inp_inc.inc6_laddr
 #define	in6p_zoneid	inp_inc.inc6_zoneid
 
 #define	inp_vnet	inp_pcbinfo->ipi_vnet
 
 /*
  * The range of the generation count, as used in this implementation, is 9e19.
  * We would have to create 300 billion connections per second for this number
  * to roll over in a year.  This seems sufficiently unlikely that we simply
  * don't concern ourselves with that possibility.
  */
 
 /*
  * Interface exported to userland by various protocols which use inpcbs.  Hack
  * alert -- only define if struct xsocket is in scope.
  * Fields prefixed with "xi_" are unique to this structure, and the rest
  * match fields in the struct inpcb, to ease coding and porting.
  *
  * Legend:
  * (s) - used by userland utilities in src
  * (p) - used by utilities in ports
  * (3) - is known to be used by third party software not in ports
  * (n) - no known usage
  */
 #ifdef _SYS_SOCKETVAR_H_
 struct xinpcb {
 	ksize_t		xi_len;			/* length of this structure */
 	struct xsocket	xi_socket;		/* (s,p) */
 	struct in_conninfo inp_inc;		/* (s,p) */
 	uint64_t	inp_gencnt;		/* (s,p) */
 	kvaddr_t	inp_ppcb;		/* (s) netstat(1) */
 	int64_t		inp_spare64[4];
 	uint32_t	inp_flow;		/* (s) */
 	uint32_t	inp_flowid;		/* (s) */
 	uint32_t	inp_flowtype;		/* (s) */
 	int32_t		inp_flags;		/* (s,p) */
 	int32_t		inp_flags2;		/* (s) */
 	uint32_t	inp_unused;
 	int32_t		in6p_cksum;		/* (n) */
 	int32_t		inp_spare32[4];
 	uint16_t	in6p_hops;		/* (n) */
 	uint8_t		inp_ip_tos;		/* (n) */
 	int8_t		pad8;
 	uint8_t		inp_vflag;		/* (s,p) */
 	uint8_t		inp_ip_ttl;		/* (n) */
 	uint8_t		inp_ip_p;		/* (n) */
 	uint8_t		inp_ip_minttl;		/* (n) */
 	int8_t		inp_spare8[4];
 } __aligned(8);
 
 struct xinpgen {
 	ksize_t	xig_len;	/* length of this structure */
 	u_int		xig_count;	/* number of PCBs at this time */
 	uint32_t	_xig_spare32;
 	inp_gen_t	xig_gen;	/* generation count at this time */
 	so_gen_t	xig_sogen;	/* socket generation count this time */
 	uint64_t	_xig_spare64[4];
 } __aligned(8);
 
 struct sockopt_parameters {
 	struct in_conninfo sop_inc;
 	uint64_t sop_id;
 	int sop_level;
 	int sop_optname;
 	char sop_optval[];
 };
 
 #ifdef	_KERNEL
 int	sysctl_setsockopt(SYSCTL_HANDLER_ARGS, struct inpcbinfo *pcbinfo,
 	    int (*ctloutput_set)(struct inpcb *, struct sockopt *));
 void	in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *);
 #endif
 #endif /* _SYS_SOCKETVAR_H_ */
 
 #ifdef _KERNEL
 /*
  * Per-VNET pcb database for each high-level protocol (UDP, TCP, ...) in both
  * IPv4 and IPv6.
  *
  * The pcbs are protected with SMR section and thus all lists in inpcbinfo
  * are CK-lists.  Locking is required to insert a pcb into database. Two
  * locks are provided: one for the hash and one for the global list of pcbs,
  * as well as overall count and generation count.
  *
  * Locking key:
  *
  * (c) Constant or nearly constant after initialisation
  * (e) Protected by SMR section
  * (g) Locked by ipi_lock
  * (h) Locked by ipi_hash_lock
  */
 struct inpcbinfo {
 	/*
 	 * Global lock protecting inpcb list modification
 	 */
 	struct mtx		 ipi_lock;
 	struct inpcbhead	 ipi_listhead;		/* (r:e/w:g) */
 	u_int			 ipi_count;		/* (g) */
 
 	/*
 	 * Generation count -- incremented each time a connection is allocated
 	 * or freed.
 	 */
 	u_quad_t		 ipi_gencnt;		/* (g) */
 
 	/*
 	 * Fields associated with port lookup and allocation.
 	 */
 	u_short			 ipi_lastport;		/* (h) */
 	u_short			 ipi_lastlow;		/* (h) */
 	u_short			 ipi_lasthi;		/* (h) */
 
 	/*
 	 * UMA zone from which inpcbs are allocated for this protocol.
 	 */
 	uma_zone_t		 ipi_zone;		/* (c) */
 	uma_zone_t		 ipi_portzone;		/* (c) */
 	smr_t			 ipi_smr;		/* (c) */
 
 	/*
 	 * Global hash of inpcbs, hashed by local and foreign addresses and
 	 * port numbers.  The "exact" hash holds PCBs connected to a foreign
 	 * address, and "wild" holds the rest.
 	 */
 	struct mtx		 ipi_hash_lock;
 	struct inpcbhead 	*ipi_hash_exact;	/* (r:e/w:h) */
 	struct inpcbhead 	*ipi_hash_wild;		/* (r:e/w:h) */
 	u_long			 ipi_hashmask;		/* (c) */
 
 	/*
 	 * Global hash of inpcbs, hashed by only local port number.
 	 */
 	struct inpcbporthead	*ipi_porthashbase;	/* (h) */
 	u_long			 ipi_porthashmask;	/* (h) */
 
 	/*
 	 * Load balance groups used for the SO_REUSEPORT_LB option,
 	 * hashed by local port.
 	 */
 	struct	inpcblbgrouphead *ipi_lbgrouphashbase;	/* (r:e/w:h) */
 	u_long			 ipi_lbgrouphashmask;	/* (h) */
 
 	/*
 	 * Pointer to network stack instance
 	 */
 	struct vnet		*ipi_vnet;		/* (c) */
 };
 
 /*
  * Global allocation storage for each high-level protocol (UDP, TCP, ...).
  * Each corresponding per-VNET inpcbinfo points into this one.
  */
 struct inpcbstorage {
 	uma_zone_t	ips_zone;
 	uma_zone_t	ips_portzone;
 	uma_init	ips_pcbinit;
 	size_t		ips_size;
 	const char *	ips_zone_name;
 	const char *	ips_portzone_name;
 	const char *	ips_infolock_name;
 	const char *	ips_hashlock_name;
 };
 
 #define INPCBSTORAGE_DEFINE(prot, ppcb, lname, zname, iname, hname)	\
 static int								\
 prot##_inpcb_init(void *mem, int size __unused, int flags __unused)	\
 {									\
 	struct inpcb *inp = mem;					\
 									\
 	rw_init_flags(&inp->inp_lock, lname, RW_RECURSE | RW_DUPOK);	\
 	return (0);							\
 }									\
 static struct inpcbstorage prot = {					\
 	.ips_size = sizeof(struct ppcb),				\
 	.ips_pcbinit = prot##_inpcb_init,				\
 	.ips_zone_name = zname,						\
 	.ips_portzone_name = zname " ports",				\
 	.ips_infolock_name = iname,					\
 	.ips_hashlock_name = hname,					\
 };									\
 SYSINIT(prot##_inpcbstorage_init, SI_SUB_PROTO_DOMAIN,			\
     SI_ORDER_SECOND, in_pcbstorage_init, &prot);			\
 SYSUNINIT(prot##_inpcbstorage_uninit, SI_SUB_PROTO_DOMAIN,		\
     SI_ORDER_SECOND, in_pcbstorage_destroy, &prot)
 
 /*
  * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
  * (or unique address:port combination) can be re-used at most
  * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which
  * is dynamically resized as processes bind/unbind to that specific group.
  */
 struct inpcblbgroup {
 	CK_LIST_ENTRY(inpcblbgroup) il_list;
 	struct epoch_context il_epoch_ctx;
 	struct ucred	*il_cred;
 	uint16_t	il_lport;			/* (c) */
 	u_char		il_vflag;			/* (c) */
 	uint8_t		il_numa_domain;
 	uint32_t	il_pad2;
 	union in_dependaddr il_dependladdr;		/* (c) */
 #define	il_laddr	il_dependladdr.id46_addr.ia46_addr4
 #define	il6_laddr	il_dependladdr.id6_addr
 	uint32_t	il_inpsiz; /* max count in il_inp[] (h) */
 	uint32_t	il_inpcnt; /* cur count in il_inp[] (h) */
 	struct inpcb	*il_inp[];			/* (h) */
 };
 
 #define INP_LOCK_DESTROY(inp)	rw_destroy(&(inp)->inp_lock)
 #define INP_RLOCK(inp)		rw_rlock(&(inp)->inp_lock)
 #define INP_WLOCK(inp)		rw_wlock(&(inp)->inp_lock)
 #define INP_TRY_RLOCK(inp)	rw_try_rlock(&(inp)->inp_lock)
 #define INP_TRY_WLOCK(inp)	rw_try_wlock(&(inp)->inp_lock)
 #define INP_RUNLOCK(inp)	rw_runlock(&(inp)->inp_lock)
 #define INP_WUNLOCK(inp)	rw_wunlock(&(inp)->inp_lock)
 #define INP_UNLOCK(inp)		rw_unlock(&(inp)->inp_lock)
 #define	INP_TRY_UPGRADE(inp)	rw_try_upgrade(&(inp)->inp_lock)
 #define	INP_DOWNGRADE(inp)	rw_downgrade(&(inp)->inp_lock)
 #define	INP_WLOCKED(inp)	rw_wowned(&(inp)->inp_lock)
 #define	INP_LOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_LOCKED)
 #define	INP_RLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_RLOCKED)
 #define	INP_WLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_WLOCKED)
 #define	INP_UNLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_UNLOCKED)
 
 /*
  * These locking functions are for inpcb consumers outside of sys/netinet,
  * more specifically, they were added for the benefit of TOE drivers. The
  * macros are reserved for use by the stack.
  */
 void inp_wlock(struct inpcb *);
 void inp_wunlock(struct inpcb *);
 void inp_rlock(struct inpcb *);
 void inp_runlock(struct inpcb *);
 
 #ifdef INVARIANT_SUPPORT
 void inp_lock_assert(struct inpcb *);
 void inp_unlock_assert(struct inpcb *);
 #else
 #define	inp_lock_assert(inp)	do {} while (0)
 #define	inp_unlock_assert(inp)	do {} while (0)
 #endif
 
 void	inp_apply_all(struct inpcbinfo *, void (*func)(struct inpcb *, void *),
 	    void *arg);
 int 	inp_ip_tos_get(const struct inpcb *inp);
 void 	inp_ip_tos_set(struct inpcb *inp, int val);
 struct socket *
 	inp_inpcbtosocket(struct inpcb *inp);
 struct tcpcb *
 	inp_inpcbtotcpcb(struct inpcb *inp);
 void 	inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
 		uint32_t *faddr, uint16_t *fp);
 
 #endif /* _KERNEL */
 
 #define INP_INFO_WLOCK(ipi)	mtx_lock(&(ipi)->ipi_lock)
 #define INP_INFO_WLOCKED(ipi)	mtx_owned(&(ipi)->ipi_lock)
 #define INP_INFO_WUNLOCK(ipi)	mtx_unlock(&(ipi)->ipi_lock)
 #define	INP_INFO_LOCK_ASSERT(ipi)	MPASS(SMR_ENTERED((ipi)->ipi_smr) || \
 					mtx_owned(&(ipi)->ipi_lock))
 #define INP_INFO_WLOCK_ASSERT(ipi)	mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
 #define INP_INFO_WUNLOCK_ASSERT(ipi)	\
 				mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED)
 
 #define	INP_HASH_WLOCK(ipi)		mtx_lock(&(ipi)->ipi_hash_lock)
 #define	INP_HASH_WUNLOCK(ipi)		mtx_unlock(&(ipi)->ipi_hash_lock)
 #define	INP_HASH_LOCK_ASSERT(ipi)	MPASS(SMR_ENTERED((ipi)->ipi_smr) || \
 					mtx_owned(&(ipi)->ipi_hash_lock))
 #define	INP_HASH_WLOCK_ASSERT(ipi)	mtx_assert(&(ipi)->ipi_hash_lock, \
 					MA_OWNED)
 
 /*
  * Wildcard matching hash is not just a microoptimisation!  The hash for
  * wildcard IPv4 and wildcard IPv6 must be the same, otherwise AF_INET6
  * wildcard bound pcb won't be able to receive AF_INET connections, while:
  * jenkins_hash(&zeroes, 1, s) != jenkins_hash(&zeroes, 4, s)
  * See also comment above struct in_addr_4in6.
  */
 #define	IN_ADDR_JHASH32(addr)						\
 	((addr)->s_addr == INADDR_ANY ? V_in_pcbhashseed :		\
 	    jenkins_hash32((&(addr)->s_addr), 1, V_in_pcbhashseed))
 #define	IN6_ADDR_JHASH32(addr)						\
 	(memcmp((addr), &in6addr_any, sizeof(in6addr_any)) == 0 ?	\
 	    V_in_pcbhashseed :						\
 	    jenkins_hash32((addr)->__u6_addr.__u6_addr32,		\
 	    nitems((addr)->__u6_addr.__u6_addr32), V_in_pcbhashseed))
 
 #define INP_PCBHASH(faddr, lport, fport, mask)				\
 	((IN_ADDR_JHASH32(faddr) ^ ntohs((lport) ^ (fport))) & (mask))
 #define	INP6_PCBHASH(faddr, lport, fport, mask)				\
 	((IN6_ADDR_JHASH32(faddr) ^ ntohs((lport) ^ (fport))) & (mask))
 
 #define	INP_PCBHASH_WILD(lport, mask)					\
 	((V_in_pcbhashseed ^ ntohs(lport)) & (mask))
 
 #define	INP_PCBLBGROUP_PKTHASH(faddr, lport, fport)			\
 	(IN_ADDR_JHASH32(faddr) ^ ntohs((lport) ^ (fport)))
 #define	INP6_PCBLBGROUP_PKTHASH(faddr, lport, fport)			\
 	(IN6_ADDR_JHASH32(faddr) ^ ntohs((lport) ^ (fport)))
 
 #define INP_PCBPORTHASH(lport, mask)	(ntohs((lport)) & (mask))
 
 /*
  * Flags for inp_vflags -- historically version flags only
  */
 #define	INP_IPV4	0x1
 #define	INP_IPV6	0x2
 #define	INP_IPV6PROTO	0x4		/* opened under IPv6 protocol */
 
 /*
  * Flags for inp_flags.
  */
 #define	INP_RECVOPTS		0x00000001 /* receive incoming IP options */
 #define	INP_RECVRETOPTS		0x00000002 /* receive IP options for reply */
 #define	INP_RECVDSTADDR		0x00000004 /* receive IP dst address */
 #define	INP_HDRINCL		0x00000008 /* user supplies entire IP header */
 #define	INP_HIGHPORT		0x00000010 /* user wants "high" port binding */
 #define	INP_LOWPORT		0x00000020 /* user wants "low" port binding */
 #define	INP_ANONPORT		0x00000040 /* read by netstat(1) */
 #define	INP_RECVIF		0x00000080 /* receive incoming interface */
 #define	INP_MTUDISC		0x00000100 /* user can do MTU discovery */
 /*	INP_FREED		0x00000200 private to in_pcb.c */
 #define	INP_RECVTTL		0x00000400 /* receive incoming IP TTL */
 #define	INP_DONTFRAG		0x00000800 /* don't fragment packet */
 #define	INP_BINDANY		0x00001000 /* allow bind to any address */
 #define	INP_INHASHLIST		0x00002000 /* in_pcbinshash() has been called */
 #define	INP_RECVTOS		0x00004000 /* receive incoming IP TOS */
 #define	IN6P_IPV6_V6ONLY	0x00008000 /* restrict AF_INET6 socket for v6 */
 #define	IN6P_PKTINFO		0x00010000 /* receive IP6 dst and I/F */
 #define	IN6P_HOPLIMIT		0x00020000 /* receive hoplimit */
 #define	IN6P_HOPOPTS		0x00040000 /* receive hop-by-hop options */
 #define	IN6P_DSTOPTS		0x00080000 /* receive dst options after rthdr */
 #define	IN6P_RTHDR		0x00100000 /* receive routing header */
 #define	IN6P_RTHDRDSTOPTS	0x00200000 /* receive dstoptions before rthdr */
 #define	IN6P_TCLASS		0x00400000 /* receive traffic class value */
 #define	IN6P_AUTOFLOWLABEL	0x00800000 /* attach flowlabel automatically */
 /*	INP_INLBGROUP		0x01000000 private to in_pcb.c */
 #define	INP_ONESBCAST		0x02000000 /* send all-ones broadcast */
 #define	INP_DROPPED		0x04000000 /* protocol drop flag */
 #define	INP_SOCKREF		0x08000000 /* strong socket reference */
 #define	INP_RESERVED_0          0x10000000 /* reserved field */
 #define	INP_RESERVED_1          0x20000000 /* reserved field */
 #define	IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
 #define	IN6P_MTU		0x80000000 /* receive path MTU */
 
 #define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
 				 INP_RECVIF|INP_RECVTTL|INP_RECVTOS|\
 				 IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
 				 IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\
 				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
 				 IN6P_MTU)
 
 /*
  * Flags for inp_flags2.
  */
 /*				0x00000001 */
 /*				0x00000002 */
 /*				0x00000004 */
 /*				0x00000008 */
 /*				0x00000010 */
 /*				0x00000020 */
 /*				0x00000040 */
 /*				0x00000080 */
 #define	INP_RECVFLOWID		0x00000100 /* populate recv datagram with flow info */
 #define	INP_RECVRSSBUCKETID	0x00000200 /* populate recv datagram with bucket id */
 #define	INP_RATE_LIMIT_CHANGED	0x00000400 /* rate limit needs attention */
 #define	INP_ORIGDSTADDR		0x00000800 /* receive IP dst address/port */
 /*				0x00001000 */
 /*				0x00002000 */
 /*				0x00004000 */
 /*				0x00008000 */
 /*				0x00010000 */
 #define INP_2PCP_SET		0x00020000 /* If the Eth PCP should be set explicitly */
 #define INP_2PCP_BIT0		0x00040000 /* Eth PCP Bit 0 */
 #define INP_2PCP_BIT1		0x00080000 /* Eth PCP Bit 1 */
 #define INP_2PCP_BIT2		0x00100000 /* Eth PCP Bit 2 */
 #define INP_2PCP_BASE	INP_2PCP_BIT0
 #define INP_2PCP_MASK	(INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2)
 #define INP_2PCP_SHIFT		18         /* shift PCP field in/out of inp_flags2 */
 
 /*
  * Flags passed to in_pcblookup*(), inp_smr_lock() and inp_next().
  */
 typedef	enum {
 	INPLOOKUP_WILDCARD = 0x00000001,	/* Allow wildcard sockets. */
 	INPLOOKUP_RLOCKPCB = 0x00000002,	/* Return inpcb read-locked. */
 	INPLOOKUP_WLOCKPCB = 0x00000004,	/* Return inpcb write-locked. */
 } inp_lookup_t;
 
 #define	INPLOOKUP_MASK	(INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
 	    INPLOOKUP_WLOCKPCB)
 #define	INPLOOKUP_LOCKMASK	(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)
 
 #define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
 
 #define	INP_SOCKAF(so) so->so_proto->pr_domain->dom_family
 
 #define	INP_CHECK_SOCKAF(so, af)	(INP_SOCKAF(so) == af)
 
 #ifdef _KERNEL
 VNET_DECLARE(int, ipport_reservedhigh);
 VNET_DECLARE(int, ipport_reservedlow);
 VNET_DECLARE(int, ipport_lowfirstauto);
 VNET_DECLARE(int, ipport_lowlastauto);
 VNET_DECLARE(int, ipport_firstauto);
 VNET_DECLARE(int, ipport_lastauto);
 VNET_DECLARE(int, ipport_hifirstauto);
 VNET_DECLARE(int, ipport_hilastauto);
 VNET_DECLARE(int, ipport_randomized);
 
 #define	V_ipport_reservedhigh	VNET(ipport_reservedhigh)
 #define	V_ipport_reservedlow	VNET(ipport_reservedlow)
 #define	V_ipport_lowfirstauto	VNET(ipport_lowfirstauto)
 #define	V_ipport_lowlastauto	VNET(ipport_lowlastauto)
 #define	V_ipport_firstauto	VNET(ipport_firstauto)
 #define	V_ipport_lastauto	VNET(ipport_lastauto)
 #define	V_ipport_hifirstauto	VNET(ipport_hifirstauto)
 #define	V_ipport_hilastauto	VNET(ipport_hilastauto)
 #define	V_ipport_randomized	VNET(ipport_randomized)
 
 void	in_pcbinfo_init(struct inpcbinfo *, struct inpcbstorage *,
 	    u_int, u_int);
 void	in_pcbinfo_destroy(struct inpcbinfo *);
 void	in_pcbstorage_init(void *);
 void	in_pcbstorage_destroy(void *);
 
 void	in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
 int	in_pcballoc(struct socket *, struct inpcbinfo *);
 int	in_pcbbind(struct inpcb *, struct sockaddr_in *, struct ucred *);
 int	in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *,
 	    u_short *, struct ucred *);
 int	in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *,
 	    bool);
 int	in_pcbconnect_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *,
 	    u_short *, in_addr_t *, u_short *, struct ucred *);
-void	in_pcbdetach(struct inpcb *);
 void	in_pcbdisconnect(struct inpcb *);
 void	in_pcbdrop(struct inpcb *);
 void	in_pcbfree(struct inpcb *);
 int	in_pcbinshash(struct inpcb *);
 int	in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
 	    struct ucred *);
 int	in_pcblbgroup_numa(struct inpcb *, int arg);
 struct inpcb *
 	in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
 	    struct in_addr, u_int, int, struct ifnet *);
 struct inpcb *
 	in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
 	    struct in_addr, u_int, int, struct ifnet *, struct mbuf *);
 void	in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
 	    int, struct inpcb *(*)(struct inpcb *, int));
 void	in_pcbref(struct inpcb *);
 void	in_pcbrehash(struct inpcb *);
 void	in_pcbremhash_locked(struct inpcb *);
 bool	in_pcbrele(struct inpcb *, inp_lookup_t);
 bool	in_pcbrele_rlocked(struct inpcb *);
 bool	in_pcbrele_wlocked(struct inpcb *);
 
 typedef bool inp_match_t(const struct inpcb *, void *);
 struct inpcb_iterator {
 	const struct inpcbinfo	*ipi;
 	struct inpcb		*inp;
 	inp_match_t		*match;
 	void			*ctx;
 	int			hash;
 #define	INP_ALL_LIST		-1
 	const inp_lookup_t	lock;
 };
 
 /* Note: sparse initializers guarantee .inp = NULL. */
 #define	INP_ITERATOR(_ipi, _lock, _match, _ctx)		\
 	{						\
 		.ipi = (_ipi),				\
 		.lock = (_lock),			\
 		.hash = INP_ALL_LIST,			\
 		.match = (_match),			\
 		.ctx = (_ctx),				\
 	}
 #define	INP_ALL_ITERATOR(_ipi, _lock)			\
 	{						\
 		.ipi = (_ipi),				\
 		.lock = (_lock),			\
 		.hash = INP_ALL_LIST,			\
 	}
 
 struct inpcb *inp_next(struct inpcb_iterator *);
 void	in_losing(struct inpcb *);
 void	in_pcbsetsolabel(struct socket *so);
 int	in_getpeeraddr(struct socket *so, struct sockaddr **nam);
 int	in_getsockaddr(struct socket *so, struct sockaddr **nam);
 struct sockaddr *
 	in_sockaddr(in_port_t port, struct in_addr *addr);
 void	in_pcbsosetlabel(struct socket *so);
 #ifdef RATELIMIT
 int
 in_pcboutput_txrtlmt_locked(struct inpcb *, struct ifnet *,
 	    struct mbuf *, uint32_t);
 int	in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t,
 	    uint32_t, struct m_snd_tag **);
 void	in_pcbdetach_txrtlmt(struct inpcb *);
 void    in_pcbdetach_tag(struct m_snd_tag *);
 int	in_pcbmodify_txrtlmt(struct inpcb *, uint32_t);
 int	in_pcbquery_txrtlmt(struct inpcb *, uint32_t *);
 int	in_pcbquery_txrlevel(struct inpcb *, uint32_t *);
 void	in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *);
 void	in_pcboutput_eagain(struct inpcb *);
 #endif
 #endif /* _KERNEL */
 
 #endif /* !_NETINET_IN_PCB_H_ */
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index e6e8b7a56680..04b12b6587dd 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -1,1089 +1,1088 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
  */
 
 #include <sys/cdefs.h>
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/route/route_ctl.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_icmp.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/stdarg.h>
 #include <security/mac/mac_framework.h>
 
 extern ipproto_input_t *ip_protox[];
 
 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_defttl), 0,
     "Maximum TTL on IP packets");
 
 VNET_DEFINE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo		VNET(ripcbinfo)
 
 /*
  * Control and data hooks for ipfw, dummynet, divert and so on.
  * The data hooks are not used here but it is convenient
  * to keep them all in one place.
  */
 VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
 
 int	(*ip_dn_ctl_ptr)(struct sockopt *);
 int	(*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *);
 void	(*ip_divert_ptr)(struct mbuf *, bool);
 int	(*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool);
 
 #ifdef INET
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
  */
 
 /*
  * The socket used to communicate with the multicast routing daemon.
  */
 VNET_DEFINE(struct socket *, ip_mrouter);
 
 /*
  * The various mrouter and rsvp functions.
  */
 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip_mrouter_done)(void);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *);
 int (*mrt_ioctl)(u_long, caddr_t, int);
 int (*legal_vif_num)(int);
 u_long (*ip_mcast_src)(int);
 
 int (*rsvp_input_p)(struct mbuf **, int *, int);
 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
 void (*ip_rsvp_force_done)(struct socket *);
 #endif /* INET */
 
 u_long	rip_sendspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
 
 u_long	rip_recvspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
 
 /*
  * Hash functions
  */
 
 #define INP_PCBHASH_RAW_SIZE	256
 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
         (((proto) + (laddr) + (faddr)) % (mask) + 1)
 
 #ifdef INET
 static void
 rip_inshash(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbhead *pcbhash;
 	int hash;
 
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if (inp->inp_ip_p != 0 &&
 	    inp->inp_laddr.s_addr != INADDR_ANY &&
 	    inp->inp_faddr.s_addr != INADDR_ANY) {
 		hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
 		    inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
 	} else
 		hash = 0;
 	pcbhash = &pcbinfo->ipi_hash_exact[hash];
 	CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_exact);
 }
 
 static void
 rip_delhash(struct inpcb *inp)
 {
 
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	CK_LIST_REMOVE(inp, inp_hash_exact);
 }
 #endif /* INET */
 
 INPCBSTORAGE_DEFINE(ripcbstor, inpcb, "rawinp", "ripcb", "rip", "riphash");
 
 static void
 rip_init(void *arg __unused)
 {
 
 	in_pcbinfo_init(&V_ripcbinfo, &ripcbstor, INP_PCBHASH_RAW_SIZE, 1);
 }
 VNET_SYSINIT(rip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rip_init, NULL);
 
 #ifdef VIMAGE
 static void
 rip_destroy(void *unused __unused)
 {
 
 	in_pcbinfo_destroy(&V_ripcbinfo);
 }
 VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
 #endif
 
 #ifdef INET
 static int
 rip_append(struct inpcb *inp, struct ip *ip, struct mbuf *m,
     struct sockaddr_in *ripsrc)
 {
 	struct socket *so = inp->inp_socket;
 	struct mbuf *n, *opts = NULL;
 
 	INP_LOCK_ASSERT(inp);
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/* check AH/ESP integrity. */
 	if (IPSEC_ENABLED(ipv4) && IPSEC_CHECK_POLICY(ipv4, m, inp) != 0)
 		return (0);
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, m) != 0)
 		return (0);
 #endif
 	/* Check the minimum TTL for socket. */
 	if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
 		return (0);
 
 	if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
 		return (0);
 
 	if ((inp->inp_flags & INP_CONTROLOPTS) ||
 	    (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
 		ip_savecontrol(inp, &opts, ip, n);
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv,
 	    (struct sockaddr *)ripsrc, n, opts) == 0) {
 		soroverflow_locked(so);
 		m_freem(n);
 		if (opts)
 			m_freem(opts);
 		return (0);
 	}
 	sorwakeup_locked(so);
 
 	return (1);
 }
 
 struct rip_inp_match_ctx {
 	struct ip *ip;
 	int proto;
 };
 
 static bool
 rip_inp_match1(const struct inpcb *inp, void *v)
 {
 	struct rip_inp_match_ctx *ctx = v;
 
 	if (inp->inp_ip_p != ctx->proto)
 		return (false);
 #ifdef INET6
 	/* XXX inp locking */
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (false);
 #endif
 	if (inp->inp_laddr.s_addr != ctx->ip->ip_dst.s_addr)
 		return (false);
 	if (inp->inp_faddr.s_addr != ctx->ip->ip_src.s_addr)
 		return (false);
 	return (true);
 }
 
 static bool
 rip_inp_match2(const struct inpcb *inp, void *v)
 {
 	struct rip_inp_match_ctx *ctx = v;
 
 	if (inp->inp_ip_p && inp->inp_ip_p != ctx->proto)
 		return (false);
 #ifdef INET6
 	/* XXX inp locking */
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (false);
 #endif
 	if (!in_nullhost(inp->inp_laddr) &&
 	    !in_hosteq(inp->inp_laddr, ctx->ip->ip_dst))
 		return (false);
 	if (!in_nullhost(inp->inp_faddr) &&
 	    !in_hosteq(inp->inp_faddr, ctx->ip->ip_src))
 		return (false);
 	return (true);
 }
 
 /*
  * Setup generic address and protocol structures for raw_input routine, then
  * pass them along with mbuf chain.
  */
 int
 rip_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct rip_inp_match_ctx ctx = {
 		.ip = mtod(*mp, struct ip *),
 		.proto = proto,
 	};
 	struct inpcb_iterator inpi = INP_ITERATOR(&V_ripcbinfo,
 	    INPLOOKUP_RLOCKPCB, rip_inp_match1, &ctx);
 	struct ifnet *ifp;
 	struct mbuf *m = *mp;
 	struct inpcb *inp;
 	struct sockaddr_in ripsrc;
 	int appended;
 
 	*mp = NULL;
 	appended = 0;
 
 	bzero(&ripsrc, sizeof(ripsrc));
 	ripsrc.sin_len = sizeof(ripsrc);
 	ripsrc.sin_family = AF_INET;
 	ripsrc.sin_addr = ctx.ip->ip_src;
 
 	ifp = m->m_pkthdr.rcvif;
 
 	inpi.hash = INP_PCBHASH_RAW(proto, ctx.ip->ip_src.s_addr,
 	    ctx.ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
 	while ((inp = inp_next(&inpi)) != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (jailed_without_vnet(inp->inp_cred) &&
 		    prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0) {
 			/*
 			 * XXX: If faddr was bound to multicast group,
 			 * jailed raw socket will drop datagram.
 			 */
 			continue;
 		}
 		appended += rip_append(inp, ctx.ip, m, &ripsrc);
 	}
 
 	inpi.hash = 0;
 	inpi.match = rip_inp_match2;
 	MPASS(inpi.inp == NULL);
 	while ((inp = inp_next(&inpi)) != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (jailed_without_vnet(inp->inp_cred) &&
 		    !IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr)) &&
 		    prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0)
 			/*
 			 * Allow raw socket in jail to receive multicast;
 			 * assume process had PRIV_NETINET_RAW at attach,
 			 * and fall through into normal filter path if so.
 			 */
 			continue;
 		/*
 		 * If this raw socket has multicast state, and we
 		 * have received a multicast, check if this socket
 		 * should receive it, as multicast filtering is now
 		 * the responsibility of the transport layer.
 		 */
 		if (inp->inp_moptions != NULL &&
 		    IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr))) {
 			/*
 			 * If the incoming datagram is for IGMP, allow it
 			 * through unconditionally to the raw socket.
 			 *
 			 * In the case of IGMPv2, we may not have explicitly
 			 * joined the group, and may have set IFF_ALLMULTI
 			 * on the interface. imo_multi_filter() may discard
 			 * control traffic we actually need to see.
 			 *
 			 * Userland multicast routing daemons should continue
 			 * filter the control traffic appropriately.
 			 */
 			int blocked;
 
 			blocked = MCAST_PASS;
 			if (proto != IPPROTO_IGMP) {
 				struct sockaddr_in group;
 
 				bzero(&group, sizeof(struct sockaddr_in));
 				group.sin_len = sizeof(struct sockaddr_in);
 				group.sin_family = AF_INET;
 				group.sin_addr = ctx.ip->ip_dst;
 
 				blocked = imo_multi_filter(inp->inp_moptions,
 				    ifp,
 				    (struct sockaddr *)&group,
 				    (struct sockaddr *)&ripsrc);
 			}
 
 			if (blocked != MCAST_PASS) {
 				IPSTAT_INC(ips_notmember);
 				continue;
 			}
 		}
 		appended += rip_append(inp, ctx.ip, m, &ripsrc);
 	}
 	if (appended == 0 && ip_protox[ctx.ip->ip_p] == rip_input) {
 		IPSTAT_INC(ips_noproto);
 		IPSTAT_DEC(ips_delivered);
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
 	} else
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Generate IP header and pass packet to ip_output.  Tack on options user may
  * have setup with control call.
  */
 static int
 rip_send(struct socket *so, int pruflags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct epoch_tracker et;
 	struct ip *ip;
 	struct inpcb *inp;
 	in_addr_t *dst;
 	int error, flags, cnt, hlen;
 	u_char opttype, optlen, *cp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_send: inp == NULL"));
 
 	if (control != NULL) {
 		m_freem(control);
 		control = NULL;
 	}
 
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			error = EISCONN;
 			m_freem(m);
 			return (error);
 		}
 		dst = &inp->inp_faddr.s_addr;
 	} else {
 		if (nam == NULL)
 			error = ENOTCONN;
 		else if (nam->sa_family != AF_INET)
 			error = EAFNOSUPPORT;
 		else if (nam->sa_len != sizeof(struct sockaddr_in))
 			error = EINVAL;
 		else
 			error = 0;
 		if (error != 0) {
 			m_freem(m);
 			return (error);
 		}
 		dst = &((struct sockaddr_in *)nam)->sin_addr.s_addr;
 	}
 
 	flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
 	    IP_ALLOWBROADCAST;
 
 	/*
 	 * If the user handed us a complete IP packet, use it.  Otherwise,
 	 * allocate an mbuf for a header and fill it in.
 	 */
 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 		if (m == NULL)
 			return(ENOBUFS);
 
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		ip->ip_tos = inp->inp_ip_tos;
 		if (inp->inp_flags & INP_DONTFRAG)
 			ip->ip_off = htons(IP_DF);
 		else
 			ip->ip_off = htons(0);
 		ip->ip_p = inp->inp_ip_p;
 		ip->ip_len = htons(m->m_pkthdr.len);
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst.s_addr = *dst;
 #ifdef ROUTE_MPATH
 		if (CALC_FLOWID_OUTBOUND) {
 			uint32_t hash_type, hash_val;
 
 			hash_val = fib4_calc_software_hash(ip->ip_src,
 			    ip->ip_dst, 0, 0, ip->ip_p, &hash_type);
 			m->m_pkthdr.flowid = hash_val;
 			M_HASHTYPE_SET(m, hash_type);
 			flags |= IP_NODEFAULTFLOWID;
 		}
 #endif
 		if (jailed(inp->inp_cred)) {
 			/*
 			 * prison_local_ip4() would be good enough but would
 			 * let a source of INADDR_ANY pass, which we do not
 			 * want to see from jails.
 			 */
 			if (ip->ip_src.s_addr == INADDR_ANY) {
 				NET_EPOCH_ENTER(et);
 				error = in_pcbladdr(inp, &ip->ip_dst,
 				    &ip->ip_src, inp->inp_cred);
 				NET_EPOCH_EXIT(et);
 			} else {
 				error = prison_local_ip4(inp->inp_cred,
 				    &ip->ip_src);
 			}
 			if (error != 0) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (error);
 			}
 		}
 		ip->ip_ttl = inp->inp_ip_ttl;
 	} else {
 		if (m->m_pkthdr.len > IP_MAXPACKET) {
 			m_freem(m);
 			return (EMSGSIZE);
 		}
 		if (m->m_pkthdr.len < sizeof(*ip)) {
 			m_freem(m);
 			return (EINVAL);
 		}
 		m = m_pullup(m, sizeof(*ip));
 		if (m == NULL)
 			return (ENOMEM);
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		if (m->m_len < hlen) {
 			m = m_pullup(m, hlen);
 			if (m == NULL)
 				return (EINVAL);
 			ip = mtod(m, struct ip *);
 		}
 #ifdef ROUTE_MPATH
 		if (CALC_FLOWID_OUTBOUND) {
 			uint32_t hash_type, hash_val;
 
 			hash_val = fib4_calc_software_hash(ip->ip_dst,
 			    ip->ip_src, 0, 0, ip->ip_p, &hash_type);
 			m->m_pkthdr.flowid = hash_val;
 			M_HASHTYPE_SET(m, hash_type);
 			flags |= IP_NODEFAULTFLOWID;
 		}
 #endif
 		INP_RLOCK(inp);
 		/*
 		 * Don't allow both user specified and setsockopt options,
 		 * and don't allow packet length sizes that will crash.
 		 */
 		if ((hlen < sizeof (*ip))
 		    || ((hlen > sizeof (*ip)) && inp->inp_options)
 		    || (ntohs(ip->ip_len) != m->m_pkthdr.len)) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (EINVAL);
 		}
 		error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
 		if (error != 0) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (error);
 		}
 		/*
 		 * Don't allow IP options which do not have the required
 		 * structure as specified in section 3.1 of RFC 791 on
 		 * pages 15-23.
 		 */
 		cp = (u_char *)(ip + 1);
 		cnt = hlen - sizeof (struct ip);
 		for (; cnt > 0; cnt -= optlen, cp += optlen) {
 			opttype = cp[IPOPT_OPTVAL];
 			if (opttype == IPOPT_EOL)
 				break;
 			if (opttype == IPOPT_NOP) {
 				optlen = 1;
 				continue;
 			}
 			if (cnt < IPOPT_OLEN + sizeof(u_char)) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (EINVAL);
 			}
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(u_char) ||
 			    optlen > cnt) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (EINVAL);
 			}
 		}
 		/*
 		 * This doesn't allow application to specify ID of zero,
 		 * but we got this limitation from the beginning of history.
 		 */
 		if (ip->ip_id == 0)
 			ip_fillid(ip);
 
 		/*
 		 * XXX prevent ip_output from overwriting header fields.
 		 */
 		flags |= IP_RAWOUTPUT;
 		IPSTAT_INC(ips_rawout);
 	}
 
 	if (inp->inp_flags & INP_ONESBCAST)
 		flags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	NET_EPOCH_ENTER(et);
 	error = ip_output(m, inp->inp_options, NULL, flags,
 	    inp->inp_moptions, inp);
 	NET_EPOCH_EXIT(et);
 	INP_RUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Raw IP socket option processing.
  *
  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  * only be created by a privileged process, and as such, socket option
  * operations to manage system properties on any raw socket were allowed to
  * take place without explicit additional access control checks.  However,
  * raw sockets can now also be created in jail(), and therefore explicit
  * checks are now required.  Likewise, raw sockets can be used by a process
  * after it gives up privilege, so some caution is required.  For options
  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  * performed in ip_ctloutput() and therefore no check occurs here.
  * Unilaterally checking priv_check() here breaks normal IP socket option
  * operations on raw sockets.
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
  *
  * XXX-BZ inp locking?
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	if (sopt->sopt_level != IPPROTO_IP) {
 		if ((sopt->sopt_level == SOL_SOCKET) &&
 		    (sopt->sopt_name == SO_SETFIB)) {
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
 			return (0);
 		}
 		return (EINVAL);
 	}
 
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			optval = inp->inp_flags & INP_HDRINCL;
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:	/* ADD actually returns the body... */
 		case IP_FW_GET:
 		case IP_FW_TABLE_GETSIZE:
 		case IP_FW_TABLE_LIST:
 		case IP_FW_NAT_GET_CONFIG:
 		case IP_FW_NAT_GET_LOG:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_GET:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break ;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			if (inp->inp_ip_p != IPPROTO_IGMP)
 				return (EOPNOTSUPP);
 			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
 				EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 			if (optval)
 				inp->inp_flags |= INP_HDRINCL;
 			else
 				inp->inp_flags &= ~INP_HDRINCL;
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:
 		case IP_FW_DEL:
 		case IP_FW_FLUSH:
 		case IP_FW_ZERO:
 		case IP_FW_RESETLOG:
 		case IP_FW_TABLE_ADD:
 		case IP_FW_TABLE_DEL:
 		case IP_FW_TABLE_FLUSH:
 		case IP_FW_NAT_CFG:
 		case IP_FW_NAT_DEL:
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_CONFIGURE:
 		case IP_DUMMYNET_DEL:
 		case IP_DUMMYNET_FLUSH:
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT ;
 			break ;
 
 		case IP_RSVP_ON:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			if (inp->inp_ip_p != IPPROTO_RSVP)
 				return (EOPNOTSUPP);
 			error = ip_rsvp_init(so);
 			break;
 
 		case IP_RSVP_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_done();
 			break;
 
 		case IP_RSVP_VIF_ON:
 		case IP_RSVP_VIF_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			if (inp->inp_ip_p != IPPROTO_RSVP)
 				return (EOPNOTSUPP);
 			error = ip_rsvp_vif ?
 				ip_rsvp_vif(so, sopt) : EINVAL;
 			break;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			if (inp->inp_ip_p != IPPROTO_IGMP)
 				return (EOPNOTSUPP);
 			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
 					EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 void
 rip_ctlinput(struct icmp *icmp)
 {
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	if (IPSEC_ENABLED(ipv4))
 		IPSEC_CTLINPUT(ipv4, icmp);
 #endif
 }
 
 static int
 rip_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
 
 	error = priv_check(td, PRIV_NETINET_RAW);
 	if (error)
 		return (error);
 	if (proto >= IPPROTO_MAX || proto < 0)
 		return EPROTONOSUPPORT;
 	error = soreserve(so, rip_sendspace, rip_recvspace);
 	if (error)
 		return (error);
 	error = in_pcballoc(so, &V_ripcbinfo);
 	if (error)
 		return (error);
 	inp = (struct inpcb *)so->so_pcb;
 	inp->inp_ip_p = proto;
 	inp->inp_ip_ttl = V_ip_defttl;
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static void
 rip_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
 	    ("rip_detach: not closed"));
 
 	/* Disable mrouter first */
 	if (so == V_ip_mrouter && ip_mrouter_done)
 		ip_mrouter_done();
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_delhash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 
 	if (ip_rsvp_force_done)
 		ip_rsvp_force_done(so);
 	if (so == V_ip_rsvpd)
 		ip_rsvp_done();
-	in_pcbdetach(inp);
 	in_pcbfree(inp);
 }
 
 static void
 rip_dodisconnect(struct socket *so, struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = inp->inp_pcbinfo;
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	rip_delhash(inp);
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 }
 
 static void
 rip_abort(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
 
 	rip_dodisconnect(so, inp);
 }
 
 static void
 rip_close(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_close: inp == NULL"));
 
 	rip_dodisconnect(so, inp);
 }
 
 static int
 rip_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return (ENOTCONN);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
 
 	rip_dodisconnect(so, inp);
 	return (0);
 }
 
 static int
 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
 	struct inpcb *inp;
 	int error;
 
 	if (nam->sa_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 
 	error = prison_check_ip4(td->td_ucred, &addr->sin_addr);
 	if (error != 0)
 		return (error);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
 
 	if (CK_STAILQ_EMPTY(&V_ifnet) ||
 	    (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
 	    (addr->sin_addr.s_addr &&
 	     (inp->inp_flags & INP_BINDANY) == 0 &&
 	     ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
 		return (EADDRNOTAVAIL);
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_delhash(inp);
 	inp->inp_laddr = addr->sin_addr;
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
 	struct inpcb *inp;
 
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 	if (CK_STAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
 		return (EAFNOSUPPORT);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(&V_ripcbinfo);
 	rip_delhash(inp);
 	inp->inp_faddr = addr->sin_addr;
 	rip_inshash(inp);
 	INP_HASH_WUNLOCK(&V_ripcbinfo);
 	soisconnected(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 rip_shutdown(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
 
 	INP_WLOCK(inp);
 	socantsendmore(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 #endif /* INET */
 
 static int
 rip_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_ripcbinfo,
 	    INPLOOKUP_RLOCKPCB);
 	struct xinpgen xig;
 	struct inpcb *inp;
 	int error;
 
 	if (req->newptr != 0)
 		return (EPERM);
 
 	if (req->oldptr == 0) {
 		int n;
 
 		n = V_ripcbinfo.ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
 	}
 
 	if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (error);
 
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = V_ripcbinfo.ipi_count;
 	xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	while ((inp = inp_next(&inpi)) != NULL) {
 		if (inp->inp_gencnt <= xig.xig_gen &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 			struct xinpcb xi;
 
 			in_pcbtoxinpcb(inp, &xi);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 			if (error) {
 				INP_RUNLOCK(inp);
 				break;
 			}
 		}
 	}
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_ripcbinfo.ipi_count;
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     rip_pcblist, "S,xinpcb",
     "List of active raw IP sockets");
 
 #ifdef INET
 struct protosw rip_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_abort =		rip_abort,
 	.pr_attach =		rip_attach,
 	.pr_bind =		rip_bind,
 	.pr_connect =		rip_connect,
 	.pr_control =		in_control,
 	.pr_detach =		rip_detach,
 	.pr_disconnect =	rip_disconnect,
 	.pr_peeraddr =		in_getpeeraddr,
 	.pr_send =		rip_send,
 	.pr_shutdown =		rip_shutdown,
 	.pr_sockaddr =		in_getsockaddr,
 	.pr_sosetlabel =	in_pcbsosetlabel,
 	.pr_close =		rip_close
 };
 #endif /* INET */
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index ffde6a4b88c9..6faf635213b1 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1,2576 +1,2574 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2001 McAfee, Inc.
  * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jonathan Lemon
  * and McAfee Research, the Security Research Division of McAfee, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program. [2001 McAfee, Inc.]
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/hash.h>
 #include <sys/refcount.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 
 #include <sys/md5.h>
 #include <crypto/siphash/siphash.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_fastopen.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_ecn.h>
 #ifdef TCP_BLACKBOX
 #include <netinet/tcp_log_buf.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/toecore.h>
 #endif
 #include <netinet/udp.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE_STATIC(int, tcp_syncookies) = 1;
 #define	V_tcp_syncookies		VNET(tcp_syncookies)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookies), 0,
     "Use TCP SYN cookies if the syncache overflows");
 
 VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0;
 #define	V_tcp_syncookiesonly		VNET(tcp_syncookiesonly)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookiesonly), 0,
     "Use only TCP SYN cookies");
 
 VNET_DEFINE_STATIC(int, functions_inherit_listen_socket_stack) = 1;
 #define V_functions_inherit_listen_socket_stack \
     VNET(functions_inherit_listen_socket_stack)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack,
     CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(functions_inherit_listen_socket_stack), 0,
     "Inherit listen socket's stack");
 
 #ifdef TCP_OFFLOAD
 #define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
 #endif
 
 static void	 syncache_drop(struct syncache *, struct syncache_head *);
 static void	 syncache_free(struct syncache *);
 static void	 syncache_insert(struct syncache *, struct syncache_head *);
 static int	 syncache_respond(struct syncache *, const struct mbuf *, int);
 static struct	 socket *syncache_socket(struct syncache *, struct socket *,
 		    struct mbuf *m);
 static void	 syncache_timeout(struct syncache *sc, struct syncache_head *sch,
 		    int docallout);
 static void	 syncache_timer(void *);
 
 static uint32_t	 syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t,
 		    uint8_t *, uintptr_t);
 static tcp_seq	 syncookie_generate(struct syncache_head *, struct syncache *);
 static struct syncache
 		*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
 		    struct syncache *, struct tcphdr *, struct tcpopt *,
 		    struct socket *, uint16_t);
 static void	syncache_pause(struct in_conninfo *);
 static void	syncache_unpause(void *);
 static void	 syncookie_reseed(void *);
 #ifdef INVARIANTS
 static int	 syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
 		    struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
 		    struct socket *lso, uint16_t port);
 #endif
 
 /*
  * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
  * 3 retransmits corresponds to a timeout with default values of
  * tcp_rexmit_initial * (             1 +
  *                       tcp_backoff[1] +
  *                       tcp_backoff[2] +
  *                       tcp_backoff[3]) + 3 * tcp_rexmit_slop,
  * 1000 ms * (1 + 2 + 4 + 8) +  3 * 200 ms = 15600 ms,
  * the odds are that the user has given up attempting to connect by then.
  */
 #define SYNCACHE_MAXREXMTS		3
 
 /* Arbitrary values */
 #define TCP_SYNCACHE_HASHSIZE		512
 #define TCP_SYNCACHE_BUCKETLIMIT	30
 
 VNET_DEFINE_STATIC(struct tcp_syncache, tcp_syncache);
 #define	V_tcp_syncache			VNET(tcp_syncache)
 
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "TCP SYN cache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.bucket_limit), 0,
     "Per-bucket hash limit for syncache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.cache_limit), 0,
     "Overall entry limit for syncache");
 
 SYSCTL_UMA_CUR(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_VNET,
     &VNET_NAME(tcp_syncache.zone), "Current number of entries in syncache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.hashsize), 0,
     "Size of TCP syncache hashtable");
 
 SYSCTL_BOOL(_net_inet_tcp_syncache, OID_AUTO, see_other, CTLFLAG_VNET |
     CTLFLAG_RW, &VNET_NAME(tcp_syncache.see_other), 0,
     "All syncache(4) entries are visible, ignoring UID/GID, jail(2) "
     "and mac(4) checks");
 
 static int
 sysctl_net_inet_tcp_syncache_rexmtlimit_check(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	u_int new;
 
 	new = V_tcp_syncache.rexmt_limit;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if ((error == 0) && (req->newptr != NULL)) {
 		if (new > TCP_MAXRXTSHIFT)
 			error = EINVAL;
 		else
 			V_tcp_syncache.rexmt_limit = new;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(tcp_syncache.rexmt_limit), 0,
     sysctl_net_inet_tcp_syncache_rexmtlimit_check, "UI",
     "Limit on SYN/ACK retransmissions");
 
 VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1;
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0,
     "Send reset on socket allocation failure");
 
 static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
 
 #define	SCH_LOCK(sch)		mtx_lock(&(sch)->sch_mtx)
 #define	SCH_UNLOCK(sch)		mtx_unlock(&(sch)->sch_mtx)
 #define	SCH_LOCK_ASSERT(sch)	mtx_assert(&(sch)->sch_mtx, MA_OWNED)
 
 /*
  * Requires the syncache entry to be already removed from the bucket list.
  */
 static void
 syncache_free(struct syncache *sc)
 {
 
 	if (sc->sc_ipopts)
 		(void) m_free(sc->sc_ipopts);
 	if (sc->sc_cred)
 		crfree(sc->sc_cred);
 #ifdef MAC
 	mac_syncache_destroy(&sc->sc_label);
 #endif
 
 	uma_zfree(V_tcp_syncache.zone, sc);
 }
 
 void
 syncache_init(void)
 {
 	int i;
 
 	V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
 	V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
 	V_tcp_syncache.hash_secret = arc4random();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
 	    &V_tcp_syncache.hashsize);
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
 	    &V_tcp_syncache.bucket_limit);
 	if (!powerof2(V_tcp_syncache.hashsize) ||
 	    V_tcp_syncache.hashsize == 0) {
 		printf("WARNING: syncache hash size is not a power of 2.\n");
 		V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	}
 	V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1;
 
 	/* Set limits. */
 	V_tcp_syncache.cache_limit =
 	    V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit;
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
 	    &V_tcp_syncache.cache_limit);
 
 	/* Allocate the hash table. */
 	V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize *
 	    sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO);
 
 #ifdef VIMAGE
 	V_tcp_syncache.vnet = curvnet;
 #endif
 
 	/* Initialize the hash buckets. */
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
 		TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
 		mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
 			 NULL, MTX_DEF);
 		callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
 			 &V_tcp_syncache.hashbase[i].sch_mtx, 0);
 		V_tcp_syncache.hashbase[i].sch_length = 0;
 		V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache;
 		V_tcp_syncache.hashbase[i].sch_last_overflow =
 		    -(SYNCOOKIE_LIFETIME + 1);
 	}
 
 	/* Create the syncache entry zone. */
 	V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone,
 	    V_tcp_syncache.cache_limit);
 
 	/* Start the SYN cookie reseeder callout. */
 	callout_init(&V_tcp_syncache.secret.reseed, 1);
 	arc4rand(V_tcp_syncache.secret.key[0], SYNCOOKIE_SECRET_SIZE, 0);
 	arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0);
 	callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz,
 	    syncookie_reseed, &V_tcp_syncache);
 
 	/* Initialize the pause machinery. */
 	mtx_init(&V_tcp_syncache.pause_mtx, "tcp_sc_pause", NULL, MTX_DEF);
 	callout_init_mtx(&V_tcp_syncache.pause_co, &V_tcp_syncache.pause_mtx,
 	    0);
 	V_tcp_syncache.pause_until = time_uptime - TCP_SYNCACHE_PAUSE_TIME;
 	V_tcp_syncache.pause_backoff = 0;
 	V_tcp_syncache.paused = false;
 }
 
 #ifdef VIMAGE
 void
 syncache_destroy(void)
 {
 	struct syncache_head *sch;
 	struct syncache *sc, *nsc;
 	int i;
 
 	/*
 	 * Stop the re-seed timer before freeing resources.  No need to
 	 * possibly schedule it another time.
 	 */
 	callout_drain(&V_tcp_syncache.secret.reseed);
 
 	/* Stop the SYN cache pause callout. */
 	mtx_lock(&V_tcp_syncache.pause_mtx);
 	if (callout_stop(&V_tcp_syncache.pause_co) == 0) {
 		mtx_unlock(&V_tcp_syncache.pause_mtx);
 		callout_drain(&V_tcp_syncache.pause_co);
 	} else
 		mtx_unlock(&V_tcp_syncache.pause_mtx);
 
 	/* Cleanup hash buckets: stop timers, free entries, destroy locks. */
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
 		sch = &V_tcp_syncache.hashbase[i];
 		callout_drain(&sch->sch_timer);
 
 		SCH_LOCK(sch);
 		TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc)
 			syncache_drop(sc, sch);
 		SCH_UNLOCK(sch);
 		KASSERT(TAILQ_EMPTY(&sch->sch_bucket),
 		    ("%s: sch->sch_bucket not empty", __func__));
 		KASSERT(sch->sch_length == 0, ("%s: sch->sch_length %d not 0",
 		    __func__, sch->sch_length));
 		mtx_destroy(&sch->sch_mtx);
 	}
 
 	KASSERT(uma_zone_get_cur(V_tcp_syncache.zone) == 0,
 	    ("%s: cache_count not 0", __func__));
 
 	/* Free the allocated global resources. */
 	uma_zdestroy(V_tcp_syncache.zone);
 	free(V_tcp_syncache.hashbase, M_SYNCACHE);
 	mtx_destroy(&V_tcp_syncache.pause_mtx);
 }
 #endif
 
 /*
  * Inserts a syncache entry into the specified bucket row.
  * Locks and unlocks the syncache_head autonomously.
  */
 static void
 syncache_insert(struct syncache *sc, struct syncache_head *sch)
 {
 	struct syncache *sc2;
 
 	SCH_LOCK(sch);
 
 	/*
 	 * Make sure that we don't overflow the per-bucket limit.
 	 * If the bucket is full, toss the oldest element.
 	 */
 	if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
 		KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
 			("sch->sch_length incorrect"));
 		syncache_pause(&sc->sc_inc);
 		sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
 		sch->sch_last_overflow = time_uptime;
 		syncache_drop(sc2, sch);
 	}
 
 	/* Put it into the bucket. */
 	TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length++;
 
 #ifdef TCP_OFFLOAD
 	if (ADDED_BY_TOE(sc)) {
 		struct toedev *tod = sc->sc_tod;
 
 		tod->tod_syncache_added(tod, sc->sc_todctx);
 	}
 #endif
 
 	/* Reinitialize the bucket row's timer. */
 	if (sch->sch_length == 1)
 		sch->sch_nextc = ticks + INT_MAX;
 	syncache_timeout(sc, sch, 1);
 
 	SCH_UNLOCK(sch);
 
 	TCPSTATES_INC(TCPS_SYN_RECEIVED);
 	TCPSTAT_INC(tcps_sc_added);
 }
 
 /*
  * Remove and free entry from syncache bucket row.
  * Expects locked syncache head.
  */
 static void
 syncache_drop(struct syncache *sc, struct syncache_head *sch)
 {
 
 	SCH_LOCK_ASSERT(sch);
 
 	TCPSTATES_DEC(TCPS_SYN_RECEIVED);
 	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length--;
 
 #ifdef TCP_OFFLOAD
 	if (ADDED_BY_TOE(sc)) {
 		struct toedev *tod = sc->sc_tod;
 
 		tod->tod_syncache_removed(tod, sc->sc_todctx);
 	}
 #endif
 
 	syncache_free(sc);
 }
 
 /*
  * Engage/reengage time on bucket row.
  */
 static void
 syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
 {
 	int rexmt;
 
 	if (sc->sc_rxmits == 0)
 		rexmt = tcp_rexmit_initial;
 	else
 		TCPT_RANGESET(rexmt,
 		    tcp_rexmit_initial * tcp_backoff[sc->sc_rxmits],
 		    tcp_rexmit_min, TCPTV_REXMTMAX);
 	sc->sc_rxttime = ticks + rexmt;
 	sc->sc_rxmits++;
 	if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
 		sch->sch_nextc = sc->sc_rxttime;
 		if (docallout)
 			callout_reset(&sch->sch_timer, sch->sch_nextc - ticks,
 			    syncache_timer, (void *)sch);
 	}
 }
 
 /*
  * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
  * If we have retransmitted an entry the maximum number of times, expire it.
  * One separate timer for each bucket row.
  */
 static void
 syncache_timer(void *xsch)
 {
 	struct syncache_head *sch = (struct syncache_head *)xsch;
 	struct syncache *sc, *nsc;
 	struct epoch_tracker et;
 	int tick = ticks;
 	char *s;
 	bool paused;
 
 	CURVNET_SET(sch->sch_sc->vnet);
 
 	/* NB: syncache_head has already been locked by the callout. */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * In the following cycle we may remove some entries and/or
 	 * advance some timeouts, so re-initialize the bucket timer.
 	 */
 	sch->sch_nextc = tick + INT_MAX;
 
 	/*
 	 * If we have paused processing, unconditionally remove
 	 * all syncache entries.
 	 */
 	mtx_lock(&V_tcp_syncache.pause_mtx);
 	paused = V_tcp_syncache.paused;
 	mtx_unlock(&V_tcp_syncache.pause_mtx);
 
 	TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
 		if (paused) {
 			syncache_drop(sc, sch);
 			continue;
 		}
 		/*
 		 * We do not check if the listen socket still exists
 		 * and accept the case where the listen socket may be
 		 * gone by the time we resend the SYN/ACK.  We do
 		 * not expect this to happens often. If it does,
 		 * then the RST will be sent by the time the remote
 		 * host does the SYN/ACK->ACK.
 		 */
 		if (TSTMP_GT(sc->sc_rxttime, tick)) {
 			if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
 				sch->sch_nextc = sc->sc_rxttime;
 			continue;
 		}
 		if (sc->sc_rxmits > V_tcp_ecn_maxretries) {
 			sc->sc_flags &= ~SCF_ECN_MASK;
 		}
 		if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) {
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
 				    "giving up and removing syncache entry\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 			syncache_drop(sc, sch);
 			TCPSTAT_INC(tcps_sc_stale);
 			continue;
 		}
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Response timeout, "
 			    "retransmitting (%u) SYN|ACK\n",
 			    s, __func__, sc->sc_rxmits);
 			free(s, M_TCPLOG);
 		}
 
 		NET_EPOCH_ENTER(et);
 		syncache_respond(sc, NULL, TH_SYN|TH_ACK);
 		NET_EPOCH_EXIT(et);
 		TCPSTAT_INC(tcps_sc_retransmitted);
 		syncache_timeout(sc, sch, 0);
 	}
 	if (!TAILQ_EMPTY(&(sch)->sch_bucket))
 		callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
 			syncache_timer, (void *)(sch));
 	CURVNET_RESTORE();
 }
 
 /*
  * Returns true if the system is only using cookies at the moment.
  * This could be due to a sysadmin decision to only use cookies, or it
  * could be due to the system detecting an attack.
  */
 static inline bool
 syncache_cookiesonly(void)
 {
 
 	return (V_tcp_syncookies && (V_tcp_syncache.paused ||
 	    V_tcp_syncookiesonly));
 }
 
 /*
  * Find the hash bucket for the given connection.
  */
 static struct syncache_head *
 syncache_hashbucket(struct in_conninfo *inc)
 {
 	uint32_t hash;
 
 	/*
 	 * The hash is built on foreign port + local port + foreign address.
 	 * We rely on the fact that struct in_conninfo starts with 16 bits
 	 * of foreign port, then 16 bits of local port then followed by 128
 	 * bits of foreign address.  In case of IPv4 address, the first 3
 	 * 32-bit words of the address always are zeroes.
 	 */
 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie, 5,
 	    V_tcp_syncache.hash_secret) & V_tcp_syncache.hashmask;
 
 	return (&V_tcp_syncache.hashbase[hash]);
 }
 
 /*
  * Find an entry in the syncache.
  * Returns always with locked syncache_head plus a matching entry or NULL.
  */
 static struct syncache *
 syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 
 	*schp = sch = syncache_hashbucket(inc);
 	SCH_LOCK(sch);
 
 	/* Circle through bucket row to find matching entry. */
 	TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash)
 		if (bcmp(&inc->inc_ie, &sc->sc_inc.inc_ie,
 		    sizeof(struct in_endpoints)) == 0)
 			break;
 
 	return (sc);	/* Always returns with locked sch. */
 }
 
 /*
  * This function is called when we get a RST for a
  * non-existent connection, so that we can see if the
  * connection is in the syn cache.  If it is, zap it.
  * If required send a challenge ACK.
  */
 void
 syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m,
     uint16_t port)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	char *s = NULL;
 
 	if (syncache_cookiesonly())
 		return;
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * No corresponding connection was found in syncache.
 	 * If syncookies are enabled and possibly exclusively
 	 * used, or we are under memory pressure, a valid RST
 	 * may not find a syncache entry.  In that case we're
 	 * done and no SYN|ACK retransmissions will happen.
 	 * Otherwise the RST was misdirected or spoofed.
 	 */
 	if (sc == NULL) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
 			    "syncache entry (possibly syncookie only), "
 			    "segment ignored\n", s, __func__);
 		TCPSTAT_INC(tcps_badrst);
 		goto done;
 	}
 
 	/* The remote UDP encaps port does not match. */
 	if (sc->sc_port != port) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Spurious RST with matching "
 			    "syncache entry but non-matching UDP encaps port, "
 			    "segment ignored\n", s, __func__);
 		TCPSTAT_INC(tcps_badrst);
 		goto done;
 	}
 
 	/*
 	 * If the RST bit is set, check the sequence number to see
 	 * if this is a valid reset segment.
 	 *
 	 * RFC 793 page 37:
 	 *   In all states except SYN-SENT, all reset (RST) segments
 	 *   are validated by checking their SEQ-fields.  A reset is
 	 *   valid if its sequence number is in the window.
 	 *
 	 * RFC 793 page 69:
 	 *   There are four cases for the acceptability test for an incoming
 	 *   segment:
 	 *
 	 * Segment Receive  Test
 	 * Length  Window
 	 * ------- -------  -------------------------------------------
 	 *    0       0     SEG.SEQ = RCV.NXT
 	 *    0      >0     RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
 	 *   >0       0     not acceptable
 	 *   >0      >0     RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
 	 *               or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
 	 *
 	 * Note that when receiving a SYN segment in the LISTEN state,
 	 * IRS is set to SEG.SEQ and RCV.NXT is set to SEG.SEQ+1, as
 	 * described in RFC 793, page 66.
 	 */
 	if ((SEQ_GEQ(th->th_seq, sc->sc_irs + 1) &&
 	    SEQ_LT(th->th_seq, sc->sc_irs + 1 + sc->sc_wnd)) ||
 	    (sc->sc_wnd == 0 && th->th_seq == sc->sc_irs + 1)) {
 		if (V_tcp_insecure_rst ||
 		    th->th_seq == sc->sc_irs + 1) {
 			syncache_drop(sc, sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG,
 				    "%s; %s: Our SYN|ACK was rejected, "
 				    "connection attempt aborted by remote "
 				    "endpoint\n",
 				    s, __func__);
 			TCPSTAT_INC(tcps_sc_reset);
 		} else {
 			TCPSTAT_INC(tcps_badrst);
 			/* Send challenge ACK. */
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: RST with invalid "
 				    " SEQ %u != NXT %u (+WND %u), "
 				    "sending challenge ACK\n",
 				    s, __func__,
 				    th->th_seq, sc->sc_irs + 1, sc->sc_wnd);
 			syncache_respond(sc, m, TH_ACK);
 		}
 	} else {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != "
 			    "NXT %u (+WND %u), segment ignored\n",
 			    s, __func__,
 			    th->th_seq, sc->sc_irs + 1, sc->sc_wnd);
 		TCPSTAT_INC(tcps_badrst);
 	}
 
 done:
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	SCH_UNLOCK(sch);
 }
 
 void
 syncache_badack(struct in_conninfo *inc, uint16_t port)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 
 	if (syncache_cookiesonly())
 		return;
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 	if ((sc != NULL) && (sc->sc_port == port)) {
 		syncache_drop(sc, sch);
 		TCPSTAT_INC(tcps_sc_badack);
 	}
 	SCH_UNLOCK(sch);
 }
 
 void
 syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 
 	if (syncache_cookiesonly())
 		return;
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 	if (sc == NULL)
 		goto done;
 
 	/* If the port != sc_port, then it's a bogus ICMP msg */
 	if (port != sc->sc_port)
 		goto done;
 
 	/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
 	if (ntohl(th_seq) != sc->sc_iss)
 		goto done;
 
 	/*
 	 * If we've rertransmitted 3 times and this is our second error,
 	 * we remove the entry.  Otherwise, we allow it to continue on.
 	 * This prevents us from incorrectly nuking an entry during a
 	 * spurious network outage.
 	 *
 	 * See tcp_notify().
 	 */
 	if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) {
 		sc->sc_flags |= SCF_UNREACH;
 		goto done;
 	}
 	syncache_drop(sc, sch);
 	TCPSTAT_INC(tcps_sc_unreach);
 done:
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Build a new TCP socket structure from a syncache entry.
  *
  * On success return the newly created socket with its underlying inp locked.
  */
 static struct socket *
 syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 {
 	struct tcp_function_block *blk;
 	struct inpcb *inp = NULL;
 	struct socket *so;
 	struct tcpcb *tp;
 	int error;
 	char *s;
 
 	NET_EPOCH_ASSERT();
 
 	/*
 	 * Ok, create the full blown connection, and set things up
 	 * as they would have been set up if we had created the
 	 * connection when the SYN arrived.
 	 */
 	if ((so = solisten_clone(lso)) == NULL)
 		goto allocfail;
 #ifdef MAC
 	mac_socketpeer_set_from_mbuf(m, so);
 #endif
 	error = in_pcballoc(so, &V_tcbinfo);
 	if (error) {
 		sodealloc(so);
 		goto allocfail;
 	}
 	inp = sotoinpcb(so);
 	if ((tp = tcp_newtcpcb(inp)) == NULL) {
-		in_pcbdetach(inp);
 		in_pcbfree(inp);
 		sodealloc(so);
 		goto allocfail;
 	}
 	inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		inp->inp_vflag &= ~INP_IPV4;
 		inp->inp_vflag |= INP_IPV6;
 		inp->in6p_laddr = sc->sc_inc.inc6_laddr;
 	} else {
 		inp->inp_vflag &= ~INP_IPV6;
 		inp->inp_vflag |= INP_IPV4;
 #endif
 		inp->inp_ip_ttl = sc->sc_ip_ttl;
 		inp->inp_ip_tos = sc->sc_ip_tos;
 		inp->inp_laddr = sc->sc_inc.inc_laddr;
 #ifdef INET6
 	}
 #endif
 
 	/*
 	 * If there's an mbuf and it has a flowid, then let's initialise the
 	 * inp with that particular flowid.
 	 */
 	if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
 		inp->inp_flowid = m->m_pkthdr.flowid;
 		inp->inp_flowtype = M_HASHTYPE_GET(m);
 #ifdef NUMA
 		inp->inp_numa_domain = m->m_pkthdr.numa_domain;
 #endif
 	}
 
 	inp->inp_lport = sc->sc_inc.inc_lport;
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6PROTO) {
 		struct inpcb *oinp = sotoinpcb(lso);
 
 		/*
 		 * Inherit socket options from the listening socket.
 		 * Note that in6p_inputopts are not (and should not be)
 		 * copied, since it stores previously received options and is
 		 * used to detect if each new option is different than the
 		 * previous one and hence should be passed to a user.
 		 * If we copied in6p_inputopts, a user would not be able to
 		 * receive options just after calling the accept system call.
 		 */
 		inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS;
 		if (oinp->in6p_outputopts)
 			inp->in6p_outputopts =
 			    ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
 		inp->in6p_hops = oinp->in6p_hops;
 	}
 
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		struct sockaddr_in6 sin6;
 
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_len = sizeof(sin6);
 		sin6.sin6_addr = sc->sc_inc.inc6_faddr;
 		sin6.sin6_port = sc->sc_inc.inc_fport;
 		sin6.sin6_flowinfo = sin6.sin6_scope_id = 0;
 		INP_HASH_WLOCK(&V_tcbinfo);
 		error = in6_pcbconnect(inp, &sin6, thread0.td_ucred, false);
 		INP_HASH_WUNLOCK(&V_tcbinfo);
 		if (error != 0)
 			goto abort;
 		/* Override flowlabel from in6_pcbconnect. */
 		inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
 		inp->inp_flow |= sc->sc_flowlabel;
 	}
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		struct sockaddr_in sin;
 
 		inp->inp_options = (m) ? ip_srcroute(m) : NULL;
 
 		if (inp->inp_options == NULL) {
 			inp->inp_options = sc->sc_ipopts;
 			sc->sc_ipopts = NULL;
 		}
 
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(sin);
 		sin.sin_addr = sc->sc_inc.inc_faddr;
 		sin.sin_port = sc->sc_inc.inc_fport;
 		bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero));
 		INP_HASH_WLOCK(&V_tcbinfo);
 		error = in_pcbconnect(inp, &sin, thread0.td_ucred, false);
 		INP_HASH_WUNLOCK(&V_tcbinfo);
 		if (error != 0)
 			goto abort;
 	}
 #endif /* INET */
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/* Copy old policy into new socket's. */
 	if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0)
 		printf("syncache_socket: could not copy policy\n");
 #endif
 	tp->t_state = TCPS_SYN_RECEIVED;
 	tp->iss = sc->sc_iss;
 	tp->irs = sc->sc_irs;
 	tp->t_port = sc->sc_port;
 	tcp_rcvseqinit(tp);
 	tcp_sendseqinit(tp);
 	blk = sototcpcb(lso)->t_fb;
 	if (V_functions_inherit_listen_socket_stack && blk != tp->t_fb) {
 		/*
 		 * Our parents t_fb was not the default,
 		 * we need to release our ref on tp->t_fb and
 		 * pickup one on the new entry.
 		 */
 		struct tcp_function_block *rblk;
 		void *ptr = NULL;
 
 		rblk = find_and_ref_tcp_fb(blk);
 		KASSERT(rblk != NULL,
 		    ("cannot find blk %p out of syncache?", blk));
 
 		if (rblk->tfb_tcp_fb_init == NULL ||
 		    (*rblk->tfb_tcp_fb_init)(tp, &ptr) == 0) {
 			/* Release the old stack */
 			if (tp->t_fb->tfb_tcp_fb_fini != NULL)
 				(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
 			refcount_release(&tp->t_fb->tfb_refcnt);
 			/* Now set in all the pointers */
 			tp->t_fb = rblk;
 			tp->t_fb_ptr = ptr;
 		} else {
 			/*
 			 * Initialization failed. Release the reference count on
 			 * the looked up default stack.
 			 */
 			refcount_release(&rblk->tfb_refcnt);
 		}
 	}
 	tp->snd_wl1 = sc->sc_irs;
 	tp->snd_max = tp->iss + 1;
 	tp->snd_nxt = tp->iss + 1;
 	tp->rcv_up = sc->sc_irs + 1;
 	tp->rcv_wnd = sc->sc_wnd;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
 	if (sc->sc_flags & SCF_NOOPT)
 		tp->t_flags |= TF_NOOPT;
 	else {
 		if (sc->sc_flags & SCF_WINSCALE) {
 			tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
 			tp->snd_scale = sc->sc_requested_s_scale;
 			tp->request_r_scale = sc->sc_requested_r_scale;
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
 			tp->ts_recent = sc->sc_tsreflect;
 			tp->ts_recent_age = tcp_ts_getticks();
 			tp->ts_offset = sc->sc_tsoff;
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if (sc->sc_flags & SCF_SIGNATURE)
 			tp->t_flags |= TF_SIGNATURE;
 #endif
 		if (sc->sc_flags & SCF_SACK)
 			tp->t_flags |= TF_SACK_PERMIT;
 	}
 
 	tcp_ecn_syncache_socket(tp, sc);
 
 	/*
 	 * Set up MSS and get cached values from tcp_hostcache.
 	 * This might overwrite some of the defaults we just set.
 	 */
 	tcp_mss(tp, sc->sc_peer_mss);
 
 	/*
 	 * If the SYN,ACK was retransmitted, indicate that CWND to be
 	 * limited to one segment in cc_conn_init().
 	 * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
 	 */
 	if (sc->sc_rxmits > 1)
 		tp->snd_cwnd = 1;
 
 #ifdef TCP_OFFLOAD
 	/*
 	 * Allow a TOE driver to install its hooks.  Note that we hold the
 	 * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
 	 * new connection before the TOE driver has done its thing.
 	 */
 	if (ADDED_BY_TOE(sc)) {
 		struct toedev *tod = sc->sc_tod;
 
 		tod->tod_offload_socket(tod, sc->sc_todctx, so);
 	}
 #endif
 #ifdef TCP_BLACKBOX
 	/*
 	 * Inherit the log state from the listening socket, if
 	 * - the log state of the listening socket is not off and
 	 * - the listening socket was not auto selected from all sessions and
 	 * - a log id is not set on the listening socket.
 	 * This avoids inheriting a log state which was automatically set.
 	 */
 	if ((tcp_get_bblog_state(sototcpcb(lso)) != TCP_LOG_STATE_OFF) &&
 	    ((sototcpcb(lso)->t_flags2 & TF2_LOG_AUTO) == 0) &&
 	    (sototcpcb(lso)->t_lib == NULL)) {
 		tcp_log_state_change(tp, tcp_get_bblog_state(sototcpcb(lso)));
 	}
 #endif
 	/*
 	 * Copy and activate timers.
 	 */
 	tp->t_maxunacktime = sototcpcb(lso)->t_maxunacktime;
 	tp->t_keepinit = sototcpcb(lso)->t_keepinit;
 	tp->t_keepidle = sototcpcb(lso)->t_keepidle;
 	tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
 	tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 
 	TCPSTAT_INC(tcps_accepts);
 	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, TCPS_LISTEN);
 
 	if (!solisten_enqueue(so, SS_ISCONNECTED))
 		tp->t_flags |= TF_SONOTCONN;
 
 	return (so);
 
 allocfail:
 	/*
 	 * Drop the connection; we will either send a RST or have the peer
 	 * retransmit its SYN again after its RTO and try again.
 	 */
 	if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 		log(LOG_DEBUG, "%s; %s: Socket create failed "
 		    "due to limits or memory shortage\n",
 		    s, __func__);
 		free(s, M_TCPLOG);
 	}
 	TCPSTAT_INC(tcps_listendrop);
 	return (NULL);
 
 abort:
-	in_pcbdetach(inp);
 	in_pcbfree(inp);
 	sodealloc(so);
 	if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 		log(LOG_DEBUG, "%s; %s: in%s_pcbconnect failed with error %i\n",
 		    s, __func__, (sc->sc_inc.inc_flags & INC_ISIPV6) ? "6" : "",
 		    error);
 		free(s, M_TCPLOG);
 	}
 	TCPSTAT_INC(tcps_listendrop);
 	return (NULL);
 }
 
 /*
  * This function gets called when we receive an ACK for a
  * socket in the LISTEN state.  We look up the connection
  * in the syncache, and if its there, we pull it out of
  * the cache and turn it into a full-blown connection in
  * the SYN-RECEIVED state.
  *
  * On syncache_socket() success the newly created socket
  * has its underlying inp locked.
  */
 int
 syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct socket **lsop, struct mbuf *m, uint16_t port)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	struct syncache scs;
 	char *s;
 	bool locked;
 
 	NET_EPOCH_ASSERT();
 	KASSERT((tcp_get_flags(th) & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
 	    ("%s: can handle only ACK", __func__));
 
 	if (syncache_cookiesonly()) {
 		sc = NULL;
 		sch = syncache_hashbucket(inc);
 		locked = false;
 	} else {
 		sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 		locked = true;
 		SCH_LOCK_ASSERT(sch);
 	}
 
 #ifdef INVARIANTS
 	/*
 	 * Test code for syncookies comparing the syncache stored
 	 * values with the reconstructed values from the cookie.
 	 */
 	if (sc != NULL)
 		syncookie_cmp(inc, sch, sc, th, to, *lsop, port);
 #endif
 
 	if (sc == NULL) {
 		/*
 		 * There is no syncache entry, so see if this ACK is
 		 * a returning syncookie.  To do this, first:
 		 *  A. Check if syncookies are used in case of syncache
 		 *     overflows
 		 *  B. See if this socket has had a syncache entry dropped in
 		 *     the recent past. We don't want to accept a bogus
 		 *     syncookie if we've never received a SYN or accept it
 		 *     twice.
 		 *  C. check that the syncookie is valid.  If it is, then
 		 *     cobble up a fake syncache entry, and return.
 		 */
 		if (locked && !V_tcp_syncookies) {
 			SCH_UNLOCK(sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
 				    "segment rejected (syncookies disabled)\n",
 				    s, __func__);
 			goto failed;
 		}
 		if (locked && !V_tcp_syncookiesonly &&
 		    sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
 			SCH_UNLOCK(sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
 				    "segment rejected (no syncache entry)\n",
 				    s, __func__);
 			goto failed;
 		}
 		bzero(&scs, sizeof(scs));
 		sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop, port);
 		if (locked)
 			SCH_UNLOCK(sch);
 		if (sc == NULL) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Segment failed "
 				    "SYNCOOKIE authentication, segment rejected "
 				    "(probably spoofed)\n", s, __func__);
 			goto failed;
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		/* If received ACK has MD5 signature, check it. */
 		if ((to->to_flags & TOF_SIGNATURE) != 0 &&
 		    (!TCPMD5_ENABLED() ||
 		    TCPMD5_INPUT(m, th, to->to_signature) != 0)) {
 			/* Drop the ACK. */
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Segment rejected, "
 				    "MD5 signature doesn't match.\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 			TCPSTAT_INC(tcps_sig_err_sigopt);
 			return (-1); /* Do not send RST */
 		}
 #endif /* TCP_SIGNATURE */
 		TCPSTATES_INC(TCPS_SYN_RECEIVED);
 	} else {
 		if (sc->sc_port != port) {
 			SCH_UNLOCK(sch);
 			return (0);
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		/*
 		 * If listening socket requested TCP digests, check that
 		 * received ACK has signature and it is correct.
 		 * If not, drop the ACK and leave sc entry in th cache,
 		 * because SYN was received with correct signature.
 		 */
 		if (sc->sc_flags & SCF_SIGNATURE) {
 			if ((to->to_flags & TOF_SIGNATURE) == 0) {
 				/* No signature */
 				TCPSTAT_INC(tcps_sig_err_nosigopt);
 				SCH_UNLOCK(sch);
 				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 					log(LOG_DEBUG, "%s; %s: Segment "
 					    "rejected, MD5 signature wasn't "
 					    "provided.\n", s, __func__);
 					free(s, M_TCPLOG);
 				}
 				return (-1); /* Do not send RST */
 			}
 			if (!TCPMD5_ENABLED() ||
 			    TCPMD5_INPUT(m, th, to->to_signature) != 0) {
 				/* Doesn't match or no SA */
 				SCH_UNLOCK(sch);
 				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 					log(LOG_DEBUG, "%s; %s: Segment "
 					    "rejected, MD5 signature doesn't "
 					    "match.\n", s, __func__);
 					free(s, M_TCPLOG);
 				}
 				return (-1); /* Do not send RST */
 			}
 		}
 #endif /* TCP_SIGNATURE */
 
 		/*
 		 * RFC 7323 PAWS: If we have a timestamp on this segment and
 		 * it's less than ts_recent, drop it.
 		 * XXXMT: RFC 7323 also requires to send an ACK.
 		 *        In tcp_input.c this is only done for TCP segments
 		 *        with user data, so be consistent here and just drop
 		 *        the segment.
 		 */
 		if (sc->sc_flags & SCF_TIMESTAMP && to->to_flags & TOF_TS &&
 		    TSTMP_LT(to->to_tsval, sc->sc_tsreflect)) {
 			SCH_UNLOCK(sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG,
 				    "%s; %s: SEG.TSval %u < TS.Recent %u, "
 				    "segment dropped\n", s, __func__,
 				    to->to_tsval, sc->sc_tsreflect);
 				free(s, M_TCPLOG);
 			}
 			return (-1);  /* Do not send RST */
 		}
 
 		/*
 		 * If timestamps were not negotiated during SYN/ACK and a
 		 * segment with a timestamp is received, ignore the
 		 * timestamp and process the packet normally.
 		 * See section 3.2 of RFC 7323.
 		 */
 		if (!(sc->sc_flags & SCF_TIMESTAMP) &&
 		    (to->to_flags & TOF_TS)) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Timestamp not "
 				    "expected, segment processed normally\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 				s = NULL;
 			}
 		}
 
 		/*
 		 * If timestamps were negotiated during SYN/ACK and a
 		 * segment without a timestamp is received, silently drop
 		 * the segment, unless the missing timestamps are tolerated.
 		 * See section 3.2 of RFC 7323.
 		 */
 		if ((sc->sc_flags & SCF_TIMESTAMP) &&
 		    !(to->to_flags & TOF_TS)) {
 			if (V_tcp_tolerate_missing_ts) {
 				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 					log(LOG_DEBUG,
 					    "%s; %s: Timestamp missing, "
 					    "segment processed normally\n",
 					    s, __func__);
 					free(s, M_TCPLOG);
 				}
 			} else {
 				SCH_UNLOCK(sch);
 				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 					log(LOG_DEBUG,
 					    "%s; %s: Timestamp missing, "
 					    "segment silently dropped\n",
 					    s, __func__);
 					free(s, M_TCPLOG);
 				}
 				return (-1);  /* Do not send RST */
 			}
 		}
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 		sch->sch_length--;
 #ifdef TCP_OFFLOAD
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			tod->tod_syncache_removed(tod, sc->sc_todctx);
 		}
 #endif
 		SCH_UNLOCK(sch);
 	}
 
 	/*
 	 * Segment validation:
 	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
 	 */
 	if (th->th_ack != sc->sc_iss + 1) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
 		goto failed;
 	}
 
 	/*
 	 * The SEQ must fall in the window starting at the received
 	 * initial receive sequence number + 1 (the SYN).
 	 */
 	if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
 	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
 		goto failed;
 	}
 
 	*lsop = syncache_socket(sc, *lsop, m);
 
 	if (__predict_false(*lsop == NULL)) {
 		TCPSTAT_INC(tcps_sc_aborted);
 		TCPSTATES_DEC(TCPS_SYN_RECEIVED);
 	} else
 		TCPSTAT_INC(tcps_sc_completed);
 
 /* how do we find the inp for the new socket? */
 	if (sc != &scs)
 		syncache_free(sc);
 	return (1);
 failed:
 	if (sc != NULL) {
 		TCPSTATES_DEC(TCPS_SYN_RECEIVED);
 		if (sc != &scs)
 			syncache_free(sc);
 	}
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	*lsop = NULL;
 	return (0);
 }
 
 static struct socket *
 syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m,
     uint64_t response_cookie)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	unsigned int *pending_counter;
 	struct socket *so;
 
 	NET_EPOCH_ASSERT();
 
 	pending_counter = intotcpcb(sotoinpcb(lso))->t_tfo_pending;
 	so = syncache_socket(sc, lso, m);
 	if (so == NULL) {
 		TCPSTAT_INC(tcps_sc_aborted);
 		atomic_subtract_int(pending_counter, 1);
 	} else {
 		soisconnected(so);
 		inp = sotoinpcb(so);
 		tp = intotcpcb(inp);
 		tp->t_flags |= TF_FASTOPEN;
 		tp->t_tfo_cookie.server = response_cookie;
 		tp->snd_max = tp->iss;
 		tp->snd_nxt = tp->iss;
 		tp->t_tfo_pending = pending_counter;
 		TCPSTATES_INC(TCPS_SYN_RECEIVED);
 		TCPSTAT_INC(tcps_sc_completed);
 	}
 
 	return (so);
 }
 
 /*
  * Given a LISTEN socket and an inbound SYN request, add
  * this to the syn cache, and send back a segment:
  *	<SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
  * to the source.
  *
  * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
  * Doing so would require that we hold onto the data and deliver it
  * to the application.  However, if we are the target of a SYN-flood
  * DoS attack, an attacker could send data which would eventually
  * consume all available buffer space if it were ACKed.  By not ACKing
  * the data, we avoid this DoS scenario.
  *
  * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
  * cookie is processed and a new socket is created.  In this case, any data
  * accompanying the SYN will be queued to the socket by tcp_input() and will
  * be ACKed either when the application sends response data or the delayed
  * ACK timer expires, whichever comes first.
  */
 struct socket *
 syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod,
     void *todctx, uint8_t iptos, uint16_t port)
 {
 	struct tcpcb *tp;
 	struct socket *rv = NULL;
 	struct syncache *sc = NULL;
 	struct syncache_head *sch;
 	struct mbuf *ipopts = NULL;
 	u_int ltflags;
 	int win, ip_ttl, ip_tos;
 	char *s;
 #ifdef INET6
 	int autoflowlabel = 0;
 #endif
 #ifdef MAC
 	struct label *maclabel;
 #endif
 	struct syncache scs;
 	struct ucred *cred;
 	uint64_t tfo_response_cookie;
 	unsigned int *tfo_pending = NULL;
 	int tfo_cookie_valid = 0;
 	int tfo_response_cookie_valid = 0;
 	bool locked;
 
 	INP_RLOCK_ASSERT(inp);			/* listen socket */
 	KASSERT((tcp_get_flags(th) & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
 	    ("%s: unexpected tcp flags", __func__));
 
 	/*
 	 * Combine all so/tp operations very early to drop the INP lock as
 	 * soon as possible.
 	 */
 	KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
 	tp = sototcpcb(so);
 	cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred);
 
 #ifdef INET6
 	if (inc->inc_flags & INC_ISIPV6) {
 		if (inp->inp_flags & IN6P_AUTOFLOWLABEL) {
 			autoflowlabel = 1;
 		}
 		ip_ttl = in6_selecthlim(inp, NULL);
 		if ((inp->in6p_outputopts == NULL) ||
 		    (inp->in6p_outputopts->ip6po_tclass == -1)) {
 			ip_tos = 0;
 		} else {
 			ip_tos = inp->in6p_outputopts->ip6po_tclass;
 		}
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		ip_ttl = inp->inp_ip_ttl;
 		ip_tos = inp->inp_ip_tos;
 	}
 #endif
 	win = so->sol_sbrcv_hiwat;
 	ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
 
 	if (V_tcp_fastopen_server_enable && IS_FASTOPEN(tp->t_flags) &&
 	    (tp->t_tfo_pending != NULL) &&
 	    (to->to_flags & TOF_FASTOPEN)) {
 		/*
 		 * Limit the number of pending TFO connections to
 		 * approximately half of the queue limit.  This prevents TFO
 		 * SYN floods from starving the service by filling the
 		 * listen queue with bogus TFO connections.
 		 */
 		if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
 		    (so->sol_qlimit / 2)) {
 			int result;
 
 			result = tcp_fastopen_check_cookie(inc,
 			    to->to_tfo_cookie, to->to_tfo_len,
 			    &tfo_response_cookie);
 			tfo_cookie_valid = (result > 0);
 			tfo_response_cookie_valid = (result >= 0);
 		}
 
 		/*
 		 * Remember the TFO pending counter as it will have to be
 		 * decremented below if we don't make it to syncache_tfo_expand().
 		 */
 		tfo_pending = tp->t_tfo_pending;
 	}
 
 #ifdef MAC
 	if (mac_syncache_init(&maclabel) != 0) {
 		INP_RUNLOCK(inp);
 		goto done;
 	} else
 		mac_syncache_create(maclabel, inp);
 #endif
 	if (!tfo_cookie_valid)
 		INP_RUNLOCK(inp);
 
 	/*
 	 * Remember the IP options, if any.
 	 */
 #ifdef INET6
 	if (!(inc->inc_flags & INC_ISIPV6))
 #endif
 #ifdef INET
 		ipopts = (m) ? ip_srcroute(m) : NULL;
 #else
 		ipopts = NULL;
 #endif
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	/*
 	 * When the socket is TCP-MD5 enabled check that,
 	 *  - a signed packet is valid
 	 *  - a non-signed packet does not have a security association
 	 *
 	 *  If a signed packet fails validation or a non-signed packet has a
 	 *  security association, the packet will be dropped.
 	 */
 	if (ltflags & TF_SIGNATURE) {
 		if (to->to_flags & TOF_SIGNATURE) {
 			if (!TCPMD5_ENABLED() ||
 			    TCPMD5_INPUT(m, th, to->to_signature) != 0)
 				goto done;
 		} else {
 			if (TCPMD5_ENABLED() &&
 			    TCPMD5_INPUT(m, NULL, NULL) != ENOENT)
 				goto done;
 		}
 	} else if (to->to_flags & TOF_SIGNATURE)
 		goto done;
 #endif	/* TCP_SIGNATURE */
 	/*
 	 * See if we already have an entry for this connection.
 	 * If we do, resend the SYN,ACK, and reset the retransmit timer.
 	 *
 	 * XXX: should the syncache be re-initialized with the contents
 	 * of the new SYN here (which may have different options?)
 	 *
 	 * XXX: We do not check the sequence number to see if this is a
 	 * real retransmit or a new connection attempt.  The question is
 	 * how to handle such a case; either ignore it as spoofed, or
 	 * drop the current entry and create a new one?
 	 */
 	if (syncache_cookiesonly()) {
 		sc = NULL;
 		sch = syncache_hashbucket(inc);
 		locked = false;
 	} else {
 		sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 		locked = true;
 		SCH_LOCK_ASSERT(sch);
 	}
 	if (sc != NULL) {
 		if (tfo_cookie_valid)
 			INP_RUNLOCK(inp);
 		TCPSTAT_INC(tcps_sc_dupsyn);
 		if (ipopts) {
 			/*
 			 * If we were remembering a previous source route,
 			 * forget it and use the new one we've been given.
 			 */
 			if (sc->sc_ipopts)
 				(void) m_free(sc->sc_ipopts);
 			sc->sc_ipopts = ipopts;
 		}
 		/*
 		 * Update timestamp if present.
 		 */
 		if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
 			sc->sc_tsreflect = to->to_tsval;
 		else
 			sc->sc_flags &= ~SCF_TIMESTAMP;
 		/*
 		 * Adjust ECN response if needed, e.g. different
 		 * IP ECN field, or a fallback by the remote host.
 		 */
 		if (sc->sc_flags & SCF_ECN_MASK) {
 			sc->sc_flags &= ~SCF_ECN_MASK;
 			sc->sc_flags = tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
 		}
 #ifdef MAC
 		/*
 		 * Since we have already unconditionally allocated label
 		 * storage, free it up.  The syncache entry will already
 		 * have an initialized label we can use.
 		 */
 		mac_syncache_destroy(&maclabel);
 #endif
 		TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
 		/* Retransmit SYN|ACK and reset retransmit count. */
 		if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
 			    "resetting timer and retransmitting SYN|ACK\n",
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
 		if (syncache_respond(sc, m, TH_SYN|TH_ACK) == 0) {
 			sc->sc_rxmits = 0;
 			syncache_timeout(sc, sch, 1);
 			TCPSTAT_INC(tcps_sndacks);
 			TCPSTAT_INC(tcps_sndtotal);
 		}
 		SCH_UNLOCK(sch);
 		goto donenoprobe;
 	}
 
 	if (tfo_cookie_valid) {
 		bzero(&scs, sizeof(scs));
 		sc = &scs;
 		goto skip_alloc;
 	}
 
 	/*
 	 * Skip allocating a syncache entry if we are just going to discard
 	 * it later.
 	 */
 	if (!locked) {
 		bzero(&scs, sizeof(scs));
 		sc = &scs;
 	} else
 		sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 	if (sc == NULL) {
 		/*
 		 * The zone allocator couldn't provide more entries.
 		 * Treat this as if the cache was full; drop the oldest
 		 * entry and insert the new one.
 		 */
 		TCPSTAT_INC(tcps_sc_zonefail);
 		if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) {
 			sch->sch_last_overflow = time_uptime;
 			syncache_drop(sc, sch);
 			syncache_pause(inc);
 		}
 		sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 		if (sc == NULL) {
 			if (V_tcp_syncookies) {
 				bzero(&scs, sizeof(scs));
 				sc = &scs;
 			} else {
 				KASSERT(locked,
 				    ("%s: bucket unexpectedly unlocked",
 				    __func__));
 				SCH_UNLOCK(sch);
 				if (ipopts)
 					(void) m_free(ipopts);
 				goto done;
 			}
 		}
 	}
 
 skip_alloc:
 	if (!tfo_cookie_valid && tfo_response_cookie_valid)
 		sc->sc_tfo_cookie = &tfo_response_cookie;
 
 	/*
 	 * Fill in the syncache values.
 	 */
 #ifdef MAC
 	sc->sc_label = maclabel;
 #endif
 	sc->sc_cred = cred;
 	sc->sc_port = port;
 	cred = NULL;
 	sc->sc_ipopts = ipopts;
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 	sc->sc_ip_tos = ip_tos;
 	sc->sc_ip_ttl = ip_ttl;
 #ifdef TCP_OFFLOAD
 	sc->sc_tod = tod;
 	sc->sc_todctx = todctx;
 #endif
 	sc->sc_irs = th->th_seq;
 	sc->sc_flags = 0;
 	sc->sc_flowlabel = 0;
 
 	/*
 	 * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
 	 * win was derived from socket earlier in the function.
 	 */
 	win = imax(win, 0);
 	win = imin(win, TCP_MAXWIN);
 	sc->sc_wnd = win;
 
 	if (V_tcp_do_rfc1323 &&
 	    !(ltflags & TF_NOOPT)) {
 		/*
 		 * A timestamp received in a SYN makes
 		 * it ok to send timestamp requests and replies.
 		 */
 		if ((to->to_flags & TOF_TS) && (V_tcp_do_rfc1323 != 2)) {
 			sc->sc_tsreflect = to->to_tsval;
 			sc->sc_flags |= SCF_TIMESTAMP;
 			sc->sc_tsoff = tcp_new_ts_offset(inc);
 		}
 		if ((to->to_flags & TOF_SCALE) && (V_tcp_do_rfc1323 != 3)) {
 			int wscale = 0;
 
 			/*
 			 * Pick the smallest possible scaling factor that
 			 * will still allow us to scale up to sb_max, aka
 			 * kern.ipc.maxsockbuf.
 			 *
 			 * We do this because there are broken firewalls that
 			 * will corrupt the window scale option, leading to
 			 * the other endpoint believing that our advertised
 			 * window is unscaled.  At scale factors larger than
 			 * 5 the unscaled window will drop below 1500 bytes,
 			 * leading to serious problems when traversing these
 			 * broken firewalls.
 			 *
 			 * With the default maxsockbuf of 256K, a scale factor
 			 * of 3 will be chosen by this algorithm.  Those who
 			 * choose a larger maxsockbuf should watch out
 			 * for the compatibility problems mentioned above.
 			 *
 			 * RFC1323: The Window field in a SYN (i.e., a <SYN>
 			 * or <SYN,ACK>) segment itself is never scaled.
 			 */
 			while (wscale < TCP_MAX_WINSHIFT &&
 			    (TCP_MAXWIN << wscale) < sb_max)
 				wscale++;
 			sc->sc_requested_r_scale = wscale;
 			sc->sc_requested_s_scale = to->to_wscale;
 			sc->sc_flags |= SCF_WINSCALE;
 		}
 	}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	/*
 	 * If incoming packet has an MD5 signature, flag this in the
 	 * syncache so that syncache_respond() will do the right thing
 	 * with the SYN+ACK.
 	 */
 	if (to->to_flags & TOF_SIGNATURE)
 		sc->sc_flags |= SCF_SIGNATURE;
 #endif	/* TCP_SIGNATURE */
 	if (to->to_flags & TOF_SACKPERM)
 		sc->sc_flags |= SCF_SACK;
 	if (to->to_flags & TOF_MSS)
 		sc->sc_peer_mss = to->to_mss;	/* peer mss may be zero */
 	if (ltflags & TF_NOOPT)
 		sc->sc_flags |= SCF_NOOPT;
 	/* ECN Handshake */
 	if (V_tcp_do_ecn && (tp->t_flags2 & TF2_CANNOT_DO_ECN) == 0)
 		sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
 
 	if (V_tcp_syncookies)
 		sc->sc_iss = syncookie_generate(sch, sc);
 	else
 		sc->sc_iss = arc4random();
 #ifdef INET6
 	if (autoflowlabel) {
 		if (V_tcp_syncookies)
 			sc->sc_flowlabel = sc->sc_iss;
 		else
 			sc->sc_flowlabel = ip6_randomflowlabel();
 		sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK;
 	}
 #endif
 	if (locked)
 		SCH_UNLOCK(sch);
 
 	if (tfo_cookie_valid) {
 		rv = syncache_tfo_expand(sc, so, m, tfo_response_cookie);
 		/* INP_RUNLOCK(inp) will be performed by the caller */
 		goto tfo_expanded;
 	}
 
 	TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
 	/*
 	 * Do a standard 3-way handshake.
 	 */
 	if (syncache_respond(sc, m, TH_SYN|TH_ACK) == 0) {
 		if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
 			syncache_free(sc);
 		else if (sc != &scs)
 			syncache_insert(sc, sch);   /* locks and unlocks sch */
 		TCPSTAT_INC(tcps_sndacks);
 		TCPSTAT_INC(tcps_sndtotal);
 	} else {
 		if (sc != &scs)
 			syncache_free(sc);
 		TCPSTAT_INC(tcps_sc_dropped);
 	}
 	goto donenoprobe;
 
 done:
 	TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
 donenoprobe:
 	if (m)
 		m_freem(m);
 	/*
 	 * If tfo_pending is not NULL here, then a TFO SYN that did not
 	 * result in a new socket was processed and the associated pending
 	 * counter has not yet been decremented.  All such TFO processing paths
 	 * transit this point.
 	 */
 	if (tfo_pending != NULL)
 		tcp_fastopen_decrement_counter(tfo_pending);
 
 tfo_expanded:
 	if (cred != NULL)
 		crfree(cred);
 #ifdef MAC
 	if (sc == &scs)
 		mac_syncache_destroy(&maclabel);
 #endif
 	return (rv);
 }
 
 /*
  * Send SYN|ACK or ACK to the peer.  Either in response to a peer's segment,
  * i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
  */
 static int
 syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
 {
 	struct ip *ip = NULL;
 	struct mbuf *m;
 	struct tcphdr *th = NULL;
 	struct udphdr *udp = NULL;
 	int optlen, error = 0;	/* Make compiler happy */
 	u_int16_t hlen, tlen, mssopt, ulen;
 	struct tcpopt to;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 #endif
 
 	NET_EPOCH_ASSERT();
 
 	hlen =
 #ifdef INET6
 	       (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) :
 #endif
 		sizeof(struct ip);
 	tlen = hlen + sizeof(struct tcphdr);
 	if (sc->sc_port) {
 		tlen += sizeof(struct udphdr);
 	}
 	/* Determine MSS we advertize to other end of connection. */
 	mssopt = tcp_mssopt(&sc->sc_inc);
 	if (sc->sc_port)
 		mssopt -= V_tcp_udp_tunneling_overhead;
 	mssopt = max(mssopt, V_tcp_minmss);
 
 	/* XXX: Assume that the entire packet will fit in a header mbuf. */
 	KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
 	    ("syncache: mbuf too small: hlen %u, sc_port %u, max_linkhdr %d + "
 	    "tlen %d + TCP_MAXOLEN %ju <= MHLEN %d", hlen, sc->sc_port,
 	    max_linkhdr, tlen, (uintmax_t)TCP_MAXOLEN, MHLEN));
 
 	/* Create the IP+TCP header from scratch. */
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 #ifdef MAC
 	mac_syncache_create_mbuf(sc->sc_label, m);
 #endif
 	m->m_data += max_linkhdr;
 	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_src = sc->sc_inc.inc6_laddr;
 		ip6->ip6_dst = sc->sc_inc.inc6_faddr;
 		ip6->ip6_plen = htons(tlen - hlen);
 		/* ip6_hlim is set after checksum */
 		/* Zero out traffic class and flow label. */
 		ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
 		ip6->ip6_flow |= sc->sc_flowlabel;
 		if (sc->sc_port != 0) {
 			ip6->ip6_nxt = IPPROTO_UDP;
 			udp = (struct udphdr *)(ip6 + 1);
 			udp->uh_sport = htons(V_tcp_udp_tunneling_port);
 			udp->uh_dport = sc->sc_port;
 			ulen = (tlen - sizeof(struct ip6_hdr));
 			th = (struct tcphdr *)(udp + 1);
 		} else {
 			ip6->ip6_nxt = IPPROTO_TCP;
 			th = (struct tcphdr *)(ip6 + 1);
 		}
 		ip6->ip6_flow |= htonl(sc->sc_ip_tos << IPV6_FLOWLABEL_LEN);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		ip = mtod(m, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(struct ip) >> 2;
 		ip->ip_len = htons(tlen);
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_sum = 0;
 		ip->ip_src = sc->sc_inc.inc_laddr;
 		ip->ip_dst = sc->sc_inc.inc_faddr;
 		ip->ip_ttl = sc->sc_ip_ttl;
 		ip->ip_tos = sc->sc_ip_tos;
 
 		/*
 		 * See if we should do MTU discovery.  Route lookups are
 		 * expensive, so we will only unset the DF bit if:
 		 *
 		 *	1) path_mtu_discovery is disabled
 		 *	2) the SCF_UNREACH flag has been set
 		 */
 		if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
 		       ip->ip_off |= htons(IP_DF);
 		if (sc->sc_port == 0) {
 			ip->ip_p = IPPROTO_TCP;
 			th = (struct tcphdr *)(ip + 1);
 		} else {
 			ip->ip_p = IPPROTO_UDP;
 			udp = (struct udphdr *)(ip + 1);
 			udp->uh_sport = htons(V_tcp_udp_tunneling_port);
 			udp->uh_dport = sc->sc_port;
 			ulen = (tlen - sizeof(struct ip));
 			th = (struct tcphdr *)(udp + 1);
 		}
 	}
 #endif /* INET */
 	th->th_sport = sc->sc_inc.inc_lport;
 	th->th_dport = sc->sc_inc.inc_fport;
 
 	if (flags & TH_SYN)
 		th->th_seq = htonl(sc->sc_iss);
 	else
 		th->th_seq = htonl(sc->sc_iss + 1);
 	th->th_ack = htonl(sc->sc_irs + 1);
 	th->th_off = sizeof(struct tcphdr) >> 2;
 	th->th_win = htons(sc->sc_wnd);
 	th->th_urp = 0;
 
 	flags = tcp_ecn_syncache_respond(flags, sc);
 	tcp_set_flags(th, flags);
 
 	/* Tack on the TCP options. */
 	if ((sc->sc_flags & SCF_NOOPT) == 0) {
 		to.to_flags = 0;
 
 		if (flags & TH_SYN) {
 			to.to_mss = mssopt;
 			to.to_flags = TOF_MSS;
 			if (sc->sc_flags & SCF_WINSCALE) {
 				to.to_wscale = sc->sc_requested_r_scale;
 				to.to_flags |= TOF_SCALE;
 			}
 			if (sc->sc_flags & SCF_SACK)
 				to.to_flags |= TOF_SACKPERM;
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 			if (sc->sc_flags & SCF_SIGNATURE)
 				to.to_flags |= TOF_SIGNATURE;
 #endif
 			if (sc->sc_tfo_cookie) {
 				to.to_flags |= TOF_FASTOPEN;
 				to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
 				to.to_tfo_cookie = sc->sc_tfo_cookie;
 				/* don't send cookie again when retransmitting response */
 				sc->sc_tfo_cookie = NULL;
 			}
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			to.to_tsval = sc->sc_tsoff + tcp_ts_getticks();
 			to.to_tsecr = sc->sc_tsreflect;
 			to.to_flags |= TOF_TS;
 		}
 		optlen = tcp_addoptions(&to, (u_char *)(th + 1));
 
 		/* Adjust headers by option size. */
 		th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
 		m->m_len += optlen;
 		m->m_pkthdr.len += optlen;
 #ifdef INET6
 		if (sc->sc_inc.inc_flags & INC_ISIPV6)
 			ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
 		else
 #endif
 			ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if (sc->sc_flags & SCF_SIGNATURE) {
 			KASSERT(to.to_flags & TOF_SIGNATURE,
 			    ("tcp_addoptions() didn't set tcp_signature"));
 
 			/* NOTE: to.to_signature is inside of mbuf */
 			if (!TCPMD5_ENABLED() ||
 			    TCPMD5_OUTPUT(m, th, to.to_signature) != 0) {
 				m_freem(m);
 				return (EACCES);
 			}
 		}
 #endif
 	} else
 		optlen = 0;
 
 	if (udp) {
 		ulen += optlen;
 		udp->uh_ulen = htons(ulen);
 	}
 	M_SETFIB(m, sc->sc_inc.inc_fibnum);
 	/*
 	 * If we have peer's SYN and it has a flowid, then let's assign it to
 	 * our SYN|ACK.  ip6_output() and ip_output() will not assign flowid
 	 * to SYN|ACK due to lack of inp here.
 	 */
 	if (m0 != NULL && M_HASHTYPE_GET(m0) != M_HASHTYPE_NONE) {
 		m->m_pkthdr.flowid = m0->m_pkthdr.flowid;
 		M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0));
 	}
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		if (sc->sc_port) {
 			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			udp->uh_sum = in6_cksum_pseudo(ip6, ulen,
 			      IPPROTO_UDP, 0);
 			th->th_sum = htons(0);
 		} else {
 			m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
 			    IPPROTO_TCP, 0);
 		}
 		ip6->ip6_hlim = sc->sc_ip_ttl;
 #ifdef TCP_OFFLOAD
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
 
 			return (error);
 		}
 #endif
 		TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
 		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		if (sc->sc_port) {
 			m->m_pkthdr.csum_flags = CSUM_UDP;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
 			      ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
 			th->th_sum = htons(0);
 		} else {
 			m->m_pkthdr.csum_flags = CSUM_TCP;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 			    htons(tlen + optlen - hlen + IPPROTO_TCP));
 		}
 #ifdef TCP_OFFLOAD
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
 
 			return (error);
 		}
 #endif
 		TCP_PROBE5(send, NULL, NULL, ip, NULL, th);
 		error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
 	}
 #endif
 	return (error);
 }
 
 /*
  * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks
  * that exceed the capacity of the syncache by avoiding the storage of any
  * of the SYNs we receive.  Syncookies defend against blind SYN flooding
  * attacks where the attacker does not have access to our responses.
  *
  * Syncookies encode and include all necessary information about the
  * connection setup within the SYN|ACK that we send back.  That way we
  * can avoid keeping any local state until the ACK to our SYN|ACK returns
  * (if ever).  Normally the syncache and syncookies are running in parallel
  * with the latter taking over when the former is exhausted.  When matching
  * syncache entry is found the syncookie is ignored.
  *
  * The only reliable information persisting the 3WHS is our initial sequence
  * number ISS of 32 bits.  Syncookies embed a cryptographically sufficient
  * strong hash (MAC) value and a few bits of TCP SYN options in the ISS
  * of our SYN|ACK.  The MAC can be recomputed when the ACK to our SYN|ACK
  * returns and signifies a legitimate connection if it matches the ACK.
  *
  * The available space of 32 bits to store the hash and to encode the SYN
  * option information is very tight and we should have at least 24 bits for
  * the MAC to keep the number of guesses by blind spoofing reasonably high.
  *
  * SYN option information we have to encode to fully restore a connection:
  * MSS: is imporant to chose an optimal segment size to avoid IP level
  *   fragmentation along the path.  The common MSS values can be encoded
  *   in a 3-bit table.  Uncommon values are captured by the next lower value
  *   in the table leading to a slight increase in packetization overhead.
  * WSCALE: is necessary to allow large windows to be used for high delay-
  *   bandwidth product links.  Not scaling the window when it was initially
  *   negotiated is bad for performance as lack of scaling further decreases
  *   the apparent available send window.  We only need to encode the WSCALE
  *   we received from the remote end.  Our end can be recalculated at any
  *   time.  The common WSCALE values can be encoded in a 3-bit table.
  *   Uncommon values are captured by the next lower value in the table
  *   making us under-estimate the available window size halving our
  *   theoretically possible maximum throughput for that connection.
  * SACK: Greatly assists in packet loss recovery and requires 1 bit.
  * TIMESTAMP and SIGNATURE is not encoded because they are permanent options
  *   that are included in all segments on a connection.  We enable them when
  *   the ACK has them.
  *
  * Security of syncookies and attack vectors:
  *
  * The MAC is computed over (faddr||laddr||fport||lport||irs||flags||secmod)
  * together with the gloabl secret to make it unique per connection attempt.
  * Thus any change of any of those parameters results in a different MAC output
  * in an unpredictable way unless a collision is encountered.  24 bits of the
  * MAC are embedded into the ISS.
  *
  * To prevent replay attacks two rotating global secrets are updated with a
  * new random value every 15 seconds.  The life-time of a syncookie is thus
  * 15-30 seconds.
  *
  * Vector 1: Attacking the secret.  This requires finding a weakness in the
  * MAC itself or the way it is used here.  The attacker can do a chosen plain
  * text attack by varying and testing the all parameters under his control.
  * The strength depends on the size and randomness of the secret, and the
  * cryptographic security of the MAC function.  Due to the constant updating
  * of the secret the attacker has at most 29.999 seconds to find the secret
  * and launch spoofed connections.  After that he has to start all over again.
  *
  * Vector 2: Collision attack on the MAC of a single ACK.  With a 24 bit MAC
  * size an average of 4,823 attempts are required for a 50% chance of success
  * to spoof a single syncookie (birthday collision paradox).  However the
  * attacker is blind and doesn't know if one of his attempts succeeded unless
  * he has a side channel to interfere success from.  A single connection setup
  * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets.
  * This many attempts are required for each one blind spoofed connection.  For
  * every additional spoofed connection he has to launch another N attempts.
  * Thus for a sustained rate 100 spoofed connections per second approximately
  * 1,800,000 packets per second would have to be sent.
  *
  * NB: The MAC function should be fast so that it doesn't become a CPU
  * exhaustion attack vector itself.
  *
  * References:
  *  RFC4987 TCP SYN Flooding Attacks and Common Mitigations
  *  SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996
  *   http://cr.yp.to/syncookies.html    (overview)
  *   http://cr.yp.to/syncookies/archive (details)
  *
  *
  * Schematic construction of a syncookie enabled Initial Sequence Number:
  *  0        1         2         3
  *  12345678901234567890123456789012
  * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP|
  *
  *  x 24 MAC (truncated)
  *  W  3 Send Window Scale index
  *  M  3 MSS index
  *  S  1 SACK permitted
  *  P  1 Odd/even secret
  */
 
 /*
  * Distribution and probability of certain MSS values.  Those in between are
  * rounded down to the next lower one.
  * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
  *                            .2%  .3%   5%    7%    7%    20%   15%   45%
  */
 static int tcp_sc_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
 
 /*
  * Distribution and probability of certain WSCALE values.  We have to map the
  * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3
  * bits based on prevalence of certain values.  Where we don't have an exact
  * match for are rounded down to the next lower one letting us under-estimate
  * the true available window.  At the moment this would happen only for the
  * very uncommon values 3, 5 and those above 8 (more than 16MB socket buffer
  * and window size).  The absence of the WSCALE option (no scaling in either
  * direction) is encoded with index zero.
  * [WSCALE values histograms, Allman, 2012]
  *                            X 10 10 35  5  6 14 10%   by host
  *                            X 11  4  5  5 18 49  3%   by connections
  */
 static int tcp_sc_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
 
 /*
  * Compute the MAC for the SYN cookie.  SIPHASH-2-4 is chosen for its speed
  * and good cryptographic properties.
  */
 static uint32_t
 syncookie_mac(struct in_conninfo *inc, tcp_seq irs, uint8_t flags,
     uint8_t *secbits, uintptr_t secmod)
 {
 	SIPHASH_CTX ctx;
 	uint32_t siphash[2];
 
 	SipHash24_Init(&ctx);
 	SipHash_SetKey(&ctx, secbits);
 	switch (inc->inc_flags & INC_ISIPV6) {
 #ifdef INET
 	case 0:
 		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
 		SipHash_Update(&ctx, &inc->inc_laddr, sizeof(inc->inc_laddr));
 		break;
 #endif
 #ifdef INET6
 	case INC_ISIPV6:
 		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
 		SipHash_Update(&ctx, &inc->inc6_laddr, sizeof(inc->inc6_laddr));
 		break;
 #endif
 	}
 	SipHash_Update(&ctx, &inc->inc_fport, sizeof(inc->inc_fport));
 	SipHash_Update(&ctx, &inc->inc_lport, sizeof(inc->inc_lport));
 	SipHash_Update(&ctx, &irs, sizeof(irs));
 	SipHash_Update(&ctx, &flags, sizeof(flags));
 	SipHash_Update(&ctx, &secmod, sizeof(secmod));
 	SipHash_Final((u_int8_t *)&siphash, &ctx);
 
 	return (siphash[0] ^ siphash[1]);
 }
 
 static tcp_seq
 syncookie_generate(struct syncache_head *sch, struct syncache *sc)
 {
 	u_int i, secbit, wscale;
 	uint32_t iss, hash;
 	uint8_t *secbits;
 	union syncookie cookie;
 
 	cookie.cookie = 0;
 
 	/* Map our computed MSS into the 3-bit index. */
 	for (i = nitems(tcp_sc_msstab) - 1;
 	     tcp_sc_msstab[i] > sc->sc_peer_mss && i > 0;
 	     i--)
 		;
 	cookie.flags.mss_idx = i;
 
 	/*
 	 * Map the send window scale into the 3-bit index but only if
 	 * the wscale option was received.
 	 */
 	if (sc->sc_flags & SCF_WINSCALE) {
 		wscale = sc->sc_requested_s_scale;
 		for (i = nitems(tcp_sc_wstab) - 1;
 		    tcp_sc_wstab[i] > wscale && i > 0;
 		     i--)
 			;
 		cookie.flags.wscale_idx = i;
 	}
 
 	/* Can we do SACK? */
 	if (sc->sc_flags & SCF_SACK)
 		cookie.flags.sack_ok = 1;
 
 	/* Which of the two secrets to use. */
 	secbit = V_tcp_syncache.secret.oddeven & 0x1;
 	cookie.flags.odd_even = secbit;
 
 	secbits = V_tcp_syncache.secret.key[secbit];
 	hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits,
 	    (uintptr_t)sch);
 
 	/*
 	 * Put the flags into the hash and XOR them to get better ISS number
 	 * variance.  This doesn't enhance the cryptographic strength and is
 	 * done to prevent the 8 cookie bits from showing up directly on the
 	 * wire.
 	 */
 	iss = hash & ~0xff;
 	iss |= cookie.cookie ^ (hash >> 24);
 
 	TCPSTAT_INC(tcps_sc_sendcookie);
 	return (iss);
 }
 
 static struct syncache *
 syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
     struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
     struct socket *lso, uint16_t port)
 {
 	uint32_t hash;
 	uint8_t *secbits;
 	tcp_seq ack, seq;
 	int wnd, wscale = 0;
 	union syncookie cookie;
 
 	/*
 	 * Pull information out of SYN-ACK/ACK and revert sequence number
 	 * advances.
 	 */
 	ack = th->th_ack - 1;
 	seq = th->th_seq - 1;
 
 	/*
 	 * Unpack the flags containing enough information to restore the
 	 * connection.
 	 */
 	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
 
 	/* Which of the two secrets to use. */
 	secbits = V_tcp_syncache.secret.key[cookie.flags.odd_even];
 
 	hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch);
 
 	/* The recomputed hash matches the ACK if this was a genuine cookie. */
 	if ((ack & ~0xff) != (hash & ~0xff))
 		return (NULL);
 
 	/* Fill in the syncache values. */
 	sc->sc_flags = 0;
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 	sc->sc_ipopts = NULL;
 
 	sc->sc_irs = seq;
 	sc->sc_iss = ack;
 
 	switch (inc->inc_flags & INC_ISIPV6) {
 #ifdef INET
 	case 0:
 		sc->sc_ip_ttl = sotoinpcb(lso)->inp_ip_ttl;
 		sc->sc_ip_tos = sotoinpcb(lso)->inp_ip_tos;
 		break;
 #endif
 #ifdef INET6
 	case INC_ISIPV6:
 		if (sotoinpcb(lso)->inp_flags & IN6P_AUTOFLOWLABEL)
 			sc->sc_flowlabel =
 			    htonl(sc->sc_iss) & IPV6_FLOWLABEL_MASK;
 		break;
 #endif
 	}
 
 	sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
 
 	/* We can simply recompute receive window scale we sent earlier. */
 	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
 		wscale++;
 
 	/* Only use wscale if it was enabled in the orignal SYN. */
 	if (cookie.flags.wscale_idx > 0) {
 		sc->sc_requested_r_scale = wscale;
 		sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
 		sc->sc_flags |= SCF_WINSCALE;
 	}
 
 	wnd = lso->sol_sbrcv_hiwat;
 	wnd = imax(wnd, 0);
 	wnd = imin(wnd, TCP_MAXWIN);
 	sc->sc_wnd = wnd;
 
 	if (cookie.flags.sack_ok)
 		sc->sc_flags |= SCF_SACK;
 
 	if (to->to_flags & TOF_TS) {
 		sc->sc_flags |= SCF_TIMESTAMP;
 		sc->sc_tsreflect = to->to_tsval;
 		sc->sc_tsoff = tcp_new_ts_offset(inc);
 	}
 
 	if (to->to_flags & TOF_SIGNATURE)
 		sc->sc_flags |= SCF_SIGNATURE;
 
 	sc->sc_rxmits = 0;
 
 	sc->sc_port = port;
 
 	TCPSTAT_INC(tcps_sc_recvcookie);
 	return (sc);
 }
 
 #ifdef INVARIANTS
 static int
 syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
     struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
     struct socket *lso, uint16_t port)
 {
 	struct syncache scs, *scx;
 	char *s;
 
 	bzero(&scs, sizeof(scs));
 	scx = syncookie_lookup(inc, sch, &scs, th, to, lso, port);
 
 	if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
 		return (0);
 
 	if (scx != NULL) {
 		if (sc->sc_peer_mss != scx->sc_peer_mss)
 			log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n",
 			    s, __func__, sc->sc_peer_mss, scx->sc_peer_mss);
 
 		if (sc->sc_requested_r_scale != scx->sc_requested_r_scale)
 			log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n",
 			    s, __func__, sc->sc_requested_r_scale,
 			    scx->sc_requested_r_scale);
 
 		if (sc->sc_requested_s_scale != scx->sc_requested_s_scale)
 			log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n",
 			    s, __func__, sc->sc_requested_s_scale,
 			    scx->sc_requested_s_scale);
 
 		if ((sc->sc_flags & SCF_SACK) != (scx->sc_flags & SCF_SACK))
 			log(LOG_DEBUG, "%s; %s: SACK different\n", s, __func__);
 	}
 
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	return (0);
 }
 #endif /* INVARIANTS */
 
 static void
 syncookie_reseed(void *arg)
 {
 	struct tcp_syncache *sc = arg;
 	uint8_t *secbits;
 	int secbit;
 
 	/*
 	 * Reseeding the secret doesn't have to be protected by a lock.
 	 * It only must be ensured that the new random values are visible
 	 * to all CPUs in a SMP environment.  The atomic with release
 	 * semantics ensures that.
 	 */
 	secbit = (sc->secret.oddeven & 0x1) ? 0 : 1;
 	secbits = sc->secret.key[secbit];
 	arc4rand(secbits, SYNCOOKIE_SECRET_SIZE, 0);
 	atomic_add_rel_int(&sc->secret.oddeven, 1);
 
 	/* Reschedule ourself. */
 	callout_schedule(&sc->secret.reseed, SYNCOOKIE_LIFETIME * hz);
 }
 
 /*
  * We have overflowed a bucket. Let's pause dealing with the syncache.
  * This function will increment the bucketoverflow statistics appropriately
  * (once per pause when pausing is enabled; otherwise, once per overflow).
  */
 static void
 syncache_pause(struct in_conninfo *inc)
 {
 	time_t delta;
 	const char *s;
 
 	/* XXX:
 	 * 2. Add sysctl read here so we don't get the benefit of this
 	 * change without the new sysctl.
 	 */
 
 	/*
 	 * Try an unlocked read. If we already know that another thread
 	 * has activated the feature, there is no need to proceed.
 	 */
 	if (V_tcp_syncache.paused)
 		return;
 
 	/* Are cookied enabled? If not, we can't pause. */
 	if (!V_tcp_syncookies) {
 		TCPSTAT_INC(tcps_sc_bucketoverflow);
 		return;
 	}
 
 	/*
 	 * We may be the first thread to find an overflow. Get the lock
 	 * and evaluate if we need to take action.
 	 */
 	mtx_lock(&V_tcp_syncache.pause_mtx);
 	if (V_tcp_syncache.paused) {
 		mtx_unlock(&V_tcp_syncache.pause_mtx);
 		return;
 	}
 
 	/* Activate protection. */
 	V_tcp_syncache.paused = true;
 	TCPSTAT_INC(tcps_sc_bucketoverflow);
 
 	/*
 	 * Determine the last backoff time. If we are seeing a re-newed
 	 * attack within that same time after last reactivating the syncache,
 	 * consider it an extension of the same attack.
 	 */
 	delta = TCP_SYNCACHE_PAUSE_TIME << V_tcp_syncache.pause_backoff;
 	if (V_tcp_syncache.pause_until + delta - time_uptime > 0) {
 		if (V_tcp_syncache.pause_backoff < TCP_SYNCACHE_MAX_BACKOFF) {
 			delta <<= 1;
 			V_tcp_syncache.pause_backoff++;
 		}
 	} else {
 		delta = TCP_SYNCACHE_PAUSE_TIME;
 		V_tcp_syncache.pause_backoff = 0;
 	}
 
 	/* Log a warning, including IP addresses, if able. */
 	if (inc != NULL)
 		s = tcp_log_addrs(inc, NULL, NULL, NULL);
 	else
 		s = (const char *)NULL;
 	log(LOG_WARNING, "TCP syncache overflow detected; using syncookies for "
 	    "the next %lld seconds%s%s%s\n", (long long)delta,
 	    (s != NULL) ? " (last SYN: " : "", (s != NULL) ? s : "",
 	    (s != NULL) ? ")" : "");
 	free(__DECONST(void *, s), M_TCPLOG);
 
 	/* Use the calculated delta to set a new pause time. */
 	V_tcp_syncache.pause_until = time_uptime + delta;
 	callout_reset(&V_tcp_syncache.pause_co, delta * hz, syncache_unpause,
 	    &V_tcp_syncache);
 	mtx_unlock(&V_tcp_syncache.pause_mtx);
 }
 
 /* Evaluate whether we need to unpause. */
 static void
 syncache_unpause(void *arg)
 {
 	struct tcp_syncache *sc;
 	time_t delta;
 
 	sc = arg;
 	mtx_assert(&sc->pause_mtx, MA_OWNED | MA_NOTRECURSED);
 	callout_deactivate(&sc->pause_co);
 
 	/*
 	 * Check to make sure we are not running early. If the pause
 	 * time has expired, then deactivate the protection.
 	 */
 	if ((delta = sc->pause_until - time_uptime) > 0)
 		callout_schedule(&sc->pause_co, delta * hz);
 	else
 		sc->paused = false;
 }
 
 /*
  * Exports the syncache entries to userland so that netstat can display
  * them alongside the other sockets.  This function is intended to be
  * called only from tcp_pcblist.
  *
  * Due to concurrency on an active system, the number of pcbs exported
  * may have no relation to max_pcbs.  max_pcbs merely indicates the
  * amount of space the caller allocated for this function to use.
  */
 int
 syncache_pcblist(struct sysctl_req *req)
 {
 	struct xtcpcb xt;
 	struct syncache *sc;
 	struct syncache_head *sch;
 	int error, i;
 
 	bzero(&xt, sizeof(xt));
 	xt.xt_len = sizeof(xt);
 	xt.t_state = TCPS_SYN_RECEIVED;
 	xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
 	xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
 	xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
 	xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
 
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
 		sch = &V_tcp_syncache.hashbase[i];
 		SCH_LOCK(sch);
 		TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
 			if (sc->sc_cred != NULL &&
 			    cr_cansee(req->td->td_ucred, sc->sc_cred) != 0)
 				continue;
 			if (sc->sc_inc.inc_flags & INC_ISIPV6)
 				xt.xt_inp.inp_vflag = INP_IPV6;
 			else
 				xt.xt_inp.inp_vflag = INP_IPV4;
 			xt.xt_encaps_port = sc->sc_port;
 			bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
 			    sizeof (struct in_conninfo));
 			error = SYSCTL_OUT(req, &xt, sizeof xt);
 			if (error) {
 				SCH_UNLOCK(sch);
 				return (0);
 			}
 		}
 		SCH_UNLOCK(sch);
 	}
 
 	return (0);
 }
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 8b0b3c296c62..767045480abf 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1,3163 +1,3161 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2006-2007 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
  */
 
 #include <sys/cdefs.h>
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_kern_tls.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/arb.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/refcount.h>
 #include <sys/kernel.h>
 #include <sys/ktls.h>
 #include <sys/qmath.h>
 #include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #ifdef INET6
 #include <sys/domain.h>
 #endif /* INET6 */
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
 #include <sys/stats.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
 #include <netinet/tcp_fastopen.h>
 #include <netinet/tcp_hpts.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 #include <netipsec/ipsec_support.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 
 /*
  * TCP protocol interface to socket abstraction.
  */
 #ifdef INET
 static int	tcp_connect(struct tcpcb *, struct sockaddr_in *,
 		    struct thread *td);
 #endif /* INET */
 #ifdef INET6
 static int	tcp6_connect(struct tcpcb *, struct sockaddr_in6 *,
 		    struct thread *td);
 #endif /* INET6 */
 static void	tcp_disconnect(struct tcpcb *);
 static void	tcp_usrclosed(struct tcpcb *);
 static void	tcp_fill_info(const struct tcpcb *, struct tcp_info *);
 
 static int	tcp_pru_options_support(struct tcpcb *tp, int flags);
 
 static void
 tcp_bblog_pru(struct tcpcb *tp, uint32_t pru, int error)
 {
 	struct tcp_log_buffer *lgb;
 
 	KASSERT(tp != NULL, ("tcp_bblog_pru: tp == NULL"));
 	INP_WLOCK_ASSERT(tptoinpcb(tp));
 	if (tcp_bblogging_on(tp)) {
 		lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_PRU, error,
 		    0, NULL, false, NULL, NULL, 0, NULL);
 	} else {
 		lgb = NULL;
 	}
 	if (lgb != NULL) {
 		if (error >= 0) {
 			lgb->tlb_errno = (uint32_t)error;
 		}
 		lgb->tlb_flex1 = pru;
 	}
 }
 
 /*
  * TCP attaches to socket via pru_attach(), reserving space,
  * and an internet control block.
  */
 static int
 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
 
 	error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
 	if (error)
 		goto out;
 
 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
 	so->so_snd.sb_flags |= SB_AUTOSIZE;
 	error = in_pcballoc(so, &V_tcbinfo);
 	if (error)
 		goto out;
 	inp = sotoinpcb(so);
 	tp = tcp_newtcpcb(inp);
 	if (tp == NULL) {
 		error = ENOBUFS;
-		in_pcbdetach(inp);
 		in_pcbfree(inp);
 		goto out;
 	}
 	tp->t_state = TCPS_CLOSED;
 	tcp_bblog_pru(tp, PRU_ATTACH, error);
 	INP_WUNLOCK(inp);
 	TCPSTATES_INC(TCPS_CLOSED);
 out:
 	TCP_PROBE2(debug__user, tp, PRU_ATTACH);
 	return (error);
 }
 
 /*
  * tcp_usr_detach is called when the socket layer loses its final reference
  * to the socket, be it a file descriptor reference, a reference from TCP,
  * etc.  At this point, there is only one case in which we will keep around
  * inpcb state: time wait.
  */
 static void
 tcp_usr_detach(struct socket *so)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 	INP_WLOCK(inp);
 	KASSERT(so->so_pcb == inp && inp->inp_socket == so,
 		("%s: socket %p inp %p mismatch", __func__, so, inp));
 
 	tp = intotcpcb(inp);
 
 	KASSERT(inp->inp_flags & INP_DROPPED ||
 	    tp->t_state < TCPS_SYN_SENT,
 	    ("%s: inp %p not dropped or embryonic", __func__, inp));
 
 	tcp_discardcb(tp);
-	in_pcbdetach(inp);
 	in_pcbfree(inp);
 }
 
 #ifdef INET
 /*
  * Give the socket an address.
  */
 static int
 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in *sinp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (EINVAL);
 	}
 	tp = intotcpcb(inp);
 
 	sinp = (struct sockaddr_in *)nam;
 	if (nam->sa_family != AF_INET) {
 		/*
 		 * Preserve compatibility with old programs.
 		 */
 		if (nam->sa_family != AF_UNSPEC ||
 		    nam->sa_len < offsetof(struct sockaddr_in, sin_zero) ||
 		    sinp->sin_addr.s_addr != INADDR_ANY) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 		nam->sa_family = AF_INET;
 	}
 	if (nam->sa_len != sizeof(*sinp)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * Must check for multicast addresses and disallow binding
 	 * to them.
 	 */
 	if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 	INP_HASH_WLOCK(&V_tcbinfo);
 	error = in_pcbbind(inp, sinp, td->td_ucred);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 out:
 	tcp_bblog_pru(tp, PRU_BIND, error);
 	TCP_PROBE2(debug__user, tp, PRU_BIND);
 	INP_WUNLOCK(inp);
 
 	return (error);
 }
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in6 *sin6;
 	u_char vflagsav;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (EINVAL);
 	}
 	tp = intotcpcb(inp);
 
 	vflagsav = inp->inp_vflag;
 
 	sin6 = (struct sockaddr_in6 *)nam;
 	if (nam->sa_family != AF_INET6) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 	if (nam->sa_len != sizeof(*sin6)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * Must check for multicast addresses and disallow binding
 	 * to them.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 
 	INP_HASH_WLOCK(&V_tcbinfo);
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 #ifdef INET
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 			inp->inp_vflag |= INP_IPV4;
 		else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 			struct sockaddr_in sin;
 
 			in6_sin6_2_sin(&sin, sin6);
 			if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
 				error = EAFNOSUPPORT;
 				INP_HASH_WUNLOCK(&V_tcbinfo);
 				goto out;
 			}
 			inp->inp_vflag |= INP_IPV4;
 			inp->inp_vflag &= ~INP_IPV6;
 			error = in_pcbbind(inp, &sin, td->td_ucred);
 			INP_HASH_WUNLOCK(&V_tcbinfo);
 			goto out;
 		}
 	}
 #endif
 	error = in6_pcbbind(inp, sin6, td->td_ucred);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 out:
 	if (error != 0)
 		inp->inp_vflag = vflagsav;
 	tcp_bblog_pru(tp, PRU_BIND, error);
 	TCP_PROBE2(debug__user, tp, PRU_BIND);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Prepare to accept connections.
  */
 static int
 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (EINVAL);
 	}
 	tp = intotcpcb(inp);
 
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error != 0) {
 		SOCK_UNLOCK(so);
 		goto out;
 	}
 	if (inp->inp_lport == 0) {
 		INP_HASH_WLOCK(&V_tcbinfo);
 		error = in_pcbbind(inp, NULL, td->td_ucred);
 		INP_HASH_WUNLOCK(&V_tcbinfo);
 	}
 	if (error == 0) {
 		tcp_state_change(tp, TCPS_LISTEN);
 		solisten_proto(so, backlog);
 #ifdef TCP_OFFLOAD
 		if ((so->so_options & SO_NO_OFFLOAD) == 0)
 			tcp_offload_listen_start(tp);
 #endif
 	} else {
 		solisten_proto_abort(so);
 	}
 	SOCK_UNLOCK(so);
 
 	if (IS_FASTOPEN(tp->t_flags))
 		tp->t_tfo_pending = tcp_fastopen_alloc_counter();
 
 out:
 	tcp_bblog_pru(tp, PRU_LISTEN, error);
 	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	u_char vflagsav;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (EINVAL);
 	}
 	tp = intotcpcb(inp);
 
 	vflagsav = inp->inp_vflag;
 
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error != 0) {
 		SOCK_UNLOCK(so);
 		goto out;
 	}
 	INP_HASH_WLOCK(&V_tcbinfo);
 	if (inp->inp_lport == 0) {
 		inp->inp_vflag &= ~INP_IPV4;
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
 			inp->inp_vflag |= INP_IPV4;
 		error = in6_pcbbind(inp, NULL, td->td_ucred);
 	}
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error == 0) {
 		tcp_state_change(tp, TCPS_LISTEN);
 		solisten_proto(so, backlog);
 #ifdef TCP_OFFLOAD
 		if ((so->so_options & SO_NO_OFFLOAD) == 0)
 			tcp_offload_listen_start(tp);
 #endif
 	} else {
 		solisten_proto_abort(so);
 	}
 	SOCK_UNLOCK(so);
 
 	if (IS_FASTOPEN(tp->t_flags))
 		tp->t_tfo_pending = tcp_fastopen_alloc_counter();
 
 	if (error != 0)
 		inp->inp_vflag = vflagsav;
 
 out:
 	tcp_bblog_pru(tp, PRU_LISTEN, error);
 	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Initiate connection to peer.
  * Create a template for use in transmissions on this connection.
  * Enter SYN_SENT state, and mark socket as connecting.
  * Start keep-alive timer, and seed output sequence space.
  * Send initial segment on connection.
  */
 static int
 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct epoch_tracker et;
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in *sinp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (ECONNREFUSED);
 	}
 	tp = intotcpcb(inp);
 
 	sinp = (struct sockaddr_in *)nam;
 	if (nam->sa_family != AF_INET) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 	if (nam->sa_len != sizeof (*sinp)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * Must disallow TCP ``connections'' to multicast addresses.
 	 */
 	if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 	if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
 		error = EACCES;
 		goto out;
 	}
 	if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
 		goto out;
 	if (SOLISTENING(so)) {
 		error = EOPNOTSUPP;
 		goto out;
 	}
 	NET_EPOCH_ENTER(et);
 	if ((error = tcp_connect(tp, sinp, td)) != 0)
 		goto out_in_epoch;
 #ifdef TCP_OFFLOAD
 	if (registered_toedevs > 0 &&
 	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
 	    (error = tcp_offload_connect(so, nam)) == 0)
 		goto out_in_epoch;
 #endif
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 	error = tcp_output(tp);
 	KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
 	    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
 out_in_epoch:
 	NET_EPOCH_EXIT(et);
 out:
 	tcp_bblog_pru(tp, PRU_CONNECT, error);
 	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct epoch_tracker et;
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in6 *sin6;
 	u_int8_t incflagsav;
 	u_char vflagsav;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (ECONNREFUSED);
 	}
 	tp = intotcpcb(inp);
 
 	vflagsav = inp->inp_vflag;
 	incflagsav = inp->inp_inc.inc_flags;
 
 	sin6 = (struct sockaddr_in6 *)nam;
 	if (nam->sa_family != AF_INET6) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 	if (nam->sa_len != sizeof (*sin6)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * Must disallow TCP ``connections'' to multicast addresses.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 	if (SOLISTENING(so)) {
 		error = EINVAL;
 		goto out;
 	}
 #ifdef INET
 	/*
 	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
 	 * therefore probably require the hash lock, which isn't held here.
 	 * Is this a significant problem?
 	 */
 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 		struct sockaddr_in sin;
 
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
 			error = EINVAL;
 			goto out;
 		}
 		if ((inp->inp_vflag & INP_IPV4) == 0) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 
 		in6_sin6_2_sin(&sin, sin6);
 		if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 		if (ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) {
 			error = EACCES;
 			goto out;
 		}
 		if ((error = prison_remote_ip4(td->td_ucred,
 		    &sin.sin_addr)) != 0)
 			goto out;
 		inp->inp_vflag |= INP_IPV4;
 		inp->inp_vflag &= ~INP_IPV6;
 		NET_EPOCH_ENTER(et);
 		if ((error = tcp_connect(tp, &sin, td)) != 0)
 			goto out_in_epoch;
 #ifdef TCP_OFFLOAD
 		if (registered_toedevs > 0 &&
 		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
 		    (error = tcp_offload_connect(so, nam)) == 0)
 			goto out_in_epoch;
 #endif
 		error = tcp_output(tp);
 		goto out_in_epoch;
 	} else {
 		if ((inp->inp_vflag & INP_IPV6) == 0) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 	}
 #endif
 	if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
 		goto out;
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	inp->inp_inc.inc_flags |= INC_ISIPV6;
 	NET_EPOCH_ENTER(et);
 	if ((error = tcp6_connect(tp, sin6, td)) != 0)
 		goto out_in_epoch;
 #ifdef TCP_OFFLOAD
 	if (registered_toedevs > 0 &&
 	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
 	    (error = tcp_offload_connect(so, nam)) == 0)
 		goto out_in_epoch;
 #endif
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 	error = tcp_output(tp);
 out_in_epoch:
 	NET_EPOCH_EXIT(et);
 out:
 	KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
 	    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
 	/*
 	 * If the implicit bind in the connect call fails, restore
 	 * the flags we modified.
 	 */
 	if (error != 0 && inp->inp_lport == 0) {
 		inp->inp_vflag = vflagsav;
 		inp->inp_inc.inc_flags = incflagsav;
 	}
 
 	tcp_bblog_pru(tp, PRU_CONNECT, error);
 	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET6 */
 
 /*
  * Initiate disconnect from peer.
  * If connection never passed embryonic stage, just drop;
  * else if don't need to let data drain, then can just drop anyways,
  * else have to begin TCP shutdown process: mark socket disconnecting,
  * drain unread data, state switch to reflect user close, and
  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
  * when peer sends FIN and acks ours.
  *
  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
  */
 static int
 tcp_usr_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	struct epoch_tracker et;
 	int error = 0;
 
 	NET_EPOCH_ENTER(et);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 
 	if (tp->t_state == TCPS_TIME_WAIT)
 		goto out;
 	tcp_disconnect(tp);
 out:
 	tcp_bblog_pru(tp, PRU_DISCONNECT, error);
 	TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
 	INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	return (error);
 }
 
 #ifdef INET
 /*
  * Accept a connection.  Essentially all the work is done at higher levels;
  * just return the address of the peer, storing through addr.
  */
 static int
 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct in_addr addr;
 	in_port_t port = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (ECONNABORTED);
 	}
 	tp = intotcpcb(inp);
 
 	if (so->so_state & SS_ISDISCONNECTED) {
 		error = ECONNABORTED;
 		goto out;
 	}
 	/*
 	 * We inline in_getpeeraddr and COMMON_END here, so that we can
 	 * copy the data of interest and defer the malloc until after we
 	 * release the lock.
 	 */
 	port = inp->inp_fport;
 	addr = inp->inp_faddr;
 
 out:
 	tcp_bblog_pru(tp, PRU_ACCEPT, error);
 	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
 	INP_WUNLOCK(inp);
 	if (error == 0)
 		*nam = in_sockaddr(port, &addr);
 	return error;
 }
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	int error = 0;
 	struct tcpcb *tp;
 	struct in_addr addr;
 	struct in6_addr addr6;
 	struct epoch_tracker et;
 	in_port_t port = 0;
 	int v4 = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
 	NET_EPOCH_ENTER(et); /* XXXMT Why is this needed? */
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		return (ECONNABORTED);
 	}
 	tp = intotcpcb(inp);
 
 	if (so->so_state & SS_ISDISCONNECTED) {
 		error = ECONNABORTED;
 		goto out;
 	}
 	/*
 	 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
 	 * copy the data of interest and defer the malloc until after we
 	 * release the lock.
 	 */
 	if (inp->inp_vflag & INP_IPV4) {
 		v4 = 1;
 		port = inp->inp_fport;
 		addr = inp->inp_faddr;
 	} else {
 		port = inp->inp_fport;
 		addr6 = inp->in6p_faddr;
 	}
 
 out:
 	tcp_bblog_pru(tp, PRU_ACCEPT, error);
 	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
 	INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	if (error == 0) {
 		if (v4)
 			*nam = in6_v4mapsin6_sockaddr(port, &addr);
 		else
 			*nam = in6_sockaddr(port, &addr6);
 	}
 	return error;
 }
 #endif /* INET6 */
 
 /*
  * Mark the connection as being incapable of further output.
  */
 static int
 tcp_usr_shutdown(struct socket *so)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct epoch_tracker et;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 
 	NET_EPOCH_ENTER(et);
 	socantsendmore(so);
 	tcp_usrclosed(tp);
 	if (!(inp->inp_flags & INP_DROPPED))
 		error = tcp_output_nodrop(tp);
 	tcp_bblog_pru(tp, PRU_SHUTDOWN, error);
 	TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
 	error = tcp_unlock_or_drop(tp, error);
 	NET_EPOCH_EXIT(et);
 
 	return (error);
 }
 
 /*
  * After a receive, possibly send window update to peer.
  */
 static int
 tcp_usr_rcvd(struct socket *so, int flags)
 {
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	int outrv = 0, error = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 
 	NET_EPOCH_ENTER(et);
 	/*
 	 * For passively-created TFO connections, don't attempt a window
 	 * update while still in SYN_RECEIVED as this may trigger an early
 	 * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
 	 * application response data, or failing that, when the DELACK timer
 	 * expires.
 	 */
 	if (IS_FASTOPEN(tp->t_flags) &&
 	    (tp->t_state == TCPS_SYN_RECEIVED))
 		goto out;
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE)
 		tcp_offload_rcvd(tp);
 	else
 #endif
 		outrv = tcp_output_nodrop(tp);
 out:
 	tcp_bblog_pru(tp, PRU_RCVD, error);
 	TCP_PROBE2(debug__user, tp, PRU_RCVD);
 	(void) tcp_unlock_or_drop(tp, outrv);
 	NET_EPOCH_EXIT(et);
 	return (error);
 }
 
 /*
  * Do a send by putting data in output queue and updating urgent
  * marker if URG set.  Possibly send more data.  Unlike the other
  * pru_*() routines, the mbuf chains are our responsibility.  We
  * must either enqueue them or free them.  The other pru_* routines
  * generally are caller-frees.
  */
 static int
 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	struct epoch_tracker et;
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 #ifdef INET
 #ifdef INET6
 	struct sockaddr_in sin;
 #endif
 	struct sockaddr_in *sinp;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 	int isipv6;
 #endif
 	u_int8_t incflagsav;
 	u_char vflagsav;
 	bool restoreflags;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 			m_freem(m);
 		INP_WUNLOCK(inp);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 
 	vflagsav = inp->inp_vflag;
 	incflagsav = inp->inp_inc.inc_flags;
 	restoreflags = false;
 
 	NET_EPOCH_ENTER(et);
 	if (control != NULL) {
 		/* TCP doesn't do control messages (rights, creds, etc) */
 		if (control->m_len > 0) {
 			m_freem(control);
 			error = EINVAL;
 			goto out;
 		}
 		m_freem(control);	/* empty control, just free it */
 	}
 
 	if ((flags & PRUS_OOB) != 0 &&
 	    (error = tcp_pru_options_support(tp, PRUS_OOB)) != 0)
 		goto out;
 
 	if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
 		if (tp->t_state == TCPS_LISTEN) {
 			error = EINVAL;
 			goto out;
 		}
 		switch (nam->sa_family) {
 #ifdef INET
 		case AF_INET:
 			sinp = (struct sockaddr_in *)nam;
 			if (sinp->sin_len != sizeof(struct sockaddr_in)) {
 				error = EINVAL;
 				goto out;
 			}
 			if ((inp->inp_vflag & INP_IPV6) != 0) {
 				error = EAFNOSUPPORT;
 				goto out;
 			}
 			if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
 				error = EAFNOSUPPORT;
 				goto out;
 			}
 			if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
 				error = EACCES;
 				goto out;
 			}
 			if ((error = prison_remote_ip4(td->td_ucred,
 			    &sinp->sin_addr)))
 				goto out;
 #ifdef INET6
 			isipv6 = 0;
 #endif
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			sin6 = (struct sockaddr_in6 *)nam;
 			if (sin6->sin6_len != sizeof(*sin6)) {
 				error = EINVAL;
 				goto out;
 			}
 			if ((inp->inp_vflag & INP_IPV6PROTO) == 0) {
 				error = EAFNOSUPPORT;
 				goto out;
 			}
 			if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 				error = EAFNOSUPPORT;
 				goto out;
 			}
 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 #ifdef INET
 				if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
 					error = EINVAL;
 					goto out;
 				}
 				if ((inp->inp_vflag & INP_IPV4) == 0) {
 					error = EAFNOSUPPORT;
 					goto out;
 				}
 				restoreflags = true;
 				inp->inp_vflag &= ~INP_IPV6;
 				sinp = &sin;
 				in6_sin6_2_sin(sinp, sin6);
 				if (IN_MULTICAST(
 				    ntohl(sinp->sin_addr.s_addr))) {
 					error = EAFNOSUPPORT;
 					goto out;
 				}
 				if ((error = prison_remote_ip4(td->td_ucred,
 				    &sinp->sin_addr)))
 					goto out;
 				isipv6 = 0;
 #else /* !INET */
 				error = EAFNOSUPPORT;
 				goto out;
 #endif /* INET */
 			} else {
 				if ((inp->inp_vflag & INP_IPV6) == 0) {
 					error = EAFNOSUPPORT;
 					goto out;
 				}
 				restoreflags = true;
 				inp->inp_vflag &= ~INP_IPV4;
 				inp->inp_inc.inc_flags |= INC_ISIPV6;
 				if ((error = prison_remote_ip6(td->td_ucred,
 				    &sin6->sin6_addr)))
 					goto out;
 				isipv6 = 1;
 			}
 			break;
 #endif /* INET6 */
 		default:
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 	}
 	if (!(flags & PRUS_OOB)) {
 		if (tp->t_acktime == 0)
 			tp->t_acktime = ticks;
 		sbappendstream(&so->so_snd, m, flags);
 		m = NULL;
 		if (nam && tp->t_state < TCPS_SYN_SENT) {
 			KASSERT(tp->t_state == TCPS_CLOSED,
 			    ("%s: tp %p is listening", __func__, tp));
 
 			/*
 			 * Do implied connect if not yet connected,
 			 * initialize window to default value, and
 			 * initialize maxseg using peer's cached MSS.
 			 */
 #ifdef INET6
 			if (isipv6)
 				error = tcp6_connect(tp, sin6, td);
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 			else
 #endif
 #ifdef INET
 				error = tcp_connect(tp, sinp, td);
 #endif
 			/*
 			 * The bind operation in tcp_connect succeeded. We
 			 * no longer want to restore the flags if later
 			 * operations fail.
 			 */
 			if (error == 0 || inp->inp_lport != 0)
 				restoreflags = false;
 
 			if (error) {
 				/* m is freed if PRUS_NOTREADY is unset. */
 				sbflush(&so->so_snd);
 				goto out;
 			}
 			if (IS_FASTOPEN(tp->t_flags))
 				tcp_fastopen_connect(tp);
 			else {
 				tp->snd_wnd = TTCP_CLIENT_SND_WND;
 				tcp_mss(tp, -1);
 			}
 		}
 		if (flags & PRUS_EOF) {
 			/*
 			 * Close the send side of the connection after
 			 * the data is sent.
 			 */
 			socantsendmore(so);
 			tcp_usrclosed(tp);
 		}
 		if (TCPS_HAVEESTABLISHED(tp->t_state) &&
 		    ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
 		    (tp->t_fbyte_out == 0) &&
 		    (so->so_snd.sb_ccc > 0)) {
 			tp->t_fbyte_out = ticks;
 			if (tp->t_fbyte_out == 0)
 				tp->t_fbyte_out = 1;
 			if (tp->t_fbyte_out && tp->t_fbyte_in)
 				tp->t_flags2 |= TF2_FBYTES_COMPLETE;
 		}
 		if (!(inp->inp_flags & INP_DROPPED) &&
 		    !(flags & PRUS_NOTREADY)) {
 			if (flags & PRUS_MORETOCOME)
 				tp->t_flags |= TF_MORETOCOME;
 			error = tcp_output_nodrop(tp);
 			if (flags & PRUS_MORETOCOME)
 				tp->t_flags &= ~TF_MORETOCOME;
 		}
 	} else {
 		/*
 		 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (sbspace(&so->so_snd) < -512) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = ENOBUFS;
 			goto out;
 		}
 		/*
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section.
 		 * Otherwise, snd_up should be one lower.
 		 */
 		if (tp->t_acktime == 0)
 			tp->t_acktime = ticks;
 		sbappendstream_locked(&so->so_snd, m, flags);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		m = NULL;
 		if (nam && tp->t_state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected,
 			 * initialize window to default value, and
 			 * initialize maxseg using peer's cached MSS.
 			 */
 
 			/*
 			 * Not going to contemplate SYN|URG
 			 */
 			if (IS_FASTOPEN(tp->t_flags))
 				tp->t_flags &= ~TF_FASTOPEN;
 #ifdef INET6
 			if (isipv6)
 				error = tcp6_connect(tp, sin6, td);
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 			else
 #endif
 #ifdef INET
 				error = tcp_connect(tp, sinp, td);
 #endif
 			/*
 			 * The bind operation in tcp_connect succeeded. We
 			 * no longer want to restore the flags if later
 			 * operations fail.
 			 */
 			if (error == 0 || inp->inp_lport != 0)
 				restoreflags = false;
 
 			if (error != 0) {
 				/* m is freed if PRUS_NOTREADY is unset. */
 				sbflush(&so->so_snd);
 				goto out;
 			}
 			tp->snd_wnd = TTCP_CLIENT_SND_WND;
 			tcp_mss(tp, -1);
 		}
 		tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
 		if ((flags & PRUS_NOTREADY) == 0) {
 			tp->t_flags |= TF_FORCEDATA;
 			error = tcp_output_nodrop(tp);
 			tp->t_flags &= ~TF_FORCEDATA;
 		}
 	}
 	TCP_LOG_EVENT(tp, NULL,
 	    &inp->inp_socket->so_rcv,
 	    &inp->inp_socket->so_snd,
 	    TCP_LOG_USERSEND, error,
 	    0, NULL, false);
 
 out:
 	/*
 	 * In case of PRUS_NOTREADY, the caller or tcp_usr_ready() is
 	 * responsible for freeing memory.
 	 */
 	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 		m_freem(m);
 
 	/*
 	 * If the request was unsuccessful and we changed flags,
 	 * restore the original flags.
 	 */
 	if (error != 0 && restoreflags) {
 		inp->inp_vflag = vflagsav;
 		inp->inp_inc.inc_flags = incflagsav;
 	}
 	tcp_bblog_pru(tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
 		      ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), error);
 	TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
 		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 	error = tcp_unlock_or_drop(tp, error);
 	NET_EPOCH_EXIT(et);
 	return (error);
 }
 
 static int
 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
 {
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	int error;
 
 	inp = sotoinpcb(so);
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		mb_free_notready(m, count);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 
 	SOCKBUF_LOCK(&so->so_snd);
 	error = sbready(&so->so_snd, m, count);
 	SOCKBUF_UNLOCK(&so->so_snd);
 	if (error) {
 		INP_WUNLOCK(inp);
 		return (error);
 	}
 	NET_EPOCH_ENTER(et);
 	error = tcp_output_unlock(tp);
 	NET_EPOCH_EXIT(et);
 
 	return (error);
 }
 
 /*
  * Abort the TCP.  Drop the connection abruptly.
  */
 static void
 tcp_usr_abort(struct socket *so)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct epoch_tracker et;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
 
 	NET_EPOCH_ENTER(et);
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_abort: inp_socket == NULL"));
 
 	/*
 	 * If we still have full TCP state, and we're not dropped, drop.
 	 */
 	if (!(inp->inp_flags & INP_DROPPED)) {
 		tp = intotcpcb(inp);
 		tp = tcp_drop(tp, ECONNABORTED);
 		if (tp == NULL)
 			goto dropped;
 		tcp_bblog_pru(tp, PRU_ABORT, 0);
 		TCP_PROBE2(debug__user, tp, PRU_ABORT);
 	}
 	if (!(inp->inp_flags & INP_DROPPED)) {
 		soref(so);
 		inp->inp_flags |= INP_SOCKREF;
 	}
 	INP_WUNLOCK(inp);
 dropped:
 	NET_EPOCH_EXIT(et);
 }
 
 /*
  * TCP socket is closed.  Start friendly disconnect.
  */
 static void
 tcp_usr_close(struct socket *so)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct epoch_tracker et;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
 
 	NET_EPOCH_ENTER(et);
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_close: inp_socket == NULL"));
 
 	/*
 	 * If we are still connected and we're not dropped, initiate
 	 * a disconnect.
 	 */
 	if (!(inp->inp_flags & INP_DROPPED)) {
 		tp = intotcpcb(inp);
 		if (tp->t_state != TCPS_TIME_WAIT) {
 			tp->t_flags |= TF_CLOSED;
 			tcp_disconnect(tp);
 			tcp_bblog_pru(tp, PRU_CLOSE, 0);
 			TCP_PROBE2(debug__user, tp, PRU_CLOSE);
 		}
 	}
 	if (!(inp->inp_flags & INP_DROPPED)) {
 		soref(so);
 		inp->inp_flags |= INP_SOCKREF;
 	}
 	INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 }
 
 static int
 tcp_pru_options_support(struct tcpcb *tp, int flags)
 {
 	/*
 	 * If the specific TCP stack has a pru_options
 	 * specified then it does not always support
 	 * all the PRU_XX options and we must ask it.
 	 * If the function is not specified then all
 	 * of the PRU_XX options are supported.
 	 */
 	int ret = 0;
 
 	if (tp->t_fb->tfb_pru_options) {
 		ret = (*tp->t_fb->tfb_pru_options)(tp, flags);
 	}
 	return (ret);
 }
 
 /*
  * Receive out-of-band data.
  */
 static int
 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 
 	error = tcp_pru_options_support(tp, PRUS_OOB);
 	if (error) {
 		goto out;
 	}
 	if ((so->so_oobmark == 0 &&
 	     (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 	    so->so_options & SO_OOBINLINE ||
 	    tp->t_oobflags & TCPOOB_HADDATA) {
 		error = EINVAL;
 		goto out;
 	}
 	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
 		error = EWOULDBLOCK;
 		goto out;
 	}
 	m->m_len = 1;
 	*mtod(m, caddr_t) = tp->t_iobc;
 	if ((flags & MSG_PEEK) == 0)
 		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 
 out:
 	tcp_bblog_pru(tp, PRU_RCVOOB, error);
 	TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 #ifdef INET
 struct protosw tcp_protosw = {
 	.pr_type =		SOCK_STREAM,
 	.pr_protocol =		IPPROTO_TCP,
 	.pr_flags =		PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD |
 				    PR_CAPATTACH,
 	.pr_ctloutput =		tcp_ctloutput,
 	.pr_abort =		tcp_usr_abort,
 	.pr_accept =		tcp_usr_accept,
 	.pr_attach =		tcp_usr_attach,
 	.pr_bind =		tcp_usr_bind,
 	.pr_connect =		tcp_usr_connect,
 	.pr_control =		in_control,
 	.pr_detach =		tcp_usr_detach,
 	.pr_disconnect =	tcp_usr_disconnect,
 	.pr_listen =		tcp_usr_listen,
 	.pr_peeraddr =		in_getpeeraddr,
 	.pr_rcvd =		tcp_usr_rcvd,
 	.pr_rcvoob =		tcp_usr_rcvoob,
 	.pr_send =		tcp_usr_send,
 	.pr_ready =		tcp_usr_ready,
 	.pr_shutdown =		tcp_usr_shutdown,
 	.pr_sockaddr =		in_getsockaddr,
 	.pr_sosetlabel =	in_pcbsosetlabel,
 	.pr_close =		tcp_usr_close,
 };
 #endif /* INET */
 
 #ifdef INET6
 struct protosw tcp6_protosw = {
 	.pr_type =		SOCK_STREAM,
 	.pr_protocol =		IPPROTO_TCP,
 	.pr_flags =		PR_CONNREQUIRED | PR_IMPLOPCL |PR_WANTRCVD |
 				    PR_CAPATTACH,
 	.pr_ctloutput =		tcp_ctloutput,
 	.pr_abort =		tcp_usr_abort,
 	.pr_accept =		tcp6_usr_accept,
 	.pr_attach =		tcp_usr_attach,
 	.pr_bind =		tcp6_usr_bind,
 	.pr_connect =		tcp6_usr_connect,
 	.pr_control =		in6_control,
 	.pr_detach =		tcp_usr_detach,
 	.pr_disconnect =	tcp_usr_disconnect,
 	.pr_listen =		tcp6_usr_listen,
 	.pr_peeraddr =		in6_mapped_peeraddr,
 	.pr_rcvd =		tcp_usr_rcvd,
 	.pr_rcvoob =		tcp_usr_rcvoob,
 	.pr_send =		tcp_usr_send,
 	.pr_ready =		tcp_usr_ready,
 	.pr_shutdown =		tcp_usr_shutdown,
 	.pr_sockaddr =		in6_mapped_sockaddr,
 	.pr_sosetlabel =	in_pcbsosetlabel,
 	.pr_close =		tcp_usr_close,
 };
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Common subroutine to open a TCP connection to remote host specified
  * by struct sockaddr_in.  Call in_pcbconnect() to choose local host address
  * and assign a local port number and install the inpcb into the hash.
  * Initialize connection parameters and enter SYN-SENT state.
  */
 static int
 tcp_connect(struct tcpcb *tp, struct sockaddr_in *sin, struct thread *td)
 {
 	struct inpcb *inp = tptoinpcb(tp);
 	struct socket *so = tptosocket(tp);
 	int error;
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(inp);
 
 	if (__predict_false((so->so_state &
 	    (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING |
 	    SS_ISDISCONNECTED)) != 0))
 		return (EISCONN);
 
 	INP_HASH_WLOCK(&V_tcbinfo);
 	error = in_pcbconnect(inp, sin, td->td_ucred, true);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Compute window scaling to request:
 	 * Scale to fit into sweet spot.  See tcp_syncache.c.
 	 * XXX: This should move to tcp_output().
 	 */
 	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 		tp->request_r_scale++;
 
 	soisconnecting(so);
 	TCPSTAT_INC(tcps_connattempt);
 	tcp_state_change(tp, TCPS_SYN_SENT);
 	tp->iss = tcp_new_isn(&inp->inp_inc);
 	if (tp->t_flags & TF_REQ_TSTMP)
 		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
 	tcp_sendseqinit(tp);
 
 	return (0);
 }
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td)
 {
 	struct inpcb *inp = tptoinpcb(tp);
 	struct socket *so = tptosocket(tp);
 	int error;
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(inp);
 
 	if (__predict_false((so->so_state &
 	    (SS_ISCONNECTING | SS_ISCONNECTED)) != 0))
 		return (EISCONN);
 
 	INP_HASH_WLOCK(&V_tcbinfo);
 	error = in6_pcbconnect(inp, sin6, td->td_ucred, true);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error != 0)
 		return (error);
 
 	/* Compute window scaling to request.  */
 	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 		tp->request_r_scale++;
 
 	soisconnecting(so);
 	TCPSTAT_INC(tcps_connattempt);
 	tcp_state_change(tp, TCPS_SYN_SENT);
 	tp->iss = tcp_new_isn(&inp->inp_inc);
 	if (tp->t_flags & TF_REQ_TSTMP)
 		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
 	tcp_sendseqinit(tp);
 
 	return (0);
 }
 #endif /* INET6 */
 
 /*
  * Export TCP internal state information via a struct tcp_info, based on the
  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
  * (TCP state machine, etc).  We export all information using FreeBSD-native
  * constants -- for example, the numeric values for tcpi_state will differ
  * from Linux.
  */
 void
 tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti)
 {
 
 	INP_LOCK_ASSERT(tptoinpcb(tp));
 	bzero(ti, sizeof(*ti));
 
 	ti->tcpi_state = tp->t_state;
 	if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
 		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
 	if (tp->t_flags & TF_SACK_PERMIT)
 		ti->tcpi_options |= TCPI_OPT_SACK;
 	if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
 		ti->tcpi_options |= TCPI_OPT_WSCALE;
 		ti->tcpi_snd_wscale = tp->snd_scale;
 		ti->tcpi_rcv_wscale = tp->rcv_scale;
 	}
 	switch (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
 		case TF2_ECN_PERMIT:
 			ti->tcpi_options |= TCPI_OPT_ECN;
 			break;
 		case TF2_ACE_PERMIT:
 			/* FALLTHROUGH */
 		case TF2_ECN_PERMIT | TF2_ACE_PERMIT:
 			ti->tcpi_options |= TCPI_OPT_ACE;
 			break;
 		default:
 			break;
 	}
 	if (IS_FASTOPEN(tp->t_flags))
 		ti->tcpi_options |= TCPI_OPT_TFO;
 
 	ti->tcpi_rto = tp->t_rxtcur * tick;
 	ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
 	ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
 	ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
 
 	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
 	ti->tcpi_snd_cwnd = tp->snd_cwnd;
 
 	/*
 	 * FreeBSD-specific extension fields for tcp_info.
 	 */
 	ti->tcpi_rcv_space = tp->rcv_wnd;
 	ti->tcpi_rcv_nxt = tp->rcv_nxt;
 	ti->tcpi_snd_wnd = tp->snd_wnd;
 	ti->tcpi_snd_bwnd = 0;		/* Unused, kept for compat. */
 	ti->tcpi_snd_nxt = tp->snd_nxt;
 	ti->tcpi_snd_mss = tp->t_maxseg;
 	ti->tcpi_rcv_mss = tp->t_maxseg;
 	ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
 	ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
 	ti->tcpi_snd_zerowin = tp->t_sndzerowin;
 	ti->tcpi_snd_una = tp->snd_una;
 	ti->tcpi_snd_max = tp->snd_max;
 	ti->tcpi_rcv_numsacks = tp->rcv_numsacks;
 	ti->tcpi_rcv_adv = tp->rcv_adv;
 	ti->tcpi_dupacks = tp->t_dupacks;
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE) {
 		ti->tcpi_options |= TCPI_OPT_TOE;
 		tcp_offload_tcp_info(tp, ti);
 	}
 #endif
 	/*
 	 * AccECN related counters.
 	 */
 	if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
 	    (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
 		/*
 		 * Internal counter starts at 5 for AccECN
 		 * but 0 for RFC3168 ECN.
 		 */
 		ti->tcpi_delivered_ce = tp->t_scep - 5;
 	else
 		ti->tcpi_delivered_ce = tp->t_scep;
 	ti->tcpi_received_ce = tp->t_rcep;
 }
 
 /*
  * tcp_ctloutput() must drop the inpcb lock before performing copyin on
  * socket option arguments.  When it re-acquires the lock after the copy, it
  * has to revalidate that the connection is still valid for the socket
  * option.
  */
 #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {			\
 	INP_WLOCK(inp);							\
 	if (inp->inp_flags & INP_DROPPED) {				\
 		INP_WUNLOCK(inp);					\
 		cleanup;						\
 		return (ECONNRESET);					\
 	}								\
 	tp = intotcpcb(inp);						\
 } while(0)
 #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
 
 int
 tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct socket *so = inp->inp_socket;
 	struct tcpcb *tp = intotcpcb(inp);
 	int error = 0;
 
 	MPASS(sopt->sopt_dir == SOPT_SET);
 	INP_WLOCK_ASSERT(inp);
 	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 	    ("inp_flags == %x", inp->inp_flags));
 	KASSERT(so != NULL, ("inp_socket == NULL"));
 
 	if (sopt->sopt_level != IPPROTO_TCP) {
 		INP_WUNLOCK(inp);
 #ifdef INET6
 		if (inp->inp_vflag & INP_IPV6PROTO)
 			error = ip6_ctloutput(so, sopt);
 #endif
 #if defined(INET6) && defined(INET)
 		else
 #endif
 #ifdef INET
 			error = ip_ctloutput(so, sopt);
 #endif
 		/*
 		 * When an IP-level socket option affects TCP, pass control
 		 * down to stack tfb_tcp_ctloutput, otherwise return what
 		 * IP level returned.
 		 */
 		switch (sopt->sopt_level) {
 #ifdef INET6
 		case IPPROTO_IPV6:
 			if ((inp->inp_vflag & INP_IPV6PROTO) == 0)
 				return (error);
 			switch (sopt->sopt_name) {
 			case IPV6_TCLASS:
 				/* Notify tcp stacks that care (e.g. RACK). */
 				break;
 			case IPV6_USE_MIN_MTU:
 				/* Update t_maxseg accordingly. */
 				break;
 			default:
 				return (error);
 			}
 			break;
 #endif
 #ifdef INET
 		case IPPROTO_IP:
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos &= ~IPTOS_ECN_MASK;
 				break;
 			case IP_TTL:
 				/* Notify tcp stacks that care (e.g. RACK). */
 				break;
 			default:
 				return (error);
 			}
 			break;
 #endif
 		default:
 			return (error);
 		}
 		INP_WLOCK(inp);
 		if (inp->inp_flags & INP_DROPPED) {
 			INP_WUNLOCK(inp);
 			return (ECONNRESET);
 		}
 	} else if (sopt->sopt_name == TCP_FUNCTION_BLK) {
 		/*
 		 * Protect the TCP option TCP_FUNCTION_BLK so
 		 * that a sub-function can *never* overwrite this.
 		 */
 		struct tcp_function_set fsn;
 		struct tcp_function_block *blk;
 		void *ptr = NULL;
 
 		INP_WUNLOCK(inp);
 		error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn);
 		if (error)
 			return (error);
 
 		INP_WLOCK(inp);
 		tp = intotcpcb(inp);
 
 		blk = find_and_ref_tcp_functions(&fsn);
 		if (blk == NULL) {
 			INP_WUNLOCK(inp);
 			return (ENOENT);
 		}
 		if (tp->t_fb == blk) {
 			/* You already have this */
 			refcount_release(&blk->tfb_refcnt);
 			INP_WUNLOCK(inp);
 			return (0);
 		}
 		if (tp->t_state != TCPS_CLOSED) {
 			/*
 			 * The user has advanced the state
 			 * past the initial point, we may not
 			 * be able to switch.
 			 */
 			if (blk->tfb_tcp_handoff_ok != NULL) {
 				/*
 				 * Does the stack provide a
 				 * query mechanism, if so it may
 				 * still be possible?
 				 */
 				error = (*blk->tfb_tcp_handoff_ok)(tp);
 			} else
 				error = EINVAL;
 			if (error) {
 				refcount_release(&blk->tfb_refcnt);
 				INP_WUNLOCK(inp);
 				return(error);
 			}
 		}
 		if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
 			refcount_release(&blk->tfb_refcnt);
 			INP_WUNLOCK(inp);
 			return (ENOENT);
 		}
 		/*
 		 * Ensure the new stack takes ownership with a
 		 * clean slate on peak rate threshold.
 		 */
 #ifdef TCPHPTS
 		/* Assure that we are not on any hpts */
 		tcp_hpts_remove(tp);
 #endif
 		if (blk->tfb_tcp_fb_init) {
 			error = (*blk->tfb_tcp_fb_init)(tp, &ptr);
 			if (error) {
 				/*
 				 * Release the ref count the lookup
 				 * acquired.
 				 */ 
 				refcount_release(&blk->tfb_refcnt);
 				/* 
 				 * Now there is a chance that the
 				 * init() function mucked with some
 				 * things before it failed, such as
 				 * hpts or inp_flags2 or timer granularity.
 				 * It should not of, but lets give the old
 				 * stack a chance to reset to a known good state.
 				 */
 				if (tp->t_fb->tfb_switch_failed) {
 					(*tp->t_fb->tfb_switch_failed)(tp);
 				}
 			 	goto err_out;
 			}
 		}
 		if (tp->t_fb->tfb_tcp_fb_fini) {
 			struct epoch_tracker et;
 			/*
 			 * Tell the stack to cleanup with 0 i.e.
 			 * the tcb is not going away.
 			 */
 			NET_EPOCH_ENTER(et);
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
 			NET_EPOCH_EXIT(et);
 		}
 		/*
 		 * Release the old refcnt, the
 		 * lookup acquired a ref on the
 		 * new one already.
 		 */
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		/* 
 		 * Set in the new stack.
 		 */
 		tp->t_fb = blk;
 		tp->t_fb_ptr = ptr;
 #ifdef TCP_OFFLOAD
 		if (tp->t_flags & TF_TOE) {
 			tcp_offload_ctloutput(tp, sopt->sopt_dir,
 			     sopt->sopt_name);
 		}
 #endif
 err_out:
 		INP_WUNLOCK(inp);
 		return (error);
 
 	}
 
 	/* Pass in the INP locked, callee must unlock it. */
 	return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
 }
 
 static int
 tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct socket *so = inp->inp_socket;
 	struct tcpcb *tp = intotcpcb(inp);
 	int error = 0;
 
 	MPASS(sopt->sopt_dir == SOPT_GET);
 	INP_WLOCK_ASSERT(inp);
 	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 	    ("inp_flags == %x", inp->inp_flags));
 	KASSERT(so != NULL, ("inp_socket == NULL"));
 
 	if (sopt->sopt_level != IPPROTO_TCP) {
 		INP_WUNLOCK(inp);
 #ifdef INET6
 		if (inp->inp_vflag & INP_IPV6PROTO)
 			error = ip6_ctloutput(so, sopt);
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 		else
 #endif
 #ifdef INET
 			error = ip_ctloutput(so, sopt);
 #endif
 		return (error);
 	}
 	if (((sopt->sopt_name == TCP_FUNCTION_BLK) ||
 	     (sopt->sopt_name == TCP_FUNCTION_ALIAS))) {
 		struct tcp_function_set fsn;
 
 		if (sopt->sopt_name == TCP_FUNCTION_ALIAS) {
 			memset(&fsn, 0, sizeof(fsn));
 			find_tcp_function_alias(tp->t_fb, &fsn);
 		} else {
 			strncpy(fsn.function_set_name,
 			    tp->t_fb->tfb_tcp_block_name,
 			    TCP_FUNCTION_NAME_LEN_MAX);
 			fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
 		}
 		fsn.pcbcnt = tp->t_fb->tfb_refcnt;
 		INP_WUNLOCK(inp);
 		error = sooptcopyout(sopt, &fsn, sizeof fsn);
 		return (error);
 	}
 
 	/* Pass in the INP locked, callee must unlock it. */
 	return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
 }
 
 int
 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
 
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		return (ECONNRESET);
 	}
 	if (sopt->sopt_dir == SOPT_SET)
 		return (tcp_ctloutput_set(inp, sopt));
 	else if (sopt->sopt_dir == SOPT_GET)
 		return (tcp_ctloutput_get(inp, sopt));
 	else
 		panic("%s: sopt_dir $%d", __func__, sopt->sopt_dir);
 }
 
 /*
  * If this assert becomes untrue, we need to change the size of the buf
  * variable in tcp_default_ctloutput().
  */
 #ifdef CTASSERT
 CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
 CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
 #endif
 
 #ifdef KERN_TLS
 static int
 copyin_tls_enable(struct sockopt *sopt, struct tls_enable *tls)
 {
 	struct tls_enable_v0 tls_v0;
 	int error;
 
 	if (sopt->sopt_valsize == sizeof(tls_v0)) {
 		error = sooptcopyin(sopt, &tls_v0, sizeof(tls_v0),
 		    sizeof(tls_v0));
 		if (error)
 			return (error);
 		memset(tls, 0, sizeof(*tls));
 		tls->cipher_key = tls_v0.cipher_key;
 		tls->iv = tls_v0.iv;
 		tls->auth_key = tls_v0.auth_key;
 		tls->cipher_algorithm = tls_v0.cipher_algorithm;
 		tls->cipher_key_len = tls_v0.cipher_key_len;
 		tls->iv_len = tls_v0.iv_len;
 		tls->auth_algorithm = tls_v0.auth_algorithm;
 		tls->auth_key_len = tls_v0.auth_key_len;
 		tls->flags = tls_v0.flags;
 		tls->tls_vmajor = tls_v0.tls_vmajor;
 		tls->tls_vminor = tls_v0.tls_vminor;
 		return (0);
 	}
 
 	return (sooptcopyin(sopt, tls, sizeof(*tls), sizeof(*tls)));
 }
 #endif
 
 extern struct cc_algo newreno_cc_algo;
 
 static int
 tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct cc_algo *algo;
 	void *ptr = NULL;
 	struct tcpcb *tp;
 	struct cc_var cc_mem;
 	char	buf[TCP_CA_NAME_MAX];
 	size_t mem_sz;
 	int error;
 
 	INP_WUNLOCK(inp);
 	error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
 	if (error)
 		return(error);
 	buf[sopt->sopt_valsize] = '\0';
 	CC_LIST_RLOCK();
 	STAILQ_FOREACH(algo, &cc_list, entries) {
 		if (strncmp(buf, algo->name,
 			    TCP_CA_NAME_MAX) == 0) {
 			if (algo->flags & CC_MODULE_BEING_REMOVED) {
 				/* We can't "see" modules being unloaded */
 				continue;
 			}
 			break;
 		}
 	}
 	if (algo == NULL) {
 		CC_LIST_RUNLOCK();
 		return(ESRCH);
 	}
 	/* 
 	 * With a reference the algorithm cannot be removed
 	 * so we hold a reference through the change process.
 	 */
 	cc_refer(algo);
 	CC_LIST_RUNLOCK();
 	if (algo->cb_init != NULL) {
 		/* We can now pre-get the memory for the CC */
 		mem_sz = (*algo->cc_data_sz)();
 		if (mem_sz == 0) {
 			goto no_mem_needed;
 		}
 		ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
 	} else {
 no_mem_needed:
 		mem_sz = 0;
 		ptr = NULL;
 	}
 	/*
 	 * Make sure its all clean and zero and also get
 	 * back the inplock.
 	 */
 	memset(&cc_mem, 0, sizeof(cc_mem));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_WUNLOCK(inp);
 		if (ptr)
 			free(ptr, M_CC_MEM);
 		/* Release our temp reference */
 		CC_LIST_RLOCK();
 		cc_release(algo);
 		CC_LIST_RUNLOCK();
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 	if (ptr != NULL)
 		memset(ptr, 0, mem_sz);
 	cc_mem.ccvc.tcp = tp;
 	/*
 	 * We once again hold a write lock over the tcb so it's
 	 * safe to do these things without ordering concerns.
 	 * Note here we init into stack memory.
 	 */
 	if (algo->cb_init != NULL)
 		error = algo->cb_init(&cc_mem, ptr);
 	else
 		error = 0;
 	/*
 	 * The CC algorithms, when given their memory
 	 * should not fail we could in theory have a
 	 * KASSERT here.
 	 */
 	if (error == 0) {
 		/*
 		 * Touchdown, lets go ahead and move the
 		 * connection to the new CC module by
 		 * copying in the cc_mem after we call
 		 * the old ones cleanup (if any).
 		 */
 		if (CC_ALGO(tp)->cb_destroy != NULL)
 			CC_ALGO(tp)->cb_destroy(&tp->t_ccv);
 		/* Detach the old CC from the tcpcb  */
 		cc_detach(tp);
 		/* Copy in our temp memory that was inited */
 		memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var));
 		/* Now attach the new, which takes a reference */
 		cc_attach(tp, algo);
 		/* Ok now are we where we have gotten past any conn_init? */
 		if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
 			/* Yep run the connection init for the new CC */
 			CC_ALGO(tp)->conn_init(&tp->t_ccv);
 		}
 	} else if (ptr)
 		free(ptr, M_CC_MEM);
 	INP_WUNLOCK(inp);
 	/* Now lets release our temp reference */
 	CC_LIST_RLOCK();
 	cc_release(algo);
 	CC_LIST_RUNLOCK();
 	return (error);
 }
 
 int
 tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt)
 {
 	struct inpcb *inp = tptoinpcb(tp);
 	int	error, opt, optval;
 	u_int	ui;
 	struct	tcp_info ti;
 #ifdef KERN_TLS
 	struct tls_enable tls;
 	struct socket *so = inp->inp_socket;
 #endif
 	char	*pbuf, buf[TCP_LOG_ID_LEN];
 #ifdef STATS
 	struct statsblob *sbp;
 #endif
 	size_t	len;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 	    ("inp_flags == %x", inp->inp_flags));
 	KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL"));
 
 	switch (sopt->sopt_level) {
 #ifdef INET6
 	case IPPROTO_IPV6:
 		MPASS(inp->inp_vflag & INP_IPV6PROTO);
 		switch (sopt->sopt_name) {
 		case IPV6_USE_MIN_MTU:
 			tcp6_use_min_mtu(tp);
 			/* FALLTHROUGH */
 		}
 		INP_WUNLOCK(inp);
 		return (0);
 #endif
 #ifdef INET
 	case IPPROTO_IP:
 		INP_WUNLOCK(inp);
 		return (0);
 #endif
 	}
 
 	/*
 	 * For TCP_CCALGOOPT forward the control to CC module, for both
 	 * SOPT_SET and SOPT_GET.
 	 */
 	switch (sopt->sopt_name) {
 	case TCP_CCALGOOPT:
 		INP_WUNLOCK(inp);
 		if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT)
 			return (EINVAL);
 		pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
 		error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
 		    sopt->sopt_valsize);
 		if (error) {
 			free(pbuf, M_TEMP);
 			return (error);
 		}
 		INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
 		if (CC_ALGO(tp)->ctl_output != NULL)
 			error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf);
 		else
 			error = ENOENT;
 		INP_WUNLOCK(inp);
 		if (error == 0 && sopt->sopt_dir == SOPT_GET)
 			error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
 		free(pbuf, M_TEMP);
 		return (error);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		case TCP_MD5SIG:
 			INP_WUNLOCK(inp);
 			if (!TCPMD5_ENABLED())
 				return (ENOPROTOOPT);
 			error = TCPMD5_PCBCTL(inp, sopt);
 			if (error)
 				return (error);
 			INP_WLOCK_RECHECK(inp);
 			goto unlock_and_done;
 #endif /* IPSEC */
 
 		case TCP_NODELAY:
 		case TCP_NOOPT:
 		case TCP_LRD:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			switch (sopt->sopt_name) {
 			case TCP_NODELAY:
 				opt = TF_NODELAY;
 				break;
 			case TCP_NOOPT:
 				opt = TF_NOOPT;
 				break;
 			case TCP_LRD:
 				opt = TF_LRD;
 				break;
 			default:
 				opt = 0; /* dead code to fool gcc */
 				break;
 			}
 
 			if (optval)
 				tp->t_flags |= opt;
 			else
 				tp->t_flags &= ~opt;
 unlock_and_done:
 #ifdef TCP_OFFLOAD
 			if (tp->t_flags & TF_TOE) {
 				tcp_offload_ctloutput(tp, sopt->sopt_dir,
 				    sopt->sopt_name);
 			}
 #endif
 			INP_WUNLOCK(inp);
 			break;
 
 		case TCP_NOPUSH:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			if (optval)
 				tp->t_flags |= TF_NOPUSH;
 			else if (tp->t_flags & TF_NOPUSH) {
 				tp->t_flags &= ~TF_NOPUSH;
 				if (TCPS_HAVEESTABLISHED(tp->t_state)) {
 					struct epoch_tracker et;
 
 					NET_EPOCH_ENTER(et);
 					error = tcp_output_nodrop(tp);
 					NET_EPOCH_EXIT(et);
 				}
 			}
 			goto unlock_and_done;
 
 		case TCP_REMOTE_UDP_ENCAPS_PORT:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 			if ((optval < TCP_TUNNELING_PORT_MIN) ||
 			    (optval > TCP_TUNNELING_PORT_MAX)) {
 				/* Its got to be in range */
 				return (EINVAL);
 			}
 			if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) {
 				/* You have to have enabled a UDP tunneling port first */
 				return (EINVAL);
 			}
 			INP_WLOCK_RECHECK(inp);
 			if (tp->t_state != TCPS_CLOSED) {
 				/* You can't change after you are connected */
 				error = EINVAL;
 			} else {
 				/* Ok we are all good set the port */
 				tp->t_port = htons(optval);
 			}
 			goto unlock_and_done;
 
 		case TCP_MAXSEG:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			if (optval > 0 && optval <= tp->t_maxseg &&
 			    optval + 40 >= V_tcp_minmss)
 				tp->t_maxseg = optval;
 			else
 				error = EINVAL;
 			goto unlock_and_done;
 
 		case TCP_INFO:
 			INP_WUNLOCK(inp);
 			error = EINVAL;
 			break;
 
 		case TCP_STATS:
 			INP_WUNLOCK(inp);
 #ifdef STATS
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			if (optval > 0)
 				sbp = stats_blob_alloc(
 				    V_tcp_perconn_stats_dflt_tpl, 0);
 			else
 				sbp = NULL;
 
 			INP_WLOCK_RECHECK(inp);
 			if ((tp->t_stats != NULL && sbp == NULL) ||
 			    (tp->t_stats == NULL && sbp != NULL)) {
 				struct statsblob *t = tp->t_stats;
 				tp->t_stats = sbp;
 				sbp = t;
 			}
 			INP_WUNLOCK(inp);
 
 			stats_blob_destroy(sbp);
 #else
 			return (EOPNOTSUPP);
 #endif /* !STATS */
 			break;
 
 		case TCP_CONGESTION:
 			error = tcp_set_cc_mod(inp, sopt);
 			break;
 
 		case TCP_REUSPORT_LB_NUMA:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 			    sizeof(optval));
 			INP_WLOCK_RECHECK(inp);
 			if (!error)
 				error = in_pcblbgroup_numa(inp, optval);
 			INP_WUNLOCK(inp);
 			break;
 
 #ifdef KERN_TLS
 		case TCP_TXTLS_ENABLE:
 			INP_WUNLOCK(inp);
 			error = copyin_tls_enable(sopt, &tls);
 			if (error)
 				break;
 			error = ktls_enable_tx(so, &tls);
 			break;
 		case TCP_TXTLS_MODE:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			error = ktls_set_tx_mode(so, ui);
 			INP_WUNLOCK(inp);
 			break;
 		case TCP_RXTLS_ENABLE:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &tls, sizeof(tls),
 			    sizeof(tls));
 			if (error)
 				break;
 			error = ktls_enable_rx(so, &tls);
 			break;
 #endif
 		case TCP_MAXUNACKTIME:
 		case TCP_KEEPIDLE:
 		case TCP_KEEPINTVL:
 		case TCP_KEEPINIT:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 			if (error)
 				return (error);
 
 			if (ui > (UINT_MAX / hz)) {
 				error = EINVAL;
 				break;
 			}
 			ui *= hz;
 
 			INP_WLOCK_RECHECK(inp);
 			switch (sopt->sopt_name) {
 			case TCP_MAXUNACKTIME:
 				tp->t_maxunacktime = ui;
 				break;
 
 			case TCP_KEEPIDLE:
 				tp->t_keepidle = ui;
 				/*
 				 * XXX: better check current remaining
 				 * timeout and "merge" it with new value.
 				 */
 				if ((tp->t_state > TCPS_LISTEN) &&
 				    (tp->t_state <= TCPS_CLOSING))
 					tcp_timer_activate(tp, TT_KEEP,
 					    TP_KEEPIDLE(tp));
 				break;
 			case TCP_KEEPINTVL:
 				tp->t_keepintvl = ui;
 				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 				    (TP_MAXIDLE(tp) > 0))
 					tcp_timer_activate(tp, TT_2MSL,
 					    TP_MAXIDLE(tp));
 				break;
 			case TCP_KEEPINIT:
 				tp->t_keepinit = ui;
 				if (tp->t_state == TCPS_SYN_RECEIVED ||
 				    tp->t_state == TCPS_SYN_SENT)
 					tcp_timer_activate(tp, TT_KEEP,
 					    TP_KEEPINIT(tp));
 				break;
 			}
 			goto unlock_and_done;
 
 		case TCP_KEEPCNT:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			tp->t_keepcnt = ui;
 			if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 			    (TP_MAXIDLE(tp) > 0))
 				tcp_timer_activate(tp, TT_2MSL,
 				    TP_MAXIDLE(tp));
 			goto unlock_and_done;
 
 #ifdef TCPPCAP
 		case TCP_PCAP_OUT:
 		case TCP_PCAP_IN:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			if (optval >= 0)
 				tcp_pcap_set_sock_max(
 					(sopt->sopt_name == TCP_PCAP_OUT) ?
 					&(tp->t_outpkts) : &(tp->t_inpkts),
 					optval);
 			else
 				error = EINVAL;
 			goto unlock_and_done;
 #endif
 
 		case TCP_FASTOPEN: {
 			struct tcp_fastopen tfo_optval;
 
 			INP_WUNLOCK(inp);
 			if (!V_tcp_fastopen_client_enable &&
 			    !V_tcp_fastopen_server_enable)
 				return (EPERM);
 
 			error = sooptcopyin(sopt, &tfo_optval,
 				    sizeof(tfo_optval), sizeof(int));
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			if ((tp->t_state != TCPS_CLOSED) &&
 			    (tp->t_state != TCPS_LISTEN)) {
 				error = EINVAL;
 				goto unlock_and_done;
 			}
 			if (tfo_optval.enable) {
 				if (tp->t_state == TCPS_LISTEN) {
 					if (!V_tcp_fastopen_server_enable) {
 						error = EPERM;
 						goto unlock_and_done;
 					}
 
 					if (tp->t_tfo_pending == NULL)
 						tp->t_tfo_pending =
 						    tcp_fastopen_alloc_counter();
 				} else {
 					/*
 					 * If a pre-shared key was provided,
 					 * stash it in the client cookie
 					 * field of the tcpcb for use during
 					 * connect.
 					 */
 					if (sopt->sopt_valsize ==
 					    sizeof(tfo_optval)) {
 						memcpy(tp->t_tfo_cookie.client,
 						       tfo_optval.psk,
 						       TCP_FASTOPEN_PSK_LEN);
 						tp->t_tfo_client_cookie_len =
 						    TCP_FASTOPEN_PSK_LEN;
 					}
 				}
 				tp->t_flags |= TF_FASTOPEN;
 			} else
 				tp->t_flags &= ~TF_FASTOPEN;
 			goto unlock_and_done;
 		}
 
 #ifdef TCP_BLACKBOX
 		case TCP_LOG:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			error = tcp_log_state_change(tp, optval);
 			goto unlock_and_done;
 
 		case TCP_LOGBUF:
 			INP_WUNLOCK(inp);
 			error = EINVAL;
 			break;
 
 		case TCP_LOGID:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
 			if (error)
 				break;
 			buf[sopt->sopt_valsize] = '\0';
 			INP_WLOCK_RECHECK(inp);
 			error = tcp_log_set_id(tp, buf);
 			/* tcp_log_set_id() unlocks the INP. */
 			break;
 
 		case TCP_LOGDUMP:
 		case TCP_LOGDUMPID:
 			INP_WUNLOCK(inp);
 			error =
 			    sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
 			if (error)
 				break;
 			buf[sopt->sopt_valsize] = '\0';
 			INP_WLOCK_RECHECK(inp);
 			if (sopt->sopt_name == TCP_LOGDUMP) {
 				error = tcp_log_dump_tp_logbuf(tp, buf,
 				    M_WAITOK, true);
 				INP_WUNLOCK(inp);
 			} else {
 				tcp_log_dump_tp_bucket_logbufs(tp, buf);
 				/*
 				 * tcp_log_dump_tp_bucket_logbufs() drops the
 				 * INP lock.
 				 */
 			}
 			break;
 #endif
 
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		tp = intotcpcb(inp);
 		switch (sopt->sopt_name) {
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		case TCP_MD5SIG:
 			INP_WUNLOCK(inp);
 			if (!TCPMD5_ENABLED())
 				return (ENOPROTOOPT);
 			error = TCPMD5_PCBCTL(inp, sopt);
 			break;
 #endif
 
 		case TCP_NODELAY:
 			optval = tp->t_flags & TF_NODELAY;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_MAXSEG:
 			optval = tp->t_maxseg;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_REMOTE_UDP_ENCAPS_PORT:
 			optval = ntohs(tp->t_port);
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_NOOPT:
 			optval = tp->t_flags & TF_NOOPT;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_NOPUSH:
 			optval = tp->t_flags & TF_NOPUSH;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_INFO:
 			tcp_fill_info(tp, &ti);
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &ti, sizeof ti);
 			break;
 		case TCP_STATS:
 			{
 #ifdef STATS
 			int nheld;
 			TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
 
 			error = 0;
 			socklen_t outsbsz = sopt->sopt_valsize;
 			if (tp->t_stats == NULL)
 				error = ENOENT;
 			else if (outsbsz >= tp->t_stats->cursz)
 				outsbsz = tp->t_stats->cursz;
 			else if (outsbsz >= sizeof(struct statsblob))
 				outsbsz = sizeof(struct statsblob);
 			else
 				error = EINVAL;
 			INP_WUNLOCK(inp);
 			if (error)
 				break;
 
 			sbp = sopt->sopt_val;
 			nheld = atop(round_page(((vm_offset_t)sbp) +
 			    (vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp));
 			vm_page_t ma[nheld];
 			if (vm_fault_quick_hold_pages(
 			    &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
 			    outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
 			    nheld) < 0) {
 				error = EFAULT;
 				break;
 			}
 
 			if ((error = copyin_nofault(&(sbp->flags), &sbflags,
 			    SIZEOF_MEMBER(struct statsblob, flags))))
 				goto unhold;
 
 			INP_WLOCK_RECHECK(inp);
 			error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
 			    sbflags | SB_CLONE_USRDSTNOFAULT);
 			INP_WUNLOCK(inp);
 			sopt->sopt_valsize = outsbsz;
 unhold:
 			vm_page_unhold_pages(ma, nheld);
 #else
 			INP_WUNLOCK(inp);
 			error = EOPNOTSUPP;
 #endif /* !STATS */
 			break;
 			}
 		case TCP_CONGESTION:
 			len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, buf, len + 1);
 			break;
 		case TCP_MAXUNACKTIME:
 		case TCP_KEEPIDLE:
 		case TCP_KEEPINTVL:
 		case TCP_KEEPINIT:
 		case TCP_KEEPCNT:
 			switch (sopt->sopt_name) {
 			case TCP_MAXUNACKTIME:
 				ui = TP_MAXUNACKTIME(tp) / hz;
 				break;
 			case TCP_KEEPIDLE:
 				ui = TP_KEEPIDLE(tp) / hz;
 				break;
 			case TCP_KEEPINTVL:
 				ui = TP_KEEPINTVL(tp) / hz;
 				break;
 			case TCP_KEEPINIT:
 				ui = TP_KEEPINIT(tp) / hz;
 				break;
 			case TCP_KEEPCNT:
 				ui = TP_KEEPCNT(tp);
 				break;
 			}
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &ui, sizeof(ui));
 			break;
 #ifdef TCPPCAP
 		case TCP_PCAP_OUT:
 		case TCP_PCAP_IN:
 			optval = tcp_pcap_get_sock_max(
 					(sopt->sopt_name == TCP_PCAP_OUT) ?
 					&(tp->t_outpkts) : &(tp->t_inpkts));
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 #endif
 		case TCP_FASTOPEN:
 			optval = tp->t_flags & TF_FASTOPEN;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 #ifdef TCP_BLACKBOX
 		case TCP_LOG:
 			optval = tcp_get_bblog_state(tp);
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 		case TCP_LOGBUF:
 			/* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
 			error = tcp_log_getlogbuf(sopt, tp);
 			break;
 		case TCP_LOGID:
 			len = tcp_log_get_id(tp, buf);
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, buf, len + 1);
 			break;
 		case TCP_LOGDUMP:
 		case TCP_LOGDUMPID:
 			INP_WUNLOCK(inp);
 			error = EINVAL;
 			break;
 #endif
 #ifdef KERN_TLS
 		case TCP_TXTLS_MODE:
 			error = ktls_get_tx_mode(so, &optval);
 			INP_WUNLOCK(inp);
 			if (error == 0)
 				error = sooptcopyout(sopt, &optval,
 				    sizeof(optval));
 			break;
 		case TCP_RXTLS_MODE:
 			error = ktls_get_rx_mode(so, &optval);
 			INP_WUNLOCK(inp);
 			if (error == 0)
 				error = sooptcopyout(sopt, &optval,
 				    sizeof(optval));
 			break;
 #endif
 		case TCP_LRD:
 			optval = tp->t_flags & TF_LRD;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 #undef INP_WLOCK_RECHECK
 #undef INP_WLOCK_RECHECK_CLEANUP
 
 /*
  * Initiate (or continue) disconnect.
  * If embryonic state, just send reset (once).
  * If in ``let data drain'' option and linger null, just drop.
  * Otherwise (hard), mark socket disconnecting and drop
  * current input data; switch states based on user close, and
  * send segment to peer (with FIN).
  */
 static void
 tcp_disconnect(struct tcpcb *tp)
 {
 	struct inpcb *inp = tptoinpcb(tp);
 	struct socket *so = tptosocket(tp);
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Neither tcp_close() nor tcp_drop() should return NULL, as the
 	 * socket is still open.
 	 */
 	if (tp->t_state < TCPS_ESTABLISHED &&
 	    !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
 		tp = tcp_close(tp);
 		KASSERT(tp != NULL,
 		    ("tcp_disconnect: tcp_close() returned NULL"));
 	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
 		tp = tcp_drop(tp, 0);
 		KASSERT(tp != NULL,
 		    ("tcp_disconnect: tcp_drop() returned NULL"));
 	} else {
 		soisdisconnecting(so);
 		sbflush(&so->so_rcv);
 		tcp_usrclosed(tp);
 		if (!(inp->inp_flags & INP_DROPPED))
 			/* Ignore stack's drop request, we already at it. */
 			(void)tcp_output_nodrop(tp);
 	}
 }
 
 /*
  * User issued close, and wish to trail through shutdown states:
  * if never received SYN, just forget it.  If got a SYN from peer,
  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
  * If already got a FIN from peer, then almost done; go to LAST_ACK
  * state.  In all other cases, have already sent FIN to peer (e.g.
  * after PRU_SHUTDOWN), and just have to play tedious game waiting
  * for peer to send FIN or not respond to keep-alives, etc.
  * We can let the user exit from the close as soon as the FIN is acked.
  */
 static void
 tcp_usrclosed(struct tcpcb *tp)
 {
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(tptoinpcb(tp));
 
 	switch (tp->t_state) {
 	case TCPS_LISTEN:
 #ifdef TCP_OFFLOAD
 		tcp_offload_listen_stop(tp);
 #endif
 		tcp_state_change(tp, TCPS_CLOSED);
 		/* FALLTHROUGH */
 	case TCPS_CLOSED:
 		tp = tcp_close(tp);
 		/*
 		 * tcp_close() should never return NULL here as the socket is
 		 * still open.
 		 */
 		KASSERT(tp != NULL,
 		    ("tcp_usrclosed: tcp_close() returned NULL"));
 		break;
 
 	case TCPS_SYN_SENT:
 	case TCPS_SYN_RECEIVED:
 		tp->t_flags |= TF_NEEDFIN;
 		break;
 
 	case TCPS_ESTABLISHED:
 		tcp_state_change(tp, TCPS_FIN_WAIT_1);
 		break;
 
 	case TCPS_CLOSE_WAIT:
 		tcp_state_change(tp, TCPS_LAST_ACK);
 		break;
 	}
 	if (tp->t_acktime == 0)
 		tp->t_acktime = ticks;
 	if (tp->t_state >= TCPS_FIN_WAIT_2) {
 		soisdisconnected(tptosocket(tp));
 		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
 		if (tp->t_state == TCPS_FIN_WAIT_2) {
 			int timeout;
 
 			timeout = (tcp_fast_finwait2_recycle) ?
 			    tcp_finwait2_timeout : TP_MAXIDLE(tp);
 			tcp_timer_activate(tp, TT_2MSL, timeout);
 		}
 	}
 }
 
 #ifdef DDB
 static void
 db_print_indent(int indent)
 {
 	int i;
 
 	for (i = 0; i < indent; i++)
 		db_printf(" ");
 }
 
 static void
 db_print_tstate(int t_state)
 {
 
 	switch (t_state) {
 	case TCPS_CLOSED:
 		db_printf("TCPS_CLOSED");
 		return;
 
 	case TCPS_LISTEN:
 		db_printf("TCPS_LISTEN");
 		return;
 
 	case TCPS_SYN_SENT:
 		db_printf("TCPS_SYN_SENT");
 		return;
 
 	case TCPS_SYN_RECEIVED:
 		db_printf("TCPS_SYN_RECEIVED");
 		return;
 
 	case TCPS_ESTABLISHED:
 		db_printf("TCPS_ESTABLISHED");
 		return;
 
 	case TCPS_CLOSE_WAIT:
 		db_printf("TCPS_CLOSE_WAIT");
 		return;
 
 	case TCPS_FIN_WAIT_1:
 		db_printf("TCPS_FIN_WAIT_1");
 		return;
 
 	case TCPS_CLOSING:
 		db_printf("TCPS_CLOSING");
 		return;
 
 	case TCPS_LAST_ACK:
 		db_printf("TCPS_LAST_ACK");
 		return;
 
 	case TCPS_FIN_WAIT_2:
 		db_printf("TCPS_FIN_WAIT_2");
 		return;
 
 	case TCPS_TIME_WAIT:
 		db_printf("TCPS_TIME_WAIT");
 		return;
 
 	default:
 		db_printf("unknown");
 		return;
 	}
 }
 
 static void
 db_print_tflags(u_int t_flags)
 {
 	int comma;
 
 	comma = 0;
 	if (t_flags & TF_ACKNOW) {
 		db_printf("%sTF_ACKNOW", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_DELACK) {
 		db_printf("%sTF_DELACK", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_NODELAY) {
 		db_printf("%sTF_NODELAY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_NOOPT) {
 		db_printf("%sTF_NOOPT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_SENTFIN) {
 		db_printf("%sTF_SENTFIN", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_REQ_SCALE) {
 		db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_RCVD_SCALE) {
 		db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_REQ_TSTMP) {
 		db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_RCVD_TSTMP) {
 		db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_SACK_PERMIT) {
 		db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_NEEDSYN) {
 		db_printf("%sTF_NEEDSYN", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_NEEDFIN) {
 		db_printf("%sTF_NEEDFIN", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_NOPUSH) {
 		db_printf("%sTF_NOPUSH", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_PREVVALID) {
 		db_printf("%sTF_PREVVALID", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_MORETOCOME) {
 		db_printf("%sTF_MORETOCOME", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_SONOTCONN) {
 		db_printf("%sTF_SONOTCONN", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_LASTIDLE) {
 		db_printf("%sTF_LASTIDLE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_RXWIN0SENT) {
 		db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_FASTRECOVERY) {
 		db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_CONGRECOVERY) {
 		db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_WASFRECOVERY) {
 		db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_WASCRECOVERY) {
 		db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_SIGNATURE) {
 		db_printf("%sTF_SIGNATURE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_FORCEDATA) {
 		db_printf("%sTF_FORCEDATA", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_TSO) {
 		db_printf("%sTF_TSO", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_FASTOPEN) {
 		db_printf("%sTF_FASTOPEN", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_tflags2(u_int t_flags2)
 {
 	int comma;
 
 	comma = 0;
 	if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
 		db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_PLPMTU_PMTUD) {
 		db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
 		db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_LOG_AUTO) {
 		db_printf("%sTF2_LOG_AUTO", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_DROP_AF_DATA) {
 		db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_ECN_PERMIT) {
 		db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_ECN_SND_CWR) {
 		db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_ECN_SND_ECE) {
 		db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_ACE_PERMIT) {
 		db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags2 & TF2_FBYTES_COMPLETE) {
 		db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_toobflags(char t_oobflags)
 {
 	int comma;
 
 	comma = 0;
 	if (t_oobflags & TCPOOB_HAVEDATA) {
 		db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_oobflags & TCPOOB_HADDATA) {
 		db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, tp);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
 	   TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
 
 	db_print_indent(indent);
 	db_printf("t_callout: %p   t_timers: %p\n",
 	    &tp->t_callout, &tp->t_timers);
 
 	db_print_indent(indent);
 	db_printf("t_state: %d (", tp->t_state);
 	db_print_tstate(tp->t_state);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("t_flags: 0x%x (", tp->t_flags);
 	db_print_tflags(tp->t_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("t_flags2: 0x%x (", tp->t_flags2);
 	db_print_tflags2(tp->t_flags2);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: 0x%08x\n",
 	    tp->snd_una, tp->snd_max, tp->snd_nxt);
 
 	db_print_indent(indent);
 	db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
 	   tp->snd_up, tp->snd_wl1, tp->snd_wl2);
 
 	db_print_indent(indent);
 	db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
 	    tp->iss, tp->irs, tp->rcv_nxt);
 
 	db_print_indent(indent);
 	db_printf("rcv_adv: 0x%08x   rcv_wnd: %u   rcv_up: 0x%08x\n",
 	    tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
 
 	db_print_indent(indent);
 	db_printf("snd_wnd: %u   snd_cwnd: %u\n",
 	   tp->snd_wnd, tp->snd_cwnd);
 
 	db_print_indent(indent);
 	db_printf("snd_ssthresh: %u   snd_recover: "
 	    "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
 
 	db_print_indent(indent);
 	db_printf("t_rcvtime: %u   t_startime: %u\n",
 	    tp->t_rcvtime, tp->t_starttime);
 
 	db_print_indent(indent);
 	db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",
 	    tp->t_rtttime, tp->t_rtseq);
 
 	db_print_indent(indent);
 	db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
 	    tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
 
 	db_print_indent(indent);
 	db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u\n",
 	    tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin);
 
 	db_print_indent(indent);
 	db_printf("t_rttupdated: %u   max_sndwnd: %u   t_softerror: %d\n",
 	    tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
 
 	db_print_indent(indent);
 	db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
 	db_print_toobflags(tp->t_oobflags);
 	db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
 
 	db_print_indent(indent);
 	db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
 	    tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
 
 	db_print_indent(indent);
 	db_printf("ts_recent: %u   ts_recent_age: %u\n",
 	    tp->ts_recent, tp->ts_recent_age);
 
 	db_print_indent(indent);
 	db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
 	    "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
 
 	db_print_indent(indent);
 	db_printf("snd_ssthresh_prev: %u   snd_recover_prev: 0x%08x   "
 	    "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
 	    tp->snd_recover_prev, tp->t_badrxtwin);
 
 	db_print_indent(indent);
 	db_printf("snd_numholes: %d  snd_holes first: %p\n",
 	    tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
 
 	db_print_indent(indent);
 	db_printf("snd_fack: 0x%08x   rcv_numsacks: %d\n",
 	    tp->snd_fack, tp->rcv_numsacks);
 
 	/* Skip sackblks, sackhint. */
 
 	db_print_indent(indent);
 	db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
 	    tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
 }
 
 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
 {
 	struct tcpcb *tp;
 
 	if (!have_addr) {
 		db_printf("usage: show tcpcb <addr>\n");
 		return;
 	}
 	tp = (struct tcpcb *)addr;
 
 	db_print_tcpcb(tp, "tcpcb", 0);
 }
 #endif
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index cbda7f536262..708a4e6b730d 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -1,1718 +1,1717 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.
  * Copyright (c) 2008 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2014 Kevin Lo
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
  */
 
 #include <sys/cdefs.h>
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/rss_config.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet6/ip6_var.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/udplite.h>
 #include <netinet/in_rss.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * UDP and UDP-Lite protocols implementation.
  * Per RFC 768, August, 1980.
  * Per RFC 3828, July, 2004.
  */
 
 /*
  * BSD 4.2 defaulted the udp checksum to be off.  Turning off udp checksums
  * removes the only data integrity mechanism for packets and malformed
  * packets that would otherwise be discarded due to bad checksums, and may
  * cause problems (especially for NFS data blocks).
  */
 VNET_DEFINE(int, udp_cksum) = 1;
 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(udp_cksum), 0, "compute udp checksum");
 
 VNET_DEFINE(int, udp_log_in_vain) = 0;
 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(udp_log_in_vain), 0, "Log all incoming UDP packets");
 
 VNET_DEFINE(int, udp_blackhole) = 0;
 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(udp_blackhole), 0,
     "Do not send port unreachables for refused connects");
 VNET_DEFINE(bool, udp_blackhole_local) = false;
 SYSCTL_BOOL(_net_inet_udp, OID_AUTO, blackhole_local, CTLFLAG_VNET |
     CTLFLAG_RW, &VNET_NAME(udp_blackhole_local), false,
     "Enforce net.inet.udp.blackhole for locally originated packets");
 
 u_long	udp_sendspace = 9216;		/* really max datagram size */
 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
 
 u_long	udp_recvspace = 40 * (1024 +
 #ifdef INET6
 				      sizeof(struct sockaddr_in6)
 #else
 				      sizeof(struct sockaddr_in)
 #endif
 				      );	/* 40 1K datagrams */
 
 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
 
 VNET_DEFINE(struct inpcbinfo, udbinfo);
 VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
 
 #ifndef UDBHASHSIZE
 #define	UDBHASHSIZE	128
 #endif
 
 VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat);		/* from udp_var.h */
 VNET_PCPUSTAT_SYSINIT(udpstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
     udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(udpstat);
 #endif /* VIMAGE */
 #ifdef INET
 static void	udp_detach(struct socket *so);
 #endif
 
 INPCBSTORAGE_DEFINE(udpcbstor, udpcb, "udpinp", "udp_inpcb", "udp", "udphash");
 INPCBSTORAGE_DEFINE(udplitecbstor, udpcb, "udpliteinp", "udplite_inpcb",
     "udplite", "udplitehash");
 
 static void
 udp_vnet_init(void *arg __unused)
 {
 
 	/*
 	 * For now default to 2-tuple UDP hashing - until the fragment
 	 * reassembly code can also update the flowid.
 	 *
 	 * Once we can calculate the flowid that way and re-establish
 	 * a 4-tuple, flip this to 4-tuple.
 	 */
 	in_pcbinfo_init(&V_udbinfo, &udpcbstor, UDBHASHSIZE, UDBHASHSIZE);
 	/* Additional pcbinfo for UDP-Lite */
 	in_pcbinfo_init(&V_ulitecbinfo, &udplitecbstor, UDBHASHSIZE,
 	    UDBHASHSIZE);
 }
 VNET_SYSINIT(udp_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
     udp_vnet_init, NULL);
 
 /*
  * Kernel module interface for updating udpstat.  The argument is an index
  * into udpstat treated as an array of u_long.  While this encodes the
  * general layout of udpstat into the caller, it doesn't encode its location,
  * so that future changes to add, for example, per-CPU stats support won't
  * cause binary compatibility problems for kernel modules.
  */
 void
 kmod_udpstat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(udpstat)[statnum], 1);
 }
 
 #ifdef VIMAGE
 static void
 udp_destroy(void *unused __unused)
 {
 
 	in_pcbinfo_destroy(&V_udbinfo);
 	in_pcbinfo_destroy(&V_ulitecbinfo);
 }
 VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
 #endif
 
 #ifdef INET
 /*
  * Subroutine of udp_input(), which appends the provided mbuf chain to the
  * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
  * contains the source address.  If the socket ends up being an IPv6 socket,
  * udp_append() will convert to a sockaddr_in6 before passing the address
  * into the socket code.
  *
  * In the normal case udp_append() will return 0, indicating that you
  * must unlock the inp. However if a tunneling protocol is in place we increment
  * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
  * then decrement the reference count. If the inp_rele returns 1, indicating the
  * inp is gone, we return that to the caller to tell them *not* to unlock
  * the inp. In the case of multi-cast this will cause the distribution
  * to stop (though most tunneling protocols known currently do *not* use
  * multicast).
  */
 static int
 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
     struct sockaddr_in *udp_in)
 {
 	struct sockaddr *append_sa;
 	struct socket *so;
 	struct mbuf *tmpopts, *opts = NULL;
 #ifdef INET6
 	struct sockaddr_in6 udp_in6;
 #endif
 	struct udpcb *up;
 	bool filtered;
 
 	INP_LOCK_ASSERT(inp);
 
 	/*
 	 * Engage the tunneling protocol.
 	 */
 	up = intoudpcb(inp);
 	if (up->u_tun_func != NULL) {
 		in_pcbref(inp);
 		INP_RUNLOCK(inp);
 		filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0],
 		    up->u_tun_ctx);
 		INP_RLOCK(inp);
 		if (filtered)
 			return (in_pcbrele_rlocked(inp));
 	}
 
 	off += sizeof(struct udphdr);
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/* Check AH/ESP integrity. */
 	if (IPSEC_ENABLED(ipv4) &&
 	    IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) {
 		m_freem(n);
 		return (0);
 	}
 	if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */
 		if (IPSEC_ENABLED(ipv4) &&
 		    UDPENCAP_INPUT(n, off, AF_INET) != 0)
 			return (0);	/* Consumed. */
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
 		m_freem(n);
 		return (0);
 	}
 #endif /* MAC */
 	if (inp->inp_flags & INP_CONTROLOPTS ||
 	    inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
 #ifdef INET6
 		if (inp->inp_vflag & INP_IPV6)
 			(void)ip6_savecontrol_v4(inp, n, &opts, NULL);
 		else
 #endif /* INET6 */
 			ip_savecontrol(inp, &opts, ip, n);
 	}
 	if ((inp->inp_vflag & INP_IPV4) && (inp->inp_flags2 & INP_ORIGDSTADDR)) {
 		tmpopts = sbcreatecontrol(&udp_in[1],
 		    sizeof(struct sockaddr_in), IP_ORIGDSTADDR, IPPROTO_IP,
 		    M_NOWAIT);
 		if (tmpopts) {
 			if (opts) {
 				tmpopts->m_next = opts;
 				opts = tmpopts;
 			} else
 				opts = tmpopts;
 		}
 	}
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		bzero(&udp_in6, sizeof(udp_in6));
 		udp_in6.sin6_len = sizeof(udp_in6);
 		udp_in6.sin6_family = AF_INET6;
 		in6_sin_2_v4mapsin6(&udp_in[0], &udp_in6);
 		append_sa = (struct sockaddr *)&udp_in6;
 	} else
 #endif /* INET6 */
 		append_sa = (struct sockaddr *)&udp_in[0];
 	m_adj(n, off);
 
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
 		soroverflow_locked(so);
 		m_freem(n);
 		if (opts)
 			m_freem(opts);
 		UDPSTAT_INC(udps_fullsock);
 	} else
 		sorwakeup_locked(so);
 	return (0);
 }
 
 static bool
 udp_multi_match(const struct inpcb *inp, void *v)
 {
 	struct ip *ip = v;
 	struct udphdr *uh = (struct udphdr *)(ip + 1);
 
 	if (inp->inp_lport != uh->uh_dport)
 		return (false);
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (false);
 #endif
 	if (inp->inp_laddr.s_addr != INADDR_ANY &&
 	    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
 		return (false);
 	if (inp->inp_faddr.s_addr != INADDR_ANY &&
 	    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
 		return (false);
 	if (inp->inp_fport != 0 &&
 	    inp->inp_fport != uh->uh_sport)
 		return (false);
 
 	return (true);
 }
 
 static int
 udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct inpcb_iterator inpi = INP_ITERATOR(udp_get_inpcbinfo(proto),
 	    INPLOOKUP_RLOCKPCB, udp_multi_match, ip);
 #ifdef KDTRACE_HOOKS
 	struct udphdr *uh = (struct udphdr *)(ip + 1);
 #endif
 	struct inpcb *inp;
 	struct mbuf *n;
 	int appends = 0;
 
 	MPASS(ip->ip_hl == sizeof(struct ip) >> 2);
 
 	while ((inp = inp_next(&inpi)) != NULL) {
 		/*
 		 * XXXRW: Because we weren't holding either the inpcb
 		 * or the hash lock when we checked for a match
 		 * before, we should probably recheck now that the
 		 * inpcb lock is held.
 		 */
 		/*
 		 * Handle socket delivery policy for any-source
 		 * and source-specific multicast. [RFC3678]
 		 */
 		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 			struct ip_moptions	*imo;
 			struct sockaddr_in	 group;
 			int			 blocked;
 
 			imo = inp->inp_moptions;
 			if (imo == NULL)
 				continue;
 			bzero(&group, sizeof(struct sockaddr_in));
 			group.sin_len = sizeof(struct sockaddr_in);
 			group.sin_family = AF_INET;
 			group.sin_addr = ip->ip_dst;
 
 			blocked = imo_multi_filter(imo, m->m_pkthdr.rcvif,
 				(struct sockaddr *)&group,
 				(struct sockaddr *)&udp_in[0]);
 			if (blocked != MCAST_PASS) {
 				if (blocked == MCAST_NOTGMEMBER)
 					IPSTAT_INC(ips_notmember);
 				if (blocked == MCAST_NOTSMEMBER ||
 				    blocked == MCAST_MUTED)
 					UDPSTAT_INC(udps_filtermcast);
 				continue;
 			}
 		}
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
 			if (proto == IPPROTO_UDPLITE)
 				UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
 			else
 				UDP_PROBE(receive, NULL, inp, ip, inp, uh);
 			if (udp_append(inp, ip, n, sizeof(struct ip), udp_in)) {
 				break;
 			} else
 				appends++;
 		}
 		/*
 		 * Don't look for additional matches if this one does
 		 * not have either the SO_REUSEPORT or SO_REUSEADDR
 		 * socket options set.  This heuristic avoids
 		 * searching through all pcbs in the common case of a
 		 * non-shared port.  It assumes that an application
 		 * will never clear these options after setting them.
 		 */
 		if ((inp->inp_socket->so_options &
 		    (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) {
 			INP_RUNLOCK(inp);
 			break;
 		}
 	}
 
 	if (appends == 0) {
 		/*
 		 * No matching pcb found; discard datagram.  (No need
 		 * to send an ICMP Port Unreachable for a broadcast
 		 * or multicast datgram.)
 		 */
 		UDPSTAT_INC(udps_noport);
 		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)))
 			UDPSTAT_INC(udps_noportmcast);
 		else
 			UDPSTAT_INC(udps_noportbcast);
 	}
 	m_freem(m);
 
 	return (IPPROTO_DONE);
 }
 
 static int
 udp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ip *ip;
 	struct udphdr *uh;
 	struct ifnet *ifp;
 	struct inpcb *inp;
 	uint16_t len, ip_len;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in udp_in[2];
 	struct mbuf *m;
 	struct m_tag *fwd_tag;
 	int cscov_partial, iphlen;
 
 	m = *mp;
 	iphlen = *offp;
 	ifp = m->m_pkthdr.rcvif;
 	*mp = NULL;
 	UDPSTAT_INC(udps_ipackets);
 
 	/*
 	 * Strip IP options, if any; should skip this, make available to
 	 * user, and use on returned packets, but we don't yet have a way to
 	 * check the checksum with options still present.
 	 */
 	if (iphlen > sizeof (struct ip)) {
 		ip_stripoptions(m);
 		iphlen = sizeof(struct ip);
 	}
 
 	/*
 	 * Get IP and UDP header together in first mbuf.
 	 */
 	if (m->m_len < iphlen + sizeof(struct udphdr)) {
 		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
 			UDPSTAT_INC(udps_hdrops);
 			return (IPPROTO_DONE);
 		}
 	}
 	ip = mtod(m, struct ip *);
 	uh = (struct udphdr *)((caddr_t)ip + iphlen);
 	cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
 
 	/*
 	 * Destination port of 0 is illegal, based on RFC768.
 	 */
 	if (uh->uh_dport == 0)
 		goto badunlocked;
 
 	/*
 	 * Construct sockaddr format source address.  Stuff source address
 	 * and datagram in user buffer.
 	 */
 	bzero(&udp_in[0], sizeof(struct sockaddr_in) * 2);
 	udp_in[0].sin_len = sizeof(struct sockaddr_in);
 	udp_in[0].sin_family = AF_INET;
 	udp_in[0].sin_port = uh->uh_sport;
 	udp_in[0].sin_addr = ip->ip_src;
 	udp_in[1].sin_len = sizeof(struct sockaddr_in);
 	udp_in[1].sin_family = AF_INET;
 	udp_in[1].sin_port = uh->uh_dport;
 	udp_in[1].sin_addr = ip->ip_dst;
 
 	/*
 	 * Make mbuf data length reflect UDP length.  If not enough data to
 	 * reflect UDP length, drop.
 	 */
 	len = ntohs((u_short)uh->uh_ulen);
 	ip_len = ntohs(ip->ip_len) - iphlen;
 	if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
 		/* Zero means checksum over the complete packet. */
 		if (len == 0)
 			len = ip_len;
 		cscov_partial = 0;
 	}
 	if (ip_len != len) {
 		if (len > ip_len || len < sizeof(struct udphdr)) {
 			UDPSTAT_INC(udps_badlen);
 			goto badunlocked;
 		}
 		if (proto == IPPROTO_UDP)
 			m_adj(m, len - ip_len);
 	}
 
 	/*
 	 * Checksum extended UDP header and data.
 	 */
 	if (uh->uh_sum) {
 		u_short uh_sum;
 
 		if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
 		    !cscov_partial) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				uh_sum = m->m_pkthdr.csum_data;
 			else
 				uh_sum = in_pseudo(ip->ip_src.s_addr,
 				    ip->ip_dst.s_addr, htonl((u_short)len +
 				    m->m_pkthdr.csum_data + proto));
 			uh_sum ^= 0xffff;
 		} else {
 			char b[offsetof(struct ipovly, ih_src)];
 			struct ipovly *ipov = (struct ipovly *)ip;
 
 			bcopy(ipov, b, sizeof(b));
 			bzero(ipov, sizeof(ipov->ih_x1));
 			ipov->ih_len = (proto == IPPROTO_UDP) ?
 			    uh->uh_ulen : htons(ip_len);
 			uh_sum = in_cksum(m, len + sizeof (struct ip));
 			bcopy(b, ipov, sizeof(b));
 		}
 		if (uh_sum) {
 			UDPSTAT_INC(udps_badsum);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	} else {
 		if (proto == IPPROTO_UDP) {
 			UDPSTAT_INC(udps_nosum);
 		} else {
 			/* UDPLite requires a checksum */
 			/* XXX: What is the right UDPLite MIB counter here? */
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 	    in_broadcast(ip->ip_dst, ifp))
 		return (udp_multi_input(m, proto, udp_in));
 
 	pcbinfo = udp_get_inpcbinfo(proto);
 
 	/*
 	 * Locate pcb for datagram.
 	 *
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		struct sockaddr_in *next_hop;
 
 		next_hop = (struct sockaddr_in *)(fwd_tag + 1);
 
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
 		    ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in_pcblookup(pcbinfo, ip->ip_src,
 			    uh->uh_sport, next_hop->sin_addr,
 			    next_hop->sin_port ? htons(next_hop->sin_port) :
 			    uh->uh_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_RLOCKPCB, ifp);
 		}
 		/* Remove the tag from the packet. We don't need it anymore. */
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP_NEXTHOP;
 	} else
 		inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
 		    ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
 		    INPLOOKUP_RLOCKPCB, ifp, m);
 	if (inp == NULL) {
 		if (V_udp_log_in_vain) {
 			char src[INET_ADDRSTRLEN];
 			char dst[INET_ADDRSTRLEN];
 
 			log(LOG_INFO,
 			    "Connection attempt to UDP %s:%d from %s:%d\n",
 			    inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport),
 			    inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport));
 		}
 		if (proto == IPPROTO_UDPLITE)
 			UDPLITE_PROBE(receive, NULL, NULL, ip, NULL, uh);
 		else
 			UDP_PROBE(receive, NULL, NULL, ip, NULL, uh);
 		UDPSTAT_INC(udps_noport);
 		if (m->m_flags & (M_BCAST | M_MCAST)) {
 			UDPSTAT_INC(udps_noportbcast);
 			goto badunlocked;
 		}
 		if (V_udp_blackhole && (V_udp_blackhole_local ||
 		    !in_localip(ip->ip_src)))
 			goto badunlocked;
 		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
 			goto badunlocked;
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
 		return (IPPROTO_DONE);
 	}
 
 	/*
 	 * Check the minimum TTL for socket.
 	 */
 	INP_RLOCK_ASSERT(inp);
 	if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
 		if (proto == IPPROTO_UDPLITE)
 			UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
 		else
 			UDP_PROBE(receive, NULL, inp, ip, inp, uh);
 		INP_RUNLOCK(inp);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 	if (cscov_partial) {
 		struct udpcb *up;
 
 		up = intoudpcb(inp);
 		if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 
 	if (proto == IPPROTO_UDPLITE)
 		UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
 	else
 		UDP_PROBE(receive, NULL, inp, ip, inp, uh);
 	if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
 		INP_RUNLOCK(inp);
 	return (IPPROTO_DONE);
 
 badunlocked:
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 #endif /* INET */
 
 /*
  * Notify a udp user of an asynchronous error; just wake up so that they can
  * collect error status.
  */
 struct inpcb *
 udp_notify(struct inpcb *inp, int errno)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
 	     errno == EHOSTDOWN) && inp->inp_route.ro_nh) {
 		NH_FREE(inp->inp_route.ro_nh);
 		inp->inp_route.ro_nh = (struct nhop_object *)NULL;
 	}
 
 	inp->inp_socket->so_error = errno;
 	sorwakeup(inp->inp_socket);
 	sowwakeup(inp->inp_socket);
 	return (inp);
 }
 
 #ifdef INET
 static void
 udp_common_ctlinput(struct icmp *icmp, struct inpcbinfo *pcbinfo)
 {
 	struct ip *ip = &icmp->icmp_ip;
 	struct udphdr *uh;
 	struct inpcb *inp;
 
 	if (icmp_errmap(icmp) == 0)
 		return;
 
 	uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 	inp = in_pcblookup(pcbinfo, ip->ip_dst, uh->uh_dport, ip->ip_src,
 	    uh->uh_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL) {
 		INP_WLOCK_ASSERT(inp);
 		if (inp->inp_socket != NULL)
 			udp_notify(inp, icmp_errmap(icmp));
 		INP_WUNLOCK(inp);
 	} else {
 		inp = in_pcblookup(pcbinfo, ip->ip_dst, uh->uh_dport,
 		    ip->ip_src, uh->uh_sport,
 		    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
 		if (inp != NULL) {
 			struct udpcb *up;
 			udp_tun_icmp_t *func;
 
 			up = intoudpcb(inp);
 			func = up->u_icmp_func;
 			INP_RUNLOCK(inp);
 			if (func != NULL)
 				func(icmp);
 		}
 	}
 }
 
 static void
 udp_ctlinput(struct icmp *icmp)
 {
 
 	return (udp_common_ctlinput(icmp, &V_udbinfo));
 }
 
 static void
 udplite_ctlinput(struct icmp *icmp)
 {
 
 	return (udp_common_ctlinput(icmp, &V_ulitecbinfo));
 }
 #endif /* INET */
 
 static int
 udp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo,
 	    INPLOOKUP_RLOCKPCB);
 	struct xinpgen xig;
 	struct inpcb *inp;
 	int error;
 
 	if (req->newptr != 0)
 		return (EPERM);
 
 	if (req->oldptr == 0) {
 		int n;
 
 		n = V_udbinfo.ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
 	}
 
 	if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (error);
 
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = V_udbinfo.ipi_count;
 	xig.xig_gen = V_udbinfo.ipi_gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	while ((inp = inp_next(&inpi)) != NULL) {
 		if (inp->inp_gencnt <= xig.xig_gen &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 			struct xinpcb xi;
 
 			in_pcbtoxinpcb(inp, &xi);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 			if (error) {
 				INP_RUNLOCK(inp);
 				break;
 			}
 		}
 	}
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xig.xig_gen = V_udbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_udbinfo.ipi_count;
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
     udp_pcblist, "S,xinpcb",
     "List of active UDP sockets");
 
 #ifdef INET
 static int
 udp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in addrs[2];
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	NET_EPOCH_ENTER(et);
 	inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
 	    addrs[0].sin_addr, addrs[0].sin_port,
 	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
 	NET_EPOCH_EXIT(et);
 	if (inp != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
     CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
     0, 0, udp_getcred, "S,xucred",
     "Get the xucred of a UDP connection");
 #endif /* INET */
 
 int
 udp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct inpcb *inp;
 	struct udpcb *up;
 	int isudplite, error, optval;
 
 	error = 0;
 	isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 	INP_WLOCK(inp);
 	if (sopt->sopt_level != so->so_proto->pr_protocol) {
 #ifdef INET6
 		if (INP_CHECK_SOCKAF(so, AF_INET6)) {
 			INP_WUNLOCK(inp);
 			error = ip6_ctloutput(so, sopt);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			INP_WUNLOCK(inp);
 			error = ip_ctloutput(so, sopt);
 		}
 #endif
 		return (error);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 #ifdef INET
 		case UDP_ENCAP:
 			if (!IPSEC_ENABLED(ipv4)) {
 				INP_WUNLOCK(inp);
 				return (ENOPROTOOPT);
 			}
 			error = UDPENCAP_PCBCTL(inp, sopt);
 			break;
 #endif /* INET */
 #endif /* IPSEC */
 		case UDPLITE_SEND_CSCOV:
 		case UDPLITE_RECV_CSCOV:
 			if (!isudplite) {
 				INP_WUNLOCK(inp);
 				error = ENOPROTOOPT;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 			    sizeof(optval));
 			if (error != 0)
 				break;
 			inp = sotoinpcb(so);
 			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 			INP_WLOCK(inp);
 			up = intoudpcb(inp);
 			KASSERT(up != NULL, ("%s: up == NULL", __func__));
 			if ((optval != 0 && optval < 8) || (optval > 65535)) {
 				INP_WUNLOCK(inp);
 				error = EINVAL;
 				break;
 			}
 			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
 				up->u_txcslen = optval;
 			else
 				up->u_rxcslen = optval;
 			INP_WUNLOCK(inp);
 			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 #ifdef INET
 		case UDP_ENCAP:
 			if (!IPSEC_ENABLED(ipv4)) {
 				INP_WUNLOCK(inp);
 				return (ENOPROTOOPT);
 			}
 			error = UDPENCAP_PCBCTL(inp, sopt);
 			break;
 #endif /* INET */
 #endif /* IPSEC */
 		case UDPLITE_SEND_CSCOV:
 		case UDPLITE_RECV_CSCOV:
 			if (!isudplite) {
 				INP_WUNLOCK(inp);
 				error = ENOPROTOOPT;
 				break;
 			}
 			up = intoudpcb(inp);
 			KASSERT(up != NULL, ("%s: up == NULL", __func__));
 			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
 				optval = up->u_txcslen;
 			else
 				optval = up->u_rxcslen;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 #ifdef INET
 #ifdef INET6
 /* The logic here is derived from ip6_setpktopt(). See comments there. */
 static int
 udp_v4mapped_pktinfo(struct cmsghdr *cm, struct sockaddr_in * src,
     struct inpcb *inp, int flags)
 {
 	struct ifnet *ifp;
 	struct in6_pktinfo *pktinfo;
 	struct in_addr ia;
 
 	if ((flags & PRUS_IPV6) == 0)
 		return (0);
 
 	if (cm->cmsg_level != IPPROTO_IPV6)
 		return (0);
 
 	if  (cm->cmsg_type != IPV6_2292PKTINFO &&
 	    cm->cmsg_type != IPV6_PKTINFO)
 		return (0);
 
 	if (cm->cmsg_len !=
 	    CMSG_LEN(sizeof(struct in6_pktinfo)))
 		return (EINVAL);
 
 	pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm);
 	if (!IN6_IS_ADDR_V4MAPPED(&pktinfo->ipi6_addr) &&
 	    !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr))
 		return (EINVAL);
 
 	/* Validate the interface index if specified. */
 	if (pktinfo->ipi6_ifindex) {
 		struct epoch_tracker et;
 
 		NET_EPOCH_ENTER(et);
 		ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
 		NET_EPOCH_EXIT(et);	/* XXXGL: unsafe ifp */
 		if (ifp == NULL)
 			return (ENXIO);
 	} else
 		ifp = NULL;
 	if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 		ia.s_addr = pktinfo->ipi6_addr.s6_addr32[3];
 		if (in_ifhasaddr(ifp, ia) == 0)
 			return (EADDRNOTAVAIL);
 	}
 
 	bzero(src, sizeof(*src));
 	src->sin_family = AF_INET;
 	src->sin_len = sizeof(*src);
 	src->sin_port = inp->inp_lport;
 	src->sin_addr.s_addr = pktinfo->ipi6_addr.s6_addr32[3];
 
 	return (0);
 }
 #endif	/* INET6 */
 
 int
 udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 	struct udpiphdr *ui;
 	int len, error = 0;
 	struct in_addr faddr, laddr;
 	struct cmsghdr *cm;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in *sin, src;
 	struct epoch_tracker et;
 	int cscov_partial = 0;
 	int ipflags = 0;
 	u_short fport, lport;
 	u_char tos, vflagsav;
 	uint8_t pr;
 	uint16_t cscov = 0;
 	uint32_t flowid = 0;
 	uint8_t flowtype = M_HASHTYPE_NONE;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_send: inp == NULL"));
 
 	if (addr != NULL) {
 		if (addr->sa_family != AF_INET)
 			error = EAFNOSUPPORT;
 		else if (addr->sa_len != sizeof(struct sockaddr_in))
 			error = EINVAL;
 		if (__predict_false(error != 0)) {
 			m_freem(control);
 			m_freem(m);
 			return (error);
 		}
 	}
 
 	len = m->m_pkthdr.len;
 	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
 		if (control)
 			m_freem(control);
 		m_freem(m);
 		return (EMSGSIZE);
 	}
 
 	src.sin_family = 0;
 	sin = (struct sockaddr_in *)addr;
 
 	/*
 	 * udp_send() may need to temporarily bind or connect the current
 	 * inpcb.  As such, we don't know up front whether we will need the
 	 * pcbinfo lock or not.  Do any work to decide what is needed up
 	 * front before acquiring any locks.
 	 *
 	 * We will need network epoch in either case, to safely lookup into
 	 * pcb hash.
 	 */
 	if (sin == NULL ||
 	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) ||
 	    (flags & PRUS_IPV6) != 0)
 		INP_WLOCK(inp);
 	else
 		INP_RLOCK(inp);
 	NET_EPOCH_ENTER(et);
 	tos = inp->inp_ip_tos;
 	if (control != NULL) {
 		/*
 		 * XXX: Currently, we assume all the optional information is
 		 * stored in a single mbuf.
 		 */
 		if (control->m_next) {
 			m_freem(control);
 			error = EINVAL;
 			goto release;
 		}
 		for (; control->m_len > 0;
 		    control->m_data += CMSG_ALIGN(cm->cmsg_len),
 		    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 			cm = mtod(control, struct cmsghdr *);
 			if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
 			    || cm->cmsg_len > control->m_len) {
 				error = EINVAL;
 				break;
 			}
 #ifdef INET6
 			error = udp_v4mapped_pktinfo(cm, &src, inp, flags);
 			if (error != 0)
 				break;
 #endif
 			if (cm->cmsg_level != IPPROTO_IP)
 				continue;
 
 			switch (cm->cmsg_type) {
 			case IP_SENDSRCADDR:
 				if (cm->cmsg_len !=
 				    CMSG_LEN(sizeof(struct in_addr))) {
 					error = EINVAL;
 					break;
 				}
 				bzero(&src, sizeof(src));
 				src.sin_family = AF_INET;
 				src.sin_len = sizeof(src);
 				src.sin_port = inp->inp_lport;
 				src.sin_addr =
 				    *(struct in_addr *)CMSG_DATA(cm);
 				break;
 
 			case IP_TOS:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
 					error = EINVAL;
 					break;
 				}
 				tos = *(u_char *)CMSG_DATA(cm);
 				break;
 
 			case IP_FLOWID:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 					error = EINVAL;
 					break;
 				}
 				flowid = *(uint32_t *) CMSG_DATA(cm);
 				break;
 
 			case IP_FLOWTYPE:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 					error = EINVAL;
 					break;
 				}
 				flowtype = *(uint32_t *) CMSG_DATA(cm);
 				break;
 
 #ifdef	RSS
 			case IP_RSSBUCKETID:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 					error = EINVAL;
 					break;
 				}
 				/* This is just a placeholder for now */
 				break;
 #endif	/* RSS */
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			if (error)
 				break;
 		}
 		m_freem(control);
 		control = NULL;
 	}
 	if (error)
 		goto release;
 
 	pr = inp->inp_socket->so_proto->pr_protocol;
 	pcbinfo = udp_get_inpcbinfo(pr);
 
 	/*
 	 * If the IP_SENDSRCADDR control message was specified, override the
 	 * source address for this datagram.  Its use is invalidated if the
 	 * address thus specified is incomplete or clobbers other inpcbs.
 	 */
 	laddr = inp->inp_laddr;
 	lport = inp->inp_lport;
 	if (src.sin_family == AF_INET) {
 		if ((lport == 0) ||
 		    (laddr.s_addr == INADDR_ANY &&
 		     src.sin_addr.s_addr == INADDR_ANY)) {
 			error = EINVAL;
 			goto release;
 		}
 		if ((flags & PRUS_IPV6) != 0) {
 			vflagsav = inp->inp_vflag;
 			inp->inp_vflag |= INP_IPV4;
 			inp->inp_vflag &= ~INP_IPV6;
 		}
 		INP_HASH_WLOCK(pcbinfo);
 		error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport,
 		    td->td_ucred);
 		INP_HASH_WUNLOCK(pcbinfo);
 		if ((flags & PRUS_IPV6) != 0)
 			inp->inp_vflag = vflagsav;
 		if (error)
 			goto release;
 	}
 
 	/*
 	 * If a UDP socket has been connected, then a local address/port will
 	 * have been selected and bound.
 	 *
 	 * If a UDP socket has not been connected to, then an explicit
 	 * destination address must be used, in which case a local
 	 * address/port may not have been selected and bound.
 	 */
 	if (sin != NULL) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
 			error = EISCONN;
 			goto release;
 		}
 
 		/*
 		 * Jail may rewrite the destination address, so let it do
 		 * that before we use it.
 		 */
 		error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
 		if (error)
 			goto release;
 
 		/*
 		 * If a local address or port hasn't yet been selected, or if
 		 * the destination address needs to be rewritten due to using
 		 * a special INADDR_ constant, invoke in_pcbconnect_setup()
 		 * to do the heavy lifting.  Once a port is selected, we
 		 * commit the binding back to the socket; we also commit the
 		 * binding of the address if in jail.
 		 *
 		 * If we already have a valid binding and we're not
 		 * requesting a destination address rewrite, use a fast path.
 		 */
 		if (inp->inp_laddr.s_addr == INADDR_ANY ||
 		    inp->inp_lport == 0 ||
 		    sin->sin_addr.s_addr == INADDR_ANY ||
 		    sin->sin_addr.s_addr == INADDR_BROADCAST) {
 			if ((flags & PRUS_IPV6) != 0) {
 				vflagsav = inp->inp_vflag;
 				inp->inp_vflag |= INP_IPV4;
 				inp->inp_vflag &= ~INP_IPV6;
 			}
 			INP_HASH_WLOCK(pcbinfo);
 			error = in_pcbconnect_setup(inp, sin, &laddr.s_addr,
 			    &lport, &faddr.s_addr, &fport, td->td_ucred);
 			if ((flags & PRUS_IPV6) != 0)
 				inp->inp_vflag = vflagsav;
 			if (error) {
 				INP_HASH_WUNLOCK(pcbinfo);
 				goto release;
 			}
 
 			/*
 			 * XXXRW: Why not commit the port if the address is
 			 * !INADDR_ANY?
 			 */
 			/* Commit the local port if newly assigned. */
 			if (inp->inp_laddr.s_addr == INADDR_ANY &&
 			    inp->inp_lport == 0) {
 				INP_WLOCK_ASSERT(inp);
 				/*
 				 * Remember addr if jailed, to prevent
 				 * rebinding.
 				 */
 				if (prison_flag(td->td_ucred, PR_IP4))
 					inp->inp_laddr = laddr;
 				inp->inp_lport = lport;
 				error = in_pcbinshash(inp);
 				INP_HASH_WUNLOCK(pcbinfo);
 				if (error != 0) {
 					inp->inp_lport = 0;
 					error = EAGAIN;
 					goto release;
 				}
 				inp->inp_flags |= INP_ANONPORT;
 			} else
 				INP_HASH_WUNLOCK(pcbinfo);
 		} else {
 			faddr = sin->sin_addr;
 			fport = sin->sin_port;
 		}
 	} else {
 		INP_LOCK_ASSERT(inp);
 		faddr = inp->inp_faddr;
 		fport = inp->inp_fport;
 		if (faddr.s_addr == INADDR_ANY) {
 			error = ENOTCONN;
 			goto release;
 		}
 	}
 
 	/*
 	 * Calculate data length and get a mbuf for UDP, IP, and possible
 	 * link-layer headers.  Immediate slide the data pointer back forward
 	 * since we won't use that space at this layer.
 	 */
 	M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto release;
 	}
 	m->m_data += max_linkhdr;
 	m->m_len -= max_linkhdr;
 	m->m_pkthdr.len -= max_linkhdr;
 
 	/*
 	 * Fill in mbuf with extended UDP header and addresses and length put
 	 * into network format.
 	 */
 	ui = mtod(m, struct udpiphdr *);
 	bzero(ui->ui_x1, sizeof(ui->ui_x1));	/* XXX still needed? */
 	ui->ui_v = IPVERSION << 4;
 	ui->ui_pr = pr;
 	ui->ui_src = laddr;
 	ui->ui_dst = faddr;
 	ui->ui_sport = lport;
 	ui->ui_dport = fport;
 	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
 	if (pr == IPPROTO_UDPLITE) {
 		struct udpcb *up;
 		uint16_t plen;
 
 		up = intoudpcb(inp);
 		cscov = up->u_txcslen;
 		plen = (u_short)len + sizeof(struct udphdr);
 		if (cscov >= plen)
 			cscov = 0;
 		ui->ui_len = htons(plen);
 		ui->ui_ulen = htons(cscov);
 		/*
 		 * For UDP-Lite, checksum coverage length of zero means
 		 * the entire UDPLite packet is covered by the checksum.
 		 */
 		cscov_partial = (cscov == 0) ? 0 : 1;
 	}
 
 	/*
 	 * Set the Don't Fragment bit in the IP header.
 	 */
 	if (inp->inp_flags & INP_DONTFRAG) {
 		struct ip *ip;
 
 		ip = (struct ip *)&ui->ui_i;
 		ip->ip_off |= htons(IP_DF);
 	}
 
 	if (inp->inp_socket->so_options & SO_DONTROUTE)
 		ipflags |= IP_ROUTETOIF;
 	if (inp->inp_socket->so_options & SO_BROADCAST)
 		ipflags |= IP_ALLOWBROADCAST;
 	if (inp->inp_flags & INP_ONESBCAST)
 		ipflags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	/*
 	 * Set up checksum and output datagram.
 	 */
 	ui->ui_sum = 0;
 	if (pr == IPPROTO_UDPLITE) {
 		if (inp->inp_flags & INP_ONESBCAST)
 			faddr.s_addr = INADDR_BROADCAST;
 		if (cscov_partial) {
 			if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
 				ui->ui_sum = 0xffff;
 		} else {
 			if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
 				ui->ui_sum = 0xffff;
 		}
 	} else if (V_udp_cksum) {
 		if (inp->inp_flags & INP_ONESBCAST)
 			faddr.s_addr = INADDR_BROADCAST;
 		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
 		    htons((u_short)len + sizeof(struct udphdr) + pr));
 		m->m_pkthdr.csum_flags = CSUM_UDP;
 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 	}
 	((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
 	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
 	((struct ip *)ui)->ip_tos = tos;		/* XXX */
 	UDPSTAT_INC(udps_opackets);
 
 	/*
 	 * Setup flowid / RSS information for outbound socket.
 	 *
 	 * Once the UDP code decides to set a flowid some other way,
 	 * this allows the flowid to be overridden by userland.
 	 */
 	if (flowtype != M_HASHTYPE_NONE) {
 		m->m_pkthdr.flowid = flowid;
 		M_HASHTYPE_SET(m, flowtype);
 	}
 #if defined(ROUTE_MPATH) || defined(RSS)
 	else if (CALC_FLOWID_OUTBOUND_SENDTO) {
 		uint32_t hash_val, hash_type;
 
 		hash_val = fib4_calc_packet_hash(laddr, faddr,
 		    lport, fport, pr, &hash_type);
 		m->m_pkthdr.flowid = hash_val;
 		M_HASHTYPE_SET(m, hash_type);
 	}
 
 	/*
 	 * Don't override with the inp cached flowid value.
 	 *
 	 * Depending upon the kind of send being done, the inp
 	 * flowid/flowtype values may actually not be appropriate
 	 * for this particular socket send.
 	 *
 	 * We should either leave the flowid at zero (which is what is
 	 * currently done) or set it to some software generated
 	 * hash value based on the packet contents.
 	 */
 	ipflags |= IP_NODEFAULTFLOWID;
 #endif	/* RSS */
 
 	if (pr == IPPROTO_UDPLITE)
 		UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
 	else
 		UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
 	error = ip_output(m, inp->inp_options,
 	    INP_WLOCKED(inp) ? &inp->inp_route : NULL, ipflags,
 	    inp->inp_moptions, inp);
 	INP_UNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	return (error);
 
 release:
 	INP_UNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	m_freem(m);
 	return (error);
 }
 
 void
 udp_abort(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
 		INP_HASH_WLOCK(pcbinfo);
 		in_pcbdisconnect(inp);
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 static int
 udp_attach(struct socket *so, int proto, struct thread *td)
 {
 	static uint32_t udp_flowid;
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *inp;
 	struct udpcb *up;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
 	error = soreserve(so, udp_sendspace, udp_recvspace);
 	if (error)
 		return (error);
 	error = in_pcballoc(so, pcbinfo);
 	if (error)
 		return (error);
 
 	inp = sotoinpcb(so);
 	inp->inp_ip_ttl = V_ip_defttl;
 	inp->inp_flowid = atomic_fetchadd_int(&udp_flowid, 1);
 	inp->inp_flowtype = M_HASHTYPE_OPAQUE;
 	up = intoudpcb(inp);
 	bzero(&up->u_start_zero, u_zero_size);
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 #endif /* INET */
 
 int
 udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx)
 {
 	struct inpcb *inp;
 	struct udpcb *up;
 
 	KASSERT(so->so_type == SOCK_DGRAM,
 	    ("udp_set_kernel_tunneling: !dgram"));
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
 	INP_WLOCK(inp);
 	up = intoudpcb(inp);
 	if ((f != NULL || i != NULL) && ((up->u_tun_func != NULL) ||
 	    (up->u_icmp_func != NULL))) {
 		INP_WUNLOCK(inp);
 		return (EBUSY);
 	}
 	up->u_tun_func = f;
 	up->u_icmp_func = i;
 	up->u_tun_ctx = ctx;
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 #ifdef INET
 static int
 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in *sinp;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
 
 	sinp = (struct sockaddr_in *)nam;
 	if (nam->sa_family != AF_INET) {
 		/*
 		 * Preserve compatibility with old programs.
 		 */
 		if (nam->sa_family != AF_UNSPEC ||
 		    nam->sa_len < offsetof(struct sockaddr_in, sin_zero) ||
 		    sinp->sin_addr.s_addr != INADDR_ANY)
 			return (EAFNOSUPPORT);
 		nam->sa_family = AF_INET;
 	}
 	if (nam->sa_len != sizeof(struct sockaddr_in))
 		return (EINVAL);
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	error = in_pcbbind(inp, sinp, td->td_ucred);
 	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 static void
 udp_close(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_close: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
 		INP_HASH_WLOCK(pcbinfo);
 		in_pcbdisconnect(inp);
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 static int
 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in *sin;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
 
 	sin = (struct sockaddr_in *)nam;
 	if (sin->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (sin->sin_len != sizeof(*sin))
 		return (EINVAL);
 
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
 		INP_WUNLOCK(inp);
 		return (EISCONN);
 	}
 	error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
 	if (error != 0) {
 		INP_WUNLOCK(inp);
 		return (error);
 	}
 	NET_EPOCH_ENTER(et);
 	INP_HASH_WLOCK(pcbinfo);
 	error = in_pcbconnect(inp, sin, td->td_ucred, true);
 	INP_HASH_WUNLOCK(pcbinfo);
 	NET_EPOCH_EXIT(et);
 	if (error == 0)
 		soisconnected(so);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 static void
 udp_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
 	    ("udp_detach: not disconnected"));
 	INP_WLOCK(inp);
-	in_pcbdetach(inp);
 	in_pcbfree(inp);
 }
 
 int
 udp_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr == INADDR_ANY) {
 		INP_WUNLOCK(inp);
 		return (ENOTCONN);
 	}
 	INP_HASH_WLOCK(pcbinfo);
 	in_pcbdisconnect(inp);
 	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 #endif /* INET */
 
 int
 udp_shutdown(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
 	INP_WLOCK(inp);
 	socantsendmore(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 #ifdef INET
 #define	UDP_PROTOSW							\
 	.pr_type =		SOCK_DGRAM,				\
 	.pr_flags =		PR_ATOMIC | PR_ADDR | PR_CAPATTACH,	\
 	.pr_ctloutput =		udp_ctloutput,				\
 	.pr_abort =		udp_abort,				\
 	.pr_attach =		udp_attach,				\
 	.pr_bind =		udp_bind,				\
 	.pr_connect =		udp_connect,				\
 	.pr_control =		in_control,				\
 	.pr_detach =		udp_detach,				\
 	.pr_disconnect =	udp_disconnect,				\
 	.pr_peeraddr =		in_getpeeraddr,				\
 	.pr_send =		udp_send,				\
 	.pr_soreceive =		soreceive_dgram,			\
 	.pr_sosend =		sosend_dgram,				\
 	.pr_shutdown =		udp_shutdown,				\
 	.pr_sockaddr =		in_getsockaddr,				\
 	.pr_sosetlabel =	in_pcbsosetlabel,			\
 	.pr_close =		udp_close
 
 struct protosw udp_protosw = {
 	.pr_protocol =		IPPROTO_UDP,
 	UDP_PROTOSW
 };
 
 struct protosw udplite_protosw = {
 	.pr_protocol =		IPPROTO_UDPLITE,
 	UDP_PROTOSW
 };
 
 static void
 udp_init(void *arg __unused)
 {
 
 	IPPROTO_REGISTER(IPPROTO_UDP, udp_input, udp_ctlinput);
 	IPPROTO_REGISTER(IPPROTO_UDPLITE, udp_input, udplite_ctlinput);
 }
 SYSINIT(udp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, udp_init, NULL);
 #endif /* INET */
diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c
index d790a397f551..66fe0afbe918 100644
--- a/sys/netinet6/raw_ip6.c
+++ b/sys/netinet6/raw_ip6.c
@@ -1,862 +1,861 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 #include "opt_ipsec.h"
 #include "opt_inet6.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_private.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
 #include <netinet/ip_var.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/raw_ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/send.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/stdarg.h>
 
 #define	satosin6(sa)	((struct sockaddr_in6 *)(sa))
 #define	ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
 
 /*
  * Raw interface to IP6 protocol.
  */
 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo			VNET(ripcbinfo)
 
 extern u_long	rip_sendspace;
 extern u_long	rip_recvspace;
 
 VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat);
 VNET_PCPUSTAT_SYSINIT(rip6stat);
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(rip6stat);
 #endif /* VIMAGE */
 
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
  */
 
 /*
  * The socket used to communicate with the multicast routing daemon.
  */
 VNET_DEFINE(struct socket *, ip6_mrouter);
 
 /*
  * The various mrouter functions.
  */
 int (*ip6_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip6_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip6_mrouter_done)(void);
 int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
 int (*mrt6_ioctl)(u_long, caddr_t);
 
 struct rip6_inp_match_ctx {
 	struct ip6_hdr *ip6;
 	int proto;
 };
 
 static bool
 rip6_inp_match(const struct inpcb *inp, void *v)
 {
 	struct rip6_inp_match_ctx *c = v;
 	struct ip6_hdr *ip6 = c->ip6;
 	int proto = c->proto;
 
 	/* XXX inp locking */
 	if ((inp->inp_vflag & INP_IPV6) == 0)
 		return (false);
 	if (inp->inp_ip_p && inp->inp_ip_p != proto)
 		return (false);
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
 	    !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
 		return (false);
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
 	    !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
 		return (false);
 
 	return (true);
 }
 
 /*
  * Setup generic address and protocol structures for raw_input routine, then
  * pass them along with mbuf chain.
  */
 int
 rip6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ifnet *ifp;
 	struct mbuf *n, *m = *mp;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct inpcb *inp;
 	struct mbuf *opts = NULL;
 	struct sockaddr_in6 fromsa;
 	struct rip6_inp_match_ctx ctx = { .ip6 = ip6, .proto = proto };
 	struct inpcb_iterator inpi = INP_ITERATOR(&V_ripcbinfo,
 	    INPLOOKUP_RLOCKPCB, rip6_inp_match, &ctx);
 	int delivered = 0;
 
 	NET_EPOCH_ASSERT();
 
 	RIP6STAT_INC(rip6s_ipackets);
 
 	init_sin6(&fromsa, m, 0); /* general init */
 
 	ifp = m->m_pkthdr.rcvif;
 
 	while ((inp = inp_next(&inpi)) != NULL) {
 		INP_RLOCK_ASSERT(inp);
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 		/*
 		 * Check AH/ESP integrity.
 		 */
 		if (IPSEC_ENABLED(ipv6) &&
 		    IPSEC_CHECK_POLICY(ipv6, m, inp) != 0) {
 			/* Do not inject data into pcb. */
 			continue;
 		}
 #endif /* IPSEC */
 		if (jailed_without_vnet(inp->inp_cred) &&
 		    !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
 		    prison_check_ip6(inp->inp_cred, &ip6->ip6_dst) != 0)
 			/*
 			 * Allow raw socket in jail to receive multicast;
 			 * assume process had PRIV_NETINET_RAW at attach,
 			 * and fall through into normal filter path if so.
 			 */
 			continue;
 		if (inp->in6p_cksum != -1) {
 			RIP6STAT_INC(rip6s_isum);
 			if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 ||
 			    in6_cksum(m, proto, *offp,
 			    m->m_pkthdr.len - *offp)) {
 				RIP6STAT_INC(rip6s_badsum);
 				/*
 				 * Drop the received message, don't send an
 				 * ICMP6 message. Set proto to IPPROTO_NONE
 				 * to achieve that.
 				 */
 				INP_RUNLOCK(inp);
 				proto = IPPROTO_NONE;
 				break;
 			}
 		}
 		/*
 		 * If this raw socket has multicast state, and we
 		 * have received a multicast, check if this socket
 		 * should receive it, as multicast filtering is now
 		 * the responsibility of the transport layer.
 		 */
 		if (inp->in6p_moptions &&
 		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 			/*
 			 * If the incoming datagram is for MLD, allow it
 			 * through unconditionally to the raw socket.
 			 *
 			 * Use the M_RTALERT_MLD flag to check for MLD
 			 * traffic without having to inspect the mbuf chain
 			 * more deeply, as all MLDv1/v2 host messages MUST
 			 * contain the Router Alert option.
 			 *
 			 * In the case of MLDv1, we may not have explicitly
 			 * joined the group, and may have set IFF_ALLMULTI
 			 * on the interface. im6o_mc_filter() may discard
 			 * control traffic we actually need to see.
 			 *
 			 * Userland multicast routing daemons should continue
 			 * filter the control traffic appropriately.
 			 */
 			int blocked;
 
 			blocked = MCAST_PASS;
 			if ((m->m_flags & M_RTALERT_MLD) == 0) {
 				struct sockaddr_in6 mcaddr;
 
 				bzero(&mcaddr, sizeof(struct sockaddr_in6));
 				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
 				mcaddr.sin6_family = AF_INET6;
 				mcaddr.sin6_addr = ip6->ip6_dst;
 
 				blocked = im6o_mc_filter(inp->in6p_moptions,
 				    ifp,
 				    (struct sockaddr *)&mcaddr,
 				    (struct sockaddr *)&fromsa);
 			}
 			if (blocked != MCAST_PASS) {
 				IP6STAT_INC(ip6s_notmember);
 				continue;
 			}
 		}
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
 			continue;
 		if (inp->inp_flags & INP_CONTROLOPTS ||
 		    inp->inp_socket->so_options & SO_TIMESTAMP)
 			ip6_savecontrol(inp, n, &opts);
 		/* strip intermediate headers */
 		m_adj(n, *offp);
 		if (sbappendaddr(&inp->inp_socket->so_rcv,
 		    (struct sockaddr *)&fromsa, n, opts) == 0) {
 			soroverflow(inp->inp_socket);
 			m_freem(n);
 			if (opts)
 				m_freem(opts);
 			RIP6STAT_INC(rip6s_fullsock);
 		} else {
 			sorwakeup(inp->inp_socket);
 			delivered++;
 		}
 		opts = NULL;
 	}
 	if (delivered == 0) {
 		RIP6STAT_INC(rip6s_nosock);
 		if (m->m_flags & M_MCAST)
 			RIP6STAT_INC(rip6s_nosockmcast);
 		if (proto == IPPROTO_NONE)
 			m_freem(m);
 		else
 			icmp6_error(m, ICMP6_PARAM_PROB,
 			    ICMP6_PARAMPROB_NEXTHEADER,
 			    ip6_get_prevhdr(m, *offp));
 		IP6STAT_DEC(ip6s_delivered);
 	} else
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 void
 rip6_ctlinput(struct ip6ctlparam *ip6cp)
 {
 	int errno;
 
 	if ((errno = icmp6_errmap(ip6cp->ip6c_icmp6)) != 0)
 		in6_pcbnotify(&V_ripcbinfo, ip6cp->ip6c_finaldst, 0,
 		    ip6cp->ip6c_src, 0, errno, ip6cp->ip6c_cmdarg,
 		    in6_rtchange);
 }
 
 /*
  * Generate IPv6 header and pass packet to ip6_output.  Tack on options user
  * may have setup with control call.
  */
 static int
 rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	struct sockaddr_in6 tmp, *dstsock;
 	struct m_tag *mtag;
 	struct ip6_hdr *ip6;
 	u_int	plen = m->m_pkthdr.len;
 	struct ip6_pktopts opt, *optp;
 	struct ifnet *oifp = NULL;
 	int error;
 	int type = 0, code = 0;		/* for ICMPv6 output statistics only */
 	int scope_ambiguous = 0;
 	int use_defzone = 0;
 	int hlim = 0;
 	struct in6_addr in6a;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_send: inp == NULL"));
 
 	/* Always copy sockaddr to avoid overwrites. */
 	/* Unlocked read. */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			error = EISCONN;
 			goto release;
 		}
 		tmp = (struct sockaddr_in6 ){
 			.sin6_family = AF_INET6,
 			.sin6_len = sizeof(struct sockaddr_in6),
 		};
 		INP_RLOCK(inp);
 		bcopy(&inp->in6p_faddr, &tmp.sin6_addr,
 		    sizeof(struct in6_addr));
 		INP_RUNLOCK(inp);
 		dstsock = &tmp;
 	} else {
 		if (nam == NULL)
 			error = ENOTCONN;
 		else if (nam->sa_family != AF_INET6)
 			error = EAFNOSUPPORT;
 		else if (nam->sa_len != sizeof(struct sockaddr_in6))
 			error = EINVAL;
 		else
 			error = 0;
 		if (error != 0)
 			goto release;
 		dstsock = (struct sockaddr_in6 *)nam;
 		if (dstsock->sin6_family != AF_INET6) {
 			error = EAFNOSUPPORT;
 			goto release;
 		}
 	}
 
 	INP_WLOCK(inp);
 
 	if (control != NULL) {
 		NET_EPOCH_ENTER(et);
 		error = ip6_setpktopts(control, &opt, inp->in6p_outputopts,
 		    so->so_cred, inp->inp_ip_p);
 		NET_EPOCH_EXIT(et);
 
 		if (error != 0) {
 			goto bad;
 		}
 		optp = &opt;
 	} else
 		optp = inp->in6p_outputopts;
 
 	/*
 	 * Check and convert scope zone ID into internal form.
 	 *
 	 * XXX: we may still need to determine the zone later.
 	 */
 	if (!(so->so_state & SS_ISCONNECTED)) {
 		if (!optp || !optp->ip6po_pktinfo ||
 		    !optp->ip6po_pktinfo->ipi6_ifindex)
 			use_defzone = V_ip6_use_defzone;
 		if (dstsock->sin6_scope_id == 0 && !use_defzone)
 			scope_ambiguous = 1;
 		if ((error = sa6_embedscope(dstsock, use_defzone)) != 0)
 			goto bad;
 	}
 
 	/*
 	 * For an ICMPv6 packet, we should know its type and code to update
 	 * statistics.
 	 */
 	if (inp->inp_ip_p == IPPROTO_ICMPV6) {
 		struct icmp6_hdr *icmp6;
 		if (m->m_len < sizeof(struct icmp6_hdr) &&
 		    (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) {
 			error = ENOBUFS;
 			goto bad;
 		}
 		icmp6 = mtod(m, struct icmp6_hdr *);
 		type = icmp6->icmp6_type;
 		code = icmp6->icmp6_code;
 	}
 
 	M_PREPEND(m, sizeof(*ip6), M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto bad;
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 
 #ifdef ROUTE_MPATH
 	if (CALC_FLOWID_OUTBOUND) {
 		uint32_t hash_type, hash_val;
 
 		hash_val = fib6_calc_software_hash(&inp->in6p_laddr,
 		    &dstsock->sin6_addr, 0, 0, inp->inp_ip_p, &hash_type);
 		inp->inp_flowid = hash_val;
 		inp->inp_flowtype = hash_type;
 	}
 #endif
 	/*
 	 * Source address selection.
 	 */
 	NET_EPOCH_ENTER(et);
 	error = in6_selectsrc_socket(dstsock, optp, inp, so->so_cred,
 	    scope_ambiguous, &in6a, &hlim);
 	NET_EPOCH_EXIT(et);
 
 	if (error)
 		goto bad;
 	error = prison_check_ip6(inp->inp_cred, &in6a);
 	if (error != 0)
 		goto bad;
 	ip6->ip6_src = in6a;
 
 	ip6->ip6_dst = dstsock->sin6_addr;
 
 	/*
 	 * Fill in the rest of the IPv6 header fields.
 	 */
 	ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
 	    (inp->inp_flow & IPV6_FLOWINFO_MASK);
 	ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
 	    (IPV6_VERSION & IPV6_VERSION_MASK);
 
 	/*
 	 * ip6_plen will be filled in ip6_output, so not fill it here.
 	 */
 	ip6->ip6_nxt = inp->inp_ip_p;
 	ip6->ip6_hlim = hlim;
 
 	if (inp->inp_ip_p == IPPROTO_ICMPV6 || inp->in6p_cksum != -1) {
 		struct mbuf *n;
 		int off;
 		u_int16_t *p;
 
 		/* Compute checksum. */
 		if (inp->inp_ip_p == IPPROTO_ICMPV6)
 			off = offsetof(struct icmp6_hdr, icmp6_cksum);
 		else
 			off = inp->in6p_cksum;
 		if (plen < off + 2) {
 			error = EINVAL;
 			goto bad;
 		}
 		off += sizeof(struct ip6_hdr);
 
 		n = m;
 		while (n && n->m_len <= off) {
 			off -= n->m_len;
 			n = n->m_next;
 		}
 		if (!n)
 			goto bad;
 		p = (u_int16_t *)(mtod(n, caddr_t) + off);
 		*p = 0;
 		*p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
 	}
 
 	/*
 	 * Send RA/RS messages to user land for protection, before sending
 	 * them to rtadvd/rtsol.
 	 */
 	if ((send_sendso_input_hook != NULL) &&
 	    inp->inp_ip_p == IPPROTO_ICMPV6) {
 		switch (type) {
 		case ND_ROUTER_ADVERT:
 		case ND_ROUTER_SOLICIT:
 			mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
 				sizeof(unsigned short), M_NOWAIT);
 			if (mtag == NULL)
 				goto bad;
 			m_tag_prepend(m, mtag);
 		}
 	}
 
 	NET_EPOCH_ENTER(et);
 	error = ip6_output(m, optp, NULL, 0, inp->in6p_moptions, &oifp, inp);
 	NET_EPOCH_EXIT(et);
 	if (inp->inp_ip_p == IPPROTO_ICMPV6) {
 		if (oifp)
 			icmp6_ifoutstat_inc(oifp, type, code);
 		ICMP6STAT_INC(icp6s_outhist[type]);
 	} else
 		RIP6STAT_INC(rip6s_opackets);
 
 	goto freectl;
 
  bad:
 	if (m)
 		m_freem(m);
 
  freectl:
 	if (control != NULL) {
 		ip6_clearpktopts(&opt, -1);
 		m_freem(control);
 	}
 	INP_WUNLOCK(inp);
 	return (error);
 
 release:
 	if (control != NULL)
 		m_freem(control);
 	m_freem(m);
 	return (error);
 }
 
 /*
  * Raw IPv6 socket option processing.
  */
 int
 rip6_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	int error;
 
 	if (sopt->sopt_level == IPPROTO_ICMPV6)
 		/*
 		 * XXX: is it better to call icmp6_ctloutput() directly
 		 * from protosw?
 		 */
 		return (icmp6_ctloutput(so, sopt));
 	else if (sopt->sopt_level != IPPROTO_IPV6) {
 		if (sopt->sopt_level == SOL_SOCKET &&
 		    sopt->sopt_name == SO_SETFIB) {
 			INP_WLOCK(inp);
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
 			INP_WUNLOCK(inp);
 			return (0);
 		}
 		return (EINVAL);
 	}
 
 	error = 0;
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case MRT6_INIT:
 		case MRT6_DONE:
 		case MRT6_ADD_MIF:
 		case MRT6_DEL_MIF:
 		case MRT6_ADD_MFC:
 		case MRT6_DEL_MFC:
 		case MRT6_PIM:
 			if (inp->inp_ip_p != IPPROTO_ICMPV6)
 				return (EOPNOTSUPP);
 			error = ip6_mrouter_get ?  ip6_mrouter_get(so, sopt) :
 			    EOPNOTSUPP;
 			break;
 		case IPV6_CHECKSUM:
 			error = ip6_raw_ctloutput(so, sopt);
 			break;
 		default:
 			error = ip6_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case MRT6_INIT:
 		case MRT6_DONE:
 		case MRT6_ADD_MIF:
 		case MRT6_DEL_MIF:
 		case MRT6_ADD_MFC:
 		case MRT6_DEL_MFC:
 		case MRT6_PIM:
 			if (inp->inp_ip_p != IPPROTO_ICMPV6)
 				return (EOPNOTSUPP);
 			error = ip6_mrouter_set ?  ip6_mrouter_set(so, sopt) :
 			    EOPNOTSUPP;
 			break;
 		case IPV6_CHECKSUM:
 			error = ip6_raw_ctloutput(so, sopt);
 			break;
 		default:
 			error = ip6_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 static int
 rip6_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	struct icmp6_filter *filter;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("rip6_attach: inp != NULL"));
 
 	error = priv_check(td, PRIV_NETINET_RAW);
 	if (error)
 		return (error);
 	if (proto >= IPPROTO_MAX || proto < 0)
 		return (EPROTONOSUPPORT);
 	error = soreserve(so, rip_sendspace, rip_recvspace);
 	if (error)
 		return (error);
 	filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
 	if (filter == NULL)
 		return (ENOMEM);
 	error = in_pcballoc(so, &V_ripcbinfo);
 	if (error) {
 		free(filter, M_PCB);
 		return (error);
 	}
 	inp = (struct inpcb *)so->so_pcb;
 	inp->inp_ip_p = proto;
 	inp->in6p_cksum = -1;
 	inp->in6p_icmp6filt = filter;
 	ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static void
 rip6_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_detach: inp == NULL"));
 
 	if (so == V_ip6_mrouter && ip6_mrouter_done)
 		ip6_mrouter_done();
 	/* xxx: RSVP */
 	INP_WLOCK(inp);
 	free(inp->in6p_icmp6filt, M_PCB);
-	in_pcbdetach(inp);
 	in_pcbfree(inp);
 }
 
 /* XXXRW: This can't ever be called. */
 static void
 rip6_abort(struct socket *so)
 {
 	struct inpcb *inp __diagused;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_abort: inp == NULL"));
 
 	soisdisconnected(so);
 }
 
 static void
 rip6_close(struct socket *so)
 {
 	struct inpcb *inp __diagused;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_close: inp == NULL"));
 
 	soisdisconnected(so);
 }
 
 static int
 rip6_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL"));
 
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return (ENOTCONN);
 	inp->in6p_faddr = in6addr_any;
 	rip6_abort(so);
 	return (0);
 }
 
 static int
 rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
 	struct ifaddr *ifa = NULL;
 	int error = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_bind: inp == NULL"));
 
 	if (nam->sa_family != AF_INET6)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 	if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0)
 		return (error);
 	if (CK_STAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6)
 		return (EADDRNOTAVAIL);
 	if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
 		return (error);
 
 	NET_EPOCH_ENTER(et);
 	if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) &&
 	    (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) {
 		NET_EPOCH_EXIT(et);
 		return (EADDRNOTAVAIL);
 	}
 	if (ifa != NULL &&
 	    ((struct in6_ifaddr *)ifa)->ia6_flags &
 	    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
 	     IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
 		NET_EPOCH_EXIT(et);
 		return (EADDRNOTAVAIL);
 	}
 	NET_EPOCH_EXIT(et);
 	INP_WLOCK(inp);
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	inp->in6p_laddr = addr->sin6_addr;
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
 	struct in6_addr in6a;
 	struct epoch_tracker et;
 	int error = 0, scope_ambiguous = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_connect: inp == NULL"));
 
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 	if (CK_STAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	if (addr->sin6_family != AF_INET6)
 		return (EAFNOSUPPORT);
 
 	/*
 	 * Application should provide a proper zone ID or the use of default
 	 * zone IDs should be enabled.  Unfortunately, some applications do
 	 * not behave as it should, so we need a workaround.  Even if an
 	 * appropriate ID is not determined, we'll see if we can determine
 	 * the outgoing interface.  If we can, determine the zone ID based on
 	 * the interface below.
 	 */
 	if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone)
 		scope_ambiguous = 1;
 	if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
 		return (error);
 
 	INP_WLOCK(inp);
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	/* Source address selection. XXX: need pcblookup? */
 	NET_EPOCH_ENTER(et);
 	error = in6_selectsrc_socket(addr, inp->in6p_outputopts,
 	    inp, so->so_cred, scope_ambiguous, &in6a, NULL);
 	NET_EPOCH_EXIT(et);
 	if (error) {
 		INP_INFO_WUNLOCK(&V_ripcbinfo);
 		INP_WUNLOCK(inp);
 		return (error);
 	}
 
 	inp->in6p_faddr = addr->sin6_addr;
 	inp->in6p_laddr = in6a;
 	soisconnected(so);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 rip6_shutdown(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL"));
 
 	INP_WLOCK(inp);
 	socantsendmore(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 struct protosw rip6_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_abort =		rip6_abort,
 	.pr_attach =		rip6_attach,
 	.pr_bind =		rip6_bind,
 	.pr_connect =		rip6_connect,
 	.pr_control =		in6_control,
 	.pr_detach =		rip6_detach,
 	.pr_disconnect =	rip6_disconnect,
 	.pr_peeraddr =		in6_getpeeraddr,
 	.pr_send =		rip6_send,
 	.pr_shutdown =		rip6_shutdown,
 	.pr_sockaddr =		in6_getsockaddr,
 	.pr_close =		rip6_close
 };
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index 4e69608b71de..35d68e164145 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -1,1281 +1,1280 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2014 Kevin Lo
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $
  *	$KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
  */
 
 #include <sys/cdefs.h>
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/rss_config.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/udplite.h>
 
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_rss.h>
 #include <netinet6/udp6_var.h>
 #include <netinet6/scope6_var.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE(int, zero_checksum_port) = 0;
 #define	V_zero_checksum_port	VNET(zero_checksum_port)
 SYSCTL_INT(_net_inet6_udp6, OID_AUTO, rfc6935_port, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(zero_checksum_port), 0,
     "Zero UDP checksum allowed for traffic to/from this port.");
 
 /*
  * UDP protocol implementation.
  * Per RFC 768, August, 1980.
  */
 
 static void		udp6_detach(struct socket *so);
 
 static int
 udp6_append(struct inpcb *inp, struct mbuf *n, int off,
     struct sockaddr_in6 *fromsa)
 {
 	struct socket *so;
 	struct mbuf *opts = NULL, *tmp_opts;
 	struct udpcb *up;
 	bool filtered;
 
 	INP_LOCK_ASSERT(inp);
 
 	/*
 	 * Engage the tunneling protocol.
 	 */
 	up = intoudpcb(inp);
 	if (up->u_tun_func != NULL) {
 		in_pcbref(inp);
 		INP_RUNLOCK(inp);
 		filtered = (*up->u_tun_func)(n, off, inp,
 		    (struct sockaddr *)&fromsa[0], up->u_tun_ctx);
 		INP_RLOCK(inp);
 		if (filtered)
 			return (in_pcbrele_rlocked(inp));
 	}
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/* Check AH/ESP integrity. */
 	if (IPSEC_ENABLED(ipv6)) {
 		if (IPSEC_CHECK_POLICY(ipv6, n, inp) != 0) {
 			m_freem(n);
 			return (0);
 		}
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
 		m_freem(n);
 		return (0);
 	}
 #endif
 	opts = NULL;
 	if (inp->inp_flags & INP_CONTROLOPTS ||
 	    inp->inp_socket->so_options & SO_TIMESTAMP)
 		ip6_savecontrol(inp, n, &opts);
 	if ((inp->inp_vflag & INP_IPV6) && (inp->inp_flags2 & INP_ORIGDSTADDR)) {
 		tmp_opts = sbcreatecontrol(&fromsa[1],
 		    sizeof(struct sockaddr_in6), IPV6_ORIGDSTADDR,
 		    IPPROTO_IPV6, M_NOWAIT);
                 if (tmp_opts) {
                         if (opts) {
                                 tmp_opts->m_next = opts;
                                 opts = tmp_opts;
                         } else
                                 opts = tmp_opts;
                 }
 	}
 	m_adj(n, off + sizeof(struct udphdr));
 
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&fromsa[0], n,
 	    opts) == 0) {
 		soroverflow_locked(so);
 		m_freem(n);
 		if (opts)
 			m_freem(opts);
 		UDPSTAT_INC(udps_fullsock);
 	} else
 		sorwakeup_locked(so);
 	return (0);
 }
 
 struct udp6_multi_match_ctx {
 	struct ip6_hdr *ip6;
 	struct udphdr *uh;
 };
 
 static bool
 udp6_multi_match(const struct inpcb *inp, void *v)
 {
 	struct udp6_multi_match_ctx *ctx = v;
 
 	if ((inp->inp_vflag & INP_IPV6) == 0)
 		return(false);
 	if (inp->inp_lport != ctx->uh->uh_dport)
 		return(false);
 	if (inp->inp_fport != 0 && inp->inp_fport != ctx->uh->uh_sport)
 		return(false);
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
 	    !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ctx->ip6->ip6_dst))
 		return (false);
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
 	    (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ctx->ip6->ip6_src) ||
 	    inp->inp_fport != ctx->uh->uh_sport))
 		return (false);
 
 	return (true);
 }
 
 static int
 udp6_multi_input(struct mbuf *m, int off, int proto,
     struct sockaddr_in6 *fromsa)
 {
 	struct udp6_multi_match_ctx ctx;
 	struct inpcb_iterator inpi = INP_ITERATOR(udp_get_inpcbinfo(proto),
 	    INPLOOKUP_RLOCKPCB, udp6_multi_match, &ctx);
 	struct inpcb *inp;
 	struct ip6_moptions *imo;
 	struct mbuf *n;
 	int appends = 0;
 
 	/*
 	 * In the event that laddr should be set to the link-local
 	 * address (this happens in RIPng), the multicast address
 	 * specified in the received packet will not match laddr.  To
 	 * handle this situation, matching is relaxed if the
 	 * receiving interface is the same as one specified in the
 	 * socket and if the destination multicast address matches
 	 * one of the multicast groups specified in the socket.
 	 */
 
 	/*
 	 * KAME note: traditionally we dropped udpiphdr from mbuf
 	 * here.  We need udphdr for IPsec processing so we do that
 	 * later.
 	 */
 	ctx.ip6 = mtod(m, struct ip6_hdr *);
 	ctx.uh = (struct udphdr *)((char *)ctx.ip6 + off);
 	while ((inp = inp_next(&inpi)) != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		/*
 		 * XXXRW: Because we weren't holding either the inpcb
 		 * or the hash lock when we checked for a match
 		 * before, we should probably recheck now that the
 		 * inpcb lock is (supposed to be) held.
 		 */
 		/*
 		 * Handle socket delivery policy for any-source
 		 * and source-specific multicast. [RFC3678]
 		 */
 		if ((imo = inp->in6p_moptions) != NULL) {
 			struct sockaddr_in6	 mcaddr;
 			int			 blocked;
 
 			bzero(&mcaddr, sizeof(struct sockaddr_in6));
 			mcaddr.sin6_len = sizeof(struct sockaddr_in6);
 			mcaddr.sin6_family = AF_INET6;
 			mcaddr.sin6_addr = ctx.ip6->ip6_dst;
 
 			blocked = im6o_mc_filter(imo, m->m_pkthdr.rcvif,
 				(struct sockaddr *)&mcaddr,
 				(struct sockaddr *)&fromsa[0]);
 			if (blocked != MCAST_PASS) {
 				if (blocked == MCAST_NOTGMEMBER)
 					IP6STAT_INC(ip6s_notmember);
 				if (blocked == MCAST_NOTSMEMBER ||
 				    blocked == MCAST_MUTED)
 					UDPSTAT_INC(udps_filtermcast);
 				continue;
 			}
 		}
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
 			if (proto == IPPROTO_UDPLITE)
 				UDPLITE_PROBE(receive, NULL, inp, ctx.ip6,
 				    inp, ctx.uh);
 			else
 				UDP_PROBE(receive, NULL, inp, ctx.ip6, inp,
 				    ctx.uh);
 			if (udp6_append(inp, n, off, fromsa)) {
 				break;
 			} else
 				appends++;
 		}
 		/*
 		 * Don't look for additional matches if this one does
 		 * not have either the SO_REUSEPORT or SO_REUSEADDR
 		 * socket options set.  This heuristic avoids
 		 * searching through all pcbs in the common case of a
 		 * non-shared port.  It assumes that an application
 		 * will never clear these options after setting them.
 		 */
 		if ((inp->inp_socket->so_options &
 		     (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) {
 			INP_RUNLOCK(inp);
 			break;
 		}
 	}
 	m_freem(m);
 
 	if (appends == 0) {
 		/*
 		 * No matching pcb found; discard datagram.  (No need
 		 * to send an ICMP Port Unreachable for a broadcast
 		 * or multicast datgram.)
 		 */
 		UDPSTAT_INC(udps_noport);
 		UDPSTAT_INC(udps_noportmcast);
 	}
 
 	return (IPPROTO_DONE);
 }
 
 int
 udp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6;
 	struct udphdr *uh;
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 	int off = *offp;
 	int cscov_partial;
 	int plen, ulen;
 	struct sockaddr_in6 fromsa[2];
 	struct m_tag *fwd_tag;
 	uint16_t uh_sum;
 	uint8_t nxt;
 
 	NET_EPOCH_ASSERT();
 
 	if (m->m_len < off + sizeof(struct udphdr)) {
 		m = m_pullup(m, off + sizeof(struct udphdr));
 		if (m == NULL) {
 			IP6STAT_INC(ip6s_exthdrtoolong);
 			*mp = NULL;
 			return (IPPROTO_DONE);
 		}
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 	uh = (struct udphdr *)((caddr_t)ip6 + off);
 
 	UDPSTAT_INC(udps_ipackets);
 
 	/*
 	 * Destination port of 0 is illegal, based on RFC768.
 	 */
 	if (uh->uh_dport == 0)
 		goto badunlocked;
 
 	plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
 	ulen = ntohs((u_short)uh->uh_ulen);
 
 	nxt = proto;
 	cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0;
 	if (nxt == IPPROTO_UDPLITE) {
 		/* Zero means checksum over the complete packet. */
 		if (ulen == 0)
 			ulen = plen;
 		if (ulen == plen)
 			cscov_partial = 0;
 		if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) {
 			/* XXX: What is the right UDPLite MIB counter? */
 			goto badunlocked;
 		}
 		if (uh->uh_sum == 0) {
 			/* XXX: What is the right UDPLite MIB counter? */
 			goto badunlocked;
 		}
 	} else {
 		if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) {
 			UDPSTAT_INC(udps_badlen);
 			goto badunlocked;
 		}
 		if (uh->uh_sum == 0) {
 			UDPSTAT_INC(udps_nosum);
 			/*
 			 * dport 0 was rejected earlier so this is OK even if
 			 * zero_checksum_port is 0 (which is its default value).
 			 */
 			if (ntohs(uh->uh_dport) == V_zero_checksum_port)
 				goto skip_checksum;
 			else
 				goto badunlocked;
 		}
 	}
 
 	if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) &&
 	    !cscov_partial) {
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			uh_sum = m->m_pkthdr.csum_data;
 		else
 			uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
 			    m->m_pkthdr.csum_data);
 		uh_sum ^= 0xffff;
 	} else
 		uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
 
 	if (uh_sum != 0) {
 		UDPSTAT_INC(udps_badsum);
 		goto badunlocked;
 	}
 
 skip_checksum:
 	/*
 	 * Construct sockaddr format source address.
 	 */
 	init_sin6(&fromsa[0], m, 0);
 	fromsa[0].sin6_port = uh->uh_sport;
 	init_sin6(&fromsa[1], m, 1);
 	fromsa[1].sin6_port = uh->uh_dport;
 
 	pcbinfo = udp_get_inpcbinfo(nxt);
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))  {
 		*mp = NULL;
 		return (udp6_multi_input(m, off, proto, fromsa));
 	}
 
 	/*
 	 * Locate pcb for datagram.
 	 */
 
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
 	if ((m->m_flags & M_IP6_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		struct sockaddr_in6 *next_hop6;
 
 		next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
 
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
 		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
 			    uh->uh_sport, &next_hop6->sin6_addr,
 			    next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
 			    uh->uh_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
 		}
 		/* Remove the tag from the packet. We don't need it anymore. */
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP6_NEXTHOP;
 	} else
 		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
 		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
 		    m->m_pkthdr.rcvif, m);
 	if (inp == NULL) {
 		if (V_udp_log_in_vain) {
 			char ip6bufs[INET6_ADDRSTRLEN];
 			char ip6bufd[INET6_ADDRSTRLEN];
 
 			log(LOG_INFO,
 			    "Connection attempt to UDP [%s]:%d from [%s]:%d\n",
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    ntohs(uh->uh_dport),
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ntohs(uh->uh_sport));
 		}
 		if (nxt == IPPROTO_UDPLITE)
 			UDPLITE_PROBE(receive, NULL, NULL, ip6, NULL, uh);
 		else
 			UDP_PROBE(receive, NULL, NULL, ip6, NULL, uh);
 		UDPSTAT_INC(udps_noport);
 		if (m->m_flags & M_MCAST) {
 			printf("UDP6: M_MCAST is set in a unicast packet.\n");
 			UDPSTAT_INC(udps_noportmcast);
 			goto badunlocked;
 		}
 		if (V_udp_blackhole && (V_udp_blackhole_local ||
 		    !in6_localaddr(&ip6->ip6_src)))
 			goto badunlocked;
 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
 		*mp = NULL;
 		return (IPPROTO_DONE);
 	}
 	INP_RLOCK_ASSERT(inp);
 	up = intoudpcb(inp);
 	if (cscov_partial) {
 		if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			*mp = NULL;
 			return (IPPROTO_DONE);
 		}
 	}
 	if (nxt == IPPROTO_UDPLITE)
 		UDPLITE_PROBE(receive, NULL, inp, ip6, inp, uh);
 	else
 		UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
 	if (udp6_append(inp, m, off, fromsa) == 0)
 		INP_RUNLOCK(inp);
 	*mp = NULL;
 	return (IPPROTO_DONE);
 
 badunlocked:
 	m_freem(m);
 	*mp = NULL;
 	return (IPPROTO_DONE);
 }
 
 static void
 udp6_common_ctlinput(struct ip6ctlparam *ip6cp, struct inpcbinfo *pcbinfo)
 {
 	struct udphdr uh;
 	struct ip6_hdr *ip6;
 	struct mbuf *m;
 	struct inpcb *inp;
 	int errno, off = 0;
 	struct udp_portonly {
 		u_int16_t uh_sport;
 		u_int16_t uh_dport;
 	} *uhp;
 
 	if ((errno = icmp6_errmap(ip6cp->ip6c_icmp6)) == 0)
 		return;
 
 	m = ip6cp->ip6c_m;
 	ip6 = ip6cp->ip6c_ip6;
 	off = ip6cp->ip6c_off;
 
 	/* Check if we can safely examine src and dst ports. */
 	if (m->m_pkthdr.len < off + sizeof(*uhp))
 		return;
 
 	bzero(&uh, sizeof(uh));
 	m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
 
 	/* Check to see if its tunneled */
 	inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst, uh.uh_dport,
 	    &ip6->ip6_src, uh.uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
 	    m->m_pkthdr.rcvif, m);
 	if (inp != NULL) {
 		struct udpcb *up;
 		udp_tun_icmp_t *func;
 
 		up = intoudpcb(inp);
 		func = up->u_icmp_func;
 		INP_RUNLOCK(inp);
 		if (func != NULL)
 			func(ip6cp);
 	}
 	in6_pcbnotify(pcbinfo, ip6cp->ip6c_finaldst, uh.uh_dport,
 	    ip6cp->ip6c_src, uh.uh_sport, errno, ip6cp->ip6c_cmdarg,
 	    udp_notify);
 }
 
 static void
 udp6_ctlinput(struct ip6ctlparam *ctl)
 {
 
 	return (udp6_common_ctlinput(ctl, &V_udbinfo));
 }
 
 static void
 udplite6_ctlinput(struct ip6ctlparam *ctl)
 {
 
 	return (udp6_common_ctlinput(ctl, &V_ulitecbinfo));
 }
 
 static int
 udp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 
 	if (req->newlen != sizeof(addrs))
 		return (EINVAL);
 	if (req->oldlen != sizeof(struct xucred))
 		return (EINVAL);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	NET_EPOCH_ENTER(et);
 	inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
 	    addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
 	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
 	NET_EPOCH_EXIT(et);
 	if (inp != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseesocket(req->td->td_ucred,
 			    inp->inp_socket);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred,
     CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, udp6_getcred, "S,xucred",
     "Get the xucred of a UDP6 connection");
 
 static int
 udp6_send(struct socket *so, int flags_arg, struct mbuf *m,
     struct sockaddr *addr6, struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 	struct ip6_hdr *ip6;
 	struct udphdr *udp6;
 	struct in6_addr *laddr, *faddr, in6a;
 	struct ip6_pktopts *optp, opt;
 	struct sockaddr_in6 *sin6, tmp;
 	struct epoch_tracker et;
 	int cscov_partial, error, flags, hlen, scope_ambiguous;
 	u_int32_t ulen, plen;
 	uint16_t cscov;
 	u_short fport;
 	uint8_t nxt;
 
 	if (addr6) {
 		error = 0;
 		if (addr6->sa_family != AF_INET6)
 			error = EAFNOSUPPORT;
 		else if (addr6->sa_len != sizeof(struct sockaddr_in6))
 			error = EINVAL;
 		if (__predict_false(error != 0)) {
 			m_freem(control);
 			m_freem(m);
 			return (error);
 		}
 	}
 
 	sin6 = (struct sockaddr_in6 *)addr6;
 
 	/*
 	 * In contrast to IPv4 we do not validate the max. packet length
 	 * here due to IPv6 Jumbograms (RFC2675).
 	 */
 
 	scope_ambiguous = 0;
 	if (sin6) {
 		/* Protect *addr6 from overwrites. */
 		tmp = *sin6;
 		sin6 = &tmp;
 
 		/*
 		 * Application should provide a proper zone ID or the use of
 		 * default zone IDs should be enabled.  Unfortunately, some
 		 * applications do not behave as it should, so we need a
 		 * workaround.  Even if an appropriate ID is not determined,
 		 * we'll see if we can determine the outgoing interface.  If we
 		 * can, determine the zone ID based on the interface below.
 		 */
 		if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
 			scope_ambiguous = 1;
 		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) {
 			if (control)
 				m_freem(control);
 			m_freem(m);
 			return (error);
 		}
 	}
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 	/*
 	 * In the following cases we want a write lock on the inp for either
 	 * local operations or for possible route cache updates in the IPv6
 	 * output path:
 	 * - on connected sockets (sin6 is NULL) for route cache updates,
 	 * - when we are not bound to an address and source port (it is
 	 *   in6_pcbsetport() which will require the write lock).
 	 *
 	 * We check the inp fields before actually locking the inp, so
 	 * here exists a race, and we may WLOCK the inp and end with already
 	 * bound one by other thread. This is fine.
 	 */
 	if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
 	    inp->inp_lport == 0))
 		INP_WLOCK(inp);
 	else
 		INP_RLOCK(inp);
 
 	nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
 	    IPPROTO_UDP : IPPROTO_UDPLITE;
 
 #ifdef INET
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
 		int hasv4addr;
 
 		if (sin6 == NULL)
 			hasv4addr = (inp->inp_vflag & INP_IPV4);
 		else
 			hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
 			    ? 1 : 0;
 		if (hasv4addr) {
 			/*
 			 * XXXRW: We release UDP-layer locks before calling
 			 * udp_send() in order to avoid recursion.  However,
 			 * this does mean there is a short window where inp's
 			 * fields are unstable.  Could this lead to a
 			 * potential race in which the factors causing us to
 			 * select the UDPv4 output routine are invalidated?
 			 */
 			INP_UNLOCK(inp);
 			if (sin6)
 				in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
 			/* addr will just be freed in sendit(). */
 			return (udp_send(so, flags_arg | PRUS_IPV6, m,
 			    (struct sockaddr *)sin6, control, td));
 		}
 	} else
 #endif
 	if (sin6 && IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 		/*
 		 * Given this is either an IPv6-only socket or no INET is
 		 * supported we will fail the send if the given destination
 		 * address is a v4mapped address.
 		 */
 		INP_UNLOCK(inp);
 		m_freem(m);
 		m_freem(control);
 		return (EINVAL);
 	}
 
 	NET_EPOCH_ENTER(et);
 	if (control) {
 		if ((error = ip6_setpktopts(control, &opt,
 		    inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
 			goto release;
 		}
 		optp = &opt;
 	} else
 		optp = inp->in6p_outputopts;
 
 	if (sin6) {
 		/*
 		 * Since we saw no essential reason for calling in_pcbconnect,
 		 * we get rid of such kind of logic, and call in6_selectsrc
 		 * and in6_pcbsetport in order to fill in the local address
 		 * and the local port.
 		 */
 		if (sin6->sin6_port == 0) {
 			error = EADDRNOTAVAIL;
 			goto release;
 		}
 
 		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 			/* how about ::ffff:0.0.0.0 case? */
 			error = EISCONN;
 			goto release;
 		}
 
 		/*
 		 * Given we handle the v4mapped case in the INET block above
 		 * assert here that it must not happen anymore.
 		 */
 		KASSERT(!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr),
 		    ("%s: sin6(%p)->sin6_addr is v4mapped which we "
 		    "should have handled.", __func__, sin6));
 
 		/* This only requires read-locking. */
 		error = in6_selectsrc_socket(sin6, optp, inp,
 		    td->td_ucred, scope_ambiguous, &in6a, NULL);
 		if (error)
 			goto release;
 		laddr = &in6a;
 
 		if (inp->inp_lport == 0) {
 			struct inpcbinfo *pcbinfo;
 
 			INP_WLOCK_ASSERT(inp);
 
 			pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 			INP_HASH_WLOCK(pcbinfo);
 			error = in6_pcbsetport(laddr, inp, td->td_ucred);
 			INP_HASH_WUNLOCK(pcbinfo);
 			if (error != 0) {
 				/* Undo an address bind that may have occurred. */
 				inp->in6p_laddr = in6addr_any;
 				goto release;
 			}
 		}
 		faddr = &sin6->sin6_addr;
 		fport = sin6->sin6_port; /* allow 0 port */
 
 	} else {
 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 			error = ENOTCONN;
 			goto release;
 		}
 		laddr = &inp->in6p_laddr;
 		faddr = &inp->in6p_faddr;
 		fport = inp->inp_fport;
 	}
 
 	ulen = m->m_pkthdr.len;
 	plen = sizeof(struct udphdr) + ulen;
 	hlen = sizeof(struct ip6_hdr);
 
 	/*
 	 * Calculate data length and get a mbuf
 	 * for UDP and IP6 headers.
 	 */
 	M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto release;
 	}
 
 	/*
 	 * Stuff checksum and output datagram.
 	 */
 	cscov = cscov_partial = 0;
 	udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
 	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
 	udp6->uh_dport = fport;
 	if (nxt == IPPROTO_UDPLITE) {
 		struct udpcb *up;
 
 		up = intoudpcb(inp);
 		cscov = up->u_txcslen;
 		if (cscov >= plen)
 			cscov = 0;
 		udp6->uh_ulen = htons(cscov);
 		/*
 		 * For UDP-Lite, checksum coverage length of zero means
 		 * the entire UDPLite packet is covered by the checksum.
 		 */
 		cscov_partial = (cscov == 0) ? 0 : 1;
 	} else if (plen <= 0xffff)
 		udp6->uh_ulen = htons((u_short)plen);
 	else
 		udp6->uh_ulen = 0;
 	udp6->uh_sum = 0;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_flow	= inp->inp_flow & IPV6_FLOWINFO_MASK;
 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc	|= IPV6_VERSION;
 	ip6->ip6_plen	= htons((u_short)plen);
 	ip6->ip6_nxt	= nxt;
 	ip6->ip6_hlim	= in6_selecthlim(inp, NULL);
 	ip6->ip6_src	= *laddr;
 	ip6->ip6_dst	= *faddr;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	if (cscov_partial) {
 		if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
 		    sizeof(struct ip6_hdr), plen, cscov)) == 0)
 			udp6->uh_sum = 0xffff;
 	} else {
 		udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
 		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 	}
 
 	flags = 0;
 #if defined(ROUTE_MPATH) || defined(RSS)
 	if (CALC_FLOWID_OUTBOUND_SENDTO) {
 		uint32_t hash_type, hash_val;
 		uint8_t pr;
 
 		pr = inp->inp_socket->so_proto->pr_protocol;
 
 		hash_val = fib6_calc_packet_hash(laddr, faddr,
 		    inp->inp_lport, fport, pr, &hash_type);
 		m->m_pkthdr.flowid = hash_val;
 		M_HASHTYPE_SET(m, hash_type);
 	}
 	/* do not use inp flowid */
 	flags |= IP_NODEFAULTFLOWID;
 #endif
 
 	UDPSTAT_INC(udps_opackets);
 	if (nxt == IPPROTO_UDPLITE)
 		UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
 	else
 		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
 	error = ip6_output(m, optp,
 	    INP_WLOCKED(inp) ? &inp->inp_route6 : NULL, flags,
 	    inp->in6p_moptions, NULL, inp);
 	INP_UNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 
 	if (control) {
 		ip6_clearpktopts(&opt, -1);
 		m_freem(control);
 	}
 	return (error);
 
 release:
 	INP_UNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	if (control) {
 		ip6_clearpktopts(&opt, -1);
 		m_freem(control);
 	}
 	m_freem(m);
 
 	return (error);
 }
 
 static void
 udp6_abort(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
 
 	INP_WLOCK(inp);
 #ifdef INET
 	if (inp->inp_vflag & INP_IPV4) {
 		INP_WUNLOCK(inp);
 		udp_abort(so);
 		return;
 	}
 #endif
 
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		INP_HASH_WLOCK(pcbinfo);
 		in6_pcbdisconnect(inp);
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 static int
 udp6_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *inp;
 	struct udpcb *up;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
 
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		error = soreserve(so, udp_sendspace, udp_recvspace);
 		if (error)
 			return (error);
 	}
 	error = in_pcballoc(so, pcbinfo);
 	if (error)
 		return (error);
 	inp = (struct inpcb *)so->so_pcb;
 	inp->in6p_cksum = -1;	/* just to be sure */
 	/*
 	 * XXX: ugly!!
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 	up = intoudpcb(inp);
 	bzero(&up->u_start_zero, u_zero_size);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in6 *sin6_p;
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	int error;
 	u_char vflagsav;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
 
 	if (nam->sa_family != AF_INET6)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len != sizeof(struct sockaddr_in6))
 		return (EINVAL);
 
 	sin6_p = (struct sockaddr_in6 *)nam;
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	vflagsav = inp->inp_vflag;
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr))
 			inp->inp_vflag |= INP_IPV4;
 #ifdef INET
 		else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
 			struct sockaddr_in sin;
 
 			in6_sin6_2_sin(&sin, sin6_p);
 			inp->inp_vflag |= INP_IPV4;
 			inp->inp_vflag &= ~INP_IPV6;
 			error = in_pcbbind(inp, &sin, td->td_ucred);
 			goto out;
 		}
 #endif
 	}
 
 	error = in6_pcbbind(inp, sin6_p, td->td_ucred);
 #ifdef INET
 out:
 #endif
 	if (error != 0)
 		inp->inp_vflag = vflagsav;
 	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 static void
 udp6_close(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
 
 	INP_WLOCK(inp);
 #ifdef INET
 	if (inp->inp_vflag & INP_IPV4) {
 		INP_WUNLOCK(inp);
 		(void)udp_disconnect(so);
 		return;
 	}
 #endif
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		INP_HASH_WLOCK(pcbinfo);
 		in6_pcbdisconnect(inp);
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 static int
 udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in6 *sin6;
 	int error;
 	u_char vflagsav;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
 
 	sin6 = (struct sockaddr_in6 *)nam;
 	if (sin6->sin6_family != AF_INET6)
 		return (EAFNOSUPPORT);
 	if (sin6->sin6_len != sizeof(*sin6))
 		return (EINVAL);
 
 	/*
 	 * XXXRW: Need to clarify locking of v4/v6 flags.
 	 */
 	INP_WLOCK(inp);
 #ifdef INET
 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 		struct sockaddr_in sin;
 
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
 			error = EINVAL;
 			goto out;
 		}
 		if ((inp->inp_vflag & INP_IPV4) == 0) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
 			error = EISCONN;
 			goto out;
 		}
 		in6_sin6_2_sin(&sin, sin6);
 		error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
 		if (error != 0)
 			goto out;
 		vflagsav = inp->inp_vflag;
 		inp->inp_vflag |= INP_IPV4;
 		inp->inp_vflag &= ~INP_IPV6;
 		NET_EPOCH_ENTER(et);
 		INP_HASH_WLOCK(pcbinfo);
 		error = in_pcbconnect(inp, &sin, td->td_ucred, true);
 		INP_HASH_WUNLOCK(pcbinfo);
 		NET_EPOCH_EXIT(et);
 		/*
 		 * If connect succeeds, mark socket as connected. If
 		 * connect fails and socket is unbound, reset inp_vflag
 		 * field.
 		 */
 		if (error == 0)
 			soisconnected(so);
 		else if (inp->inp_laddr.s_addr == INADDR_ANY &&
 		    inp->inp_lport == 0)
 			inp->inp_vflag = vflagsav;
 		goto out;
 	} else {
 		if ((inp->inp_vflag & INP_IPV6) == 0) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 	}
 #endif
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		error = EISCONN;
 		goto out;
 	}
 	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
 	if (error != 0)
 		goto out;
 	vflagsav = inp->inp_vflag;
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	NET_EPOCH_ENTER(et);
 	INP_HASH_WLOCK(pcbinfo);
 	error = in6_pcbconnect(inp, sin6, td->td_ucred, true);
 	INP_HASH_WUNLOCK(pcbinfo);
 	NET_EPOCH_EXIT(et);
 	/*
 	 * If connect succeeds, mark socket as connected. If
 	 * connect fails and socket is unbound, reset inp_vflag
 	 * field.
 	 */
 	if (error == 0)
 		soisconnected(so);
 	else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
 	    inp->inp_lport == 0)
 		inp->inp_vflag = vflagsav;
 out:
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 static void
 udp6_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
 
 	INP_WLOCK(inp);
-	in_pcbdetach(inp);
 	in_pcbfree(inp);
 }
 
 static int
 udp6_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
 
 	INP_WLOCK(inp);
 #ifdef INET
 	if (inp->inp_vflag & INP_IPV4) {
 		INP_WUNLOCK(inp);
 		(void)udp_disconnect(so);
 		return (0);
 	}
 #endif
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		INP_WUNLOCK(inp);
 		return (ENOTCONN);
 	}
 
 	INP_HASH_WLOCK(pcbinfo);
 	in6_pcbdisconnect(inp);
 	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 #define	UDP6_PROTOSW							\
 	.pr_type =		SOCK_DGRAM,				\
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_CAPATTACH,		\
 	.pr_ctloutput =		ip6_ctloutput,				\
 	.pr_abort =		udp6_abort,				\
 	.pr_attach =		udp6_attach,				\
 	.pr_bind =		udp6_bind,				\
 	.pr_connect =		udp6_connect,				\
 	.pr_control =		in6_control,				\
 	.pr_detach =		udp6_detach,				\
 	.pr_disconnect =	udp6_disconnect,			\
 	.pr_peeraddr =		in6_mapped_peeraddr,			\
 	.pr_send =		udp6_send,				\
 	.pr_shutdown =		udp_shutdown,				\
 	.pr_sockaddr =		in6_mapped_sockaddr,			\
 	.pr_soreceive =		soreceive_dgram,			\
 	.pr_sosend =		sosend_dgram,				\
 	.pr_sosetlabel =	in_pcbsosetlabel,			\
 	.pr_close =		udp6_close
 
 struct protosw udp6_protosw = {
 	.pr_protocol =		IPPROTO_UDP,
 	UDP6_PROTOSW
 };
 
 struct protosw udplite6_protosw = {
 	.pr_protocol =		IPPROTO_UDPLITE,
 	UDP6_PROTOSW
 };
 
 static void
 udp6_init(void *arg __unused)
 {
 
 	IP6PROTO_REGISTER(IPPROTO_UDP, udp6_input, udp6_ctlinput);
 	IP6PROTO_REGISTER(IPPROTO_UDPLITE, udp6_input, udplite6_ctlinput);
 }
 SYSINIT(udp6_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, udp6_init, NULL);