diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c
index bcc237271465..00e296fcd335 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.c
+++ b/sys/dev/hyperv/hvsock/hv_sock.c
@@ -1,1773 +1,1773 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2020 Microsoft Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/domain.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/sockbuf.h>
 #include <sys/sx.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <dev/hyperv/vmbus/vmbus_reg.h>
 
 #include "hv_sock.h"
 
 /*
  * Debug verbosity levels used with HVSOCK_DBG(); a message is printed
  * when hvs_dbg_level is greater than or equal to the message's level.
  */
 #define HVSOCK_DBG_NONE			0x0
 #define HVSOCK_DBG_INFO			0x1
 #define HVSOCK_DBG_ERR			0x2
 #define HVSOCK_DBG_VERBOSE		0x3
 
 
 /* Root of the net.hvsock sysctl subtree. */
 SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
 
 /* Current debug verbosity; exported as net.hvsock.hvs_dbg_level. */
 static int hvs_dbg_level;
 SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
     0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
 
 
 /*
  * Emit a printf-style debug message when the configured verbosity
  * (hvs_dbg_level) is at least 'level'.  The message arguments are only
  * evaluated when the message is actually printed.
  */
 #define HVSOCK_DBG(level, ...)						\
 	do {								\
 		if ((level) <= hvs_dbg_level)				\
 			printf(__VA_ARGS__);				\
 	} while (0)
 
 /* malloc(9) type used for the per-socket control blocks in this file. */
 MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
 
 /* The MTU is 16KB per host side's design */
 #define HVSOCK_MTU_SIZE		(1024 * 16)
 /* Largest payload placed in a single outgoing packet by the send path. */
 #define HVSOCK_SEND_BUF_SZ	(PAGE_SIZE - sizeof(struct vmpipe_proto_header))
 
 /* Size of the combined per-packet header (struct hvs_pkt_header). */
 #define HVSOCK_HEADER_LEN	(sizeof(struct hvs_pkt_header))
 
 /*
  * Ring-buffer footprint of a packet carrying payload_len bytes: the
  * header, the payload rounded up to a multiple of 8, plus one trailing
  * uint64_t.
  */
 #define HVSOCK_PKT_LEN(payload_len)	(HVSOCK_HEADER_LEN + \
 					 roundup2(payload_len, 8) + \
 					 sizeof(uint64_t))
 
 
 /* Tentative definition; the protosw table below needs its address. */
 static struct domain		hv_socket_domain;
 
 /*
  * HyperV Transport sockets
  *
  * User-request dispatch table: maps each socket operation onto the
  * corresponding hvs_trans_* handler.
  */
 static struct pr_usrreqs	hvs_trans_usrreqs = {
 	.pru_attach =		hvs_trans_attach,
 	.pru_bind =		hvs_trans_bind,
 	.pru_listen =		hvs_trans_listen,
 	.pru_accept =		hvs_trans_accept,
 	.pru_connect =		hvs_trans_connect,
 	.pru_peeraddr =		hvs_trans_peeraddr,
 	.pru_sockaddr =		hvs_trans_sockaddr,
 	.pru_soreceive =	hvs_trans_soreceive,
 	.pru_sosend =		hvs_trans_sosend,
 	.pru_disconnect =	hvs_trans_disconnect,
 	.pru_close =		hvs_trans_close,
 	.pru_detach =		hvs_trans_detach,
 	.pru_shutdown =		hvs_trans_shutdown,
 	.pru_abort =		hvs_trans_abort,
 };
 
 /*
  * Definitions of protocols supported in HyperV socket domain
  *
  * There is a single entry: a connection-oriented (PR_CONNREQUIRED)
  * SOCK_STREAM protocol served by hvs_trans_usrreqs.
  */
 static struct protosw		hv_socket_protosw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&hv_socket_domain,
 	.pr_protocol =		HYPERV_SOCK_PROTO_TRANS,
 	.pr_flags =		PR_CONNREQUIRED,
 	.pr_init =		hvs_trans_init,
 	.pr_usrreqs =		&hvs_trans_usrreqs,
 },
 };
 
 /* The AF_HYPERV protocol domain; its protosw range covers the table above. */
 static struct domain		hv_socket_domain = {
 	.dom_family =		AF_HYPERV,
 	.dom_name =		"hyperv",
 	.dom_protosw =		hv_socket_protosw,
 	.dom_protoswNPROTOSW =	&hv_socket_protosw[nitems(hv_socket_protosw)]
 };
 
 /* Registers hv_socket_domain (the macro appends "domain" to its argument). */
 VNET_DOMAIN_SET(hv_socket_);
 
 /* Bounds of the 32-bit port space; MIN_PORT (0) is skipped by auto-bind. */
 #define MAX_PORT			((uint32_t)0xFFFFFFFF)
 #define MIN_PORT			((uint32_t)0x0)
 
 /*
  * Service GUID template; the first four bytes are overwritten with the
  * port number (see set_port_by_srv_id()).  Compared with the textual form
  * below, the first three GUID fields are stored byte-swapped.
  */
 /* 00000000-facb-11e6-bd58-64006a7986d3 */
 static const struct hyperv_guid srv_id_template = {
 	.hv_guid = {
 	    0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
 	    0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
 };
 
 /* Forward declarations of file-local helpers defined later in this file. */
 static int		hvsock_br_callback(void *, int, void *);
 static uint32_t		hvsock_canread_check(struct hvs_pcb *);
 static uint32_t		hvsock_canwrite_check(struct hvs_pcb *);
 static int		hvsock_send_data(struct vmbus_channel *chan,
     struct uio *uio, uint32_t to_write, struct sockbuf *sb);
 
 
 
 /* Globals */
 /* Exclusive lock taken by hvs_trans_lock()/hvs_trans_unlock(). */
 static struct sx		hvs_trans_socks_sx;
 /* Held around every access to the two socket lists below. */
 static struct mtx		hvs_trans_socks_mtx;
 static LIST_HEAD(, hvs_pcb)	hvs_trans_bound_socks;
 static LIST_HEAD(, hvs_pcb)	hvs_trans_connected_socks;
 /* Last local port handed out by the auto-bind search in connect. */
 static uint32_t			previous_auto_bound_port;
 
 /*
  * Dump a GUID at HVSOCK_DBG_INFO level.  The leading bytes are printed as
  * one unsigned int followed by two unsigned shorts; the final eight bytes
  * are printed individually.  Everything is shown in hex.
  */
 static void
 hvsock_print_guid(struct hyperv_guid *guid)
 {
 	unsigned char *raw = (unsigned char *)guid;
 
 	HVSOCK_DBG(HVSOCK_DBG_INFO,
 	    "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
 	    *(unsigned int *)&raw[0],
 	    *(unsigned short *)(raw + 4),
 	    *(unsigned short *)(raw + 6),
 	    raw[8], raw[9], raw[10], raw[11],
 	    raw[12], raw[13], raw[14], raw[15]);
 }
 
 /*
  * Return true when every byte of 'id' after the first four matches
  * srv_id_template.  The first four bytes (the port) are not compared.
  */
 static bool
 is_valid_srv_id(const struct hyperv_guid *id)
 {
 	const size_t port_bytes = 4;
 	const void *candidate = id->hv_guid + port_bytes;
 	const void *expected = srv_id_template.hv_guid + port_bytes;
 
 	return (memcmp(candidate, expected,
 	    sizeof(struct hyperv_guid) - port_bytes) == 0);
 }
 
 /*
  * Extract the port number stored in the leading sizeof(unsigned int)
  * bytes of a service GUID.
  *
  * The bytes are copied out with memcpy() instead of being read through a
  * cast pointer, so the access is valid whatever the alignment of the GUID.
  *
  * Returns the port in host byte order.
  */
 static unsigned int
 get_port_by_srv_id(const struct hyperv_guid *srv_id)
 {
 	unsigned int port;
 
 	memcpy(&port, srv_id, sizeof(port));
 	return (port);
 }
 
 /*
  * Store 'port' (host byte order) into the leading sizeof(unsigned int)
  * bytes of a service GUID, leaving the remaining bytes untouched.
  *
  * memcpy() is used instead of a store through a cast pointer, so the
  * access is valid whatever the alignment of the GUID.
  */
 static void
 set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
 {
 	memcpy(srv_id, &port, sizeof(port));
 }
 
 
 static void
 __hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
 {
 	struct hvs_pcb *p = NULL;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
 
 	if (!pcb)
 		return;
 
 	if (list & HVS_LIST_BOUND) {
 		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
 			if  (p == pcb)
 				LIST_REMOVE(p, bound_next);
 	}
 
 	if (list & HVS_LIST_CONNECTED) {
 		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
 			if (p == pcb)
 				LIST_REMOVE(pcb, connected_next);
 	}
 }
 
 /*
  * Socket-level wrapper around __hvs_remove_pcb_from_list(): look up the
  * socket's pcb and unlink it from the lists selected by 'list'.
  */
 static void
 __hvs_remove_socket_from_list(struct socket *so, unsigned char list)
 {
 	struct hvs_pcb *sock_pcb;
 
 	sock_pcb = so2hvspcb(so);
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, sock_pcb);
 	__hvs_remove_pcb_from_list(sock_pcb, list);
 }
 
 /*
  * Put the socket's pcb at the head of each list selected by 'list'
  * (a mask of HVS_LIST_BOUND and HVS_LIST_CONNECTED).  No locking is
  * done here.
  */
 static void
 __hvs_insert_socket_on_list(struct socket *so, unsigned char list)
 {
 	struct hvs_pcb *sock_pcb = so2hvspcb(so);
 
 	if ((list & HVS_LIST_BOUND) != 0) {
 		LIST_INSERT_HEAD(&hvs_trans_bound_socks, sock_pcb,
 		    bound_next);
 	}
 
 	if ((list & HVS_LIST_CONNECTED) != 0) {
 		LIST_INSERT_HEAD(&hvs_trans_connected_socks, sock_pcb,
 		    connected_next);
 	}
 }
 
 void
 hvs_remove_socket_from_list(struct socket *so, unsigned char list)
 {
 	if (!so || !so->so_pcb) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: socket or so_pcb is null\n", __func__);
 		return;
 	}
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	__hvs_remove_socket_from_list(so, list);
 	mtx_unlock(&hvs_trans_socks_mtx);
 }
 
 static void
 hvs_insert_socket_on_list(struct socket *so, unsigned char list)
 {
 	if (!so || !so->so_pcb) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: socket or so_pcb is null\n", __func__);
 		return;
 	}
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	__hvs_insert_socket_on_list(so, list);
 	mtx_unlock(&hvs_trans_socks_mtx);
 }
 
 static struct socket *
 __hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
 {
 	struct hvs_pcb *p = NULL;
 
 	if (list & HVS_LIST_BOUND)
 		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
 			if (p->so != NULL &&
 			    addr->hvs_port == p->local_addr.hvs_port)
 				return p->so;
 
 	if (list & HVS_LIST_CONNECTED)
 		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
 			if (p->so != NULL &&
 			    addr->hvs_port == p->local_addr.hvs_port)
 				return p->so;
 
 	return NULL;
 }
 
 /*
  * Locked variant of __hvs_find_socket_on_list(): performs the lookup
  * while holding hvs_trans_socks_mtx and returns its result.
  */
 static struct socket *
 hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
 {
 	struct socket *found;
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	found = __hvs_find_socket_on_list(addr, list);
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	return (found);
 }
 
 /*
  * Reset 'addr' to an all-zero AF_HYPERV socket address carrying the
  * given port.
  */
 static inline void
 hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
 {
 	/* Clear the whole structure first so unused bytes are zero. */
 	memset(addr, 0, sizeof(*addr));
 
 	addr->sa_len = sizeof(*addr);
 	addr->sa_family = AF_HYPERV;
 	addr->hvs_port = port;
 }
 
 /*
  * Initialize 'addr' as an AF_HYPERV address whose port is taken from
  * the service GUID 'svr_id'.
  */
 void
 hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
 {
 	unsigned int port = get_port_by_srv_id(svr_id);
 
 	hvs_addr_set(addr, port);
 }
 
 /*
  * Acquire the module-wide hvs_trans_socks_sx lock exclusively.
  * Always returns 0.
  */
 int
 hvs_trans_lock(void)
 {
 	sx_xlock(&hvs_trans_socks_sx);
 	return (0);
 }
 
 /* Release the exclusive lock taken by hvs_trans_lock(). */
 void
 hvs_trans_unlock(void)
 {
 	sx_xunlock(&hvs_trans_socks_sx);
 }
 
 /*
  * Protocol init hook (see hv_socket_protosw): set up the locks, the
  * socket lists and the auto-bind port cursor used by this file.
  */
 void
 hvs_trans_init(void)
 {
 	/*
 	 * Skip initialization of globals for non-default instances, and
 	 * when not running as a Hyper-V guest.
 	 */
 	if (!IS_DEFAULT_VNET(curvnet) || vm_guest != VM_GUEST_HV)
 		return;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_init called\n", __func__);
 
 	/* Initialize Globals */
 	LIST_INIT(&hvs_trans_bound_socks);
 	LIST_INIT(&hvs_trans_connected_socks);
 	mtx_init(&hvs_trans_socks_mtx,
 	    "hvs_trans_socks_mtx", NULL, MTX_DEF);
 	sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
 	previous_auto_bound_port = MAX_PORT;
 }
 
 /*
  * Called in two cases:
  * 1) When user calls socket();
  * 2) When we accept new incoming connection and call sonewconn().
  *
  * Allocates a zeroed pcb and links it to the socket.
  *
  * Returns 0 on success, ESOCKTNOSUPPORT when not on Hyper-V or when the
  * socket is not SOCK_STREAM, EPROTONOSUPPORT for an unknown protocol,
  * EISCONN when the socket already has a pcb, and ENOMEM when the
  * allocation fails.
  */
 int
 hvs_trans_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct hvs_pcb *new_pcb;
 
 	if (vm_guest != VM_GUEST_HV)
 		return (ESOCKTNOSUPPORT);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_attach called\n", __func__);
 
 	if (so->so_type != SOCK_STREAM)
 		return (ESOCKTNOSUPPORT);
 
 	/* Protocol 0 selects the default, which is the only one we have. */
 	if (!(proto == 0 || proto == HYPERV_SOCK_PROTO_TRANS))
 		return (EPROTONOSUPPORT);
 
 	/* A pcb is already attached to this socket. */
 	if (so2hvspcb(so) != NULL)
 		return (EISCONN);
 
 	new_pcb = malloc(sizeof(*new_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
 	if (new_pcb == NULL)
 		return (ENOMEM);
 
 	/* Cross-link the socket and its new pcb. */
 	so->so_pcb = (void *)new_pcb;
 	new_pcb->so = so;
 
 	return (0);
 }
 
 /*
  * Detach the pcb from a socket that is going away.
  *
  * The pcb is zeroed and freed here only for listening sockets; in every
  * case the socket's so_pcb pointer is cleared.
  * NOTE(review): the pcb of a non-listening socket is not freed in this
  * function - presumably released elsewhere; confirm there is no leak.
  */
 void
 hvs_trans_detach(struct socket *so)
 {
 	struct hvs_pcb *pcb;
 
 	if (vm_guest != VM_GUEST_HV)
 		return;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_detach called\n", __func__);
 
 	(void) hvs_trans_lock();
 
 	pcb = so2hvspcb(so);
 	if (pcb != NULL) {
 		if (SOLISTENING(so)) {
 			bzero(pcb, sizeof(*pcb));
 			free(pcb, M_HVSOCK);
 		}
 		so->so_pcb = NULL;
 	}
 
 	hvs_trans_unlock();
 }
 
 /*
  * Bind a socket to the local port given in 'addr'.
  *
  * Returns 0 on success; EINVAL for a NULL address, a missing pcb or a
  * wrong sa_len; EAFNOSUPPORT for a non-AF_HYPERV address; EADDRINUSE when
  * the port is already on the bound or connected list.
  */
 int
 hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
 {
 	struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	int error;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_bind called\n", __func__);
 
 	if (sa == NULL || pcb == NULL)
 		return (EINVAL);
 
 	if (sa->sa_family != AF_HYPERV) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: Not supported, sa_family is %u\n",
 		    __func__, sa->sa_family);
 		return (EAFNOSUPPORT);
 	}
 
 	if (sa->sa_len != sizeof(*sa)) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: Not supported, sa_len is %u\n",
 		    __func__, sa->sa_len);
 		return (EINVAL);
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
 
 	/* Lookup and insertion happen under one lock hold. */
 	mtx_lock(&hvs_trans_socks_mtx);
 	if (__hvs_find_socket_on_list(sa,
 	    HVS_LIST_BOUND | HVS_LIST_CONNECTED) != NULL) {
 		error = EADDRINUSE;
 	} else {
 		/*
 		 * The port is free: record the addresses and add the
 		 * socket to the bound list.
 		 */
 		error = 0;
 		hvs_addr_set(&pcb->local_addr, sa->hvs_port);
 		hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
 		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
 	}
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	return (error);
 }
 
 /*
  * Put a bound socket into the listening state.
  *
  * Returns EINVAL when the socket has no pcb, EADDRNOTAVAIL when its local
  * address is not on the bound list or is bound by another socket, and
  * otherwise the result of solisten_proto_check().
  */
 int
 hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct socket *owner;
 	int error;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_listen called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* The local address must be bound, and bound by this socket. */
 	owner = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
 	if (!(owner != NULL && owner == so)) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: Address not bound or not by us.\n", __func__);
 		return (EADDRNOTAVAIL);
 	}
 
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error == 0)
 		solisten_proto(so, backlog);
 	SOCK_UNLOCK(so);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket listen error = %d\n", __func__, error);
 
 	return (error);
 }
 
 /*
  * Hand the caller a freshly allocated copy of the peer address in *nam.
  *
  * Returns 0 on success, EINVAL when the socket has no pcb and ENOMEM
  * when the address cannot be duplicated.
  */
 int
 hvs_trans_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockaddr *peer;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_accept called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	peer = (struct sockaddr *) &pcb->remote_addr;
 	*nam = sodupsockaddr(peer, M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * Start a connection to the host service identified by the port in 'nam'.
  *
  * A free local port is picked automatically by walking downwards from
  * the previously used one, wrapping from MIN_PORT back to MAX_PORT.  The
  * socket is put on the bound list, the VM and host service GUIDs are
  * derived from the local and remote ports, the socket is marked as
  * connecting, and finally the host is asked to open the connection.
  *
  * Returns 0 when the request was issued; EINVAL for a missing pcb, a NULL
  * address or a wrong sa_len; EAFNOSUPPORT for a non-AF_HYPERV address;
  * EINPROGRESS when the socket is already connected, connecting or
  * disconnecting; EADDRINUSE when no local port is free.
  */
 int
 hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
 	bool found_auto_bound_port = false;
 	uint32_t i;
 	int error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Verify the remote address */
 	if (raddr == NULL)
 		return (EINVAL);
 	if (raddr->sa_family != AF_HYPERV)
 		return (EAFNOSUPPORT);
 	if (raddr->sa_len != sizeof(*raddr))
 		return (EINVAL);
 
 	/* Only touch raddr once it is known to be non-NULL and well formed. */
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
 	    __func__, raddr->hvs_port);
 
 	mtx_lock(&hvs_trans_socks_mtx);
 	if (so->so_state &
 	    (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
 			HVSOCK_DBG(HVSOCK_DBG_ERR,
 			    "%s: socket connect in progress\n",
 			    __func__);
 			error = EINPROGRESS;
 			goto out;
 	}
 
 	/*
 	 * Find an available port for us to auto bind the local
 	 * address.
 	 */
 	hvs_addr_set(&pcb->local_addr, 0);
 
 	/*
 	 * The cursor is unsigned, like the port space itself, so the
 	 * downward walk and the wrap-around are well defined.
 	 */
 	for (i = previous_auto_bound_port - 1;
 	    i != previous_auto_bound_port; i--) {
 		if (i == MIN_PORT)
 			i = MAX_PORT;
 
 		pcb->local_addr.hvs_port = i;
 
 		if (__hvs_find_socket_on_list(&pcb->local_addr,
 		    HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
 			found_auto_bound_port = true;
 			previous_auto_bound_port = i;
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: found local bound port is %x\n",
 			    __func__, pcb->local_addr.hvs_port);
 			break;
 		}
 	}
 
 	if (found_auto_bound_port == true) {
 		/* Found available port for auto bound, put on list */
 		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
 		/* Set VM service ID */
 		pcb->vm_srv_id = srv_id_template;
 		set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
 		/* Set host service ID and remote port */
 		pcb->host_srv_id = srv_id_template;
 		set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
 		hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
 
 		/* Change the socket state to SS_ISCONNECTING */
 		soisconnecting(so);
 	} else {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: No local port available for auto bound\n",
 		    __func__);
 		error = EADDRINUSE;
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
 	hvsock_print_guid(&pcb->vm_srv_id);
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
 	hvsock_print_guid(&pcb->host_srv_id);
 
 out:
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	/* The connect request is issued after the list mutex is dropped. */
 	if (found_auto_bound_port == true)
 		 vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
 
 	return (error);
 }
 
 int
 hvs_trans_disconnect(struct socket *so)
 {
 	struct hvs_pcb *pcb;
 
 	if (vm_guest != VM_GUEST_HV)
 		return (ESOCKTNOSUPPORT);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
 
 	(void) hvs_trans_lock();
 	pcb = so2hvspcb(so);
 	if (pcb == NULL) {
 		hvs_trans_unlock();
 		return (EINVAL);
 	}
 
 	/* If socket is already disconnected, skip this */
 	if ((so->so_state & SS_ISDISCONNECTED) == 0)
 		soisdisconnecting(so);
 
 	hvs_trans_unlock();
 
 	return (0);
 }
 
 /* sblock() flags for a request: wait for the lock unless MSG_DONTWAIT is set. */
 #define SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 /*
  * Context handed to hvsock_br_callback(): the uio to copy to or from, and
  * the socket buffer whose lock is dropped around the copy (may be NULL).
  */
 struct hvs_callback_arg {
 	struct uio *uio;
 	struct sockbuf *sb;
 };
 
 /*
  * Receive data from the channel's ring buffer straight into 'uio'.
  *
  * MSG_PEEK is rejected with EOPNOTSUPP.  The receive sockbuf is sblock()ed
  * for the whole call and its mutex is held except while copying out (see
  * hvsock_br_callback()) and while sleeping in sbwait().
  *
  * Returns 0 on success, or EINVAL, EOPNOTSUPP, EPIPE, EAGAIN, the pending
  * so_error, or an error from sblock(), sbwait() or the channel read.
  */
 int
 hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockbuf *sb;
 	ssize_t orig_resid;
 	uint32_t canread, to_read;
 	int flags, error = 0;
 	struct hvs_callback_arg cbarg;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
 
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (pcb == NULL)
 		return (EINVAL);
 
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 
 	if (flags & MSG_PEEK)
 		return (EOPNOTSUPP);
 
 	/* If no space to copy out anything */
 	if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
 		return (EINVAL);
 
 	sb = &so->so_rcv;
 
 	/* Remembered so we can tell later whether anything was read. */
 	orig_resid = uio->uio_resid;
 
 	/* Prevent other readers from entering the socket. */
 	error = sblock(sb, SBLOCKWAIT(flags));
 	if (error) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: sblock returned error = %d\n", __func__, error);
 		return (error);
 	}
 
 	SOCKBUF_LOCK(sb);
 
 	cbarg.uio = uio;
 	cbarg.sb = sb;
 	/*
 	 * If the socket is closing, there might still be some data
 	 * in rx br to read. However we need to make sure
 	 * the channel is still open.
 	 */
 	if ((sb->sb_state & SBS_CANTRCVMORE) &&
 	    (so->so_state & SS_ISDISCONNECTED)) {
 		/* Other thread already closed the channel */
 		error = EPIPE;
 		goto out;
 	}
 
 	while (true) {
 		/* Drain whatever is readable right now, packet by packet. */
 		while (uio->uio_resid > 0 &&
 		    (canread = hvsock_canread_check(pcb)) > 0) {
 			to_read = MIN(canread, uio->uio_resid);
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: to_read = %u, skip = %u\n", __func__, to_read,
 			    (unsigned int)(sizeof(struct hvs_pkt_header) +
 			    pcb->recv_data_off));
 
 			error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
 			    sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
 			    hvsock_br_callback, (void *)&cbarg);
 			/*
 			 * It is possible the socket was disconnected because
 			 * we released the lock in hvsock_br_callback. So we
 			 * need to check the state to make sure it is not
 			 * disconnected.
 			 */
 			if (error || so->so_state & SS_ISDISCONNECTED) {
 				break;
 			}
 
 			/* Account for the bytes consumed from this packet. */
 			pcb->recv_data_len -= to_read;
 			pcb->recv_data_off += to_read;
 		}
 
 		if (error)
 			break;
 
 		/* Abort if socket has reported problems. */
 		if (so->so_error) {
 			if (so->so_error == ESHUTDOWN &&
 			    orig_resid > uio->uio_resid) {
 				/*
 				 * Although we got a FIN, we also received
 				 * some data in this round. Deliver it
 				 * to user.
 				 */
 				error = 0;
 			} else {
 				if (so->so_error != ESHUTDOWN)
 					error = so->so_error;
 			}
 
 			break;
 		}
 
 		/* Cannot receive more. */
 		if (sb->sb_state & SBS_CANTRCVMORE)
 			break;
 
 		/* We are done if buffer has been filled */
 		if (uio->uio_resid == 0)
 			break;
 
 		/* Without MSG_WAITALL a partial read is good enough. */
 		if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
 			break;
 
 		/* Buffer ring is empty and we shall not block */
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			if (orig_resid == uio->uio_resid) {
 				/* We have not read anything */
 				error = EAGAIN;
 			}
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: non blocked read return, error %d.\n",
 			    __func__, error);
 			break;
 		}
 
 		/*
 		 * Wait and block until (more) data comes in.
 		 * Note: Drops the sockbuf lock during wait.
 		 */
 		error = sbwait(sb);
 
 		if (error)
 			break;
 
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: wake up from sbwait, read available is %u\n",
 		    __func__, vmbus_chan_read_available(pcb->chan));
 	}
 
 out:
 	SOCKBUF_UNLOCK(sb);
 
 	sbunlock(sb);
 
 	/* We received a FIN in this call */
 	if (so->so_error == ESHUTDOWN) {
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			/* Send has already closed */
 			soisdisconnecting(so);
 		} else {
 			/* Just close the receive side */
 			socantrcvmore(so);
 		}
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: returning error = %d, so_error = %d\n",
 	    __func__, error, so->so_error);
 
 	return (error);
 }
 
 /*
  * Send the data described by 'uio' over the channel's ring buffer.
  *
  * Data goes out in chunks of at most HVSOCK_SEND_BUF_SZ bytes, limited by
  * the space currently writable.  Once some data has been sent the call
  * returns instead of waiting for more ring space; with nothing sent it
  * either fails with EWOULDBLOCK (non-blocking) or sleeps in sbwait().
  *
  * Returns 0 on success, or EINVAL, EPIPE, EWOULDBLOCK, or an error from
  * sblock(), sbwait() or hvsock_send_data().
  */
 int
 hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockbuf *sb;
 	ssize_t orig_resid;
 	uint32_t canwrite, to_write;
 	int error = 0;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
 	    __func__, uio->uio_resid);
 
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* If nothing to send */
 	if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
 		return (EINVAL);
 
 	sb = &so->so_snd;
 
 	/* Remembered so we can tell later whether anything was sent. */
 	orig_resid = uio->uio_resid;
 
 	/* Prevent other writers from entering the socket. */
 	error = sblock(sb, SBLOCKWAIT(flags));
 	if (error) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: sblock returned error = %d\n", __func__, error);
 		return (error);
 	}
 
 	SOCKBUF_LOCK(sb);
 
 	/* The send side is shut down, or a FIN was recorded on the socket. */
 	if ((sb->sb_state & SBS_CANTSENDMORE) ||
 	    so->so_error == ESHUTDOWN) {
 		error = EPIPE;
 		goto out;
 	}
 
 	while (uio->uio_resid > 0) {
 		canwrite = hvsock_canwrite_check(pcb);
 		if (canwrite == 0) {
 			/* We have sent some data */
 			if (orig_resid > uio->uio_resid)
 				break;
 			/*
 			 * We have not sent any data and it is
 			 * non-blocked io
 			 */
 			if (so->so_state & SS_NBIO ||
 			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
 				error = EWOULDBLOCK;
 				break;
 			} else {
 				/*
 				 * We are here because there is no space on
 				 * send buffer ring. Signal the other side
 				 * to read and free more space.
 				 * Sleep wait until space available to send
 				 * Note: Drops the sockbuf lock during wait.
 				 */
 				error = sbwait(sb);
 
 				if (error)
 					break;
 
 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 				    "%s: wake up from sbwait, space avail on "
 				    "tx ring is %u\n",
 				    __func__,
 				    vmbus_chan_write_available(pcb->chan));
 
 				continue;
 			}
 		}
 		/* Clamp the chunk to ring space, remaining data and max size. */
 		to_write = MIN(canwrite, uio->uio_resid);
 		to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
 
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: canwrite is %u, to_write = %u\n", __func__,
 		    canwrite, to_write);
 		error = hvsock_send_data(pcb->chan, uio, to_write, sb);
 
 		if (error)
 			break;
 	}
 
 out:
 	SOCKBUF_UNLOCK(sb);
 	sbunlock(sb);
 
 	return (error);
 }
 
 /*
  * Return a freshly allocated copy of the socket's remote address in *nam.
  *
  * Returns 0 on success, EINVAL when the socket has no pcb and ENOMEM
  * when the address cannot be duplicated.
  */
 int
 hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockaddr *remote;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	remote = (struct sockaddr *) &pcb->remote_addr;
 	*nam = sodupsockaddr(remote, M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * Return a freshly allocated copy of the socket's local address in *nam.
  *
  * Returns 0 on success, EINVAL when the socket has no pcb and ENOMEM
  * when the address cannot be duplicated.
  */
 int
 hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockaddr *local;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	local = (struct sockaddr *) &pcb->local_addr;
 	*nam = sodupsockaddr(local, M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * Close handler: send a FIN to the peer if the socket is connected, mark
  * the socket disconnected, cut the pcb's links to the channel and the
  * socket, and take a listening socket off the bound list.
  */
 void
 hvs_trans_close(struct socket *so)
 {
 	struct hvs_pcb *pcb;
 
 	if (vm_guest != VM_GUEST_HV)
 		return;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_close called\n", __func__);
 
 	(void) hvs_trans_lock();
 
 	pcb = so2hvspcb(so);
 	if (pcb == NULL) {
 		hvs_trans_unlock();
 		return;
 	}
 
 	if ((so->so_state & SS_ISCONNECTED) != 0) {
 		/* Send a FIN to peer */
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: hvs_trans_close sending a FIN to host\n", __func__);
 		(void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
 	}
 
 	if ((so->so_state &
 	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) != 0)
 		soisdisconnected(so);
 
 	/* From here on the pcb no longer refers to the channel or socket. */
 	pcb->so = NULL;
 	pcb->chan = NULL;
 
 	if (SOLISTENING(so)) {
 		/* Remove from bound list */
 		mtx_lock(&hvs_trans_socks_mtx);
 		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
 		mtx_unlock(&hvs_trans_socks_mtx);
 	}
 
 	hvs_trans_unlock();
 }
 
 /*
  * Abort handler: take a listening socket off the bound list and
  * disconnect a connected one.
  */
 void
 hvs_trans_abort(struct socket *so)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 
 	if (vm_guest != VM_GUEST_HV)
 		return;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_abort called\n", __func__);
 
 	(void) hvs_trans_lock();
 
 	if (pcb != NULL) {
 		if (SOLISTENING(so)) {
 			/* Remove from bound list */
 			mtx_lock(&hvs_trans_socks_mtx);
 			__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
 			mtx_unlock(&hvs_trans_socks_mtx);
 		}
 
 		if ((so->so_state & SS_ISCONNECTED) != 0)
 			(void) sodisconnect(so);
 	}
 
 	hvs_trans_unlock();
 }
 
 /*
  * Shutdown handler.
  *
  * Only called when the shutdown method is SHUT_WR or SHUT_RDWR.  For
  * SHUT_RD and SHUT_RDWR the caller has already set SBS_CANTRCVMORE on the
  * receive buffer, which is how the two cases are told apart here.
  *
  * Returns 0, or EINVAL when the socket has no pcb.
  */
 int
 hvs_trans_shutdown(struct socket *so)
 {
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	struct sockbuf *snd;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	if (!(so->so_rcv.sb_state & SBS_CANTRCVMORE)) {
 		/*
 		 * SHUT_WR only: the receive side stays open, so just
 		 * close the send side.
 		 */
 		socantsendmore(so);
 		return (0);
 	}
 
 	/* SHUT_RDWR case; nothing to do unless the socket is connected. */
 	if (!(so->so_state & SS_ISCONNECTED))
 		return (0);
 
 	/* Send a FIN to peer */
 	snd = &so->so_snd;
 	SOCKBUF_LOCK(snd);
 	(void) hvsock_send_data(pcb->chan, NULL, 0, snd);
 	SOCKBUF_UNLOCK(snd);
 
 	soisdisconnecting(so);
 
 	return (0);
 }
 
 /* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is
  * <port> (see struct sockaddr_hvs).
  *
  * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
  * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
  * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
  * the below sockaddr:
  *
  * struct SOCKADDR_HV
  * {
  *    ADDRESS_FAMILY Family;
  *    USHORT Reserved;
  *    GUID VmId;
  *    GUID ServiceId;
  * };
  * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via
  * VMBus, because here it's obvious the host and the VM can easily identify
  * each other. Though the VmID is useful on the host, especially in the case
  * of Windows container, FreeBSD VM doesn't need it at all.
  *
  * To be compatible with similar infrastructure in Linux VMs, we have
  * to limit the available GUID space of SOCKADDR_HV so that we can create
  * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID.
  * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
  *
  ****************************************************************************
  * The only valid Service GUIDs, from the perspectives of both the host and *
  * FreeBSD VM, that can be connected by the other end, must conform to this *
  * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
  ****************************************************************************
  *
  * When we write apps on the host to connect(), the GUID ServiceID is used.
  * When we write apps in FreeBSD VM to connect(), we only need to specify the
  * port and the driver will form the GUID and use that to request the host.
  *
  * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the
  * auto-generated remote port for a connect request initiated by the host's
  * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
  * FreeBSD guest.
  */
 
 /*
  * Older HyperV hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
  * restrict the HyperV socket ring buffer size to six 4K pages. Newer
  * HyperV hosts don't have this limit.
  */
 #define HVS_RINGBUF_RCV_SIZE	(PAGE_SIZE * 6)
 #define HVS_RINGBUF_SND_SIZE	(PAGE_SIZE * 6)
 #define HVS_RINGBUF_MAX_SIZE	(PAGE_SIZE * 64)
 
 /*
  * Groups a device with its pcb and VMBus channel.
  * NOTE(review): presumably the per-device softc of the hvsock driver;
  * its users are outside this part of the file - confirm.
  */
 struct hvsock_sc {
 	device_t		dev;
 	struct hvs_pcb		*pcb;
 	struct vmbus_channel	*channel;
 };
 
 /*
  * Report whether the channel holds at least one complete packet, i.e.
  * whether the readable byte count reaches the size of an empty-payload
  * packet (HVSOCK_PKT_LEN(0)).
  */
 static bool
 hvsock_chan_readable(struct vmbus_channel *chan)
 {
 	uint32_t avail;
 
 	avail = vmbus_chan_read_available(chan);
 	if (avail >= HVSOCK_PKT_LEN(0))
 		return (true);
 
 	return (false);
 }
 
 /*
  * Channel callback: the host signalled activity on the ring buffers.
  * Wake a reader when a full packet is readable, and a writer when there
  * is room to write.
  *
  * 'context' is the pcb this channel belongs to.
  */
 static void
 hvsock_chan_cb(struct vmbus_channel *chan, void *context)
 {
 	struct hvs_pcb *pcb = (struct hvs_pcb *) context;
 	struct socket *so;
 	uint32_t wr_space;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: host send us a wakeup on rb data, pcb = %p\n",
 	    __func__, pcb);
 
 	/*
 	 * The channel is known to be open at this point, but the socket
 	 * may already have been closed or freed, so check under the lock
 	 * that it is still attached.
 	 */
 	(void) hvs_trans_lock();
 	so = hsvpcb2so(pcb);
 
 	if (pcb->chan == NULL || so == NULL) {
 		hvs_trans_unlock();
 		return;
 	}
 
 	/*
 	 * Reader side.  Only the no-wakeup branch unlocks explicitly;
 	 * sorwakeup_locked() presumably releases the sockbuf lock itself.
 	 */
 	SOCKBUF_LOCK(&(so)->so_rcv);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: read available = %u\n", __func__,
 	    vmbus_chan_read_available(pcb->chan));
 
 	if (!hvsock_chan_readable(pcb->chan))
 		SOCKBUF_UNLOCK(&(so)->so_rcv);
 	else
 		sorwakeup_locked(so);
 
 	/* Writer side, following the same locking pattern. */
 	SOCKBUF_LOCK(&(so)->so_snd);
 	wr_space = hvsock_canwrite_check(pcb);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: canwrite = %u\n", __func__, wr_space);
 
 	if (wr_space == 0)
 		SOCKBUF_UNLOCK(&(so)->so_snd);
 	else
 		sowwakeup_locked(so);
 
 	hvs_trans_unlock();
 }
 
 /*
  * Ring-buffer copy callback: move 'cplen' bytes between the ring-buffer
  * region at 'datap' and the uio carried in 'cbarg' (a struct
  * hvs_callback_arg).  The direction follows uio->uio_rw.
  *
  * When the argument carries a sockbuf, its lock is dropped around
  * uiomove() and retaken afterwards, so socket state may change while the
  * copy is in progress.
  *
  * Returns EINVAL when 'cbarg' or 'datap' is NULL, otherwise the result of
  * uiomove().
  */
 static int
 hvsock_br_callback(void *datap, int cplen, void *cbarg)
 {
 	struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg;
 	struct uio *uio;
 	struct sockbuf *sb;
 	int error = 0;
 
 	/* Validate before looking inside the callback argument. */
 	if (cbarg == NULL || datap == NULL)
 		return (EINVAL);
 
 	uio = arg->uio;
 	sb = arg->sb;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: called, uio_rw = %s, uio_resid = %zd, cplen = %u, "
 	    "datap = %p\n",
 	    __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br",
 	    uio->uio_resid, cplen, datap);
 
 	if (sb)
 		SOCKBUF_UNLOCK(sb);
 
 	error = uiomove(datap, cplen, uio);
 
 	if (sb)
 		SOCKBUF_LOCK(sb);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: after uiomove, uio_resid = %zd, error = %d\n",
 	    __func__, uio->uio_resid, error);
 
 	return (error);
 }
 
 /*
  * Build one packet and place it on the channel's send ring.
  *
  * With a uio and to_write > 0 the packet carries 'to_write' payload bytes,
  * which are copied in through hvsock_br_callback().  With to_write == 0 a
  * header-only packet is sent; callers in this file use that as a FIN.
  * With a NULL uio and to_write > 0 nothing is sent and 0 is returned.
  *
  * 'sb' is handed to the copy callback and may be NULL.
  *
  * Returns ENOTCONN when 'chan' is NULL, otherwise the result of
  * vmbus_chan_iov_send() (0 when nothing was sent).
  */
 static int
 hvsock_send_data(struct vmbus_channel *chan, struct uio *uio,
     uint32_t to_write, struct sockbuf *sb)
 {
 	struct hvs_callback_arg cbarg;
 	struct hvs_pkt_header hvs_pkt;
 	struct iovec iov[3];
 	uint64_t pad = 0;
 	int hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen;
 	int error = 0;
 
 	if (chan == NULL)
 		return (ENOTCONN);
 
 	/* Lengths: channel header, full header, header plus payload, padded. */
 	hlen = sizeof(struct vmbus_chanpkt_hdr);
 	hvs_pkthlen = sizeof(struct hvs_pkt_header);
 	hvs_pktlen = hvs_pkthlen + to_write;
 	pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, "
 	    "pad_pktlen = %u, data_len = %u\n",
 	    __func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write);
 
 	/* VMBus channel packet header. */
 	hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND;
 	hvs_pkt.chan_pkt_hdr.cph_flags = 0;
 	VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen);
 	VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen);
 	hvs_pkt.chan_pkt_hdr.cph_xactid = 0;
 
 	/* vmpipe header: packet type and payload size. */
 	hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1;
 	hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write;
 
 	cbarg.uio = uio;
 	cbarg.sb = sb;
 
 	if (uio != NULL && to_write > 0) {
 		/*
 		 * Header, payload, padding.  The payload segment has no
 		 * base address; the callback is passed to fill it in.
 		 */
 		iov[0].iov_base = &hvs_pkt;
 		iov[0].iov_len = hvs_pkthlen;
 		iov[1].iov_base = NULL;
 		iov[1].iov_len = to_write;
 		iov[2].iov_base = &pad;
 		iov[2].iov_len = pad_pktlen - hvs_pktlen;
 
 		error = vmbus_chan_iov_send(chan, iov, 3,
 		    hvsock_br_callback, &cbarg);
 	} else if (to_write == 0) {
 		/* Header-only packet: header followed by padding. */
 		iov[0].iov_base = &hvs_pkt;
 		iov[0].iov_len = hvs_pkthlen;
 		iov[1].iov_base = &pad;
 		iov[1].iov_len = pad_pktlen - hvs_pktlen;
 
 		error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL);
 	}
 
 	if (error != 0) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: error = %d\n", __func__, error);
 	}
 
 	return (error);
 }
 
 /*
  * Report how many payload bytes of the current packet can be read.
  *
  * If the current packet still has unread payload, that count is
  * returned.  Otherwise the header of the next packet is peeked from
  * the receive ring (skipping over the consumed packet when one is
  * pending) and recv_data_len / recv_data_off are reset for it.  When
  * the ring holds nothing beyond the consumed packet, the read index is
  * advanced first so that the host knows to wake us up on new data.
  *
  * Side effects on pcb->so->so_error:
  *   EIO        - pcb has no channel, or the new header is malformed
  *   ESHUTDOWN  - a zero-length packet (FIN) was received
  *
  * Returns the number of bytes that can be read, 0 if there are none.
  */
 static uint32_t
 hvsock_canread_check(struct hvs_pcb *pcb)
 {
 	uint32_t advance;
 	uint32_t tlen, hlen, dlen;
 	uint32_t bytes_canread = 0;
 	int error;
 
 	/* Without a pcb there is no socket to report an error on. */
 	if (pcb == NULL)
 		return (0);
 
 	if (pcb->chan == NULL) {
 		pcb->so->so_error = EIO;
 		return (0);
 	}
 
 	/* Still have data not read yet on current packet */
 	if (pcb->recv_data_len > 0)
 		return (pcb->recv_data_len);
 
 	/*
 	 * rb_init is set while a fully consumed packet is still sitting
 	 * at the read index; its total length is what must be skipped.
 	 */
 	if (pcb->rb_init)
 		advance =
 		    VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
 	else
 		advance = 0;
 
 	bytes_canread = vmbus_chan_read_available(pcb->chan);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: bytes_canread on br = %u, advance = %u\n",
 	    __func__, bytes_canread, advance);
 
 	if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) {
 		/*
 		 * Nothing to read. Need to advance the rindex before
 		 * calling sbwait, so host knows to wake us up when data
 		 * is available to read on rb.
 		 */
 		error = vmbus_chan_recv_idxadv(pcb->chan, advance);
 		if (error) {
 			HVSOCK_DBG(HVSOCK_DBG_ERR,
 			    "%s: after calling vmbus_chan_recv_idxadv, "
 			    "got error = %d\n",  __func__, error);
 			return (0);
 		} else {
 			pcb->rb_init = false;
 			pcb->recv_data_len = 0;
 			pcb->recv_data_off = 0;
 			bytes_canread = vmbus_chan_read_available(pcb->chan);
 
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: advanced %u bytes, "
 			    " bytes_canread on br now = %u\n",
 			    __func__, advance, bytes_canread);
 
 			if (bytes_canread == 0)
 				return (0);
 			else
 				advance = 0;
 		}
 	}
 
 	/* Not enough bytes behind the skip offset for a full header. */
 	if (bytes_canread <
 	    advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t)))
 		return (0);
 
 	error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt,
 	    sizeof(struct hvs_pkt_header), advance);
 
 	/* Don't have anything to read */
 	if (error) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: after calling vmbus_chan_recv_peek, got error = %d\n",
 		    __func__, error);
 		return (0);
 	}
 
 	/*
 	 * We just read in a new packet header. Do some sanity checks.
 	 */
 	tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
 	hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen);
 	dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size;
 	if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) ||
 	    __predict_false(hlen > tlen) ||
 	    __predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "invalid tlen(%u), hlen(%u) or dlen(%u)\n",
 		    tlen, hlen, dlen);
 		pcb->so->so_error = EIO;
 		return (0);
 	}
 	if (pcb->rb_init == false)
 		pcb->rb_init = true;
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n",
 	    tlen, hlen, dlen);
 
 	/* The other side has sent a close FIN */
 	if (dlen == 0) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "%s: Received FIN from other side\n", __func__);
 		/* inform the caller by setting so_error to ESHUTDOWN */
 		pcb->so->so_error = ESHUTDOWN;
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: canread on receive ring is %u \n", __func__, dlen);
 
 	pcb->recv_data_len = dlen;
 	pcb->recv_data_off = 0;
 
 	return (pcb->recv_data_len);
 }
 
 /*
  * Report how many payload bytes may currently be written to the
  * channel's transmit ring.
  *
  * Returns 0 when pcb or its channel is NULL, or when the ring cannot
  * hold a one-byte packet plus the zero-length packet reserved for the
  * FIN.  Otherwise returns the writable size less two empty-packet
  * lengths, rounded down to a multiple of 8.
  */
 static uint32_t
 hvsock_canwrite_check(struct hvs_pcb *pcb)
 {
 	uint32_t avail, usable;
 
 	if (pcb == NULL || pcb->chan == NULL)
 		return (0);
 
 	avail = vmbus_chan_write_available(pcb->chan);
 
 	/*
 	 * We must always reserve a 0-length-payload packet for the FIN.
 	 */
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: writeable is %u, should be greater than %ju\n",
 	    __func__, avail,
 	    (uintmax_t)(HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)));
 
 	/* The Tx ring seems full. */
 	if (avail < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))
 		return (0);
 
 	usable = avail - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0);
 	usable = rounddown2(usable, 8);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "%s: available size is %u\n", __func__, usable);
 
 	return (usable);
 }
 
 /*
  * Set the channel's pending send size to the length of a packet that
  * carries HVSOCK_SEND_BUF_SZ bytes of payload.
  */
 static void
 hvsock_set_chan_pending_send_size(struct vmbus_channel *chan)
 {
 	vmbus_chan_set_pending_send_size(chan,
 	    HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ));
 }
 
 /*
  * Open the vmbus channel chan on behalf of socket so.
  *
  * Ring sizes: when vmbus_current_version is below
  * VMBUS_VERSION_WIN10_V5 the fixed HVS_RINGBUF_SND_SIZE and
  * HVS_RINGBUF_RCV_SIZE are used.  Otherwise each size is taken from
  * the matching socket buffer high-water mark, raised to at least the
  * fixed size, capped at HVS_RINGBUF_MAX_SIZE and rounded down to a
  * multiple of PAGE_SIZE.
  *
  * The socket's pcb is registered as the argument of hvsock_chan_cb.
  *
  * Returns the vmbus_chan_open() result (0 on success).
  */
 static int
 hvsock_open_channel(struct vmbus_channel *chan, struct socket *so)
 {
 	unsigned int rcvbuf, sndbuf;
 	struct hvs_pcb *pcb = so2hvspcb(so);
 	int ret;
 
 	if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) {
 		sndbuf = HVS_RINGBUF_SND_SIZE;
 		rcvbuf = HVS_RINGBUF_RCV_SIZE;
 	} else {
 		sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE);
 		sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE);
 		sndbuf = rounddown2(sndbuf, PAGE_SIZE);
 		rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE);
 		rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE);
 		rcvbuf = rounddown2(rcvbuf, PAGE_SIZE);
 	}
 
 	/*
 	 * Can only read whatever user provided size of data
 	 * from ring buffer. Turn off batched reading.
 	 */
 	vmbus_chan_set_readbatch(chan, false);
 
 	ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0,
 	    hvsock_chan_cb, pcb);
 
 	if (ret != 0) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: failed to open hvsock channel, sndbuf = %u, "
 		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
 	} else {
 		/* The stray 'i' before "rcvbuf" in this message was a typo. */
 		HVSOCK_DBG(HVSOCK_DBG_INFO,
 		    "%s: hvsock channel opened, sndbuf = %u, "
 		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
 		/*
 		 * Set the pending send size so that we receive wakeup
 		 * signals from the host when there is enough space on
 		 * the buffer ring to write.
 		 */
 		hvsock_set_chan_pending_send_size(chan);
 	}
 
 	return (ret);
 }
 
 /*
  * Guest is listening passively on the socket. Open channel and
  * create a new socket for the connection.
  *
  * so is the listening socket found for the channel's port, chan the
  * newly offered channel and sc the device softc, which records the
  * new socket's pcb on success.  Failures are logged or stored in the
  * new socket's so_error; nothing is returned to the caller.
  */
 static void
 hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so,
     struct hvsock_sc *sc)
 {
 	struct socket *new_so;
 	struct hvs_pcb *new_pcb, *pcb;
 	int error;
 
 	/* Do nothing if socket is not listening */
-	if ((so->so_options & SO_ACCEPTCONN) == 0) {
+	if (!SOLISTENING(so)) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: socket is not a listening one\n", __func__);
 		return;
 	}
 
 	/*
 	 * Create a new socket. This will call pru_attach to complete
 	 * the socket initialization and put the new socket onto
 	 * listening socket's sol_incomp list, waiting to be promoted
 	 * to sol_comp list.
 	 * The new socket created has ref count 0. There is no other
 	 * thread that changes the state of this new one at the
 	 * moment, so we don't need to hold its lock while opening
 	 * channel and filling out its pcb information.
 	 */
 	new_so = sonewconn(so, 0);
 	if (!new_so) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: creating new socket failed\n", __func__);
 		/*
 		 * No socket to attach the channel to; continuing would
 		 * dereference NULL in hvsock_open_channel().
 		 */
 		return;
 	}
 
 	/*
 	 * Now open the vmbus channel. If it fails, the socket will be
 	 * on the listening socket's sol_incomp queue until it is
 	 * replaced and aborted.
 	 */
 	error = hvsock_open_channel(chan, new_so);
 	if (error) {
 		new_so->so_error = error;
 		return;
 	}
 
 	pcb = so->so_pcb;
 	new_pcb = new_so->so_pcb;
 
 	/* The new socket inherits the listener's local port. */
 	hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port);
 	/* Remote port is unknown to guest in this type of connection */
 	hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN);
 	new_pcb->chan = chan;
 	new_pcb->recv_data_len = 0;
 	new_pcb->recv_data_off = 0;
 	new_pcb->rb_init = false;
 
 	new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan);
 	new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan);
 
 	hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED);
 
 	sc->pcb = new_pcb;
 
 	/*
 	 * Change the socket state to SS_ISCONNECTED. This will promote
 	 * the socket to sol_comp queue and wake up the thread which
 	 * is accepting connection.
 	 */
 	soisconnected(new_so);
 }
 
 
 /*
  * Guest-initiated connection: open the channel for the already bound
  * socket so, reset its receive bookkeeping, move it from the bound
  * list to the connected list and mark it connected.  If the channel
  * cannot be opened the error is stored in so->so_error and nothing
  * else changes.
  */
 static void
 hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so)
 {
 	struct hvs_pcb *hpcb;
 	int rc;
 
 	rc = hvsock_open_channel(chan, so);
 	if (rc != 0) {
 		so->so_error = rc;
 		return;
 	}
 
 	hpcb = so->so_pcb;
 	hpcb->rb_init = false;
 	hpcb->recv_data_off = 0;
 	hpcb->recv_data_len = 0;
 	hpcb->chan = chan;
 
 	/* Bound -> connected, as one step under the list mutex. */
 	mtx_lock(&hvs_trans_socks_mtx);
 	__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
 	__hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED);
 	mtx_unlock(&hvs_trans_socks_mtx);
 
 	/*
 	 * Entering SS_ISCONNECTED wakes up the thread sleeping in the
 	 * connect call.
 	 */
 	soisconnected(so);
 }
 
 static void
 hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc)
 {
 	struct hyperv_guid *inst_guid, *type_guid;
 	bool conn_from_host;
 	struct sockaddr_hvs addr;
 	struct socket *so;
 	struct hvs_pcb *pcb;
 
 	type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan);
 	inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan);
 	conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan);
 
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is ");
 	hvsock_print_guid(type_guid);
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is ");
 	hvsock_print_guid(inst_guid);
 	HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n",
 	    (conn_from_host == true ) ? "from" : "to");
 
 	/*
 	 * The listening port should be in [0, MAX_LISTEN_PORT]
 	 */
 	if (!is_valid_srv_id(type_guid))
 		return;
 
 	/*
 	 * There should be a bound socket already created no matter
 	 * it is a passive or active connection.
 	 * For host initiated connection (passive on guest side),
 	 * the  type_guid contains the port which guest is bound and
 	 * listening.
 	 * For the guest initiated connection (active on guest side),
 	 * the inst_guid contains the port that guest has auto bound
 	 * to.
 	 */
 	hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid);
 	so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND);
 	if (!so) {
 		HVSOCK_DBG(HVSOCK_DBG_ERR,
 		    "%s: no bound socket found for port %u\n",
 		    __func__, addr.hvs_port);
 		return;
 	}
 
 	if (conn_from_host) {
 		hvsock_open_conn_passive(chan, so, sc);
 	} else {
 		(void) hvs_trans_lock();
 		pcb = so->so_pcb;
 		if (pcb && pcb->so) {
 			sc->pcb = so2hvspcb(so);
 			hvsock_open_conn_active(chan, so);
 		} else {
 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 			    "%s: channel detached before open\n", __func__);
 		}
 		hvs_trans_unlock();
 	}
 
 }
 
 /*
  * Device probe: claim the device only when it sits on an hvsock vmbus
  * channel.
  *
  * Returns BUS_PROBE_DEFAULT for an hvsock channel, ENXIO otherwise
  * (including when the device has no channel at all).
  */
 static int
 hvsock_probe(device_t dev)
 {
 	struct vmbus_channel *channel = vmbus_get_channel(dev);
 
 	/*
 	 * Handle the missing-channel case on its own so that a NULL
 	 * pointer is never handed to vmbus_chan_id() for logging.
 	 */
 	if (channel == NULL) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "hvsock_probe called without a channel\n");
 
 		return (ENXIO);
 	}
 
 	if (!vmbus_chan_is_hvs(channel)) {
 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 		    "hvsock_probe called but not a hvsock channel id %u\n",
 		    vmbus_chan_id(channel));
 
 		return (ENXIO);
 	}
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 	    "hvsock_probe got a hvsock channel id %u\n",
 	    vmbus_chan_id(channel));
 
 	return (BUS_PROBE_DEFAULT);
 }
 
 /*
  * Device attach: try to complete the hvsock connection for the
  * device's channel.  Always returns 0.
  */
 static int
 hvsock_attach(device_t dev)
 {
 	struct vmbus_channel *chan;
 	struct hvsock_sc *softc;
 
 	chan = vmbus_get_channel(dev);
 	softc = (struct hvsock_sc *)device_get_softc(dev);
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n");
 
 	hvsock_open_connection(chan, softc);
 
 	/*
 	 * Success is reported unconditionally.  On error the host will
 	 * rescind the device in 30 seconds and cleanup happens at that
 	 * time in vmbus_chan_msgproc_chrescind().
 	 */
 	return (0);
 }
 
 /*
  * Device detach: tear down the connection tied to this device.
  *
  * If the softc still references a pcb, then under the transport lock:
  * the socket (if any) is marked disconnected, the pcb is removed from
  * the bound and connected lists, both sockbuf I/O locks are acquired
  * so that no reader or sender is on the rings, and the pcb is zeroed,
  * freed and unlinked from the socket.  The vmbus channel is closed in
  * every case.
  *
  * Always returns 0.
  */
 static int
 hvsock_detach(device_t dev)
 {
 	struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
 	struct socket *so;
 	int error, retry;
 
 	if (bootverbose)
 		device_printf(dev, "hvsock_detach called.\n");
 
 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n");
 
 	if (sc->pcb != NULL) {
 		(void) hvs_trans_lock();
 
 		so = hsvpcb2so(sc->pcb);
 		if (so) {
 			/* Close the connection */
 			if (so->so_state &
 			    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
 				soisdisconnected(so);
 		}
 
 		/* Drop the pcb from whichever socket list it is on. */
 		mtx_lock(&hvs_trans_socks_mtx);
 		__hvs_remove_pcb_from_list(sc->pcb,
 		    HVS_LIST_BOUND | HVS_LIST_CONNECTED);
 		mtx_unlock(&hvs_trans_socks_mtx);
 
 		/*
 		 * Close channel while no reader and sender are working
 		 * on the buffer rings.
 		 *
 		 * NOTE(review): each loop below ends on any sblock()
 		 * result other than EWOULDBLOCK, and the sbunlock()
 		 * calls further down run regardless of that result --
 		 * confirm no other error can be returned here.
 		 */
 		if (so) {
 			retry = 0;
 			while ((error = sblock(&so->so_rcv, 0)) ==
 			    EWOULDBLOCK) {
 				/*
 				 * Someone is reading, rx br is busy
 				 */
 				soisdisconnected(so);
 				DELAY(500);
 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 				    "waiting for rx reader to exit, "
 				    "retry = %d\n", retry++);
 			}
 			retry = 0;
 			while ((error = sblock(&so->so_snd, 0)) ==
 			    EWOULDBLOCK) {
 				/*
 				 * Someone is sending, tx br is busy
 				 */
 				soisdisconnected(so);
 				DELAY(500);
 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
 				    "waiting for tx sender to exit, "
 				    "retry = %d\n", retry++);
 			}
 		}
 
 
 		/* Scrub the pcb before releasing its memory. */
 		bzero(sc->pcb, sizeof(struct hvs_pcb));
 		free(sc->pcb, M_HVSOCK);
 		sc->pcb = NULL;
 
 		if (so) {
 			sbunlock(&so->so_rcv);
 			sbunlock(&so->so_snd);
 			/* The socket must not point at the freed pcb. */
 			so->so_pcb = NULL;
 		}
 
 		hvs_trans_unlock();
 	}
 
 	vmbus_chan_close(vmbus_get_channel(dev));
 
 	return (0);
 }
 
 /* Device methods implemented by this driver: probe, attach, detach. */
 static device_method_t hvsock_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, hvsock_probe),
 	DEVMETHOD(device_attach, hvsock_attach),
 	DEVMETHOD(device_detach, hvsock_detach),
 	DEVMETHOD_END
 };
 
 /* Driver named "hv_sock"; each instance's softc is a struct hvsock_sc. */
 static driver_t hvsock_driver = {
 	"hv_sock",
 	hvsock_methods,
 	sizeof(struct hvsock_sc)
 };
 
 static devclass_t hvsock_devclass;
 
 /*
  * Register the driver on the vmbus bus with no module event handler,
  * declare module version 1 and a dependency on the vmbus module.
  */
 DRIVER_MODULE(hvsock, vmbus, hvsock_driver, hvsock_devclass, NULL, NULL);
 MODULE_VERSION(hvsock, 1);
 MODULE_DEPEND(hvsock, vmbus, 1, 1, 1);
diff --git a/sys/kern/uipc_accf.c b/sys/kern/uipc_accf.c
index 3ca64dd21e25..792c53c7baff 100644
--- a/sys/kern/uipc_accf.c
+++ b/sys/kern/uipc_accf.c
@@ -1,310 +1,309 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000 Paycounter, Inc.
  * Copyright (c) 2005 Robert N. M. Watson
  * Author: Alfred Perlstein <alfred@paycounter.com>, <alfred@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define ACCEPT_FILTER_MOD
 
 #include "opt_param.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/queue.h>
 
 static struct mtx accept_filter_mtx;
 MTX_SYSINIT(accept_filter, &accept_filter_mtx, "accept_filter_mtx",
 	MTX_DEF);
 #define	ACCEPT_FILTER_LOCK()	mtx_lock(&accept_filter_mtx)
 #define	ACCEPT_FILTER_UNLOCK()	mtx_unlock(&accept_filter_mtx)
 
 static SLIST_HEAD(, accept_filter) accept_filtlsthd =
 	SLIST_HEAD_INITIALIZER(accept_filtlsthd);
 
 MALLOC_DEFINE(M_ACCF, "accf", "accept filter data");
 
 static int unloadable = 0;
 
 SYSCTL_NODE(_net, OID_AUTO, accf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Accept filters");
 SYSCTL_INT(_net_accf, OID_AUTO, unloadable, CTLFLAG_RW, &unloadable, 0,
 	"Allow unload of accept filters (not recommended)");
 
 /*
  * Must be passed a malloc'd structure so we don't explode if the kld is
  * unloaded, we leak the struct on deallocation to deal with this, but if a
  * filter is loaded with the same name as a leaked one we re-use the entry.
  */
 int
 accept_filt_add(struct accept_filter *filt)
 {
 	struct accept_filter *p;
 
 	ACCEPT_FILTER_LOCK();
 	SLIST_FOREACH(p, &accept_filtlsthd, accf_next)
 		if (strcmp(p->accf_name, filt->accf_name) == 0)  {
 			if (p->accf_callback != NULL) {
 				ACCEPT_FILTER_UNLOCK();
 				return (EEXIST);
 			} else {
 				p->accf_callback = filt->accf_callback;
 				ACCEPT_FILTER_UNLOCK();
 				free(filt, M_ACCF);
 				return (0);
 			}
 		}
 				
 	if (p == NULL)
 		SLIST_INSERT_HEAD(&accept_filtlsthd, filt, accf_next);
 	ACCEPT_FILTER_UNLOCK();
 	return (0);
 }
 
 /*
  * Disable the accept filter called name by clearing its callback; the
  * list entry itself stays in place.
  *
  * Returns ENOENT if no such filter is registered, 0 otherwise.
  */
 int
 accept_filt_del(char *name)
 {
 	struct accept_filter *filt = accept_filt_get(name);
 
 	if (filt != NULL) {
 		filt->accf_callback = NULL;
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 struct accept_filter *
 accept_filt_get(char *name)
 {
 	struct accept_filter *p;
 
 	ACCEPT_FILTER_LOCK();
 	SLIST_FOREACH(p, &accept_filtlsthd, accf_next)
 		if (strcmp(p->accf_name, name) == 0)
 			break;
 	ACCEPT_FILTER_UNLOCK();
 
 	return (p);
 }
 
 /*
  * Generic module event handler for accept filter modules.  data points
  * at the module's struct accept_filter.
  *
  *   MOD_LOAD     - register a malloc'd copy of the filter
  *   MOD_UNLOAD   - disable the filter if the unloadable sysctl is
  *                  non-zero, else EOPNOTSUPP
  *   MOD_SHUTDOWN - 0
  *   other events - EOPNOTSUPP
  */
 int
 accept_filt_generic_mod_event(module_t mod, int event, void *data)
 {
 	struct accept_filter *tmpl = (struct accept_filter *) data;
 	struct accept_filter *copy;
 
 	switch (event) {
 	case MOD_LOAD:
 		copy = malloc(sizeof(*copy), M_ACCF, M_WAITOK);
 		bcopy(tmpl, copy, sizeof(*copy));
 		return (accept_filt_add(copy));
 
 	case MOD_UNLOAD:
 		/*
 		 * Unloading is not really supported yet: no refcounts
 		 * are kept, and calling an accept filter callback after
 		 * it has been unloaded is a bad thing.  A simple fix
 		 * would be to track the refcount in the struct
 		 * accept_filter.
 		 */
 		if (unloadable == 0)
 			return (EOPNOTSUPP);
 		return (accept_filt_del(tmpl->accf_name));
 
 	case MOD_SHUTDOWN:
 		return (0);
 
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * Report the accept filter attached to so.
  *
  * A zeroed struct accept_filter_arg is filled, under the socket lock,
  * with the filter's name and, when one is stored, the filter string
  * (returned in af_arg), then copied out through sopt.
  *
  * Returns EINVAL if so is not a listening socket or has no accept
  * filter, otherwise the sooptcopyout() result.
  */
 int
 accept_filt_getopt(struct socket *so, struct sockopt *sopt)
 {
 	struct accept_filter_arg *afap;
 	int error;
 
 	error = 0;
 	/* Allocate before taking the socket lock; M_WAITOK may sleep. */
 	afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK | M_ZERO);
 	SOCK_LOCK(so);
-	if ((so->so_options & SO_ACCEPTCONN) == 0) {
+	if (!SOLISTENING(so)) {
 		error = EINVAL;
 		goto out;
 	}
 	if (so->sol_accept_filter == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * NOTE(review): these unbounded copies assume the stored strings
 	 * fit in af_name / af_arg -- the field sizes are not visible here.
 	 */
 	strcpy(afap->af_name, so->sol_accept_filter->accf_name);
 	if (so->sol_accept_filter_str != NULL)
 		strcpy(afap->af_arg, so->sol_accept_filter_str);
 out:
 	SOCK_UNLOCK(so);
 	/* Copy out only after the socket lock has been dropped. */
 	if (error == 0)
 		error = sooptcopyout(sopt, afap, sizeof(*afap));
 	free(afap, M_TEMP);
 	return (error);
 }
 
 /*
  * Attach an accept filter to, or remove one from, listening socket so.
  *
  * A NULL sopt or NULL sopt->sopt_val removes the current filter:
  * the filter's destroy method (if any) is run, the filter state on the
  * socket is cleared, and connections on the incomplete queue that
  * carry SO_ACCEPTFILTER are moved to the complete queue.
  *
  * Otherwise the struct accept_filter_arg supplied through sopt names
  * the filter to attach.
  *
  * Returns 0 on success; EINVAL if so is not listening, already has a
  * filter (attach case) or the filter's create method fails; ENOENT if
  * the named filter is not registered; or the sooptcopyin() error.
  */
 int
 accept_filt_setopt(struct socket *so, struct sockopt *sopt)
 {
 	struct accept_filter_arg *afap;
 	struct accept_filter *afp;
 	char *accept_filter_str = NULL;
 	void *accept_filter_arg = NULL;
 	int error;
 
 	/*
 	 * Handle the simple delete case first.
 	 */
 	if (sopt == NULL || sopt->sopt_val == NULL) {
 		struct socket *sp, *sp1;
 		int wakeup;
 
 		SOCK_LOCK(so);
-		if ((so->so_options & SO_ACCEPTCONN) == 0) {
+		if (!SOLISTENING(so)) {
 			SOCK_UNLOCK(so);
 			return (EINVAL);
 		}
 		/* No filter attached: nothing to remove. */
 		if (so->sol_accept_filter == NULL) {
 			SOCK_UNLOCK(so);
 			return (0);
 		}
 		if (so->sol_accept_filter->accf_destroy != NULL)
 			so->sol_accept_filter->accf_destroy(so);
 		if (so->sol_accept_filter_str != NULL)
 			free(so->sol_accept_filter_str, M_ACCF);
 		so->sol_accept_filter = NULL;
 		so->sol_accept_filter_arg = NULL;
 		so->sol_accept_filter_str = NULL;
 		so->so_options &= ~SO_ACCEPTFILTER;
 
 		/*
 		 * Move from incomplete queue to complete only those
 		 * connections, that are blocked by us.
 		 */
 		wakeup = 0;
 		TAILQ_FOREACH_SAFE(sp, &so->sol_incomp, so_list, sp1) {
 			SOCK_LOCK(sp);
 			if (sp->so_options & SO_ACCEPTFILTER) {
 				TAILQ_REMOVE(&so->sol_incomp, sp, so_list);
 				TAILQ_INSERT_TAIL(&so->sol_comp, sp, so_list);
 				sp->so_qstate = SQ_COMP;
 				sp->so_options &= ~SO_ACCEPTFILTER;
 				so->sol_incqlen--;
 				so->sol_qlen++;
 				wakeup = 1;
 			}
 			SOCK_UNLOCK(sp);
 		}
 		/*
 		 * Both branches release the lock taken by SOCK_LOCK(so)
 		 * above; presumably SOLISTEN_UNLOCK() operates on the
 		 * same lock for a listening socket -- TODO confirm.
 		 */
 		if (wakeup)
 			solisten_wakeup(so);  /* unlocks */
 		else
 			SOLISTEN_UNLOCK(so);
 		return (0);
 	}
 
 	/*
 	 * Pre-allocate any memory we may need later to avoid blocking at
 	 * untimely moments.  This does not optimize for invalid arguments.
 	 */
 	afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK);
 	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
 	/* Force NUL termination of both user-supplied strings. */
 	afap->af_name[sizeof(afap->af_name)-1] = '\0';
 	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
 	if (error) {
 		free(afap, M_TEMP);
 		return (error);
 	}
 	afp = accept_filt_get(afap->af_name);
 	if (afp == NULL) {
 		free(afap, M_TEMP);
 		return (ENOENT);
 	}
 	/*
 	 * NOTE(review): the string saved here is a copy of af_name, while
 	 * accept_filt_getopt() reports the saved string as af_arg --
 	 * confirm that af_name (rather than af_arg) is intended.
 	 */
 	if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
 		size_t len = strlen(afap->af_name) + 1;
 		accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
 		strcpy(accept_filter_str, afap->af_name);
 	}
 
 	/*
 	 * Require a listen socket; don't try to replace an existing filter
 	 * without first removing it.
 	 */
 	SOCK_LOCK(so);
-	if ((so->so_options & SO_ACCEPTCONN) == 0 ||
-	    so->sol_accept_filter != NULL) {
+	if (!SOLISTENING(so) || so->sol_accept_filter != NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * Invoke the accf_create() method of the filter if required.  The
 	 * socket mutex is held over this call, so create methods for filters
 	 * can't block.
 	 */
 	if (afp->accf_create != NULL) {
 		accept_filter_arg = afp->accf_create(so, afap->af_arg);
 		if (accept_filter_arg == NULL) {
 			error = EINVAL;
 			goto out;
 		}
 	}
 	so->sol_accept_filter = afp;
 	so->sol_accept_filter_arg = accept_filter_arg;
 	so->sol_accept_filter_str = accept_filter_str;
 	/* Ownership moved to the socket; keep the cleanup below off it. */
 	accept_filter_str = NULL;
 	so->so_options |= SO_ACCEPTFILTER;
 out:
 	SOCK_UNLOCK(so);
 	if (accept_filter_str != NULL)
 		free(accept_filter_str, M_ACCF);
 	free(afap, M_TEMP);
 	return (error);
 }
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 94d7782b5f0a..46d9cb8f3a90 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4421 +1,4421 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2004 The FreeBSD Foundation
  * Copyright (c) 2004-2008 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
  */
 
 /*
  * Comments on the socket life cycle:
  *
  * soalloc() sets up socket layer state for a socket, called only by
  * socreate(), sonewconn() and sopeeloff().  Socket layer private.
  *
  * sodealloc() tears down socket layer state for a socket, called by
  * sofree() and by the error paths of socreate(), sonewconn() and
  * sopeeloff().  Socket layer private.
  *
  * pru_attach() associates protocol layer state with an allocated socket;
  * called only once, may fail, aborting socket allocation.  This is called
  * from socreate() and sonewconn().  Socket layer private.
  *
  * pru_detach() disassociates protocol layer state from an attached socket,
  * and will be called exactly once for sockets in which pru_attach() has
  * been successfully called.  If pru_attach() returned an error,
  * pru_detach() will not be called.  Socket layer private.
  *
  * pru_abort() and pru_close() notify the protocol layer that the last
  * consumer of a socket is starting to tear down the socket, and that the
  * protocol should terminate the connection.  Historically, pru_abort() also
  * detached protocol state from the socket state, but this is no longer the
  * case.
  *
  * socreate() creates a socket and attaches protocol state.  This is a public
  * interface that may be used by socket layer consumers to create new
  * sockets.
  *
  * sonewconn() creates a socket and attaches protocol state.  This is a
  * public interface that may be used by protocols to create new sockets when
  * a new connection is received and will be available for accept() on a
  * listen socket.
  *
  * soclose() destroys a socket after possibly waiting for it to disconnect.
  * This is a public interface that socket consumers should use to close and
  * release a socket when done with it.
  *
  * soabort() destroys a socket without waiting for it to disconnect (used
  * only for incoming connections that are already partially or fully
  * connected).  This is used internally by the socket layer when clearing
  * listen socket queues (due to overflow or close on the listen socket), but
  * is also a public interface protocols may use to abort connections in
  * their incomplete listen queues should they no longer be required.  Sockets
  * placed in completed connection listen queues should not be aborted for
  * reasons described in the comment above the soclose() implementation.  This
  * is not a general purpose close routine, and except in the specific
  * circumstances described here, should not be used.
  *
  * sofree() will free a socket and its protocol state if all references on
  * the socket have been released, and is the public interface to attempt to
  * free a socket when a reference is removed.  This is a socket layer private
  * interface.
  *
  * NOTE: In addition to socreate() and soclose(), which provide a single
  * socket reference to the consumer to be managed as required, there are two
  * calls to explicitly manage socket references, soref(), and sorele().
  * Currently, these are generally required only when transitioning a socket
  * from a listen queue to a file descriptor, in order to prevent garbage
  * collection of the socket at an untimely moment.  For a number of reasons,
  * these interfaces are not preferred, and should be avoided.
  *
  * NOTE: With regard to VNETs the general rule is that callers do not set
  * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
  * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
  * and sorflush(), which are usually called from a pre-set VNET context.
  * sopoll() currently does not need a VNET context to be set.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_kern_tls.h"
 #include "opt_sctp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/domain.h>
 #include <sys/file.h>			/* for struct knote */
 #include <sys/hhook.h>
 #include <sys/kernel.h>
 #include <sys/khelp.h>
 #include <sys/ktls.h>
 #include <sys/event.h>
 #include <sys/eventhandler.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/resourcevar.h>
 #include <net/route.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/jail.h>
 #include <sys/syslog.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp.h>
 
 #include <net/vnet.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <sys/sysent.h>
 #include <compat/freebsd32/freebsd32.h>
 #endif
 
 /* Forward declarations of file-local helpers defined later in this file. */
 static int	soreceive_rcvoob(struct socket *so, struct uio *uio,
 		    int flags);
 /*
  * Lock, unlock and assert-lock callbacks handed to knlist_init() for the
  * read-side (so_rdsel) and write-side (so_wrsel) knote lists.
  */
 static void	so_rdknl_lock(void *);
 static void	so_rdknl_unlock(void *);
 static void	so_rdknl_assert_lock(void *, int);
 static void	so_wrknl_lock(void *);
 static void	so_wrknl_unlock(void *);
 static void	so_wrknl_assert_lock(void *, int);
 
 /* Detach and event callbacks referenced by the filterops tables below. */
 static void	filt_sordetach(struct knote *kn);
 static int	filt_soread(struct knote *kn, long hint);
 static void	filt_sowdetach(struct knote *kn);
 static int	filt_sowrite(struct knote *kn, long hint);
 static int	filt_soempty(struct knote *kn, long hint);
 static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
 fo_kqfilter_t	soo_kqfilter;
 
 /* Read filter: detach via filt_sordetach(), events via filt_soread(). */
 static struct filterops soread_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sordetach,
 	.f_event = filt_soread,
 };
 /* Write filter: detach via filt_sowdetach(), events via filt_sowrite(). */
 static struct filterops sowrite_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sowdetach,
 	.f_event = filt_sowrite,
 };
 /* "Empty" filter: shares the write-side detach, events via filt_soempty(). */
 static struct filterops soempty_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sowdetach,
 	.f_event = filt_soempty,
 };
 
 so_gen_t	so_gencnt;	/* generation count for sockets */
 
 /* Kernel malloc types declared by this file. */
 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
 
 /*
  * Assert that a vnet context (curvnet) is set.  The socket argument is only
  * used to identify the socket in the assertion message.
  */
 #define	VNET_SO_ASSERT(so)						\
 	VNET_ASSERT(curvnet != NULL,					\
 	    ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
 
 /*
  * Per-vnet table of socket hhook heads, indexed by hook subtype in the
  * range 0..HHOOK_SOCKET_LAST; accessed through V_socket_hhh.
  */
 VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]);
 #define	V_socket_hhh		VNET(socket_hhh)
 
 /*
  * Limit on the number of connections in the listen queue waiting
  * for accept(2).
  * NB: The original sysctl somaxconn is still available but hidden
  * to prevent confusion about the actual purpose of this number.
  */
 static u_int somaxconn = SOMAXCONN;
 
 static int
 sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int val;
 
 	val = somaxconn;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 
 	/*
 	 * The purpose of the UINT_MAX / 3 limit, is so that the formula
 	 *   3 * so_qlimit / 2
 	 * below, will not overflow.
          */
 
 	if (val < 1 || val > UINT_MAX / 3)
 		return (EINVAL);
 
 	somaxconn = val;
 	return (0);
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, sizeof(int),
     sysctl_somaxconn, "I",
     "Maximum listen socket pending connection accept queue size");
 SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, 0,
     sizeof(int), sysctl_somaxconn, "I",
     "Maximum listen socket pending connection accept queue size (compat)");
 
 /*
  * Number of sockets currently allocated: incremented in soalloc() and
  * decremented in sodealloc(), both under so_global_mtx.  Exported read-only.
  */
 static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
 
 /*
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
  */
 struct mtx accept_mtx;
 MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);
 
 /*
  * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
  * so_gencnt field.
  *
  * NOTE(review): the lock name string below reads "so_glabel", presumably a
  * typo for "so_global" -- confirm nothing keys on the name before changing.
  */
 static struct mtx so_global_mtx;
 MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);
 
 /*
  * General IPC sysctl name space, used by sockets and a variety of other IPC
  * types.
  */
 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IPC");
 
 /*
  * Initialize the socket subsystem and set up the socket
  * memory allocator.
  */
 static uma_zone_t socket_zone;	/* UMA zone backing struct socket */
 int	maxsockets;		/* limit applied to socket_zone */
 
 /*
  * Handler for the maxsockets_change event (registered in socket_init()):
  * applies the current maxsockets value to the socket zone and stores back
  * whatever uma_zone_set_max() returns.
  */
 static void
 socket_zone_change(void *tag)
 {
 
 	maxsockets = uma_zone_set_max(socket_zone, maxsockets);
 }
 
 /*
  * Register the hhook head for one socket hook subtype, storing it in the
  * matching V_socket_hhh slot.  Failure is only reported through printf().
  */
 static void
 socket_hhook_register(int subtype)
 {
 	int rc;
 
 	rc = hhook_head_register(HHOOK_TYPE_SOCKET, subtype,
 	    &V_socket_hhh[subtype], HHOOK_NOWAIT|HHOOK_HEADISINVNET);
 	if (rc == 0)
 		return;
 	printf("%s: WARNING: unable to register hook\n", __func__);
 }
 
 /*
  * Deregister the hhook head stored in the V_socket_hhh slot for the given
  * subtype.  Failure is only reported through printf().
  */
 static void
 socket_hhook_deregister(int subtype)
 {
 	int rc;
 
 	rc = hhook_head_deregister(V_socket_hhh[subtype]);
 	if (rc == 0)
 		return;
 	printf("%s: WARNING: unable to deregister hook\n", __func__);
 }
 
 /*
  * SYSINIT hook: create the UMA zone that backs struct socket, apply the
  * maxsockets limit to it, and register socket_zone_change() so that later
  * maxsockets_change events are propagated to the zone.
  */
 static void
 socket_init(void *tag)
 {
 
 	socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	/* Keep maxsockets in sync with the value uma_zone_set_max() returns. */
 	maxsockets = uma_zone_set_max(socket_zone, maxsockets);
 	uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
 	    EVENTHANDLER_PRI_FIRST);
 }
 SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
 
 /*
  * Per-vnet initialisation: register an hhook head for every socket hook
  * subtype.
  */
 static void
 socket_vnet_init(const void *unused __unused)
 {
 	int subtype;
 
 	/* Subtypes are expected to form the contiguous range 0..LAST. */
 	subtype = 0;
 	while (subtype <= HHOOK_SOCKET_LAST) {
 		socket_hhook_register(subtype);
 		subtype++;
 	}
 }
 VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
     socket_vnet_init, NULL);
 
 /*
  * Per-vnet teardown: deregister the hhook head of every socket hook
  * subtype, in the same order they were registered.
  */
 static void
 socket_vnet_uninit(const void *unused __unused)
 {
 	int subtype;
 
 	subtype = 0;
 	while (subtype <= HHOOK_SOCKET_LAST) {
 		socket_hhook_deregister(subtype);
 		subtype++;
 	}
 }
 VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
     socket_vnet_uninit, NULL);
 
 /*
  * Set the initial value of maxsockets: take the kern.ipc.maxsockets
  * tunable if present, but never start below maxfiles.  This SYSINIT must
  * be run after tunable_mbinit().
  */
 static void
 init_maxsockets(void *ignored)
 {
 
 	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
 	if (maxsockets < maxfiles)
 		maxsockets = maxfiles;
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
 
 /*
  * Sysctl to get and set the maximum global sockets limit.  Notify protocols
  * of the change so that they can update their dependent limits as required.
  */
 static int
 sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
 {
 	int error, newmaxsockets;
 
 	newmaxsockets = maxsockets;
 	error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
 	if (error == 0 && req->newptr) {
 		if (newmaxsockets > maxsockets &&
 		    newmaxsockets <= maxfiles) {
 			maxsockets = newmaxsockets;
 			EVENTHANDLER_INVOKE(maxsockets_change);
 		} else
 			error = EINVAL;
 	}
 	return (error);
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &maxsockets, 0,
     sysctl_maxsockets, "IU",
     "Maximum number of sockets available");
 
 /*
  * Socket operation routines.  These routines are called by the routines in
  * sys_socket.c or from a system process, and implement the semantics of
  * socket operations by switching out to the protocol specific routines.
  */
 
 /*
  * Get a socket structure from our zone, and initialize it.  Note that it
  * would probably be better to allocate socket and PCB at the same time, but
  * I'm not convinced that all the protocols can be easily modified to do
  * this.
  *
  * The allocation is M_NOWAIT, so NULL is returned when the zone cannot
  * supply a socket, and also when MAC, khelp OSD or the socket-create hhook
  * reports failure.  Each failure path undoes whatever had been set up so
  * far before handing the memory back to the zone.
  *
  * soalloc() returns a socket with a ref count of 0.
  */
 static struct socket *
 soalloc(struct vnet *vnet)
 {
 	struct socket *so;
 
 	so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO);
 	if (so == NULL)
 		return (NULL);
 #ifdef MAC
 	if (mac_socket_init(so, M_NOWAIT) != 0) {
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 #endif
 	if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) {
 		/* Undo mac_socket_init() so its state is not leaked. */
 #ifdef MAC
 		mac_socket_destroy(so);
 #endif
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 
 	/*
 	 * The socket locking protocol allows to lock 2 sockets at a time,
 	 * however, the first one must be a listening socket.  WITNESS lacks
 	 * a feature to change class of an existing lock, so we use DUPOK.
 	 */
 	mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
 	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
 	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
 	so->so_rcv.sb_sel = &so->so_rdsel;
 	so->so_snd.sb_sel = &so->so_wrsel;
 	sx_init(&so->so_snd.sb_sx, "so_snd_sx");
 	sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
 	TAILQ_INIT(&so->so_snd.sb_aiojobq);
 	TAILQ_INIT(&so->so_rcv.sb_aiojobq);
 	TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
 	TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so);
 #ifdef VIMAGE
 	VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
 	    __func__, __LINE__, so));
 	so->so_vnet = vnet;
 #endif
 	/* We shouldn't need the so_global_mtx */
 	if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) {
 		/* Do we need more comprehensive error returns? */
 		/*
 		 * Tear down everything initialised above, mirroring what
 		 * sodealloc() does for a non-listening socket, so that
 		 * neither the locks nor the OSD/MAC state are leaked.
 		 */
 		sx_destroy(&so->so_snd.sb_sx);
 		sx_destroy(&so->so_rcv.sb_sx);
 		SOCKBUF_LOCK_DESTROY(&so->so_snd);
 		SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 		mtx_destroy(&so->so_lock);
 		khelp_destroy_osd(&so->osd);
 #ifdef MAC
 		mac_socket_destroy(so);
 #endif
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 	/* Account for the new socket in the global and per-vnet counters. */
 	mtx_lock(&so_global_mtx);
 	so->so_gencnt = ++so_gencnt;
 	++numopensockets;
 #ifdef VIMAGE
 	vnet->vnet_sockcnt++;
 #endif
 	mtx_unlock(&so_global_mtx);
 
 	return (so);
 }
 
 /*
  * Free the storage associated with a socket at the socket layer, tear down
  * locks, labels, etc.  All protocol state is assumed already to have been
  * torn down (and possibly never set up) by the caller.
  *
  * The socket must have no references (so_count == 0) and no attached
  * protocol control block (so_pcb == NULL); both are asserted.
  */
 static void
 sodealloc(struct socket *so)
 {
 
 	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
 	KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL"));
 
 	/* Undo the global and per-vnet accounting done in soalloc(). */
 	mtx_lock(&so_global_mtx);
 	so->so_gencnt = ++so_gencnt;
 	--numopensockets;	/* Could be below, but faster here. */
 #ifdef VIMAGE
 	VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p",
 	    __func__, __LINE__, so));
 	so->so_vnet->vnet_sockcnt--;
 #endif
 	mtx_unlock(&so_global_mtx);
 #ifdef MAC
 	mac_socket_destroy(so);
 #endif
 	hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE);
 
 	khelp_destroy_osd(&so->osd);
 	if (SOLISTENING(so)) {
 		/*
 		 * A listening socket has no socket buffers left to release
 		 * (solisten_proto() destroyed them); only a remaining accept
 		 * filter needs to be detached.
 		 */
 		if (so->sol_accept_filter != NULL)
 			accept_filt_setopt(so, NULL);
 	} else {
 		/*
 		 * Give back the buffer space charged to the owning uid, then
 		 * destroy the per-buffer sx and sockbuf locks.
 		 */
 		if (so->so_rcv.sb_hiwat)
 			(void)chgsbsize(so->so_cred->cr_uidinfo,
 			    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
 		if (so->so_snd.sb_hiwat)
 			(void)chgsbsize(so->so_cred->cr_uidinfo,
 			    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
 		sx_destroy(&so->so_snd.sb_sx);
 		sx_destroy(&so->so_rcv.sb_sx);
 		SOCKBUF_LOCK_DESTROY(&so->so_snd);
 		SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 	}
 	/* Drop the credential reference, then release lock and memory. */
 	crfree(so->so_cred);
 	mtx_destroy(&so->so_lock);
 	uma_zfree(socket_zone, so);
 }
 
 /*
  * socreate returns a socket with a ref count of 1.  The socket should be
  * closed with soclose().
  */
 int
 socreate(int dom, struct socket **aso, int type, int proto,
     struct ucred *cred, struct thread *td)
 {
 	struct protosw *prp;
 	struct socket *so;
 	int error;
 
 	if (proto)
 		prp = pffindproto(dom, proto, type);
 	else
 		prp = pffindtype(dom, type);
 
 	if (prp == NULL) {
 		/* No support for domain. */
 		if (pffinddomain(dom) == NULL)
 			return (EAFNOSUPPORT);
 		/* No support for socket type. */
 		if (proto == 0 && type != 0)
 			return (EPROTOTYPE);
 		return (EPROTONOSUPPORT);
 	}
 	if (prp->pr_usrreqs->pru_attach == NULL ||
 	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
 		return (EPROTONOSUPPORT);
 
 	if (IN_CAPABILITY_MODE(td) && (prp->pr_flags & PR_CAPATTACH) == 0)
 		return (ECAPMODE);
 
 	if (prison_check_af(cred, prp->pr_domain->dom_family) != 0)
 		return (EPROTONOSUPPORT);
 
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
 	so = soalloc(CRED_TO_VNET(cred));
 	if (so == NULL)
 		return (ENOBUFS);
 
 	so->so_type = type;
 	so->so_cred = crhold(cred);
 	if ((prp->pr_domain->dom_family == PF_INET) ||
 	    (prp->pr_domain->dom_family == PF_INET6) ||
 	    (prp->pr_domain->dom_family == PF_ROUTE))
 		so->so_fibnum = td->td_proc->p_fibnum;
 	else
 		so->so_fibnum = 0;
 	so->so_proto = prp;
 #ifdef MAC
 	mac_socket_create(cred, so);
 #endif
 	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
 	    so_rdknl_assert_lock);
 	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
 	    so_wrknl_assert_lock);
 	/*
 	 * Auto-sizing of socket buffers is managed by the protocols and
 	 * the appropriate flags must be set in the pru_attach function.
 	 */
 	CURVNET_SET(so->so_vnet);
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	CURVNET_RESTORE();
 	if (error) {
 		sodealloc(so);
 		return (error);
 	}
 	soref(so);
 	*aso = so;
 	return (0);
 }
 
 #ifdef REGRESSION
 /*
  * Regression-test knob: when cleared, sonewconn() skips its early
  * listen-queue overflow check.
  */
 static int regression_sonewconn_earlytest = 1;
 SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
     &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
 #endif
 
 /*
  * Minimum interval between listen-queue overflow log messages; passed to
  * ratecheck() in sonewconn().  Defaults to 60 seconds.
  */
 static struct timeval overinterval = { 60, 0 };
 SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, sooverinterval, CTLFLAG_RW,
     &overinterval,
     "Delay in seconds between warnings for listen socket overflows");
 
 /*
  * When an attempt at a new connection is noted on a socket which accepts
  * connections, sonewconn is called.  If the connection is possible (subject
  * to space constraints, etc.) then we allocate a new structure, properly
  * linked into the data structure of the original socket, and return this.
  * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
  *
  * Returns NULL when the completed queue is over its limit, or when
  * allocating the socket, reserving its buffer space or attaching protocol
  * state fails.  Queue overflows are logged, rate limited by overinterval.
  *
  * Note: the ref count on the socket is 0 on return.
  */
 struct socket *
 sonewconn(struct socket *head, int connstatus)
 {
 	struct sbuf descrsb;
 	struct socket *so;
 	int len, overcount;
 	u_int qlen;
 	const char localprefix[] = "local:";
 	char descrbuf[SUNPATHLEN + sizeof(localprefix)];
 #if defined(INET6)
 	char addrbuf[INET6_ADDRSTRLEN];
 #elif defined(INET)
 	char addrbuf[INET_ADDRSTRLEN];
 #endif
 	bool dolog, over;
 
 	SOLISTEN_LOCK(head);
 	/* Allow the completed queue to grow to 1.5 times the backlog. */
 	over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
 #ifdef REGRESSION
 	if (regression_sonewconn_earlytest && over) {
 #else
 	if (over) {
 #endif
 		head->sol_overcount++;
 		dolog = !!ratecheck(&head->sol_lastover, &overinterval);
 
 		/*
 		 * If we're going to log, copy the overflow count and queue
 		 * length from the listen socket before dropping the lock.
 		 * Also, reset the overflow count.
 		 */
 		if (dolog) {
 			overcount = head->sol_overcount;
 			head->sol_overcount = 0;
 			qlen = head->sol_qlen;
 		}
 		SOLISTEN_UNLOCK(head);
 
 		if (dolog) {
 			/*
 			 * Try to print something descriptive about the
 			 * socket for the error message.
 			 */
 			sbuf_new(&descrsb, descrbuf, sizeof(descrbuf),
 			    SBUF_FIXEDLEN);
 			switch (head->so_proto->pr_domain->dom_family) {
 #if defined(INET) || defined(INET6)
 #ifdef INET
 			case AF_INET:
 #endif
 #ifdef INET6
 			case AF_INET6:
 				if (head->so_proto->pr_domain->dom_family ==
 				    AF_INET6 ||
 				    (sotoinpcb(head)->inp_inc.inc_flags &
 				    INC_ISIPV6)) {
 					ip6_sprintf(addrbuf,
 					    &sotoinpcb(head)->inp_inc.inc6_laddr);
 					sbuf_printf(&descrsb, "[%s]", addrbuf);
 				} else
 #endif
 				{
 #ifdef INET
 					inet_ntoa_r(
 					    sotoinpcb(head)->inp_inc.inc_laddr,
 					    addrbuf);
 					sbuf_cat(&descrsb, addrbuf);
 #endif
 				}
 				sbuf_printf(&descrsb, ":%hu (proto %u)",
 				    ntohs(sotoinpcb(head)->inp_inc.inc_lport),
 				    head->so_proto->pr_protocol);
 				break;
 #endif /* INET || INET6 */
 			case AF_UNIX:
 				sbuf_cat(&descrsb, localprefix);
 				if (sotounpcb(head)->unp_addr != NULL)
 					len =
 					    sotounpcb(head)->unp_addr->sun_len -
 					    offsetof(struct sockaddr_un,
 					    sun_path);
 				else
 					len = 0;
 				if (len > 0)
 					sbuf_bcat(&descrsb,
 					    sotounpcb(head)->unp_addr->sun_path,
 					    len);
 				else
 					sbuf_cat(&descrsb, "(unknown)");
 				break;
 			}
 
 			/*
 			 * If we can't print something more specific, at least
 			 * print the domain name.
 			 */
 			if (sbuf_finish(&descrsb) != 0 ||
 			    sbuf_len(&descrsb) <= 0) {
 				sbuf_clear(&descrsb);
 				sbuf_cat(&descrsb,
 				    head->so_proto->pr_domain->dom_name ?:
 				    "unknown");
 				sbuf_finish(&descrsb);
 			}
 			KASSERT(sbuf_len(&descrsb) > 0,
 			    ("%s: sbuf creation failed", __func__));
 			/* qlen is unsigned, so it is printed with %u. */
 			log(LOG_DEBUG,
 			    "%s: pcb %p (%s): Listen queue overflow: "
 			    "%u already in queue awaiting acceptance "
 			    "(%d occurrences)\n",
 			    __func__, head->so_pcb, sbuf_data(&descrsb),
 			    qlen, overcount);
 			sbuf_delete(&descrsb);
 		}
 
 		return (NULL);
 	}
 	SOLISTEN_UNLOCK(head);
 	VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
 	    __func__, head));
 	so = soalloc(head->so_vnet);
 	if (so == NULL) {
 		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
 		    "limit reached or out of memory\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	/* Inherit the listen socket's properties, minus SO_ACCEPTCONN. */
 	so->so_listen = head;
 	so->so_type = head->so_type;
 	so->so_options = head->so_options & ~SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
 	so->so_state = head->so_state | SS_NOFDREF;
 	so->so_fibnum = head->so_fibnum;
 	so->so_proto = head->so_proto;
 	so->so_cred = crhold(head->so_cred);
 #ifdef MAC
 	mac_socket_newconn(head, so);
 #endif
 	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
 	    so_rdknl_assert_lock);
 	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
 	    so_wrknl_assert_lock);
 	VNET_SO_ASSERT(head);
 	if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	/* Apply the buffer settings saved on the listen socket. */
 	so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
 	so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
 	so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
 	so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
 	so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
 	so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;
 
 	SOLISTEN_LOCK(head);
 	/*
 	 * With an accept filter installed the connection always starts on
 	 * the incomplete queue, whatever status the caller passed in.
 	 */
 	if (head->sol_accept_filter != NULL)
 		connstatus = 0;
 	so->so_state |= connstatus;
 	soref(head); /* A socket on (in)complete queue refs head. */
 	if (connstatus) {
 		TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
 		so->so_qstate = SQ_COMP;
 		head->sol_qlen++;
 		solisten_wakeup(head);	/* unlocks */
 	} else {
 		/*
 		 * Keep removing sockets from the head until there's room for
 		 * us to insert on the tail.  In pre-locking revisions, this
 		 * was a simple if(), but as we could be racing with other
 		 * threads and soabort() requires dropping locks, we must
 		 * loop waiting for the condition to be true.
 		 */
 		while (head->sol_incqlen > head->sol_qlimit) {
 			struct socket *sp;
 
 			sp = TAILQ_FIRST(&head->sol_incomp);
 			TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
 			head->sol_incqlen--;
 			SOCK_LOCK(sp);
 			sp->so_qstate = SQ_NONE;
 			sp->so_listen = NULL;
 			SOCK_UNLOCK(sp);
 			sorele(head);	/* does SOLISTEN_UNLOCK, head stays */
 			soabort(sp);
 			SOLISTEN_LOCK(head);
 		}
 		TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
 		so->so_qstate = SQ_INCOMP;
 		head->sol_incqlen++;
 		SOLISTEN_UNLOCK(head);
 	}
 	return (so);
 }
 
 #if defined(SCTP) || defined(SCTP_SUPPORT)
 /*
  * Socket part of sctp_peeloff().  Allocate a new socket that inherits
  * type, options, credentials and buffer settings from 'head', attach
  * protocol state to it and return it holding one reference.  Returns NULL
  * if allocation, buffer reservation or protocol attach fails.
  */
 struct socket *
 sopeeloff(struct socket *head)
 {
 	struct socket *nso;
 
 	VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
 	    __func__, __LINE__, head));
 	nso = soalloc(head->so_vnet);
 	if (nso == NULL) {
 		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
 		    "limit reached or out of memory\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 
 	/* Copy over the socket-level properties of the parent. */
 	nso->so_proto = head->so_proto;
 	nso->so_type = head->so_type;
 	nso->so_fibnum = head->so_fibnum;
 	nso->so_options = head->so_options;
 	nso->so_linger = head->so_linger;
 	/* Only the non-blocking flag is inherited; start out connected. */
 	nso->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
 	nso->so_cred = crhold(head->so_cred);
 #ifdef MAC
 	mac_socket_newconn(head, nso);
 #endif
 	knlist_init(&nso->so_rdsel.si_note, nso, so_rdknl_lock,
 	    so_rdknl_unlock, so_rdknl_assert_lock);
 	knlist_init(&nso->so_wrsel.si_note, nso, so_wrknl_lock,
 	    so_wrknl_unlock, so_wrknl_assert_lock);
 	VNET_SO_ASSERT(head);
 
 	if (soreserve(nso, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0) {
 		sodealloc(nso);
 		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	if ((*nso->so_proto->pr_usrreqs->pru_attach)(nso, 0, NULL) != 0) {
 		sodealloc(nso);
 		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 
 	/* Mirror the parent's low-water marks, timeouts and autosizing. */
 	nso->so_snd.sb_lowat = head->so_snd.sb_lowat;
 	nso->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
 	nso->so_snd.sb_timeo = head->so_snd.sb_timeo;
 	nso->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
 	nso->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
 	nso->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
 
 	soref(nso);
 
 	return (nso);
 }
 #endif	/* SCTP */
 
 /*
  * Bind 'so' to the address 'nam' by calling the protocol's pru_bind
  * method with the socket's vnet set as the current vnet.  Returns the
  * method's result.
  */
 int
 sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int rc;
 
 	CURVNET_SET(so->so_vnet);
 	rc = so->so_proto->pr_usrreqs->pru_bind(so, nam, td);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 /*
  * Like sobind(), but dispatches to the protocol's pru_bindat method and
  * passes the descriptor 'fd' through to it.  Returns the method's result.
  */
 int
 sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int rc;
 
 	CURVNET_SET(so->so_vnet);
 	rc = so->so_proto->pr_usrreqs->pru_bindat(fd, so, nam, td);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 /*
  * solisten() transitions a socket from a non-listening state to a listening
  * state, but can also be used to update the listen queue depth on an
  * existing listen socket.  The protocol will call back into the sockets
  * layer using solisten_proto_check() and solisten_proto() to check and set
  * socket-layer listen state.  Call backs are used so that the protocol can
  * acquire both protocol and socket layer locks in whatever order is required
  * by the protocol.
  *
  * Protocol implementors are advised to hold the socket lock across the
  * socket-layer test and set to avoid races at the socket layer.
  */
 int
 solisten(struct socket *so, int backlog, struct thread *td)
 {
 	int rc;
 
 	/* Dispatch to the protocol's pru_listen in the socket's vnet. */
 	CURVNET_SET(so->so_vnet);
 	rc = so->so_proto->pr_usrreqs->pru_listen(so, backlog, td);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 /*
  * Check whether 'so' may be switched to (or kept in) the listening state.
  * Called with the socket lock held.  Returns EINVAL if the socket is
  * connected, connecting or disconnecting, and 0 otherwise.
  */
 int
 solisten_proto_check(struct socket *so)
 {
 	const int busy = SS_ISCONNECTED | SS_ISCONNECTING |
 	    SS_ISDISCONNECTING;
 
 	SOCK_LOCK_ASSERT(so);
 
 	return ((so->so_state & busy) != 0 ? EINVAL : 0);
 }
 
 /*
  * Put 'so' into the listening state, or, if it is already listening, only
  * update its queue limit.  Called with the socket lock held.
  *
  * On the first transition the socket buffers are destroyed and their
  * settings are preserved in the sol_sb* fields, from which sonewconn()
  * later initialises each new connection's buffers.  A negative backlog, or
  * one above somaxconn, is replaced by somaxconn.
  */
 void
 solisten_proto(struct socket *so, int backlog)
 {
 	int sbrcv_lowat, sbsnd_lowat;
 	u_int sbrcv_hiwat, sbsnd_hiwat;
 	short sbrcv_flags, sbsnd_flags;
 	sbintime_t sbrcv_timeo, sbsnd_timeo;
 
 	SOCK_LOCK_ASSERT(so);
 
 	/* Already listening: just refresh the backlog below. */
 	if (SOLISTENING(so))
 		goto listening;
 
 	/*
 	 * Change this socket to listening state.
 	 */
 	/* Save the buffer settings before the buffers are torn down. */
 	sbrcv_lowat = so->so_rcv.sb_lowat;
 	sbsnd_lowat = so->so_snd.sb_lowat;
 	sbrcv_hiwat = so->so_rcv.sb_hiwat;
 	sbsnd_hiwat = so->so_snd.sb_hiwat;
 	sbrcv_flags = so->so_rcv.sb_flags;
 	sbsnd_flags = so->so_snd.sb_flags;
 	sbrcv_timeo = so->so_rcv.sb_timeo;
 	sbsnd_timeo = so->so_snd.sb_timeo;
 
 	sbdestroy(&so->so_snd, so);
 	sbdestroy(&so->so_rcv, so);
 	sx_destroy(&so->so_snd.sb_sx);
 	sx_destroy(&so->so_rcv.sb_sx);
 	SOCKBUF_LOCK_DESTROY(&so->so_snd);
 	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 
 #ifdef INVARIANTS
 	/*
 	 * Debug kernels zero everything from so_rcv to the end of the
 	 * structure; presumably so that stale data-socket state cannot be
 	 * mistaken for listen state -- confirm against struct socket.
 	 */
 	bzero(&so->so_rcv,
 	    sizeof(struct socket) - offsetof(struct socket, so_rcv));
 #endif
 
 	/* Store the saved settings in their listen-socket fields. */
 	so->sol_sbrcv_lowat = sbrcv_lowat;
 	so->sol_sbsnd_lowat = sbsnd_lowat;
 	so->sol_sbrcv_hiwat = sbrcv_hiwat;
 	so->sol_sbsnd_hiwat = sbsnd_hiwat;
 	so->sol_sbrcv_flags = sbrcv_flags;
 	so->sol_sbsnd_flags = sbsnd_flags;
 	so->sol_sbrcv_timeo = sbrcv_timeo;
 	so->sol_sbsnd_timeo = sbsnd_timeo;
 
 	/* Start with empty incomplete and completed connection queues. */
 	so->sol_qlen = so->sol_incqlen = 0;
 	TAILQ_INIT(&so->sol_incomp);
 	TAILQ_INIT(&so->sol_comp);
 
 	/* No accept filter and no upcall are installed initially. */
 	so->sol_accept_filter = NULL;
 	so->sol_accept_filter_arg = NULL;
 	so->sol_accept_filter_str = NULL;
 
 	so->sol_upcall = NULL;
 	so->sol_upcallarg = NULL;
 
 	so->so_options |= SO_ACCEPTCONN;
 
 listening:
 	/* Clamp out-of-range backlog values to the system-wide limit. */
 	if (backlog < 0 || backlog > somaxconn)
 		backlog = somaxconn;
 	so->sol_qlimit = backlog;
 }
 
 /*
  * Notify whoever is waiting on a listening socket that a completed
  * connection is available: run the installed upcall, or, if there is
  * none, wake select/poll and kqueue waiters.  Sleepers in accept are then
  * woken and SIGIO is posted when requested.
  *
  * Enters with the listen socket locked, returns with it unlocked.
  */
 void
 solisten_wakeup(struct socket *sol)
 {
 
 	if (sol->sol_upcall == NULL) {
 		selwakeuppri(&sol->so_rdsel, PSOCK);
 		KNOTE_LOCKED(&sol->so_rdsel.si_note, 0);
 	} else
 		(void )sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT);
 	SOLISTEN_UNLOCK(sol);
 
 	/* Wake one thread sleeping on the completed queue. */
 	wakeup_one(&sol->sol_comp);
 
 	if ((sol->so_state & SS_ASYNC) == 0 || sol->so_sigio == NULL)
 		return;
 	pgsigio(&sol->so_sigio, SIGIO, 0);
 }
 
 /*
  * Take one connection off the completed queue of a listening socket and
  * hand it back through *ret with a reference held.  Main consumer of the
  * function is kern_accept4().  Some modules that do their own accept
  * management also use the function.
  *
  * Unless the listening socket is non-blocking, the call sleeps until a
  * connection is queued or an error is pending on the socket.  Returns 0
  * on success, the error from an interrupted sleep, the pending so_error
  * (which is then cleared), or EWOULDBLOCK for a non-blocking socket with
  * an empty queue.
  *
  * Listening socket must be locked on entry and is returned unlocked on
  * return.
  * The flags argument is set of accept4(2) flags and ACCEPT4_INHERIT.
  */
 int
 solisten_dequeue(struct socket *head, struct socket **ret, int flags)
 {
 	struct socket *conn;
 	int error, nbio;
 
 	SOLISTEN_LOCK_ASSERT(head);
 
 	for (;;) {
 		if ((head->so_state & SS_NBIO) != 0 ||
 		    !TAILQ_EMPTY(&head->sol_comp) || head->so_error != 0)
 			break;
 		error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH,
 		    "accept", 0);
 		if (error != 0)
 			goto fail;
 	}
 
 	/* A pending socket error is reported once and then cleared. */
 	error = head->so_error;
 	if (error != 0)
 		head->so_error = 0;
 	else if ((head->so_state & SS_NBIO) != 0 &&
 	    TAILQ_EMPTY(&head->sol_comp))
 		error = EWOULDBLOCK;
 	if (error != 0)
 		goto fail;
 
 	conn = TAILQ_FIRST(&head->sol_comp);
 	SOCK_LOCK(conn);
 	KASSERT(conn->so_qstate == SQ_COMP,
 	    ("%s: so %p not SQ_COMP", __func__, conn));
 	soref(conn);
 	head->sol_qlen--;
 	conn->so_qstate = SQ_NONE;
 	conn->so_listen = NULL;
 	TAILQ_REMOVE(&head->sol_comp, conn, so_list);
 
 	/* Non-blocking mode is inherited from head or taken from flags. */
 	if ((flags & ACCEPT4_INHERIT) != 0)
 		nbio = head->so_state & SS_NBIO;
 	else if ((flags & SOCK_NONBLOCK) != 0)
 		nbio = SS_NBIO;
 	else
 		nbio = 0;
 	conn->so_state |= nbio;
 	SOCK_UNLOCK(conn);
 
 	/* Drop the queue's reference on head; this also unlocks it. */
 	sorele(head);
 
 	*ret = conn;
 	return (0);
 
 fail:
 	SOLISTEN_UNLOCK(head);
 	return (error);
 }
 
 /*
  * Evaluate the reference count and named references on a socket; if no
  * references remain, free it.  This should be called whenever a reference is
  * released, such as in sorele(), but also when named reference flags are
  * cleared in socket or protocol code.
  *
  * Must be called with the socket lock held; the lock is released on every
  * path.
  *
  * sofree() will free the socket if:
  *
  * - There are no outstanding file descriptor references or related consumers
  *   (so_count == 0).
  *
  * - The socket has been closed by user space, if ever open (SS_NOFDREF).
  *
  * - The protocol does not have an outstanding strong reference on the socket
  *   (SS_PROTOREF).
  *
  * - The socket is not on a completed connection queue (SQ_COMP).  A socket
  *   on that queue has already been advertised to a process; if it were
  *   removed, the user process may block in accept() despite select() saying
  *   the socket was ready.
  */
 void
 sofree(struct socket *so)
 {
 	struct protosw *pr = so->so_proto;
 
 	SOCK_LOCK_ASSERT(so);
 
 	/* Something still holds the socket: just drop the lock. */
 	if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
 	    (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
 		SOCK_UNLOCK(so);
 		return;
 	}
 
 	if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
 		struct socket *sol;
 
 		sol = so->so_listen;
 		KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));
 
 		/*
 		 * To solve the race between close of a listening socket and
 		 * a socket on its incomplete queue, we need to lock both.
 		 * The order is first listening socket, then regular.
 		 * The check above established that SS_NOFDREF is set and
 		 * SS_PROTOREF is clear, so this function and the listening
 		 * socket are the only pointers to so.  To preserve so and
 		 * sol, we reference both and then relock.
 		 * After relock the socket may not move to so_comp since it
 		 * doesn't have PCB already, but it may be removed from
 		 * so_incomp.  If that happens, we share responsibility for
 		 * freeing the socket, but soclose() has already removed
 		 * it from the queue.
 		 */
 		soref(sol);
 		soref(so);
 		SOCK_UNLOCK(so);
 		SOLISTEN_LOCK(sol);
 		SOCK_LOCK(so);
 		if (so->so_qstate == SQ_INCOMP) {
 			KASSERT(so->so_listen == sol,
 			    ("%s: so %p migrated out of sol %p",
 			    __func__, so, sol));
 			TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
 			sol->sol_incqlen--;
 			/* This is guaranteed not to be the last. */
 			refcount_release(&sol->so_count);
 			so->so_qstate = SQ_NONE;
 			so->so_listen = NULL;
 		} else
 			KASSERT(so->so_listen == NULL,
 			    ("%s: so %p not on (in)comp with so_listen",
 			    __func__, so));
 		/* Drop the temporary reference taken on the listener above. */
 		sorele(sol);
 		KASSERT(so->so_count == 1,
 		    ("%s: so %p count %u", __func__, so, so->so_count));
 		/* Undo the temporary soref(so) taken before relocking. */
 		so->so_count = 0;
 	}
 	if (SOLISTENING(so))
 		so->so_error = ECONNABORTED;
 	SOCK_UNLOCK(so);
 
 	if (so->so_dtor != NULL)
 		so->so_dtor(so);
 
 	VNET_SO_ASSERT(so);
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
 		(*pr->pr_domain->dom_dispose)(so);
 	if (pr->pr_usrreqs->pru_detach != NULL)
 		(*pr->pr_usrreqs->pru_detach)(so);
 
 	/*
 	 * From this point on, we assume that no other references to this
 	 * socket exist anywhere else in the stack.  Therefore, no locks need
 	 * to be acquired or held.
 	 *
 	 * We used to do a lot of socket buffer and socket locking here, as
 	 * well as invoke sorflush() and perform wakeups.  The direct call to
 	 * dom_dispose() and sbdestroy() are an inlining of what was
 	 * necessary from sorflush().
 	 *
 	 * Notice that the socket buffer and kqueue state are torn down
 	 * before calling pru_detach.  This means that protocols should not
 	 * assume they can perform socket wakeups, etc, in their detach code.
 	 *
 	 * NOTE(review): as written, pru_detach is invoked above, before the
 	 * sbdestroy()/seldrain()/knlist_destroy() calls below, so the
 	 * ordering described in the previous paragraph does not match the
 	 * statement order in this function -- confirm which is intended.
 	 */
 	if (!SOLISTENING(so)) {
 		sbdestroy(&so->so_snd, so);
 		sbdestroy(&so->so_rcv, so);
 	}
 	seldrain(&so->so_rdsel);
 	seldrain(&so->so_wrsel);
 	knlist_destroy(&so->so_rdsel.si_note);
 	knlist_destroy(&so->so_wrsel.si_note);
 	sodealloc(so);
 }
 
 /*
  * Close a socket on last file table reference removal.  Initiate disconnect
  * if connected.  Free socket when disconnect complete.
  *
  * This function will sorele() the socket.  Note that soclose() may be called
  * prior to the ref count reaching zero.  The actual socket structure will
  * not be freed until the ref count reaches zero.
  */
 int
 soclose(struct socket *so)
 {
 	struct accept_queue lqueue;
-	bool listening;
 	int error = 0;
 
 	KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
 
 	CURVNET_SET(so->so_vnet);
 	funsetown(&so->so_sigio);
 	if (so->so_state & SS_ISCONNECTED) {
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
 			error = sodisconnect(so);
 			if (error) {
 				if (error == ENOTCONN)
 					error = 0;
 				goto drop;
 			}
 		}
 
 		if ((so->so_options & SO_LINGER) != 0 && so->so_linger != 0) {
 			if ((so->so_state & SS_ISDISCONNECTING) &&
 			    (so->so_state & SS_NBIO))
 				goto drop;
 			while (so->so_state & SS_ISCONNECTED) {
 				error = tsleep(&so->so_timeo,
 				    PSOCK | PCATCH, "soclos",
 				    so->so_linger * hz);
 				if (error)
 					break;
 			}
 		}
 	}
 
 drop:
 	if (so->so_proto->pr_usrreqs->pru_close != NULL)
 		(*so->so_proto->pr_usrreqs->pru_close)(so);
 
 	SOCK_LOCK(so);
-	if ((listening = (so->so_options & SO_ACCEPTCONN))) {
+	if (SOLISTENING(so)) {
 		struct socket *sp;
 
 		TAILQ_INIT(&lqueue);
 		TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
 		TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);
 
 		so->sol_qlen = so->sol_incqlen = 0;
 
 		TAILQ_FOREACH(sp, &lqueue, so_list) {
 			SOCK_LOCK(sp);
 			sp->so_qstate = SQ_NONE;
 			sp->so_listen = NULL;
 			SOCK_UNLOCK(sp);
 			/* Guaranteed not to be the last. */
 			refcount_release(&so->so_count);
 		}
 	}
 	KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
 	so->so_state |= SS_NOFDREF;
 	sorele(so);
-	if (listening) {
+	if (SOLISTENING(so)) {
 		struct socket *sp, *tsp;
 
 		TAILQ_FOREACH_SAFE(sp, &lqueue, so_list, tsp) {
 			SOCK_LOCK(sp);
 			if (sp->so_count == 0) {
 				SOCK_UNLOCK(sp);
 				soabort(sp);
 			} else
 				/* sp is now in sofree() */
 				SOCK_UNLOCK(sp);
 		}
 	}
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * soabort() abruptly tears down a connection, for example when a resource
  * limit is hit (listen queue depth exceeded) or when a listen socket is
  * closed while sockets are still waiting to be accepted.
  *
  * The interface is tricky: it operates on an unreferenced socket, so it
  * must only be called by the thread that actually removed the socket from
  * the listen queue it was on; otherwise races with other threads are
  * risked.
  *
  * Because it calls into protocol code, no socket locks may be held by the
  * caller.  Protocols do call it while holding their own recursible protocol
  * mutexes, which should be subject to review in the future.
  */
 void
 soabort(struct socket *so)
 {
 
 	/*
 	 * As far as is possible, assert that nobody holds a reference to
 	 * this socket.  That is not quite the same as asserting that the
 	 * current thread arranged for there to be none, but it is as close
 	 * as we can get for now.
 	 */
 	KASSERT(so->so_count == 0, ("soabort: so_count"));
 	KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
 	KASSERT((so->so_state & SS_NOFDREF) != 0, ("soabort: !SS_NOFDREF"));
 	VNET_SO_ASSERT(so);
 
 	/* Let the protocol abort first, if it provides a handler. */
 	if (so->so_proto->pr_usrreqs->pru_abort != NULL)
 		so->so_proto->pr_usrreqs->pru_abort(so);
 
 	/* sofree() expects the socket lock held and releases it. */
 	SOCK_LOCK(so);
 	sofree(so);
 }
 
 /*
  * Clear SS_NOFDREF on the socket under the socket lock, then invoke the
  * protocol's pru_accept handler with 'nam' and return its result.
  */
 int
 soaccept(struct socket *so, struct sockaddr **nam)
 {
 	int rc;
 
 	SOCK_LOCK(so);
 	KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
 	so->so_state &= ~SS_NOFDREF;
 	SOCK_UNLOCK(so);
 
 	CURVNET_SET(so->so_vnet);
 	rc = so->so_proto->pr_usrreqs->pru_accept(so, nam);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 int
 soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (soconnectat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Initiate a connection on 'so' to the address 'nam'.
  *
  * When 'fd' is AT_FDCWD the protocol's pru_connect handler is used;
  * otherwise pru_connectat is called with 'fd'.
  *
  * Returns EOPNOTSUPP for a listening socket, EISCONN when the socket is
  * already connected or connecting and either the protocol requires a
  * connection (PR_CONNREQUIRED) or the implicit sodisconnect() fails, and
  * otherwise whatever the protocol handler returns.
  */
 int
 soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
-	if (so->so_options & SO_ACCEPTCONN)
+	/* XXXMJ racy */
+	if (SOLISTENING(so))
 		return (EOPNOTSUPP);
 
 	CURVNET_SET(so->so_vnet);
 	/*
 	 * If protocol is connection-based, can only connect once.
 	 * Otherwise, if connected, try to disconnect first.  This allows
 	 * user to disconnect by connecting to, e.g., a null address.
 	 *
 	 * Note that the value assigned from sodisconnect() is only used as
 	 * a truth value: on failure it is replaced by EISCONN below.
 	 */
 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
 	    (error = sodisconnect(so)))) {
 		error = EISCONN;
 	} else {
 		/*
 		 * Prevent accumulated error from previous connection from
 		 * biting us.
 		 */
 		so->so_error = 0;
 		if (fd == AT_FDCWD) {
 			error = (*so->so_proto->pr_usrreqs->pru_connect)(so,
 			    nam, td);
 		} else {
 			error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd,
 			    so, nam, td);
 		}
 	}
 	CURVNET_RESTORE();
 
 	return (error);
 }
 
 /*
  * Pass the socket pair to so1's pru_connect2 handler, with so1's vnet set
  * as the current vnet for the duration of the call.
  */
 int
 soconnect2(struct socket *so1, struct socket *so2)
 {
 	int rc;
 
 	CURVNET_SET(so1->so_vnet);
 	rc = so1->so_proto->pr_usrreqs->pru_connect2(so1, so2);
 	CURVNET_RESTORE();
 
 	return (rc);
 }
 
 /*
  * Ask the protocol to disconnect 'so'.  Returns ENOTCONN if the socket is
  * not connected, EALREADY if a disconnect is already in progress, and
  * otherwise the result of the protocol's pru_disconnect handler.
  */
 int
 sodisconnect(struct socket *so)
 {
 
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return (ENOTCONN);
 	if ((so->so_state & SS_ISDISCONNECTING) != 0)
 		return (EALREADY);
 
 	VNET_SO_ASSERT(so);
 	return (so->so_proto->pr_usrreqs->pru_disconnect(so));
 }
 
 /*
  * Translate message flags into the wait argument passed to sblock():
  * 0 when MSG_DONTWAIT is set in f, SBL_WAIT otherwise.
  */
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
 /*
  * Send a single datagram on a SOCK_DGRAM socket whose protocol is
  * PR_ATOMIC (both are asserted).  The payload comes from 'uio' when it is
  * non-NULL, otherwise from the mbuf chain 'top'.
  *
  * Unlike sosend_generic(), this path never takes sblock() and never
  * sleeps for buffer space: a message that does not fit in the currently
  * available send space fails with EMSGSIZE.
  *
  * Any of 'top' and 'control' still held at exit is freed here.  After the
  * pru_send call both locals are cleared, so they are not freed again.
  *
  * Returns 0 on success or an errno value (EINVAL, EPIPE, ENOTCONN,
  * EDESTADDRREQ, EMSGSIZE, EFAULT, a pending so_error, or the result of
  * pru_send).
  */
 int
 sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
 
 	KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
 	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
 	    ("sosend_dgram: !PR_ATOMIC"));
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 */
 	if (resid < 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * Only toggle SO_DONTROUTE around the send when the caller asked
 	 * for it per-message and the option is not already set.
 	 */
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	SOCKBUF_LOCK(&so->so_snd);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		SOCKBUF_UNLOCK(&so->so_snd);
 		error = EPIPE;
 		goto out;
 	}
 	/* A pending socket error is reported once and then cleared. */
 	if (so->so_error) {
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		goto out;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		/*
 		 * `sendto' and `sendmsg' is allowed on a connection-based
 		 * socket if it supports implied connect.  Return ENOTCONN if
 		 * not connected and no address is supplied.
 		 */
 		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 			    !(resid == 0 && clen != 0)) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = ENOTCONN;
 				goto out;
 			}
 		} else if (addr == NULL) {
 			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 				error = ENOTCONN;
 			else
 				error = EDESTADDRREQ;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto out;
 		}
 	}
 
 	/*
 	 * Do we need MSG_OOB support in SOCK_DGRAM?  Signs here may be a
 	 * problem and need fixing.
 	 */
 	space = sbspace(&so->so_snd);
 	if (flags & MSG_OOB)
 		space += 1024;
 	space -= clen;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	/* The whole datagram must fit; there is no waiting for space. */
 	if (resid > space) {
 		error = EMSGSIZE;
 		goto out;
 	}
 	if (uio == NULL) {
 		resid = 0;
 		if (flags & MSG_EOR)
 			top->m_flags |= M_EOR;
 	} else {
 		/*
 		 * Copy the data from userland into a mbuf chain.
 		 * If no data is to be copied in, a single empty mbuf
 		 * is returned.
 		 */
 		top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
 		    (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
 		if (top == NULL) {
 			error = EFAULT;	/* only possible error */
 			goto out;
 		}
 		space -= resid - uio->uio_resid;
 		resid = uio->uio_resid;
 	}
 	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
 	/*
 	 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
 	 * than with.
 	 */
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options |= SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	/*
 	 * XXX all the SBS_CANTSENDMORE checks previously done could be out
 	 * of date.  We could have received a reset packet in an interrupt or
 	 * maybe we slept while doing page faults in uiomove() etc.  We could
 	 * probably recheck again inside the locking protection here, but
 	 * there are probably other places that this also happens.  We must
 	 * rethink this.
 	 */
 	VNET_SO_ASSERT(so);
 	/*
 	 * The flags argument below is a chained conditional: PRUS_OOB for
 	 * MSG_OOB; else PRUS_EOF when the EOF condition holds; else
 	 * PRUS_MORETOCOME when (MSG_MORETOCOME || (resid > 0 && space > 0));
 	 * else 0.  resid is expected to be 0 here (see the KASSERT above), so
 	 * only MSG_MORETOCOME can select PRUS_MORETOCOME on this path.
 	 */
 	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 	    (flags & MSG_OOB) ? PRUS_OOB :
 	/*
 	 * If the user set MSG_EOF, the protocol understands this flag and
 	 * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND.
 	 */
 	    ((flags & MSG_EOF) &&
 	     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 	     (resid <= 0)) ?
 		PRUS_EOF :
 		/* If there is more to send set PRUS_MORETOCOME */
 		(flags & MSG_MORETOCOME) ||
 		(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 		top, addr, control, td);
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options &= ~SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	/*
 	 * top and control were handed to pru_send; clear the locals so the
 	 * cleanup below does not free them.
 	 */
 	clen = 0;
 	control = NULL;
 	top = NULL;
 out:
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 /*
  * Send on a socket.  If send must go all at once and message is larger than
  * send buffering, then hard error.  Lock against other senders.  If must go
  * all at once and not enough room now, then inform user that this would
  * block and do nothing.  Otherwise, if nonblocking, send as much as
  * possible.  The data to be sent is described by "uio" if nonzero, otherwise
  * by the mbuf chain "top" (which must be null if uio is not).  Data provided
  * in mbuf chain must be small enough to send all at once.
  *
  * Returns nonzero on error, timeout or signal; callers must check for short
  * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
  * on return.
  *
  * Structure: the outer loop re-checks socket state and available send
  * space under the sockbuf lock (sleeping in sbwait() when allowed); the
  * inner loop builds an mbuf chain and passes it to pru_send for as long
  * as data and space remain.
  */
 int
 sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
 	/* A caller-supplied mbuf chain is always sent all at once. */
 	int atomic = sosendallatonce(so) || top;
 	int pru_flag;
 #ifdef KERN_TLS
 	struct ktls_session *tls;
 	int tls_enq_cnt, tls_pruflag;
 	uint8_t tls_rtype;
 
 	/* Set before any 'goto out' so the cleanup can test tls safely. */
 	tls = NULL;
 	tls_rtype = TLS_RLTYPE_APP;
 #endif
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else if ((top->m_flags & M_PKTHDR) != 0)
 		resid = top->m_pkthdr.len;
 	else
 		resid = m_length(top, NULL);
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 *
 	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
 	 * type sockets since that's an error.
 	 */
 	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * Only toggle SO_DONTROUTE around the send when requested
 	 * per-message, not already set, and the protocol is PR_ATOMIC.
 	 */
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
 	    (so->so_proto->pr_flags & PR_ATOMIC);
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	/* Lock against other senders; released at 'release' below. */
 	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 
 #ifdef KERN_TLS
 	tls_pruflag = 0;
 	tls = ktls_hold(so->so_snd.sb_tls_info);
 	if (tls != NULL) {
 		if (tls->mode == TCP_TLS_MODE_SW)
 			tls_pruflag = PRUS_NOTREADY;
 
 		/*
 		 * A TLS_SET_RECORD_TYPE control message is consumed here:
 		 * it selects the record type for the next frame and forces
 		 * an atomic send.
 		 */
 		if (control != NULL) {
 			struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 
 			if (clen >= sizeof(*cm) &&
 			    cm->cmsg_type == TLS_SET_RECORD_TYPE) {
 				tls_rtype = *((uint8_t *)CMSG_DATA(cm));
 				clen = 0;
 				m_freem(control);
 				control = NULL;
 				atomic = 1;
 			}
 		}
 	}
 #endif
 
 restart:
 	do {
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EPIPE;
 			goto release;
 		}
 		/* A pending socket error is reported once and cleared. */
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			/*
 			 * `sendto' and `sendmsg' is allowed on a connection-
 			 * based socket if it supports implied connect.
 			 * Return ENOTCONN if not connected and no address is
 			 * supplied.
 			 */
 			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 				    !(resid == 0 && clen != 0)) {
 					SOCKBUF_UNLOCK(&so->so_snd);
 					error = ENOTCONN;
 					goto release;
 				}
 			} else if (addr == NULL) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 					error = ENOTCONN;
 				else
 					error = EDESTADDRREQ;
 				goto release;
 			}
 		}
 		space = sbspace(&so->so_snd);
 		if (flags & MSG_OOB)
 			space += 1024;
 		/* An atomic message can never exceed the high-water mark. */
 		if ((atomic && resid > so->so_snd.sb_hiwat) ||
 		    clen > so->so_snd.sb_hiwat) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EMSGSIZE;
 			goto release;
 		}
 		/*
 		 * Not everything fits right now.  Wait (or fail with
 		 * EWOULDBLOCK when non-blocking) if the send is atomic, or
 		 * if space is below the low-water mark or cannot even hold
 		 * the control data; otherwise fall through and send what
 		 * fits.
 		 */
 		if (space < resid + clen &&
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
 			if ((so->so_state & SS_NBIO) ||
 			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EWOULDBLOCK;
 				goto release;
 			}
 			error = sbwait(&so->so_snd);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
 			/* Re-validate all socket state after sleeping. */
 			goto restart;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 		space -= clen;
 		do {
 			if (uio == NULL) {
 				resid = 0;
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 #ifdef KERN_TLS
 				if (tls != NULL) {
 					ktls_frame(top, tls, &tls_enq_cnt,
 					    tls_rtype);
 					tls_rtype = TLS_RLTYPE_APP;
 				}
 #endif
 			} else {
 				/*
 				 * Copy the data from userland into a mbuf
 				 * chain.  If resid is 0, which can happen
 				 * only if we have control to send, then
 				 * a single empty mbuf is returned.  This
 				 * is a workaround to prevent protocol send
 				 * methods to panic.
 				 */
 #ifdef KERN_TLS
 				if (tls != NULL) {
 					top = m_uiotombuf(uio, M_WAITOK, space,
 					    tls->params.max_frame_len,
 					    M_EXTPG |
 					    ((flags & MSG_EOR) ? M_EOR : 0));
 					if (top != NULL) {
 						ktls_frame(top, tls,
 						    &tls_enq_cnt, tls_rtype);
 					}
 					tls_rtype = TLS_RLTYPE_APP;
 				} else
 #endif
 					top = m_uiotombuf(uio, M_WAITOK, space,
 					    (atomic ? max_hdr : 0),
 					    (atomic ? M_PKTHDR : 0) |
 					    ((flags & MSG_EOR) ? M_EOR : 0));
 				if (top == NULL) {
 					error = EFAULT; /* only possible error */
 					goto release;
 				}
 				/* Charge the bytes just copied in. */
 				space -= resid - uio->uio_resid;
 				resid = uio->uio_resid;
 			}
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options |= SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 			/*
 			 * XXX all the SBS_CANTSENDMORE checks previously
 			 * done could be out of date.  We could have received
 			 * a reset packet in an interrupt or maybe we slept
 			 * while doing page faults in uiomove() etc.  We
 			 * could probably recheck again inside the locking
 			 * protection here, but there are probably other
 			 * places that this also happens.  We must rethink
 			 * this.
 			 */
 			VNET_SO_ASSERT(so);
 
 			/*
 			 * Chained conditional: PRUS_OOB for MSG_OOB; else
 			 * PRUS_EOF when the EOF condition holds; else
 			 * PRUS_MORETOCOME when (MSG_MORETOCOME ||
 			 * (resid > 0 && space > 0)); else 0.
 			 */
 			pru_flag = (flags & MSG_OOB) ? PRUS_OOB :
 			/*
 			 * If the user set MSG_EOF, the protocol understands
 			 * this flag and nothing left to send then use
 			 * PRU_SEND_EOF instead of PRU_SEND.
 			 */
 			    ((flags & MSG_EOF) &&
 			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 			     (resid <= 0)) ?
 				PRUS_EOF :
 			/* If there is more to send set PRUS_MORETOCOME. */
 			    (flags & MSG_MORETOCOME) ||
 			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
 
 #ifdef KERN_TLS
 			pru_flag |= tls_pruflag;
 #endif
 
 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 			    pru_flag, top, addr, control, td);
 
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options &= ~SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 
 #ifdef KERN_TLS
 			/*
 			 * Software TLS: on success take a socket reference
 			 * and queue the chain with ktls_enqueue(); on
 			 * failure free the chain here.
 			 */
 			if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) {
 				if (error != 0) {
 					m_freem(top);
 					top = NULL;
 				} else {
 					soref(so);
 					ktls_enqueue(top, so, tls_enq_cnt);
 				}
 			}
 #endif
 			/*
 			 * top and control were handed to pru_send; clear the
 			 * locals so the cleanup at 'out' does not free them.
 			 */
 			clen = 0;
 			control = NULL;
 			top = NULL;
 			if (error)
 				goto release;
 		} while (resid && space > 0);
 	} while (resid);
 
 release:
 	sbunlock(&so->so_snd);
 out:
 #ifdef KERN_TLS
 	if (tls != NULL)
 		ktls_free(tls);
 #endif
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 /*
  * Dispatch a send to the protocol's pru_sosend handler.  A listening
  * socket cannot send: its data and control chains are freed and ENOTCONN
  * is returned.
  */
 int
 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	int error;
 
 	CURVNET_SET(so->so_vnet);
 	if (SOLISTENING(so)) {
 		m_freem(top);
 		m_freem(control);
 		error = ENOTCONN;
 	} else {
 		error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio,
 		    top, control, flags, td);
 	}
 	CURVNET_RESTORE();
 
 	return (error);
 }
 
 /*
  * The part of soreceive() that implements reading non-inline out-of-band
  * data from a socket.  For more complete comments, see soreceive(), from
  * which this code originated.
  *
  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
  * unable to return an mbuf chain to the caller.
  */
 static int
 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
 {
 	struct protosw *pr = so->so_proto;
 	struct mbuf *m;
 	int error;
 
 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
 	VNET_SO_ASSERT(so);
 
 	m = m_get(M_WAITOK, MT_DATA);
 	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
 	if (error)
 		goto bad;
 	do {
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
 	} while (uio->uio_resid && error == 0 && m);
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * After the first mbuf of the first chain in a socket buffer has been
  * replaced or removed, write the resulting state back into the socket
  * buffer so other consumers see consistent values.  'nextrecord' is the
  * caller's saved copy of the original sb->sb_mb->m_nextpkt, which has to be
  * restored whenever the lead mbuf changes.
  * NOTE: 'nextrecord' may be NULL.
  */
 static __inline void
 sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/*
 	 * Re-attach nextrecord: behind the lead mbuf if there still is one,
 	 * otherwise as the new first record.
 	 */
 	if (sb->sb_mb == NULL)
 		sb->sb_mb = nextrecord;
 	else
 		sb->sb_mb->m_nextpkt = nextrecord;
 
 	/*
 	 * Fix up the dependent fields.  This is SB_EMPTY_FIXUP() expanded
 	 * inline, plus the case where sb_mb changed but is still the last
 	 * record.
 	 */
 	if (sb->sb_mb == NULL) {
 		sb->sb_mbtail = NULL;
 		sb->sb_lastrecord = NULL;
 		return;
 	}
 	if (sb->sb_mb->m_nextpkt == NULL)
 		sb->sb_lastrecord = sb->sb_mb;
 }
 
 /*
  * Implement receive operations on a socket.  We depend on the way that
  * records are added to the sockbuf by sbappend.  In particular, each record
  * (mbufs linked through m_next) must begin with an address if the protocol
  * so specifies, followed by an optional mbuf or mbufs containing ancillary
  * data, and then zero or more mbufs of data.  In order to allow parallelism
  * between network receive and copying to user space, as well as avoid
  * sleeping with a mutex held, we release the socket buffer mutex during the
  * user space copy.  Although the sockbuf is locked, new data may still be
  * appended, and thus we must maintain consistency of the sockbuf during that
  * time.
  *
  * The caller may receive the data as a single mbuf chain by supplying an
  * mbuf **mp0 for use in returning the chain.  The uio is then used only for
  * the count in uio_resid.
  */
 int
 soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct mbuf *m, **mp;
 	int flags, error, offset;
 	ssize_t len;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 	int moff, type = 0;
 	ssize_t orig_resid = uio->uio_resid;
 
 	mp = mp0;
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp != NULL)
 		*mp = NULL;
 	if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
 	    && uio->uio_resid) {
 		VNET_SO_ASSERT(so);
 		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
 	}
 
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 
 restart:
 	SOCKBUF_LOCK(&so->so_rcv);
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more (subject
 	 * to any timeout) if:
 	 *   1. the current count is less than the low water mark, or
 	 *   2. MSG_DONTWAIT is not set
 	 */
 	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
 	    sbavail(&so->so_rcv) < uio->uio_resid) &&
 	    sbavail(&so->so_rcv) < so->so_rcv.sb_lowat &&
 	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
 		KASSERT(m != NULL || !sbavail(&so->so_rcv),
 		    ("receive: m == %p sbavail == %u",
 		    m, sbavail(&so->so_rcv)));
 		if (so->so_error) {
 			if (m != NULL)
 				goto dontblock;
 			error = so->so_error;
 			if ((flags & MSG_PEEK) == 0)
 				so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			if (m != NULL)
 				goto dontblock;
 #ifdef KERN_TLS
 			else if (so->so_rcv.sb_tlsdcc == 0 &&
 			    so->so_rcv.sb_tlscc == 0) {
 #else
 			else {
 #endif
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			}
 		}
 		for (; m != NULL; m = m->m_next)
 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
 				m = so->so_rcv.sb_mb;
 				goto dontblock;
 			}
 		if ((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED |
 		    SS_ISDISCONNECTING | SS_ISDISCONNECTED)) == 0 &&
 		    (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = ENOTCONN;
 			goto release;
 		}
 		if (uio->uio_resid == 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = EWOULDBLOCK;
 			goto release;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		if (error)
 			goto release;
 		goto restart;
 	}
 dontblock:
 	/*
 	 * From this point onward, we maintain 'nextrecord' as a cache of the
 	 * pointer to the next record in the socket buffer.  We must keep the
 	 * various socket buffer pointers and local stack versions of the
 	 * pointers in sync, pushing out modifications before dropping the
 	 * socket buffer mutex, and re-reading them when picking it up.
 	 *
 	 * Otherwise, we will race with the network stack appending new data
 	 * or records onto the socket buffer by using inconsistent/stale
 	 * versions of the field, possibly resulting in socket buffer
 	 * corruption.
 	 *
 	 * By holding the high-level sblock(), we prevent simultaneous
 	 * readers from pulling off the front of the socket buffer.
 	 */
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	nextrecord = m->m_nextpkt;
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
 		orig_resid = 0;
 		if (psa != NULL)
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 			    M_NOWAIT);
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
 			sbfree(&so->so_rcv, m);
 			so->so_rcv.sb_mb = m_free(m);
 			m = so->so_rcv.sb_mb;
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		}
 	}
 
 	/*
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
 	 * just copy the data; if !MSG_PEEK, we call into the protocol to
 	 * perform externalization (or freeing if controlp == NULL).
 	 */
 	if (m != NULL && m->m_type == MT_CONTROL) {
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 #ifdef KERN_TLS
 		struct cmsghdr *cmsg;
 		struct tls_get_record tgr;
 
 		/*
 		 * For MSG_TLSAPPDATA, check for a non-application data
 		 * record.  If found, return ENXIO without removing
 		 * it from the receive queue.  This allows a subsequent
 		 * call without MSG_TLSAPPDATA to receive it.
 		 * Note that, for TLS, there should only be a single
 		 * control mbuf with the TLS_GET_RECORD message in it.
 		 */
 		if (flags & MSG_TLSAPPDATA) {
 			cmsg = mtod(m, struct cmsghdr *);
 			if (cmsg->cmsg_type == TLS_GET_RECORD &&
 			    cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) {
 				memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
 				/* This will need to change for TLS 1.3. */
 				if (tgr.tls_type != TLS_RLTYPE_APP) {
 					SOCKBUF_UNLOCK(&so->so_rcv);
 					error = ENXIO;
 					goto release;
 				}
 			}
 		}
 #endif
 
 		do {
 			if (flags & MSG_PEEK) {
 				if (controlp != NULL) {
 					*controlp = m_copym(m, 0, m->m_len,
 					    M_NOWAIT);
 					controlp = &(*controlp)->m_next;
 				}
 				m = m->m_next;
 			} else {
 				sbfree(&so->so_rcv, m);
 				so->so_rcv.sb_mb = m->m_next;
 				m->m_next = NULL;
 				*cme = m;
 				cme = &(*cme)->m_next;
 				m = so->so_rcv.sb_mb;
 			}
 		} while (m != NULL && m->m_type == MT_CONTROL);
 		if ((flags & MSG_PEEK) == 0)
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		while (cm != NULL) {
 			cmn = cm->m_next;
 			cm->m_next = NULL;
 			if (pr->pr_domain->dom_externalize != NULL) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				VNET_SO_ASSERT(so);
 				error = (*pr->pr_domain->dom_externalize)
 				    (cm, controlp, flags);
 				SOCKBUF_LOCK(&so->so_rcv);
 			} else if (controlp != NULL)
 				*controlp = cm;
 			else
 				m_freem(cm);
 			if (controlp != NULL) {
 				orig_resid = 0;
 				while (*controlp != NULL)
 					controlp = &(*controlp)->m_next;
 			}
 			cm = cmn;
 		}
 		if (m != NULL)
 			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
 		else
 			nextrecord = so->so_rcv.sb_mb;
 		orig_resid = 0;
 	}
 	if (m != NULL) {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(m->m_nextpkt == nextrecord,
 			    ("soreceive: post-control, nextrecord !sync"));
 			if (nextrecord == NULL) {
 				KASSERT(so->so_rcv.sb_mb == m,
 				    ("soreceive: post-control, sb_mb!=m"));
 				KASSERT(so->so_rcv.sb_lastrecord == m,
 				    ("soreceive: post-control, lastrecord!=m"));
 			}
 		}
 		type = m->m_type;
 		if (type == MT_OOBDATA)
 			flags |= MSG_OOB;
 	} else {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(so->so_rcv.sb_mb == nextrecord,
 			    ("soreceive: sb_mb != nextrecord"));
 			if (so->so_rcv.sb_mb == NULL) {
 				KASSERT(so->so_rcv.sb_lastrecord == NULL,
 				    ("soreceive: sb_lastercord != NULL"));
 			}
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 
 	/*
 	 * Now continue to read any data mbufs off of the head of the socket
 	 * buffer until the read request is satisfied.  Note that 'type' is
 	 * used to store the type of any mbuf reads that have happened so far
 	 * such that soreceive() can stop reading if the type changes, which
 	 * causes soreceive() to return only one of regular data and inline
 	 * out-of-band data in a single socket receive operation.
 	 */
 	moff = 0;
 	offset = 0;
 	while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0
 	    && error == 0) {
 		/*
 		 * If the type of mbuf has changed since the last mbuf
 		 * examined ('type'), end the receive operation.
 		 */
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
 			if (type != m->m_type)
 				break;
 		} else if (type == MT_OOBDATA)
 			break;
 		else
 		    KASSERT(m->m_type == MT_DATA,
 			("m->m_type == %d", m->m_type));
 		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
 		len = uio->uio_resid;
 		if (so->so_oobmark && len > so->so_oobmark - offset)
 			len = so->so_oobmark - offset;
 		if (len > m->m_len - moff)
 			len = m->m_len - moff;
 		/*
 		 * If mp is set, just pass back the mbufs.  Otherwise copy
 		 * them out via the uio, then free.  Sockbuf must be
 		 * consistent here (points to current mbuf, it points to next
 		 * record) when we drop priority; we must note any additions
 		 * to the sockbuf when we block interrupts again.
 		 */
 		if (mp == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			if ((m->m_flags & M_EXTPG) != 0)
 				error = m_unmapped_uiomove(m, moff, uio,
 				    (int)len);
 			else
 				error = uiomove(mtod(m, char *) + moff,
 				    (int)len, uio);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (error) {
 				/*
 				 * The MT_SONAME mbuf has already been removed
 				 * from the record, so it is necessary to
 				 * remove the data mbufs, if any, to preserve
 				 * the invariant in the case of PR_ADDR that
 				 * requires MT_SONAME mbufs at the head of
 				 * each record.
 				 */
 				if (pr->pr_flags & PR_ATOMIC &&
 				    ((flags & MSG_PEEK) == 0))
 					(void)sbdroprecord_locked(&so->so_rcv);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			}
 		} else
 			uio->uio_resid -= len;
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (len == m->m_len - moff) {
 			if (m->m_flags & M_EOR)
 				flags |= MSG_EOR;
 			if (flags & MSG_PEEK) {
 				m = m->m_next;
 				moff = 0;
 			} else {
 				nextrecord = m->m_nextpkt;
 				sbfree(&so->so_rcv, m);
 				if (mp != NULL) {
 					m->m_nextpkt = NULL;
 					*mp = m;
 					mp = &m->m_next;
 					so->so_rcv.sb_mb = m = m->m_next;
 					*mp = NULL;
 				} else {
 					so->so_rcv.sb_mb = m_free(m);
 					m = so->so_rcv.sb_mb;
 				}
 				sockbuf_pushsync(&so->so_rcv, nextrecord);
 				SBLASTRECORDCHK(&so->so_rcv);
 				SBLASTMBUFCHK(&so->so_rcv);
 			}
 		} else {
 			if (flags & MSG_PEEK)
 				moff += len;
 			else {
 				if (mp != NULL) {
 					if (flags & MSG_DONTWAIT) {
 						*mp = m_copym(m, 0, len,
 						    M_NOWAIT);
 						if (*mp == NULL) {
 							/*
 							 * m_copym() couldn't
 							 * allocate an mbuf.
 							 * Adjust uio_resid back
 							 * (it was adjusted
 							 * down by len bytes,
 							 * which we didn't end
 							 * up "copying" over).
 							 */
 							uio->uio_resid += len;
 							break;
 						}
 					} else {
 						SOCKBUF_UNLOCK(&so->so_rcv);
 						*mp = m_copym(m, 0, len,
 						    M_WAITOK);
 						SOCKBUF_LOCK(&so->so_rcv);
 					}
 				}
 				sbcut_locked(&so->so_rcv, len);
 			}
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_oobmark) {
 			if ((flags & MSG_PEEK) == 0) {
 				so->so_oobmark -= len;
 				if (so->so_oobmark == 0) {
 					so->so_rcv.sb_state |= SBS_RCVATMARK;
 					break;
 				}
 			} else {
 				offset += len;
 				if (offset == so->so_oobmark)
 					break;
 			}
 		}
 		if (flags & MSG_EOR)
 			break;
 		/*
 		 * If the MSG_WAITALL flag is set (for non-atomic socket), we
 		 * must not quit until "uio->uio_resid == 0" or an error
 		 * termination.  If a signal/timeout occurs, return with a
 		 * short count but without error.  Keep sockbuf locked
 		 * against other readers.
 		 */
 		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
 		    !sosendallatonce(so) && nextrecord == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			if (so->so_error ||
 			    so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				break;
 			/*
 			 * Notify the protocol that some data has been
 			 * drained before blocking.
 			 */
 			if (pr->pr_flags & PR_WANTRCVD) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				VNET_SO_ASSERT(so);
 				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 				SOCKBUF_LOCK(&so->so_rcv);
 			}
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			/*
 			 * We could have received some data while notifying
 			 * the protocol.  Skip blocking in this case.
 			 */
 			if (so->so_rcv.sb_mb == NULL) {
 				error = sbwait(&so->so_rcv);
 				if (error) {
 					SOCKBUF_UNLOCK(&so->so_rcv);
 					goto release;
 				}
 			}
 			m = so->so_rcv.sb_mb;
 			if (m != NULL)
 				nextrecord = m->m_nextpkt;
 		}
 	}
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
 		flags |= MSG_TRUNC;
 		if ((flags & MSG_PEEK) == 0)
 			(void) sbdroprecord_locked(&so->so_rcv);
 	}
 	if ((flags & MSG_PEEK) == 0) {
 		if (m == NULL) {
 			/*
 			 * First part is an inline SB_EMPTY_FIXUP().  Second
 			 * part makes sure sb_lastrecord is up-to-date if
 			 * there is still data in the socket buffer.
 			 */
 			so->so_rcv.sb_mb = nextrecord;
 			if (so->so_rcv.sb_mb == NULL) {
 				so->so_rcv.sb_mbtail = NULL;
 				so->so_rcv.sb_lastrecord = NULL;
 			} else if (nextrecord->m_nextpkt == NULL)
 				so->so_rcv.sb_lastrecord = nextrecord;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		/*
 		 * If soreceive() is being done from the socket callback,
 		 * then don't need to generate ACK to peer to update window,
 		 * since ACK will be generated on return to TCP.
 		 */
 		if (!(flags & MSG_SOCALLBCK) &&
 		    (pr->pr_flags & PR_WANTRCVD)) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			VNET_SO_ASSERT(so);
 			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 			SOCKBUF_LOCK(&so->so_rcv);
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (orig_resid == uio->uio_resid && orig_resid &&
 	    (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		goto restart;
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	if (flagsp != NULL)
 		*flagsp |= flags;
 release:
 	sbunlock(&so->so_rcv);
 	return (error);
 }
 
 /*
  * Optimized version of soreceive() for stream (TCP) sockets.
  *
  * Sockets that are not SOCK_STREAM are rejected with EINVAL.  MSG_OOB
  * requests are handed to soreceive_rcvoob() and, under KERN_TLS, sockets
  * whose receive buffer has sb_tls_info set are handed to
  * soreceive_generic().  Otherwise the receive buffer is sblock()ed and the
  * data is either copied out through 'uio' (mp0 == NULL) or returned as an
  * mbuf chain in *mp0.
  *
  * psa and controlp, when non-NULL, are only cleared to NULL by this
  * function.  flagsp, when non-NULL, is read for the request flags; this
  * function's own path never writes to it.
  *
  * Returns 0 on success or an errno value.
  */
 int
 soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int len = 0, error = 0, flags, oresid;
 	struct sockbuf *sb;
 	struct mbuf *m, *n = NULL;
 
 	/* We only do stream sockets. */
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (psa != NULL)
 		*psa = NULL;
 	/* Work on a private copy of the caller's flags, minus MSG_EOR. */
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (controlp != NULL)
 		*controlp = NULL;
 	/* Out-of-band reads are served entirely by soreceive_rcvoob(). */
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp0 != NULL)
 		*mp0 = NULL;
 
 	sb = &so->so_rcv;
 
 #ifdef KERN_TLS
 	/*
 	 * KTLS store TLS records as records with a control message to
 	 * describe the framing.
 	 *
 	 * We check once here before acquiring locks to optimize the
 	 * common case.
 	 */
 	if (sb->sb_tls_info != NULL)
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 #endif
 
 	/* Prevent other readers from entering the socket. */
 	error = sblock(sb, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 	SOCKBUF_LOCK(sb);
 
 #ifdef KERN_TLS
 	/*
 	 * Check again now that the sockbuf is locked.  Both locks are
 	 * released before falling back to the generic receive path.
 	 */
 	if (sb->sb_tls_info != NULL) {
 		SOCKBUF_UNLOCK(sb);
 		sbunlock(sb);
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 	}
 #endif
 
 	/* Easy one, no space to copyout anything. */
 	if (uio->uio_resid == 0) {
 		error = EINVAL;
 		goto out;
 	}
 	/* Remember the starting residual so later progress can be detected. */
 	oresid = uio->uio_resid;
 
 	/* We will never ever get anything unless we are or were connected. */
 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
 		error = ENOTCONN;
 		goto out;
 	}
 
 	/*
 	 * Re-entered after every sbwait() and, for MSG_WAITALL, after each
 	 * partial delivery.  The sockbuf mutex is held here.
 	 */
 restart:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	/* Abort if socket has reported problems. */
 	if (so->so_error) {
 		/* Buffered data is still delivered ahead of the error. */
 		if (sbavail(sb) > 0)
 			goto deliver;
 		/*
 		 * Something was already transferred by this call: return
 		 * success and leave so_error in place.
 		 */
 		if (oresid > uio->uio_resid)
 			goto out;
 		error = so->so_error;
 		/* Peeking does not consume the pending error. */
 		if (!(flags & MSG_PEEK))
 			so->so_error = 0;
 		goto out;
 	}
 
 	/* Door is closed.  Deliver what is left, if any. */
 	if (sb->sb_state & SBS_CANTRCVMORE) {
 		if (sbavail(sb) > 0)
 			goto deliver;
 		else
 			goto out;
 	}
 
 	/* Socket buffer is empty and we shall not block. */
 	if (sbavail(sb) == 0 &&
 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
 		error = EAGAIN;
 		goto out;
 	}
 
 	/* Socket buffer got some data that we shall deliver now. */
 	if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) &&
 	    ((so->so_state & SS_NBIO) ||
 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
 	     sbavail(sb) >= sb->sb_lowat ||
 	     sbavail(sb) >= uio->uio_resid ||
 	     sbavail(sb) >= sb->sb_hiwat) ) {
 		goto deliver;
 	}
 
 	/* On MSG_WAITALL we must wait until all data or error arrives. */
 	if ((flags & MSG_WAITALL) &&
 	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat))
 		goto deliver;
 
 	/*
 	 * Wait and block until (more) data comes in.
 	 * NB: Drops the sockbuf lock during wait.
 	 */
 	error = sbwait(sb);
 	if (error)
 		goto out;
 	goto restart;
 
 deliver:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__));
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
 
 	/* Statistics. */
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 
 	/* Fill uio until full or current end of socket buffer is reached. */
 	len = min(uio->uio_resid, sbavail(sb));
 	if (mp0 != NULL) {
 		/* Dequeue as many mbufs as possible. */
 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
 			if (*mp0 == NULL)
 				*mp0 = sb->sb_mb;
 			else
 				m_cat(*mp0, sb->sb_mb);
 			/*
 			 * Account for every leading mbuf that fits entirely
 			 * in 'len'.  The guard above means the first mbuf
 			 * always qualifies, so 'n' (the last mbuf handed
 			 * over) is non-NULL when the loop ends.
 			 */
 			for (m = sb->sb_mb;
 			     m != NULL && m->m_len <= len;
 			     m = m->m_next) {
 				KASSERT(!(m->m_flags & M_NOTAVAIL),
 				    ("%s: m %p not available", __func__, m));
 				len -= m->m_len;
 				uio->uio_resid -= m->m_len;
 				sbfree(sb, m);
 				n = m;
 			}
 			/* Cut the handed-over chain off from what stays queued. */
 			n->m_next = NULL;
 			sb->sb_mb = m;
 			sb->sb_lastrecord = sb->sb_mb;
 			if (sb->sb_mb == NULL)
 				SB_EMPTY_FIXUP(sb);
 		}
 		/* Copy the remainder. */
 		if (len > 0) {
 			KASSERT(sb->sb_mb != NULL,
 			    ("%s: len > 0 && sb->sb_mb empty", __func__));
 
 			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
 			if (m == NULL)
 				len = 0;	/* Don't flush data from sockbuf. */
 			else
 				uio->uio_resid -= len;
 			/*
 			 * NOTE(review): m may be NULL here when m_copym()
 			 * failed; presumably m_cat() tolerates a NULL second
 			 * argument -- confirm.
 			 */
 			if (*mp0 != NULL)
 				m_cat(*mp0, m);
 			else
 				*mp0 = m;
 			/* Nothing at all could be handed back. */
 			if (*mp0 == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 		}
 	} else {
 		/* NB: Must unlock socket buffer as uiomove may sleep. */
 		SOCKBUF_UNLOCK(sb);
 		error = m_mbuftouio(uio, sb->sb_mb, len);
 		SOCKBUF_LOCK(sb);
 		if (error)
 			goto out;
 	}
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 
 	/*
 	 * Remove the delivered data from the socket buffer unless we
 	 * were only peeking.
 	 */
 	if (!(flags & MSG_PEEK)) {
 		/*
 		 * 'len' is whatever was copied rather than dequeued above;
 		 * it is 0 when the copy failed.
 		 */
 		if (len > 0)
 			sbdrop_locked(sb, len);
 
 		/*
 		 * Notify protocol that we drained some data.  Done when the
 		 * protocol asks for it (PR_WANTRCVD) and either this is not a
 		 * socket callback or MSG_WAITALL still has data outstanding.
 		 * The sockbuf mutex is dropped around the call.
 		 */
 		if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
 		    (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
 		     !(flags & MSG_SOCALLBCK))) {
 			SOCKBUF_UNLOCK(sb);
 			VNET_SO_ASSERT(so);
 			(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
 			SOCKBUF_LOCK(sb);
 		}
 	}
 
 	/*
 	 * For MSG_WAITALL we may have to loop again and wait for
 	 * more data to come in.
 	 */
 	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
 		goto restart;
 	/* Common exit: entered with the sockbuf mutex and the sblock held. */
 out:
 	SOCKBUF_LOCK_ASSERT(sb);
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 	SOCKBUF_UNLOCK(sb);
 	sbunlock(sb);
 	return (error);
 }
 
 /*
  * Optimized version of soreceive() for simple datagram cases from userspace.
  * Unlike in the stream case, we're able to drop a datagram if copyout()
  * fails, and because we handle datagrams atomically, we don't need to use a
  * sleep lock to prevent I/O interlacing.
  *
  * Requests that pass mp0, MSG_PEEK or MSG_OOB are forwarded unchanged to
  * soreceive_generic().  Otherwise the first record is unlinked from the
  * receive buffer under the sockbuf mutex and then processed with the mutex
  * released: the optional address goes to *psa, control mbufs go through
  * externalization (or to *controlp, or are freed), and data is copied out
  * through 'uio'.  Data that does not fit is discarded and MSG_TRUNC is
  * reported through *flagsp.
  *
  * Returns 0 on success or an errno value.
  */
 int
 soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct mbuf *m, *m2;
 	int flags, error;
 	ssize_t len;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 	/* Work on a private copy of the caller's flags, minus MSG_EOR. */
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 
 	/*
 	 * For any complicated cases, fall back to the full
 	 * soreceive_generic().
 	 */
 	if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB))
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 
 	/*
 	 * Enforce restrictions on use.
 	 */
 	KASSERT((pr->pr_flags & PR_WANTRCVD) == 0,
 	    ("soreceive_dgram: wantrcvd"));
 	KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic"));
 	KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0,
 	    ("soreceive_dgram: SBS_RCVATMARK"));
 	KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0,
 	    ("soreceive_dgram: P_CONNREQUIRED"));
 
 	/*
 	 * Loop blocking while waiting for a datagram.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	while ((m = so->so_rcv.sb_mb) == NULL) {
 		KASSERT(sbavail(&so->so_rcv) == 0,
 		    ("soreceive_dgram: sb_mb NULL but sbavail %u",
 		    sbavail(&so->so_rcv)));
 		/* A pending socket error is returned once and then cleared. */
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (error);
 		}
 		/*
 		 * No more data can arrive, or the caller asked for zero
 		 * bytes: report success with nothing transferred.
 		 */
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
 		    uio->uio_resid == 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (0);
 		}
 		/* Non-blocking request and nothing queued. */
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (EWOULDBLOCK);
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
 		if (error) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (error);
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	nextrecord = m->m_nextpkt;
 	if (nextrecord == NULL) {
 		KASSERT(so->so_rcv.sb_lastrecord == m,
 		    ("soreceive_dgram: lastrecord != m"));
 	}
 
 	KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord,
 	    ("soreceive_dgram: m_nextpkt != nextrecord"));
 
 	/*
 	 * Pull 'm' and its chain off the front of the packet queue.
 	 */
 	so->so_rcv.sb_mb = NULL;
 	sockbuf_pushsync(&so->so_rcv, nextrecord);
 
 	/*
 	 * Walk 'm's chain and free that many bytes from the socket buffer.
 	 */
 	for (m2 = m; m2 != NULL; m2 = m2->m_next)
 		sbfree(&so->so_rcv, m2);
 
 	/*
 	 * Do a few last checks before we let go of the lock.
 	 */
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/*
 	 * From here on the record is private to this thread; the sockbuf
 	 * mutex is no longer held.
 	 */
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
 		if (psa != NULL)
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 			    M_NOWAIT);
 		/* Drop the address mbuf; continue with what m_free() returns. */
 		m = m_free(m);
 	}
 	if (m == NULL) {
 		/* XXXRW: Can this happen? */
 		return (0);
 	}
 
 	/*
 	 * Packet to copyout() is now in 'm' and it is disconnected from the
 	 * queue.
 	 *
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  We call into the
 	 * protocol to perform externalization (or freeing if controlp ==
 	 * NULL). In some cases there can be only MT_CONTROL mbufs without
 	 * MT_DATA mbufs.
 	 */
 	if (m->m_type == MT_CONTROL) {
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 
 		/* Move the leading run of control mbufs onto the 'cm' list. */
 		do {
 			m2 = m->m_next;
 			m->m_next = NULL;
 			*cme = m;
 			cme = &(*cme)->m_next;
 			m = m2;
 		} while (m != NULL && m->m_type == MT_CONTROL);
 		/* Hand each control mbuf over individually. */
 		while (cm != NULL) {
 			cmn = cm->m_next;
 			cm->m_next = NULL;
 			/*
 			 * NOTE(review): the value stored in 'error' here is
 			 * never examined; it is overwritten by uiomove()
 			 * below, and the function returns 0 when no data
 			 * follows -- confirm this is intended.
 			 */
 			if (pr->pr_domain->dom_externalize != NULL) {
 				error = (*pr->pr_domain->dom_externalize)
 				    (cm, controlp, flags);
 			} else if (controlp != NULL)
 				*controlp = cm;
 			else
 				m_freem(cm);
 			/* Advance controlp to the end of what was appended. */
 			if (controlp != NULL) {
 				while (*controlp != NULL)
 					controlp = &(*controlp)->m_next;
 			}
 			cm = cmn;
 		}
 	}
 	KASSERT(m == NULL || m->m_type == MT_DATA,
 	    ("soreceive_dgram: !data"));
 	/* Copy data out mbuf by mbuf until the uio is full or data runs out. */
 	while (m != NULL && uio->uio_resid > 0) {
 		len = uio->uio_resid;
 		if (len > m->m_len)
 			len = m->m_len;
 		error = uiomove(mtod(m, char *), (int)len, uio);
 		if (error) {
 			/* The rest of the datagram is dropped on failure. */
 			m_freem(m);
 			return (error);
 		}
 		if (len == m->m_len)
 			m = m_free(m);
 		else {
 			/* Partially consumed: trim the copied bytes off the front. */
 			m->m_data += len;
 			m->m_len -= len;
 		}
 	}
 	/* Leftover data did not fit: flag truncation and discard it. */
 	if (m != NULL) {
 		flags |= MSG_TRUNC;
 		m_freem(m);
 	}
 	if (flagsp != NULL)
 		*flagsp |= flags;
 	return (0);
 }
 
 int
 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int error;
 
 	CURVNET_SET(so->so_vnet);
 	if (!SOLISTENING(so))
 		error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio,
 		    mp0, controlp, flagsp));
 	else
 		error = ENOTCONN;
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Shut down the read side, the write side, or both sides of a socket,
  * selected by 'how' (SHUT_RD, SHUT_WR or SHUT_RDWR; anything else yields
  * EINVAL).
  *
  * Returns 0, ENOTCONN, or the error reported by the protocol's
  * pru_shutdown method.
  */
 int
 soshutdown(struct socket *so, int how)
 {
 	struct protosw *pr = so->so_proto;
 	int error, soerror_enotconn;
 
 	switch (how) {
 	case SHUT_RD:
 	case SHUT_WR:
 	case SHUT_RDWR:
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	soerror_enotconn = 0;
 	if ((so->so_state &
 	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
 		/*
 		 * POSIX wants ENOTCONN for shutdown(2) on a socket that is
 		 * not connected.  Historically datagram sockets were torn
 		 * down anyway, and some applications rely on that to wake
 		 * up another process blocked in recvXXX(2) on a shared
 		 * socket.  To satisfy both, datagram (and listening) sockets
 		 * still go through the shutdown work below, but ENOTCONN is
 		 * what gets reported unless the protocol returns an error of
 		 * its own.
 		 */
 		if (so->so_type != SOCK_DGRAM && !SOLISTENING(so))
 			return (ENOTCONN);
 		soerror_enotconn = 1;
 	}
 
 	if (SOLISTENING(so)) {
 		/* A listener only needs its waiters aborted. */
 		if (how != SHUT_WR) {
 			SOLISTEN_LOCK(so);
 			so->so_error = ECONNABORTED;
 			solisten_wakeup(so);	/* unlocks so */
 		}
 		return (soerror_enotconn ? ENOTCONN : 0);
 	}
 
 	CURVNET_SET(so->so_vnet);
 	if (pr->pr_usrreqs->pru_flush != NULL)
 		(*pr->pr_usrreqs->pru_flush)(so, how);
 	if (how != SHUT_WR)
 		sorflush(so);
 	/* Only a write-side shutdown involves the protocol's pru_shutdown. */
 	error = 0;
 	if (how != SHUT_RD)
 		error = (*pr->pr_usrreqs->pru_shutdown)(so);
 	wakeup(&so->so_timeo);
 	CURVNET_RESTORE();
 
 	/* A protocol error takes precedence over the forced ENOTCONN. */
 	if (error != 0)
 		return (error);
 	return (soerror_enotconn ? ENOTCONN : 0);
 }
 
 /*
  * Flush the receive side of a socket: mark it as unable to receive more,
  * detach everything queued in the receive buffer, and release it.
  */
 void
 sorflush(struct socket *so)
 {
 	/* Size of the sockbuf region from sb_startzero to the end. */
 	const size_t zlen = sizeof(struct sockbuf) -
 	    offsetof(struct sockbuf, sb_startzero);
 	struct protosw *pr = so->so_proto;
 	struct sockbuf *sb = &so->so_rcv;
 	struct socket aso;
 
 	VNET_SO_ASSERT(so);
 
 	/*
 	 * dom_dispose must not be called with the socket buffer mutex held,
 	 * and the mutex should not be held for long in general.  So the
 	 * buffer contents are moved into a scratch socket and the original
 	 * is cleared (except locks, state).  The scratch copy has no
 	 * initialized locks; only routines that neither use nor assert those
 	 * locks may be called on it.
 	 *
 	 * First dislodge threads currently blocked in receive, then wait for
 	 * the lock against other simultaneous readers.  The wait is not
 	 * interruptible by signals, whatever the socket's own disposition
 	 * on interruptible waiting is.
 	 */
 	socantrcvmore(so);
 	(void) sblock(sb, SBL_WAIT | SBL_NOINTR);
 
 	/*
 	 * Move the zeroable tail of the sockbuf into the scratch socket and
 	 * wipe it in the original; selinfo and mutex data stay untouched.
 	 */
 	SOCKBUF_LOCK(sb);
 	bzero(&aso, sizeof(aso));
 	aso.so_pcb = so->so_pcb;
 	bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero, zlen);
 	bzero(&sb->sb_startzero, zlen);
 	SOCKBUF_UNLOCK(sb);
 	sbunlock(sb);
 
 	/*
 	 * Dispose of special rights and flush the scratch socket.  Nothing
 	 * that relies on initialized locks may be called on aso.
 	 */
 	if ((pr->pr_flags & PR_RIGHTS) != 0 &&
 	    pr->pr_domain->dom_dispose != NULL)
 		(*pr->pr_domain->dom_dispose)(&aso);
 	sbrelease_internal(&aso.so_rcv, so);
 }
 
 /*
  * Wrapper for Socket established helper hook.
  * Parameters: socket, context of the hook point, hook id.
  */
 static int inline
 hhook_run_socket(struct socket *so, void *hctx, int32_t h_id)
 {
 	struct socket_hhook_data hhook_data = {
 		.so = so,
 		.hctx = hctx,
 		.m = NULL,
 		.status = 0
 	};
 
 	CURVNET_SET(so->so_vnet);
 	HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd);
 	CURVNET_RESTORE();
 
 	/* Ugly but needed, since hhooks return void for now */
 	return (hhook_data.status);
 }
 
 /*
  * Copy a socket option value from the option descriptor into 'buf'.
  *
  * Perhaps this routine and sooptcopyout() ought to grow a variant for
  * options whose value is some kind of integer of no specific size.  Besides
  * their use in this file, both are also called by the protocol-level
  * pr_ctloutput() routines.
  *
  * At most 'len' bytes are taken; extra bytes supplied by the caller are
  * ignored.  Fewer than 'minlen' bytes is an error (EINVAL).  When the value
  * is clamped, sopt->sopt_valsize is updated to the number of bytes actually
  * retrieved.
  */
 int
 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
 {
 	size_t	ncopy;
 
 	ncopy = sopt->sopt_valsize;
 	if (ncopy < minlen)
 		return (EINVAL);
 	if (ncopy > len) {
 		ncopy = len;
 		sopt->sopt_valsize = ncopy;
 	}
 
 	/* No thread attached: the value is copied with bcopy(). */
 	if (sopt->sopt_td == NULL) {
 		bcopy(sopt->sopt_val, buf, ncopy);
 		return (0);
 	}
 	return (copyin(sopt->sopt_val, buf, ncopy));
 }
 
 /*
  * Kernel version of setsockopt(2): build a SOPT_SET option descriptor
  * around a kernel buffer (no owning thread) and pass it to sosetopt().
  *
  * XXX: optlen is size_t, not socklen_t
  */
 int
 so_setsockopt(struct socket *so, int level, int optname, void *optval,
     size_t optlen)
 {
 	struct sockopt sopt;
 
 	/* What is being done ... */
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = level;
 	sopt.sopt_name = optname;
 	/* ... with which value ... */
 	sopt.sopt_val = optval;
 	sopt.sopt_valsize = optlen;
 	/* ... on behalf of no thread. */
 	sopt.sopt_td = NULL;
 
 	return (sosetopt(so, &sopt));
 }
 
 /*
  * Set a socket option described by 'sopt' on 'so'.
  *
  * Options at any level other than SOL_SOCKET are passed straight to the
  * protocol's pr_ctloutput (ENOPROTOOPT if the protocol has none).
  * SOL_SOCKET options are handled by the switch below; when one is handled
  * without error the protocol's pr_ctloutput, if any, is also called and
  * its return value is ignored.
  *
  * Returns 0 on success or an errno value.
  */
 int
 sosetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 	sbintime_t val;
 	uint32_t val32;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	CURVNET_SET(so->so_vnet);
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto->pr_ctloutput != NULL)
 			error = (*so->so_proto->pr_ctloutput)(so, sopt);
 		else
 			error = ENOPROTOOPT;
 	} else {
 		switch (sopt->sopt_name) {
 		case SO_ACCEPTFILTER:
 			error = accept_filt_setopt(so, sopt);
 			if (error)
 				goto bad;
 			break;
 
 		case SO_LINGER:
 			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
 			if (error)
 				goto bad;
 			/* Reject linger times outside the accepted range. */
 			if (l.l_linger < 0 ||
 			    l.l_linger > USHRT_MAX ||
 			    l.l_linger > (INT_MAX / hz)) {
 				error = EDOM;
 				goto bad;
 			}
 			SOCK_LOCK(so);
 			so->so_linger = l.l_linger;
 			if (l.l_onoff)
 				so->so_options |= SO_LINGER;
 			else
 				so->so_options &= ~SO_LINGER;
 			SOCK_UNLOCK(so);
 			break;
 
 		/*
 		 * Boolean options: the option name itself is used as the
 		 * bit that is set or cleared in so_options.
 		 */
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_DONTROUTE:
 		case SO_USELOOPBACK:
 		case SO_BROADCAST:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_REUSEPORT_LB:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 		case SO_NO_DDP:
 		case SO_NO_OFFLOAD:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 			SOCK_LOCK(so);
 			if (optval)
 				so->so_options |= sopt->sopt_name;
 			else
 				so->so_options &= ~sopt->sopt_name;
 			SOCK_UNLOCK(so);
 			break;
 
 		case SO_SETFIB:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 
 			if (optval < 0 || optval >= rt_numfibs) {
 				error = EINVAL;
 				goto bad;
 			}
 			/*
 			 * The requested FIB is recorded only for the INET,
 			 * INET6 and ROUTE families; any other family has
 			 * so_fibnum set to 0.
 			 */
 			if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
 			   (so->so_proto->pr_domain->dom_family == PF_INET6) ||
 			   (so->so_proto->pr_domain->dom_family == PF_ROUTE)))
 				so->so_fibnum = optval;
 			else
 				so->so_fibnum = 0;
 			break;
 
 		case SO_USER_COOKIE:
 			error = sooptcopyin(sopt, &val32, sizeof val32,
 			    sizeof val32);
 			if (error)
 				goto bad;
 			so->so_user_cookie = val32;
 			break;
 
 		case SO_SNDBUF:
 		case SO_RCVBUF:
 		case SO_SNDLOWAT:
 		case SO_RCVLOWAT:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 
 			/*
 			 * Values < 1 make no sense for any of these options,
 			 * so disallow them.
 			 */
 			if (optval < 1) {
 				error = EINVAL;
 				goto bad;
 			}
 
 			error = sbsetopt(so, sopt->sopt_name, optval);
 			break;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 #ifdef COMPAT_FREEBSD32
 			if (SV_CURPROC_FLAG(SV_ILP32)) {
 				struct timeval32 tv32;
 
 				/*
 				 * The fields are copied into 'tv' even if the
 				 * copyin failed; 'error' is checked below
 				 * before 'tv' is used.
 				 */
 				error = sooptcopyin(sopt, &tv32, sizeof tv32,
 				    sizeof tv32);
 				CP(tv32, tv, tv_sec);
 				CP(tv32, tv, tv_usec);
 			} else
 #endif
 				error = sooptcopyin(sopt, &tv, sizeof tv,
 				    sizeof tv);
 			if (error)
 				goto bad;
 			if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
 			    tv.tv_usec >= 1000000) {
 				error = EDOM;
 				goto bad;
 			}
 			/* Timeouts beyond INT32_MAX seconds are stored as SBT_MAX. */
 			if (tv.tv_sec > INT32_MAX)
 				val = SBT_MAX;
 			else
 				val = tvtosbt(tv);
 			switch (sopt->sopt_name) {
 			case SO_SNDTIMEO:
 				so->so_snd.sb_timeo = val;
 				break;
 			case SO_RCVTIMEO:
 				so->so_rcv.sb_timeo = val;
 				break;
 			}
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			error = sooptcopyin(sopt, &extmac, sizeof extmac,
 			    sizeof extmac);
 			if (error)
 				goto bad;
 			/*
 			 * NOTE(review): sopt->sopt_td is dereferenced here,
 			 * while so_setsockopt() builds descriptors with
 			 * sopt_td == NULL -- confirm that path cannot reach
 			 * SO_LABEL.
 			 */
 			error = mac_setsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_TS_CLOCK:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 			if (optval < 0 || optval > SO_TS_CLOCK_MAX) {
 				error = EINVAL;
 				goto bad;
 			}
 			so->so_ts_clock = optval;
 			break;
 
 		case SO_MAX_PACING_RATE:
 			error = sooptcopyin(sopt, &val32, sizeof(val32),
 			    sizeof(val32));
 			if (error)
 				goto bad;
 			so->so_max_pacing_rate = val32;
 			break;
 
 		default:
 			/*
 			 * Unknown option: offer it to the socket-option
 			 * helper hooks if any are registered.
 			 */
 			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
 				error = hhook_run_socket(so, sopt,
 				    HHOOK_SOCKET_OPT);
 			else
 				error = ENOPROTOOPT;
 			break;
 		}
 		/*
 		 * Also pass a successfully handled option to the protocol;
 		 * its return value is deliberately discarded.
 		 */
 		if (error == 0 && so->so_proto->pr_ctloutput != NULL)
 			(void)(*so->so_proto->pr_ctloutput)(so, sopt);
 	}
 	/* Common exit; 'error' is already set on every path that lands here. */
 bad:
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Helper routine for getsockopt: copy 'len' bytes of option value from
  * 'buf' into the location described by 'sopt'.
  *
  * Documented get behavior is that a value is always returned, truncated if
  * necessary to fit the caller's buffer.  By tradition the caller is told
  * exactly how much was copied (through sopt->sopt_valsize) rather than how
  * much was available.  This interface is not idempotent, so the whole
  * answer has to be prepared before calling it.
  */
 int
 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
 {
 	size_t	ncopy;
 
 	ncopy = min(len, sopt->sopt_valsize);
 	sopt->sopt_valsize = ncopy;
 
 	/* No destination buffer: only the size is reported. */
 	if (sopt->sopt_val == NULL)
 		return (0);
 
 	/* No thread attached: the value is copied with bcopy(). */
 	if (sopt->sopt_td == NULL) {
 		bcopy(buf, sopt->sopt_val, ncopy);
 		return (0);
 	}
 	return (copyout(buf, sopt->sopt_val, ncopy));
 }
 
 /*
  * Retrieve a socket option described by 'sopt' from 'so'.
  *
  * Options at any level other than SOL_SOCKET are passed straight to the
  * protocol's pr_ctloutput (ENOPROTOOPT if the protocol has none).
  * SOL_SOCKET options are answered by the switch below, most of them by
  * loading 'optval' and jumping to the shared 'integer' label, which copies
  * it out with sooptcopyout().
  *
  * Returns 0 on success or an errno value.
  */
 int
 sogetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	CURVNET_SET(so->so_vnet);
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto->pr_ctloutput != NULL)
 			error = (*so->so_proto->pr_ctloutput)(so, sopt);
 		else
 			error = ENOPROTOOPT;
 		CURVNET_RESTORE();
 		return (error);
 	} else {
 		switch (sopt->sopt_name) {
 		case SO_ACCEPTFILTER:
 			error = accept_filt_getopt(so, sopt);
 			break;
 
 		case SO_LINGER:
 			SOCK_LOCK(so);
 			/* l_onoff carries the masked option bit, not 0/1. */
 			l.l_onoff = so->so_options & SO_LINGER;
 			l.l_linger = so->so_linger;
 			SOCK_UNLOCK(so);
 			error = sooptcopyout(sopt, &l, sizeof l);
 			break;
 
 		/*
 		 * Boolean options: the value reported is the option's bit
 		 * masked out of so_options (zero or the bit itself).
 		 */
 		case SO_USELOOPBACK:
 		case SO_DONTROUTE:
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_REUSEPORT_LB:
 		case SO_BROADCAST:
 		case SO_OOBINLINE:
 		case SO_ACCEPTCONN:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 		case SO_NO_DDP:
 		case SO_NO_OFFLOAD:
 			optval = so->so_options & sopt->sopt_name;
 			/* Shared tail for every option answered with an int. */
 integer:
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case SO_DOMAIN:
 			optval = so->so_proto->pr_domain->dom_family;
 			goto integer;
 
 		case SO_TYPE:
 			optval = so->so_type;
 			goto integer;
 
 		case SO_PROTOCOL:
 			optval = so->so_proto->pr_protocol;
 			goto integer;
 
 		case SO_ERROR:
 			/* Reading the pending error also clears it. */
 			SOCK_LOCK(so);
 			optval = so->so_error;
 			so->so_error = 0;
 			SOCK_UNLOCK(so);
 			goto integer;
 
 		/*
 		 * Buffer sizes and low-water marks: a listening socket
 		 * answers from its sol_* fields, any other socket from its
 		 * send/receive buffers.
 		 */
 		case SO_SNDBUF:
 			optval = SOLISTENING(so) ? so->sol_sbsnd_hiwat :
 			    so->so_snd.sb_hiwat;
 			goto integer;
 
 		case SO_RCVBUF:
 			optval = SOLISTENING(so) ? so->sol_sbrcv_hiwat :
 			    so->so_rcv.sb_hiwat;
 			goto integer;
 
 		case SO_SNDLOWAT:
 			optval = SOLISTENING(so) ? so->sol_sbsnd_lowat :
 			    so->so_snd.sb_lowat;
 			goto integer;
 
 		case SO_RCVLOWAT:
 			optval = SOLISTENING(so) ? so->sol_sbrcv_lowat :
 			    so->so_rcv.sb_lowat;
 			goto integer;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 			tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ?
 			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
 #ifdef COMPAT_FREEBSD32
 			/* ILP32 processes get the value as a struct timeval32. */
 			if (SV_CURPROC_FLAG(SV_ILP32)) {
 				struct timeval32 tv32;
 
 				CP(tv, tv32, tv_sec);
 				CP(tv, tv32, tv_usec);
 				error = sooptcopyout(sopt, &tv32, sizeof tv32);
 			} else
 #endif
 				error = sooptcopyout(sopt, &tv, sizeof tv);
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			/*
 			 * The caller's struct mac is copied in first, passed
 			 * to the MAC framework, and then copied back out.
 			 */
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				goto bad;
 			error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 			if (error)
 				goto bad;
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_PEERLABEL:
 #ifdef MAC
 			/* Same copy-in / fill / copy-out sequence as SO_LABEL. */
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				goto bad;
 			error = mac_getsockopt_peerlabel(
 			    sopt->sopt_td->td_ucred, so, &extmac);
 			if (error)
 				goto bad;
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		/* Listen queue statistics read as 0 on a non-listening socket. */
 		case SO_LISTENQLIMIT:
 			optval = SOLISTENING(so) ? so->sol_qlimit : 0;
 			goto integer;
 
 		case SO_LISTENQLEN:
 			optval = SOLISTENING(so) ? so->sol_qlen : 0;
 			goto integer;
 
 		case SO_LISTENINCQLEN:
 			optval = SOLISTENING(so) ? so->sol_incqlen : 0;
 			goto integer;
 
 		case SO_TS_CLOCK:
 			optval = so->so_ts_clock;
 			goto integer;
 
 		case SO_MAX_PACING_RATE:
 			/*
 			 * NOTE(review): sosetopt() stores this value from a
 			 * uint32_t, but it is reported through an int here
 			 * -- confirm large rates round-trip as intended.
 			 */
 			optval = so->so_max_pacing_rate;
 			goto integer;
 
 		default:
 			/*
 			 * Unknown option: offer it to the socket-option
 			 * helper hooks if any are registered.
 			 */
 			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
 				error = hhook_run_socket(so, sopt,
 				    HHOOK_SOCKET_OPT);
 			else
 				error = ENOPROTOOPT;
 			break;
 		}
 	}
 	/* The label exists only under MAC because only MAC code jumps to it. */
 #ifdef MAC
 bad:
 #endif
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Allocate an mbuf chain with a total m_len equal to sopt->sopt_valsize and
  * store its head in *mp.  A chain always contains at least one mbuf.  Each
  * mbuf gets a cluster when more than MLEN bytes are still needed.
  * Allocations may sleep (M_WAITOK) when the request has a thread attached
  * and are non-blocking (M_NOWAIT) otherwise.
  *
  * Returns 0 on success or ENOBUFS when an allocation fails.
  */
 int
 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 {
 	struct mbuf *cur, *tail;
 	int how, remaining;
 
 	how = (sopt->sopt_td != NULL) ? M_WAITOK : M_NOWAIT;
 	remaining = sopt->sopt_valsize;
 
 	/* Head of the chain. */
 	MGET(cur, how, MT_DATA);
 	if (cur == NULL)
 		return (ENOBUFS);
 	if (remaining > MLEN) {
 		MCLGET(cur, how);
 		if ((cur->m_flags & M_EXT) == 0) {
 			m_free(cur);
 			return (ENOBUFS);
 		}
 		cur->m_len = min(MCLBYTES, remaining);
 	} else
 		cur->m_len = min(MLEN, remaining);
 	remaining -= cur->m_len;
 	*mp = cur;
 
 	/* Keep appending mbufs until the requested size is covered. */
 	for (tail = cur; remaining != 0; tail = cur) {
 		MGET(cur, how, MT_DATA);
 		if (cur == NULL) {
 			m_freem(*mp);
 			return (ENOBUFS);
 		}
 		if (remaining > MLEN) {
 			MCLGET(cur, how);
 			if ((cur->m_flags & M_EXT) == 0) {
 				/* Release the new mbuf and the chain so far. */
 				m_freem(cur);
 				m_freem(*mp);
 				return (ENOBUFS);
 			}
 			cur->m_len = min(MCLBYTES, remaining);
 		} else
 			cur->m_len = min(MLEN, remaining);
 		remaining -= cur->m_len;
 		tail->m_next = cur;
 	}
 	return (0);
 }
 
 /*
  * Fill the mbuf chain `m' from the option value described by `sopt'.
  *
  * Each mbuf receives exactly m_len bytes; sopt_val and sopt_valsize are
  * advanced as data is consumed.  The source is treated as a user address
  * (copyin) when sopt_td is set and as a kernel address (bcopy) otherwise.
  * A NULL sopt_val is a no-op.
  *
  * Returns 0 on success.  If copyin() fails the whole chain is freed and the
  * error is returned.  The chain must not be larger than the option value:
  * running out of option bytes with mbufs left over is treated as a
  * programming error and panics.
  */
 int
 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *m0 = m;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 		if (sopt->sopt_td != NULL) {
 			int error;
 
 			error = copyin(sopt->sopt_val, mtod(m, char *),
 			    m->m_len);
 			if (error != 0) {
 				m_freem(m0);
 				return (error);
 			}
 		} else
 			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
 		sopt->sopt_valsize -= m->m_len;
 		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
 		m = m->m_next;
 	}
 	/*
 	 * The caller is expected to have sized the chain to fit the option
 	 * value; report the right function name if it did not.
 	 */
 	if (m != NULL)
 		panic("%s: mbuf chain larger than option value", __func__);
 	return (0);
 }
 
 /*
  * Copy the contents of mbuf chain `m' out to the option buffer described by
  * `sopt', using copyout() when sopt_td is set and bcopy() otherwise.
  * sopt_val is advanced past the copied data and, on success, sopt_valsize
  * is set to the number of bytes copied.  A NULL sopt_val is a no-op.
  *
  * The chain is freed and an error returned if copyout() fails, or with
  * EINVAL if the option buffer is too small to hold every mbuf.
  */
 int
 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *head = m;
 	size_t copied = 0;
 	int error;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	for (; m != NULL && sopt->sopt_valsize >= m->m_len; m = m->m_next) {
 		if (sopt->sopt_td == NULL) {
 			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
 		} else {
 			error = copyout(mtod(m, char *), sopt->sopt_val,
 			    m->m_len);
 			if (error != 0) {
 				m_freem(head);
 				return(error);
 			}
 		}
 		sopt->sopt_valsize -= m->m_len;
 		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
 		copied += m->m_len;
 	}
 	if (m != NULL) {
 		/* The supplied option buffer was too small for the chain. */
 		m_freem(head);
 		return(EINVAL);
 	}
 	sopt->sopt_valsize = copied;
 	return (0);
 }
 
 /*
  * sohasoutofband(): called by a protocol when new out-of-band data has
  * arrived.  Posts SIGURG to the socket's registered sigio owner, if any,
  * and wakes up threads selecting on the read side.
  */
 void
 sohasoutofband(struct socket *so)
 {
 	if (so->so_sigio != NULL) {
 		pgsigio(&so->so_sigio, SIGURG, 0);
 	}
 	selwakeuppri(&so->so_rdsel, PSOCK);
 }
 
 /*
  * Poll a socket by dispatching to the protocol's pru_sopoll handler and
  * returning whatever event mask it reports.
  */
 int
 sopoll(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	int revents;
 
 	/*
 	 * curvnet is neither set nor asserted here; that is fine as long as
 	 * every protocol uses sopoll_generic().
 	 */
 	revents = so->so_proto->pr_usrreqs->pru_sopoll(so, events,
 	    active_cred, td);
 	return (revents);
 }
 
 /*
  * Generic poll implementation for sockets.
  *
  * Computes the subset of `events' that is currently true for `so' and
  * returns it.  If nothing is ready, the calling thread `td' is recorded on
  * the relevant selinfo so it can be woken later.  `active_cred' is not used
  * here.  The socket lock is held throughout; for non-listening sockets both
  * socket buffer locks are taken as well (send first, then receive).
  */
 int
 sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	int revents;
 
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
 		/* Listening sockets only ever report readability. */
 		if (!(events & (POLLIN | POLLRDNORM)))
 			revents = 0;
 		else if (!TAILQ_EMPTY(&so->sol_comp))
 			/* A completed connection is waiting to be accepted. */
 			revents = events & (POLLIN | POLLRDNORM);
 		else if ((events & POLLINIGNEOF) == 0 && so->so_error)
 			/* Pending error: report readable together with POLLHUP. */
 			revents = (events & (POLLIN | POLLRDNORM)) | POLLHUP;
 		else {
 			selrecord(td, &so->so_rdsel);
 			revents = 0;
 		}
 	} else {
 		revents = 0;
 		SOCKBUF_LOCK(&so->so_snd);
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (events & (POLLIN | POLLRDNORM))
 			if (soreadabledata(so))
 				revents |= events & (POLLIN | POLLRDNORM);
 		if (events & (POLLOUT | POLLWRNORM))
 			if (sowriteable(so))
 				revents |= events & (POLLOUT | POLLWRNORM);
 		if (events & (POLLPRI | POLLRDBAND))
 			if (so->so_oobmark ||
 			    (so->so_rcv.sb_state & SBS_RCVATMARK))
 				revents |= events & (POLLPRI | POLLRDBAND);
 		/*
 		 * Unless the caller asked to ignore EOF, a receive side that
 		 * can take no more data counts as readable; if the send side
 		 * is shut as well, POLLHUP is added.
 		 */
 		if ((events & POLLINIGNEOF) == 0) {
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 				revents |= events & (POLLIN | POLLRDNORM);
 				if (so->so_snd.sb_state & SBS_CANTSENDMORE)
 					revents |= POLLHUP;
 			}
 		}
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			revents |= events & POLLRDHUP;
 		/*
 		 * Nothing ready: register for wakeup on whichever side(s) the
 		 * caller is interested in and flag the buffer with SB_SEL.
 		 */
 		if (revents == 0) {
 			if (events &
 			    (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND | POLLRDHUP)) {
 				selrecord(td, &so->so_rdsel);
 				so->so_rcv.sb_flags |= SB_SEL;
 			}
 			if (events & (POLLOUT | POLLWRNORM)) {
 				selrecord(td, &so->so_wrsel);
 				so->so_snd.sb_flags |= SB_SEL;
 			}
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		SOCKBUF_UNLOCK(&so->so_snd);
 	}
 	SOCK_UNLOCK(so);
 	return (revents);
 }
 
 /*
  * Attach knote `kn' to the socket behind kn->kn_fp.
  *
  * EVFILT_READ hooks into the read-side knote list, EVFILT_WRITE and
  * EVFILT_EMPTY into the write-side list; any other filter yields EINVAL.
  * For a non-listening socket the matching socket buffer is locked while the
  * knote is added and is marked SB_KNOTE.  The `fp' argument is not used.
  */
 int
 soo_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct socket *so;
 	struct knlist *list;
 	struct sockbuf *sb;
 
 	so = kn->kn_fp->f_data;
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		sb = &so->so_rcv;
 		list = &so->so_rdsel.si_note;
 		kn->kn_fop = &soread_filtops;
 		break;
 	case EVFILT_WRITE:
 		sb = &so->so_snd;
 		list = &so->so_wrsel.si_note;
 		kn->kn_fop = &sowrite_filtops;
 		break;
 	case EVFILT_EMPTY:
 		sb = &so->so_snd;
 		list = &so->so_wrsel.si_note;
 		kn->kn_fop = &soempty_filtops;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	SOCK_LOCK(so);
 	if (!SOLISTENING(so)) {
 		SOCKBUF_LOCK(sb);
 		knlist_add(list, kn, 1);
 		sb->sb_flags |= SB_KNOTE;
 		SOCKBUF_UNLOCK(sb);
 	} else {
 		knlist_add(list, kn, 1);
 	}
 	SOCK_UNLOCK(so);
 	return (0);
 }
 
 /*
  * Some routines that return EOPNOTSUPP for entry points that are not
  * supported by a protocol.  Fill in as needed.
  */
 /* Placeholder accept handler: ignores its arguments, returns EOPNOTSUPP. */
 int
 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder AIO-queue handler: always returns EOPNOTSUPP. */
 int
 pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder attach handler: always returns EOPNOTSUPP. */
 int
 pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder bind handler: always returns EOPNOTSUPP. */
 int
 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder bindat handler: always returns EOPNOTSUPP. */
 int
 pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder connect handler: always returns EOPNOTSUPP. */
 int
 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder connectat handler: always returns EOPNOTSUPP. */
 int
 pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder connect2 handler: always returns EOPNOTSUPP. */
 int
 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder control handler: always returns EOPNOTSUPP. */
 int
 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder disconnect handler: always returns EOPNOTSUPP. */
 int
 pru_disconnect_notsupp(struct socket *so)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder listen handler: always returns EOPNOTSUPP. */
 int
 pru_listen_notsupp(struct socket *so, int backlog, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder peer-address handler: always returns EOPNOTSUPP. */
 int
 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder rcvd handler: always returns EOPNOTSUPP. */
 int
 pru_rcvd_notsupp(struct socket *so, int flags)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder rcvoob handler: always returns EOPNOTSUPP. */
 int
 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Placeholder send handler.  Unlike the other stubs it consumes its mbuf
  * arguments before failing: `control' is freed when present, and `m' is
  * freed unless PRUS_NOTREADY is set in `flags'.  Always returns EOPNOTSUPP.
  */
 int
 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *addr, struct mbuf *control, struct thread *td)
 {
 	const int notready = flags & PRUS_NOTREADY;
 
 	if (control != NULL) {
 		m_freem(control);
 	}
 	if (notready == 0) {
 		m_freem(m);
 	}
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder ready handler: always returns EOPNOTSUPP. */
 int
 pru_ready_notsupp(struct socket *so, struct mbuf *m, int count)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default sense handler.  Not strictly a no-op: it reports the send
  * buffer's high-water mark as the stat block size, but it changes nothing
  * on the socket itself.  Always returns 0.
  */
 int
 pru_sense_null(struct socket *so, struct stat *sb)
 {
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	return (0);
 }
 
 /* Placeholder shutdown handler: always returns EOPNOTSUPP. */
 int
 pru_shutdown_notsupp(struct socket *so)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder socket-address handler: always returns EOPNOTSUPP. */
 int
 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder sosend handler: always returns EOPNOTSUPP. */
 int
 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder soreceive handler: always returns EOPNOTSUPP. */
 int
 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder sopoll handler: always returns EOPNOTSUPP. */
 int
 pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
     struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Detach a read-side knote from its socket.  Once the read knote list of a
  * non-listening socket becomes empty, SB_KNOTE is cleared on the receive
  * buffer.
  */
 static void
 filt_sordetach(struct knote *kn)
 {
 	struct socket *so;
 	struct knlist *list;
 
 	so = kn->kn_fp->f_data;
 	list = &so->so_rdsel.si_note;
 
 	so_rdknl_lock(so);
 	knlist_remove(list, kn, 1);
 	if (!SOLISTENING(so) && knlist_empty(list)) {
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
 	}
 	so_rdknl_unlock(so);
 }
 
 /*
  * Read-side kevent filter.  Returns non-zero when the knote should fire.
  *
  * Listening socket: kn_data is the completed-queue length; the knote fires
  * when a connection is queued, or with EV_EOF when an error is pending.
  * Other sockets: kn_data is the available data minus control data; the
  * knote fires on EOF, on a pending error, or when enough data is buffered
  * (NOTE_LOWAT threshold if given, else the buffer's low-water mark).  As a
  * last resort the HHOOK_FILT_SOREAD hooks get to decide.
  */
 /*ARGSUSED*/
 static int
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
 	if (SOLISTENING(so)) {
 		SOCK_LOCK_ASSERT(so);
 		kn->kn_data = so->sol_qlen;
 		if (so->so_error == 0)
 			return (!TAILQ_EMPTY(&so->sol_comp));
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	}
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	}
 	if (so->so_error) {
 		/* temporary udp error */
 		return (1);
 	}
 
 	if ((kn->kn_sfflags & NOTE_LOWAT) != 0) {
 		if (kn->kn_data >= kn->kn_sdata)
 			return (1);
 	} else {
 		if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat)
 			return (1);
 	}
 
 	/* A non-zero return from the hook signals an event, not an error. */
 	return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD));
 }
 
 /*
  * Detach a write-side knote from its socket.  Once the write knote list of
  * a non-listening socket becomes empty, SB_KNOTE is cleared on the send
  * buffer.
  */
 static void
 filt_sowdetach(struct knote *kn)
 {
 	struct socket *so;
 	struct knlist *list;
 
 	so = kn->kn_fp->f_data;
 	list = &so->so_wrsel.si_note;
 
 	so_wrknl_lock(so);
 	knlist_remove(list, kn, 1);
 	if (!SOLISTENING(so) && knlist_empty(list)) {
 		so->so_snd.sb_flags &= ~SB_KNOTE;
 	}
 	so_wrknl_unlock(so);
 }
 
 /*
  * Write-side kevent filter.  Returns non-zero when the knote should fire.
  *
  * Never fires for a listening socket.  Otherwise kn_data is set to the
  * free space in the send buffer, the HHOOK_FILT_SOWRITE hooks are run, and
  * the knote fires on a shut-down send side (with EV_EOF), on a pending
  * error, or when enough space is free; a connection-oriented protocol that
  * is not yet connected never reports writable.
  */
 /*ARGSUSED*/
 static int
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
 	if (SOLISTENING(so))
 		return (0);
 
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	kn->kn_data = sbspace(&so->so_snd);
 
 	hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE);
 
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	}
 	if (so->so_error) {
 		/* temporary udp error */
 		return (1);
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0 &&
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
 		return (0);
 	if (kn->kn_sfflags & NOTE_LOWAT)
 		return (kn->kn_data >= kn->kn_sdata);
 	return (kn->kn_data >= so->so_snd.sb_lowat);
 }
 
 /*
  * EVFILT_EMPTY filter: fires when the send buffer holds no data.  kn_data
  * is set to the number of bytes still in use.  A listening socket always
  * reports empty.
  */
 static int
 filt_soempty(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
 	if (SOLISTENING(so))
 		return (1);
 
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	kn->kn_data = sbused(&so->so_snd);
 
 	return (kn->kn_data == 0 ? 1 : 0);
 }
 
 /*
  * Check that socket `so' was created with credentials whose uid is `uid'.
  * Returns 0 on a match and EPERM otherwise, including when `so' is NULL.
  */
 int
 socheckuid(struct socket *so, uid_t uid)
 {
 	if (so == NULL || so->so_cred->cr_uid != uid)
 		return (EPERM);
 	return (0);
 }
 
 /*
  * These functions are used by protocols to notify the socket layer (and its
  * consumers) of state changes in the sockets driven by protocol-side events.
  */
 
 /*
  * Procedures to manipulate state flags of socket and do appropriate wakeups.
  *
  * Normal sequence from the active (originating) side is that
  * soisconnecting() is called during processing of connect() call, resulting
  * in an eventual call to soisconnected() if/when the connection is
  * established.  When the connection is torn down soisdisconnecting() is
  * called during processing of disconnect() call, and soisdisconnected() is
  * called when the connection to the peer is totally severed.  The semantics
  * of these routines are such that connectionless protocols can call
  * soisconnected() and soisdisconnected() only, bypassing the in-progress
  * calls when setting up a ``connection'' takes no time.
  *
  * From the passive side, a listening socket maintains two queues of
  * sockets: sol_incomp for connections in progress and sol_comp for
  * connections already made and awaiting user acceptance.  As a protocol is
  * preparing incoming connections, it creates a socket structure queued on
  * sol_incomp by calling sonewconn().  When the connection is established,
  * soisconnected() is called, and transfers the socket structure to
  * sol_comp, making it available to accept().
  *
  * If a socket is closed with sockets on either sol_incomp or sol_comp,
  * these sockets are dropped.
  *
  * If higher-level protocols are implemented in the kernel, the wakeups done
  * here will sometimes cause software-interrupt process scheduling.
  */
 /*
  * Mark a socket as having a connection attempt in progress: under the
  * socket lock, clear the connected and disconnecting flags and set
  * SS_ISCONNECTING.
  */
 void
 soisconnecting(struct socket *so)
 {
 	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
 	so->so_state |= SS_ISCONNECTING;
 	SOCK_UNLOCK(so);
 }
 
 /*
  * Mark a socket as connected and notify whoever is waiting for that.
  *
  * The connection-progress flags are cleared and SS_ISCONNECTED is set.  If
  * the socket sits on a listener's incomplete queue (SQ_INCOMP) it is either
  * moved to the completed queue and the listener is woken, or, when
  * SO_ACCEPTFILTER is set, handed to the listener's accept filter first.
  * Otherwise sleepers on so_timeo and on both socket buffers are woken.
  */
 void
 soisconnected(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
 
 	if (so->so_qstate == SQ_INCOMP) {
 		struct socket *head = so->so_listen;
 		int ret;
 
 		KASSERT(head, ("%s: so %p on incomp of NULL", __func__, so));
 		/*
 		 * Promoting a socket from incomplete queue to complete, we
 		 * need to go through reverse order of locking.  We first do
 		 * trylock, and if that doesn't succeed, we go the hard way
 		 * leaving a reference and rechecking consistency after proper
 		 * locking.
 		 */
 		if (__predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
 			/* Keep head alive while so's lock is dropped. */
 			soref(head);
 			SOCK_UNLOCK(so);
 			SOLISTEN_LOCK(head);
 			SOCK_LOCK(so);
 			if (__predict_false(head != so->so_listen)) {
 				/*
 				 * The socket went off the listen queue,
 				 * should be lost race to close(2) of sol.
 				 * The socket is about to soabort().
 				 */
 				SOCK_UNLOCK(so);
 				sorele(head);
 				return;
 			}
 			/* Not the last one, as so holds a ref. */
 			refcount_release(&head->so_count);
 		}
 again:
 		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
 			/* Move so from the incomplete to the completed queue. */
 			TAILQ_REMOVE(&head->sol_incomp, so, so_list);
 			head->sol_incqlen--;
 			TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
 			head->sol_qlen++;
 			so->so_qstate = SQ_COMP;
 			SOCK_UNLOCK(so);
 			solisten_wakeup(head);	/* unlocks */
 		} else {
 			/*
 			 * Install the accept filter callback as the receive
 			 * upcall, clear SO_ACCEPTFILTER so the next pass takes
 			 * the branch above, and give the filter one immediate
 			 * run.  If it already reports SU_ISCONNECTED, drop the
 			 * upcall again and promote the socket right away.
 			 */
 			SOCKBUF_LOCK(&so->so_rcv);
 			soupcall_set(so, SO_RCV,
 			    head->sol_accept_filter->accf_callback,
 			    head->sol_accept_filter_arg);
 			so->so_options &= ~SO_ACCEPTFILTER;
 			ret = head->sol_accept_filter->accf_callback(so,
 			    head->sol_accept_filter_arg, M_NOWAIT);
 			if (ret == SU_ISCONNECTED) {
 				soupcall_clear(so, SO_RCV);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto again;
 			}
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			SOCK_UNLOCK(so);
 			SOLISTEN_UNLOCK(head);
 		}
 		return;
 	}
 	SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 }
 
 /*
  * Mark a socket as being in the process of disconnecting.
  *
  * Clears SS_ISCONNECTING and sets SS_ISDISCONNECTING under the socket lock.
  * For a non-listening socket both directions are additionally marked as
  * unable to transfer more data.  Finally sleepers on so_timeo are woken.
  */
 void
 soisdisconnecting(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTING;
 	so->so_state |= SS_ISDISCONNECTING;
 
 	if (!SOLISTENING(so)) {
 		/*
 		 * NOTE(review): the buffer locks taken here are not released
 		 * in this function; presumably the *_locked() callees drop
 		 * them before returning -- confirm against their definitions.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		socantrcvmore_locked(so);
 		SOCKBUF_LOCK(&so->so_snd);
 		socantsendmore_locked(so);
 	}
 	SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 }
 
 /*
  * Mark a socket as fully disconnected.
  *
  * Sets SS_ISDISCONNECTED and then clears the other connection-state flags.
  * For a non-listening socket both directions are marked as unable to
  * transfer more data and everything still queued in the send buffer is
  * dropped.  Finally sleepers on so_timeo are woken.
  */
 void
 soisdisconnected(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 
 	/*
 	 * There is at least one reader of so_state that does not
 	 * acquire socket lock, namely soreceive_generic().  Ensure
 	 * that it never sees all flags that track connection status
 	 * cleared, by ordering the update with a barrier semantic of
 	 * our release thread fence.
 	 */
 	so->so_state |= SS_ISDISCONNECTED;
 	atomic_thread_fence_rel();
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 
 	if (!SOLISTENING(so)) {
 		/* The socket lock is released before the buffers are touched. */
 		SOCK_UNLOCK(so);
 		/*
 		 * NOTE(review): the buffer locks taken below are not released
 		 * in this function; presumably the *_locked() callees drop
 		 * them before returning -- confirm against their definitions.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		socantrcvmore_locked(so);
 		SOCKBUF_LOCK(&so->so_snd);
 		/* Discard all data still sitting in the send buffer. */
 		sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
 		socantsendmore_locked(so);
 	} else
 		SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 }
 
 /*
  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
  */
 struct sockaddr *
 sodupsockaddr(const struct sockaddr *sa, int mflags)
 {
 	struct sockaddr *sa2;
 
 	sa2 = malloc(sa->sa_len, M_SONAME, mflags);
 	if (sa2)
 		bcopy(sa, sa2, sa->sa_len);
 	return sa2;
 }
 
 /*
  * Install `func' as the destructor of socket `so'.  The caller must hold
  * the socket lock.
  */
 void
 sodtor_set(struct socket *so, so_dtor_t *func)
 {
 	SOCK_LOCK_ASSERT(so);
 	so->so_dtor = func;
 }
 
 /*
  * Install an upcall on one of a socket's buffers.  `which' selects the
  * receive (SO_RCV) or send (SO_SND) buffer; any other value panics.  The
  * selected buffer must be locked by the caller and the socket must not be
  * listening.  The buffer is flagged SB_UPCALL.
  */
 void
 soupcall_set(struct socket *so, int which, so_upcall_t func, void *arg)
 {
 	struct sockbuf *sb;
 
 	KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
 
 	if (which == SO_RCV)
 		sb = &so->so_rcv;
 	else if (which == SO_SND)
 		sb = &so->so_snd;
 	else
 		panic("soupcall_set: bad which");
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	sb->sb_flags |= SB_UPCALL;
 	sb->sb_upcall = func;
 	sb->sb_upcallarg = arg;
 }
 
 /*
  * Remove the upcall from one of a socket's buffers.  `which' selects the
  * receive (SO_RCV) or send (SO_SND) buffer; any other value panics.  The
  * selected buffer must be locked by the caller, must currently have an
  * upcall installed, and the socket must not be listening.
  */
 void
 soupcall_clear(struct socket *so, int which)
 {
 	struct sockbuf *sb;
 
 	KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
 
 	if (which == SO_RCV)
 		sb = &so->so_rcv;
 	else if (which == SO_SND)
 		sb = &so->so_snd;
 	else
 		panic("soupcall_clear: bad which");
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	KASSERT(sb->sb_upcall != NULL,
 	    ("%s: so %p no upcall to clear", __func__, so));
 	sb->sb_flags &= ~SB_UPCALL;
 	sb->sb_upcall = NULL;
 	sb->sb_upcallarg = NULL;
 }
 
 /*
  * Install an upcall and its argument on a listening socket.  The caller
  * must hold the listen lock.
  */
 void
 solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg)
 {
 	SOLISTEN_LOCK_ASSERT(so);
 	so->sol_upcallarg = arg;
 	so->sol_upcall = func;
 }
 
 /*
  * Lock callback for the read-side knote list: takes the receive buffer
  * lock, or the socket lock when the socket is listening.
  */
 static void
 so_rdknl_lock(void *arg)
 {
 	struct socket *so;
 
 	so = arg;
 	if (!SOLISTENING(so))
 		SOCKBUF_LOCK(&so->so_rcv);
 	else
 		SOCK_LOCK(so);
 }
 
 /*
  * Unlock callback for the read-side knote list: releases the receive
  * buffer lock, or the socket lock when the socket is listening.
  */
 static void
 so_rdknl_unlock(void *arg)
 {
 	struct socket *so;
 
 	so = arg;
 	if (!SOLISTENING(so))
 		SOCKBUF_UNLOCK(&so->so_rcv);
 	else
 		SOCK_UNLOCK(so);
 }
 
 /*
  * Assertion callback for the read-side knote list.  With what == LA_LOCKED
  * it asserts that the governing lock is held, otherwise that it is not.
  * The governing lock is the socket lock for a listening socket and the
  * receive buffer lock for any other socket.
  */
 static void
 so_rdknl_assert_lock(void *arg, int what)
 {
 	struct socket *so;
 
 	so = arg;
 	if (SOLISTENING(so)) {
 		if (what == LA_LOCKED)
 			SOCK_LOCK_ASSERT(so);
 		else
 			SOCK_UNLOCK_ASSERT(so);
 	} else {
 		if (what == LA_LOCKED)
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		else
 			SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
 	}
 }
 
 /*
  * Lock callback for the write-side knote list: takes the send buffer lock,
  * or the socket lock when the socket is listening.
  */
 static void
 so_wrknl_lock(void *arg)
 {
 	struct socket *so;
 
 	so = arg;
 	if (!SOLISTENING(so))
 		SOCKBUF_LOCK(&so->so_snd);
 	else
 		SOCK_LOCK(so);
 }
 
 /*
  * Unlock callback for the write-side knote list: releases the send buffer
  * lock, or the socket lock when the socket is listening.
  */
 static void
 so_wrknl_unlock(void *arg)
 {
 	struct socket *so;
 
 	so = arg;
 	if (!SOLISTENING(so))
 		SOCKBUF_UNLOCK(&so->so_snd);
 	else
 		SOCK_UNLOCK(so);
 }
 
 /*
  * Assertion callback for the write-side knote list.  With what == LA_LOCKED
  * it asserts that the governing lock is held, otherwise that it is not.
  * The governing lock is the socket lock for a listening socket and the
  * send buffer lock for any other socket.
  */
 static void
 so_wrknl_assert_lock(void *arg, int what)
 {
 	struct socket *so;
 
 	so = arg;
 	if (SOLISTENING(so)) {
 		if (what == LA_LOCKED)
 			SOCK_LOCK_ASSERT(so);
 		else
 			SOCK_UNLOCK_ASSERT(so);
 	} else {
 		if (what == LA_LOCKED)
 			SOCKBUF_LOCK_ASSERT(&so->so_snd);
 		else
 			SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
 	}
 }
 
 /*
  * Fill in the external-format ``xsocket'' structure `xso' from the kernel
  * socket `so'.  Exporting a separate structure keeps irrelevant detail out
  * of the interface, shields user code from kernel structure changes, and
  * leaves room to hide information from users later on.
  *
  * The structure is zeroed first.  Listening sockets export their queue
  * lengths and limit; other sockets export the queue state (folded into
  * so_state), the out-of-band mark and both socket buffers.
  */
 void
 sotoxsocket(struct socket *so, struct xsocket *xso)
 {
 	bzero(xso, sizeof(*xso));
 
 	/* Identity and addressing. */
 	xso->xso_len = sizeof(*xso);
 	xso->xso_so = (uintptr_t)so;
 	xso->so_pcb = (uintptr_t)so->so_pcb;
 	xso->so_type = so->so_type;
 	xso->xso_family = so->so_proto->pr_domain->dom_family;
 	xso->xso_protocol = so->so_proto->pr_protocol;
 
 	/* Options, state and ownership. */
 	xso->so_options = so->so_options;
 	xso->so_linger = so->so_linger;
 	xso->so_state = so->so_state;
 	xso->so_timeo = so->so_timeo;
 	xso->so_error = so->so_error;
 	xso->so_uid = so->so_cred->cr_uid;
 	if (so->so_sigio)
 		xso->so_pgid = so->so_sigio->sio_pgid;
 	else
 		xso->so_pgid = 0;
 
 	if (!SOLISTENING(so)) {
 		xso->so_state |= so->so_qstate;
 		xso->so_qlimit = 0;
 		xso->so_incqlen = 0;
 		xso->so_qlen = 0;
 		xso->so_oobmark = so->so_oobmark;
 		sbtoxsockbuf(&so->so_snd, &xso->so_snd);
 		sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
 	} else {
 		xso->so_qlen = so->sol_qlen;
 		xso->so_incqlen = so->sol_incqlen;
 		xso->so_qlimit = so->sol_qlimit;
 		xso->so_oobmark = 0;
 	}
 }
 
 /* Return a pointer to the socket's receive buffer. */
 struct sockbuf *
 so_sockbuf_rcv(struct socket *so)
 {
 	struct sockbuf *sb;
 
 	sb = &so->so_rcv;
 	return (sb);
 }
 
 /* Return a pointer to the socket's send buffer. */
 struct sockbuf *
 so_sockbuf_snd(struct socket *so)
 {
 	struct sockbuf *sb;
 
 	sb = &so->so_snd;
 	return (sb);
 }
 
 /* Return the socket's so_state word; no lock is taken here. */
 int
 so_state_get(const struct socket *so)
 {
 	return (so->so_state);
 }
 
 /* Overwrite the socket's so_state word with `val'; no lock is taken here. */
 void
 so_state_set(struct socket *so, int val)
 {
 	so->so_state = val;
 }
 
 /* Return the socket's so_options word; no lock is taken here. */
 int
 so_options_get(const struct socket *so)
 {
 	return (so->so_options);
 }
 
 /* Overwrite the socket's so_options word with `val'; no lock is taken. */
 void
 so_options_set(struct socket *so, int val)
 {
 	so->so_options = val;
 }
 
 /* Return the socket's pending so_error value; no lock is taken here. */
 int
 so_error_get(const struct socket *so)
 {
 	return (so->so_error);
 }
 
 /* Store `val' as the socket's so_error value; no lock is taken here. */
 void
 so_error_set(struct socket *so, int val)
 {
 	so->so_error = val;
 }
 
 /* Return the socket's so_linger value; no lock is taken here. */
 int
 so_linger_get(const struct socket *so)
 {
 	return (so->so_linger);
 }
 
 /*
  * Store `val' as the socket's so_linger value.  The value is asserted to
  * be non-negative, at most USHRT_MAX, and at most INT_MAX / hz.
  */
 void
 so_linger_set(struct socket *so, int val)
 {
 	KASSERT(val >= 0 && val <= USHRT_MAX && val <= (INT_MAX / hz),
 	    ("%s: val %d out of range", __func__, val));
 	so->so_linger = val;
 }
 
 /* Return the protocol switch entry the socket is bound to. */
 struct protosw *
 so_protosw_get(const struct socket *so)
 {
 	return (so->so_proto);
 }
 
 /* Point the socket at protocol switch entry `val'; no lock is taken. */
 void
 so_protosw_set(struct socket *so, struct protosw *val)
 {
 	so->so_proto = val;
 }
 
 /* Function wrapper around sorwakeup() for the given socket. */
 void
 so_sorwakeup(struct socket *so)
 {
 	sorwakeup(so);
 }
 
 /* Function wrapper around sowwakeup() for the given socket. */
 void
 so_sowwakeup(struct socket *so)
 {
 	sowwakeup(so);
 }
 
 /* Function wrapper around sorwakeup_locked() for the given socket. */
 void
 so_sorwakeup_locked(struct socket *so)
 {
 	sorwakeup_locked(so);
 }
 
 /* Function wrapper around sowwakeup_locked() for the given socket. */
 void
 so_sowwakeup_locked(struct socket *so)
 {
 	sowwakeup_locked(so);
 }
 
 /* Acquire the socket lock (function wrapper around SOCK_LOCK()). */
 void
 so_lock(struct socket *so)
 {
 	SOCK_LOCK(so);
 }
 
 /* Release the socket lock (function wrapper around SOCK_UNLOCK()). */
 void
 so_unlock(struct socket *so)
 {
 	SOCK_UNLOCK(so);
 }
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 23fae343924a..18505b54d603 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -1,1632 +1,1632 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/malloc.h>
 #include <sys/filedesc.h>
 #include <sys/proc.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/uio.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32_util.h>
 #endif
 
 #include <net/vnet.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
 
 static int accept1(struct thread *td, int s, struct sockaddr *uname,
 		   socklen_t *anamelen, int flags);
 static int getsockname1(struct thread *td, struct getsockname_args *uap,
 			int compat);
 static int getpeername1(struct thread *td, struct getpeername_args *uap,
 			int compat);
 static int sockargs(struct mbuf **, char *, socklen_t, int);
 
 /*
  * Translate user descriptor `fd' into a referenced file entry that is
  * known to be a socket, checking the capability rights in `rightsp' along
  * the way.
  *
  * On success the file is returned through `fpp' with a reference held, its
  * f_flag is stored in `fflagp' when that is non-NULL, and `havecapsp' (if
  * requested) holds a copy of the descriptor's capability rights.  A
  * descriptor that is not a socket yields ENOTSOCK; in that case the
  * reference is dropped and any returned capabilities are freed.
  */
 int
 getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
     struct file **fpp, u_int *fflagp, struct filecaps *havecapsp)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_cap(td, fd, rightsp, &fp, havecapsp);
 	if (error != 0)
 		return (error);
 
 	if (fp->f_type == DTYPE_SOCKET) {
 		if (fflagp != NULL)
 			*fflagp = fp->f_flag;
 		*fpp = fp;
 		return (0);
 	}
 
 	/* Not a socket: undo what fget_cap() handed us. */
 	fdrop(fp, td);
 	if (havecapsp != NULL)
 		filecaps_free(havecapsp);
 	return (ENOTSOCK);
 }
 
 /*
  * System call interface to the socket abstraction.
  */
 #if defined(COMPAT_43)
 #define COMPAT_OLDSOCK
 #endif
 
 int
 sys_socket(struct thread *td, struct socket_args *uap)
 {
 
 	return (kern_socket(td, uap->domain, uap->type, uap->protocol));
 }
 
 /*
  * Create a socket and install it in a new file descriptor.
  *
  * SOCK_CLOEXEC and SOCK_NONBLOCK are stripped from `type' and turned into
  * the O_CLOEXEC descriptor flag and the FNONBLOCK file flag respectively.
  * On success the new descriptor number is stored in td->td_retval[0] and 0
  * is returned; otherwise an errno value is returned and the descriptor is
  * closed again.
  */
 int
 kern_socket(struct thread *td, int domain, int type, int protocol)
 {
 	struct file *fp;
 	struct socket *so;
 	int error, fd;
 	int fflag = 0, oflag = 0;
 
 	AUDIT_ARG_SOCKET(domain, type, protocol);
 
 	if (type & SOCK_CLOEXEC) {
 		oflag |= O_CLOEXEC;
 		type &= ~SOCK_CLOEXEC;
 	}
 	if (type & SOCK_NONBLOCK) {
 		fflag |= FNONBLOCK;
 		type &= ~SOCK_NONBLOCK;
 	}
 
 #ifdef MAC
 	error = mac_socket_check_create(td->td_ucred, domain, type, protocol);
 	if (error != 0)
 		return (error);
 #endif
 	/* falloc() hands back `fp' with an extra reference held for us. */
 	error = falloc(td, &fp, &fd, oflag);
 	if (error != 0)
 		return (error);
 
 	error = socreate(domain, &so, type, protocol, td->td_ucred, td);
 	if (error != 0) {
 		fdclose(td, fp, fd);
 		fdrop(fp, td);
 		return (error);
 	}
 
 	finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
 	if (fflag & FNONBLOCK)
 		(void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
 	td->td_retval[0] = fd;
 	fdrop(fp, td);
 	return (0);
 }
 
 /*
  * bind system call entry: copy in the address, bind relative to AT_FDCWD
  * via kern_bindat(), and free the kernel copy of the address.
  */
 int
 sys_bind(struct thread *td, struct bind_args *uap)
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error != 0)
 		return (error);
 	error = kern_bindat(td, AT_FDCWD, uap->s, sa);
 	free(sa, M_SONAME);
 	return (error);
 }
 
 int
 kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 {
 	struct socket *so;
 	struct file *fp;
 	int error;
 
 #ifdef CAPABILITY_MODE
 	if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD))
 		return (ECAPMODE);
 #endif
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
 	error = getsock_cap(td, fd, &cap_bind_rights,
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(sa);
 #endif
 #ifdef MAC
 	error = mac_socket_check_bind(td->td_ucred, so, sa);
 	if (error == 0) {
 #endif
 		if (dirfd == AT_FDCWD)
 			error = sobind(so, sa, td);
 		else
 			error = sobindat(dirfd, so, sa, td);
 #ifdef MAC
 	}
 #endif
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * bindat system call entry: copy in the address, bind relative to uap->fd
  * via kern_bindat(), and free the kernel copy of the address.
  */
 int
 sys_bindat(struct thread *td, struct bindat_args *uap)
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error != 0)
 		return (error);
 	error = kern_bindat(td, uap->fd, uap->s, sa);
 	free(sa, M_SONAME);
 	return (error);
 }
 
 int
 sys_listen(struct thread *td, struct listen_args *uap)
 {
 
 	return (kern_listen(td, uap->s, uap->backlog));
 }
 
 int
 kern_listen(struct thread *td, int s, int backlog)
 {
 	struct socket *so;
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, &cap_listen_rights,
 	    &fp, NULL, NULL);
 	if (error == 0) {
 		so = fp->f_data;
 #ifdef MAC
 		error = mac_socket_check_listen(td->td_ucred, so);
 		if (error == 0)
 #endif
 			error = solisten(so, backlog, td);
 		fdrop(fp, td);
 	}
 	return (error);
 }
 
 /*
  * accept1(): common backend of the accept family of system calls.
  *
  * Accepts a connection on descriptor `s' through kern_accept4().  When
  * `uname' is NULL no address is returned.  Otherwise the user-supplied
  * length is read from `anamelen', the peer address is copied out to
  * `uname', and the resulting length is written back to `anamelen'.  If
  * either copyout fails, the freshly accepted descriptor (its number is in
  * td->td_retval[0]) is closed again.  Returns 0 or an errno value.
  */
 static int
 accept1(struct thread *td, int s, struct sockaddr *uname, socklen_t *anamelen,
     int flags)
 {
 	struct sockaddr *name;
 	socklen_t namelen;
 	struct file *fp;
 	int error;
 
 	if (uname == NULL)
 		return (kern_accept4(td, s, NULL, NULL, flags, NULL));
 
 	error = copyin(anamelen, &namelen, sizeof (namelen));
 	if (error != 0)
 		return (error);
 
 	error = kern_accept4(td, s, &name, &namelen, flags, &fp);
 	if (error != 0)
 		return (error);
 
 #ifdef COMPAT_OLDSOCK
 	/*
 	 * kern_accept4() can succeed without an address (name == NULL,
 	 * namelen == 0), so only rewrite the family when there is one.
 	 */
 	if (name != NULL && SV_PROC_FLAG(td->td_proc, SV_AOUT) &&
 	    (flags & ACCEPT4_COMPAT) != 0)
 		((struct osockaddr *)name)->sa_family =
 		    name->sa_family;
 #endif
 	error = copyout(name, uname, namelen);
 	if (error == 0)
 		error = copyout(&namelen, anamelen,
 		    sizeof(namelen));
 	/* The caller never sees the descriptor if the address is lost. */
 	if (error != 0)
 		fdclose(td, fp, td->td_retval[0]);
 	fdrop(fp, td);
 	free(name, M_SONAME);
 	return (error);
 }
 
 /*
  * Accept a connection with inherited file flags: shorthand for
  * kern_accept4() with ACCEPT4_INHERIT.
  */
 int
 kern_accept(struct thread *td, int s, struct sockaddr **name,
     socklen_t *namelen, struct file **fp)
 {
 	int error;
 
 	error = kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp);
 	return (error);
 }
 
 /*
  * Accept a pending connection on the listening socket behind descriptor
  * `s'.
  *
  * A new descriptor is allocated with falloc_caps() and a socket is taken
  * off the listen queue with solisten_dequeue(); the descriptor number is
  * stored in td->td_retval[0].  When `name' is non-NULL it receives the
  * peer address obtained from soaccept() (or NULL if there was none) and
  * *namelen is clamped to that address's sa_len (or set to 0).  When `fp'
  * is non-NULL it receives the new file with a reference held on success
  * and NULL on failure.  `flags' is tested for ACCEPT4_INHERIT,
  * SOCK_CLOEXEC and SOCK_NONBLOCK.  Returns 0 or an errno value.
  */
 int
 kern_accept4(struct thread *td, int s, struct sockaddr **name,
     socklen_t *namelen, int flags, struct file **fp)
 {
 	struct file *headfp, *nfp = NULL;
 	struct sockaddr *sa = NULL;
 	struct socket *head, *so;
 	struct filecaps fcaps;
 	u_int fflag;
 	pid_t pgid;
 	int error, fd, tmp;
 
 	if (name != NULL)
 		*name = NULL;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, &cap_accept_rights,
 	    &headfp, &fflag, &fcaps);
 	if (error != 0)
 		return (error);
 	head = headfp->f_data;
-	if ((head->so_options & SO_ACCEPTCONN) == 0) {
+	if (!SOLISTENING(head)) {
 		error = EINVAL;
 		goto done;
 	}
 #ifdef MAC
 	error = mac_socket_check_accept(td->td_ucred, head);
 	if (error != 0)
 		goto done;
 #endif
 	error = falloc_caps(td, &nfp, &fd,
 	    (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
 	if (error != 0)
 		goto done;
 	/* Re-check the listening state, this time under the socket lock. */
 	SOCK_LOCK(head);
 	if (!SOLISTENING(head)) {
 		SOCK_UNLOCK(head);
 		error = EINVAL;
 		goto noconnection;
 	}
 
 	/*
 	 * NOTE(review): head is not unlocked in this function after this
 	 * call; presumably solisten_dequeue() drops the lock -- confirm.
 	 */
 	error = solisten_dequeue(head, &so, flags);
 	if (error != 0)
 		goto noconnection;
 
 	/* An extra reference on `nfp' has been held for us by falloc(). */
 	td->td_retval[0] = fd;
 
 	/* Connection has been removed from the listen queue. */
 	KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
 
 	/*
 	 * With ACCEPT4_INHERIT the new socket takes over the listener's
 	 * sigio owner and keeps the listener's file flags; otherwise the
 	 * nonblocking/async flags are reset and only SOCK_NONBLOCK from
 	 * `flags' is honoured.
 	 */
 	if (flags & ACCEPT4_INHERIT) {
 		pgid = fgetown(&head->so_sigio);
 		if (pgid != 0)
 			fsetown(pgid, &so->so_sigio);
 	} else {
 		fflag &= ~(FNONBLOCK | FASYNC);
 		if (flags & SOCK_NONBLOCK)
 			fflag |= FNONBLOCK;
 	}
 
 	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 	/* Sync socket nonblocking/async state with file flags */
 	tmp = fflag & FNONBLOCK;
 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
 	tmp = fflag & FASYNC;
 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
 	error = soaccept(so, &sa);
 	if (error != 0)
 		goto noconnection;
 	/* No peer address available: report a zero-length name. */
 	if (sa == NULL) {
 		if (name)
 			*namelen = 0;
 		goto done;
 	}
 	AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
 	if (name) {
 		/* check sa_len before it is destroyed */
 		if (*namelen > sa->sa_len)
 			*namelen = sa->sa_len;
 #ifdef KTRACE
 		if (KTRPOINT(td, KTR_STRUCT))
 			ktrsockaddr(sa);
 #endif
 		/* Hand the address to the caller; sa must not be freed below. */
 		*name = sa;
 		sa = NULL;
 	}
 noconnection:
 	free(sa, M_SONAME);
 
 	/*
 	 * close the new descriptor, assuming someone hasn't ripped it
 	 * out from under us.
 	 */
 	if (error != 0)
 		fdclose(td, nfp, fd);
 
 	/*
 	 * Release explicitly held references before returning.  We return
 	 * a reference on nfp to the caller on success if they request it.
 	 */
 done:
 	/* fcaps is only released here when no new file was allocated. */
 	if (nfp == NULL)
 		filecaps_free(&fcaps);
 	if (fp != NULL) {
 		if (error == 0) {
 			*fp = nfp;
 			nfp = NULL;
 		} else
 			*fp = NULL;
 	}
 	if (nfp != NULL)
 		fdrop(nfp, td);
 	fdrop(headfp, td);
 	return (error);
 }
 
 /*
  * accept(2) system call entry point: forwards the user arguments to
  * accept1() with ACCEPT4_INHERIT as the flags.
  */
 int
 sys_accept(struct thread *td, struct accept_args *uap)
 {
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT));
 }
 
 /*
  * accept4(2) system call entry point.  Only SOCK_CLOEXEC and SOCK_NONBLOCK
  * are accepted in uap->flags; any other bit yields EINVAL.  The validated
  * flags are forwarded to accept1().
  */
 int
 sys_accept4(struct thread *td, struct accept4_args *uap)
 {
 
 	if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return (EINVAL);
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old accept entry point: calls accept1() with both ACCEPT4_INHERIT and
  * ACCEPT4_COMPAT set.
  */
 int
 oaccept(struct thread *td, struct oaccept_args *uap)
 {
 	int flags;
 
 	flags = ACCEPT4_INHERIT | ACCEPT4_COMPAT;
 	return (accept1(td, uap->s, uap->name, uap->anamelen, flags));
 }
 #endif /* COMPAT_OLDSOCK */
 
 /*
  * connect(2) system call entry point: copy the address in from userspace
  * and connect relative to AT_FDCWD.  The kernel copy of the address is
  * freed before returning.
  */
 int
 sys_connect(struct thread *td, struct connect_args *uap)
 {
 	struct sockaddr *addr;
 	int rv;
 
 	rv = getsockaddr(&addr, uap->name, uap->namelen);
 	if (rv != 0)
 		return (rv);
 
 	rv = kern_connectat(td, AT_FDCWD, uap->s, addr);
 	free(addr, M_SONAME);
 	return (rv);
 }
 
 /*
  * Connect the socket referenced by descriptor `fd' to the address `sa'.
  * soconnect() is used when `dirfd' is AT_FDCWD, soconnectat() otherwise.
  *
  * EALREADY is returned if the socket is already connecting on entry.  For
  * a non-blocking socket whose connection is still in progress EINPROGRESS
  * is returned.  Otherwise the thread sleeps until the connecting state
  * clears, a socket error is posted, or the sleep fails; ERESTART is
  * reported to the caller as EINTR.
  */
 int
 kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 {
 	struct socket *so;
 	struct file *fp;
 	int error, interrupted = 0;
 
 #ifdef CAPABILITY_MODE
 	/* In capability mode a connect relative to AT_FDCWD is refused. */
 	if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD))
 		return (ECAPMODE);
 #endif
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
 	error = getsock_cap(td, fd, &cap_connect_rights,
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	if (so->so_state & SS_ISCONNECTING) {
 		error = EALREADY;
 		goto done1;
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(sa);
 #endif
 #ifdef MAC
 	error = mac_socket_check_connect(td->td_ucred, so, sa);
 	if (error != 0)
 		goto bad;
 #endif
 	if (dirfd == AT_FDCWD)
 		error = soconnect(so, sa, td);
 	else
 		error = soconnectat(dirfd, so, sa, td);
 	if (error != 0)
 		goto bad;
 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 		error = EINPROGRESS;
 		goto done1;
 	}
 	SOCK_LOCK(so);
 	/* Sleep on so_timeo until connecting ends or an error is posted. */
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
 		error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
 		    "connec", 0);
 		if (error != 0) {
 			if (error == EINTR || error == ERESTART)
 				interrupted = 1;
 			break;
 		}
 	}
 	if (error == 0) {
 		/* Collect and clear the pending socket error. */
 		error = so->so_error;
 		so->so_error = 0;
 	}
 	SOCK_UNLOCK(so);
 bad:
 	/* SS_ISCONNECTING is left set when the sleep was interrupted. */
 	if (!interrupted)
 		so->so_state &= ~SS_ISCONNECTING;
 	if (error == ERESTART)
 		error = EINTR;
 done1:
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * connectat(2) system call entry point: copy the address in from userspace
  * and connect relative to uap->fd.  The kernel copy of the address is freed
  * before returning.
  */
 int
 sys_connectat(struct thread *td, struct connectat_args *uap)
 {
 	struct sockaddr *addr;
 	int rv;
 
 	rv = getsockaddr(&addr, uap->name, uap->namelen);
 	if (rv != 0)
 		return (rv);
 
 	rv = kern_connectat(td, uap->fd, uap->s, addr);
 	free(addr, M_SONAME);
 	return (rv);
 }
 
 /*
  * Create a pair of connected sockets in `domain' with the given `type' and
  * `protocol', and store their descriptors in rsv[0] and rsv[1].
  *
  * SOCK_CLOEXEC and SOCK_NONBLOCK may be or'ed into `type'; both are stripped
  * before the sockets are created.  SOCK_CLOEXEC becomes O_CLOEXEC on the
  * descriptors and SOCK_NONBLOCK becomes FNONBLOCK on the files.
  *
  * Returns 0 on success.  On failure an errno value is returned and the
  * descriptors and sockets created so far are released.
  */
 int
 kern_socketpair(struct thread *td, int domain, int type, int protocol,
     int *rsv)
 {
 	struct file *fp1, *fp2;
 	struct socket *so1, *so2;
 	int fd, error, oflag, fflag;
 
 	AUDIT_ARG_SOCKET(domain, type, protocol);
 
 	oflag = 0;
 	fflag = 0;
 	if ((type & SOCK_CLOEXEC) != 0) {
 		type &= ~SOCK_CLOEXEC;
 		oflag |= O_CLOEXEC;
 	}
 	if ((type & SOCK_NONBLOCK) != 0) {
 		type &= ~SOCK_NONBLOCK;
 		fflag |= FNONBLOCK;
 	}
 #ifdef MAC
 	/* We might want to have a separate check for socket pairs. */
 	error = mac_socket_check_create(td->td_ucred, domain, type,
 	    protocol);
 	if (error != 0)
 		return (error);
 #endif
 	error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
 	if (error != 0)
 		return (error);
 	error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
 	if (error != 0)
 		goto free1;
 	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
 	error = falloc(td, &fp1, &fd, oflag);
 	if (error != 0)
 		goto free2;
 	rsv[0] = fd;
 	fp1->f_data = so1;	/* so1 already has ref count */
 	error = falloc(td, &fp2, &fd, oflag);
 	if (error != 0)
 		goto free3;
 	fp2->f_data = so2;	/* so2 already has ref count */
 	rsv[1] = fd;
 	error = soconnect2(so1, so2);
 	if (error != 0)
 		goto free4;
 	if (type == SOCK_DGRAM) {
 		/*
 		 * Datagram socket connection is asymmetric.
 		 */
 		 error = soconnect2(so2, so1);
 		 if (error != 0)
 			goto free4;
 	} else if (so1->so_proto->pr_flags & PR_CONNREQUIRED) {
 		struct unpcb *unp, *unp2;
 		unp = sotounpcb(so1);
 		unp2 = sotounpcb(so2);
 		/*
 		 * No need to lock the unps, because the sockets are brand-new.
 		 * No other threads can be using them yet.
 		 */
 		unp_copy_peercred(td, unp, unp2, unp);
 	}
 	finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
 	    &socketops);
 	finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
 	    &socketops);
 	if ((fflag & FNONBLOCK) != 0) {
 		/* fflag is non-zero here, so it doubles as the FIONBIO argument. */
 		(void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
 		(void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
 	}
 	fdrop(fp1, td);
 	fdrop(fp2, td);
 	return (0);
 	/* Error unwind: release resources in reverse order of acquisition. */
 free4:
 	fdclose(td, fp2, rsv[1]);
 	fdrop(fp2, td);
 free3:
 	fdclose(td, fp1, rsv[0]);
 	fdrop(fp1, td);
 free2:
 	if (so2 != NULL)
 		(void)soclose(so2);
 free1:
 	if (so1 != NULL)
 		(void)soclose(so1);
 	return (error);
 }
 
 /*
  * socketpair(2) system call entry point.  Creates the pair and copies the
  * two descriptors out to uap->rsv; if that copyout fails, both descriptors
  * are closed again and the copyout error is returned.
  */
 int
 sys_socketpair(struct thread *td, struct socketpair_args *uap)
 {
 	int fds[2], rv;
 
 	rv = kern_socketpair(td, uap->domain, uap->type, uap->protocol, fds);
 	if (rv != 0)
 		return (rv);
 
 	rv = copyout(fds, uap->rsv, sizeof(fds));
 	if (rv == 0)
 		return (0);
 
 	(void)kern_close(td, fds[0]);
 	(void)kern_close(td, fds[1]);
 	return (rv);
 }
 
 /*
  * Common code for the send system calls: copy the destination address and
  * the control data named by `mp' in from userspace, then hand the message
  * to kern_sendit() with UIO_USERSPACE.
  *
  * mp->msg_name is overwritten with the kernel copy of the address; that
  * copy is freed before returning.
  */
 static int
 sendit(struct thread *td, int s, struct msghdr *mp, int flags)
 {
 	struct mbuf *control;
 	struct sockaddr *to;
 	int error;
 
 #ifdef CAPABILITY_MODE
 	/* An explicit destination address is refused in capability mode. */
 	if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
 		return (ECAPMODE);
 #endif
 
 	if (mp->msg_name != NULL) {
 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
 		if (error != 0) {
 			/* getsockaddr() leaves `to' unset on failure. */
 			to = NULL;
 			goto bad;
 		}
 		mp->msg_name = to;
 	} else {
 		to = NULL;
 	}
 
 	if (mp->msg_control) {
 		/*
 		 * Control data shorter than a cmsghdr is rejected, except for
 		 * MSG_COMPAT requests from SV_AOUT processes when
 		 * COMPAT_OLDSOCK is configured.
 		 */
 		if (mp->msg_controllen < sizeof(struct cmsghdr)
 #ifdef COMPAT_OLDSOCK
 		    && (mp->msg_flags != MSG_COMPAT ||
 		    !SV_PROC_FLAG(td->td_proc, SV_AOUT))
 #endif
 		) {
 			error = EINVAL;
 			goto bad;
 		}
 		error = sockargs(&control, mp->msg_control,
 		    mp->msg_controllen, MT_CONTROL);
 		if (error != 0)
 			goto bad;
 #ifdef COMPAT_OLDSOCK
 		if (mp->msg_flags == MSG_COMPAT &&
 		    SV_PROC_FLAG(td->td_proc, SV_AOUT)) {
 			struct cmsghdr *cm;
 
 			/* Wrap the old-style data in an SCM_RIGHTS cmsghdr. */
 			M_PREPEND(control, sizeof(*cm), M_WAITOK);
 			cm = mtod(control, struct cmsghdr *);
 			cm->cmsg_len = control->m_len;
 			cm->cmsg_level = SOL_SOCKET;
 			cm->cmsg_type = SCM_RIGHTS;
 		}
 #endif
 	} else {
 		control = NULL;
 	}
 
 	/*
 	 * NOTE(review): `control' is not freed here; kern_sendit() frees it
 	 * on its own early error paths and sosend() presumably consumes it
 	 * otherwise -- confirm.
 	 */
 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
 
 bad:
 	free(to, M_SONAME);
 	return (error);
 }
 
 /*
  * Send the message described by `mp' on the socket referenced by
  * descriptor `s'.  `segflg' names the address space of the iovec data.
  * On success the number of bytes sent is stored in td->td_retval[0].
  *
  * `control', if not NULL, is freed here on every error path taken before
  * sosend() is called.
  */
 int
 kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
     struct mbuf *control, enum uio_seg segflg)
 {
 	struct file *fp;
 	struct uio auio;
 	struct iovec *iov;
 	struct socket *so;
 	cap_rights_t *rights;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 	ssize_t len;
 	int i, error;
 
 	AUDIT_ARG_FD(s);
 	rights = &cap_send_rights;
 	if (mp->msg_name != NULL) {
 		/* An explicit destination uses cap_send_connect_rights. */
 		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
 		rights = &cap_send_connect_rights;
 	}
 	error = getsock_cap(td, s, rights, &fp, NULL, NULL);
 	if (error != 0) {
 		m_freem(control);
 		return (error);
 	}
 	so = (struct socket *)fp->f_data;
 
 #ifdef KTRACE
 	if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(mp->msg_name);
 #endif
 #ifdef MAC
 	if (mp->msg_name != NULL) {
 		error = mac_socket_check_connect(td->td_ucred, so,
 		    mp->msg_name);
 		if (error != 0) {
 			m_freem(control);
 			goto bad;
 		}
 	}
 	error = mac_socket_check_send(td->td_ucred, so);
 	if (error != 0) {
 		m_freem(control);
 		goto bad;
 	}
 #endif
 
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
 	auio.uio_segflg = segflg;
 	auio.uio_rw = UIO_WRITE;
 	auio.uio_td = td;
 	auio.uio_offset = 0;			/* XXX */
 	auio.uio_resid = 0;
 	iov = mp->msg_iov;
 	/* Sum the iovec lengths; a total that goes negative is rejected. */
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
 			error = EINVAL;
 			m_freem(control);
 			goto bad;
 		}
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(&auio);
 #endif
 	len = auio.uio_resid;
 	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
 	if (error != 0) {
 		/*
 		 * A partial transfer cut short by a signal or by
 		 * EWOULDBLOCK is reported as success.
 		 */
 		if (auio.uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 		/* Generation of SIGPIPE can be controlled per socket */
 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 		    !(flags & MSG_NOSIGNAL)) {
 			PROC_LOCK(td->td_proc);
 			tdsignal(td, SIGPIPE);
 			PROC_UNLOCK(td->td_proc);
 		}
 	}
 	if (error == 0)
 		td->td_retval[0] = len - auio.uio_resid;
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = td->td_retval[0];
 		ktrgenio(s, UIO_WRITE, ktruio, error);
 	}
 #endif
 bad:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_sendto(struct thread *td, struct sendto_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = __DECONST(void *, uap->to);
 	msg.msg_namelen = uap->tolen;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	msg.msg_control = 0;
 #ifdef COMPAT_OLDSOCK
 	if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
 		msg.msg_flags = 0;
 #endif
 	aiov.iov_base = __DECONST(void *, uap->buf);
 	aiov.iov_len = uap->len;
 	return (sendit(td, uap->s, &msg, uap->flags));
 }
 
 #ifdef COMPAT_OLDSOCK
 int
 osend(struct thread *td, struct osend_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = 0;
 	msg.msg_namelen = 0;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = __DECONST(void *, uap->buf);
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = 0;
 	return (sendit(td, uap->s, &msg, uap->flags));
 }
 
 int
 osendmsg(struct thread *td, struct osendmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_iov = iov;
 	msg.msg_flags = MSG_COMPAT;
 	error = sendit(td, uap->s, &msg, uap->flags);
 	free(iov, M_IOV);
 	return (error);
 }
 #endif
 
 int
 sys_sendmsg(struct thread *td, struct sendmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (msg));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_iov = iov;
 #ifdef COMPAT_OLDSOCK
 	if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
 		msg.msg_flags = 0;
 #endif
 	error = sendit(td, uap->s, &msg, uap->flags);
 	free(iov, M_IOV);
 	return (error);
 }
 
 /*
  * Receive a message on the socket referenced by descriptor `s' into the
  * buffers described by `mp'.  On success the number of bytes received is
  * stored in td->td_retval[0].
  *
  * If mp->msg_name is set, the source address is copied to it with
  * copyout() when `fromseg' is UIO_USERSPACE and with bcopy() otherwise,
  * and mp->msg_namelen is updated to the number of bytes copied.
  *
  * If `controlp' is non-NULL, *controlp is cleared on entry and receives
  * the control mbuf chain on success.  Otherwise, when mp->msg_control is
  * set, control data is copied out to it and MSG_CTRUNC is set in
  * mp->msg_flags if it does not all fit.
  */
 int
 kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
     struct mbuf **controlp)
 {
 	struct uio auio;
 	struct iovec *iov;
 	struct mbuf *control, *m;
 	caddr_t ctlbuf;
 	struct file *fp;
 	struct socket *so;
 	struct sockaddr *fromsa = NULL;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 	ssize_t len;
 	int error, i;
 
 	if (controlp != NULL)
 		*controlp = NULL;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, &cap_recv_rights,
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 
 #ifdef MAC
 	error = mac_socket_check_receive(td->td_ucred, so);
 	if (error != 0) {
 		fdrop(fp, td);
 		return (error);
 	}
 #endif
 
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_rw = UIO_READ;
 	auio.uio_td = td;
 	auio.uio_offset = 0;			/* XXX */
 	auio.uio_resid = 0;
 	iov = mp->msg_iov;
 	/* Sum the iovec lengths; a total that goes negative is rejected. */
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
 			fdrop(fp, td);
 			return (EINVAL);
 		}
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(&auio);
 #endif
 	control = NULL;
 	len = auio.uio_resid;
 	/* Control data is requested only if somebody will consume it. */
 	error = soreceive(so, &fromsa, &auio, NULL,
 	    (mp->msg_control || controlp) ? &control : NULL,
 	    &mp->msg_flags);
 	if (error != 0) {
 		/*
 		 * A partial transfer cut short by a signal or by
 		 * EWOULDBLOCK is reported as success.
 		 */
 		if (auio.uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 	}
 	if (fromsa != NULL)
 		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = len - auio.uio_resid;
 		ktrgenio(s, UIO_READ, ktruio, error);
 	}
 #endif
 	if (error != 0)
 		goto out;
 	td->td_retval[0] = len - auio.uio_resid;
 	if (mp->msg_name) {
 		/* From here on `len' is the address length to report. */
 		len = mp->msg_namelen;
 		if (len <= 0 || fromsa == NULL)
 			len = 0;
 		else {
 			/* save sa_len before it is destroyed by MSG_COMPAT */
 			len = MIN(len, fromsa->sa_len);
 #ifdef COMPAT_OLDSOCK
 			if ((mp->msg_flags & MSG_COMPAT) != 0 &&
 			    SV_PROC_FLAG(td->td_proc, SV_AOUT))
 				((struct osockaddr *)fromsa)->sa_family =
 				    fromsa->sa_family;
 #endif
 			if (fromseg == UIO_USERSPACE) {
 				error = copyout(fromsa, mp->msg_name,
 				    (unsigned)len);
 				if (error != 0)
 					goto out;
 			} else
 				bcopy(fromsa, mp->msg_name, len);
 		}
 		mp->msg_namelen = len;
 	}
 	if (mp->msg_control && controlp == NULL) {
 #ifdef COMPAT_OLDSOCK
 		/*
 		 * We assume that old recvmsg calls won't receive access
 		 * rights and other control info, esp. as control info
 		 * is always optional and those options didn't exist in 4.3.
 		 * If we receive rights, trim the cmsghdr; anything else
 		 * is tossed.
 		 */
 		if (control && (mp->msg_flags & MSG_COMPAT) != 0 &&
 		    SV_PROC_FLAG(td->td_proc, SV_AOUT)) {
 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
 			    SOL_SOCKET ||
 			    mtod(control, struct cmsghdr *)->cmsg_type !=
 			    SCM_RIGHTS) {
 				mp->msg_controllen = 0;
 				goto out;
 			}
 			control->m_len -= sizeof (struct cmsghdr);
 			control->m_data += sizeof (struct cmsghdr);
 		}
 #endif
 		/* From here on `len' is the space left in the control buffer. */
 		ctlbuf = mp->msg_control;
 		len = mp->msg_controllen;
 		mp->msg_controllen = 0;
 		/* Copy out whole mbufs for as long as the next one fits. */
 		for (m = control; m != NULL && len >= m->m_len; m = m->m_next) {
 			if ((error = copyout(mtod(m, caddr_t), ctlbuf,
 			    m->m_len)) != 0)
 				goto out;
 
 			ctlbuf += m->m_len;
 			len -= m->m_len;
 			mp->msg_controllen += m->m_len;
 		}
 		if (m != NULL) {
 			/*
 			 * The rest did not fit: flag truncation and dispose
 			 * of any rights in the part that was not copied out.
 			 */
 			mp->msg_flags |= MSG_CTRUNC;
 			m_dispose_extcontrolm(m);
 		}
 	}
 out:
 	fdrop(fp, td);
 #ifdef KTRACE
 	if (fromsa && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(fromsa);
 #endif
 	free(fromsa, M_SONAME);
 
 	/*
 	 * Pass the control chain to the caller on success if requested;
 	 * otherwise free it, first disposing of externalized rights when an
 	 * error occurred.
 	 */
 	if (error == 0 && controlp != NULL)
 		*controlp = control;
 	else if (control != NULL) {
 		if (error != 0)
 			m_dispose_extcontrolm(control);
 		m_freem(control);
 	}
 
 	return (error);
 }
 
 /*
  * Common code for the receive system calls: run kern_recvit() with a
  * userspace name buffer and, if `namelenp' is non-NULL, copy the resulting
  * mp->msg_namelen out to it.  For MSG_COMPAT requests from SV_AOUT
  * processes a failure of that copyout is ignored.
  */
 static int
 recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp)
 {
 	int rv;
 
 	rv = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 	if (rv != 0 || namelenp == NULL)
 		return (rv);
 
 	rv = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 #ifdef COMPAT_OLDSOCK
 	if ((mp->msg_flags & MSG_COMPAT) != 0 &&
 	    SV_PROC_FLAG(td->td_proc, SV_AOUT))
 		rv = 0;	/* old recvfrom didn't check */
 #endif
 	return (rv);
 }
 
 int
 sys_recvfrom(struct thread *td, struct recvfrom_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 	int error;
 
 	if (uap->fromlenaddr) {
 		error = copyin(uap->fromlenaddr,
 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
 		if (error != 0)
 			goto done2;
 	} else {
 		msg.msg_namelen = 0;
 	}
 	msg.msg_name = uap->from;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = uap->flags;
 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 done2:
 	return (error);
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old recvfrom entry point: sys_recvfrom() with MSG_COMPAT added to the
  * caller's flags.
  */
 int
 orecvfrom(struct thread *td, struct recvfrom_args *uap)
 {
 
 	uap->flags = uap->flags | MSG_COMPAT;
 	return (sys_recvfrom(td, uap));
 }
 #endif
 
 #ifdef COMPAT_OLDSOCK
 int
 orecv(struct thread *td, struct orecv_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = 0;
 	msg.msg_namelen = 0;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = uap->flags;
 	return (recvit(td, uap->s, &msg, NULL));
 }
 
 /*
  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
  * overlays the new one, missing only the flags, and with the (old) access
  * rights where the control fields are now.
  */
 int
 orecvmsg(struct thread *td, struct orecvmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_flags = uap->flags | MSG_COMPAT;
 	msg.msg_iov = iov;
 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 	if (msg.msg_controllen && error == 0)
 		error = copyout(&msg.msg_controllen,
 		    &uap->msg->msg_accrightslen, sizeof (int));
 	free(iov, M_IOV);
 	return (error);
 }
 #endif
 
 int
 sys_recvmsg(struct thread *td, struct recvmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *uiov, *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (msg));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_flags = uap->flags;
 #ifdef COMPAT_OLDSOCK
 	if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
 		msg.msg_flags &= ~MSG_COMPAT;
 #endif
 	uiov = msg.msg_iov;
 	msg.msg_iov = iov;
 	error = recvit(td, uap->s, &msg, NULL);
 	if (error == 0) {
 		msg.msg_iov = uiov;
 		error = copyout(&msg, uap->msg, sizeof(msg));
 	}
 	free(iov, M_IOV);
 	return (error);
 }
 
 /*
  * shutdown(2) system call entry point; unpacks the arguments for
  * kern_shutdown().
  */
 int
 sys_shutdown(struct thread *td, struct shutdown_args *uap)
 {
 	int rv;
 
 	rv = kern_shutdown(td, uap->s, uap->how);
 	return (rv);
 }
 
 /*
  * Shut down the socket referenced by descriptor `s' according to `how'.
  * ENOTCONN from soshutdown() is reported as success to processes whose
  * p_osrel is below P_OSREL_SHUTDOWN_ENOTCONN.
  */
 int
 kern_shutdown(struct thread *td, int s, int how)
 {
 	struct file *fp;
 	int rv;
 
 	AUDIT_ARG_FD(s);
 	rv = getsock_cap(td, s, &cap_shutdown_rights, &fp, NULL, NULL);
 	if (rv != 0)
 		return (rv);
 
 	rv = soshutdown(fp->f_data, how);
 	/*
 	 * Older versions returned 0 rather than ENOTCONN for a socket that
 	 * was not connected, and some important programs (syslogd up to
 	 * r279016, 2015-02-19) still rely on that behavior.
 	 */
 	if (rv == ENOTCONN &&
 	    td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN)
 		rv = 0;
 	fdrop(fp, td);
 	return (rv);
 }
 
 /*
  * setsockopt(2) system call entry point; the option value is a userspace
  * pointer, so kern_setsockopt() is called with UIO_USERSPACE.
  */
 int
 sys_setsockopt(struct thread *td, struct setsockopt_args *uap)
 {
 	int rv;
 
 	rv = kern_setsockopt(td, uap->s, uap->level, uap->name, uap->val,
 	    UIO_USERSPACE, uap->valsize);
 	return (rv);
 }
 
 /*
  * Kernel version of setsockopt.  `val' is a userspace or a kernel pointer
  * as selected by `valseg'; any other `valseg' value panics.
  *
  * Returns EFAULT for a NULL `val' with a non-zero `valsize', EINVAL for a
  * `valsize' that is negative when viewed as an int, otherwise the result
  * of the descriptor lookup or of sosetopt().
  */
 int
 kern_setsockopt(struct thread *td, int s, int level, int name, const void *val,
     enum uio_seg valseg, socklen_t valsize)
 {
 	struct sockopt opt;
 	struct file *fp;
 	int rv;
 
 	if (val == NULL && valsize != 0)
 		return (EFAULT);
 	if ((int)valsize < 0)
 		return (EINVAL);
 
 	opt.sopt_dir = SOPT_SET;
 	opt.sopt_level = level;
 	opt.sopt_name = name;
 	opt.sopt_val = __DECONST(void *, val);
 	opt.sopt_valsize = valsize;
 	if (valseg == UIO_USERSPACE)
 		opt.sopt_td = td;
 	else if (valseg == UIO_SYSSPACE)
 		opt.sopt_td = NULL;
 	else
 		panic("kern_setsockopt called with bad valseg");
 
 	AUDIT_ARG_FD(s);
 	rv = getsock_cap(td, s, &cap_setsockopt_rights, &fp, NULL, NULL);
 	if (rv != 0)
 		return (rv);
 
 	rv = sosetopt(fp->f_data, &opt);
 	fdrop(fp, td);
 	return (rv);
 }
 
 int
 sys_getsockopt(struct thread *td, struct getsockopt_args *uap)
 {
 	socklen_t valsize;
 	int error;
 
 	if (uap->val) {
 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 		if (error != 0)
 			return (error);
 	}
 
 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 	    uap->val, UIO_USERSPACE, &valsize);
 
 	if (error == 0)
 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 	return (error);
 }
 
 /*
  * Kernel version of getsockopt.
  * `val' may be a userspace or a kernel pointer, as selected by `valseg'
  * (any other `valseg' value panics); `valsize' is always a kernel pointer.
  *
  * *valsize is forced to 0 when `val' is NULL, and EINVAL is returned if it
  * is negative when viewed as an int.  Once the descriptor lookup succeeds,
  * *valsize is updated from the sockopt even if sogetopt() fails.
  */
 int
 kern_getsockopt(struct thread *td, int s, int level, int name, void *val,
     enum uio_seg valseg, socklen_t *valsize)
 {
 	struct sockopt opt;
 	struct file *fp;
 	int rv;
 
 	if (val == NULL)
 		*valsize = 0;
 	if ((int)*valsize < 0)
 		return (EINVAL);
 
 	opt.sopt_dir = SOPT_GET;
 	opt.sopt_level = level;
 	opt.sopt_name = name;
 	opt.sopt_val = val;
 	opt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 	if (valseg == UIO_USERSPACE)
 		opt.sopt_td = td;
 	else if (valseg == UIO_SYSSPACE)
 		opt.sopt_td = NULL;
 	else
 		panic("kern_getsockopt called with bad valseg");
 
 	AUDIT_ARG_FD(s);
 	rv = getsock_cap(td, s, &cap_getsockopt_rights, &fp, NULL, NULL);
 	if (rv != 0)
 		return (rv);
 
 	rv = sogetopt(fp->f_data, &opt);
 	*valsize = opt.sopt_valsize;
 	fdrop(fp, td);
 	return (rv);
 }
 
 /*
  * getsockname1() - Get socket name.
  */
 static int
 getsockname1(struct thread *td, struct getsockname_args *uap, int compat)
 {
 	struct sockaddr *sa;
 	socklen_t len;
 	int error;
 
 	error = copyin(uap->alen, &len, sizeof(len));
 	if (error != 0)
 		return (error);
 
 	error = kern_getsockname(td, uap->fdes, &sa, &len);
 	if (error != 0)
 		return (error);
 
 	if (len != 0) {
 #ifdef COMPAT_OLDSOCK
 		if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT))
 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
 #endif
 		error = copyout(sa, uap->asa, (u_int)len);
 	}
 	free(sa, M_SONAME);
 	if (error == 0)
 		error = copyout(&len, uap->alen, sizeof(len));
 	return (error);
 }
 
 /*
  * Fetch the local address of the socket referenced by descriptor `fd'.
  *
  * On entry *alen holds the size of the caller's buffer.  On success *sa
  * points to the address returned by the protocol's pru_sockaddr method (or
  * is NULL if it returned none) and *alen is set to the number of bytes to
  * hand back: the smaller of the buffer size and the address's sa_len, or 0
  * when there is no address.  The caller releases *sa with
  * free(*sa, M_SONAME).
  *
  * On failure an errno value is returned and *sa is NULL.
  */
 int
 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
     socklen_t *alen)
 {
 	struct socket *so;
 	struct file *fp;
 	socklen_t len;
 	int error;
 
 	/* Give the out-parameter a defined value on every return path. */
 	*sa = NULL;
 
 	AUDIT_ARG_FD(fd);
 	error = getsock_cap(td, fd, &cap_getsockname_rights,
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 	CURVNET_RESTORE();
 	if (error != 0)
 		goto bad;
 	if (*sa == NULL)
 		len = 0;
 	else
 		len = MIN(*alen, (*sa)->sa_len);
 	*alen = len;
 #ifdef KTRACE
 	/* Nothing to trace when the protocol returned no address. */
 	if (*sa != NULL && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(*sa);
 #endif
 bad:
 	fdrop(fp, td);
 	if (error != 0) {
 		/* The protocol may have allocated an address before failing. */
 		free(*sa, M_SONAME);
 		*sa = NULL;
 	}
 	return (error);
 }
 
 /*
  * getsockname(2) system call entry point: getsockname1() without the
  * old-sockaddr compatibility conversion.
  */
 int
 sys_getsockname(struct thread *td, struct getsockname_args *uap)
 {
 	int rv;
 
 	rv = getsockname1(td, uap, 0);
 	return (rv);
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old getsockname entry point: getsockname1() with the compatibility
  * conversion requested.
  */
 int
 ogetsockname(struct thread *td, struct getsockname_args *uap)
 {
 	int rv;
 
 	rv = getsockname1(td, uap, 1);
 	return (rv);
 }
 #endif /* COMPAT_OLDSOCK */
 
 /*
  * getpeername1() - Get name of peer for connected socket.
  */
 static int
 getpeername1(struct thread *td, struct getpeername_args *uap, int compat)
 {
 	struct sockaddr *sa;
 	socklen_t len;
 	int error;
 
 	error = copyin(uap->alen, &len, sizeof (len));
 	if (error != 0)
 		return (error);
 
 	error = kern_getpeername(td, uap->fdes, &sa, &len);
 	if (error != 0)
 		return (error);
 
 	if (len != 0) {
 #ifdef COMPAT_OLDSOCK
 		if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT))
 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
 #endif
 		error = copyout(sa, uap->asa, (u_int)len);
 	}
 	free(sa, M_SONAME);
 	if (error == 0)
 		error = copyout(&len, uap->alen, sizeof(len));
 	return (error);
 }
 
 /*
  * Fetch the peer address of the socket referenced by descriptor `fd'.
  *
  * ENOTCONN is returned unless the socket is in the SS_ISCONNECTED or
  * SS_ISCONFIRMING state.  On entry *alen holds the size of the caller's
  * buffer.  On success *sa points to the address returned by the protocol's
  * pru_peeraddr method (or is NULL if it returned none) and *alen is set to
  * the number of bytes to hand back: the smaller of the buffer size and the
  * address's sa_len, or 0 when there is no address.  The caller releases
  * *sa with free(*sa, M_SONAME).
  *
  * On failure an errno value is returned and *sa is NULL.
  */
 int
 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
     socklen_t *alen)
 {
 	struct socket *so;
 	struct file *fp;
 	socklen_t len;
 	int error;
 
 	/* Give the out-parameter a defined value on every return path. */
 	*sa = NULL;
 
 	AUDIT_ARG_FD(fd);
 	error = getsock_cap(td, fd, &cap_getpeername_rights,
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 		error = ENOTCONN;
 		goto done;
 	}
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 	CURVNET_RESTORE();
 	if (error != 0)
 		goto bad;
 	if (*sa == NULL)
 		len = 0;
 	else
 		len = MIN(*alen, (*sa)->sa_len);
 	*alen = len;
 #ifdef KTRACE
 	/* Nothing to trace when the protocol returned no address. */
 	if (*sa != NULL && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(*sa);
 #endif
 bad:
 	if (error != 0) {
 		/* The protocol may have allocated an address before failing. */
 		free(*sa, M_SONAME);
 		*sa = NULL;
 	}
 done:
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * getpeername(2) system call entry point: getpeername1() without the
  * old-sockaddr compatibility conversion.
  */
 int
 sys_getpeername(struct thread *td, struct getpeername_args *uap)
 {
 	int rv;
 
 	rv = getpeername1(td, uap, 0);
 	return (rv);
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old getpeername entry point: getpeername1() with the compatibility
  * conversion requested.
  */
 int
 ogetpeername(struct thread *td, struct ogetpeername_args *uap)
 {
 	struct getpeername_args *args;
 
 	/* XXX uap should have type `getpeername_args *' to begin with. */
 	args = (struct getpeername_args *)uap;
 	return (getpeername1(td, args, 1));
 }
 #endif /* COMPAT_OLDSOCK */
 
 /*
  * Copy `buflen' bytes from the user buffer `buf' into a newly allocated
  * mbuf of the given `type' and return it through *mp.
  *
  * A `buflen' above MCLBYTES is rejected with EINVAL.  For MT_SONAME the
  * sa_len field of the copied sockaddr is overwritten with `buflen'.  If the
  * copyin fails, the mbuf is freed, *mp is left untouched and the copyin
  * error is returned.
  */
 static int
 sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
 {
 	struct sockaddr *sa;
 	struct mbuf *m;
 	int error;
 
 	if (buflen > MLEN) {
 #ifdef COMPAT_OLDSOCK
 		if (type == MT_SONAME && buflen <= 112 &&
 		    SV_CURPROC_FLAG(SV_AOUT))
 			buflen = MLEN;		/* unix domain compat. hack */
 		else
 #endif
 			/*
 			 * With COMPAT_OLDSOCK this `if' is the body of the
 			 * `else' above; without it, it stands alone.
 			 */
 			if (buflen > MCLBYTES)
 				return (EINVAL);
 	}
 	m = m_get2(buflen, M_WAITOK, type, 0);
 	m->m_len = buflen;
 	error = copyin(buf, mtod(m, void *), buflen);
 	if (error != 0)
 		(void) m_free(m);
 	else {
 		*mp = m;
 		if (type == MT_SONAME) {
 			sa = mtod(m, struct sockaddr *);
 
 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 			/*
 			 * NOTE(review): presumably an old-layout sockaddr
 			 * whose 16-bit family landed in sa_len; move it to
 			 * sa_family -- confirm.
 			 */
 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX &&
 			    SV_CURPROC_FLAG(SV_AOUT))
 				sa->sa_family = sa->sa_len;
 #endif
 			sa->sa_len = buflen;
 		}
 	}
 	return (error);
 }
 
 /*
  * Copy a socket address of `len' bytes in from the user pointer `uaddr'
  * into a freshly allocated M_SONAME buffer and return it through *namp.
  * The sa_len field of the copy is set to `len'.
  *
  * Returns ENAMETOOLONG if `len' exceeds SOCK_MAXADDRLEN, EINVAL if it is
  * smaller than the offset of sa_data, or the copyin error.  *namp is
  * written only on success; the caller then frees it with M_SONAME.
  */
 int
 getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len)
 {
 	struct sockaddr *ksa;
 	int rv;
 
 	if (len > SOCK_MAXADDRLEN)
 		return (ENAMETOOLONG);
 	if (len < offsetof(struct sockaddr, sa_data[0]))
 		return (EINVAL);
 
 	ksa = malloc(len, M_SONAME, M_WAITOK);
 	rv = copyin(uaddr, ksa, len);
 	if (rv != 0) {
 		free(ksa, M_SONAME);
 		return (rv);
 	}
 
 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 	if (ksa->sa_family == 0 && ksa->sa_len < AF_MAX &&
 	    SV_CURPROC_FLAG(SV_AOUT))
 		ksa->sa_family = ksa->sa_len;
 #endif
 	ksa->sa_len = len;
 	*namp = ksa;
 	return (0);
 }
 
 /*
  * Dispose of externalized rights from an SCM_RIGHTS message.  This function
  * should be used in error or truncation cases to avoid leaking file descriptors
  * into the recipient's (the current thread's) table.
  *
  * Walks the mbuf chain starting at `m'.  In every MT_EXTCONTROL mbuf, each
  * descriptor carried by a SOL_SOCKET/SCM_RIGHTS control message is closed
  * in the current thread, after which the mbuf is retyped to MT_CONTROL.
  * Panics if a control message extends past the end of its mbuf.
  */
 void
 m_dispose_extcontrolm(struct mbuf *m)
 {
 	struct cmsghdr *cm;
 	struct file *fp;
 	struct thread *td;
 	socklen_t clen, datalen;
 	int error, fd, *fds, nfd;
 
 	td = curthread;
 	for (; m != NULL; m = m->m_next) {
 		/* Only mbufs still typed MT_EXTCONTROL are processed. */
 		if (m->m_type != MT_EXTCONTROL)
 			continue;
 		cm = mtod(m, struct cmsghdr *);
 		clen = m->m_len;
 		/* Step through the control messages packed in this mbuf. */
 		while (clen > 0) {
 			if (clen < sizeof(*cm))
 				panic("%s: truncated mbuf %p", __func__, m);
 			/* Bytes this message occupies; used to step to the next. */
 			datalen = CMSG_SPACE(cm->cmsg_len - CMSG_SPACE(0));
 			if (clen < datalen)
 				panic("%s: truncated mbuf %p", __func__, m);
 
 			if (cm->cmsg_level == SOL_SOCKET &&
 			    cm->cmsg_type == SCM_RIGHTS) {
 				/* The payload is an array of descriptors. */
 				fds = (int *)CMSG_DATA(cm);
 				nfd = (cm->cmsg_len - CMSG_SPACE(0)) /
 				    sizeof(int);
 
 				while (nfd-- > 0) {
 					fd = *fds++;
 					/* Close it if fget() can still resolve it. */
 					error = fget(td, fd, &cap_no_rights,
 					    &fp);
 					if (error == 0) {
 						fdclose(td, fp, fd);
 						fdrop(fp, td);
 					}
 				}
 			}
 			clen -= datalen;
 			cm = (struct cmsghdr *)((uint8_t *)cm + datalen);
 		}
 		/* Retype the mbuf so it is skipped if seen again. */
 		m_chtype(m, MT_CONTROL);
 	}
 }
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 3f7198c2f3ae..eada98b48a1e 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1,2996 +1,2996 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California. All Rights Reserved.
  * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved.
  * Copyright (c) 2018 Matthew Macy
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
  */
 
 /*
  * UNIX Domain (Local) Sockets
  *
  * This is an implementation of UNIX (local) domain sockets.  Each socket has
  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
  * may be connected to 0 or 1 other socket.  Datagram sockets may be
  * connected to 0, 1, or many other sockets.  Sockets may be created and
  * connected in pairs (socketpair(2)), or bound/connected to using the file
  * system name space.  For most purposes, only the receive socket buffer is
  * used, as sending on one socket delivers directly to the receive socket
  * buffer of a second socket.
  *
  * The implementation is substantially complicated by the fact that
  * "ancillary data", such as file descriptors or credentials, may be passed
  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
  * over other UNIX domain sockets requires the implementation of a simple
  * garbage collector to find and tear down cycles of disconnected sockets.
  *
  * TODO:
  *	RDM
  *	rethink name space problems
  *	need a proper out-of-band
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/vnode.h>
 
 #include <net/vnet.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 MALLOC_DECLARE(M_FILECAPS);
 
 /*
  * See unpcb.h for the locking key.
  */
 
 /* UMA zone from which struct unpcb is allocated and to which it is freed. */
 static uma_zone_t	unp_zone;
 static unp_gen_t	unp_gencnt;	/* (l) */
 static u_int		unp_count;	/* (l) Count of local sockets. */
 static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
 static int		unp_rights;	/* (g) File descriptors in flight. */
 static struct unp_head	unp_shead;	/* (l) List of stream sockets. */
 static struct unp_head	unp_dhead;	/* (l) List of datagram sockets. */
 static struct unp_head	unp_sphead;	/* (l) List of seqpacket sockets. */
 
 /*
  * One entry on the list of files whose close has been deferred; the count of
  * such entries is exported read-only as net.local.deferred.
  */
 struct unp_defer {
 	SLIST_ENTRY(unp_defer) ud_link;
 	struct file *ud_fp;
 };
 static SLIST_HEAD(, unp_defer) unp_defers;
 static int unp_defers_count;
 
 /*
  * Placeholder AF_LOCAL address with no path, handed out when a socket or its
  * peer has no bound name.
  */
 static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
 
 /*
  * Garbage collection of cyclic file descriptor/socket references occurs
  * asynchronously in a taskqueue context in order to avoid recursion and
  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  * code.  See unp_gc() for a full description.
  */
 static struct timeout_task unp_gc_task;
 
 /*
  * The close of unix domain sockets attached as SCM_RIGHTS is
  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  * The attached sockets might themselves have other sockets attached.
  */
 static struct task	unp_defer_task;
 
 /*
  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  * stream sockets, although the total for sender and receiver is actually
  * only PIPSIZ.
  *
  * Datagram sockets really use the sendspace as the maximum datagram size,
  * and don't really want to reserve the sendspace.  Their recvspace should be
  * large enough for at least one max-size datagram plus address.
  */
 #ifndef PIPSIZ
 #define	PIPSIZ	8192
 #endif
 /* Defaults; each is tunable through the net.local.* sysctls below. */
 static u_long	unpst_sendspace = PIPSIZ;
 static u_long	unpst_recvspace = PIPSIZ;
 static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
 static u_long	unpdg_recvspace = 4*1024;
 static u_long	unpsp_sendspace = PIPSIZ;	/* really max seqpacket size */
 static u_long	unpsp_recvspace = PIPSIZ;
 
 /*
  * sysctl tree for the local domain: net.local, with one child node per socket
  * type carrying the default buffer sizes, plus read-only counters for
  * in-flight and deferred file descriptors.
  */
 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Local domain");
 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_STREAM");
 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_DGRAM");
 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_SEQPACKET");
 
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
 	   &unpst_sendspace, 0, "Default stream send space.");
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpst_recvspace, 0, "Default stream receive space.");
 /* Note: the datagram and seqpacket send sizes are named max{dgram,seqpacket}. */
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
 	   &unpdg_sendspace, 0, "Default datagram send space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpdg_recvspace, 0, "Default datagram receive space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
 	   &unpsp_sendspace, 0, "Default seqpacket send space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpsp_recvspace, 0, "Default seqpacket receive space.");
 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
     "File descriptors in flight.");
 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
     &unp_defers_count, 0,
     "File descriptors deferred to taskqueue for close.");
 
 /*
  * Locking and synchronization:
  *
  * Several types of locks exist in the local domain socket implementation:
  * - a global linkage lock
  * - a global connection list lock
  * - the mtxpool lock
  * - per-unpcb mutexes
  *
  * The linkage lock protects the global socket lists, the generation number
  * counter and garbage collector state.
  *
  * The connection list lock protects the list of referring sockets in a datagram
  * socket PCB.  This lock is also overloaded to protect a global list of
  * sockets whose buffers contain socket references in the form of SCM_RIGHTS
  * messages.  To avoid recursion, such references are released by a dedicated
  * thread.
  *
  * The mtxpool lock protects the vnode from being modified while referenced.
  * Lock ordering rules require that it be acquired before any PCB locks.
  *
  * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the
  * unpcb.  This includes the unp_conn field, which either links two connected
  * PCBs together (for connected socket types) or points at the destination
  * socket (for connectionless socket types).  The operations of creating or
  * destroying a connection therefore involve locking multiple PCBs.  To avoid
  * lock order reversals, in some cases this involves dropping a PCB lock and
  * using a reference counter to maintain liveness.
  *
  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  * allocated in pru_attach() and freed in pru_detach().  The validity of that
  * pointer is an invariant, so no lock is required to dereference the so_pcb
  * pointer if a valid socket reference is held by the caller.  In practice,
  * this is always true during operations performed on a socket.  Each unpcb
  * has a back-pointer to its socket, unp_socket, which will be stable under
  * the same circumstances.
  *
  * This pointer may only be safely dereferenced as long as a valid reference
  * to the unpcb is held.  Typically, this reference will be from the socket,
  * or from another unpcb when the referring unpcb's lock is held (in order
  * that the reference not be invalidated during use).  For example, to follow
  * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
  * that detach is not run clearing unp_socket.
  *
  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  * protocols, bind() is a non-atomic operation, and connect() requires
  * potential sleeping in the protocol, due to potentially waiting on local or
  * distributed file systems.  We try to separate "lookup" operations, which
  * may sleep, and the IPC operations themselves, which typically can occur
  * with relative atomicity as locks can be held over the entire operation.
  *
  * Another tricky issue is simultaneous multi-threaded or multi-process
  * access to a single UNIX domain socket.  These are handled by the flags
  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  * binding, both of which involve dropping UNIX domain socket locks in order
  * to perform namei() and other file system operations.
  */
 /* The global linkage lock and the mutex guarding the deferred/ref lists. */
 static struct rwlock	unp_link_rwlock;
 static struct mtx	unp_defers_lock;
 
 /* Wrappers around the global linkage rwlock (unp_link_rwlock). */
 #define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
 					    "unp_link_rwlock")
 
 #define	UNP_LINK_LOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_LOCKED)
 #define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_UNLOCKED)
 
 #define	UNP_LINK_RLOCK()		rw_rlock(&unp_link_rwlock)
 #define	UNP_LINK_RUNLOCK()		rw_runlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK()		rw_wlock(&unp_link_rwlock)
 #define	UNP_LINK_WUNLOCK()		rw_wunlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_WLOCKED)
 /* Non-zero when the current thread already holds the link lock exclusively. */
 #define	UNP_LINK_WOWNED()		rw_wowned(&unp_link_rwlock)
 
 /*
  * Wrappers around unp_defers_lock.  The UNP_REF_LIST_* names are aliases for
  * the same mutex, used where it protects the per-PCB unp_refs lists.
  *
  * None of these expansions carries a trailing semicolon: callers supply their
  * own, so each macro behaves as a single expression statement and is safe in
  * an unbraced if/else.
  */
 #define	UNP_DEFERRED_LOCK_INIT()	mtx_init(&unp_defers_lock, \
 					    "unp_defer", NULL, MTX_DEF)
 #define	UNP_DEFERRED_LOCK()		mtx_lock(&unp_defers_lock)
 #define	UNP_DEFERRED_UNLOCK()		mtx_unlock(&unp_defers_lock)
 
 #define UNP_REF_LIST_LOCK()		UNP_DEFERRED_LOCK()
 #define UNP_REF_LIST_UNLOCK()		UNP_DEFERRED_UNLOCK()
 
 /*
  * Wrappers around the per-PCB mutex (unp_mtx).  The mutex is initialized with
  * MTX_DUPOK, and code in this file does hold two PCB mutexes at once (see
  * unp_pcb_lock_pair()).
  */
 #define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
 					    "unp", "unp",	\
 					    MTX_DUPOK|MTX_DEF)
 #define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCKPTR(unp)		(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
 #define	UNP_PCB_TRYLOCK(unp)		mtx_trylock(&(unp)->unp_mtx)
 #define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
 #define	UNP_PCB_OWNED(unp)		mtx_owned(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
 #define	UNP_PCB_UNLOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
 
 /* Forward declarations of file-local functions. */
 static int	uipc_connect2(struct socket *, struct socket *);
 static int	uipc_ctloutput(struct socket *, struct sockopt *);
 static int	unp_connect(struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connectat(int, struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connect2(struct socket *so, struct socket *so2, int);
 static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
 static void	unp_dispose(struct socket *so);
 static void	unp_dispose_mbuf(struct mbuf *);
 static void	unp_shutdown(struct unpcb *);
 static void	unp_drop(struct unpcb *);
 static void	unp_gc(__unused void *, int);
 static void	unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
 static void	unp_discard(struct file *);
 static void	unp_freerights(struct filedescent **, int);
 static void	unp_init(void);
 static int	unp_internalize(struct mbuf **, struct thread *);
 static void	unp_internalize_fp(struct file *);
 static int	unp_externalize(struct mbuf *, struct mbuf **, int);
 static int	unp_externalize_fp(struct file *);
 static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *, int);
 static void	unp_process_defers(void * __unused, int);
 
 static void
 unp_pcb_hold(struct unpcb *unp)
 {
 	u_int old __unused;
 
 	old = refcount_acquire(&unp->unp_refcount);
 	KASSERT(old > 0, ("%s: unpcb %p has no references", __func__, unp));
 }
 
 /*
  * Drop one reference on a locked PCB.
  *
  * Returns true if that was the last reference: the PCB mutex has then been
  * released and destroyed and the PCB returned to unp_zone, so the caller must
  * not touch it again.  Returns false otherwise, with the PCB still locked.
  */
 static __result_use_check bool
 unp_pcb_rele(struct unpcb *unp)
 {
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	if (!refcount_release(&unp->unp_refcount))
 		return (false);
 
 	/* Last reference: tear down the lock and free the PCB. */
 	UNP_PCB_UNLOCK(unp);
 	UNP_PCB_LOCK_DESTROY(unp);
 	uma_zfree(unp_zone, unp);
 	return (true);
 }
 
 static void
 unp_pcb_rele_notlast(struct unpcb *unp)
 {
 	bool ret __unused;
 
 	ret = refcount_release(&unp->unp_refcount);
 	KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp));
 }
 
 /*
  * Lock two PCBs, neither of which may be held by the caller.  The PCB at the
  * lower address is always locked first; if both arguments name the same PCB
  * it is locked exactly once.
  */
 static void
 unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2)
 {
 	struct unpcb *first, *second;
 
 	UNP_PCB_UNLOCK_ASSERT(unp);
 	UNP_PCB_UNLOCK_ASSERT(unp2);
 
 	if (unp == unp2) {
 		UNP_PCB_LOCK(unp);
 		return;
 	}
 
 	if ((uintptr_t)unp2 > (uintptr_t)unp) {
 		first = unp;
 		second = unp2;
 	} else {
 		first = unp2;
 		second = unp;
 	}
 	UNP_PCB_LOCK(first);
 	UNP_PCB_LOCK(second);
 }
 
 /*
  * Release the locks on a pair of PCBs; "unp" is unlocked first.  When both
  * arguments name the same PCB it is unlocked only once.
  */
 static void
 unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2)
 {
 	UNP_PCB_UNLOCK(unp);
 	if (unp == unp2)
 		return;
 	UNP_PCB_UNLOCK(unp2);
 }
 
 /*
  * Try to lock the connected peer of an already locked socket.  In some cases
  * this requires that we unlock the current socket.  The pairbusy counter is
  * used to block concurrent connection attempts while the lock is dropped.  The
  * caller must be careful to revalidate PCB state.
  *
  * Returns the locked peer; "unp" itself when the socket is connected to
  * itself (no second lock is taken); or NULL when there is no peer or the
  * connection went away while unp's lock was dropped.  "unp" is locked on
  * return in every case.
  */
 static struct unpcb *
 unp_pcb_lock_peer(struct unpcb *unp)
 {
 	struct unpcb *unp2;
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL)
 		return (NULL);
 	if (__predict_false(unp == unp2))
 		return (unp);
 
 	UNP_PCB_UNLOCK_ASSERT(unp2);
 
 	/* Fast path: the peer's lock is available without blocking. */
 	if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
 		return (unp2);
 	/*
 	 * The peer sorts after us, so blocking on it while holding our own
 	 * lock follows the address order used by unp_pcb_lock_pair().
 	 */
 	if ((uintptr_t)unp2 > (uintptr_t)unp) {
 		UNP_PCB_LOCK(unp2);
 		return (unp2);
 	}
 	/*
 	 * Slow path: drop our lock and take both in address order.  The
 	 * reference keeps unp2's memory valid while we hold no lock on it.
 	 */
 	unp->unp_pairbusy++;
 	unp_pcb_hold(unp2);
 	UNP_PCB_UNLOCK(unp);
 
 	UNP_PCB_LOCK(unp2);
 	UNP_PCB_LOCK(unp);
 	KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
 	    ("%s: socket %p was reconnected", __func__, unp));
 	/* Wake a thread that flagged itself as waiting for pairbusy to drain. */
 	if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
 		unp->unp_flags &= ~UNP_WAITING;
 		wakeup(unp);
 	}
 	if (unp_pcb_rele(unp2)) {
 		/* unp2 is unlocked. */
 		return (NULL);
 	}
 	/* The peer survived, but we were disconnected while unlocked. */
 	if (unp->unp_conn == NULL) {
 		UNP_PCB_UNLOCK(unp2);
 		return (NULL);
 	}
 	return (unp2);
 }
 
 /*
  * Definitions of protocols supported in the LOCAL domain.
  */
 static struct domain localdomain;
 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
 static struct pr_usrreqs uipc_usrreqs_seqpacket;
 /* One protocol switch entry per supported socket type. */
 static struct protosw localsw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|
 				    PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_stream
 },
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS|PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_dgram
 },
 {
 	.pr_type =		SOCK_SEQPACKET,
 	.pr_domain =		&localdomain,
 
 	/*
 	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
 	 * due to our use of sbappendaddr.  A new sbappend variant is needed
 	 * that supports both atomic record writes and control data.
 	 */
 	.pr_flags =		PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|
 				    PR_WANTRCVD|PR_RIGHTS|PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_seqpacket,
 },
 };
 
 /* Domain descriptor for AF_LOCAL, tying the protocol table to its hooks. */
 static struct domain localdomain = {
 	.dom_family =		AF_LOCAL,
 	.dom_name =		"local",
 	.dom_init =		unp_init,
 	.dom_externalize =	unp_externalize,
 	.dom_dispose =		unp_dispose,
 	.dom_protosw =		localsw,
 	/* One past the last element of localsw. */
 	.dom_protoswNPROTOSW =	&localsw[nitems(localsw)]
 };
 DOMAIN_SET(local);
 
 /*
  * Abort handling for a local socket: if it currently has a peer, take a
  * reference on the peer and pass it to unp_drop() once our own PCB lock has
  * been released.  Nothing is done for an unconnected socket.
  */
 static void
 uipc_abort(struct socket *so)
 {
 	struct unpcb *unp, *peer;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
 	UNP_PCB_UNLOCK_ASSERT(unp);
 
 	UNP_PCB_LOCK(unp);
 	peer = unp->unp_conn;
 	if (peer == NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return;
 	}
 	unp_pcb_hold(peer);
 	UNP_PCB_UNLOCK(unp);
 	unp_drop(peer);
 }
 
 /*
  * Report the name of the connected peer in a freshly allocated sockaddr
  * stored through "nam".  If the peer is gone or was never bound, the
  * nameless AF_LOCAL placeholder is returned instead.  Always returns 0.
  */
 static int
 uipc_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *peer;
 	const struct sockaddr *name;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
 
 	/* Allocate before taking any PCB lock; M_WAITOK may sleep. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_PCB_LOCK(unp);
 	peer = unp_pcb_lock_peer(unp);
 
 	/* Our peer may have closed already, or may never have been bound. */
 	name = &sun_noname;
 	if (peer != NULL && peer->unp_addr != NULL)
 		name = (struct sockaddr *)peer->unp_addr;
 	bcopy(name, *nam, name->sa_len);
 
 	if (peer == NULL)
 		UNP_PCB_UNLOCK(unp);
 	else
 		unp_pcb_unlock_pair(unp, peer);
 	return (0);
 }
 
 static int
 uipc_attach(struct socket *so, int proto, struct thread *td)
 {
 	u_long sendspace, recvspace;
 	struct unpcb *unp;
 	int error;
 	bool locked;
 
 	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			sendspace = unpst_sendspace;
 			recvspace = unpst_recvspace;
 			break;
 
 		case SOCK_DGRAM:
 			sendspace = unpdg_sendspace;
 			recvspace = unpdg_recvspace;
 			break;
 
 		case SOCK_SEQPACKET:
 			sendspace = unpsp_sendspace;
 			recvspace = unpsp_recvspace;
 			break;
 
 		default:
 			panic("uipc_attach");
 		}
 		error = soreserve(so, sendspace, recvspace);
 		if (error)
 			return (error);
 	}
 	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
 	if (unp == NULL)
 		return (ENOBUFS);
 	LIST_INIT(&unp->unp_refs);
 	UNP_PCB_LOCK_INIT(unp);
 	unp->unp_socket = so;
 	so->so_pcb = unp;
 	refcount_init(&unp->unp_refcount, 1);
 
 	if ((locked = UNP_LINK_WOWNED()) == false)
 		UNP_LINK_WLOCK();
 
 	unp->unp_gencnt = ++unp_gencnt;
 	unp->unp_ino = ++unp_ino;
 	unp_count++;
 	switch (so->so_type) {
 	case SOCK_STREAM:
 		LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
 		break;
 
 	case SOCK_DGRAM:
 		LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
 		break;
 
 	case SOCK_SEQPACKET:
 		LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
 		break;
 
 	default:
 		panic("uipc_attach");
 	}
 
 	if (locked == false)
 		UNP_LINK_WUNLOCK();
 
 	return (0);
 }
 
 /*
  * Bind a local socket to a file system name, resolved relative to the
  * directory descriptor "fd".
  *
  * Creates a VSOCK vnode at the path carried in "nam" and records the vnode
  * and a copy of the address in the PCB.  Errors visible here:
  *   EAFNOSUPPORT  the address family is not AF_UNIX;
  *   EINVAL        the address length is out of range, the path is empty, or
  *                 the socket is already bound;
  *   EALREADY      another bind on this socket is in progress;
  *   EADDRINUSE    the path already exists;
  * plus whatever namei(), vn_start_write(), the MAC check or VOP_CREATE()
  * return.
  */
 static int
 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	struct vattr vattr;
 	int error, namelen;
 	struct nameidata nd;
 	struct unpcb *unp;
 	struct vnode *vp;
 	struct mount *mp;
 	cap_rights_t rights;
 	char *buf;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
 
 	if (soun->sun_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	/* Length of the path portion only; it need not be NUL-terminated. */
 	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 	if (namelen <= 0)
 		return (EINVAL);
 
 	/*
 	 * We don't allow simultaneous bind() calls on a single UNIX domain
 	 * socket, so flag in-progress operations, and return an error if an
 	 * operation is already in progress.
 	 *
 	 * Historically, we have not allowed a socket to be rebound, so this
 	 * also returns an error.  Not allowing re-binding simplifies the
 	 * implementation and avoids a great many possible failure modes.
 	 */
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (EINVAL);
 	}
 	if (unp->unp_flags & UNP_BINDING) {
 		UNP_PCB_UNLOCK(unp);
 		return (EALREADY);
 	}
 	unp->unp_flags |= UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 
 	/* Make a NUL-terminated kernel copy of the path for namei(). */
 	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
 	bcopy(soun->sun_path, buf, namelen);
 	buf[namelen] = 0;
 
 restart:
 	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT),
 	    td);
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 	error = namei(&nd);
 	if (error)
 		goto error;
 	vp = nd.ni_vp;
 	/*
 	 * Either the name already exists, or write access to the file system
 	 * could not be started without sleeping.  Release what namei() gave
 	 * us; in the latter case wait for write access and look up again.
 	 */
 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (vp != NULL) {
 			vrele(vp);
 			error = EADDRINUSE;
 			goto error;
 		}
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error)
 			goto error;
 		goto restart;
 	}
 	/* Create the socket node, with permissions filtered by the umask. */
 	VATTR_NULL(&vattr);
 	vattr.va_type = VSOCK;
 	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask);
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 #endif
 	/* Without MAC, error is still 0 here from the successful namei(). */
 	if (error == 0)
 		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (error) {
 		VOP_VPUT_PAIR(nd.ni_dvp, NULL, true);
 		vn_finished_write(mp);
 		if (error == ERELOOKUP)
 			goto restart;
 		goto error;
 	}
 	vp = nd.ni_vp;
 	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
 	/* Duplicate the address before taking the PCB lock; M_WAITOK may sleep. */
 	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
 
 	/* Publish the binding and clear the in-progress flag. */
 	UNP_PCB_LOCK(unp);
 	VOP_UNP_BIND(vp, unp);
 	unp->unp_vnode = vp;
 	unp->unp_addr = soun;
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	/* Extra vnode reference taken before VOP_VPUT_PAIR() releases ours. */
 	vref(vp);
 	VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
 	vn_finished_write(mp);
 	free(buf, M_TEMP);
 	return (0);
 
 error:
 	/* Common failure exit: allow a later bind attempt and free the path. */
 	UNP_PCB_LOCK(unp);
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * Bind relative to the current working directory: uipc_bindat() with
  * AT_FDCWD as the directory descriptor.
  */
 static int
 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	return (uipc_bindat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect "so" to the address "nam" on behalf of the current thread; returns
  * whatever unp_connect() returns.
  */
 static int
 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
 	return (unp_connect(so, nam, td));
 }
 
 /*
  * Connect "so" to the address "nam", passing the directory descriptor "fd"
  * through to unp_connectat(); returns whatever that call returns.
  */
 static int
 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
 	return (unp_connectat(fd, so, nam, td));
 }
 
 /*
  * Close-time teardown for a local socket: detach it from the vnode it is
  * bound to, if any, and disconnect it from its peer, if any.
  */
 static void
 uipc_close(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct vnode *vp = NULL;
 	struct mtx *vplock;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
 
 	vplock = NULL;
 	/*
 	 * The vnode's pool mutex must be taken before the PCB lock, so peek at
 	 * unp_vnode unlocked and recheck it once both locks are held.
 	 */
 	if ((vp = unp->unp_vnode) != NULL) {
 		vplock = mtx_pool_find(mtxpool_sleep, vp);
 		mtx_lock(vplock);
 	}
 	UNP_PCB_LOCK(unp);
 	/* The vnode was detached between the unlocked read and locking. */
 	if (vp && unp->unp_vnode == NULL) {
 		mtx_unlock(vplock);
 		vp = NULL;
 	}
 	if (vp != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	/*
 	 * No unlock follows unp_disconnect() here, so it is presumably
 	 * responsible for releasing both PCB locks -- confirm against its
 	 * definition.
 	 */
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 	if (vp) {
 		mtx_unlock(vplock);
 		/* Presumably drops the reference taken at bind time. */
 		vrele(vp);
 	}
 }
 
 /*
  * Connect two sockets to each other: lock both PCBs in the canonical order,
  * perform the connection with PRU_CONNECT2, and unlock them again.  Returns
  * the result of unp_connect2().
  */
 static int
 uipc_connect2(struct socket *so1, struct socket *so2)
 {
 	struct unpcb *pcb1, *pcb2;
 	int rv;
 
 	pcb1 = so1->so_pcb;
 	KASSERT(pcb1 != NULL, ("uipc_connect2: unp == NULL"));
 	pcb2 = so2->so_pcb;
 	KASSERT(pcb2 != NULL, ("uipc_connect2: unp2 == NULL"));
 
 	unp_pcb_lock_pair(pcb1, pcb2);
 	rv = unp_connect2(so1, so2, PRU_CONNECT2);
 	unp_pcb_unlock_pair(pcb1, pcb2);
 
 	return (rv);
 }
 
 /*
  * Tear down the PCB of a socket that is going away: release its receive
  * buffer, unlink it from the global lists, detach it from any bound vnode,
  * disconnect it from its peer, drop every socket still referring to it, and
  * finally release the socket's reference on the PCB.
  */
 static void
 uipc_detach(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct mtx *vplock;
 	struct vnode *vp;
 	int local_unp_rights;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
 
 	vp = NULL;
 	vplock = NULL;
 
 	SOCK_LOCK(so);
 	if (!SOLISTENING(so)) {
 		/*
 		 * Once the socket is removed from the global lists,
 		 * uipc_ready() will not be able to locate its socket buffer, so
 		 * clear the buffer now.  At this point internalized rights have
 		 * already been disposed of.
 		 */
 		sbrelease(&so->so_rcv, so);
 	}
 	SOCK_UNLOCK(so);
 
 	/* Remove the PCB from the per-type list (and the GC dead list). */
 	UNP_LINK_WLOCK();
 	LIST_REMOVE(unp, unp_link);
 	if (unp->unp_gcflag & UNPGC_DEAD)
 		LIST_REMOVE(unp, unp_dead);
 	unp->unp_gencnt = ++unp_gencnt;
 	--unp_count;
 	UNP_LINK_WUNLOCK();
 
 	UNP_PCB_UNLOCK_ASSERT(unp);
  restart:
 	/*
 	 * The vnode's pool mutex must be taken before the PCB lock, so read
 	 * unp_vnode unlocked first and retry if it turns out to have been
 	 * replaced by a different vnode once both locks are held.
 	 */
 	if ((vp = unp->unp_vnode) != NULL) {
 		vplock = mtx_pool_find(mtxpool_sleep, vp);
 		mtx_lock(vplock);
 	}
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != vp && unp->unp_vnode != NULL) {
 		if (vplock)
 			mtx_unlock(vplock);
 		UNP_PCB_UNLOCK(unp);
 		goto restart;
 	}
 	if ((vp = unp->unp_vnode) != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * Drop every PCB still on our unp_refs list.  The list lock is
 	 * released around unp_drop(); a reference keeps "ref" valid meanwhile.
 	 * The loop relies on unp_drop() removing "ref" from the list.
 	 */
 	UNP_REF_LIST_LOCK();
 	while (!LIST_EMPTY(&unp->unp_refs)) {
 		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
 
 		unp_pcb_hold(ref);
 		UNP_REF_LIST_UNLOCK();
 
 		MPASS(ref != unp);
 		UNP_PCB_UNLOCK_ASSERT(ref);
 		unp_drop(ref);
 		UNP_REF_LIST_LOCK();
 	}
 	UNP_REF_LIST_UNLOCK();
 
 	/* Sever the socket <-> PCB linkage and release the socket's reference. */
 	UNP_PCB_LOCK(unp);
 	local_unp_rights = unp_rights;
 	unp->unp_socket->so_pcb = NULL;
 	unp->unp_socket = NULL;
 	free(unp->unp_addr, M_SONAME);
 	unp->unp_addr = NULL;
 	/* unp_pcb_rele() has already unlocked the PCB if it freed it. */
 	if (!unp_pcb_rele(unp))
 		UNP_PCB_UNLOCK(unp);
 	if (vp) {
 		mtx_unlock(vplock);
 		vrele(vp);
 	}
 	/* Descriptors were in flight: schedule the garbage collector task. */
 	if (local_unp_rights)
 		taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
 }
 
 /*
  * Disconnect "so" from its peer, if it has one.  Always returns 0.
  */
 static int
 uipc_disconnect(struct socket *so)
 {
 	struct unpcb *unp, *peer;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
 
 	UNP_PCB_LOCK(unp);
 	peer = unp_pcb_lock_peer(unp);
 	if (peer == NULL)
 		UNP_PCB_UNLOCK(unp);
 	else
 		unp_disconnect(unp, peer);
 	return (0);
 }
 
 /*
  * Put a bound stream or seqpacket socket into the listening state.
  *
  * Returns EOPNOTSUPP for other socket types, EINVAL if the socket has no
  * bound vnode but is connected, EDESTADDRREQ if it is neither bound nor
  * connected, or the result of solisten_proto_check().  On success the
  * caller's credentials are recorded in the PCB's unp_peercred.
  */
 static int
 uipc_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct unpcb *unp;
 	int error;
 
 	if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
 		return (EOPNOTSUPP);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
 
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != NULL) {
 		SOCK_LOCK(so);
 		error = solisten_proto_check(so);
 		if (error == 0) {
 			cru2xt(td, &unp->unp_peercred);
 			solisten_proto(so, backlog);
 		}
 		SOCK_UNLOCK(so);
 	} else {
 		/* Already connected or not bound to an address. */
 		error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
 	}
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Report the name of the connected peer in a freshly allocated sockaddr
  * stored through "nam".  If there is no peer, or the peer was never bound,
  * the nameless AF_LOCAL placeholder is returned instead.  Always returns 0.
  *
  * unp_conn is protected by the PCB lock, not by the global link lock, so the
  * peer is located and locked with the same protocol uipc_accept() uses.
  * Following unp_conn under the link lock alone would let the peer be
  * disconnected and freed before its lock could be taken.
  */
 static int
 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *unp2;
 	const struct sockaddr *sa;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
 
 	/* Allocate before taking any PCB lock; M_WAITOK may sleep. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_PCB_LOCK(unp);
 	unp2 = unp_pcb_lock_peer(unp);
 	if (unp2 != NULL && unp2->unp_addr != NULL)
 		sa = (struct sockaddr *)unp2->unp_addr;
 	else
 		sa = &sun_noname;
 	bcopy(sa, *nam, sa->sa_len);
 	if (unp2 != NULL)
 		unp_pcb_unlock_pair(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 /*
  * Called after data has been consumed from the receive buffer of a stream or
  * seqpacket socket: relieve backpressure on the connected sender and wake
  * any thread waiting to write.  Always returns 0.
  */
 static int
 uipc_rcvd(struct socket *so, int flags)
 {
 	struct unpcb *unp, *peer;
 	struct socket *peerso;
 	u_int rcv_mbcnt, rcv_cc;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	/*
 	 * Snapshot our receive buffer usage first, so that the so_rcv lock is
 	 * not held across the update of the peer's so_snd.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	rcv_mbcnt = so->so_rcv.sb_mbcnt;
 	rcv_cc = sbavail(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/*
 	 * A benign race exists from here on: uipc_send() on the peer may
 	 * append data and set SB_STOP after the snapshot, and we may then
 	 * clear SB_STOP although the buffer is full.  The only consequence is
 	 * that the buffer can exceed its limit, which is not strictly
 	 * guaranteed anyway.
 	 *
 	 * Holding our own PCB lock keeps unp_conn valid and keeps the peer
 	 * from disconnecting, so the peer's unp_socket is stable without
 	 * locking the peer itself.
 	 */
 	UNP_PCB_LOCK(unp);
 	peer = unp->unp_conn;
 	if (peer != NULL) {
 		peerso = peer->unp_socket;
 		SOCKBUF_LOCK(&peerso->so_snd);
 		if (rcv_cc < peerso->so_snd.sb_hiwat &&
 		    rcv_mbcnt < peerso->so_snd.sb_mbmax)
 			peerso->so_snd.sb_flags &= ~SB_STOP;
 		/* No explicit unlock of so_snd follows this call. */
 		sowwakeup_locked(peerso);
 	}
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 /*
  * Send data, and optionally control messages, on a local-domain socket.
  *
  * Handles SOCK_DGRAM, SOCK_STREAM and SOCK_SEQPACKET sockets.  Control
  * messages are internalized first; the payload is then appended directly to
  * the peer socket's receive buffer.  When a destination address is supplied
  * for a datagram socket, a connection is established for the duration of
  * the send and torn down afterwards.
  *
  * Returns 0 on success or an errno value: EOPNOTSUPP for PRUS_OOB, ENOTCONN
  * when no peer is available, EPIPE when the send side has been shut down,
  * ENOBUFS when sbappendaddr_locked() fails for a datagram, or the error
  * reported by unp_internalize() or unp_connect().
  *
  * On return, a control chain that was not handed to the peer is freed here,
  * and m is freed unless it was handed to the peer or PRUS_NOTREADY is set.
  */
 static int
 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 	int freed, error;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
 	    so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	freed = error = 0;
 	if (flags & PRUS_OOB) {
 		error = EOPNOTSUPP;
 		goto release;
 	}
 	/* Convert user-supplied control messages to their in-kernel form. */
 	if (control != NULL && (error = unp_internalize(&control, td)))
 		goto release;
 
 	unp2 = NULL;
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 	{
 		const struct sockaddr *from;
 
 		if (nam != NULL) {
 			error = unp_connect(so, nam, td);
 			if (error != 0)
 				break;
 		}
 		UNP_PCB_LOCK(unp);
 
 		/*
 		 * Because connect() and send() are non-atomic in a sendto()
 		 * with a target address, it's possible that the socket will
 		 * have disconnected before the send() can run.  In that case
 		 * return the slightly counter-intuitive but otherwise
 		 * correct error that the socket is not connected.
 		 */
 		unp2 = unp_pcb_lock_peer(unp);
 		if (unp2 == NULL) {
 			UNP_PCB_UNLOCK(unp);
 			error = ENOTCONN;
 			break;
 		}
 
 		if (unp2->unp_flags & UNP_WANTCRED_MASK)
 			control = unp_addsockcred(td, control,
 			    unp2->unp_flags);
 		/* Report our bound address as the source, if we have one. */
 		if (unp->unp_addr != NULL)
 			from = (struct sockaddr *)unp->unp_addr;
 		else
 			from = &sun_noname;
 		so2 = unp2->unp_socket;
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (sbappendaddr_locked(&so2->so_rcv, from, m,
 		    control)) {
 			sorwakeup_locked(so2);
 			/* Data and control now belong to the receiver. */
 			m = NULL;
 			control = NULL;
 		} else {
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 			error = ENOBUFS;
 		}
 		/* Undo the connection made above for an addressed send. */
 		if (nam != NULL)
 			unp_disconnect(unp, unp2);
 		else
 			unp_pcb_unlock_pair(unp, unp2);
 		break;
 	}
 
 	case SOCK_SEQPACKET:
 	case SOCK_STREAM:
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			if (nam != NULL) {
 				error = unp_connect(so, nam, td);
 				if (error != 0)
 					break;
 			} else {
 				error = ENOTCONN;
 				break;
 			}
 		}
 
 		UNP_PCB_LOCK(unp);
 		if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
 			UNP_PCB_UNLOCK(unp);
 			error = ENOTCONN;
 			break;
 		} else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			unp_pcb_unlock_pair(unp, unp2);
 			error = EPIPE;
 			break;
 		}
 		/* Only the peer's PCB lock is held from here on. */
 		UNP_PCB_UNLOCK(unp);
 		if ((so2 = unp2->unp_socket) == NULL) {
 			UNP_PCB_UNLOCK(unp2);
 			error = ENOTCONN;
 			break;
 		}
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (unp2->unp_flags & UNP_WANTCRED_MASK) {
 			/*
 			 * Credentials are passed only once on SOCK_STREAM and
 			 * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
 			 * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
 			 */
 			control = unp_addsockcred(td, control, unp2->unp_flags);
 			unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
 		}
 
 		/*
 		 * Send to paired receive port and wake up readers.  Don't
 		 * check for space available in the receive buffer if we're
 		 * attaching ancillary data; Unix domain sockets only check
 		 * for space in the sending sockbuf, and that check is
 		 * performed one level up the stack.  At that level we cannot
 		 * precisely account for the amount of buffer space used
 		 * (e.g., because control messages are not yet internalized).
 		 */
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			if (control != NULL) {
 				sbappendcontrol_locked(&so2->so_rcv, m,
 				    control, flags);
 				control = NULL;
 			} else
 				sbappend_locked(&so2->so_rcv, m, flags);
 			break;
 
 		case SOCK_SEQPACKET:
 			if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 			    &sun_noname, m, control))
 				control = NULL;
 			break;
 		}
 
 		/* Sample the receiver's usage while its sockbuf is locked. */
 		mbcnt = so2->so_rcv.sb_mbcnt;
 		sbcc = sbavail(&so2->so_rcv);
 		if (sbcc)
 			sorwakeup_locked(so2);
 		else
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 
 		/*
 		 * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 		 * it would be possible for uipc_rcvd to be called at this
 		 * point, drain the receiving sockbuf, clear SB_STOP, and then
 		 * we would set SB_STOP below.  That could lead to an empty
 		 * sockbuf having SB_STOP set.
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 			so->so_snd.sb_flags |= SB_STOP;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		UNP_PCB_UNLOCK(unp2);
 		m = NULL;
 		break;
 	}
 
 	/*
 	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
 	 */
 	if (flags & PRUS_EOF) {
 		UNP_PCB_LOCK(unp);
 		socantsendmore(so);
 		unp_shutdown(unp);
 		UNP_PCB_UNLOCK(unp);
 	}
 	/*
 	 * Control that was internalized but not delivered is passed to
 	 * unp_dispose_mbuf() before the mbufs themselves are freed below.
 	 */
 	if (control != NULL && error != 0)
 		unp_dispose_mbuf(control);
 
 release:
 	if (control != NULL)
 		m_freem(control);
 	/*
 	 * In case of PRUS_NOTREADY, uipc_ready() is responsible
 	 * for freeing memory.
 	 */
 	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 		m_freem(m);
 	return (error);
 }
 
 static bool
 uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp)
 {
 	struct mbuf *mb, *n;
 	struct sockbuf *sb;
 
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
 		SOCK_UNLOCK(so);
 		return (false);
 	}
 	mb = NULL;
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 	if (sb->sb_fnrdy != NULL) {
 		for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) {
 			if (mb == m) {
 				*errorp = sbready(sb, m, count);
 				break;
 			}
 			mb = mb->m_next;
 			if (mb == NULL) {
 				mb = n;
 				if (mb != NULL)
 					n = mb->m_nextpkt;
 			}
 		}
 	}
 	SOCKBUF_UNLOCK(sb);
 	SOCK_UNLOCK(so);
 	return (mb != NULL);
 }
 
 static int
 uipc_ready(struct socket *so, struct mbuf *m, int count)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	int error, i;
 
 	unp = sotounpcb(so);
 
 	KASSERT(so->so_type == SOCK_STREAM,
 	    ("%s: unexpected socket type for %p", __func__, so));
 
 	UNP_PCB_LOCK(unp);
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		so2 = unp2->unp_socket;
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 			sorwakeup_locked(so2);
 		else
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 		UNP_PCB_UNLOCK(unp2);
 		return (error);
 	}
 	UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * The receiving socket has been disconnected, but may still be valid.
 	 * In this case, the now-ready mbufs are still present in its socket
 	 * buffer, so perform an exhaustive search before giving up and freeing
 	 * the mbufs.
 	 */
 	UNP_LINK_RLOCK();
 	LIST_FOREACH(unp, &unp_shead, unp_link) {
 		if (uipc_ready_scan(unp->unp_socket, m, count, &error))
 			break;
 	}
 	UNP_LINK_RUNLOCK();
 
 	if (unp == NULL) {
 		for (i = 0; i < count; i++)
 			m = m_free(m);
 		error = ECONNRESET;
 	}
 	return (error);
 }
 
 /*
  * Fill in the stat fields that local sockets provide: the block size is the
  * send buffer high-water mark, the device is NODEV and the inode number is
  * taken from the PCB.  Always returns 0.
  */
 static int
 uipc_sense(struct socket *so, struct stat *sb)
 {
 	struct unpcb *pcb = sotounpcb(so);
 
 	KASSERT(pcb != NULL, ("uipc_sense: unp == NULL"));
 
 	sb->st_ino = pcb->unp_ino;
 	sb->st_dev = NODEV;
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	return (0);
 }
 
 /*
  * Shut down the sending side of a local socket: mark the socket as unable
  * to send more and let unp_shutdown() notify the peer, all under the PCB
  * lock.  Always returns 0.
  */
 static int
 uipc_shutdown(struct socket *so)
 {
 	struct unpcb *pcb = sotounpcb(so);
 
 	KASSERT(pcb != NULL, ("uipc_shutdown: unp == NULL"));
 
 	UNP_PCB_LOCK(pcb);
 	socantsendmore(so);
 	unp_shutdown(pcb);
 	UNP_PCB_UNLOCK(pcb);
 	return (0);
 }
 
 /*
  * Return a freshly allocated copy of the socket's own address in *nam.
  * When the socket has no bound address, sun_noname is copied instead.
  * The copy is taken while holding the PCB lock.  Always returns 0.
  */
 static int
 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *pcb = sotounpcb(so);
 	const struct sockaddr *src;
 
 	KASSERT(pcb != NULL, ("uipc_sockaddr: unp == NULL"));
 
 	/* Allocate before taking the PCB lock; M_WAITOK may sleep. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_PCB_LOCK(pcb);
 	src = (pcb->unp_addr != NULL) ?
 	    (const struct sockaddr *)pcb->unp_addr : &sun_noname;
 	memcpy(*nam, src, src->sa_len);
 	UNP_PCB_UNLOCK(pcb);
 	return (0);
 }
 
 /*
  * User-request handlers for SOCK_DGRAM local sockets.  Receiving uses
  * soreceive_dgram(); no pru_ready handler is provided.
  */
 static struct pr_usrreqs uipc_usrreqs_dgram = {
 	.pru_abort = 		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_dgram,
 	.pru_close =		uipc_close,
 };
 
 /*
  * User-request handlers for SOCK_SEQPACKET local sockets.  Receiving uses
  * soreceive_generic(); no pru_ready handler is provided.
  */
 static struct pr_usrreqs uipc_usrreqs_seqpacket = {
 	.pru_abort =		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,	/* XXX: or...? */
 	.pru_close =		uipc_close,
 };
 
 /*
  * User-request handlers for SOCK_STREAM local sockets.  This is the only
  * one of the three local-socket tables that installs a pru_ready handler
  * (uipc_ready); receiving uses soreceive_generic().
  */
 static struct pr_usrreqs uipc_usrreqs_stream = {
 	.pru_abort = 		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_ready =		uipc_ready,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,
 	.pru_close =		uipc_close,
 };
 
 /*
  * Get or set SOL_LOCAL socket options.
  *
  * SOPT_GET supports LOCAL_PEERCRED (copies out the cached peer credentials)
  * and the boolean options LOCAL_CREDS, LOCAL_CREDS_PERSISTENT and
  * LOCAL_CONNWAIT, each reported as 0 or 1 from the corresponding PCB flag.
  * SOPT_SET supports the three boolean options; LOCAL_CREDS and
  * LOCAL_CREDS_PERSISTENT are mutually exclusive.
  *
  * Returns 0 on success, EINVAL for a level other than SOL_LOCAL, for
  * LOCAL_PEERCRED on a non-stream socket without cached credentials, or for
  * an attempt to enable one credential mode while the other is set; ENOTCONN
  * for LOCAL_PEERCRED on a stream socket without cached credentials;
  * EOPNOTSUPP for an unknown option on get or an unknown direction;
  * ENOPROTOOPT for an unknown option on set; or an error from
  * sooptcopyin()/sooptcopyout().
  */
 static int
 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct unpcb *unp;
 	struct xucred xu;
 	int error, optval;
 
 	if (sopt->sopt_level != SOL_LOCAL)
 		return (EINVAL);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case LOCAL_PEERCRED:
 			/* Snapshot under the lock, copy out after dropping it. */
 			UNP_PCB_LOCK(unp);
 			if (unp->unp_flags & UNP_HAVEPC)
 				xu = unp->unp_peercred;
 			else {
 				if (so->so_type == SOCK_STREAM)
 					error = ENOTCONN;
 				else
 					error = EINVAL;
 			}
 			UNP_PCB_UNLOCK(unp);
 			if (error == 0)
 				error = sooptcopyout(sopt, &xu, sizeof(xu));
 			break;
 
 		case LOCAL_CREDS:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		case LOCAL_CREDS_PERSISTENT:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		case LOCAL_CONNWAIT:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		default:
 			error = EOPNOTSUPP;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case LOCAL_CREDS:
 		case LOCAL_CREDS_PERSISTENT:
 		case LOCAL_CONNWAIT:
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 					    sizeof(optval));
 			if (error)
 				break;
 
 /*
  * Set or clear "bit" in unp_flags under the PCB lock according to optval.
  * Enabling is refused with EINVAL when any flag in "exclusive" is already
  * set.  The "break" inside the macro only leaves its own do/while block.
  */
 #define	OPTSET(bit, exclusive) do {					\
 	UNP_PCB_LOCK(unp);						\
 	if (optval) {							\
 		if ((unp->unp_flags & (exclusive)) != 0) {		\
 			UNP_PCB_UNLOCK(unp);				\
 			error = EINVAL;					\
 			break;						\
 		}							\
 		unp->unp_flags |= (bit);				\
 	} else								\
 		unp->unp_flags &= ~(bit);				\
 	UNP_PCB_UNLOCK(unp);						\
 } while (0)
 
 			switch (sopt->sopt_name) {
 			case LOCAL_CREDS:
 				OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS);
 				break;
 
 			case LOCAL_CREDS_PERSISTENT:
 				OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT);
 				break;
 
 			case LOCAL_CONNWAIT:
 				OPTSET(UNP_CONNWAIT, 0);
 				break;
 
 			default:
 				break;
 			}
 			break;
 #undef	OPTSET
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 	return (error);
 }
 
 static int
 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (unp_connectat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect so to the local socket bound at the path given in nam, with the
  * lookup performed relative to the descriptor fd.
  *
  * The address is validated and its path copied into a NUL-terminated local
  * buffer.  Once the PCB's connection state is stable, UNP_CONNECTING is set
  * for the duration of the attempt and cleared again on every exit path
  * after that point.  The path is looked up, checked to be a VSOCK vnode and
  * checked for write access, and the PCB bound to it is fetched with
  * VOP_UNP_CONNECT().  For protocols with PR_CONNREQUIRED, a new socket is
  * created from the listening socket with sonewconn() and becomes the peer;
  * otherwise the bound socket itself is the peer.  The pair is then linked
  * by unp_connect2().
  *
  * Returns 0 on success or an errno value: EAFNOSUPPORT or EINVAL for a bad
  * address, EISCONN or EALREADY when a connection exists or is in progress,
  * ENOTSOCK when the path is not a socket, ECONNREFUSED when nothing is
  * bound or listening or the new socket cannot be created, EPROTOTYPE on a
  * socket type mismatch, or an error from namei(), the MAC check,
  * VOP_ACCESS() or unp_connect2().
  */
 static int
 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	struct mtx *vplock;
 	struct sockaddr_un *soun;
 	struct vnode *vp;
 	struct socket *so2;
 	struct unpcb *unp, *unp2, *unp3;
 	struct nameidata nd;
 	char buf[SOCK_MAXADDRLEN];
 	struct sockaddr *sa;
 	cap_rights_t rights;
 	int error, len;
 	bool connreq;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	/* len is the number of path bytes that follow the fixed header. */
 	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 	if (len <= 0)
 		return (EINVAL);
 	soun = (struct sockaddr_un *)nam;
 	bcopy(soun->sun_path, buf, len);
 	buf[len] = 0;
 
 	unp = sotounpcb(so);
 	UNP_PCB_LOCK(unp);
 	for (;;) {
 		/*
 		 * Wait for connection state to stabilize.  If a connection
 		 * already exists, give up.  For datagram sockets, which permit
 		 * multiple consecutive connect(2) calls, upper layers are
 		 * responsible for disconnecting in advance of a subsequent
 		 * connect(2), but this is not synchronized with PCB connection
 		 * state.
 		 *
 		 * Also make sure that no threads are currently attempting to
 		 * lock the peer socket, to ensure that unp_conn cannot
 		 * transition between two valid sockets while locks are dropped.
 		 */
 		if (unp->unp_conn != NULL) {
 			UNP_PCB_UNLOCK(unp);
 			return (EISCONN);
 		}
 		if ((unp->unp_flags & UNP_CONNECTING) != 0) {
 			UNP_PCB_UNLOCK(unp);
 			return (EALREADY);
 		}
 		if (unp->unp_pairbusy > 0) {
 			unp->unp_flags |= UNP_WAITING;
 			mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
 			continue;
 		}
 		break;
 	}
 	unp->unp_flags |= UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * For connection-oriented protocols, preallocate storage for the new
 	 * socket's copy of the listener's address while sleeping is allowed.
 	 */
 	connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0;
 	if (connreq)
 		sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	else
 		sa = NULL;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT),
 	    td);
 	error = namei(&nd);
 	if (error)
 		vp = NULL;
 	else
 		vp = nd.ni_vp;
 	ASSERT_VOP_LOCKED(vp, "unp_connect");
 	NDFREE_NOTHING(&nd);
 	if (error)
 		goto bad;
 
 	if (vp->v_type != VSOCK) {
 		error = ENOTSOCK;
 		goto bad;
 	}
 #ifdef MAC
 	error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 	if (error)
 		goto bad;
 #endif
 	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 	if (error)
 		goto bad;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 
 	vplock = mtx_pool_find(mtxpool_sleep, vp);
 	mtx_lock(vplock);
 	VOP_UNP_CONNECT(vp, &unp2);
 	if (unp2 == NULL) {
 		error = ECONNREFUSED;
 		goto bad2;
 	}
 	so2 = unp2->unp_socket;
 	if (so->so_type != so2->so_type) {
 		error = EPROTOTYPE;
 		goto bad2;
 	}
 	if (connreq) {
-		if (so2->so_options & SO_ACCEPTCONN) {
+		if (SOLISTENING(so2)) {
 			CURVNET_SET(so2->so_vnet);
 			so2 = sonewconn(so2, 0);
 			CURVNET_RESTORE();
 		} else
 			so2 = NULL;
 		if (so2 == NULL) {
 			error = ECONNREFUSED;
 			goto bad2;
 		}
 		/* unp2 is the listener's PCB, unp3 the new socket's PCB. */
 		unp3 = sotounpcb(so2);
 		unp_pcb_lock_pair(unp2, unp3);
 		if (unp2->unp_addr != NULL) {
 			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 			unp3->unp_addr = (struct sockaddr_un *) sa;
 			/* Ownership moved to unp3; skip the free() below. */
 			sa = NULL;
 		}
 
 		unp_copy_peercred(td, unp3, unp, unp2);
 
 		UNP_PCB_UNLOCK(unp2);
 		unp2 = unp3;
 
 		/*
 		 * It is safe to block on the PCB lock here since unp2 is
 		 * nascent and cannot be connected to any other sockets.
 		 */
 		UNP_PCB_LOCK(unp);
 #ifdef MAC
 		mac_socketpeer_set_from_socket(so, so2);
 		mac_socketpeer_set_from_socket(so2, so);
 #endif
 	} else {
 		unp_pcb_lock_pair(unp, unp2);
 	}
 	KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
 	    sotounpcb(so2) == unp2,
 	    ("%s: unp2 %p so2 %p", __func__, unp2, so2));
 	error = unp_connect2(so, so2, PRU_CONNECT);
 	unp_pcb_unlock_pair(unp, unp2);
 bad2:
 	mtx_unlock(vplock);
 bad:
 	if (vp != NULL) {
 		vput(vp);
 	}
 	free(sa, M_SONAME);
 	/* The connection attempt is over, whether or not it succeeded. */
 	UNP_PCB_LOCK(unp);
 	KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 	    ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 	unp->unp_flags &= ~UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Record peer credentials on both ends of a new connection.
  *
  * client_unp receives the credentials of the thread td.  server_unp
  * receives a copy of the credentials cached in listen_unp, which
  * uipc_listen stored when listen(2) was called.  Both PCBs are marked
  * UNP_HAVEPC, and client_unp additionally inherits the listener's
  * UNP_WANTCRED_MASK flags.
  */
 void
 unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
     struct unpcb *server_unp, struct unpcb *listen_unp)
 {
 	/* Credentials taken from the connecting thread. */
 	cru2xt(td, &client_unp->unp_peercred);
 	client_unp->unp_flags |= UNP_HAVEPC;
 
 	/* Credentials cached on the listening socket. */
 	server_unp->unp_peercred = listen_unp->unp_peercred;
 	server_unp->unp_flags |= UNP_HAVEPC;
 
 	/* Propagate the listener's credential-passing preference. */
 	client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
 }
 
 static int
 unp_connect2(struct socket *so, struct socket *so2, int req)
 {
 	struct unpcb *unp;
 	struct unpcb *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 	unp2 = sotounpcb(so2);
 	KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 	KASSERT(unp->unp_conn == NULL,
 	    ("%s: socket %p is already connected", __func__, unp));
 
 	if (so2->so_type != so->so_type)
 		return (EPROTOTYPE);
 	unp->unp_conn = unp2;
 	unp_pcb_hold(unp2);
 	unp_pcb_hold(unp);
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		UNP_REF_LIST_LOCK();
 		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 		UNP_REF_LIST_UNLOCK();
 		soisconnected(so);
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		KASSERT(unp2->unp_conn == NULL,
 		    ("%s: socket %p is already connected", __func__, unp2));
 		unp2->unp_conn = unp;
 		if (req == PRU_CONNECT &&
 		    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 			soisconnecting(so);
 		else
 			soisconnected(so);
 		soisconnected(so2);
 		break;
 
 	default:
 		panic("unp_connect2");
 	}
 	return (0);
 }
 
 /*
  * Break the connection from unp to unp2.  Both PCB locks must be held on
  * entry and unp must currently be connected to unp2.
  *
  * For datagram sockets, unp is removed from unp2's reference list and the
  * connected state is cleared on unp's socket only.  For stream and
  * seqpacket sockets the link is cleared in both directions and both
  * sockets are marked disconnected.
  *
  * One reference is then dropped on each PCB.  A PCB lock is released here
  * only when unp_pcb_rele() returns false, so on return the caller no
  * longer holds either lock.
  */
 static void
 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 {
 	struct socket *so, *so2;
 #ifdef INVARIANTS
 	struct unpcb *unptmp;
 #endif
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 	KASSERT(unp->unp_conn == unp2,
 	    ("%s: unpcb %p is not connected to %p", __func__, unp, unp2));
 
 	unp->unp_conn = NULL;
 	so = unp->unp_socket;
 	so2 = unp2->unp_socket;
 	switch (unp->unp_socket->so_type) {
 	case SOCK_DGRAM:
 		UNP_REF_LIST_LOCK();
 #ifdef INVARIANTS
 		/* Verify that unp really is on unp2's reference list. */
 		LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) {
 			if (unptmp == unp)
 				break;
 		}
 		KASSERT(unptmp != NULL,
 		    ("%s: %p not found in reflist of %p", __func__, unp, unp2));
 #endif
 		LIST_REMOVE(unp, unp_reflink);
 		UNP_REF_LIST_UNLOCK();
 		if (so) {
 			SOCK_LOCK(so);
 			so->so_state &= ~SS_ISCONNECTED;
 			SOCK_UNLOCK(so);
 		}
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		if (so)
 			soisdisconnected(so);
 		MPASS(unp2->unp_conn == unp);
 		unp2->unp_conn = NULL;
 		if (so2)
 			soisdisconnected(so2);
 		break;
 	}
 
 	if (unp == unp2) {
 		/*
 		 * Self-connected socket: both references are on the same
 		 * PCB and there is only one lock to release.
 		 */
 		unp_pcb_rele_notlast(unp);
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 	} else {
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 		if (!unp_pcb_rele(unp2))
 			UNP_PCB_UNLOCK(unp2);
 	}
 }
 
 /*
  * unp_pcblist() walks the global list of struct unpcb's to generate a
  * pointer list, bumping the refcount on each unpcb.  It then copies them out
  * sequentially, validating the generation number on each to see if it has
  * been detached.  All of this is necessary because copyout() may sleep on
  * disk I/O.
  *
  * arg1 selects the list to export (SOCK_STREAM, SOCK_DGRAM or
  * SOCK_SEQPACKET).  The output is a struct xunpgen header, one struct
  * xunpcb per exported PCB, and a trailing struct xunpgen carrying the
  * generation and count values read at the end.
  *
  * When the request has no output buffer, only a size estimate is returned
  * in req->oldidx.  Returns EPERM when a new value is supplied, otherwise 0
  * or an error from SYSCTL_OUT().
  */
 static int
 unp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct unpcb *unp, **unp_list;
 	unp_gen_t gencnt;
 	struct xunpgen *xug;
 	struct unp_head *head;
 	struct xunpcb *xu;
 	u_int i;
 	int error, n;
 
 	switch ((intptr_t)arg1) {
 	case SOCK_STREAM:
 		head = &unp_shead;
 		break;
 
 	case SOCK_DGRAM:
 		head = &unp_dhead;
 		break;
 
 	case SOCK_SEQPACKET:
 		head = &unp_sphead;
 		break;
 
 	default:
 		panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 	}
 
 	/*
 	 * The process of preparing the PCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		/* Size estimate only: two headers plus 1/8 slack on entries. */
 		n = unp_count;
 		req->oldidx = 2 * (sizeof *xug)
 			+ (n + n/8) * sizeof(struct xunpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
 	/* Take a consistent snapshot of the generation and PCB count. */
 	UNP_LINK_RLOCK();
 	gencnt = unp_gencnt;
 	n = unp_count;
 	UNP_LINK_RUNLOCK();
 
 	xug->xug_len = sizeof *xug;
 	xug->xug_count = n;
 	xug->xug_gen = gencnt;
 	xug->xug_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, xug, sizeof *xug);
 	if (error) {
 		free(xug, M_TEMP);
 		return (error);
 	}
 
 	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 
 	/*
 	 * First pass: collect and hold up to n PCBs no newer than the
 	 * snapshot.  PCBs for which cr_cansee() returns nonzero are skipped.
 	 */
 	UNP_LINK_RLOCK();
 	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 	     unp = LIST_NEXT(unp, unp_link)) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_gencnt <= gencnt) {
 			if (cr_cansee(req->td->td_ucred,
 			    unp->unp_socket->so_cred)) {
 				UNP_PCB_UNLOCK(unp);
 				continue;
 			}
 			unp_list[i++] = unp;
 			unp_pcb_hold(unp);
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
 	UNP_LINK_RUNLOCK();
 	n = i;			/* In case we lost some during malloc. */
 
 	/*
 	 * Second pass: drop each held reference and copy out the PCBs that
 	 * still exist.  The loop always runs to completion so that every
 	 * reference taken above is released.
 	 * NOTE(review): error is overwritten on each copyout, so an earlier
 	 * failure is lost if a later SYSCTL_OUT() succeeds -- confirm whether
 	 * that is intended.
 	 */
 	error = 0;
 	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 	for (i = 0; i < n; i++) {
 		unp = unp_list[i];
 		UNP_PCB_LOCK(unp);
 		/* A true return means there is nothing left to unlock or use. */
 		if (unp_pcb_rele(unp))
 			continue;
 
 		if (unp->unp_gencnt <= gencnt) {
 			xu->xu_len = sizeof *xu;
 			xu->xu_unpp = (uintptr_t)unp;
 			/*
 			 * XXX - need more locking here to protect against
 			 * connect/disconnect races for SMP.
 			 */
 			if (unp->unp_addr != NULL)
 				bcopy(unp->unp_addr, &xu->xu_addr,
 				      unp->unp_addr->sun_len);
 			else
 				bzero(&xu->xu_addr, sizeof(xu->xu_addr));
 			if (unp->unp_conn != NULL &&
 			    unp->unp_conn->unp_addr != NULL)
 				bcopy(unp->unp_conn->unp_addr,
 				      &xu->xu_caddr,
 				      unp->unp_conn->unp_addr->sun_len);
 			else
 				bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
 			xu->unp_vnode = (uintptr_t)unp->unp_vnode;
 			xu->unp_conn = (uintptr_t)unp->unp_conn;
 			xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
 			xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
 			xu->unp_gencnt = unp->unp_gencnt;
 			sotoxsocket(unp->unp_socket, &xu->xu_socket);
 			/* Copy out only after dropping the PCB lock. */
 			UNP_PCB_UNLOCK(unp);
 			error = SYSCTL_OUT(req, xu, sizeof *xu);
 		} else {
 			UNP_PCB_UNLOCK(unp);
 		}
 	}
 	free(xu, M_TEMP);
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xug->xug_gen = unp_gencnt;
 		xug->xug_sogen = so_gencnt;
 		xug->xug_count = unp_count;
 		error = SYSCTL_OUT(req, xug, sizeof *xug);
 	}
 	free(unp_list, M_TEMP);
 	free(xug, M_TEMP);
 	return (error);
 }
 
 /*
  * Read-only "pcblist" sysctl nodes, one per local socket type.  Each passes
  * its socket type to unp_pcblist() as arg1 to select the PCB list to export.
  */
 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local datagram sockets");
 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local stream sockets");
 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
     "List of active local seqpacket sockets");
 
 /*
  * Propagate a send-side shutdown to the peer.  For stream and seqpacket
  * sockets that have a peer whose socket still exists, the peer's socket is
  * marked as unable to receive more data.  The caller must hold the PCB
  * lock of unp.
  */
 static void
 unp_shutdown(struct unpcb *unp)
 {
 	struct unpcb *peer;
 	struct socket *peerso;
 	int type;
 
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	peer = unp->unp_conn;
 	type = unp->unp_socket->so_type;
 	if (type != SOCK_STREAM && type != SOCK_SEQPACKET)
 		return;
 	if (peer == NULL)
 		return;
 
 	peerso = peer->unp_socket;
 	if (peerso != NULL)
 		socantrcvmore(peerso);
 }
 
 /*
  * Drop a connection: post ECONNRESET on the socket, if there is one, and
  * disconnect from the peer.  One reference on unp is released on every
  * path, and the PCB lock taken here is not held on return.
  */
 static void
 unp_drop(struct unpcb *unp)
 {
 	struct socket *so;
 	struct unpcb *peer;
 
 	so = unp->unp_socket;
 
 	/*
 	 * Regardless of whether the socket's peer dropped the connection
 	 * with this socket by aborting or disconnecting, POSIX requires
 	 * that ECONNRESET is returned.
 	 */
 	UNP_PCB_LOCK(unp);
 	if (so != NULL)
 		so->so_error = ECONNRESET;
 
 	peer = unp_pcb_lock_peer(unp);
 	if (peer == NULL) {
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 		return;
 	}
 
 	/* Last reference dropped in unp_disconnect(). */
 	unp_pcb_rele_notlast(unp);
 	unp_disconnect(unp, peer);
 }
 
 /*
  * Release fdcount in-flight file references described by the array fdep.
  * For each entry the capability data is freed and the file is passed to
  * unp_discard().  The entries are then freed through fdep[0] with a single
  * free() call.  fdcount must be positive.
  */
 static void
 unp_freerights(struct filedescent **fdep, int fdcount)
 {
 	struct filedescent *ent;
 	struct file *fp;
 	int idx;
 
 	KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 
 	idx = 0;
 	while (idx < fdcount) {
 		ent = fdep[idx];
 		fp = ent->fde_file;
 		filecaps_free(&ent->fde_caps);
 		unp_discard(fp);
 		idx++;
 	}
 	free(fdep[0], M_FILECAPS);
 }
 
 /*
  * Convert a chain of internalized control messages into the form delivered
  * to the receiving process, or discard them.
  *
  * SCM_RIGHTS messages at level SOL_SOCKET carry an array of struct
  * filedescent pointers.  Each is installed into the current thread's
  * descriptor table and replaced by the resulting integer descriptor in a
  * new control mbuf.  MSG_CMSG_CLOEXEC in flags makes the new descriptors
  * close-on-exec.  All other messages are copied unchanged.
  *
  * When controlp is NULL, or once an error has occurred, nothing more is
  * output: rights are released with unp_freerights() and other messages are
  * skipped.  The input chain "control" is always freed before returning.
  *
  * Returns 0 on success, EINVAL for a malformed message header, E2BIG or
  * ENOBUFS when a control mbuf cannot be created, or EMSGSIZE when the
  * descriptors cannot be allocated.
  */
 static int
 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 {
 	struct thread *td = curthread;		/* XXX */
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	int i;
 	int *fdp;
 	struct filedesc *fdesc = td->td_proc->p_fd;
 	struct filedescent **fdep;
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error, newfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	error = 0;
 	if (controlp != NULL) /* controlp == NULL => free control messages */
 		*controlp = NULL;
 	while (cm != NULL) {
 		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 			error = EINVAL;
 			break;
 		}
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 		if (cm->cmsg_level == SOL_SOCKET
 		    && cm->cmsg_type == SCM_RIGHTS) {
 			newfds = datalen / sizeof(*fdep);
 			if (newfds == 0)
 				goto next;
 			fdep = data;
 
 			/* If we're not outputting the descriptors free them. */
 			if (error || controlp == NULL) {
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 			FILEDESC_XLOCK(fdesc);
 
 			/*
 			 * Now change each pointer to an fd in the global
 			 * table to an integer that is the index to the local
 			 * fd table entry that we set up to point to the
 			 * global one we are transferring.
 			 */
 			newlen = newfds * sizeof(int);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = E2BIG;
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 
 			fdp = (int *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			if (fdallocn(td, 0, fdp, newfds) != 0) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = EMSGSIZE;
 				unp_freerights(fdep, newfds);
 				m_freem(*controlp);
 				*controlp = NULL;
 				goto next;
 			}
 			for (i = 0; i < newfds; i++, fdp++) {
 				_finstall(fdesc, fdep[i]->fde_file, *fdp,
 				    (flags & MSG_CMSG_CLOEXEC) != 0 ? O_CLOEXEC : 0,
 				    &fdep[i]->fde_caps);
 				unp_externalize_fp(fdep[i]->fde_file);
 			}
 
 			/*
 			 * The new type indicates that the mbuf data refers to
 			 * kernel resources that may need to be released before
 			 * the mbuf is freed.
 			 */
 			m_chtype(*controlp, MT_EXTCONTROL);
 			FILEDESC_XUNLOCK(fdesc);
 			/* The entries were allocated as one block at fdep[0]. */
 			free(fdep[0], M_FILECAPS);
 		} else {
 			/* We can just copy anything else across. */
 			if (error || controlp == NULL)
 				goto next;
 			*controlp = sbcreatecontrol(NULL, datalen,
 			    cm->cmsg_type, cm->cmsg_level);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto next;
 			}
 			bcopy(data,
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 			    datalen);
 		}
 		/* Later output messages are chained after this one. */
 		controlp = &(*controlp)->m_next;
 
 next:
 		/* Step to the next message, or stop at the end of the data. */
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 	m_freem(control);
 	return (error);
 }
 
 /*
  * Event handler for maxsockets changes: apply the current maxsockets value
  * as the limit of the unpcb zone.  The tag argument is not used.
  */
 static void
 unp_zone_change(void *tag)
 {
 
 	(void)uma_zone_set_max(unp_zone, maxsockets);
 }
 
 #ifdef INVARIANTS
 /*
  * Zone destructor, compiled only with INVARIANTS: assert that a PCB being
  * freed no longer has references, a socket, a vnode, a peer or an address.
  */
 static void
 unp_zdtor(void *mem, int size __unused, void *arg __unused)
 {
 	struct unpcb *pcb = mem;
 
 	KASSERT(LIST_EMPTY(&pcb->unp_refs),
 	    ("%s: unpcb %p has lingering refs", __func__, pcb));
 	KASSERT(pcb->unp_socket == NULL,
 	    ("%s: unpcb %p has socket backpointer", __func__, pcb));
 	KASSERT(pcb->unp_vnode == NULL,
 	    ("%s: unpcb %p has vnode references", __func__, pcb));
 	KASSERT(pcb->unp_conn == NULL,
 	    ("%s: unpcb %p is still connected", __func__, pcb));
 	KASSERT(pcb->unp_addr == NULL,
 	    ("%s: unpcb %p has leaked addr", __func__, pcb));
 }
 #endif
 
 /*
  * One-time initialization of the local-domain socket layer: create and
  * size the unpcb zone, register for maxsockets changes, and initialize the
  * PCB lists, the deferred list, the tasks and the global locks.  With
  * VIMAGE this does nothing outside the default vnet.
  */
 static void
 unp_init(void)
 {
 	uma_dtor dtor = NULL;
 
 #ifdef VIMAGE
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 #endif
 
 	/* The zone destructor only performs checks under INVARIANTS. */
 #ifdef INVARIANTS
 	dtor = unp_zdtor;
 #endif
 	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor,
 	    NULL, NULL, UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(unp_zone, maxsockets);
 	uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 
 	LIST_INIT(&unp_shead);
 	LIST_INIT(&unp_dhead);
 	LIST_INIT(&unp_sphead);
 	SLIST_INIT(&unp_defers);
 
 	TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 	TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 
 	UNP_LINK_LOCK_INIT();
 	UNP_DEFERRED_LOCK_INIT();
 }
 
 /*
  * Release the file references carried by internalized SCM_RIGHTS messages.
  *
  * Walks the mbuf chain starting at control, treating each mbuf as holding
  * one control message.  For every SOL_SOCKET/SCM_RIGHTS message the payload
  * is handed to unp_freerights(); other messages are ignored.  The mbufs
  * themselves are not freed here.
  */
 static void
 unp_internalize_cleanup_rights(struct mbuf *control)
 {
 	struct cmsghdr *cp;
 	struct mbuf *m;
 	void *data;
 	socklen_t datalen;
 
 	for (m = control; m != NULL; m = m->m_next) {
 		cp = mtod(m, struct cmsghdr *);
 		if (cp->cmsg_level != SOL_SOCKET ||
 		    cp->cmsg_type != SCM_RIGHTS)
 			continue;
 		data = CMSG_DATA(cp);
 		datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
 		/*
 		 * The payload is an array of struct filedescent pointers, so
 		 * count elements of that type.
 		 */
 		unp_freerights(data, datalen / sizeof(struct filedescent *));
 	}
 }
 
 /*
  * Convert user-supplied control messages into their in-kernel form.
  *
  * On entry *controlp is the chain supplied by the sender; it is always
  * freed before returning.  On return *controlp heads a new chain holding
  * one mbuf per converted message:
  *
  *   SCM_CREDS      filled in from the sending thread's credentials;
  *   SCM_RIGHTS     descriptor numbers replaced by held struct filedescent
  *                  pointers (a message with no descriptors is dropped);
  *   SCM_TIMESTAMP, SCM_BINTIME, SCM_REALTIME, SCM_MONOTONIC
  *                  filled in with the current time in the matching format.
  *
  * Returns 0 on success or an errno value: EINVAL for a malformed header, a
  * level other than SOL_SOCKET or an unknown type; EBADF for an invalid
  * descriptor or a file that cannot be held; EOPNOTSUPP for a descriptor
  * that is not passable; ENOBUFS or E2BIG when a control mbuf cannot be
  * created.  On error, rights already internalized are released here; the
  * partially built chain is left in *controlp for the caller to free.
  */
 static int
 unp_internalize(struct mbuf **controlp, struct thread *td)
 {
 	struct mbuf *control, **initial_controlp;
 	struct proc *p;
 	struct filedesc *fdesc;
 	struct bintime *bt;
 	struct cmsghdr *cm;
 	struct cmsgcred *cmcred;
 	struct filedescent *fde, **fdep, *fdev;
 	struct file *fp;
 	struct timeval *tv;
 	struct timespec *ts;
 	void *data;
 	socklen_t clen, datalen;
 	int i, j, error, *fdp, oldfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	p = td->td_proc;
 	fdesc = p->p_fd;
 	error = 0;
 	control = *controlp;
 	clen = control->m_len;
 	*controlp = NULL;
 	initial_controlp = controlp;
 	for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
 		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 		    || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
 			error = EINVAL;
 			goto out;
 		}
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 
 		switch (cm->cmsg_type) {
 		/*
 		 * Fill in credential information.
 		 */
 		case SCM_CREDS:
 			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 			    SCM_CREDS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			cmcred = (struct cmsgcred *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			cmcred->cmcred_pid = p->p_pid;
 			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 			cmcred->cmcred_euid = td->td_ucred->cr_uid;
 			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 			    CMGROUP_MAX);
 			for (i = 0; i < cmcred->cmcred_ngroups; i++)
 				cmcred->cmcred_groups[i] =
 				    td->td_ucred->cr_groups[i];
 			break;
 
 		case SCM_RIGHTS:
 			oldfds = datalen / sizeof (int);
 			if (oldfds == 0)
 				break;
 			/*
 			 * Check that all the FDs passed in refer to legal
 			 * files.  If not, reject the entire operation.
 			 */
 			fdp = data;
 			FILEDESC_SLOCK(fdesc);
 			for (i = 0; i < oldfds; i++, fdp++) {
 				fp = fget_locked(fdesc, *fdp);
 				if (fp == NULL) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EBADF;
 					goto out;
 				}
 				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EOPNOTSUPP;
 					goto out;
 				}
 			}
 
 			/*
 			 * Now replace the integer FDs with pointers to the
 			 * file structure and capability rights.
 			 */
 			newlen = oldfds * sizeof(fdep[0]);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				FILEDESC_SUNLOCK(fdesc);
 				error = E2BIG;
 				goto out;
 			}
 			fdp = data;
 			for (i = 0; i < oldfds; i++, fdp++) {
 				if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
 					/* Undo the holds taken so far. */
 					fdp = data;
 					for (j = 0; j < i; j++, fdp++) {
 						fdrop(fdesc->fd_ofiles[*fdp].
 						    fde_file, td);
 					}
 					FILEDESC_SUNLOCK(fdesc);
 					/*
 					 * The new mbuf's pointer array has
 					 * not been filled in yet.  Unlink and
 					 * free it so that the cleanup below
 					 * does not treat its contents as
 					 * valid rights.
 					 */
 					m_freem(*controlp);
 					*controlp = NULL;
 					error = EBADF;
 					goto out;
 				}
 			}
 			fdp = data;
 			fdep = (struct filedescent **)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			/*
 			 * All entries come from a single allocation whose
 			 * base is stored in fdep[0].
 			 */
 			fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 			    M_WAITOK);
 			for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 				fde = &fdesc->fd_ofiles[*fdp];
 				fdep[i] = fdev;
 				fdep[i]->fde_file = fde->fde_file;
 				filecaps_copy(&fde->fde_caps,
 				    &fdep[i]->fde_caps, true);
 				unp_internalize_fp(fdep[i]->fde_file);
 			}
 			FILEDESC_SUNLOCK(fdesc);
 			break;
 
 		case SCM_TIMESTAMP:
 			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
 			    SCM_TIMESTAMP, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			tv = (struct timeval *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			microtime(tv);
 			break;
 
 		case SCM_BINTIME:
 			*controlp = sbcreatecontrol(NULL, sizeof(*bt),
 			    SCM_BINTIME, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			bt = (struct bintime *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			bintime(bt);
 			break;
 
 		case SCM_REALTIME:
 			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
 			    SCM_REALTIME, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanotime(ts);
 			break;
 
 		case SCM_MONOTONIC:
 			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
 			    SCM_MONOTONIC, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanouptime(ts);
 			break;
 
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		/* Later output messages are chained after this one. */
 		if (*controlp != NULL)
 			controlp = &(*controlp)->m_next;
 		/* Step to the next message, or stop at the end of the data. */
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 out:
 	/* On failure, drop the file references internalized so far. */
 	if (error != 0 && initial_controlp != NULL)
 		unp_internalize_cleanup_rights(*initial_controlp);
 	m_freem(control);
 	return (error);
 }
 
 static struct mbuf *
 unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
 {
 	struct mbuf *m, *n, *n_prev;
 	const struct cmsghdr *cm;
 	int ngroups, i, cmsgtype;
 	size_t ctrlsz;
 
 	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 	if (mode & UNP_WANTCRED_ALWAYS) {
 		ctrlsz = SOCKCRED2SIZE(ngroups);
 		cmsgtype = SCM_CREDS2;
 	} else {
 		ctrlsz = SOCKCREDSIZE(ngroups);
 		cmsgtype = SCM_CREDS;
 	}
 
 	m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET);
 	if (m == NULL)
 		return (control);
 
 	if (mode & UNP_WANTCRED_ALWAYS) {
 		struct sockcred2 *sc;
 
 		sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 		sc->sc_version = 0;
 		sc->sc_pid = td->td_proc->p_pid;
 		sc->sc_uid = td->td_ucred->cr_ruid;
 		sc->sc_euid = td->td_ucred->cr_uid;
 		sc->sc_gid = td->td_ucred->cr_rgid;
 		sc->sc_egid = td->td_ucred->cr_gid;
 		sc->sc_ngroups = ngroups;
 		for (i = 0; i < sc->sc_ngroups; i++)
 			sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 	} else {
 		struct sockcred *sc;
 
 		sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 		sc->sc_uid = td->td_ucred->cr_ruid;
 		sc->sc_euid = td->td_ucred->cr_uid;
 		sc->sc_gid = td->td_ucred->cr_rgid;
 		sc->sc_egid = td->td_ucred->cr_gid;
 		sc->sc_ngroups = ngroups;
 		for (i = 0; i < sc->sc_ngroups; i++)
 			sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 	}
 
 	/*
 	 * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 	 * created SCM_CREDS control message (struct sockcred) has another
 	 * format.
 	 */
 	if (control != NULL && cmsgtype == SCM_CREDS)
 		for (n = control, n_prev = NULL; n != NULL;) {
 			cm = mtod(n, struct cmsghdr *);
     			if (cm->cmsg_level == SOL_SOCKET &&
 			    cm->cmsg_type == SCM_CREDS) {
     				if (n_prev == NULL)
 					control = n->m_next;
 				else
 					n_prev->m_next = n->m_next;
 				n = m_free(n);
 			} else {
 				n_prev = n;
 				n = n->m_next;
 			}
 		}
 
 	/* Prepend it to the head. */
 	m->m_next = control;
 	return (m);
 }
 
 /*
  * Return the unpcb behind 'fp' when it is a socket with socket data
  * attached whose protocol belongs to the local domain; NULL otherwise.
  */
 static struct unpcb *
 fptounp(struct file *fp)
 {
 	struct socket *so;
 
 	if (fp->f_type == DTYPE_SOCKET && (so = fp->f_data) != NULL &&
 	    so->so_proto->pr_domain == &localdomain)
 		return (sotounpcb(so));
 	return (NULL);
 }
 
 /*
  * Drop an in-flight reference to 'fp'.  Files that are not local-domain
  * sockets are closed right away; for local-domain sockets the close is
  * queued on unp_defers and handed to unp_defer_task.
  */
 static void
 unp_discard(struct file *fp)
 {
 	struct unp_defer *entry;
 
 	if (!unp_externalize_fp(fp)) {
 		closef_nothread(fp);
 		return;
 	}
 
 	entry = malloc(sizeof(*entry), M_TEMP, M_WAITOK);
 	entry->ud_fp = fp;
 	UNP_DEFERRED_LOCK();
 	SLIST_INSERT_HEAD(&unp_defers, entry, ud_link);
 	UNP_DEFERRED_UNLOCK();
 	atomic_add_int(&unp_defers_count, 1);
 	taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 }
 
 /*
  * Task handler that closes the files queued by unp_discard().  The global
  * list is repeatedly swapped into a private one under the deferred lock and
  * drained without the lock held, until the global list is found empty.
  * Neither argument is used.
  */
 static void
 unp_process_defers(void *arg __unused, int pending)
 {
 	SLIST_HEAD(, unp_defer) batch;
 	struct unp_defer *entry;
 	int closed;
 
 	SLIST_INIT(&batch);
 	for (;;) {
 		UNP_DEFERRED_LOCK();
 		if (SLIST_EMPTY(&unp_defers)) {
 			UNP_DEFERRED_UNLOCK();
 			break;
 		}
 		SLIST_SWAP(&unp_defers, &batch, unp_defer);
 		UNP_DEFERRED_UNLOCK();
 
 		for (closed = 0; (entry = SLIST_FIRST(&batch)) != NULL;
 		    closed++) {
 			SLIST_REMOVE_HEAD(&batch, ud_link);
 			closef_nothread(entry->ud_fp);
 			free(entry, M_TEMP);
 		}
 		atomic_add_int(&unp_defers_count, -closed);
 	}
 }
 
 /*
  * Account for 'fp' being placed in a control message: bump the global
  * unp_rights count and, when the file is a local-domain socket, record the
  * file in its unpcb and bump that unpcb's in-flight message count.
  */
 static void
 unp_internalize_fp(struct file *fp)
 {
 	struct unpcb *unp;
 
 	UNP_LINK_WLOCK();
 	unp = fptounp(fp);
 	if (unp != NULL) {
 		unp->unp_msgcount++;
 		unp->unp_file = fp;
 	}
 	unp_rights++;
 	UNP_LINK_WUNLOCK();
 }
 
 /*
  * Undo the accounting of unp_internalize_fp() for 'fp'.  Returns 1 when the
  * file is a local-domain socket (its in-flight count was decremented) and
  * 0 otherwise; unp_rights is decremented in both cases.
  */
 static int
 unp_externalize_fp(struct file *fp)
 {
 	struct unpcb *unp;
 	int is_unp;
 
 	UNP_LINK_WLOCK();
 	unp = fptounp(fp);
 	is_unp = (unp != NULL);
 	if (is_unp)
 		unp->unp_msgcount--;
 	unp_rights--;
 	UNP_LINK_WUNLOCK();
 	return (is_unp);
 }
 
 /*
  * unp_marked counts the sockets that unp_restore_undead_ref() found while
  * restoring references during the current pass of the reachability loop in
  * unp_gc(); a non-zero value makes unp_gc() run another pass.
  * NOTE(review): it is accessed without explicit synchronization, presumably
  * because only the gc task touches it -- confirm.
  */
 static int	unp_marked;
 
 /*
  * unp_scan() callback: for each of the 'fdcount' entries in 'fdep' that
  * refers to a local-domain socket currently flagged UNPGC_DEAD, take one
  * reference off that socket's gc reference count.
  */
 static void
 unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *unp;
 	int idx;
 
 	/* Only the gc task may run this. */
 	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 	    ("%s: not on gc callout", __func__));
 	UNP_LINK_LOCK_ASSERT();
 
 	for (idx = 0; idx < fdcount; idx++) {
 		unp = fptounp(fdep[idx]->fde_file);
 		if (unp != NULL && (unp->unp_gcflag & UNPGC_DEAD) != 0)
 			unp->unp_gcrefs--;
 	}
 }
 
 /*
  * unp_scan() callback: for each of the 'fdcount' entries in 'fdep' that
  * refers to a local-domain socket still flagged UNPGC_DEAD, give back one
  * gc reference and count the event in unp_marked.
  */
 static void
 unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *unp;
 	int idx;
 
 	/* Only the gc task may run this. */
 	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 	    ("%s: not on gc callout", __func__));
 	UNP_LINK_LOCK_ASSERT();
 
 	for (idx = 0; idx < fdcount; idx++) {
 		unp = fptounp(fdep[idx]->fde_file);
 		if (unp == NULL || (unp->unp_gcflag & UNPGC_DEAD) == 0)
 			continue;
 		unp->unp_gcrefs++;
 		unp_marked++;
 	}
 }
 
 /*
  * Apply 'op' to the rights found in the receive buffer(s) reachable from
  * 'unp'.  For a listening socket the receive buffer of every socket on its
  * completed-connection queue is scanned; otherwise the socket's own receive
  * buffer is.  Sockets flagged UNPGC_IGNORE_RIGHTS are not scanned.
  */
 static void
 unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int))
 {
 	struct socket *head, *queued;
 
 	head = unp->unp_socket;
 	SOCK_LOCK(head);
 	if (!SOLISTENING(head)) {
 		/* Plain socket: scan the rights it holds itself. */
 		if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 			SOCKBUF_LOCK(&head->so_rcv);
 			unp_scan(head->so_rcv.sb_mb, op);
 			SOCKBUF_UNLOCK(&head->so_rcv);
 		}
 		SOCK_UNLOCK(head);
 		return;
 	}
 
 	/* Listening socket: scan everything waiting to be accepted. */
 	TAILQ_FOREACH(queued, &head->sol_comp, so_list) {
 		if (sotounpcb(queued)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
 			continue;
 		SOCKBUF_LOCK(&queued->so_rcv);
 		unp_scan(queued->so_rcv.sb_mb, op);
 		SOCKBUF_UNLOCK(&queued->so_rcv);
 	}
 	SOCK_UNLOCK(head);
 }
 
 /* Read-only statistics exported under the net.local sysctl node. */
 
 /* Incremented by unp_gc() by the number of sockets it flushed. */
 static int unp_recycled;
 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
     "Number of unreachable sockets claimed by the garbage collector.");
 
 /* Incremented once on every unp_gc() invocation. */
 static int unp_taskcount;
 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
     "Number of times the garbage collector has run.");
 
 /* unp_count itself is defined elsewhere in this file. */
 SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, 
     "Number of active local sockets.");
 
 /*
  * Garbage collector task for local-domain sockets.
  *
  * Candidates are sockets whose file reference count equals their count of
  * in-flight messages (unp_msgcount).  References that candidates hold on
  * each other through queued rights are subtracted; any candidate left with
  * a positive count is referenced from outside the candidate set and is
  * dropped from the list together with everything it references.  The
  * receive buffers of the remaining sockets are flushed and the references
  * taken here are released so the sockets can be reclaimed.
  *
  * Neither argument is used.
  */
 static void
 unp_gc(__unused void *arg, int pending)
 {
 	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 				    NULL };
 	struct unp_head **head;
 	struct unp_head unp_deadhead;	/* List of potentially-dead sockets. */
 	struct file *f, **unref;
 	struct unpcb *unp, *unptmp;
 	int i, total, unp_unreachable;
 
 	LIST_INIT(&unp_deadhead);
 	unp_taskcount++;
 	UNP_LINK_RLOCK();
 	/*
 	 * First determine which sockets may be in cycles.
 	 */
 	unp_unreachable = 0;
 
 	for (head = heads; *head != NULL; head++)
 		LIST_FOREACH(unp, *head, unp_link) {
 			KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0,
 			    ("%s: unp %p has unexpected gc flags 0x%x",
 			    __func__, unp, (unsigned int)unp->unp_gcflag));
 
 			f = unp->unp_file;
 
 			/*
 			 * Check for an unreachable socket potentially in a
 			 * cycle.  It must be in a queue as indicated by
 			 * msgcount, and this must equal the file reference
 			 * count.  Note that when msgcount is 0 the file is
 			 * NULL.
 			 */
 			if (f != NULL && unp->unp_msgcount != 0 &&
 			    refcount_load(&f->f_count) == unp->unp_msgcount) {
 				LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead);
 				unp->unp_gcflag |= UNPGC_DEAD;
 				unp->unp_gcrefs = unp->unp_msgcount;
 				unp_unreachable++;
 			}
 		}
 
 	/*
 	 * Scan all sockets previously marked as potentially being in a cycle
 	 * and remove the references each socket holds on any UNPGC_DEAD
 	 * sockets in its queue.  After this step, all remaining references on
 	 * sockets marked UNPGC_DEAD should not be part of any cycle.
 	 */
 	LIST_FOREACH(unp, &unp_deadhead, unp_dead)
 		unp_gc_scan(unp, unp_remove_dead_ref);
 
 	/*
 	 * If a socket still has a positive refcount, it cannot be in a
 	 * cycle.  In this case increment refcount of all children iteratively.
 	 * Stop the scan once we do a complete loop without discovering
 	 * a new reachable socket.
 	 */
 	do {
 		unp_marked = 0;
 		LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp)
 			if (unp->unp_gcrefs > 0) {
 				unp->unp_gcflag &= ~UNPGC_DEAD;
 				LIST_REMOVE(unp, unp_dead);
 				KASSERT(unp_unreachable > 0,
 				    ("%s: unp_unreachable underflow.",
 				    __func__));
 				unp_unreachable--;
 				unp_gc_scan(unp, unp_restore_undead_ref);
 			}
 	} while (unp_marked);
 
 	UNP_LINK_RUNLOCK();
 
 	if (unp_unreachable == 0)
 		return;
 
 	/*
 	 * Allocate space for a local array of dead unpcbs.
 	 * TODO: can this path be simplified by instead using the local
 	 * dead list at unp_deadhead, after taking out references
 	 * on the file object and/or unpcb and dropping the link lock?
 	 */
 	unref = malloc(unp_unreachable * sizeof(struct file *),
 	    M_TEMP, M_WAITOK);
 
 	/*
 	 * Iterate looking for sockets which have been specifically marked
 	 * as unreachable and store them locally.  The link lock was dropped
 	 * for the allocation above, so each candidate is re-checked and
 	 * skipped if it no longer qualifies or cannot be held.
 	 */
 	UNP_LINK_RLOCK();
 	total = 0;
 	LIST_FOREACH(unp, &unp_deadhead, unp_dead) {
 		KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0,
 		    ("%s: unp %p not marked UNPGC_DEAD", __func__, unp));
 		unp->unp_gcflag &= ~UNPGC_DEAD;
 		f = unp->unp_file;
 		if (unp->unp_msgcount == 0 || f == NULL ||
 		    refcount_load(&f->f_count) != unp->unp_msgcount ||
 		    !fhold(f))
 			continue;
 		unref[total++] = f;
 		KASSERT(total <= unp_unreachable,
 		    ("%s: incorrect unreachable count.", __func__));
 	}
 	UNP_LINK_RUNLOCK();
 
 	/*
 	 * Now flush all sockets, freeing rights.  This will free the
 	 * struct files associated with these sockets but leave each socket
 	 * with one remaining ref.
 	 */
 	for (i = 0; i < total; i++) {
 		struct socket *so;
 
 		so = unref[i]->f_data;
 		CURVNET_SET(so->so_vnet);
 		sorflush(so);
 		CURVNET_RESTORE();
 	}
 
 	/*
 	 * And finally release the sockets so they can be reclaimed.
 	 */
 	for (i = 0; i < total; i++)
 		fdrop(unref[i], NULL);
 	unp_recycled += total;
 	free(unref, M_TEMP);
 }
 
 static void
 unp_dispose_mbuf(struct mbuf *m)
 {
 
 	if (m)
 		unp_scan(m, unp_freerights);
 }
 
 /*
  * Dispose of the rights queued on a socket's receive buffer.  The socket is
  * first flagged UNPGC_IGNORE_RIGHTS under the link write lock so that
  * unp_gc, which could otherwise trip over the data while it is being freed,
  * no longer scans it.  Listening sockets have no receive buffer to dispose.
  */
 static void
 unp_dispose(struct socket *so)
 {
 	struct unpcb *unp = sotounpcb(so);
 
 	UNP_LINK_WLOCK();
 	unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 	UNP_LINK_WUNLOCK();
 
 	if (SOLISTENING(so))
 		return;
 	unp_dispose_mbuf(so->so_rcv.sb_mb);
 }
 
 /*
  * Walk every packet chained from 'm0' and, within each packet, every
  * MT_CONTROL mbuf.  Each SOL_SOCKET/SCM_RIGHTS control message found is
  * handed to 'op' together with the number of filedescent pointers that fit
  * in its payload.  Parsing of an mbuf stops at the first header that does
  * not fit in the remaining length.
  */
 static void
 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 {
 	struct mbuf *m;
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen;
 
 	for (; m0 != NULL; m0 = m0->m_nextpkt) {
 		for (m = m0; m != NULL; m = m->m_next) {
 			if (m->m_type != MT_CONTROL)
 				continue;
 
 			clen = m->m_len;
 			for (cm = mtod(m, struct cmsghdr *); cm != NULL;) {
 				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 					break;
 
 				/* Payload length of this control message. */
 				data = CMSG_DATA(cm);
 				datalen = (caddr_t)cm + cm->cmsg_len
 				    - (caddr_t)data;
 
 				if (cm->cmsg_level == SOL_SOCKET &&
 				    cm->cmsg_type == SCM_RIGHTS)
 					op(data, datalen /
 					    sizeof(struct filedescent *));
 
 				/* Step to the next message, if one remains. */
 				if (CMSG_SPACE(datalen) < clen) {
 					clen -= CMSG_SPACE(datalen);
 					cm = (struct cmsghdr *)
 					    ((caddr_t)cm + CMSG_SPACE(datalen));
 				} else {
 					clen = 0;
 					cm = NULL;
 				}
 			}
 		}
 	}
 }
 
 /*
  * Helper called by VFS before a socket-type vnode is reclaimed.  If a
  * unpcb is still bound to the vnode, the binding is detached, the unpcb's
  * unp_vnode pointer is cleared and the vnode reference is released with
  * vunref().  The vnode must be exclusively locked and of type VSOCK.
  */
 void
 vfs_unp_reclaim(struct vnode *vp)
 {
 	struct mtx *vplock;
 	struct unpcb *unp;
 	int active = 0;
 
 	ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 	KASSERT(vp->v_type == VSOCK,
 	    ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 
 	vplock = mtx_pool_find(mtxpool_sleep, vp);
 	mtx_lock(vplock);
 	VOP_UNP_CONNECT(vp, &unp);
 	if (unp != NULL) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_vnode == vp) {
 			VOP_UNP_DETACH(vp);
 			unp->unp_vnode = NULL;
 			active = 1;
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
 	mtx_unlock(vplock);
 
 	/* Release the reference only after the pool mutex is dropped. */
 	if (active)
 		vunref(vp);
 }
 
 #ifdef DDB
 /* Print 'indent' spaces on the debugger console; nothing if indent <= 0. */
 static void
 db_print_indent(int indent)
 {
 
 	for (; indent > 0; indent--)
 		db_printf(" ");
 }
 
 /*
  * Print the symbolic names of the flags set in 'unp_flags', separated by
  * ", ".  Bits with no name below are silently skipped.
  */
 static void
 db_print_unpflags(int unp_flags)
 {
 	const char *sep;
 
 	/* Empty before the first name, ", " before every later one. */
 	sep = "";
 	if (unp_flags & UNP_HAVEPC) {
 		db_printf("%sUNP_HAVEPC", sep);
 		sep = ", ";
 	}
 	if (unp_flags & UNP_WANTCRED_ALWAYS) {
 		db_printf("%sUNP_WANTCRED_ALWAYS", sep);
 		sep = ", ";
 	}
 	if (unp_flags & UNP_WANTCRED_ONESHOT) {
 		db_printf("%sUNP_WANTCRED_ONESHOT", sep);
 		sep = ", ";
 	}
 	if (unp_flags & UNP_CONNWAIT) {
 		db_printf("%sUNP_CONNWAIT", sep);
 		sep = ", ";
 	}
 	if (unp_flags & UNP_CONNECTING) {
 		db_printf("%sUNP_CONNECTING", sep);
 		sep = ", ";
 	}
 	if (unp_flags & UNP_BINDING) {
 		db_printf("%sUNP_BINDING", sep);
 		sep = ", ";
 	}
 }
 
 /*
  * Print the contents of 'xu' on two lines, each indented by 'indent'
  * spaces: the scalar fields first, then the comma-separated group list.
  */
 static void
 db_print_xucred(int indent, struct xucred *xu)
 {
 	const char *sep;
 	int idx;
 
 	db_print_indent(indent);
 	db_printf("cr_version: %u   cr_uid: %u   cr_pid: %d   cr_ngroups: %d\n",
 	    xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
 
 	db_print_indent(indent);
 	db_printf("cr_groups: ");
 	sep = "";
 	for (idx = 0; idx < xu->cr_ngroups; idx++) {
 		db_printf("%s%u", sep, xu->cr_groups[idx]);
 		sep = ", ";
 	}
 	db_printf("\n");
 }
 
 /*
  * Print the address of every unpcb on the reference list 'uh', four per
  * line, each line indented by 'indent' spaces.  Nothing is printed for an
  * empty list.
  */
 static void
 db_print_unprefs(int indent, struct unp_head *uh)
 {
 	struct unpcb *unp;
 	int col;
 
 	/* 'col' is the number of entries already printed on this line. */
 	col = 0;
 	LIST_FOREACH(unp, uh, unp_reflink) {
 		if (col == 0)
 			db_print_indent(indent);
 		db_printf("%p  ", unp);
 		if (++col == 4) {
 			db_printf("\n");
 			col = 0;
 		}
 	}
 	/* Terminate a partially filled last line. */
 	if (col != 0)
 		db_printf("\n");
 }
 
 /*
  * "show unpcb <addr>" debugger command: dump the fields of the unpcb at
  * the given address.  Prints a usage line when no address is supplied.
  */
 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 {
 	struct unpcb *unp;
 
 	if (!have_addr) {
 		db_printf("usage: show unpcb <addr>\n");
 		return;
 	}
 	unp = (struct unpcb *)addr;
 
 	/* Socket, vnode and connection linkage. */
 	db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 	    unp->unp_vnode);
 	db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 	    unp->unp_conn);
 	db_printf("unp_refs:\n");
 	db_print_unprefs(2, &unp->unp_refs);
 
 	/* XXXRW: Would be nice to print the full address, if any. */
 	db_printf("unp_addr: %p\n", unp->unp_addr);
 	db_printf("unp_gencnt: %llu\n",
 	    (unsigned long long)unp->unp_gencnt);
 
 	/* Flags, both raw and decoded. */
 	db_printf("unp_flags: %x (", unp->unp_flags);
 	db_print_unpflags(unp->unp_flags);
 	db_printf(")\n");
 
 	db_printf("unp_peercred:\n");
 	db_print_xucred(2, &unp->unp_peercred);
 
 	db_printf("unp_refcount: %u\n", unp->unp_refcount);
 }
 #endif
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
index 2bd15ac35895..cd620fe3aef9 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
@@ -1,2965 +1,2964 @@
 /*
  * ng_btsocket_l2cap.c
  */
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2001-2002 Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $Id: ng_btsocket_l2cap.c,v 1.16 2003/09/14 23:29:06 max Exp $
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/domain.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/filedesc.h>
 #include <sys/ioccom.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/bluetooth/include/ng_bluetooth.h>
 #include <netgraph/bluetooth/include/ng_hci.h>
 #include <netgraph/bluetooth/include/ng_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket.h>
 #include <netgraph/bluetooth/include/ng_btsocket_l2cap.h>
 
 /*
  * Malloc type used by this file: a dedicated type when NG_SEPARATE_MALLOC
  * is defined, otherwise an alias for M_NETGRAPH.
  */
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_L2CAP, "netgraph_btsocks_l2cap",
 		"Netgraph Bluetooth L2CAP sockets");
 #else
 #define M_NETGRAPH_BTSOCKET_L2CAP M_NETGRAPH
 #endif /* NG_SEPARATE_MALLOC */
 
 /* Netgraph node methods */
 static ng_constructor_t	ng_btsocket_l2cap_node_constructor;
 static ng_rcvmsg_t	ng_btsocket_l2cap_node_rcvmsg;
 static ng_shutdown_t	ng_btsocket_l2cap_node_shutdown;
 static ng_newhook_t	ng_btsocket_l2cap_node_newhook;
 static ng_connect_t	ng_btsocket_l2cap_node_connect;
 static ng_rcvdata_t	ng_btsocket_l2cap_node_rcvdata;
 static ng_disconnect_t	ng_btsocket_l2cap_node_disconnect;
 
 static void		ng_btsocket_l2cap_input   (void *, int);
 static void		ng_btsocket_l2cap_rtclean (void *, int);
 
 /*
  * Netgraph type descriptor: binds the node type name to the node methods
  * declared above.
  */
 static struct ng_type	typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_BTSOCKET_L2CAP_NODE_TYPE,
 	.constructor =	ng_btsocket_l2cap_node_constructor,
 	.rcvmsg =	ng_btsocket_l2cap_node_rcvmsg,
 	.shutdown =	ng_btsocket_l2cap_node_shutdown,
 	.newhook =	ng_btsocket_l2cap_node_newhook,
 	.connect =	ng_btsocket_l2cap_node_connect,
 	.rcvdata =	ng_btsocket_l2cap_node_rcvdata,
 	.disconnect =	ng_btsocket_l2cap_node_disconnect,
 };
 
 /*
  * Globals.
  *
  * ng_btsocket_l2cap_queue holds items received from Netgraph until the
  * input task picks them up; it is manipulated under
  * ng_btsocket_l2cap_queue_mtx.  Lookups in the socket list are done under
  * ng_btsocket_l2cap_sockets_mtx.  ng_btsocket_l2cap_lasttime and
  * ng_btsocket_l2cap_curpps are the ppsratecheck() state used by the debug
  * macros below.
  */
 extern int					ifqmaxlen;
 static u_int32_t				ng_btsocket_l2cap_debug_level;
 static node_p					ng_btsocket_l2cap_node;
 static struct ng_bt_itemq			ng_btsocket_l2cap_queue;
 static struct mtx				ng_btsocket_l2cap_queue_mtx;
 static struct task				ng_btsocket_l2cap_queue_task;
 static LIST_HEAD(, ng_btsocket_l2cap_pcb)	ng_btsocket_l2cap_sockets;
 static struct mtx				ng_btsocket_l2cap_sockets_mtx;
 static LIST_HEAD(, ng_btsocket_l2cap_rtentry)	ng_btsocket_l2cap_rt;
 static struct mtx				ng_btsocket_l2cap_rt_mtx;
 static struct task				ng_btsocket_l2cap_rt_task;
 static struct timeval				ng_btsocket_l2cap_lasttime;
 static int					ng_btsocket_l2cap_curpps;
 
 /*
  * Sysctl tree: a "seq" node under _net_bluetooth_l2cap_sockets exposing
  * the writable debug level and read-only input queue statistics.
  */
 SYSCTL_DECL(_net_bluetooth_l2cap_sockets);
 static SYSCTL_NODE(_net_bluetooth_l2cap_sockets, OID_AUTO, seq,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Bluetooth SEQPACKET L2CAP sockets family");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, debug_level,
 	CTLFLAG_RW,
 	&ng_btsocket_l2cap_debug_level, NG_BTSOCKET_WARN_LEVEL,
 	"Bluetooth SEQPACKET L2CAP sockets debug level");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_len,
 	CTLFLAG_RD,
 	&ng_btsocket_l2cap_queue.len, 0,
 	"Bluetooth SEQPACKET L2CAP sockets input queue length");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_maxlen,
 	CTLFLAG_RD,
 	&ng_btsocket_l2cap_queue.maxlen, 0,
 	"Bluetooth SEQPACKET L2CAP sockets input queue max. length");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_drops,
 	CTLFLAG_RD,
 	&ng_btsocket_l2cap_queue.drops, 0,
 	"Bluetooth SEQPACKET L2CAP sockets input queue drops");
 
 /*
  * Debug output.  Each macro expands to "if (<level and rate check>) printf",
  * so it is used like printf: NG_BTSOCKET_L2CAP_ERR("fmt", args).  Output is
  * rate limited through ppsratecheck().
  *
  * Because the expansion is a bare "if", never use these as the unbraced
  * body of an if that has an else: the else would attach to the macro's if.
  */
 #define NG_BTSOCKET_L2CAP_INFO \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_INFO_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_L2CAP_WARN \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_WARN_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_L2CAP_ERR \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ERR_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_L2CAP_ALERT \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 /* 
  * Netgraph message processing routines
  */
 
 static int ng_btsocket_l2cap_process_l2ca_con_req_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_con_rsp_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_con_ind
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 static int ng_btsocket_l2cap_process_l2ca_cfg_req_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_cfg_ind
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 static int ng_btsocket_l2cap_process_l2ca_discon_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_discon_ind
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 static int ng_btsocket_l2cap_process_l2ca_write_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 /*
  * Send L2CA_xxx messages to the lower layer
  */
 
 static int  ng_btsocket_l2cap_send_l2ca_con_req
 	(ng_btsocket_l2cap_pcb_p);
 static int  ng_btsocket_l2cap_send_l2ca_con_rsp_req
 	(u_int32_t, ng_btsocket_l2cap_rtentry_p, bdaddr_p, int, int, int, int);
 static int  ng_btsocket_l2cap_send_l2ca_cfg_req
 	(ng_btsocket_l2cap_pcb_p);
 static int  ng_btsocket_l2cap_send_l2ca_cfg_rsp
 	(ng_btsocket_l2cap_pcb_p);
 static int  ng_btsocket_l2cap_send_l2ca_discon_req
 	(u_int32_t, ng_btsocket_l2cap_pcb_p);
 
 static int ng_btsocket_l2cap_send2
 	(ng_btsocket_l2cap_pcb_p);
 
 /* 
  * Timeout processing routines
  */
 
 static void ng_btsocket_l2cap_timeout         (ng_btsocket_l2cap_pcb_p);
 static void ng_btsocket_l2cap_untimeout       (ng_btsocket_l2cap_pcb_p);
 static void ng_btsocket_l2cap_process_timeout (void *);
 
 /* 
  * Other stuff 
  */
 
 static ng_btsocket_l2cap_pcb_p     ng_btsocket_l2cap_pcb_by_addr(bdaddr_p, int);
 static ng_btsocket_l2cap_pcb_p     ng_btsocket_l2cap_pcb_by_token(u_int32_t);
 static ng_btsocket_l2cap_pcb_p     ng_btsocket_l2cap_pcb_by_cid (bdaddr_p, int,int);
 static int                         ng_btsocket_l2cap_result2errno(int);
 
 static int ng_btsock_l2cap_addrtype_to_linktype(int addrtype);
 
 /*
  * Schedule the input-queue task or the route-cleanup task on
  * taskqueue_swi_giant.  Both evaluate to the taskqueue_enqueue() result.
  */
 #define ng_btsocket_l2cap_wakeup_input_task() \
 	taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_queue_task)
 
 #define ng_btsocket_l2cap_wakeup_route_task() \
 	taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_rt_task)
 
 /*
  * Translate a Bluetooth address type into the matching HCI link type.
  * The two LE address types map to their LE link types; every other value
  * maps to NG_HCI_LINK_ACL.
  */
 static int
 ng_btsock_l2cap_addrtype_to_linktype(int addrtype)
 {
 
 	if (addrtype == BDADDR_LE_PUBLIC)
 		return (NG_HCI_LINK_LE_PUBLIC);
 	if (addrtype == BDADDR_LE_RANDOM)
 		return (NG_HCI_LINK_LE_RANDOM);
 	return (NG_HCI_LINK_ACL);
 }
 
 /*****************************************************************************
  *****************************************************************************
  **                        Netgraph node interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Netgraph node constructor.  Nodes of this type may not be created on
  * request, so every attempt is refused with EINVAL.
  */
 
 static int
 ng_btsocket_l2cap_node_constructor(node_p node)
 {
 
 	return (EINVAL);
 } /* ng_btsocket_l2cap_node_constructor */
 
 /*
  * Local shutdown processing: release the old node, then create and name a
  * fresh one in its place.  On any failure the global node pointer is left
  * NULL and the error is returned.
  */
 
 static int
 ng_btsocket_l2cap_node_shutdown(node_p node)
 {
 	int	error;
 
 	NG_NODE_UNREF(node);
 
 	/* Create the replacement node */
 	error = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, error);
 		goto fail;
 	}
 
 	error = ng_name_node(ng_btsocket_l2cap_node,
 				NG_BTSOCKET_L2CAP_NODE_TYPE);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, error);
 
 		/* The node exists but is unusable without a name */
 		NG_NODE_UNREF(ng_btsocket_l2cap_node);
 		goto fail;
 	}
 
 	return (0);
 
 fail:
 	ng_btsocket_l2cap_node = NULL;
 
 	return (error);
 } /* ng_btsocket_l2cap_node_shutdown */
 
 /*
  * New hook request.  Hooks of any name are accepted.
  */
 
 static int
 ng_btsocket_l2cap_node_newhook(node_p node, hook_p hook, char const *name)
 {
 
 	return (0);
 } /* ng_btsocket_l2cap_node_newhook */
 
 /*
  * Hook connection.  Always succeeds: the hook starts out with no private
  * data, and an extra reference is taken on it.
  */
 
 static int
 ng_btsocket_l2cap_node_connect(hook_p hook)
 {
 
 	NG_HOOK_SET_PRIVATE(hook, NULL);
 
 	/* Extra reference; dropped in the disconnect method */
 	NG_HOOK_REF(hook);
 
 #if 0
 	NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook));
 	NG_HOOK_FORCE_QUEUE(hook);
 #endif
 
 	return (0);
 } /* ng_btsocket_l2cap_node_connect */
 
 /*
  * Hook disconnection.
  *
  * A hook without private information was connected but never received a
  * "hook_info" message, so it was never entered in the routing table and it
  * is safe to simply drop the extra reference.  A hook with private
  * information is in the routing table, so the route cleanup task is
  * scheduled instead and its result returned.
  */
 
 static int
 ng_btsocket_l2cap_node_disconnect(hook_p hook)
 {
 
 	if (NG_HOOK_PRIVATE(hook) == NULL) {
 		NG_HOOK_UNREF(hook); /* Remove extra reference */
 
 		return (0);
 	}
 
 	return (ng_btsocket_l2cap_wakeup_route_task());
 } /* ng_btsocket_l2cap_node_disconnect */
 
 /*
  * Receive a control message.  Messages carrying the L2CAP type cookie are
  * put on the input queue, with a referenced hook attached when one was
  * given, and the input task is woken up.  Returns EINVAL (item freed) for
  * any other message and ENOBUFS (item freed, drop counted) when the queue
  * is full.
  */
 
 static int
 ng_btsocket_l2cap_node_rcvmsg(node_p node, item_p item, hook_p hook)
 {
 	struct ng_mesg	*msg = NGI_MSG(item); /* item still has message */
 	int		 error;
 
 	if (msg == NULL || msg->header.typecookie != NGM_L2CAP_COOKIE) {
 		NG_FREE_ITEM(item);
 
 		return (EINVAL);
 	}
 
 	mtx_lock(&ng_btsocket_l2cap_queue_mtx);
 	if (NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Input queue is full (msg)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue);
 		NG_FREE_ITEM(item);
 		error = ENOBUFS;
 	} else {
 		/* The queued item keeps its own reference to the hook */
 		if (hook != NULL) {
 			NG_HOOK_REF(hook);
 			NGI_SET_HOOK(item, hook);
 		}
 
 		NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item);
 		error = ng_btsocket_l2cap_wakeup_input_task();
 	}
 	mtx_unlock(&ng_btsocket_l2cap_queue_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_node_rcvmsg */
 
 /*
  * Receive data on a hook.  The item is put on the input queue with a
  * referenced hook attached and the input task is woken up.  Returns ENOBUFS
  * (item freed, drop counted) when the queue is full.
  */
 
 static int
 ng_btsocket_l2cap_node_rcvdata(hook_p hook, item_p item)
 {
 	int	error;
 
 	mtx_lock(&ng_btsocket_l2cap_queue_mtx);
 	if (NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Input queue is full (data)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue);
 		NG_FREE_ITEM(item);
 		mtx_unlock(&ng_btsocket_l2cap_queue_mtx);
 
 		return (ENOBUFS);
 	}
 
 	/* The queued item keeps its own reference to the hook */
 	NG_HOOK_REF(hook);
 	NGI_SET_HOOK(item, hook);
 
 	NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item);
 	error = ng_btsocket_l2cap_wakeup_input_task();
 	mtx_unlock(&ng_btsocket_l2cap_queue_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_node_rcvdata */
 
 /*
  * Process L2CA_Connect response. Socket layer must have initiated connection,
  * so we have to have a socket associated with message token.
  *
  * Returns EMSGSIZE when the argument size is wrong, ENOENT when no socket
  * matches the token or the socket is not in the CONNECTING state, the error
  * from sending the configuration request, and 0 otherwise.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_con_req_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_con_op	*op;
 	ng_btsocket_l2cap_pcb_t	*pcb;
 	int			 error = 0;
 
 	if (msg->header.arglen != sizeof(*op))
 		return (EMSGSIZE);
 
 	op = (ng_l2cap_l2ca_con_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* Look for the socket with the token */
 	pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (pcb == NULL) {
 		error = ENOENT;
 		goto out_sockets;
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Connect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, status=%d, " \
 "state=%d\n",	__func__, msg->header.token,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, op->lcid, op->result, op->status,
 		pcb->state);
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_CONNECTING) {
 		error = ENOENT;
 		goto out;
 	}
 
 	ng_btsocket_l2cap_untimeout(pcb);
 
 	/* Still pending: re-arm the timeout and keep waiting */
 	if (op->result == NG_L2CAP_PENDING) {
 		ng_btsocket_l2cap_timeout(pcb);
 		goto out;
 	}
 
 	if (op->result != NG_L2CAP_SUCCESS) {
 		/*
 		 * We have failed to open connection, so convert result
 		 * code to "errno" code and disconnect the socket. Channel
 		 * already has been closed.
 		 */
 
 		pcb->so->so_error = ng_btsocket_l2cap_result2errno(op->result);
 		pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(pcb->so);
 		goto out;
 	}
 
 	/* Success: record local channel ID and encryption state */
 	pcb->cid = op->lcid;
 	pcb->encryption = op->encryption;
 
 	if (pcb->idtype == NG_L2CAP_L2CA_IDTYPE_ATT ||
 	    pcb->idtype == NG_L2CAP_L2CA_IDTYPE_SMP) {
 		/*
 		 * ATT and SMP channels skip the configuration step: the
 		 * socket is connected right away unless encryption is
 		 * required and not yet enabled.
 		 */
 		if (pcb->need_encrypt && !(pcb->encryption)) {
 			ng_btsocket_l2cap_timeout(pcb);
 			pcb->state = NG_BTSOCKET_L2CAP_W4_ENC_CHANGE;
 		} else {
 			pcb->state = NG_BTSOCKET_L2CAP_OPEN;
 			soisconnected(pcb->so);
 		}
 		goto out;
 	}
 
 	/*
 	 * Channel is now open, so start configuration process. Source and
 	 * destination addresses as well as route must be already set.
 	 */
 
 	error = ng_btsocket_l2cap_send_l2ca_cfg_req(pcb);
 	if (error != 0) {
 		/* Send disconnect request with "zero" token */
 		ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 		/* ... and close the socket */
 		pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(pcb->so);
 	} else {
 		pcb->cfg_state = NG_BTSOCKET_L2CAP_CFG_IN_SENT;
 		pcb->state = NG_BTSOCKET_L2CAP_CONFIGURING;
 
 		ng_btsocket_l2cap_timeout(pcb);
 	}
 
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 out_sockets:
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_process_l2ca_con_req_rsp */
 
 /*
  * Process L2CA_ConnectRsp response.
  *
  * The socket is looked up by the Netgraph message token and the response
  * is honoured only while that socket is in the CONNECTING state. A
  * successful result moves the socket to CONFIGURING and re-arms the timer;
  * any other result closes the socket with so_error derived from the L2CAP
  * result code.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket in
  * the CONNECTING state owns the token, and 0 otherwise.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_con_rsp_op	*rsp;
 	ng_btsocket_l2cap_pcb_t		*sock;
 	int				 status = ENOENT;
 
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_con_rsp_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* The token identifies the socket that sent the request */
 	sock = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (sock == NULL)
 		goto unlock_list;
 
 	mtx_lock(&sock->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_ConnectRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n",
 		__func__, msg->header.token,
 		sock->src.b[5], sock->src.b[4], sock->src.b[3],
 		sock->src.b[2], sock->src.b[1], sock->src.b[0],
 		sock->dst.b[5], sock->dst.b[4], sock->dst.b[3],
 		sock->dst.b[2], sock->dst.b[1], sock->dst.b[0],
 		sock->psm, sock->cid, rsp->result, sock->state);
 
 	if (sock->state != NG_BTSOCKET_L2CAP_CONNECTING)
 		goto unlock_pcb;
 
 	ng_btsocket_l2cap_untimeout(sock);
 
 	if (rsp->result == NG_L2CAP_SUCCESS) {
 		/* Enter CONFIGURING and wait for CONFIG_IND from the peer */
 		sock->cfg_state = 0;
 		sock->state = NG_BTSOCKET_L2CAP_CONFIGURING;
 		ng_btsocket_l2cap_timeout(sock);
 	} else {
 		/* The channel is already gone, so just close the socket */
 		sock->so->so_error =
 		    ng_btsocket_l2cap_result2errno(rsp->result);
 		sock->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(sock->so);
 	}
 
 	status = 0;
 
 unlock_pcb:
 	mtx_unlock(&sock->pcb_mtx);
 unlock_list:
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (status);
 } /* ng_btsocket_l2cap_process_l2ca_con_rsp_rsp */
 
 /*
  * Process L2CA_Connect indicator.
  *
  * Looks up a socket for the hook's source address and the requested PSM
  * and, if one is found, clones a new socket from it with sonewconn(). An
  * L2CA_ConnectRsp is always sent back: with NG_L2CAP_PSM_NOT_SUPPORTED if
  * there is no such socket, NG_L2CAP_NO_RESOURCES if no new socket could be
  * created, or a zero result (and the new socket's token) otherwise.
  *
  * Returns EMSGSIZE for a wrongly sized message, otherwise the result of
  * sending the response.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_con_ind(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_con_ind_ip	*ind;
 	ng_btsocket_l2cap_pcb_t		*listener, *child = NULL;
 	u_int32_t			 rsp_token = 0;
 	u_int16_t			 rsp_result = 0;
 	int				 rc;
 
 	if (msg->header.arglen != sizeof(*ind))
 		return (EMSGSIZE);
 
 	ind = (ng_l2cap_l2ca_con_ind_ip *)(msg->data);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Connect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, ident=%d\n",
 		__func__,
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		ind->bdaddr.b[5], ind->bdaddr.b[4], ind->bdaddr.b[3],
 		ind->bdaddr.b[2], ind->bdaddr.b[1], ind->bdaddr.b[0],
 		ind->psm, ind->lcid, ind->ident);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	listener = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ind->psm);
 	if (listener == NULL) {
 		/* Nobody listens on requested BDADDR/PSM */
 		rsp_result = NG_L2CAP_PSM_NOT_SUPPORTED;
 	} else {
 		struct socket	*newso;
 		const void	*local;
 
 		mtx_lock(&listener->pcb_mtx);
 
 		CURVNET_SET(listener->so->so_vnet);
 		newso = sonewconn(listener->so, 0);
 		CURVNET_RESTORE();
 		if (newso == NULL) {
 			rsp_result = NG_L2CAP_NO_RESOURCES;
 			goto respond;
 		}
 
 		/*
 		 * A new socket exists, so fill in its pcb. The local
 		 * address comes from the listening socket when that one is
 		 * bound to a specific address, and from the hook's routing
 		 * entry when it is bound to the wildcard address.
 		 */
 
 		child = so2l2cap_pcb(newso);
 		KASSERT((child != NULL),
 ("%s: pcb1 == NULL\n", __func__));
 
 		mtx_lock(&child->pcb_mtx);
 
 		local = &rt->src;
 		if (bcmp(&listener->src, NG_HCI_BDADDR_ANY,
 		    sizeof(listener->src)) != 0)
 			local = &listener->src;
 		bcopy(local, &child->src, sizeof(child->src));
 
 		child->flags &= ~NG_BTSOCKET_L2CAP_CLIENT;
 
 		bcopy(&ind->bdaddr, &child->dst, sizeof(child->dst));
 		child->psm = ind->psm;
 		child->cid = ind->lcid;
 		child->rt = rt;
 
 		/* Inherit the listening socket's settings */
 		child->imtu = listener->imtu;
 		bcopy(&listener->oflow, &child->oflow, sizeof(child->oflow));
 		child->flush_timo = listener->flush_timo;
 
 		rsp_token = child->token;
 	}
 
 respond:
 	rc = ng_btsocket_l2cap_send_l2ca_con_rsp_req(rsp_token, rt,
 	    &ind->bdaddr, ind->ident, ind->lcid, rsp_result, ind->linktype);
 
 	if (child != NULL) {
 		if (rc == 0) {
 			/* Response is on its way: wait for its outcome */
 			child->state = NG_BTSOCKET_L2CAP_CONNECTING;
 			soisconnecting(child->so);
 
 			ng_btsocket_l2cap_timeout(child);
 		} else {
 			/* Could not respond, so the new socket is dead */
 			child->so->so_error = rc;
 			child->state = NG_BTSOCKET_L2CAP_CLOSED;
 			soisdisconnected(child->so);
 		}
 
 		mtx_unlock(&child->pcb_mtx);
 	}
 
 	if (listener != NULL)
 		mtx_unlock(&listener->pcb_mtx);
 
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (rc);
 } /* ng_btsocket_l2cap_process_l2ca_con_ind */
 /*
  * Process L2CA encryption change notification.
  *
  * The socket is looked up by hook source address, local channel ID and
  * channel ID type. The reported encryption result is always recorded in
  * the pcb. If the socket requires encryption and is waiting for it
  * (W4_ENC_CHANGE), the socket is either opened (encryption enabled) or
  * disconnected with EPERM (encryption not enabled).
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket
  * matches, and 0 otherwise.
  */
 static int
 ng_btsocket_l2cap_process_l2ca_enc_change(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_enc_chg_op	*op = NULL;
 	ng_btsocket_l2cap_pcb_t		*pcb = NULL;
 
 	if (msg->header.arglen != sizeof(*op))
 		return (EMSGSIZE);
 
 	op = (ng_l2cap_l2ca_enc_chg_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, op->lcid,
 					   op->idtype);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 	pcb->encryption = op->result;
 
 	if (pcb->need_encrypt) {
 		if (pcb->state != NG_BTSOCKET_L2CAP_W4_ENC_CHANGE) {
 			/*
 			 * Not waiting for encryption. Leave the timer
 			 * alone: a timer pending in another state belongs
 			 * to a different operation (e.g. an outstanding
 			 * write on an open socket), not to this handler.
 			 */
 			NG_BTSOCKET_L2CAP_WARN("%s: Invalid pcb status %d\n",
 					       __func__, pcb->state);
 		} else {
 			/* Stop the timer armed on entering W4_ENC_CHANGE */
 			ng_btsocket_l2cap_untimeout(pcb);
 
 			if (pcb->encryption) {
 				pcb->state = NG_BTSOCKET_L2CAP_OPEN;
 				soisconnected(pcb->so);
 			} else {
 				/* Encryption is required but was refused */
 				pcb->so->so_error = EPERM;
 
 				/* Send disconnect with "zero" token */
 				ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 				/* ... and close the socket */
 				pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 				soisdisconnected(pcb->so);
 			}
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_l2cap_process_l2ca_enc_change */
 /*
  * Process L2CA_Config response.
  *
  * This is the answer to an L2CA_Config request issued for a socket, so
  * the socket is located by the Netgraph message token and must be in the
  * CONFIGURING state. On success the negotiated values are stored and the
  * incoming side is marked configured; any failure disconnects the channel
  * without attempting to renegotiate.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket in
  * the CONFIGURING state owns the token, and 0 otherwise.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_cfg_op	*rsp;
 	ng_btsocket_l2cap_pcb_p	 sock;
 	int			 status = ENOENT;
 
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_cfg_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/*
 	 * XXX FIXME if no socket owns the token there is nothing we can
 	 * do: not even a Disconnect can be sent because the channel ID
 	 * is unknown.
 	 */
 
 	sock = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (sock == NULL)
 		goto unlock_list;
 
 	mtx_lock(&sock->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Config response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \
 "cfg_state=%x\n",
 		__func__, msg->header.token,
 		sock->src.b[5], sock->src.b[4], sock->src.b[3],
 		sock->src.b[2], sock->src.b[1], sock->src.b[0],
 		sock->dst.b[5], sock->dst.b[4], sock->dst.b[3],
 		sock->dst.b[2], sock->dst.b[1], sock->dst.b[0],
 		sock->psm, sock->cid, rsp->result, sock->state,
 		sock->cfg_state);
 
 	if (sock->state != NG_BTSOCKET_L2CAP_CONFIGURING)
 		goto unlock_pcb;
 
 	status = 0;
 
 	if (rsp->result != NG_L2CAP_SUCCESS) {
 		/*
 		 * Unacceptable parameters, reject or unknown option: we do
 		 * not negotiate, so send Disconnect and close the channel.
 		 */
 
 		ng_btsocket_l2cap_untimeout(sock);
 
 		if (rsp->result == NG_L2CAP_UNACCEPTABLE_PARAMS ||
 		    rsp->result == NG_L2CAP_UNKNOWN_OPTION)
 			sock->so->so_error = EINVAL;
 		else
 			sock->so->so_error = ECONNRESET;
 
 		/* Send disconnect with "zero" token */
 		ng_btsocket_l2cap_send_l2ca_discon_req(0, sock);
 
 		/* ... and close the socket */
 		sock->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(sock->so);
 
 		goto unlock_pcb;
 	}
 
 	/*
 	 * XXX FIXME Actually set flush and link timeout. Set QoS here if
 	 * required. Resolve conflicts (flush_timo). For now just remember
 	 * the incoming MTU (peer's outgoing MTU) and outgoing flow spec.
 	 */
 
 	sock->imtu = rsp->imtu;
 	bcopy(&rsp->oflow, &sock->oflow, sizeof(sock->oflow));
 	sock->flush_timo = rsp->flush_timo;
 
 	/*
 	 * The incoming side is configured now. If the outgoing side is
 	 * done as well the socket is connected, otherwise keep waiting
 	 * for the peer.
 	 */
 
 	sock->cfg_state &= ~NG_BTSOCKET_L2CAP_CFG_IN_SENT;
 	sock->cfg_state |= NG_BTSOCKET_L2CAP_CFG_IN;
 
 	if (sock->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) {
 		/* Configuration complete - mark socket as open */
 		ng_btsocket_l2cap_untimeout(sock);
 		sock->state = NG_BTSOCKET_L2CAP_OPEN;
 		soisconnected(sock->so);
 	}
 
 unlock_pcb:
 	mtx_unlock(&sock->pcb_mtx);
 unlock_list:
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (status);
 } /* ng_btsocket_l2cap_process_l2ca_cfg_req_rsp */
 
 /*
  * Process L2CA_ConfigRsp response.
  *
  * The socket is located by the Netgraph message token and must be in the
  * CONFIGURING state. On success the outgoing side is marked configured;
  * the socket is then opened if both sides are done, or an L2CA_Config
  * request for the incoming path is issued if none has been sent yet. A
  * failed result, or a failure to send that request, disconnects the
  * channel and closes the socket.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket in
  * the CONFIGURING state owns the token, otherwise 0 or the error from
  * sending the L2CA_Config request.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_cfg_rsp_op	*rsp;
 	ng_btsocket_l2cap_pcb_t		*sock;
 	int				 rc = 0;
 	int				 teardown = 0;
 
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_cfg_rsp_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* The token identifies the socket that sent the request */
 	sock = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (sock == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&sock->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_ConfigRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \
 "cfg_state=%x\n",
 		__func__, msg->header.token,
 		sock->src.b[5], sock->src.b[4], sock->src.b[3],
 		sock->src.b[2], sock->src.b[1], sock->src.b[0],
 		sock->dst.b[5], sock->dst.b[4], sock->dst.b[3],
 		sock->dst.b[2], sock->dst.b[1], sock->dst.b[0],
 		sock->psm, sock->cid, rsp->result, sock->state,
 		sock->cfg_state);
 
 	if (sock->state != NG_BTSOCKET_L2CAP_CONFIGURING) {
 		rc = ENOENT;
 		goto unlock;
 	}
 
 	if (rsp->result != NG_L2CAP_SUCCESS) {
 		/* Peer side of the configuration failed */
 		teardown = 1;
 	} else {
 		/*
 		 * The remote side is configured. Open the socket if the
 		 * local side is done too, otherwise make sure the local
 		 * side configuration has been started.
 		 */
 
 		sock->cfg_state &= ~NG_BTSOCKET_L2CAP_CFG_OUT_SENT;
 		sock->cfg_state |= NG_BTSOCKET_L2CAP_CFG_OUT;
 
 		if (sock->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) {
 			/* Configuration complete - mark socket as open */
 			ng_btsocket_l2cap_untimeout(sock);
 			sock->state = NG_BTSOCKET_L2CAP_OPEN;
 			soisconnected(sock->so);
 		} else if ((sock->cfg_state &
 		    NG_BTSOCKET_L2CAP_CFG_IN_SENT) == 0) {
 			/* Send L2CA_Config request - incoming path */
 			rc = ng_btsocket_l2cap_send_l2ca_cfg_req(sock);
 			if (rc == 0)
 				sock->cfg_state |=
 				    NG_BTSOCKET_L2CAP_CFG_IN_SENT;
 			else
 				teardown = 1;
 		}
 	}
 
 	if (teardown) {
 		ng_btsocket_l2cap_untimeout(sock);
 
 		/* Send disconnect with "zero" token */
 		ng_btsocket_l2cap_send_l2ca_discon_req(0, sock);
 
 		/* ... and close the socket */
 		sock->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(sock->so);
 	}
 
 unlock:
 	mtx_unlock(&sock->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (rc);
 } /* ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp */
 
 /*
  * Process L2CA_Config indicator.
  *
  * The socket is located by hook source address and local channel ID
  * (BR/EDR channel ID type) and must be in the CONFIGURING state. The
  * values from the indicator are stored and, unless an L2CA_Config response
  * was already sent, one is sent now. If that fails the channel is
  * disconnected and the socket closed.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket in
  * the CONFIGURING state matches, otherwise 0 or the error from sending
  * the L2CA_Config response.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_cfg_ind(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_cfg_ind_ip	*ind;
 	ng_btsocket_l2cap_pcb_t		*sock;
 	int				 rc = 0;
 
 	if (msg->header.arglen != sizeof(*ind))
 		return (EMSGSIZE);
 
 	ind = (ng_l2cap_l2ca_cfg_ind_ip *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* Find the socket that owns the given channel ID */
 	sock = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ind->lcid,
 	    NG_L2CAP_L2CA_IDTYPE_BREDR);
 	if (sock == NULL) {
 		rc = ENOENT;
 		goto unlock_list;
 	}
 
 	mtx_lock(&sock->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Config indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d, cfg_state=%x\n",
 		__func__,
 		sock->src.b[5], sock->src.b[4], sock->src.b[3],
 		sock->src.b[2], sock->src.b[1], sock->src.b[0],
 		sock->dst.b[5], sock->dst.b[4], sock->dst.b[3],
 		sock->dst.b[2], sock->dst.b[1], sock->dst.b[0],
 		sock->psm, sock->cid, sock->state, sock->cfg_state);
 
 	/* XXX FIXME re-configuration on open socket */
 	if (sock->state != NG_BTSOCKET_L2CAP_CONFIGURING) {
 		rc = ENOENT;
 		goto unlock_pcb;
 	}
 
 	/*
 	 * XXX FIXME Actually set flush and link timeout. Set QoS here if
 	 * required. Resolve conflicts (flush_timo). For now just note the
 	 * outgoing MTU (peer's incoming MTU) and the incoming flow spec.
 	 */
 
 	sock->omtu = ind->omtu;
 	bcopy(&ind->iflow, &sock->iflow, sizeof(sock->iflow));
 	sock->flush_timo = ind->flush_timo;
 
 	/* Nothing more to do if a response has been sent already */
 	if ((sock->cfg_state & NG_BTSOCKET_L2CAP_CFG_OUT_SENT) != 0)
 		goto unlock_pcb;
 
 	/* Answer the peer; on failure tear the channel down */
 	rc = ng_btsocket_l2cap_send_l2ca_cfg_rsp(sock);
 	if (rc == 0) {
 		sock->cfg_state |= NG_BTSOCKET_L2CAP_CFG_OUT_SENT;
 		goto unlock_pcb;
 	}
 
 	ng_btsocket_l2cap_untimeout(sock);
 
 	sock->so->so_error = rc;
 
 	/* Send disconnect with "zero" token */
 	ng_btsocket_l2cap_send_l2ca_discon_req(0, sock);
 
 	/* ... and close the socket */
 	sock->state = NG_BTSOCKET_L2CAP_CLOSED;
 	soisdisconnected(sock->so);
 
 unlock_pcb:
 	mtx_unlock(&sock->pcb_mtx);
 unlock_list:
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (rc);
 } /* ng_btsocket_l2cap_process_l2ca_cfg_ind */
 
 /*
  * Process L2CA_Disconnect response.
  *
  * This is the answer to an L2CA_Disconnect request, so the socket that
  * wants to be disconnected is located by the Netgraph message token. A
  * socket that is not yet CLOSED is closed regardless of the reported
  * result.
  *
  * Returns EMSGSIZE for a wrongly sized message and 0 otherwise (also
  * when no socket owns the token).
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_discon_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_discon_op	*rsp;
 	ng_btsocket_l2cap_pcb_t	*sock;
 
 	/* Validate the message size */
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_discon_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	sock = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (sock == NULL)
 		goto unlock_list;
 
 	mtx_lock(&sock->pcb_mtx);
 
 	/* Already closed: nothing left to do */
 	if (sock->state == NG_BTSOCKET_L2CAP_CLOSED)
 		goto unlock_pcb;
 
 	/* XXX Close socket no matter what rsp->result says */
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Disconnect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n",
 		__func__, msg->header.token,
 		sock->src.b[5], sock->src.b[4], sock->src.b[3],
 		sock->src.b[2], sock->src.b[1], sock->src.b[0],
 		sock->dst.b[5], sock->dst.b[4], sock->dst.b[3],
 		sock->dst.b[2], sock->dst.b[1], sock->dst.b[0],
 		sock->psm, sock->cid, rsp->result, sock->state);
 
 	ng_btsocket_l2cap_untimeout(sock);
 
 	sock->state = NG_BTSOCKET_L2CAP_CLOSED;
 	soisdisconnected(sock->so);
 
 unlock_pcb:
 	mtx_unlock(&sock->pcb_mtx);
 unlock_list:
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_l2cap_process_l2ca_discon_rsp */
 
 /*
  * Process L2CA_Disconnect indicator.
  *
  * The socket is located by hook source address, local channel ID and
  * channel ID type. Any pending timer is cancelled and the socket is
  * closed.
  *
  * Returns EMSGSIZE for a wrongly sized message and 0 otherwise (also
  * when no socket matches).
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_discon_ind(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_discon_ind_ip	*ind;
 	ng_btsocket_l2cap_pcb_t		*sock;
 
 	/* Validate the message size */
 	if (msg->header.arglen != sizeof(*ind))
 		return (EMSGSIZE);
 
 	ind = (ng_l2cap_l2ca_discon_ind_ip *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* Find the socket that owns the given channel ID */
 	sock = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ind->lcid, ind->idtype);
 	if (sock != NULL) {
 		/*
 		 * The channel has been destroyed already, so all that is
 		 * left is to disconnect the socket. A request that might
 		 * still be pending cannot be helped here.
 		 */
 
 		mtx_lock(&sock->pcb_mtx);
 
 		NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Disconnect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d\n",
 			__func__,
 			sock->src.b[5], sock->src.b[4], sock->src.b[3],
 			sock->src.b[2], sock->src.b[1], sock->src.b[0],
 			sock->dst.b[5], sock->dst.b[4], sock->dst.b[3],
 			sock->dst.b[2], sock->dst.b[1], sock->dst.b[0],
 			sock->psm, sock->cid, sock->state);
 
 		/* A timer may or may not be armed at this point */
 		if ((sock->flags & NG_BTSOCKET_L2CAP_TIMO) != 0)
 			ng_btsocket_l2cap_untimeout(sock);
 
 		sock->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(sock->so);
 
 		mtx_unlock(&sock->pcb_mtx);
 	}
 
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_l2cap_process_l2ca_discon_ind */
 
 /*
  * Process L2CA_Write response.
  *
  * The socket is located by the Netgraph message token and must be OPEN.
  * The pending timer is cancelled, the first record is dropped from the
  * send buffer and, if more data is queued, the next packet is sent. The
  * L2CAP result is then stored in so_error and the writer is woken up.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no OPEN
  * socket owns the token, and 0 otherwise.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_write_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_write_op	*rsp;
 	ng_btsocket_l2cap_pcb_t	*sock;
 	int			 status = ENOENT;
 
 	/* Validate the message size */
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_write_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* The token identifies the socket that sent the request */
 	sock = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (sock == NULL)
 		goto unlock_list;
 
 	mtx_lock(&sock->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Write response, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, length=%d, " \
 "state=%d\n",		__func__,
 		sock->src.b[5], sock->src.b[4], sock->src.b[3],
 		sock->src.b[2], sock->src.b[1], sock->src.b[0],
 		sock->dst.b[5], sock->dst.b[4], sock->dst.b[3],
 		sock->dst.b[2], sock->dst.b[1], sock->dst.b[0],
 		sock->psm, sock->cid, rsp->result, rsp->length,
 		sock->state);
 
 	if (sock->state != NG_BTSOCKET_L2CAP_OPEN)
 		goto unlock_pcb;
 
 	ng_btsocket_l2cap_untimeout(sock);
 
 	/*
 	 * Drop the first record from the send queue and, if anything is
 	 * still queued, push the next packet out. The timer is re-armed
 	 * only when that send succeeded.
 	 */
 	sbdroprecord(&sock->so->so_snd);
 	if (sbavail(&sock->so->so_snd) > 0) {
 		if (ng_btsocket_l2cap_send2(sock) != 0)
 			sbdroprecord(&sock->so->so_snd); /* XXX */
 		else
 			ng_btsocket_l2cap_timeout(sock);
 	}
 
 	/* Record the result and wake up the sender so it can queue more */
 	sock->so->so_error = ng_btsocket_l2cap_result2errno(rsp->result);
 	sowwakeup(sock->so);
 
 	status = 0;
 
 unlock_pcb:
 	mtx_unlock(&sock->pcb_mtx);
 unlock_list:
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (status);
 } /* ng_btsocket_l2cap_process_l2ca_write_rsp */
 
 /*
  * Send L2CA_Connect request.
  *
  * Must be called with the pcb mutex held. The request carries the pcb's
  * token, destination address, PSM, link type (derived from the
  * destination address type) and channel ID type.
  *
  * Returns ENETDOWN when the pcb has no route or the route's hook is
  * missing or invalid, ENOMEM when the message cannot be allocated,
  * otherwise the result of sending the message.
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_con_req(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_con_ip	*args;
 	struct ng_mesg		*req = NULL;
 	int			 rc = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* A usable route with a valid hook is required */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(req, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON,
 	    sizeof(*args), M_NOWAIT);
 	if (req == NULL)
 		return (ENOMEM);
 
 	/* The token lets the response be matched back to this socket */
 	req->header.token = pcb->token;
 
 	args = (ng_l2cap_l2ca_con_ip *)(req->data);
 	bcopy(&pcb->dst, &args->bdaddr, sizeof(args->bdaddr));
 	args->psm = pcb->psm;
 	args->linktype = ng_btsock_l2cap_addrtype_to_linktype(pcb->dsttype);
 	args->idtype = pcb->idtype;
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_l2cap_node, req, pcb->rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_l2cap_send_l2ca_con_req */
 
 /*
  * Send L2CA_Connect response.
  *
  * Builds an L2CA_ConnectRsp message for the given destination address,
  * identifier, local channel ID, link type and result (status is always
  * zero) and sends it over the route's hook using the supplied token.
  *
  * Returns ENETDOWN when there is no route or the route's hook is missing
  * or invalid, ENOMEM when the message cannot be allocated, otherwise the
  * result of sending the message.
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_con_rsp_req(u_int32_t token,
 		ng_btsocket_l2cap_rtentry_p rt, bdaddr_p dst, int ident, 
 					int lcid, int result, int linktype)
 {
 	ng_l2cap_l2ca_con_rsp_ip	*args;
 	struct ng_mesg			*rsp = NULL;
 	int				 rc = 0;
 
 	/* A usable route with a valid hook is required */
 	if (rt == NULL)
 		return (ENETDOWN);
 	if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(rsp, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON_RSP,
 	    sizeof(*args), M_NOWAIT);
 	if (rsp == NULL)
 		return (ENOMEM);
 
 	rsp->header.token = token;
 
 	args = (ng_l2cap_l2ca_con_rsp_ip *)(rsp->data);
 	bcopy(dst, &args->bdaddr, sizeof(args->bdaddr));
 	args->ident = ident;
 	args->lcid = lcid;
 	args->linktype = linktype;
 	args->result = result;
 	args->status = 0;
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_l2cap_node, rsp, rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_l2cap_send_l2ca_con_rsp_req */
 
 /*
  * Send L2CA_Config request.
  *
  * Must be called with the pcb mutex held. The request carries the pcb's
  * token, channel ID, incoming MTU, outgoing flow spec and the flush and
  * link timeouts.
  *
  * Returns ENETDOWN when the pcb has no route or the route's hook is
  * missing or invalid, ENOMEM when the message cannot be allocated,
  * otherwise the result of sending the message.
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_cfg_req(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_cfg_ip	*args;
 	struct ng_mesg		*req = NULL;
 	int			 rc = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* A usable route with a valid hook is required */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(req, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG,
 	    sizeof(*args), M_NOWAIT);
 	if (req == NULL)
 		return (ENOMEM);
 
 	/* The token lets the response be matched back to this socket */
 	req->header.token = pcb->token;
 
 	args = (ng_l2cap_l2ca_cfg_ip *)(req->data);
 	args->lcid = pcb->cid;
 	args->imtu = pcb->imtu;
 	bcopy(&pcb->oflow, &args->oflow, sizeof(args->oflow));
 	args->flush_timo = pcb->flush_timo;
 	args->link_timo = pcb->link_timo;
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_l2cap_node, req, pcb->rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_l2cap_send_l2ca_cfg_req */
 
 /*
  * Send L2CA_Config response.
  *
  * Must be called with the pcb mutex held. The response carries the pcb's
  * token, channel ID, outgoing MTU and incoming flow spec.
  *
  * Returns ENETDOWN when the pcb has no route or the route's hook is
  * missing or invalid, ENOMEM when the message cannot be allocated,
  * otherwise the result of sending the message.
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_cfg_rsp(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_cfg_rsp_ip	*args;
 	struct ng_mesg			*rsp = NULL;
 	int				 rc = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* A usable route with a valid hook is required */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(rsp, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG_RSP,
 	    sizeof(*args), M_NOWAIT);
 	if (rsp == NULL)
 		return (ENOMEM);
 
 	rsp->header.token = pcb->token;
 
 	args = (ng_l2cap_l2ca_cfg_rsp_ip *)(rsp->data);
 	args->lcid = pcb->cid;
 	args->omtu = pcb->omtu;
 	bcopy(&pcb->iflow, &args->iflow, sizeof(args->iflow));
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_l2cap_node, rsp, pcb->rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_l2cap_send_l2ca_cfg_rsp */
 
 /*
  * Send L2CA_Disconnect request.
  *
  * Must be called with the pcb mutex held. The request carries the
  * caller-supplied token (callers in this file pass zero when they do not
  * wait for the response) together with the pcb's channel ID and channel
  * ID type.
  *
  * Returns ENETDOWN when the pcb has no route or the route's hook is
  * missing or invalid, ENOMEM when the message cannot be allocated,
  * otherwise the result of sending the message.
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_discon_req(u_int32_t token,
 		ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_discon_ip	*args;
 	struct ng_mesg		*req = NULL;
 	int			 rc = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* A usable route with a valid hook is required */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(req, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_DISCON,
 	    sizeof(*args), M_NOWAIT);
 	if (req == NULL)
 		return (ENOMEM);
 
 	req->header.token = token;
 
 	args = (ng_l2cap_l2ca_discon_ip *)(req->data);
 	args->lcid = pcb->cid;
 	args->idtype = pcb->idtype;
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_l2cap_node, req, pcb->rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_l2cap_send_l2ca_discon_req */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Socket interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * L2CAP sockets data input routine.
  *
  * Consumes the mbuf chain "m" received on "hook". The packet starts with
  * a 16-bit channel ID type, followed by the L2CAP header and payload.
  * After validation the payload is delivered either to the one connected
  * socket that owns the destination channel ID, or - for connectionless
  * traffic - as a copy to every open SO_BROADCAST socket bound to the same
  * address and PSM. Packets that cannot be delivered are silently dropped.
  * The mbuf chain is always consumed.
  */
 
 static void
 ng_btsocket_l2cap_data_input(struct mbuf *m, hook_p hook)
 {
 	ng_l2cap_hdr_t			*hdr = NULL;
 	ng_l2cap_clt_hdr_t		*clt_hdr = NULL;
 	ng_btsocket_l2cap_pcb_t		*pcb = NULL;
 	ng_btsocket_l2cap_rtentry_t	*rt = NULL;
 	uint16_t idtype;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Invalid source hook for L2CAP data packet\n", __func__);
 		goto drop;
 	}
 
 	rt = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook);
 	if (rt == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not find out source bdaddr for L2CAP data packet\n", __func__);
 		goto drop;
 	}
 
 	/*
 	 * Strip the leading channel ID type. m_pullup() frees the chain
 	 * and returns NULL on failure, so the result must be checked
 	 * before it is dereferenced.
 	 */
 	m = m_pullup(m, sizeof(uint16_t));
 	if (m == NULL)
 		goto drop;
 	idtype = *mtod(m, uint16_t *);
 	m_adj(m, sizeof(uint16_t));
 
 	/* Make sure we can access header */
 	if (m->m_pkthdr.len < sizeof(*hdr)) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: L2CAP data packet too small, len=%d\n", __func__, m->m_pkthdr.len);
 		goto drop;
 	}
 
 	if (m->m_len < sizeof(*hdr)) {
 		m = m_pullup(m, sizeof(*hdr));
 		if (m == NULL)
 			goto drop;
 	}
 
 	/* Strip L2CAP packet header and verify packet length */
 	hdr = mtod(m, ng_l2cap_hdr_t *);
 	m_adj(m, sizeof(*hdr));
 
 	if (hdr->length != m->m_pkthdr.len) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Bad L2CAP data packet length, len=%d, length=%d\n",
 			__func__, m->m_pkthdr.len, hdr->length);
 		goto drop;
 	}
 
 	/*
 	 * Now process packet. Two cases:
 	 *
 	 * 1) Normal packet (dynamic channel ID, or ATT/SMP channel ID
 	 *    type) then find connected socket and append mbuf to the
 	 *    socket queue. Wakeup socket.
 	 *
 	 * 2) Broadcast packet (connectionless channel ID) then find all
 	 *    sockets that connected to the given PSM and have SO_BROADCAST
 	 *    bit set and append mbuf to the socket queue. Wakeup socket.
 	 *
 	 * Anything else is dropped.
 	 */
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Received L2CAP data packet: src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dcid=%d, length=%d\n",
 		__func__,
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		hdr->dcid, hdr->length);
 
 	if ((hdr->dcid >= NG_L2CAP_FIRST_CID) ||
 	    (idtype == NG_L2CAP_L2CA_IDTYPE_ATT)||
 	    (idtype == NG_L2CAP_L2CA_IDTYPE_SMP)
 	    ){
 		mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 		/* Normal packet: find connected socket */
 		pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, hdr->dcid,idtype);
 		if (pcb == NULL) {
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: No connected socket found, src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, " \
 "state=%d\n",			__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				hdr->dcid, pcb->state);
 
 			mtx_unlock(&pcb->pcb_mtx);
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		/* Check packet size against socket's incoming MTU */
 		if (hdr->length > pcb->imtu) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: L2CAP data packet too big, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dcid=%d, length=%d, imtu=%d\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				hdr->dcid, hdr->length, pcb->imtu);
 
 			mtx_unlock(&pcb->pcb_mtx);
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		/* Check if we have enough space in socket receive queue */
 		if (m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) {
 			/*
 			 * This is really bad. Receive queue on socket does
 			 * not have enough space for the packet. We do not
 			 * have any other choice but drop the packet. L2CAP
 			 * does not provide any flow control.
 			 */
 
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: Not enough space in socket receive queue. Dropping L2CAP data packet, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, len=%d, space=%ld\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				hdr->dcid, m->m_pkthdr.len,
 				sbspace(&pcb->so->so_rcv));
 
 			mtx_unlock(&pcb->pcb_mtx);
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		/*
 		 * Append packet to the socket receive queue and wakeup.
 		 * The socket buffer owns the chain from here on, so clear
 		 * "m" to keep the common exit path from freeing it.
 		 */
 		sbappendrecord(&pcb->so->so_rcv, m);
 		m = NULL;
 
 		sorwakeup(pcb->so);
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	} else if (hdr->dcid == NG_L2CAP_CLT_CID) {
 		/* Broadcast packet: give packet to all sockets  */
 
 		/* Check packet size against connectionless MTU */
 		if (hdr->length > NG_L2CAP_MTU_DEFAULT) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: Connectionless L2CAP data packet too big, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				hdr->length);
 			goto drop;
 		}
 
 		/* Make sure we can access connectionless header */
 		if (m->m_pkthdr.len < sizeof(*clt_hdr)) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: Can not get L2CAP connectionless packet header, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				hdr->length);
 			goto drop;
 		}
 
 		if (m->m_len < sizeof(*clt_hdr)) {
 			m = m_pullup(m, sizeof(*clt_hdr));
 			if (m == NULL)
 				goto drop;
 		}
 
 		/* Strip connectionless header and deliver packet */
 		clt_hdr = mtod(m, ng_l2cap_clt_hdr_t *);
 		m_adj(m, sizeof(*clt_hdr));
 
 		NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CAP connectionless data packet, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, length=%d\n",
 			__func__,
 			rt->src.b[5], rt->src.b[4], rt->src.b[3],
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			clt_hdr->psm, hdr->length);
 
 		mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 		LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) {
 			struct mbuf	*copy = NULL;
 
 			mtx_lock(&pcb->pcb_mtx);
 
 			/*
 			 * Skip sockets bound to another address or PSM,
 			 * not open, without SO_BROADCAST, or without room
 			 * in the receive queue.
 			 */
 			if (bcmp(&rt->src, &pcb->src, sizeof(pcb->src)) != 0 ||
 			    pcb->psm != clt_hdr->psm ||
 			    pcb->state != NG_BTSOCKET_L2CAP_OPEN ||
 			    (pcb->so->so_options & SO_BROADCAST) == 0 ||
 			    m->m_pkthdr.len > sbspace(&pcb->so->so_rcv))
 				goto next;
 
 			/*
 			 * Create a copy of the packet and append it to the
 			 * socket's queue. If m_dup() failed - no big deal
 			 * it is a broadcast traffic after all
 			 */
 
 			copy = m_dup(m, M_NOWAIT);
 			if (copy != NULL) {
 				sbappendrecord(&pcb->so->so_rcv, copy);
 				sorwakeup(pcb->so);
 			}
 next:
 			mtx_unlock(&pcb->pcb_mtx);
 		}
 
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	}
 drop:
 	NG_FREE_M(m); /* checks for m != NULL */
 } /* ng_btsocket_l2cap_data_input */
 
 /*
  * L2CAP sockets default message input routine.
  *
  * Only NGM_L2CAP_NODE_HOOK_INFO is handled: it creates (on first use) or
  * refreshes the routing entry attached to the hook with the address
  * carried in the message. A message with no hook, a wrong size or the
  * wildcard address is ignored, and any other command is logged as
  * unknown. The message is always freed.
  */
 
 static void
 ng_btsocket_l2cap_default_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_l2cap_rtentry_t	*entry;
 	ng_l2cap_node_hook_info_ep	*info;
 
 	if (msg->header.cmd != NGM_L2CAP_NODE_HOOK_INFO) {
 		NG_BTSOCKET_L2CAP_WARN(
 "%s: Unknown message, cmd=%d\n", __func__, msg->header.cmd);
 		goto done;
 	}
 
 	info = (ng_l2cap_node_hook_info_ep *)msg->data;
 
 	if (hook == NULL || msg->header.arglen != sizeof(*info))
 		goto done;
 
 	/* The wildcard address is of no use as a route source */
 	if (bcmp(&info->addr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		goto done;
 
 	mtx_lock(&ng_btsocket_l2cap_rt_mtx);
 
 	entry = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook);
 	if (entry == NULL) {
 		/* First message for this hook: create its routing entry */
 		entry = malloc(sizeof(*entry),
 		    M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT|M_ZERO);
 		if (entry == NULL) {
 			mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 			goto done;
 		}
 
 		LIST_INSERT_HEAD(&ng_btsocket_l2cap_rt, entry, next);
 
 		NG_HOOK_SET_PRIVATE(hook, entry);
 	}
 
 	bcopy(&info->addr, &entry->src, sizeof(entry->src));
 	entry->hook = hook;
 
 	mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Updating hook \"%s\", src bdaddr=%x:%x:%x:%x:%x:%x\n",
 		__func__, NG_HOOK_NAME(hook),
 		entry->src.b[5], entry->src.b[4], entry->src.b[3],
 		entry->src.b[2], entry->src.b[1], entry->src.b[0]);
 
 done:
 	NG_FREE_MSG(msg); /* Checks for msg != NULL */
 } /* ng_btsocket_l2cap_default_msg_input */
 
 /*
  * L2CAP sockets L2CA message input routine.
  *
  * Fetches the routing entry attached to the source hook and passes the
  * message to the handler that matches its command. The message is freed
  * here on every path.
  */
 
 static void
 ng_btsocket_l2cap_l2ca_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_l2cap_rtentry_p	route = NULL;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Invalid source hook for L2CA message\n", __func__);
 		NG_FREE_MSG(msg);
 		return;
 	}
 
 	route = (ng_btsocket_l2cap_rtentry_p) NG_HOOK_PRIVATE(hook);
 	if (route == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not find out source bdaddr for L2CA message\n", __func__);
 		NG_FREE_MSG(msg);
 		return;
 	}
 
 	switch (msg->header.cmd) {
 	/* L2CA_Connect response */
 	case NGM_L2CAP_L2CA_CON:
 		ng_btsocket_l2cap_process_l2ca_con_req_rsp(msg, route);
 		break;
 
 	/* L2CA_ConnectRsp response */
 	case NGM_L2CAP_L2CA_CON_RSP:
 		ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(msg, route);
 		break;
 
 	/* L2CA_Connect indicator */
 	case NGM_L2CAP_L2CA_CON_IND:
 		ng_btsocket_l2cap_process_l2ca_con_ind(msg, route);
 		break;
 
 	/* L2CA_Config response */
 	case NGM_L2CAP_L2CA_CFG:
 		ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(msg, route);
 		break;
 
 	/* L2CA_ConfigRsp response */
 	case NGM_L2CAP_L2CA_CFG_RSP:
 		ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(msg, route);
 		break;
 
 	/* L2CA_Config indicator */
 	case NGM_L2CAP_L2CA_CFG_IND:
 		ng_btsocket_l2cap_process_l2ca_cfg_ind(msg, route);
 		break;
 
 	/* L2CA_Disconnect response */
 	case NGM_L2CAP_L2CA_DISCON:
 		ng_btsocket_l2cap_process_l2ca_discon_rsp(msg, route);
 		break;
 
 	/* L2CA_Disconnect indicator */
 	case NGM_L2CAP_L2CA_DISCON_IND:
 		ng_btsocket_l2cap_process_l2ca_discon_ind(msg, route);
 		break;
 
 	/* L2CA_Write response */
 	case NGM_L2CAP_L2CA_WRITE:
 		ng_btsocket_l2cap_process_l2ca_write_rsp(msg, route);
 		break;
 
 	/* Encryption change notification */
 	case NGM_L2CAP_L2CA_ENC_CHANGE:
 		ng_btsocket_l2cap_process_l2ca_enc_change(msg, route);
 		break;
 
 	/* XXX FIXME add other L2CA messages */
 
 	default:
 		NG_BTSOCKET_L2CAP_WARN(
 "%s: Unknown L2CA message, cmd=%d\n", __func__, msg->header.cmd);
 		break;
 	}
 
 	NG_FREE_MSG(msg);
 } /* ng_btsocket_l2cap_l2ca_msg_input */
 
 /*
  * L2CAP sockets input routine.
  *
  * Task handler: drains ng_btsocket_l2cap_queue one item at a time and
  * dispatches each item to the data or message input routine. Items that
  * arrived on a hook which is no longer valid are discarded.
  */
 
 static void
 ng_btsocket_l2cap_input(void *context, int pending)
 {
 	item_p	item = NULL;
 	hook_p	hook = NULL;
 
 	for (;;) {
 		/* The queue lock is held only while dequeueing */
 		mtx_lock(&ng_btsocket_l2cap_queue_mtx);
 		NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_l2cap_queue, item);
 		mtx_unlock(&ng_btsocket_l2cap_queue_mtx);
 
 		if (item == NULL)
 			return;
 
 		NGI_GET_HOOK(item, hook);
 
 		/* Process the item unless its source hook has become invalid */
 		if (hook == NULL || !(NG_HOOK_NOT_VALID(hook))) {
 			switch(item->el_flags & NGQF_TYPE) {
 			case NGQF_DATA: {
 				struct mbuf     *m = NULL;
 
 				NGI_GET_M(item, m);
 				ng_btsocket_l2cap_data_input(m, hook);
 				} break;
 
 			case NGQF_MESG: {
 				struct ng_mesg  *msg = NULL;
 
 				NGI_GET_MSG(item, msg);
 
 				switch (msg->header.cmd) {
 				case NGM_L2CAP_L2CA_CON:
 				case NGM_L2CAP_L2CA_CON_RSP:
 				case NGM_L2CAP_L2CA_CON_IND:
 				case NGM_L2CAP_L2CA_CFG:
 				case NGM_L2CAP_L2CA_CFG_RSP:
 				case NGM_L2CAP_L2CA_CFG_IND:
 				case NGM_L2CAP_L2CA_DISCON:
 				case NGM_L2CAP_L2CA_DISCON_IND:
 				case NGM_L2CAP_L2CA_WRITE:
 				case NGM_L2CAP_L2CA_ENC_CHANGE:
 				/* XXX FIXME add other L2CA messages */
 					ng_btsocket_l2cap_l2ca_msg_input(msg,
 					    hook);
 					break;
 
 				default:
 					ng_btsocket_l2cap_default_msg_input(msg,
 					    hook);
 					break;
 				}
 				} break;
 
 			default:
 				KASSERT(0,
 ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE)));
 				break;
 			}
 		}
 
 		/* Release the hook taken from the item, then the item itself */
 		if (hook != NULL)
 			NG_HOOK_UNREF(hook);
 
 		NG_FREE_ITEM(item);
 	}
 } /* ng_btsocket_l2cap_input */
 
 /*
  * Route cleanup task. Gets scheduled when hook is disconnected. Here we 
  * will find all sockets that use "invalid" hook and disconnect them.
  */
 
 static void
 ng_btsocket_l2cap_rtclean(void *context, int pending)
 {
 	ng_btsocket_l2cap_pcb_p		pcb = NULL, pcb_next = NULL;
 	ng_btsocket_l2cap_rtentry_p	rt = NULL;
 
 	mtx_lock(&ng_btsocket_l2cap_rt_mtx);
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/*
 	 * First disconnect all sockets that use "invalid" hook
 	 */
 
 	for (pcb = LIST_FIRST(&ng_btsocket_l2cap_sockets); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		pcb_next = LIST_NEXT(pcb, next);
 
 		if (pcb->rt != NULL &&
 		    pcb->rt->hook != NULL && NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 			if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO)
 				ng_btsocket_l2cap_untimeout(pcb);
 
 			pcb->so->so_error = ENETDOWN;
 			pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 			soisdisconnected(pcb->so);
 
 			pcb->token = 0;
 			pcb->cid = 0;
 			pcb->rt = NULL;
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 
 	/*
 	 * Now cleanup routing table
 	 */
 
 	for (rt = LIST_FIRST(&ng_btsocket_l2cap_rt); rt != NULL; ) {
 		ng_btsocket_l2cap_rtentry_p	rt_next = LIST_NEXT(rt, next);
 
 		if (rt->hook != NULL && NG_HOOK_NOT_VALID(rt->hook)) {
 			LIST_REMOVE(rt, next);
 
 			NG_HOOK_SET_PRIVATE(rt->hook, NULL);
 			NG_HOOK_UNREF(rt->hook); /* Remove extra reference */
 
 			bzero(rt, sizeof(*rt));
 			free(rt, M_NETGRAPH_BTSOCKET_L2CAP);
 		}
 
 		rt = rt_next;
 	}
 
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 } /* ng_btsocket_l2cap_rtclean */
 
 /*
  * Initialize everything: register the Netgraph node type, create and name
  * the socket node, then set up the input queue, the socket list and the
  * routing table together with their mutexes and tasks.
  *
  * On any Netgraph failure the function logs an alert and returns early with
  * ng_btsocket_l2cap_node left NULL, which the socket entry points check for.
  */
 
 void
 ng_btsocket_l2cap_init(void)
 {
 	int	error = 0;
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	ng_btsocket_l2cap_node = NULL;
 	ng_btsocket_l2cap_debug_level = NG_BTSOCKET_WARN_LEVEL;
 
 	/* Register Netgraph node type */
 	error = ng_newtype(&typestruct);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not register Netgraph node type, error=%d\n", __func__, error);
 
                 return;
 	}
 
 	/* Create Netgraph node */
 	error = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, error);
 
 		ng_btsocket_l2cap_node = NULL;
 
 		return;
 	}
 
 	/* Give the node its well-known name; drop it again on failure */
 	error = ng_name_node(ng_btsocket_l2cap_node,
 				NG_BTSOCKET_L2CAP_NODE_TYPE);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, error);
 
 		NG_NODE_UNREF(ng_btsocket_l2cap_node);
 		ng_btsocket_l2cap_node = NULL;
 
 		return;
 	}
 
 	/* Create input queue (drained by ng_btsocket_l2cap_input) */
 	NG_BT_ITEMQ_INIT(&ng_btsocket_l2cap_queue, ifqmaxlen);
 	mtx_init(&ng_btsocket_l2cap_queue_mtx,
 		"btsocks_l2cap_queue_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_l2cap_queue_task, 0,
 		ng_btsocket_l2cap_input, NULL);
 
 	/* Create list of sockets */
 	LIST_INIT(&ng_btsocket_l2cap_sockets);
 	mtx_init(&ng_btsocket_l2cap_sockets_mtx,
 		"btsocks_l2cap_sockets_mtx", NULL, MTX_DEF);
 
 	/* Routing table (cleaned up by ng_btsocket_l2cap_rtclean) */
 	LIST_INIT(&ng_btsocket_l2cap_rt);
 	mtx_init(&ng_btsocket_l2cap_rt_mtx,
 		"btsocks_l2cap_rt_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_l2cap_rt_task, 0,
 		ng_btsocket_l2cap_rtclean, NULL);
 } /* ng_btsocket_l2cap_init */
 
 /*
  * Abort connection on socket: record ECONNABORTED as the socket error and
  * start a disconnect. The result of the disconnect is deliberately ignored.
  */
 
 void
 ng_btsocket_l2cap_abort(struct socket *so)
 {
 	so->so_error = ECONNABORTED;
 
 	(void)ng_btsocket_l2cap_disconnect(so);
 } /* ng_btsocket_l2cap_abort */
 
 /*
  * Close socket: start a disconnect and ignore its result.
  */
 void
 ng_btsocket_l2cap_close(struct socket *so)
 {
 
 	(void)ng_btsocket_l2cap_disconnect(so);
 } /* ng_btsocket_l2cap_close */
 
 /*
  * Accept connection on socket. The socket is expected to be connected
  * already, so all that is left is to hand back the peer address.
  * Returns EINVAL when the Netgraph socket node does not exist.
  */
 
 int
 ng_btsocket_l2cap_accept(struct socket *so, struct sockaddr **nam)
 {
 	if (ng_btsocket_l2cap_node != NULL)
 		return (ng_btsocket_l2cap_peeraddr(so, nam));
 
 	return (EINVAL);
 } /* ng_btsocket_l2cap_accept */
 
 /*
  * Create and attach new socket
  *
  * Allocates a PCB for the socket, fills in default MTU, flow and timeout
  * values, assigns a fresh non-zero token and links the PCB into the global
  * socket list.
  *
  * Returns 0 on success, EPROTONOSUPPORT if the Netgraph node is missing,
  * ESOCKTNOSUPPORT for anything but SOCK_SEQPACKET, EISCONN if a PCB is
  * already attached, ENOMEM if the PCB cannot be allocated, or the error
  * from soreserve().
  */
 
 int
 ng_btsocket_l2cap_attach(struct socket *so, int proto, struct thread *td)
 {
 	/* Last token handed out; guarded by ng_btsocket_l2cap_sockets_mtx */
 	static u_int32_t	token = 0;
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	int			error;
 
 	/* Check socket and protocol */
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EPROTONOSUPPORT);
 	if (so->so_type != SOCK_SEQPACKET)
 		return (ESOCKTNOSUPPORT);
 
 #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */
 	if (proto != 0) 
 		if (proto != BLUETOOTH_PROTO_L2CAP)
 			return (EPROTONOSUPPORT);
 #endif /* XXX */
 
 	/* A socket can only be attached once */
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Reserve send and receive space if it is not reserved yet */
 	if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) {
 		error = soreserve(so, NG_BTSOCKET_L2CAP_SENDSPACE,
 					NG_BTSOCKET_L2CAP_RECVSPACE);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Allocate the PCB (zeroed, must not sleep) */
         pcb = malloc(sizeof(*pcb),
 		M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT | M_ZERO);
         if (pcb == NULL)
                 return (ENOMEM);
 
 	/* Link the PCB and the socket */
 	so->so_pcb = (caddr_t) pcb;
 	pcb->so = so;
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 
 	/* Initialize PCB */
 	pcb->imtu = pcb->omtu = NG_L2CAP_MTU_DEFAULT;
 
 	/* Default flow */
 	pcb->iflow.flags = 0x0;
 	pcb->iflow.service_type = NG_HCI_SERVICE_TYPE_BEST_EFFORT;
 	pcb->iflow.token_rate = 0xffffffff; /* maximum */
 	pcb->iflow.token_bucket_size = 0xffffffff; /* maximum */
 	pcb->iflow.peak_bandwidth = 0x00000000; /* maximum */
 	pcb->iflow.latency = 0xffffffff; /* don't care */
 	pcb->iflow.delay_variation = 0xffffffff; /* don't care */
 
 	/* Outgoing flow starts out identical to the incoming one */
 	bcopy(&pcb->iflow, &pcb->oflow, sizeof(pcb->oflow));
 
 	pcb->flush_timo = NG_L2CAP_FLUSH_TIMO_DEFAULT;
 	pcb->link_timo = NG_L2CAP_LINK_TIMO_DEFAULT;
 
 	/*
 	 * XXX Mark PCB mutex as DUPOK to prevent "duplicated lock of
 	 * the same type" message. When accepting new L2CAP connection 
 	 * ng_btsocket_l2cap_process_l2ca_con_ind() holds both PCB mutexes 
 	 * for "old" (accepting) PCB and "new" (created) PCB.
 	 */
 		
 	mtx_init(&pcb->pcb_mtx, "btsocks_l2cap_pcb_mtx", NULL,
 		MTX_DEF|MTX_DUPOK);
 	/* The timeout callout is tied to the PCB mutex */
 	callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0);
 
         /*
 	 * Add the PCB to the list
 	 * 
 	 * XXX FIXME VERY IMPORTANT!
 	 *
 	 * This is totally FUBAR. We could get here in two cases:
 	 *
 	 * 1) When user calls socket()
 	 * 2) When we need to accept new incoming connection and call 
 	 *    sonewconn()
 	 *
 	 * In the first case we must acquire ng_btsocket_l2cap_sockets_mtx.
 	 * In the second case we hold ng_btsocket_l2cap_sockets_mtx already.
 	 * So we now need to distinguish between these cases. From reading
 	 * /sys/kern/uipc_socket.c we can find out that sonewconn() calls
 	 * pru_attach with proto == 0 and td == NULL. For now use this fact
 	 * to figure out if we were called from socket() or from sonewconn().
 	 */
 
 	if (td != NULL)
 		mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 	else
 		mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 
 	/* Set PCB token. Use ng_btsocket_l2cap_sockets_mtx for protection */
 	/* Zero is skipped on wrap-around; lookups by token reject zero */
 	if (++ token == 0)
 		token ++;
 
 	pcb->token = token;
 
 	LIST_INSERT_HEAD(&ng_btsocket_l2cap_sockets, pcb, next);
 
 	/* Only drop the list lock if it was taken above */
 	if (td != NULL)
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
         return (0);
 } /* ng_btsocket_l2cap_attach */
 
 /*
  * Bind socket
  */
 
 int
 ng_btsocket_l2cap_bind(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_t	*pcb = NULL;
 	struct sockaddr_l2cap	*sa = (struct sockaddr_l2cap *) nam;
 	int			 psm, error = 0;
 
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->l2cap_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	/*For the time being, Not support LE binding.*/
 	if ((sa->l2cap_len != sizeof(*sa))&&
 	    (sa->l2cap_len != sizeof(struct sockaddr_l2cap_compat)))
 		return (EINVAL);
 
 	psm = le16toh(sa->l2cap_psm);
 
 	/* 
 	 * Check if other socket has this address already (look for exact
 	 * match PSM and bdaddr) and assign socket address if it's available.
 	 *
 	 * Note: socket can be bound to ANY PSM (zero) thus allowing several
 	 * channels with the same PSM between the same pair of BD_ADDR'es.
 	 */
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next)
 		if (psm != 0 && psm == pcb->psm &&
 		    bcmp(&pcb->src, &sa->l2cap_bdaddr, sizeof(bdaddr_t)) == 0)
 			break;
 
 	if (pcb == NULL) {
 		/* Set socket address */
 		pcb = so2l2cap_pcb(so);
 		if (pcb != NULL) {
 			bcopy(&sa->l2cap_bdaddr, &pcb->src, sizeof(pcb->src));
 			pcb->psm = psm;
 		} else
 			error = EINVAL;
 	} else
 		error = EADDRINUSE;
 
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_bind */
 
 /*
  * Connect socket
  */
 
 int
 ng_btsocket_l2cap_connect(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_t		*pcb = so2l2cap_pcb(so);
 	struct sockaddr_l2cap_compat	*sal = (struct sockaddr_l2cap_compat *) nam;
 	struct sockaddr_l2cap *sa  = (struct sockaddr_l2cap *)nam;
 	struct sockaddr_l2cap  ba;
 	ng_btsocket_l2cap_rtentry_t	*rt = NULL;
 	int				 have_src, error = 0;
 	int idtype = NG_L2CAP_L2CA_IDTYPE_BREDR;
 	/* Check socket */
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 	if (pcb->state == NG_BTSOCKET_L2CAP_CONNECTING)
 		return (EINPROGRESS);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->l2cap_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->l2cap_len == sizeof(*sal)){
 		bcopy(sal, &ba, sizeof(*sal));
 		sa = &ba;
 		sa->l2cap_len = sizeof(*sa);
 		sa->l2cap_bdaddr_type = BDADDR_BREDR;
 	}
 	if (sa->l2cap_len != sizeof(*sa))
 		return (EINVAL);
 	if ((sa->l2cap_psm &&  sa->l2cap_cid))
 		return EINVAL;
 	if (bcmp(&sa->l2cap_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		return (EDESTADDRREQ);
 	if((sa->l2cap_bdaddr_type == BDADDR_BREDR)&&
 	   (sa->l2cap_psm == 0))
 		return EDESTADDRREQ;
 	if(sa->l2cap_bdaddr_type != BDADDR_BREDR){
 		if(sa->l2cap_cid == NG_L2CAP_ATT_CID){
 			idtype = NG_L2CAP_L2CA_IDTYPE_ATT;
 		}else if (sa->l2cap_cid == NG_L2CAP_SMP_CID){
 			idtype =NG_L2CAP_L2CA_IDTYPE_SMP;
 		}else{
 			//if cid == 0 idtype = NG_L2CAP_L2CA_IDTYPE_LE;
 			// Not supported yet
 			return EINVAL;
 		}
 	}
 	if (pcb->psm != 0 && pcb->psm != le16toh(sa->l2cap_psm))
 		return (EINVAL);
 	/*
 	 * Routing. Socket should be bound to some source address. The source
 	 * address can be ANY. Destination address must be set and it must not
 	 * be ANY. If source address is ANY then find first rtentry that has
 	 * src != dst.
 	 */
 
 	mtx_lock(&ng_btsocket_l2cap_rt_mtx);
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Send destination address and PSM */
 	bcopy(&sa->l2cap_bdaddr, &pcb->dst, sizeof(pcb->dst));
 	pcb->psm = le16toh(sa->l2cap_psm);
 	pcb->dsttype = sa->l2cap_bdaddr_type;
 	pcb->cid = 0;
 	pcb->idtype = idtype;
 	pcb->rt = NULL;
 	have_src = bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src));
 
 	LIST_FOREACH(rt, &ng_btsocket_l2cap_rt, next) {
 		if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 			continue;
 
 		/* Match src and dst */
 		if (have_src) {
 			if (bcmp(&pcb->src, &rt->src, sizeof(rt->src)) == 0)
 				break;
 		} else {
 			if (bcmp(&pcb->dst, &rt->src, sizeof(rt->src)) != 0)
 				break;
 		}
 	}
 
 	if (rt != NULL) {
 		pcb->rt = rt;
 
 		if (!have_src){
 			bcopy(&rt->src, &pcb->src, sizeof(pcb->src));
 			pcb->srctype =
 			  (sa->l2cap_bdaddr_type == BDADDR_BREDR)?
 			  BDADDR_BREDR : BDADDR_LE_PUBLIC;
 		}
 	} else
 		error = EHOSTUNREACH;
 
 	/*
 	 * Send L2CA_Connect request 
 	 */
 
 	if (error == 0) {	
 		error = ng_btsocket_l2cap_send_l2ca_con_req(pcb);
 		if (error == 0) {
 			pcb->flags |= NG_BTSOCKET_L2CAP_CLIENT;
 			pcb->state = NG_BTSOCKET_L2CAP_CONNECTING;
 			soisconnecting(pcb->so);
 
 			ng_btsocket_l2cap_timeout(pcb);
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_connect */
 
 /*
  * Process ioctl's calls on socket. No ioctl is implemented: every request
  * is rejected with EINVAL and none of the arguments is examined.
  */
 
 int
 ng_btsocket_l2cap_control(struct socket *so, u_long cmd, caddr_t data,
 		struct ifnet *ifp, struct thread *td)
 {
 	return (EINVAL);
 } /* ng_btsocket_l2cap_control */
 
 /*
  * Process getsockopt/setsockopt system calls
  *
  * Only SOL_L2CAP options are handled; any other level returns 0 without
  * touching the socket. Options can be read at any time but can be changed
  * only while the socket is in the CLOSED state (EACCES otherwise).
  *
  * Returns 0 on success, EINVAL for a missing PCB/node or an unknown
  * direction, ENOPROTOOPT for an unknown option, or the error from
  * sooptcopyin()/sooptcopyout().
  *
  * NOTE(review): sooptcopyin()/sooptcopyout() are called with pcb_mtx held;
  * confirm that copying to/from the caller under this mutex is acceptable.
  */
 
 int
 ng_btsocket_l2cap_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	int			error = 0;
 	ng_l2cap_cfg_opt_val_t	v;	/* scratch buffer for SOPT_SET values */
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	/* Not our level - nothing to do */
 	if (sopt->sopt_level != SOL_L2CAP)
 		return (0);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case SO_L2CAP_IMTU: /* get incoming MTU */
 			error = sooptcopyout(sopt, &pcb->imtu,
 						sizeof(pcb->imtu));
 			break;
 
 		case SO_L2CAP_OMTU: /* get outgoing (peer incoming) MTU */
 			error = sooptcopyout(sopt, &pcb->omtu,
 						sizeof(pcb->omtu));
 			break;
 
 		case SO_L2CAP_IFLOW: /* get incoming flow spec. */
 			error = sooptcopyout(sopt, &pcb->iflow,
 						sizeof(pcb->iflow));
 			break;
 
 		case SO_L2CAP_OFLOW: /* get outgoing flow spec. */
 			error = sooptcopyout(sopt, &pcb->oflow,
 						sizeof(pcb->oflow));
 			break;
 
 		case SO_L2CAP_FLUSH: /* get flush timeout */
 			error = sooptcopyout(sopt, &pcb->flush_timo,
 						sizeof(pcb->flush_timo));
 			break;
 		case SO_L2CAP_ENCRYPTED: /* get encrypt required */
 			error = sooptcopyout(sopt, &pcb->need_encrypt,
 						sizeof(pcb->need_encrypt));
 			break;
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		/*
 		 * XXX
 		 * We do not allow to change these parameters while socket is 
 		 * connected or we are in the process of creating a connection.
 		 * May be this should indicate re-configuration of the open 
 		 * channel?
 		 */
 
 		if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) {
 			error = EACCES;
 			break;
 		}
 
 		switch (sopt->sopt_name) {
 		case SO_L2CAP_IMTU: /* set incoming MTU */
 			error = sooptcopyin(sopt, &v, sizeof(v), sizeof(v.mtu));
 			if (error == 0)
 				pcb->imtu = v.mtu;
 			break;
 
 		case SO_L2CAP_OFLOW: /* set outgoing flow spec. */
 			error = sooptcopyin(sopt, &v, sizeof(v),sizeof(v.flow));
 			if (error == 0)
 				bcopy(&v.flow, &pcb->oflow, sizeof(pcb->oflow));
 			break;
 
 		case SO_L2CAP_FLUSH: /* set flush timeout */
 			error = sooptcopyin(sopt, &v, sizeof(v),
 						sizeof(v.flush_timo));
 			if (error == 0)
 				pcb->flush_timo = v.flush_timo;
 			break;
 		case SO_L2CAP_ENCRYPTED: /*set connect encryption opt*/
 			/*
 			 * NOTE(review): the state is already known to be
 			 * CLOSED here, so the EINVAL branch looks unreachable
 			 * - confirm before relying on it.
 			 */
 			if((pcb->state != NG_BTSOCKET_L2CAP_OPEN) &&
 			   (pcb->state != NG_BTSOCKET_L2CAP_W4_ENC_CHANGE)){
 				error = sooptcopyin(sopt, &v, sizeof(v),
 						    sizeof(v.encryption));
 				/* Any non-zero value is normalized to 1 */
 				if(error == 0)
 					pcb->need_encrypt = (v.encryption)?1:0;
 			}else{
 				error = EINVAL;
 			}
 			break;
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_ctloutput */
 
 /*
  * Detach and destroy socket
  *
  * Cancels a pending timeout, asks the L2CAP layer to disconnect the channel
  * if it is still up, unlinks the PCB from the socket list and frees it.
  * Does nothing when the Netgraph socket node does not exist.
  */
 
 void
 ng_btsocket_l2cap_detach(struct socket *so)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 
 	KASSERT(pcb != NULL, ("ng_btsocket_l2cap_detach: pcb == NULL"));
 
 	if (ng_btsocket_l2cap_node == NULL) 
 		return;
 
 	/* List lock first, then the PCB lock */
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* XXX what to do with pending request? */
 	if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO)
 		ng_btsocket_l2cap_untimeout(pcb);
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED &&
 	    pcb->state != NG_BTSOCKET_L2CAP_DISCONNECTING)
 		/* Send disconnect request with "zero" token */
 		ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 
 	LIST_REMOVE(pcb, next);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* The PCB is off the list now - destroy its mutex and release it */
 	mtx_destroy(&pcb->pcb_mtx);
 	bzero(pcb, sizeof(*pcb));
 	free(pcb, M_NETGRAPH_BTSOCKET_L2CAP);
 
 	/* so->so_pcb still holds the freed pointer until it is cleared below */
 	soisdisconnected(so);
 	so->so_pcb = NULL;
 } /* ng_btsocket_l2cap_detach */
 
 /*
  * Disconnect socket
  */
 
 int
 ng_btsocket_l2cap_disconnect(struct socket *so)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	int			error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_L2CAP_DISCONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 		return (EINPROGRESS);
 	}
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) {
 		/* XXX FIXME what to do with pending request? */
 		if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO)
 			ng_btsocket_l2cap_untimeout(pcb);
 
 		error = ng_btsocket_l2cap_send_l2ca_discon_req(pcb->token, pcb);
 		if (error == 0) {
 			pcb->state = NG_BTSOCKET_L2CAP_DISCONNECTING;
 			soisdisconnecting(so);
 
 			ng_btsocket_l2cap_timeout(pcb);
 		}
 
 		/* XXX FIXME what to do if error != 0 */
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_disconnect */
 
 /*
  * Listen on socket
  *
  * Puts the socket into the listening state, all under the socket lock.
  * Returns the error from solisten_proto_check(), EINVAL for a missing
  * PCB/node, EADDRNOTAVAIL when the socket has no PSM, or 0 on success.
  */
 
 int
 ng_btsocket_l2cap_listen(struct socket *so, int backlog, struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	int			error;
 
 	SOCK_LOCK(so);
 
 	error = solisten_proto_check(so);
 	if (error == 0) {
 		if (pcb == NULL || ng_btsocket_l2cap_node == NULL)
 			error = EINVAL;
 		else if (pcb->psm == 0)
 			error = EADDRNOTAVAIL;
 		else
 			solisten_proto(so, backlog);
 	}
 
 	SOCK_UNLOCK(so);
 
 	return (error);
 } /* ng_btsocket_l2cap_listen */
 
 /*
  * Get peer address
  *
  * Builds a sockaddr_l2cap from the destination fields of the PCB and
  * returns a freshly allocated copy through *nam.
  *
  * Returns 0 on success, EINVAL for a missing PCB/node and ENOMEM if the
  * address cannot be duplicated.
  */
 
 int
 ng_btsocket_l2cap_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	struct sockaddr_l2cap	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	/*
 	 * The whole structure is duplicated and handed back to the caller,
 	 * so clear it first: any padding or field not assigned below must
 	 * not carry leftover stack contents.
 	 */
 	bzero(&sa, sizeof(sa));
 
 	bcopy(&pcb->dst, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr));
 	sa.l2cap_psm = htole16(pcb->psm);
 	sa.l2cap_len = sizeof(sa);
 	sa.l2cap_family = AF_BLUETOOTH;
 	/* Fixed-channel sockets report their well-known CID */
 	switch(pcb->idtype){
 	case NG_L2CAP_L2CA_IDTYPE_ATT:
 		sa.l2cap_cid = NG_L2CAP_ATT_CID;
 		break;
 	case NG_L2CAP_L2CA_IDTYPE_SMP:
 		sa.l2cap_cid = NG_L2CAP_SMP_CID;
 		break;
 	default:
 		sa.l2cap_cid = 0;
 		break;
 	}
 	sa.l2cap_bdaddr_type = pcb->dsttype;
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_l2cap_peeraddr */
 
 /*
  * Send data to socket
  *
  * Queues the packet on the socket send buffer and, if no request is
  * outstanding (no timeout pending), passes the first queued packet to the
  * L2CAP layer. Both m and control are consumed on every path.
  *
  * Returns 0 on success, ENETDOWN for a missing node or route, EINVAL for
  * bad arguments, ENOTCONN if the socket is not open, EMSGSIZE if the packet
  * exceeds the outgoing MTU, or the error from ng_btsocket_l2cap_send2().
  */
 
 int
 ng_btsocket_l2cap_send(struct socket *so, int flags, struct mbuf *m,
 		struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_t	*pcb = so2l2cap_pcb(so);
 	int			 error = 0;
 
 	if (ng_btsocket_l2cap_node == NULL) {
 		error = ENETDOWN;
 		goto drop;
 	}
 
 	/* Check socket and input */
 	if (pcb == NULL || m == NULL || control != NULL) {
 		error = EINVAL;
 		goto drop;
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) {
 		/* Socket must be connected */
 		error = ENOTCONN;
 	} else if (pcb->rt == NULL || pcb->rt->hook == NULL ||
 	    NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 		/* No usable route */
 		error = ENETDOWN;
 	} else if (m->m_pkthdr.len > pcb->omtu) {
 		/* Larger than the outgoing (peer's incoming) MTU */
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Packet too big, len=%d, omtu=%d\n", __func__, m->m_pkthdr.len, pcb->omtu);
 
 		error = EMSGSIZE;
 	} else {
 		/*
 		 * Queue the packet first. With a timeout pending a request
 		 * is already in flight, so just wait for L2CA_WRITE_RSP;
 		 * otherwise send now and start the timeout.
 		 */
 
 		sbappendrecord(&pcb->so->so_snd, m);
 		m = NULL;
 
 		if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) {
 			error = ng_btsocket_l2cap_send2(pcb);
 			if (error == 0)
 				ng_btsocket_l2cap_timeout(pcb);
 			else
 				sbdroprecord(&pcb->so->so_snd); /* XXX */
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m); /* checks for != NULL */
 	NG_FREE_M(control);
 
 	return (error);
 } /* ng_btsocket_l2cap_send */
 
 /*
  * Send first packet in the socket queue to the L2CAP layer
  *
  * Duplicates the head of the send buffer, prepends an L2CA header and
  * sends the result over the route's hook. The queued original is left in
  * place. Caller must hold the PCB mutex.
  *
  * Returns 0 on success, EINVAL if the send buffer is empty, ENOBUFS if
  * an mbuf cannot be obtained, or the error from NG_SEND_DATA_ONLY().
  */
 
 static int
 ng_btsocket_l2cap_send2(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_hdr_t	*l2ca = NULL;
 	struct mbuf		*pkt = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (sbavail(&pcb->so->so_snd) == 0)
 		return (EINVAL); /* XXX */
 
 	pkt = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT);
 	if (pkt == NULL)
 		return (ENOBUFS);
 
 	/* Make room for the L2CA header and keep it contiguous */
 	M_PREPEND(pkt, sizeof(*l2ca), M_NOWAIT);
 	if (pkt != NULL && pkt->m_len < sizeof(*l2ca))
 		pkt = m_pullup(pkt, sizeof(*l2ca));
 
 	if (pkt == NULL) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Failed to create L2CA packet header\n", __func__);
 
 		return (ENOBUFS);
 	}
 
 	/* Fill in the header; length excludes the header itself */
 	l2ca = mtod(pkt, ng_l2cap_l2ca_hdr_t *);
 	l2ca->token = pcb->token;
 	l2ca->length = pkt->m_pkthdr.len - sizeof(*l2ca);
 	l2ca->lcid = pcb->cid;
 	l2ca->idtype = pcb->idtype;
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Sending packet: len=%d, length=%d, lcid=%d, token=%d, state=%d\n",
 		__func__, pkt->m_pkthdr.len, l2ca->length, l2ca->lcid,
 		l2ca->token, pcb->state);
 
 	/* The packet is complete - pass it to the L2CAP layer */
 	NG_SEND_DATA_ONLY(error, pcb->rt->hook, pkt);
 
 	return (error);
 } /* ng_btsocket_l2cap_send2 */
 
 /*
  * Get socket address
  *
  * Builds a sockaddr_l2cap from the source fields of the PCB and returns a
  * freshly allocated copy through *nam.
  *
  * Returns 0 on success, EINVAL for a missing PCB/node and ENOMEM if the
  * address cannot be duplicated.
  */
 
 int
 ng_btsocket_l2cap_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	struct sockaddr_l2cap	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	/*
 	 * The whole structure is duplicated and handed back to the caller,
 	 * so clear it first: any padding or field not assigned below must
 	 * not carry leftover stack contents.
 	 */
 	bzero(&sa, sizeof(sa));
 
 	bcopy(&pcb->src, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr));
 	sa.l2cap_psm = htole16(pcb->psm);
 	sa.l2cap_len = sizeof(sa);
 	sa.l2cap_family = AF_BLUETOOTH;
 	sa.l2cap_cid = 0;
 	sa.l2cap_bdaddr_type = pcb->srctype;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_l2cap_sockaddr */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Misc. functions
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Look for the socket that listens on given PSM and bdaddr. Returns exact or
  * close match (if any). Caller must hold ng_btsocket_l2cap_sockets_mtx.
  *
  * An exact match is a listening socket bound to the given bdaddr. A close
  * match is a listening socket bound to the ANY address; when several
  * exist, the one visited last in the list is returned. NULL is returned
  * when there is no match at all.
  */
 
 static ng_btsocket_l2cap_pcb_p
 ng_btsocket_l2cap_pcb_by_addr(bdaddr_p bdaddr, int psm)
 {
 	/* p walks the list; p1 remembers the wildcard (ANY) candidate */
 	ng_btsocket_l2cap_pcb_p	p = NULL, p1 = NULL;
 
 	mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(p, &ng_btsocket_l2cap_sockets, next) {
 		/* Only listening sockets with the requested PSM qualify */
-		if (p->so == NULL || !(p->so->so_options & SO_ACCEPTCONN) || 
-		    p->psm != psm) 
+		if (p->so == NULL || !SOLISTENING(p->so) || p->psm != psm)
 			continue;
 
 		/* Exact address match - stop searching */
 		if (bcmp(&p->src, bdaddr, sizeof(p->src)) == 0)
 			break;
 
 		if (bcmp(&p->src, NG_HCI_BDADDR_ANY, sizeof(p->src)) == 0)
 			p1 = p;
 	}
 
 	/* p is NULL here unless the loop was left through the exact match */
 	return ((p != NULL)? p : p1);
 } /* ng_btsocket_l2cap_pcb_by_addr */
 
 /*
  * Look for the socket that has given token. A zero token never matches.
  * Caller must hold ng_btsocket_l2cap_sockets_mtx.
  * Returns the matching PCB or NULL.
  */
 
 static ng_btsocket_l2cap_pcb_p
 ng_btsocket_l2cap_pcb_by_token(u_int32_t token)
 {
 	ng_btsocket_l2cap_pcb_p	cur = NULL;
 
 	if (token == 0)
 		return (NULL);
 
 	mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(cur, &ng_btsocket_l2cap_sockets, next) {
 		if (cur->token == token)
 			return (cur);
 	}
 
 	return (NULL);
 } /* ng_btsocket_l2cap_pcb_by_token */
 
 /*
  * Look for the socket that is assigned to the given source address,
  * channel ID and ID type. Caller must hold ng_btsocket_l2cap_sockets_mtx.
  * Returns the matching PCB or NULL.
  */
 
 static ng_btsocket_l2cap_pcb_p
 ng_btsocket_l2cap_pcb_by_cid(bdaddr_p src, int cid, int idtype)
 {
 	ng_btsocket_l2cap_pcb_p	cur = NULL;
 
 	mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(cur, &ng_btsocket_l2cap_sockets, next) {
 		/* Cheap integer comparisons first, address compare last */
 		if (cur->cid != cid || cur->idtype != idtype)
 			continue;
 
 		if (bcmp(src, &cur->src, sizeof(cur->src)) == 0)
 			return (cur);
 	}
 
 	return (NULL);
 } /* ng_btsocket_l2cap_pcb_by_cid */
 
 /*
  * Set timeout on socket. Caller must hold the PCB mutex. Arming a timeout
  * while one is already pending trips an assertion.
  */
 
 static void
 ng_btsocket_l2cap_timeout(ng_btsocket_l2cap_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) {
 		KASSERT(0,
 ("%s: Duplicated socket timeout?!\n", __func__));
 		return;
 	}
 
 	pcb->flags |= NG_BTSOCKET_L2CAP_TIMO;
 	callout_reset(&pcb->timo, bluetooth_l2cap_ertx_timeout(),
 	    ng_btsocket_l2cap_process_timeout, pcb);
 } /* ng_btsocket_l2cap_timeout */
 
 /*
  * Unset timeout on socket. Caller must hold the PCB mutex. Cancelling
  * when no timeout is pending trips an assertion.
  */
 
 static void
 ng_btsocket_l2cap_untimeout(ng_btsocket_l2cap_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) {
 		KASSERT(0,
 ("%s: No socket timeout?!\n", __func__));
 		return;
 	}
 
 	callout_stop(&pcb->timo);
 	pcb->flags &= ~NG_BTSOCKET_L2CAP_TIMO;
 } /* ng_btsocket_l2cap_untimeout */
 
 /*
  * Process timeout on socket
  *
  * Callout handler; asserts that the PCB mutex is held. Clears the timeout
  * flag, reports ETIMEDOUT on the socket and then acts according to the
  * state the socket was in when the timer fired.
  */
 
 static void
 ng_btsocket_l2cap_process_timeout(void *xpcb)
 {
 	ng_btsocket_l2cap_pcb_p	sock = (ng_btsocket_l2cap_pcb_p) xpcb;
 
 	mtx_assert(&sock->pcb_mtx, MA_OWNED);
 
 	sock->flags &= ~NG_BTSOCKET_L2CAP_TIMO;
 	sock->so->so_error = ETIMEDOUT;
 
 	switch (sock->state) {
 	case NG_BTSOCKET_L2CAP_CONNECTING:
 	case NG_BTSOCKET_L2CAP_CONFIGURING:
 	case NG_BTSOCKET_L2CAP_W4_ENC_CHANGE:
 		/* Send disconnect request with "zero" token */
 		if (sock->cid != 0)
 			ng_btsocket_l2cap_send_l2ca_discon_req(0, sock);
 
 		/* ... and close the socket */
 		/* FALLTHROUGH */
 
 	case NG_BTSOCKET_L2CAP_DISCONNECTING:
 		/* Disconnect timeout - disconnect the socket anyway */
 		sock->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(sock->so);
 		break;
 
 	case NG_BTSOCKET_L2CAP_OPEN:
 		/* Send timeout - drop packet and wakeup sender */
 		sbdroprecord(&sock->so->so_snd);
 		sowwakeup(sock->so);
 		break;
 
 	default:
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Invalid socket state=%d\n", __func__, sock->state);
 		break;
 	}
 } /* ng_btsocket_l2cap_process_timeout */
 
 /*
  * Translate HCI/L2CAP error code into "errno" code
  * XXX Note: Some L2CAP and HCI error codes have the same value, but
  *     different meaning
  *
  * Codes without an entry in the table (including 0x17 repeated attempts,
  * 0x1f unspecified error, 0x23 LMP error transaction collision and 0x28
  * instant passed, which are not mapped yet) translate to ENOSYS.
  */
 
 static int
 ng_btsocket_l2cap_result2errno(int result)
 {
 	static const struct {
 		int	code;	/* HCI/L2CAP result code */
 		int	error;	/* errno value it translates to */
 	} map[] = {
 		{ 0x00,   0 },			/* No error */
 		{ 0x01,   ENODEV },		/* Unknown HCI command */
 		{ 0x02,   ENOTCONN },		/* No connection */
 		{ 0x03,   EIO },		/* Hardware failure */
 		{ 0x04,   EHOSTDOWN },		/* Page timeout */
 		{ 0x05,   EACCES },		/* Authentication failure */
 		{ 0x06,   EACCES },		/* Key missing */
 		{ 0x07,   ENOMEM },		/* Memory full */
 		{ 0x08,   ETIMEDOUT },		/* Connection timeout */
 		{ 0x09,   EMLINK },		/* Max number of connections */
 		{ 0x0a,   EMLINK },		/* Max number of SCO connections */
 		{ 0x0b,   EEXIST },		/* ACL connection already exists */
 		{ 0x0c,   EBUSY },		/* Command disallowed */
 		{ 0x0d,   ECONNREFUSED },	/* Rejected: limited resources */
 		{ 0x0e,   ECONNREFUSED },	/* Rejected: security reasons */
 		{ 0x0f,   ECONNREFUSED },	/* Rejected: personal unit */
 		{ 0x10,   ETIMEDOUT },		/* Host timeout */
 		{ 0x11,   EOPNOTSUPP },		/* Unsupported feature or value */
 		{ 0x12,   EINVAL },		/* Invalid HCI command parameter */
 		{ 0x13,   ECONNRESET },		/* Other end: user ended */
 		{ 0x14,   ECONNRESET },		/* Other end: low resources */
 		{ 0x15,   ECONNRESET },		/* Other end: about to power off */
 		{ 0x16,   ECONNABORTED },	/* Terminated by local host */
 		{ 0x18,   EACCES },		/* Pairing not allowed */
 		{ 0x19,   EOPNOTSUPP },		/* Unknown LMP PDU */
 		{ 0x1a,   EOPNOTSUPP },		/* Unsupported remote feature */
 		{ 0x1b,   ECONNREFUSED },	/* SCO offset rejected */
 		{ 0x1c,   ECONNREFUSED },	/* SCO interval rejected */
 		{ 0x1d,   ECONNREFUSED },	/* SCO air mode rejected */
 		{ 0x1e,   EINVAL },		/* Invalid LMP parameters */
 		{ 0x20,   EOPNOTSUPP },		/* Unsupported LMP parameter value */
 		{ 0x21,   EACCES },		/* Role change not allowed */
 		{ 0x22,   ETIMEDOUT },		/* LMP response timeout */
 		{ 0x24,   EACCES },		/* LMP PSU not allowed */
 		{ 0x25,   EACCES },		/* Encryption mode not acceptable */
 		{ 0x26,   EACCES },		/* Unit key used */
 		{ 0x27,   EOPNOTSUPP },		/* QoS is not supported */
 		{ 0x29,   EOPNOTSUPP },		/* Pairing with unit key unsupported */
 		{ 0xee,   ETIMEDOUT },		/* HCI timeout */
 		{ 0xeeee, ETIMEDOUT },		/* L2CAP timeout */
 	};
 	unsigned int	i;
 
 	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
 		if (map[i].code == result)
 			return (map[i].error);
 	}
 
 	return (ENOSYS);
 } /* ng_btsocket_l2cap_result2errno */
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
index 95b28e6cc5e7..c0704bce55fa 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -1,3558 +1,3557 @@
 /*
  * ng_btsocket_rfcomm.c
  */
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2001-2003 Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $Id: ng_btsocket_rfcomm.c,v 1.28 2003/09/14 23:29:06 max Exp $
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/domain.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/filedesc.h>
 #include <sys/ioccom.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/bluetooth/include/ng_bluetooth.h>
 #include <netgraph/bluetooth/include/ng_hci.h>
 #include <netgraph/bluetooth/include/ng_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket.h>
 #include <netgraph/bluetooth/include/ng_btsocket_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket_rfcomm.h>
 
 /*
  * MALLOC define
  *
  * When NG_SEPARATE_MALLOC is defined the RFCOMM socket code gets a malloc
  * type of its own; otherwise M_NETGRAPH_BTSOCKET_RFCOMM is simply an alias
  * for the shared M_NETGRAPH type.
  */
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_RFCOMM, "netgraph_btsocks_rfcomm",
 		"Netgraph Bluetooth RFCOMM sockets");
 #else
 #define M_NETGRAPH_BTSOCKET_RFCOMM M_NETGRAPH
 #endif /* NG_SEPARATE_MALLOC */
 
 /*
  * Debug
  *
  * Each macro below expands to an "if (...) printf" prefix, so it is written
  * like a printf() call, e.g. NG_BTSOCKET_RFCOMM_ERR("fmt\n", args);
  * The message is printed only when ng_btsocket_rfcomm_debug_level is at or
  * above the macro's level and ppsratecheck() (called with a limit of 1)
  * returns non-zero.
  *
  * Because the expansion is a bare "if" statement, an "else" written right
  * after an unbraced use of one of these macros would bind to the macro's
  * hidden "if". Put braces around the call in such places.
  */
 #define NG_BTSOCKET_RFCOMM_INFO \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_INFO_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_WARN \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_WARN_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_ERR \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ERR_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_ALERT \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define	ALOT	0x7fff
 
 /* Local prototypes */
 
 /* L2CAP socket upcall and the RFCOMM task that walks the sessions */
 static int ng_btsocket_rfcomm_upcall
 	(struct socket *so, void *arg, int waitflag);
 static void ng_btsocket_rfcomm_sessions_task
 	(void *ctx, int pending);
 static void ng_btsocket_rfcomm_session_task
 	(ng_btsocket_rfcomm_session_p s);
 /*
  * Schedule the RFCOMM task on taskqueue_swi_giant. Evaluates to the return
  * value of taskqueue_enqueue().
  */
 #define ng_btsocket_rfcomm_task_wakeup() \
 	taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_rfcomm_task)
 
 /* Connection indication and confirmation */
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_connect_ind
 	(ng_btsocket_rfcomm_session_p s, int channel);
 static void ng_btsocket_rfcomm_connect_cfm
 	(ng_btsocket_rfcomm_session_p s);
 
 /* RFCOMM session helpers (ng_btsocket_rfcomm_session_*) */
 static int ng_btsocket_rfcomm_session_create
 	(ng_btsocket_rfcomm_session_p *sp, struct socket *l2so,
 	 bdaddr_p src, bdaddr_p dst, struct thread *td);
 static int ng_btsocket_rfcomm_session_accept
 	(ng_btsocket_rfcomm_session_p s0);
 static int ng_btsocket_rfcomm_session_connect
 	(ng_btsocket_rfcomm_session_p s);
 static int ng_btsocket_rfcomm_session_receive
 	(ng_btsocket_rfcomm_session_p s);
 static int ng_btsocket_rfcomm_session_send
 	(ng_btsocket_rfcomm_session_p s);
 static void ng_btsocket_rfcomm_session_clean
 	(ng_btsocket_rfcomm_session_p s);
 static void ng_btsocket_rfcomm_session_process_pcb
 	(ng_btsocket_rfcomm_session_p s);
 static ng_btsocket_rfcomm_session_p ng_btsocket_rfcomm_session_by_addr
 	(bdaddr_p src, bdaddr_p dst);
 
 /* Receive path (ng_btsocket_rfcomm_receive_*) */
 static int ng_btsocket_rfcomm_receive_frame
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_sabm
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_disc
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_ua
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_dm
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_uih
 	(ng_btsocket_rfcomm_session_p s, int dlci, int pf, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_mcc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_test
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_fc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_msc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_rpn
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_rls
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_pn
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static void ng_btsocket_rfcomm_set_pn
 	(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr, u_int8_t flow_control, 
 	 u_int8_t credits, u_int16_t mtu);
 
 /* Send path (ng_btsocket_rfcomm_send_*) */
 static int ng_btsocket_rfcomm_send_command
 	(ng_btsocket_rfcomm_session_p s, u_int8_t type, u_int8_t dlci);
 static int ng_btsocket_rfcomm_send_uih
 	(ng_btsocket_rfcomm_session_p s, u_int8_t address, u_int8_t pf, 
 	 u_int8_t credits, struct mbuf *data);
 static int ng_btsocket_rfcomm_send_msc
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static int ng_btsocket_rfcomm_send_pn
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static int ng_btsocket_rfcomm_send_credits
 	(ng_btsocket_rfcomm_pcb_p pcb);
 
 /* Per-socket PCB helpers (ng_btsocket_rfcomm_pcb_*) */
 static int ng_btsocket_rfcomm_pcb_send
 	(ng_btsocket_rfcomm_pcb_p pcb, int limit);
 static void ng_btsocket_rfcomm_pcb_kill
 	(ng_btsocket_rfcomm_pcb_p pcb, int error);
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_by_dlci
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_listener
 	(bdaddr_p src, int channel);
 
 /* PCB timeout handling */
 static void ng_btsocket_rfcomm_timeout
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static void ng_btsocket_rfcomm_untimeout
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static void ng_btsocket_rfcomm_process_timeout
 	(void *xpcb);
 
 /* Socket buffer to mbuf helper */
 static struct mbuf * ng_btsocket_rfcomm_prepare_packet
 	(struct sockbuf *sb, int length);
 
 /*
  * Globals
  *
  * ng_btsocket_rfcomm_debug_level and ng_btsocket_rfcomm_timo are exported
  * through the sysctl tree below and get their starting values in
  * ng_btsocket_rfcomm_init(). ng_btsocket_rfcomm_task is the task enqueued by
  * ng_btsocket_rfcomm_task_wakeup(). The session list and the socket list
  * each have a mutex of their own. ng_btsocket_rfcomm_lasttime and
  * ng_btsocket_rfcomm_curpps hold the ppsratecheck() state used by the debug
  * macros.
  */
 extern int					ifqmaxlen;
 static u_int32_t				ng_btsocket_rfcomm_debug_level;
 static u_int32_t				ng_btsocket_rfcomm_timo;
 struct task					ng_btsocket_rfcomm_task;
 static LIST_HEAD(, ng_btsocket_rfcomm_session)	ng_btsocket_rfcomm_sessions;
 static struct mtx				ng_btsocket_rfcomm_sessions_mtx;
 static LIST_HEAD(, ng_btsocket_rfcomm_pcb)	ng_btsocket_rfcomm_sockets;
 static struct mtx				ng_btsocket_rfcomm_sockets_mtx;
 static struct timeval				ng_btsocket_rfcomm_lasttime;
 static int					ng_btsocket_rfcomm_curpps;
 
 /*
  * Sysctl tree
  *
  * Adds a "stream" node under _net_bluetooth_rfcomm_sockets with two
  * read-write knobs, "debug_level" and "timeout", backed by
  * ng_btsocket_rfcomm_debug_level and ng_btsocket_rfcomm_timo.
  *
  * NOTE(review): the value argument given to SYSCTL_UINT() for debug_level
  * (NG_BTSOCKET_INFO_LEVEL) differs from what ng_btsocket_rfcomm_init()
  * assigns (NG_BTSOCKET_WARN_LEVEL) -- confirm which default is intended.
  */
 SYSCTL_DECL(_net_bluetooth_rfcomm_sockets);
 static SYSCTL_NODE(_net_bluetooth_rfcomm_sockets, OID_AUTO, stream,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Bluetooth STREAM RFCOMM sockets family");
 SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, debug_level,
 	CTLFLAG_RW,
 	&ng_btsocket_rfcomm_debug_level, NG_BTSOCKET_INFO_LEVEL,
 	"Bluetooth STREAM RFCOMM sockets debug level");
 SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, timeout,
 	CTLFLAG_RW,
 	&ng_btsocket_rfcomm_timo, 60,
 	"Bluetooth STREAM RFCOMM sockets timeout");
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM CRC
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Lookup table for the RFCOMM CRC, indexed by (running crc ^ next byte).
  * The table is only ever read, so it is declared const.
  */
 static const u_int8_t	ng_btsocket_rfcomm_crc_table[256] = {
 	0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75,
 	0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b,
 	0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69,
 	0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67,
 
 	0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d,
 	0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43,
 	0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51,
 	0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f,
 
 	0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05,
 	0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b,
 	0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19,
 	0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17,
 
 	0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d,
 	0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33,
 	0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21,
 	0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f,
 
 	0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95,
 	0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b,
 	0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89,
 	0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87,
 
 	0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad,
 	0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3,
 	0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1,
 	0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf,
 
 	0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5,
 	0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb,
 	0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9,
 	0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7,
 
 	0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd,
 	0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3,
 	0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1,
 	0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf
 };
 
 /*
  * Run the table-driven RFCOMM CRC over "length" bytes starting at "data",
  * beginning from the initial value 0xff, and return the resulting CRC.
  */
 static u_int8_t
 ng_btsocket_rfcomm_crc(u_int8_t *data, int length)
 {
 	u_int8_t	acc;
 
 	/* Fold one byte per iteration into the running value */
 	for (acc = 0xff; length != 0; length--, data++)
 		acc = ng_btsocket_rfcomm_crc_table[acc ^ *data];
 
 	return (acc);
 } /* ng_btsocket_rfcomm_crc */
 
 /* FCS on 2 bytes: the CRC of data[0..1] subtracted from 0xff */
 static u_int8_t
 ng_btsocket_rfcomm_fcs2(u_int8_t *data)
 {
 	u_int8_t	crc;
 
 	crc = ng_btsocket_rfcomm_crc(data, 2);
 
 	return (0xff - crc);
 } /* ng_btsocket_rfcomm_fcs2 */
   
 /* FCS on 3 bytes: the CRC of data[0..2] subtracted from 0xff */
 static u_int8_t
 ng_btsocket_rfcomm_fcs3(u_int8_t *data)
 {
 	u_int8_t	crc;
 
 	crc = ng_btsocket_rfcomm_crc(data, 3);
 
 	return (0xff - crc);
 } /* ng_btsocket_rfcomm_fcs3 */
 
 /* 
  * Check FCS
  *
  * From Bluetooth spec
  *
  * "... In 07.10, the frame check sequence (FCS) is calculated on different 
  * sets of fields for different frame types. These are the fields that the 
  * FCS are calculated on:
  *
  * For SABM, DISC, UA, DM frames: on Address, Control and length field.
  * For UIH frames: on Address and Control field.
  *
  * (This is stated here for clarification, and to set the standard for RFCOMM;
  * the fields included in FCS calculation have actually changed in version
  * 7.0.0 of TS 07.10, but RFCOMM will not change the FCS calculation scheme
  * from the one above.) ..."
  */
 
 /*
  * Returns 0 when "fcs" equals the FCS computed over "data" and non-zero when
  * it does not. UIH frames are checked over 2 bytes, every other frame type
  * over 3 bytes.
  */
 static int
 ng_btsocket_rfcomm_check_fcs(u_int8_t *data, int type, u_int8_t fcs)
 {
 	u_int8_t	expected;
 
 	if (type == RFCOMM_FRAME_UIH)
 		expected = ng_btsocket_rfcomm_fcs2(data);
 	else
 		expected = ng_btsocket_rfcomm_fcs3(data);
 
 	return (expected != fcs);
 } /* ng_btsocket_rfcomm_check_fcs */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Socket interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Initialize everything: tunable defaults, the RFCOMM task, and the session
  * and socket lists together with their mutexes.
  */
 
 void
 ng_btsocket_rfcomm_init(void)
 {
 
 	/* The globals are set up once, by the default vnet instance only. */
 	if (IS_DEFAULT_VNET(curvnet)) {
 		/* Tunable defaults */
 		ng_btsocket_rfcomm_debug_level = NG_BTSOCKET_WARN_LEVEL;
 		ng_btsocket_rfcomm_timo = 60;
 
 		/* Task that services the RFCOMM sessions */
 		TASK_INIT(&ng_btsocket_rfcomm_task, 0,
 		    ng_btsocket_rfcomm_sessions_task, NULL);
 
 		/* List of RFCOMM sessions and its lock */
 		LIST_INIT(&ng_btsocket_rfcomm_sessions);
 		mtx_init(&ng_btsocket_rfcomm_sessions_mtx,
 		    "btsocks_rfcomm_sessions_mtx", NULL, MTX_DEF);
 
 		/* List of RFCOMM sockets and its lock */
 		LIST_INIT(&ng_btsocket_rfcomm_sockets);
 		mtx_init(&ng_btsocket_rfcomm_sockets_mtx,
 		    "btsocks_rfcomm_sockets_mtx", NULL, MTX_DEF);
 	}
 } /* ng_btsocket_rfcomm_init */
 
 /*
  * Abort connection on socket: record ECONNABORTED as the socket error and
  * start a disconnect. The result of the disconnect is deliberately ignored.
  */
 
 void
 ng_btsocket_rfcomm_abort(struct socket *so)
 {
 	int	ignored;
 
 	so->so_error = ECONNABORTED;
 
 	ignored = ng_btsocket_rfcomm_disconnect(so);
 	(void)ignored;
 } /* ng_btsocket_rfcomm_abort */
 
 /*
  * Close socket: start a disconnect and ignore its result.
  */
 void
 ng_btsocket_rfcomm_close(struct socket *so)
 {
 	int	ignored;
 
 	ignored = ng_btsocket_rfcomm_disconnect(so);
 	(void)ignored;
 } /* ng_btsocket_rfcomm_close */
 
 /*
  * Accept connection on socket. The socket must already be connected and
  * ready, so all that is left is to hand back the peer address. Returns
  * whatever ng_btsocket_rfcomm_peeraddr() returns.
  */
 
 int
 ng_btsocket_rfcomm_accept(struct socket *so, struct sockaddr **nam)
 {
 	int	error;
 
 	error = ng_btsocket_rfcomm_peeraddr(so, nam);
 
 	return (error);
 } /* ng_btsocket_rfcomm_accept */
 
 /*
  * Create and attach new socket
  *
  * Allocates a zeroed PCB for a SOCK_STREAM socket, fills in its defaults and
  * links it onto the global RFCOMM socket list.
  *
  * Returns 0 on success, ESOCKTNOSUPPORT for a non-stream socket, EISCONN if
  * the socket already has a PCB, ENOMEM if the PCB cannot be allocated, or
  * the error from soreserve().
  */
 
 int
 ng_btsocket_rfcomm_attach(struct socket *so, int proto, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb;
 	int				rc;
 
 	pcb = so2rfcomm_pcb(so);
 
 	/* Only stream sockets are supported */
 	if (so->so_type != SOCK_STREAM)
 		return (ESOCKTNOSUPPORT);
 
 #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */
 	if (proto != 0) 
 		if (proto != BLUETOOTH_PROTO_RFCOMM)
 			return (EPROTONOSUPPORT);
 #endif /* XXX */
 
 	/* A socket may carry at most one PCB */
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Reserve buffer space unless both directions already have some */
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		rc = soreserve(so, NG_BTSOCKET_RFCOMM_SENDSPACE,
 		    NG_BTSOCKET_RFCOMM_RECVSPACE);
 		if (rc != 0)
 			return (rc);
 	}
 
 	/* Get a zero-filled PCB without sleeping */
 	pcb = malloc(sizeof(*pcb), M_NETGRAPH_BTSOCKET_RFCOMM,
 	    M_NOWAIT | M_ZERO);
 	if (pcb == NULL)
 		return (ENOMEM);
 
 	/* Tie socket and PCB together */
 	so->so_pcb = (caddr_t) pcb;
 	pcb->so = so;
 
 	/* Defaults for a fresh, closed DLC */
 	pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED;
 	pcb->flags = NG_BTSOCKET_RFCOMM_DLC_CFC;
 
 	pcb->rmodem = (RFCOMM_MODEM_RTC | RFCOMM_MODEM_RTR | RFCOMM_MODEM_DV);
 	pcb->lmodem = pcb->rmodem;
 
 	pcb->mtu = RFCOMM_DEFAULT_MTU;
 	pcb->tx_cred = 0;
 	pcb->rx_cred = RFCOMM_DEFAULT_CREDITS;
 
 	/* The timeout callout runs under the PCB mutex */
 	mtx_init(&pcb->pcb_mtx, "btsocks_rfcomm_pcb_mtx", NULL, MTX_DEF);
 	callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0);
 
 	/* Publish the PCB on the global socket list */
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 	LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sockets, pcb, next);
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_attach */
 
 /*
  * Bind socket
  */
 
 int
 ng_btsocket_rfcomm_bind(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so), *pcb1;
 	struct sockaddr_rfcomm		*sa = (struct sockaddr_rfcomm *) nam;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->rfcomm_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->rfcomm_len != sizeof(*sa))
 		return (EINVAL);
 	if (sa->rfcomm_channel > 30)
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (sa->rfcomm_channel != 0) {
 		mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 		LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next) {
 			if (pcb1->channel == sa->rfcomm_channel &&
 			    bcmp(&pcb1->src, &sa->rfcomm_bdaddr,
 					sizeof(pcb1->src)) == 0) {
 				mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 				mtx_unlock(&pcb->pcb_mtx);
 
 				return (EADDRINUSE);
 			}
 		}
 
 		mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 	}
 
 	bcopy(&sa->rfcomm_bdaddr, &pcb->src, sizeof(pcb->src));
 	pcb->channel = sa->rfcomm_channel;
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_bind */
 
 /*
  * Connect socket
  *
  * Validates the destination address, then finds the RFCOMM session between
  * pcb->src and the destination (creating one on top of a fresh L2CAP socket
  * if none exists) and attaches this DLC to it.
  *
  * Returns 0 on success or an errno value: EINVAL for a missing PCB or a
  * malformed address, EAFNOSUPPORT for a non-Bluetooth family, EDESTADDRREQ
  * for channel 0 or the ANY address, EBUSY if the session already has a DLC
  * with the same DLCI, ECONNRESET if the session is in a state that cannot
  * take new DLCs, or whatever socreate(), session creation, sending PN or
  * waking the RFCOMM task returned.
  *
  * Locks are taken in the order: sessions list mutex, session mutex, PCB
  * mutex.
  */
 
 int
 ng_btsocket_rfcomm_connect(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		*sa = (struct sockaddr_rfcomm *) nam;
 	ng_btsocket_rfcomm_session_t	*s = NULL;
 	struct socket			*l2so = NULL;
 	int				 dlci, error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->rfcomm_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->rfcomm_len != sizeof(*sa))
 		return (EINVAL);
 	if (sa->rfcomm_channel > 30)
 		return (EINVAL);
 	if (sa->rfcomm_channel == 0 ||
 	    bcmp(&sa->rfcomm_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		return (EDESTADDRREQ);
 
 	/*
 	 * Note that we will not check for errors in socreate() because
 	 * if we failed to create L2CAP socket at this point we still
 	 * might have already open session.
 	 */
 
 	error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
 			BLUETOOTH_PROTO_L2CAP, td->td_ucred, td);
 
 	/*
 	 * Look for session between "pcb->src" and "sa->rfcomm_bdaddr" (dst)
 	 */
 
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	s = ng_btsocket_rfcomm_session_by_addr(&pcb->src, &sa->rfcomm_bdaddr);
 	if (s == NULL) {
 		/*
 		 * We need to create new RFCOMM session. Check if we have L2CAP
 		 * socket. If l2so == NULL then error has the error code from
 		 * socreate()
 		 */
 
 		if (l2so == NULL) {
 			mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 			return (error);
 		}
 
 		error = ng_btsocket_rfcomm_session_create(&s, l2so,
 				&pcb->src, &sa->rfcomm_bdaddr, td);
 		if (error != 0) {
 			/* Session creation failed, so the L2CAP socket is ours to close */
 			mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 			soclose(l2so);
 
 			return (error);
 		}
 	} else if (l2so != NULL)
 		soclose(l2so); /* we don't need new L2CAP socket */
 
 	/*
 	 * Check if we already have the same DLCI on the same session
 	 */
 
 	mtx_lock(&s->session_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	dlci = RFCOMM_MKDLCI(!INITIATOR(s), sa->rfcomm_channel);
 
 	if (ng_btsocket_rfcomm_pcb_by_dlci(s, dlci) != NULL) {
 		/* Release the locks in the reverse order of acquisition */
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&s->session_mtx);
 		mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 
 		return (EBUSY);
 	}
 
 	/*
 	 * Check session state and if its not acceptable then refuse connection
 	 */
 
 	switch (s->state) {
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 	case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 		/*
 		 * Update destination address and channel and attach
 		 * DLC to the session
 		 */
 
 		bcopy(&sa->rfcomm_bdaddr, &pcb->dst, sizeof(pcb->dst));
 		pcb->channel = sa->rfcomm_channel;
 		pcb->dlci = dlci;
 
 		LIST_INSERT_HEAD(&s->dlcs, pcb, session_next);
 		pcb->session = s;
 
 		ng_btsocket_rfcomm_timeout(pcb);
 		soisconnecting(pcb->so);
 
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_OPEN) {
 			/*
 			 * The session is already open: take its MTU and the
 			 * L2CAP socket's source address, then send PN for
 			 * this DLC right away.
 			 */
 			pcb->mtu = s->mtu;
 			bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src,
 				sizeof(pcb->src));
 
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING;
 
 			error = ng_btsocket_rfcomm_send_pn(pcb);
 			if (error == 0)
 				error = ng_btsocket_rfcomm_task_wakeup();
 		} else
 			/* Session is not open yet; park the DLC until it is */
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT;
 		break;
 
 	default:
 		error = ECONNRESET;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&s->session_mtx);
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_connect */
 
 /*
  * Process ioctl's calls on socket. No ioctl is implemented: every request is
  * answered with EINVAL.
  * XXX FIXME this should provide interface to the RFCOMM multiplexor channel
  */
 
 int
 ng_btsocket_rfcomm_control(struct socket *so, u_long cmd, caddr_t data,
 		struct ifnet *ifp, struct thread *td)
 {
 	int	error = EINVAL;
 
 	return (error);
 } /* ng_btsocket_rfcomm_control */
 
 /*
  * Process getsockopt/setsockopt system calls
  */
 
 int
 ng_btsocket_rfcomm_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	ng_btsocket_rfcomm_pcb_p		pcb = so2rfcomm_pcb(so);
 	struct ng_btsocket_rfcomm_fc_info	fcinfo;
 	int					error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (sopt->sopt_level != SOL_RFCOMM)
 		return (0);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case SO_RFCOMM_MTU:
 			error = sooptcopyout(sopt, &pcb->mtu, sizeof(pcb->mtu));
 			break;
 
 		case SO_RFCOMM_FC_INFO:
 			fcinfo.lmodem = pcb->lmodem;
 			fcinfo.rmodem = pcb->rmodem;
 			fcinfo.tx_cred = pcb->tx_cred;
 			fcinfo.rx_cred = pcb->rx_cred;
 			fcinfo.cfc = (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)?
 				1 : 0;
 			fcinfo.reserved = 0;
 
 			error = sooptcopyout(sopt, &fcinfo, sizeof(fcinfo));
 			break;
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_ctloutput */
 
 /*
  * Detach and destroy socket
  *
  * If the DLC is still in use it is pushed towards disconnection and the
  * RFCOMM task is woken; the caller then sleeps on &pcb->state until the DLC
  * reaches NG_BTSOCKET_RFCOMM_DLC_CLOSED. After that the PCB is unlinked from
  * the global socket list, wiped and freed, and the socket is marked
  * disconnected.
  */
 
 void
 ng_btsocket_rfcomm_detach(struct socket *so)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 
 	KASSERT(pcb != NULL, ("ng_btsocket_rfcomm_detach: pcb == NULL"));
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 		/* XXX What to do with pending request? */
 		if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 			ng_btsocket_rfcomm_untimeout(pcb);
 
 		/*
 		 * A DLC still waiting for its session is only flagged as
 		 * detached; any other active DLC is moved to DISCONNECTING.
 		 */
 		if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT)
 			pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_DETACHED;
 		else
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 	}
 
 	/*
 	 * Sleep (dropping pcb_mtx while asleep) until the DLC is closed.
 	 * NOTE(review): the matching wakeup on &pcb->state is not in this
 	 * function -- presumably issued by the RFCOMM task; confirm.
 	 */
 	while (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CLOSED)
 		msleep(&pcb->state, &pcb->pcb_mtx, PZERO, "rf_det", 0);
 
 	/* A closed DLC must have no session and no pending timeout */
 	if (pcb->session != NULL)
 		panic("%s: pcb->session != NULL\n", __func__);
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		panic("%s: timeout on closed DLC, flags=%#x\n",
 			__func__, pcb->flags);
 
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 	LIST_REMOVE(pcb, next);
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/* Tear the PCB down: destroy its mutex, wipe it, free it */
 	mtx_destroy(&pcb->pcb_mtx);
 	bzero(pcb, sizeof(*pcb));
 	free(pcb, M_NETGRAPH_BTSOCKET_RFCOMM);
 
 	soisdisconnected(so);
 	so->so_pcb = NULL;
 } /* ng_btsocket_rfcomm_detach */
 
 /*
  * Disconnect socket
  */
 
 int
 ng_btsocket_rfcomm_disconnect(struct socket *so)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 		return (EINPROGRESS);
 	}
 
 	/* XXX What to do with pending request? */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: /* XXX can we get here? */
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: /* XXX can we get here? */
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 
 		/*
 		 * Just change DLC state and enqueue RFCOMM task. It will
 		 * queue and send DISC on the DLC.
 		 */ 
 
 		pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 		soisdisconnecting(so);
 
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_CLOSED:
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 		break;
 
 	default:
 		panic("%s: Invalid DLC state=%d, flags=%#x\n",
 			__func__, pcb->state, pcb->flags);
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_disconnect */
 
 /*
  * Listen on socket. First call to listen() will create listening RFCOMM session
  *
  * If the socket is not bound to a channel yet, the highest channel (30 down
  * to 1) not used by another socket with the same source address is picked.
  *
  * Returns 0 on success, EINVAL if the socket has no PCB, EADDRNOTAVAIL if the
  * channel is out of range or no free channel is left, or the error from
  * solisten_proto_check(), socreate() or session creation.
  */
 
 int
 ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_p	 pcb = so2rfcomm_pcb(so), pcb1;
 	ng_btsocket_rfcomm_session_p	 s = NULL;
 	struct socket			*l2so = NULL;
 	int				 error, socreate_error, usedchannels;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (pcb->channel > 30)
 		return (EADDRNOTAVAIL);
 
 	/* Bit (N - 1) set means channel N is taken */
 	usedchannels = 0;
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->channel == 0) {
 		mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 		/* Collect the channels in use on our source address */
 		LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next)
 			if (pcb1->channel != 0 &&
 			    bcmp(&pcb1->src, &pcb->src, sizeof(pcb->src)) == 0)
 				usedchannels |= (1 << (pcb1->channel - 1));
 
 		/* Take the highest free channel; 0 means none is left */
 		for (pcb->channel = 30; pcb->channel > 0; pcb->channel --)
 			if (!(usedchannels & (1 << (pcb->channel - 1))))
 				break;
 
 		if (pcb->channel == 0) {
 			mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 			mtx_unlock(&pcb->pcb_mtx);
 
 			return (EADDRNOTAVAIL);
 		}
 
 		mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/*
 	 * Note that we will not check for errors in socreate() because
 	 * if we failed to create L2CAP socket at this point we still
 	 * might have already open session.
 	 */
 
 	socreate_error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
 			BLUETOOTH_PROTO_L2CAP, td->td_ucred, td);
 
 	/*
 	 * Transition the socket and session into the LISTENING state.  Check
 	 * for collisions first, as there can only be one.
 	 */
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	SOCK_UNLOCK(so);
 	if (error != 0)
 		goto out;
 
 	/* Look for an existing listening session; s is NULL if none exists */
 	LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next)
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_LISTENING)
 			break;
 
 	if (s == NULL) {
 		/*
 		 * We need to create default RFCOMM session. Check if we have
 		 * L2CAP socket. If l2so == NULL then socreate_error has the
 		 * error code from socreate()
 		 */
 		if (l2so == NULL) {
 			error = socreate_error;
 			goto out;
 		}
 
 		/*
 		 * Create default listen RFCOMM session. The default RFCOMM
 		 * session will listen on ANY address.
 		 *
 		 * XXX FIXME Note that currently there is no way to adjust MTU
 		 * for the default session.
 		 */
 		error = ng_btsocket_rfcomm_session_create(&s, l2so,
 					NG_HCI_BDADDR_ANY, NULL, td);
 		if (error != 0)
 			goto out;
 		/* The new session keeps l2so, so it must not be closed below */
 		l2so = NULL;
 	}
 	SOCK_LOCK(so);
 	solisten_proto(so, backlog);
 	SOCK_UNLOCK(so);
 out:
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 	/*
 	 * If we still have an l2so reference here, it's unneeded, so release
 	 * it.
 	 */
 	if (l2so != NULL)
 		soclose(l2so);
 	return (error);
 } /* ng_btsocket_rfcomm_listen */
 
 /*
  * Get peer address
  */
 
 int
 ng_btsocket_rfcomm_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	bcopy(&pcb->dst, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr));
 	sa.rfcomm_channel = pcb->channel;
 	sa.rfcomm_len = sizeof(sa);
 	sa.rfcomm_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_rfcomm_peeraddr */
 
 /*
  * Send data to socket
  *
  * Appends "m" to the socket's send buffer and, unless the DLC is already
  * flagged as sending, flags it and wakes the RFCOMM task.
  *
  * Returns EINVAL for a missing PCB, a NULL "m" or a non-NULL "control",
  * ENOTCONN when the DLC is not connected, and otherwise 0 or the result of
  * waking the RFCOMM task. Anything not handed to the send buffer is freed
  * here.
  */
 
 int
 ng_btsocket_rfcomm_send(struct socket *so, int flags, struct mbuf *m,
 		struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so);
 	int				 error;
 
 	if (pcb == NULL || m == NULL || control != NULL) {
 		/* Bad socket or input */
 		error = EINVAL;
 	} else {
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) {
 			/* Data can only go out on a connected DLC */
 			error = ENOTCONN;
 		} else {
 			error = 0;
 
 			/* The send buffer owns the mbuf from here on */
 			sbappend(&pcb->so->so_snd, m, flags);
 			m = NULL;
 
 			if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_SENDING) == 0) {
 				pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_SENDING;
 				error = ng_btsocket_rfcomm_task_wakeup();
 			}
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 	}
 
 	NG_FREE_M(m); /* checks for != NULL */
 	NG_FREE_M(control);
 
 	return (error);
 } /* ng_btsocket_rfcomm_send */
 
 /*
  * Get socket address
  */
 
 int
 ng_btsocket_rfcomm_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	bcopy(&pcb->src, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr));
 	sa.rfcomm_channel = pcb->channel;
 	sa.rfcomm_len = sizeof(sa);
 	sa.rfcomm_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_rfcomm_sockaddr */
 
 /*
  * Upcall function for L2CAP sockets. Enqueue RFCOMM task.
  */
 
 static int
 ng_btsocket_rfcomm_upcall(struct socket *so, void *arg, int waitflag)
 {
 	int	error;
 
 	if (so == NULL)
 		panic("%s: so == NULL\n", __func__);
 
 	if ((error = ng_btsocket_rfcomm_task_wakeup()) != 0)
 		NG_BTSOCKET_RFCOMM_ALERT(
 "%s: Could not enqueue RFCOMM task, error=%d\n", __func__, error);
 	return (SU_OK);
 } /* ng_btsocket_rfcomm_upcall */
 
 /*
  * RFCOMM task. Will handle all RFCOMM sessions in one pass.
  * XXX FIXME does not scale very well
  *
  * Walks the global session list under ng_btsocket_rfcomm_sessions_mtx,
  * locking each session in turn and running ng_btsocket_rfcomm_session_task()
  * on it. A session that ends up CLOSED is unlinked, its L2CAP socket is
  * closed and the session itself is freed.
  */
 
 static void
 ng_btsocket_rfcomm_sessions_task(void *ctx, int pending)
 {
 	ng_btsocket_rfcomm_session_p	s = NULL, s_next = NULL;
 
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	for (s = LIST_FIRST(&ng_btsocket_rfcomm_sessions); s != NULL; ) {
 		mtx_lock(&s->session_mtx);
 		/* Fetch the successor first: s may be freed further down */
 		s_next = LIST_NEXT(s, next);
 
 		ng_btsocket_rfcomm_session_task(s);
 
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_CLOSED) {
 			/* Unlink and clean the session */
 			LIST_REMOVE(s, next);
 
 			NG_BT_MBUFQ_DRAIN(&s->outq);
 			if (!LIST_EMPTY(&s->dlcs))
 				panic("%s: DLC list is not empty\n", __func__);
 
 			/*
 			 * Close L2CAP socket. Both upcalls are cleared (each
 			 * under its socket buffer lock) before the close.
 			 */
 			SOCKBUF_LOCK(&s->l2so->so_rcv);
 			soupcall_clear(s->l2so, SO_RCV);
 			SOCKBUF_UNLOCK(&s->l2so->so_rcv);
 			SOCKBUF_LOCK(&s->l2so->so_snd);
 			soupcall_clear(s->l2so, SO_SND);
 			SOCKBUF_UNLOCK(&s->l2so->so_snd);
 			soclose(s->l2so);
 
 			mtx_unlock(&s->session_mtx);
 
 			/* Destroy the session mutex, then wipe and free it */
 			mtx_destroy(&s->session_mtx);
 			bzero(s, sizeof(*s));
 			free(s, M_NETGRAPH_BTSOCKET_RFCOMM);
 		} else
 			mtx_unlock(&s->session_mtx);
 
 		s = s_next;
 	}
 
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 } /* ng_btsocket_rfcomm_sessions_task */
 
 /*
  * Process RFCOMM session. Will handle all RFCOMM sockets in one pass.
  *
  * The caller must hold s->session_mtx. Whenever a step fails, or the L2CAP
  * socket can no longer receive, the session is marked CLOSED and cleaned.
  * Panics on a session state that is not known here.
  */
 
 static void
 ng_btsocket_rfcomm_session_task(ng_btsocket_rfcomm_session_p s)
 {
 	int	failed = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	if ((s->l2so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: L2CAP connection has been terminated, so=%p, so_state=%#x, so_count=%d, " \
 "state=%d, flags=%#x\n", __func__, s->l2so, s->l2so->so_state,
 			s->l2so->so_count, s->state, s->flags);
 
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 		ng_btsocket_rfcomm_session_clean(s);
 	}
 
 	/* Now process upcall */
 	switch (s->state) {
 	case NG_BTSOCKET_RFCOMM_SESSION_LISTENING:
 		/* Accept new L2CAP connections until accept reports an error */
 		while (ng_btsocket_rfcomm_session_accept(s) == 0)
 			continue;
 		break;
 
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 		/* Process the results of the L2CAP connect */
 		ng_btsocket_rfcomm_session_process_pcb(s);
 		failed = (ng_btsocket_rfcomm_session_connect(s) != 0);
 		break;
 
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 	case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 	case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 		/* Receive first; send is attempted only if receive succeeded */
 		ng_btsocket_rfcomm_session_process_pcb(s);
 		failed = (ng_btsocket_rfcomm_session_receive(s) != 0 ||
 		    ng_btsocket_rfcomm_session_send(s) != 0);
 		break;
 
 	case NG_BTSOCKET_RFCOMM_SESSION_CLOSED:
 		break;
 
 	default:
 		panic("%s: Invalid session state=%d, flags=%#x\n",
 			__func__, s->state, s->flags);
 		break;
 	}
 
 	/* Any failure above closes the session */
 	if (failed) {
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 		ng_btsocket_rfcomm_session_clean(s);
 	}
 } /* ng_btsocket_rfcomm_session_task */
 
 /*
  * Process RFCOMM connection indicator. Caller must hold s->session_mtx.
  *
  * Looks up a listening RFCOMM socket for the session's local address and
  * the given channel, asks it for a new connection socket and links the
  * new DLC to the session. Returns the new DLC's pcb, or NULL when there is
  * no listener or no new socket could be created.
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_connect_ind(ng_btsocket_rfcomm_session_p s, int channel)
 {
 	ng_btsocket_l2cap_pcb_p		 l2 = NULL;
 	ng_btsocket_rfcomm_pcb_p	 listener = NULL, dlc = NULL;
 	struct socket			*newso;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* Find the listener matching our L2CAP source address and channel */
 	l2 = so2l2cap_pcb(s->l2so);
 	listener = ng_btsocket_rfcomm_pcb_listener(&l2->src, channel);
 	if (listener == NULL)
 		return (NULL);
 
 	/*
 	 * Ask the listening socket for a new connection socket. This is
 	 * done with the listener's pcb locked and in the listener's vnet.
 	 */
 
 	mtx_lock(&listener->pcb_mtx);
 
 	CURVNET_SET(listener->so->so_vnet);
 	newso = sonewconn(listener->so, 0);
 	CURVNET_RESTORE();
 
 	mtx_unlock(&listener->pcb_mtx);
 
 	if (newso == NULL)
 		return (NULL);
 
 	/* A new socket without RFCOMM pcb is a fatal inconsistency */
 	dlc = so2rfcomm_pcb(newso);
 	if (dlc == NULL)
 		panic("%s: pcb1 == NULL\n", __func__);
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	/* Addresses come from the session's L2CAP socket */
 	bcopy(&l2->src, &dlc->src, sizeof(dlc->src));
 	bcopy(&l2->dst, &dlc->dst, sizeof(dlc->dst));
 	dlc->channel = channel;
 
 	/* Attach the DLC to the session; s->session_mtx is already held */
 	dlc->session = s;
 	LIST_INSERT_HEAD(&s->dlcs, dlc, session_next);
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (dlc);
 } /* ng_btsocket_rfcomm_connect_ind */
 
 /*
  * Process RFCOMM connect confirmation. Caller must hold s->session_mtx.
  *
  * Every DLC on the session that is still waiting for the connection
  * (W4_CONNECT) gets the session MTU and local address and a PN request.
  * DLCs for which the PN request cannot be sent are killed.
  */
 
 static void
 ng_btsocket_rfcomm_connect_cfm(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL, following = NULL;
 	int				rc;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * The successor is saved before the body runs because
 	 * ng_btsocket_rfcomm_pcb_kill() unlinks the DLC from the session,
 	 * which rules out a plain LIST_FOREACH. The DLC timeout has already
 	 * been armed in ng_btsocket_rfcomm_connect().
 	 */
 
 	dlc = LIST_FIRST(&s->dlcs);
 	while (dlc != NULL) {
 		mtx_lock(&dlc->pcb_mtx);
 		following = LIST_NEXT(dlc, session_next);
 
 		if (dlc->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT) {
 			dlc->mtu = s->mtu;
 			bcopy(&so2l2cap_pcb(s->l2so)->src, &dlc->src,
 				sizeof(dlc->src));
 
 			rc = ng_btsocket_rfcomm_send_pn(dlc);
 			if (rc != 0)
 				ng_btsocket_rfcomm_pcb_kill(dlc, rc);
 			else
 				dlc->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING;
 		}
 
 		mtx_unlock(&dlc->pcb_mtx);
 		dlc = following;
 	}
 } /* ng_btsocket_rfcomm_connect_cfm */
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM sessions
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Create new RFCOMM session on top of the L2CAP socket "l2so" and link it
  * into the global session list. Caller must hold
  * ng_btsocket_rfcomm_sessions_mtx.
  *
  * - "src" == NULL and "dst" == NULL: only create and link the session;
  *   the caller does the rest.
  * - "dst" == NULL: bind to "src" on the RFCOMM PSM and listen.
  * - otherwise: bind to "src" and start connecting to "dst".
  *
  * Returns 0 and stores the session in *sp, or an errno value. The function
  * WILL NOT take ownership over l2so: on failure the socket is returned to
  * its original state and the caller MUST free it.
  */
 
 static int
 ng_btsocket_rfcomm_session_create(ng_btsocket_rfcomm_session_p *sp,
 		struct socket *l2so, bdaddr_p src, bdaddr_p dst,
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_session_p	sess = NULL;
 	struct sockaddr_l2cap		sa;
 	struct sockopt			sopt;
 	u_int16_t			imtu;
 	int				rc;
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 
 	/* Get zeroed memory for the session without sleeping */
 	sess = malloc(sizeof(*sess), M_NETGRAPH_BTSOCKET_RFCOMM,
 		M_NOWAIT | M_ZERO);
 	if (sess == NULL)
 		return (ENOMEM);
 
 	/* Initial values: closed session with default MTU */
 	sess->mtu = RFCOMM_DEFAULT_MTU;
 	sess->flags = 0;
 	sess->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 	NG_BT_MBUFQ_INIT(&sess->outq, ifqmaxlen);
 	LIST_INIT(&sess->dlcs);
 
 	/*
 	 * XXX The session mutex is DUPOK so that no "duplicated lock of
 	 * the same type" message is printed: while accepting a new L2CAP
 	 * connection ng_btsocket_rfcomm_session_accept() holds the mutex
 	 * of the accepting session and of the newly created one.
 	 */
 
 	mtx_init(&sess->session_mtx, "btsocks_rfcomm_session_mtx", NULL,
 		MTX_DEF | MTX_DUPOK);
 
 	/* Hook our upcall into both socket buffers, make socket non-blocking */
 	SOCKBUF_LOCK(&l2so->so_rcv);
 	soupcall_set(l2so, SO_RCV, ng_btsocket_rfcomm_upcall, NULL);
 	SOCKBUF_UNLOCK(&l2so->so_rcv);
 	SOCKBUF_LOCK(&l2so->so_snd);
 	soupcall_set(l2so, SO_SND, ng_btsocket_rfcomm_upcall, NULL);
 	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state |= SS_NBIO;
 	sess->l2so = l2so;
 
 	mtx_lock(&sess->session_mtx);
 
 	/* No addresses at all: nothing to configure, just link the session */
 	if (src == NULL && dst == NULL)
 		goto link;
 
 	/*
 	 * Incoming MTU for the L2CAP socket is the RFCOMM session default
 	 * MTU plus 5 bytes: RFCOMM frame header, one extra byte for length
 	 * and one extra byte for credits.
 	 */
 
 	imtu = sess->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1;
 
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = SOL_L2CAP;
 	sopt.sopt_name = SO_L2CAP_IMTU;
 	sopt.sopt_val = (void *) &imtu;
 	sopt.sopt_valsize = sizeof(imtu);
 	sopt.sopt_td = NULL;
 
 	rc = sosetopt(l2so, &sopt);
 	if (rc != 0)
 		goto fail;
 
 	/* Local address: RFCOMM PSM when listening, no PSM when connecting */
 	sa.l2cap_len = sizeof(sa);
 	sa.l2cap_family = AF_BLUETOOTH;
 	if (dst == NULL)
 		sa.l2cap_psm = htole16(NG_L2CAP_PSM_RFCOMM);
 	else
 		sa.l2cap_psm = 0;
 	bcopy(src, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr));
 	sa.l2cap_cid = 0;
 	sa.l2cap_bdaddr_type = BDADDR_BREDR;
 
 	rc = sobind(l2so, (struct sockaddr *) &sa, td);
 	if (rc != 0)
 		goto fail;
 
 	if (dst != NULL) {
 		/* Active side: start connecting to the remote RFCOMM PSM */
 		sess->flags = NG_BTSOCKET_RFCOMM_SESSION_INITIATOR;
 		sess->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTING;
 
 		sa.l2cap_len = sizeof(sa);
 		sa.l2cap_family = AF_BLUETOOTH;
 		sa.l2cap_psm = htole16(NG_L2CAP_PSM_RFCOMM);
 		bcopy(dst, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr));
 		sa.l2cap_cid = 0;
 		sa.l2cap_bdaddr_type = BDADDR_BREDR;
 
 		rc = soconnect(l2so, (struct sockaddr *) &sa, td);
 		if (rc != 0)
 			goto fail;
 	} else {
 		/* Passive side: listen for incoming L2CAP connections */
 		sess->flags = 0;
 		sess->state = NG_BTSOCKET_RFCOMM_SESSION_LISTENING;
 
 		rc = solisten(l2so, 10, td);
 		if (rc != 0)
 			goto fail;
 	}
 
 link:
 	/* Publish the session and hand it to the caller */
 	LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sessions, sess, next);
 	*sp = sess;
 
 	mtx_unlock(&sess->session_mtx);
 
 	return (0);
 
 fail:
 	mtx_unlock(&sess->session_mtx);
 
 	/* Undo the socket changes: remove upcalls, restore blocking mode */
 	SOCKBUF_LOCK(&l2so->so_rcv);
 	soupcall_clear(l2so, SO_RCV);
 	SOCKBUF_UNLOCK(&l2so->so_rcv);
 	SOCKBUF_LOCK(&l2so->so_snd);
 	soupcall_clear(l2so, SO_SND);
 	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state &= ~SS_NBIO;
 
 	/* Tear down the half-built session */
 	mtx_destroy(&sess->session_mtx);
 	bzero(sess, sizeof(*sess));
 	free(sess, M_NETGRAPH_BTSOCKET_RFCOMM);
 
 	return (rc);
 } /* ng_btsocket_rfcomm_session_create */
 
 /*
  * Process accept() on RFCOMM session: take one pending L2CAP connection off
  * the listening session "s0" and create a new RFCOMM session for it.
  * Caller must hold ng_btsocket_rfcomm_sessions_mtx and s0->session_mtx.
  *
  * Returns 0 when a new session was created, EWOULDBLOCK when no connection
  * is pending, EBUSY when a session between the two devices already exists,
  * or another errno value on failure. In every failure case after the
  * dequeue the accepted L2CAP socket is closed here.
  *
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0)
 {
 	struct socket			*l2so;
 	struct sockaddr_l2cap		*l2sa = NULL;
 	ng_btsocket_l2cap_pcb_t		*l2pcb = NULL;
 	ng_btsocket_rfcomm_session_p	 s = NULL;
 	int				 error;
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 	mtx_assert(&s0->session_mtx, MA_OWNED);
 
 	/*
 	 * NOTE(review): solisten_dequeue() is expected to drop the listen
 	 * lock taken here on every return path - confirm against socket(9).
 	 */
 	SOLISTEN_LOCK(s0->l2so);
 	error = solisten_dequeue(s0->l2so, &l2so, 0);
 	if (error == EWOULDBLOCK)
 		return (error);
 	if (error) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error);
 		return (error);
 	}
 
 	error = soaccept(l2so, (struct sockaddr **) &l2sa);
 
 	/*
 	 * The peer address handed back by soaccept() is not used here (the
 	 * addresses are taken from the L2CAP pcb below), so release it right
 	 * away instead of leaking it on every accepted connection.
 	 * free() of a NULL pointer is a no-op.
 	 */
 	free(l2sa, M_SONAME);
 	l2sa = NULL;
 
 	if (error != 0) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: soaccept() on L2CAP socket failed, error=%d\n", __func__, error);
 		soclose(l2so);
 
 		return (error);
 	}
 
 	/*
 	 * Check if there is already active RFCOMM session between two devices.
 	 * If so then close L2CAP connection. We only support one RFCOMM session
 	 * between each pair of devices. Note that here we assume session in any
 	 * state. The session even could be in the middle of disconnecting.
 	 */
 
 	l2pcb = so2l2cap_pcb(l2so);
 	s = ng_btsocket_rfcomm_session_by_addr(&l2pcb->src, &l2pcb->dst);
 	if (s == NULL) {
 		/* Create a new RFCOMM session (no bind/listen/connect) */
 		error = ng_btsocket_rfcomm_session_create(&s, l2so, NULL, NULL,
 				curthread /* XXX */);
 		if (error == 0) {
 			mtx_lock(&s->session_mtx);
 
 			/* Incoming connection: we are not the initiator */
 			s->flags = 0;
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED;
 
 			/*
 			 * Adjust MTU on incoming connection. Reserve 5 bytes:
 			 * RFCOMM frame header, one extra byte for length and 
 			 * one extra byte for credits.
 			 */
 
 			s->mtu = min(l2pcb->imtu, l2pcb->omtu) -
 					sizeof(struct rfcomm_frame_hdr) - 1 - 1;
 
 			mtx_unlock(&s->session_mtx);
 		} else {
 			NG_BTSOCKET_RFCOMM_ALERT(
 "%s: Failed to create new RFCOMM session, error=%d\n", __func__, error);
 
 			/* session_create() does not own l2so on failure */
 			soclose(l2so);
 		}
 	} else {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Rejecting duplicating RFCOMM session between src=%x:%x:%x:%x:%x:%x and " \
 "dst=%x:%x:%x:%x:%x:%x, state=%d, flags=%#x\n",	__func__,
 			l2pcb->src.b[5], l2pcb->src.b[4], l2pcb->src.b[3],
 			l2pcb->src.b[2], l2pcb->src.b[1], l2pcb->src.b[0],
 			l2pcb->dst.b[5], l2pcb->dst.b[4], l2pcb->dst.b[3],
 			l2pcb->dst.b[2], l2pcb->dst.b[1], l2pcb->dst.b[0],
 			s->state, s->flags);
 
 		error = EBUSY;
 		soclose(l2so);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_session_accept */
 
 /*
  * Process connect() on RFCOMM session. Caller must hold s->session_mtx.
  *
  * Returns the pending socket error if the L2CAP connect failed, 0 while
  * the connect is still in progress, and otherwise the result of sending
  * SABM on DLCI 0 and waking up the RFCOMM task.
  *
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_connect(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_l2cap_pcb_p	l2 = so2l2cap_pcb(s->l2so);
 	int			rc;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* A pending socket error means the connect attempt has failed */
 	rc = s->l2so->so_error;
 	if (rc != 0) {
 		s->l2so->so_error = 0;
 
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not connect RFCOMM session, error=%d, state=%d, flags=%#x\n",
 			__func__, rc, s->state, s->flags);
 
 		return (rc);
 	}
 
 	/* Nothing to do yet while L2CAP is still connecting */
 	if ((s->l2so->so_state & SS_ISCONNECTING) != 0)
 		return (0);
 
 	/* L2CAP is up: the session is now connected */
 	s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED;
 
 	/*
 	 * Adjust MTU on outgoing connection. Reserve 5 bytes: RFCOMM frame
 	 * header, one extra byte for length and one extra byte for credits.
 	 */
 
 	s->mtu = min(l2->imtu, l2->omtu) -
 			sizeof(struct rfcomm_frame_hdr) - 1 - 1;
 
 	/* Send SABM on DLCI 0 to open the multiplexor channel */
 	rc = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_SABM, 0);
 	if (rc != 0)
 		return (rc);
 
 	return (ng_btsocket_rfcomm_task_wakeup());
 }/* ng_btsocket_rfcomm_session_connect */
 
 /*
  * Receive data on RFCOMM session: drain the L2CAP socket and hand every
  * packet to ng_btsocket_rfcomm_receive_frame(). Caller must hold
  * s->session_mtx.
  *
  * Returns 0 when there is nothing (more) to read, or the socket/receive
  * error otherwise. Errors from frame processing are not propagated.
  *
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_receive(ng_btsocket_rfcomm_session_p s)
 {
 	struct mbuf	*m = NULL;
 	struct uio	 uio;
 	int		 more, flags, error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* Can we read from the L2CAP socket? */
 	if (!soreadable(s->l2so))
 		return (0);
 
 	/* First check for error on L2CAP socket */
 	if ((error = s->l2so->so_error) != 0) {
 		s->l2so->so_error = 0;
 
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not receive data from L2CAP socket, error=%d, state=%d, flags=%#x\n",
 			__func__, error, s->state, s->flags);
 
 		return (error);
 	}
 
 	/*
 	 * Read all packets from the L2CAP socket. 
 	 * XXX FIXME/VERIFY is that correct? For now use m->m_nextpkt as
 	 * indication that there is more packets on the socket's buffer.
 	 * Also what should we use in uio.uio_resid?
 	 * May be s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1?
 	 */
 
 	for (more = 1; more; ) {
 		/* Try to get next packet from socket */
 		bzero(&uio, sizeof(uio));
 /*		uio.uio_td = NULL; */
 		uio.uio_resid = 1000000000;
 		flags = MSG_DONTWAIT;
 
 		m = NULL;
 		error = soreceive(s->l2so, NULL, &uio, &m,
 		    (struct mbuf **) NULL, &flags);
 		if (error != 0) {
 			if (error == EWOULDBLOCK)
 				return (0); /* XXX can happen? */
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not receive data from L2CAP socket, error=%d\n", __func__, error);
 
 			return (error);
 		}
 
 		/*
 		 * soreceive() may succeed without returning any data (for
 		 * example when the peer closed the connection after the
 		 * soreadable() check above). There is nothing to process
 		 * then, and "m" must not be dereferenced.
 		 */
 		if (m == NULL)
 			return (0);
 
 		more = (m->m_nextpkt != NULL);
 		m->m_nextpkt = NULL;
 
 		/* Frame errors are logged by the callee and ignored here */
 		ng_btsocket_rfcomm_receive_frame(s, m);
 	}
 
 	return (0);
 } /* ng_btsocket_rfcomm_session_receive */
 
 /*
  * Send data on RFCOMM session: push queued frames from the session output
  * queue into the L2CAP socket for as long as the socket is writeable.
  * Caller must hold s->session_mtx.
  *
  * Returns 0 when the queue is empty or the socket is not writeable any
  * more, or the socket/send error otherwise.
  *
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_send(ng_btsocket_rfcomm_session_p s)
 {
 	struct mbuf	*pkt = NULL;
 	int		 rc;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	for (;;) {
 		/* Stop as soon as the socket cannot take more data */
 		if (!sowriteable(s->l2so))
 			break;
 
 		/* Report and clear a pending socket error */
 		rc = s->l2so->so_error;
 		if (rc != 0) {
 			s->l2so->so_error = 0;
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Detected error=%d on L2CAP socket, state=%d, flags=%#x\n",
 				__func__, rc, s->state, s->flags);
 
 			return (rc);
 		}
 
 		/* Next frame from the session queue; empty queue ends the loop */
 		NG_BT_MBUFQ_DEQUEUE(&s->outq, pkt);
 		if (pkt == NULL)
 			break;
 
 		/* Hand the frame to the protocol send routine of the socket */
 		rc = (*s->l2so->so_proto->pr_usrreqs->pru_send)(s->l2so,
 				0, pkt, NULL, NULL, curthread /* XXX */);
 		if (rc != 0) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not send data to L2CAP socket, error=%d\n", __func__, rc);
 
 			return (rc);
 		}
 	}
 
 	return (0);
 } /* ng_btsocket_rfcomm_session_send */
 
 /*
  * Close and disconnect all DLCs for the given session. Caller must hold
  * s->session_mtx. DLCs that were connected are killed with ECONNRESET,
  * all others with ECONNREFUSED.
  *
  * NOTE(review): the session is said to be woken up as a result; that
  * presumably happens inside ng_btsocket_rfcomm_pcb_kill() - confirm.
  */
 
 static void
 ng_btsocket_rfcomm_session_clean(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL, following = NULL;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * The successor is fetched up front: ng_btsocket_rfcomm_pcb_kill()
 	 * unlinks the DLC from the session, so LIST_FOREACH cannot be used.
 	 */
 
 	dlc = LIST_FIRST(&s->dlcs);
 	while (dlc != NULL) {
 		mtx_lock(&dlc->pcb_mtx);
 		following = LIST_NEXT(dlc, session_next);
 
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Disconnecting dlci=%d, state=%d, flags=%#x\n",
 			__func__, dlc->dlci, dlc->state, dlc->flags);
 
 		/* Established DLCs see a reset, pending ones a refusal */
 		ng_btsocket_rfcomm_pcb_kill(dlc,
 		    (dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) ?
 		    ECONNRESET : ECONNREFUSED);
 
 		mtx_unlock(&dlc->pcb_mtx);
 		dlc = following;
 	}
 } /* ng_btsocket_rfcomm_session_clean */
 
 /*
  * Process all DLCs on the session: handle detach and timeout flags, push
  * pending data for connected DLCs and send DISC for disconnecting ones.
  * Caller MUST hold s->session_mtx.
  */
 
 static void
 ng_btsocket_rfcomm_session_process_pcb(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL, following = NULL;
 	int				rc;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * The successor is fetched up front: ng_btsocket_rfcomm_pcb_kill()
 	 * unlinks the DLC from the session, so LIST_FOREACH cannot be used.
 	 */
 
 	dlc = LIST_FIRST(&s->dlcs);
 	while (dlc != NULL) {
 		mtx_lock(&dlc->pcb_mtx);
 		following = LIST_NEXT(dlc, session_next);
 
 		switch (dlc->state) {
 		/*
 		 * W4_CONNECT: look for detach first, then for timeout.
 		 * A detached DLC is killed without an error code.
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 			if ((dlc->flags & (NG_BTSOCKET_RFCOMM_DLC_DETACHED |
 			    NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT)) != 0)
 				ng_btsocket_rfcomm_pcb_kill(dlc,
 				    (dlc->flags &
 				    NG_BTSOCKET_RFCOMM_DLC_DETACHED) ?
 				    0 : ETIMEDOUT);
 			break;
 
 		/*
 		 * CONFIGURING or CONNECTING: only the timeout matters.
 		 * If detach() was called then the DLC will be moved into
 		 * DISCONNECTING state.
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 			if ((dlc->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) != 0)
 				ng_btsocket_rfcomm_pcb_kill(dlc, ETIMEDOUT);
 			break;
 
 		/*
 		 * CONNECTED: send data (if any) from the socket's send
 		 * queue. Data is sent from either all sockets or none,
 		 * which may overload the session's outgoing queue (this
 		 * is not checked).
 		 *
 		 * XXX FIXME need scheduler for RFCOMM sockets
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 			rc = ng_btsocket_rfcomm_pcb_send(dlc, ALOT);
 			if (rc != 0)
 				ng_btsocket_rfcomm_pcb_kill(dlc, rc);
 			break;
 
 		/*
 		 * DISCONNECTING: a DISC frame must be sent, unless the
 		 * DLC already has its timeout set, in which case the
 		 * frame is not resent and only expiry is checked.
 		 *
 		 * XXX FIXME need to drain all data from the socket's queue
 		 * if LINGER option was set
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 			if ((dlc->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) != 0) {
 				if ((dlc->flags &
 				    NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT) != 0)
 					ng_btsocket_rfcomm_pcb_kill(dlc,
 					    ETIMEDOUT);
 				break;
 			}
 
 			rc = ng_btsocket_rfcomm_send_command(dlc->session,
 					RFCOMM_FRAME_DISC, dlc->dlci);
 			if (rc != 0)
 				ng_btsocket_rfcomm_pcb_kill(dlc, rc);
 			else
 				ng_btsocket_rfcomm_timeout(dlc);
 			break;
 
 		/* Any other state (including CLOSED) must not be on the list */
 		default:
 			panic("%s: Invalid DLC state=%d, flags=%#x\n",
 				__func__, dlc->state, dlc->flags);
 			break;
 		}
 
 		mtx_unlock(&dlc->pcb_mtx);
 		dlc = following;
 	}
 } /* ng_btsocket_rfcomm_session_process_pcb */
 
 /*
  * Find RFCOMM session between "src" and "dst". A "src" equal to
  * NG_HCI_BDADDR_ANY matches any local address. Returns the first matching
  * session or NULL. Caller MUST hold ng_btsocket_rfcomm_sessions_mtx.
  */
 
 static ng_btsocket_rfcomm_session_p
 ng_btsocket_rfcomm_session_by_addr(bdaddr_p src, bdaddr_p dst)
 {
 	ng_btsocket_rfcomm_session_p	sess = NULL;
 	ng_btsocket_l2cap_pcb_p		l2 = NULL;
 	int				wildcard;
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 
 	/* Is the caller asking for "any" local address? */
 	wildcard = (bcmp(src, NG_HCI_BDADDR_ANY, sizeof(*src)) == 0);
 
 	for (sess = LIST_FIRST(&ng_btsocket_rfcomm_sessions); sess != NULL;
 	    sess = LIST_NEXT(sess, next)) {
 		l2 = so2l2cap_pcb(sess->l2so);
 
 		/* Remote address must always match */
 		if (bcmp(&l2->dst, dst, sizeof(*dst)) != 0)
 			continue;
 
 		/* Local address must match unless it is a wildcard */
 		if (wildcard || bcmp(&l2->src, src, sizeof(*src)) == 0)
 			return (sess);
 	}
 
 	return (NULL);
 } /* ng_btsocket_rfcomm_session_by_addr */
 
 /*****************************************************************************
  *****************************************************************************
  **                                  RFCOMM 
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Process incoming RFCOMM frame. Caller must hold s->session_mtx.
  *
  * Parses the frame header, verifies the FCS and dispatches on the frame
  * type. The mbuf chain "m0" is always consumed: UIH frames are passed on to
  * the MCC/UIH handlers, everything else is freed here.
  *
  * Returns 0 or an errno value (EINVAL for frames that are too short, have
  * a bad checksum or an unknown type, ENOBUFS when m_pullup() fails).
  *
  * The frame is checked to be long enough for its header and FCS. The
  * length field carried in the header is still not compared against the
  * amount of data actually received.
  */
 
 static int
 ng_btsocket_rfcomm_receive_frame(ng_btsocket_rfcomm_session_p s,
 		struct mbuf *m0)
 {
 	struct rfcomm_frame_hdr	*hdr = NULL;
 	struct mbuf		*m = NULL;
 	u_int16_t		 length;
 	u_int8_t		 dlci, type;
 	int			 error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * The shortest valid frame is the frame header (which includes the
 	 * first length byte) followed by the FCS byte. Frames come from the
 	 * remote device, so drop anything shorter instead of reading the
 	 * header and FCS from bytes that are not part of the packet.
 	 */
 
 	if (m0->m_pkthdr.len < (int)(sizeof(*hdr) + 1)) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Frame is too short, len=%d\n",
 			__func__, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/* Pullup as much as we can into first mbuf (for direct access) */
 	length = min(m0->m_pkthdr.len, MHLEN);
 	if (m0->m_len < length) {
 		if ((m0 = m_pullup(m0, length)) == NULL) {
 			NG_BTSOCKET_RFCOMM_ALERT(
 "%s: m_pullup(%d) failed\n", __func__, length);
 
 			return (ENOBUFS);
 		}
 	}
 
 	hdr = mtod(m0, struct rfcomm_frame_hdr *);
 	dlci = RFCOMM_DLCI(hdr->address);
 	type = RFCOMM_TYPE(hdr->control);
 
 	/* Test EA bit in length. If not set then we have 2 bytes of length */
 	if (!RFCOMM_EA(hdr->length)) {
 		/* Second length byte and the FCS byte must both be present */
 		if (m0->m_pkthdr.len < (int)(sizeof(*hdr) + 1 + 1)) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Frame is too short, len=%d\n",
 				__func__, m0->m_pkthdr.len);
 			NG_FREE_M(m0);
 
 			return (EINVAL);
 		}
 
 		bcopy(&hdr->length, &length, sizeof(length));
 		length = le16toh(length) >> 1;
 		m_adj(m0, sizeof(*hdr) + 1);
 	} else {
 		length = hdr->length >> 1;
 		m_adj(m0, sizeof(*hdr));
 	}
 
 	/* "hdr" is still used below, after the header has been trimmed */
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got frame type=%#x, dlci=%d, length=%d, cr=%d, pf=%d, len=%d\n",
 		__func__, type, dlci, length, RFCOMM_CR(hdr->address),
 		RFCOMM_PF(hdr->control), m0->m_pkthdr.len);
 
 	/*
 	 * Get FCS (the last byte in the frame)
 	 * XXX this will not work if mbuf chain ends with empty mbuf.
 	 * XXX let's hope it never happens :)
 	 * With the length checks above the packet holds at least the FCS
 	 * byte at this point.
 	 */
 
 	for (m = m0; m->m_next != NULL; m = m->m_next)
 		;
 	if (m->m_len <= 0)
 		panic("%s: Empty mbuf at the end of the chain, len=%d\n",
 			__func__, m->m_len);
 
 	/*
 	 * Check FCS. We only need to calculate FCS on first 2 or 3 bytes
 	 * and already m_pullup'ed mbuf chain, so it should be safe.
 	 */
 
 	if (ng_btsocket_rfcomm_check_fcs((u_int8_t *) hdr, type, m->m_data[m->m_len - 1])) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Bad checksum\n", __func__);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	m_adj(m0, -1); /* Trim FCS byte */
 
 	/*
 	 * Process RFCOMM frame.
 	 *
 	 * From TS 07.10 spec
 	 * 
 	 * "... In the case where a SABM or DISC command with the P bit set
 	 * to 0 is received then the received frame shall be discarded..."
  	 *
 	 * "... If a unsolicited DM response is received then the frame shall
 	 * be processed irrespective of the P/F setting... "
 	 *
 	 * "... The station may transmit response frames with the F bit set 
 	 * to 0 at any opportunity on an asynchronous basis. However, in the 
 	 * case where a UA response is received with the F bit set to 0 then 
 	 * the received frame shall be discarded..."
 	 *
 	 * From Bluetooth spec
 	 *
 	 * "... When credit based flow control is being used, the meaning of
 	 * the P/F bit in the control field of the RFCOMM header is redefined
 	 * for UIH frames..."
 	 */
 
 	switch (type) {
 	case RFCOMM_FRAME_SABM:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_sabm(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_DISC:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_disc(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_UA:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_ua(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_DM:
 		error = ng_btsocket_rfcomm_receive_dm(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_UIH:
 		/* The UIH/MCC handlers take over the mbuf chain */
 		if (dlci == 0)
 			error = ng_btsocket_rfcomm_receive_mcc(s, m0);
 		else
 			error = ng_btsocket_rfcomm_receive_uih(s, dlci,
 					RFCOMM_PF(hdr->control), m0);
 
 		return (error);
 		/* NOT REACHED */
 
 	default:
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Unknown type=%#x\n", __func__, type);
 		error = EINVAL;
 		break;
 	}
 
 	/* Control frames carry no payload we keep: release the chain */
 	NG_FREE_M(m0);
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_frame */
 
 /*
  * Process RFCOMM SABM frame. Caller must hold s->session_mtx.
  *
  * SABM on DLCI 0 opens the multiplexor channel. SABM on any other DLCI
  * opens a data channel: either one already known to the session (in
  * CONNECTING state) or a new incoming one handed to a listening socket.
  * Returns 0 or an errno value.
  */
 
 static int
 ng_btsocket_rfcomm_receive_sabm(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL;
 	int				rc = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got SABM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means open multiplexor channel */
 	if (dlci == 0) {
 		if (s->state != NG_BTSOCKET_RFCOMM_SESSION_CONNECTED &&
 		    s->state != NG_BTSOCKET_RFCOMM_SESSION_OPEN) {
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got SABM for session in invalid state state=%d, flags=%#x\n",
 				__func__, s->state, s->flags);
 
 			return (EINVAL);
 		}
 
 		/* Acknowledge; failure to do so closes the whole session */
 		rc = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, dlci);
 		if (rc != 0) {
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 		} else {
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN;
 			ng_btsocket_rfcomm_connect_cfm(s);
 		}
 
 		return (rc);
 	}
 
 	/* Data channels require an open multiplexor channel */
 	if (s->state != NG_BTSOCKET_RFCOMM_SESSION_OPEN) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got SABM for dlci=%d with mulitplexor channel closed, state=%d, " \
 "flags=%#x\n",		__func__, dlci, s->state, s->flags);
 
 		return (EINVAL);
 	}
 
 	/*
 	 * The DLCI may already exist: this might happen when remote peer
 	 * uses PN command before actual open (SABM) happens. Otherwise it
 	 * must be an incoming connection with default parameters, so try
 	 * to accept it.
 	 */
 
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (dlc != NULL) {
 		mtx_lock(&dlc->pcb_mtx);
 
 		if (dlc->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTING) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got SABM for dlci=%d in invalid state=%d, flags=%#x\n",
 				__func__, dlci, dlc->state, dlc->flags);
 			mtx_unlock(&dlc->pcb_mtx);
 
 			return (ENOENT);
 		}
 
 		ng_btsocket_rfcomm_untimeout(dlc);
 	} else {
 		dlc = ng_btsocket_rfcomm_connect_ind(s,
 				RFCOMM_SRVCHANNEL(dlci));
 		if (dlc == NULL) {
 			/* Nobody is listen()ing on the requested DLCI */
 			return (ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DM, dlci));
 		}
 
 		mtx_lock(&dlc->pcb_mtx);
 
 		dlc->dlci = dlci;
 	}
 
 	/* Common tail: acknowledge with UA, then send MSC */
 	rc = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, dlci);
 	if (rc == 0)
 		rc = ng_btsocket_rfcomm_send_msc(dlc);
 
 	if (rc != 0)
 		ng_btsocket_rfcomm_pcb_kill(dlc, rc);
 	else {
 		dlc->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 		soisconnected(dlc->so);
 	}
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (rc);
 } /* ng_btsocket_rfcomm_receive_sabm */
 
 /*
  * Process RFCOMM DISC frame. Caller must hold s->session_mtx.
  *
  * DISC on DLCI 0 closes the multiplexor channel and disconnects all DLCs.
  * DISC on another DLCI is answered with UA and kills that DLC, or is
  * answered with DM when the DLCI is unknown. Returns the result of sending
  * the response frame.
  */
 
 static int
 ng_btsocket_rfcomm_receive_disc(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL;
 	int				rc = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DISC, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means close multiplexor channel */
 	if (dlci == 0) {
 		/* XXX FIXME assume that remote side will close the socket */
 		rc = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, 0);
 
 		/*
 		 * UA sent and we were not disconnecting yet: start
 		 * disconnecting. In every other case the session is
 		 * closed (XXX).
 		 */
 		if (rc == 0 &&
 		    s->state != NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING)
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING;
 		else
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 
 		ng_btsocket_rfcomm_session_clean(s);
 
 		return (rc);
 	}
 
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (dlc == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got DISC for non-existing dlci=%d\n", __func__, dlci);
 
 		return (ng_btsocket_rfcomm_send_command(s,
 				RFCOMM_FRAME_DM, dlci));
 	}
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DISC for dlci=%d, state=%d, flags=%#x\n",
 		__func__, dlci, dlc->state, dlc->flags);
 
 	rc = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, dlci);
 
 	/* A connected DLC is closed without error, others are refused */
 	ng_btsocket_rfcomm_pcb_kill(dlc,
 	    (dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) ?
 	    0 : ECONNREFUSED);
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (rc);
 } /* ng_btsocket_rfcomm_receive_disc */
 
 /*
  * Process RFCOMM UA frame. Caller must hold s->session_mtx.
  *
  * UA on DLCI 0 completes opening (CONNECTED -> OPEN) or closing
  * (DISCONNECTING -> CLOSED) of the multiplexor channel. UA on another DLCI
  * completes connecting or disconnecting of that DLC. An unknown DLCI is
  * answered with DM. Returns 0 or an errno value.
  */
 
 static int
 ng_btsocket_rfcomm_receive_ua(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL;
 	int				rc = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UA, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* dlci == 0 means multiplexor channel */
 	if (dlci == 0) {
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_CONNECTED) {
 			/* Our SABM was acknowledged: channel is open */
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN;
 			ng_btsocket_rfcomm_connect_cfm(s);
 		} else if (s->state ==
 		    NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING) {
 			/* Disconnect was acknowledged: session is closed */
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 		} else {
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for session in invalid state=%d(%d), flags=%#x, mtu=%d\n",
 				__func__, s->state, INITIATOR(s), s->flags,
 				s->mtu);
 			rc = ENOENT;
 		}
 
 		return (rc);
 	}
 
 	/* Check if we have this DLCI */
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (dlc == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for non-existing dlci=%d\n", __func__, dlci);
 
 		return (ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_DM,
 				dlci));
 	}
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UA for dlci=%d, state=%d, flags=%#x\n",
 		__func__, dlci, dlc->state, dlc->flags);
 
 	if (dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTING) {
 		/* Connect was acknowledged: stop the timer and send MSC */
 		ng_btsocket_rfcomm_untimeout(dlc);
 
 		rc = ng_btsocket_rfcomm_send_msc(dlc);
 		if (rc == 0) {
 			dlc->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 			soisconnected(dlc->so);
 		}
 	} else if (dlc->state == NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING) {
 		/* Disconnect was acknowledged: the DLC goes away */
 		ng_btsocket_rfcomm_pcb_kill(dlc, 0);
 	} else {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for dlci=%d in invalid state=%d, flags=%#x\n",
 			__func__, dlci, dlc->state, dlc->flags);
 		rc = ENOENT;
 	}
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (rc);
 } /* ng_btsocket_rfcomm_receive_ua */
 
 /*
  * Process RFCOMM DM frame. Caller must hold s->session_mtx.
  *
  * DM on DLCI 0 closes the session and disconnects all its DLCs. DM on
  * another DLCI kills that DLC (ECONNRESET when it was connected,
  * ECONNREFUSED otherwise). Always returns 0.
  */
 
 static int
 ng_btsocket_rfcomm_receive_dm(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means multiplexor channel: drop every DLC */
 	if (dlci == 0) {
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 		ng_btsocket_rfcomm_session_clean(s);
 
 		return (0);
 	}
 
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (dlc == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got DM for non-existing dlci=%d\n", __func__, dlci);
 
 		return (0);
 	}
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DM for dlci=%d, state=%d, flags=%#x\n",
 		__func__, dlci, dlc->state, dlc->flags);
 
 	/* Established DLCs see a reset, pending ones a refusal */
 	ng_btsocket_rfcomm_pcb_kill(dlc,
 	    (dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) ?
 	    ECONNRESET : ECONNREFUSED);
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_receive_dm */
 
 /*
  * Process RFCOMM UIH frame (data)
  *
  * s   - session the frame arrived on; caller must hold s->session_mtx
  * dlci - data link the frame is addressed to
  * pf  - P/F bit of the frame; when the DLC uses credit based flow control
  *       a set bit means the first payload byte carries new credits
  * m0  - frame payload. It is always consumed here: either appended to the
  *       socket receive buffer or freed on the way out.
  *
  * Returns 0, the error from sending DM for an unknown dlci, EINVAL (DLC is
  * not connected), EMSGSIZE (payload larger than DLC mtu) or ENOBUFS (no
  * room in the socket receive buffer). Frames dropped because of asserted
  * flow control do not change the return value.
  */
 
 static int
 ng_btsocket_rfcomm_receive_uih(ng_btsocket_rfcomm_session_p s, int dlci,
 		int pf, struct mbuf *m0)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UIH, session state=%d, flags=%#x, mtu=%d, dlci=%d, pf=%d, len=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci, pf,
 		m0->m_pkthdr.len);
 
 	/* XXX should we do it here? Check for session flow control */
 	if (s->flags & NG_BTSOCKET_RFCOMM_SESSION_LFC) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH with session flow control asserted, state=%d, flags=%#x\n",
 			__func__, s->state, s->flags);
 		goto drop;
 	}
 
 	/* Check if we have this dlci */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (pcb == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH for non-existing dlci=%d\n", __func__, dlci);
 		/* Tell the peer this dlci is not open */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci);
 		goto drop;
 	}
 
 	/* From here on every exit must go through drop1 to release the lock */
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Check dlci state */
 	if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH for dlci=%d in invalid state=%d, flags=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags);
 		error = EINVAL;
 		goto drop1;
 	}
 
 	/*
 	 * Check dlci flow control: the peer must not send when it has no
 	 * credits left (credit based flow control) or when we have asserted
 	 * FC in our modem signals.
 	 */
 	if (((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pcb->rx_cred <= 0) ||
 	     (pcb->lmodem & RFCOMM_MODEM_FC)) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got UIH for dlci=%d with asserted flow control, state=%d, " \
 "flags=%#x, rx_cred=%d, lmodem=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags,
 			pcb->rx_cred, pcb->lmodem);
 		goto drop1;
 	}
 
 	/*
 	 * Did we get any credits?
 	 * NOTE(review): the credit byte is read without checking that m0
 	 * holds at least one byte - presumably guaranteed by the caller;
 	 * confirm.
 	 */
 	if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pf) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got %d more credits for dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",
 			__func__, *mtod(m0, u_int8_t *), dlci, pcb->state,
 			pcb->flags, pcb->rx_cred, pcb->tx_cred);
 
 		/* First payload byte is the credit count; strip it */
 		pcb->tx_cred += *mtod(m0, u_int8_t *);
 		m_adj(m0, 1);
 
 		/* Send more from the DLC. XXX check for errors? */
 		ng_btsocket_rfcomm_pcb_send(pcb, ALOT);
 	}
 
 	/* OK, the rest of the mbuf is the data */
 	if (m0->m_pkthdr.len > 0) {
 		/* If we are using credit flow control decrease rx_cred here */
 		if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 			/* Give remote peer more credits (if needed) */
 			if (-- pcb->rx_cred <= RFCOMM_MAX_CREDITS / 2)
 				ng_btsocket_rfcomm_send_credits(pcb);
 			else
 				NG_BTSOCKET_RFCOMM_INFO(
 "%s: Remote side still has credits, dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",		__func__, dlci, pcb->state, pcb->flags,
 					pcb->rx_cred, pcb->tx_cred);
 		}
 
 		/* Check packet against mtu on dlci */
 		if (m0->m_pkthdr.len > pcb->mtu) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got oversized UIH for dlci=%d, state=%d, flags=%#x, mtu=%d, len=%d\n",
 				__func__, dlci, pcb->state, pcb->flags,
 				pcb->mtu, m0->m_pkthdr.len);
 
 			error = EMSGSIZE;
 		} else if (m0->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) {
 			/*
 			 * This is really bad. Receive queue on socket does
 			 * not have enough space for the packet. We do not
 			 * have any other choice but drop the packet.
 			 */
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Not enough space in socket receive queue. Dropping UIH for dlci=%d, " \
 "state=%d, flags=%#x, len=%d, space=%ld\n",
 				__func__, dlci, pcb->state, pcb->flags,
 				m0->m_pkthdr.len, sbspace(&pcb->so->so_rcv));
 
 			error = ENOBUFS;
 		} else {
 			/* Append packet to the socket receive queue */
 			sbappend(&pcb->so->so_rcv, m0, 0);
 			/* Socket buffer owns the mbuf now; do not free it below */
 			m0 = NULL;
 
 			sorwakeup(pcb->so);
 		}
 	}
 drop1:
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m0); /* checks for != NULL */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_uih */
 
 /*
  * Process RFCOMM MCC command (Multiplexor)
  * 
  * From TS 07.10 spec
  *
  * "5.4.3.1 Information Data
  * 
  *  ...The frames (UIH) sent by the initiating station have the C/R bit set 
  *  to 1 and those sent by the responding station have the C/R bit set to 0..."
  *
  * "5.4.6.2 Operating procedures
  *
  *  Messages always exist in pairs; a command message and a corresponding 
  *  response message. If the C/R bit is set to 1 the message is a command, 
  *  if it is set to 0 the message is a response...
  *
  *  ...
  * 
  *  NOTE: Notice that when UIH frames are used to convey information on DLCI 0
  *  there are at least two different fields that contain a C/R bit, and the 
  *  bits are set of different form. The C/R bit in the Type field shall be set
  *  as it is stated above, while the C/R bit in the Address field (see subclause
  *  5.2.1.2) shall be set as it is described in subclause 5.4.3.1."
  */
 
 static int
 ng_btsocket_rfcomm_receive_mcc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = NULL;
 	u_int8_t		 cr, type, length;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * The mbuf chain was m_pullup()'ed in
 	 * ng_btsocket_rfcomm_receive_frame(), so the MCC header can be read
 	 * straight from the first mbuf. All MCC commands should fit into
 	 * single mbuf (except probably TEST).
 	 */
 
 	mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	cr = RFCOMM_CR(mcc->type);
 	type = RFCOMM_MCC_TYPE(mcc->type);
 	length = RFCOMM_MCC_LENGTH(mcc->length);
 
 	/* MCC header plus advertised payload must match the frame exactly */
 	if (sizeof(*mcc) + length != m0->m_pkthdr.len) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid MCC frame length=%d, len=%d\n",
 			__func__, length, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 
 		return (EMSGSIZE);
 	}
 
 	/* Known commands are handed (together with m0) to their handlers */
 	switch (type) {
 	case RFCOMM_MCC_TEST:
 		return (ng_btsocket_rfcomm_receive_test(s, m0));
 
 	case RFCOMM_MCC_FCON:
 	case RFCOMM_MCC_FCOFF:
 		return (ng_btsocket_rfcomm_receive_fc(s, m0));
 
 	case RFCOMM_MCC_MSC:
 		return (ng_btsocket_rfcomm_receive_msc(s, m0));
 
 	case RFCOMM_MCC_RPN:
 		return (ng_btsocket_rfcomm_receive_rpn(s, m0));
 
 	case RFCOMM_MCC_RLS:
 		return (ng_btsocket_rfcomm_receive_rls(s, m0));
 
 	case RFCOMM_MCC_PN:
 		return (ng_btsocket_rfcomm_receive_pn(s, m0));
 
 	case RFCOMM_MCC_NSC:
 		/* Peer did not like one of our commands; just report it */
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got MCC NSC, type=%#x, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",	__func__, RFCOMM_MCC_TYPE(*((u_int8_t *)(mcc + 1))), cr,
 			 length, s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 
 		return (0);
 
 	default:
 		break;
 	}
 
 	/* Unknown command - answer with NSC (non supported command) */
 	NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got unknown MCC, type=%#x, cr=%d, length=%d, session state=%d, " \
 "flags=%#x, mtu=%d, len=%d\n",
 		__func__, type, cr, length, s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	/* Reuse mbuf to send NSC: header followed by one byte of payload */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_NSC);
 	mcc->length = RFCOMM_MKLEN8(1);
 
 	/* Payload is the MCC command type we did not like */
 	m0->m_data[sizeof(*mcc)] = RFCOMM_MKMCC_TYPE(cr, type);
 	m0->m_pkthdr.len = m0->m_len = sizeof(*mcc) + 1;
 
 	/* Send UIH frame on the multiplexor channel */
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_mcc */
 
 /*
  * Receive RFCOMM TEST MCC command.
  *
  * A command is echoed back to the peer as a response, reusing m0. A
  * response is discarded. Caller must hold s->session_mtx; m0 is consumed.
  */
 
 static int
 ng_btsocket_rfcomm_receive_test(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC TEST, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \
 "len=%d\n",	__func__, RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	/* Turn the command into a response and send the same bytes back */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_TEST);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_test */
 
 /*
  * Receive RFCOMM FCON/FCOFF MCC command.
  *
  * Turns aggregate flow ON/OFF for the entire session. When remote peer
  * asserted flow control no transmission shall occur except on dlci 0
  * (control channel). Caller must hold s->session_mtx; m0 is consumed.
  */
 
 static int
 ng_btsocket_rfcomm_receive_fc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	u_int8_t		 type = RFCOMM_MCC_TYPE(mcc->type);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC FC%s, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \
 "len=%d\n",	__func__, (type == RFCOMM_MCC_FCON)? "ON" : "OFF",
 		RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	/* FCOFF raises the remote flow control flag, FCON clears it */
 	if (type != RFCOMM_MCC_FCON)
 		s->flags |= NG_BTSOCKET_RFCOMM_SESSION_RFC;
 	else
 		s->flags &= ~NG_BTSOCKET_RFCOMM_SESSION_RFC;
 
 	/* Acknowledge by sending the same message back as a response */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, type);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_fc  */
 
 /*
  * Receive RFCOMM MSC MCC command.
  *
  * For a command: record the remote modem signals on the addressed DLC and
  * send the message back as a response. Responses are discarded. Returns
  * ENOENT for an unknown dlci and EINVAL when the DLC is neither connecting
  * nor connected. Caller must hold s->session_mtx; m0 is consumed.
  */
 
 static int
 ng_btsocket_rfcomm_receive_msc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr		*mcc = mtod(m0, struct rfcomm_mcc_hdr*);
 	struct rfcomm_mcc_msc		*msc = (struct rfcomm_mcc_msc *)(mcc+1);
 	ng_btsocket_rfcomm_pcb_t	*dlc = NULL;
 	int				 dlci = RFCOMM_DLCI(msc->address);
 	int				 error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC MSC, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",
 		__func__,  dlci, RFCOMM_CR(mcc->type),
 		RFCOMM_MCC_LENGTH(mcc->length), s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (dlc == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got MSC command for non-existing dlci=%d\n",
 			__func__, dlci);
 		NG_FREE_M(m0);
 
 		return (ENOENT);
 	}
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	switch (dlc->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 		break;
 
 	default:
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got MSC on dlci=%d in invalid state=%d\n",
 			__func__, dlci,
 			dlc->state);
 
 		mtx_unlock(&dlc->pcb_mtx);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/* Update remote port signals */
 	dlc->rmodem = msc->modem;
 
 	/* Reply with the very same message marked as a response */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_MSC);
 	error = ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0);
 
 #if 0 /* YYY */
 	/* Send more data from DLC. XXX check for errors? */
 	if (!(dlc->rmodem & RFCOMM_MODEM_FC) &&
 	    !(dlc->flags & NG_BTSOCKET_RFCOMM_DLC_CFC))
 		ng_btsocket_rfcomm_pcb_send(dlc, ALOT);
 #endif /* YYY */
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_msc */
 
 /*
  * Receive RFCOMM RPN MCC command
  * XXX FIXME do we need htole16/le16toh for RPN param_mask?
  *
  * A one byte RPN command is a request for the current port settings and is
  * answered with the defaults. A full RPN command is a negotiation: only
  * 8 data bits, 1 stop bit, no parity, no flow control and the default
  * XON/XOFF characters are accepted; for every rejected parameter the
  * corresponding bit is cleared in the returned parameter mask. RPN
  * responses are discarded.
  *
  * Caller must hold s->session_mtx; m0 is consumed (reused for the reply or
  * freed). Returns the result of sending the reply, or 0 for a response.
  */
 
 static int
 ng_btsocket_rfcomm_receive_rpn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*hdr = mtod(m0, struct rfcomm_mcc_hdr *);
 	struct rfcomm_mcc_rpn	*rpn = (struct rfcomm_mcc_rpn *)(hdr + 1);
 	int			 error = 0;
 	u_int16_t		 param_mask;
 	u_int8_t		 bit_rate, data_bits, stop_bits, parity,
 				 flow_control, xon_char, xoff_char;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC RPN, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",
 		__func__, RFCOMM_DLCI(rpn->dlci), RFCOMM_CR(hdr->type),
 		RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	if (RFCOMM_CR(hdr->type)) {
 		/* Start with "everything accepted" and clear rejected bits */
 		param_mask = RFCOMM_RPN_PM_ALL;
 
 		if (RFCOMM_MCC_LENGTH(hdr->length) == 1) {
 			/* Request - return default setting */
 			bit_rate = RFCOMM_RPN_BR_115200;
 			data_bits = RFCOMM_RPN_DATA_8;
 			stop_bits = RFCOMM_RPN_STOP_1;
 			parity = RFCOMM_RPN_PARITY_NONE;
 			flow_control = RFCOMM_RPN_FLOW_NONE;
 			xon_char = RFCOMM_RPN_XON_CHAR;
 			xoff_char = RFCOMM_RPN_XOFF_CHAR;
 		} else {
 			/*
 			 * Ignore/accept bit_rate, 8 bits, 1 stop bit, no
 			 * parity, no flow control lines, default XON/XOFF
 			 * chars.
 			 */
 
 			bit_rate = rpn->bit_rate;
 			rpn->param_mask = le16toh(rpn->param_mask); /* XXX */
 
 			data_bits = RFCOMM_RPN_DATA_BITS(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_DATA &&
 			    data_bits != RFCOMM_RPN_DATA_8) {
 				data_bits = RFCOMM_RPN_DATA_8;
 				param_mask ^= RFCOMM_RPN_PM_DATA;
 			}
 
 			stop_bits = RFCOMM_RPN_STOP_BITS(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_STOP &&
 			    stop_bits != RFCOMM_RPN_STOP_1) {
 				stop_bits = RFCOMM_RPN_STOP_1;
 				param_mask ^= RFCOMM_RPN_PM_STOP;
 			}
 
 			parity = RFCOMM_RPN_PARITY(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_PARITY &&
 			    parity != RFCOMM_RPN_PARITY_NONE) {
 				parity = RFCOMM_RPN_PARITY_NONE;
 				param_mask ^= RFCOMM_RPN_PM_PARITY;
 			}
 
 			flow_control = rpn->flow_control;
 			if (rpn->param_mask & RFCOMM_RPN_PM_FLOW &&
 			    flow_control != RFCOMM_RPN_FLOW_NONE) {
 				flow_control = RFCOMM_RPN_FLOW_NONE;
 				param_mask ^= RFCOMM_RPN_PM_FLOW;
 			}
 
 			xon_char = rpn->xon_char;
 			if (rpn->param_mask & RFCOMM_RPN_PM_XON &&
 			    xon_char != RFCOMM_RPN_XON_CHAR) {
 				xon_char = RFCOMM_RPN_XON_CHAR;
 				param_mask ^= RFCOMM_RPN_PM_XON;
 			}
 
 			xoff_char = rpn->xoff_char;
 			if (rpn->param_mask & RFCOMM_RPN_PM_XOFF &&
 			    xoff_char != RFCOMM_RPN_XOFF_CHAR) {
 				xoff_char = RFCOMM_RPN_XOFF_CHAR;
 				param_mask ^= RFCOMM_RPN_PM_XOFF;
 			}
 		}
 
 		/* Build the reply in place; rpn->dlci is kept as received */
 		rpn->bit_rate = bit_rate;
 		rpn->line_settings = RFCOMM_MKRPN_LINE_SETTINGS(data_bits,
 						stop_bits, parity);
 		rpn->flow_control = flow_control;
 		rpn->xon_char = xon_char;
 		rpn->xoff_char = xoff_char;
 		rpn->param_mask = htole16(param_mask); /* XXX */
 
 		/*
 		 * The reply always carries the complete RPN payload. Keep the
 		 * MCC length field in step with it: for a one byte request the
 		 * header would otherwise still advertise a length of 1.
 		 */
 		m0->m_pkthdr.len = m0->m_len = sizeof(*hdr) + sizeof(*rpn);
 		hdr->length = RFCOMM_MKLEN8(sizeof(*rpn));
 
 		hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RPN);
 		error = ng_btsocket_rfcomm_send_uih(s,
 				RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0);
 	} else
 		NG_FREE_M(m0); /* XXX ignore response */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_rpn */
 
 /*
  * Receive RFCOMM RLS MCC command.
  *
  * XXX FIXME Do we have to do anything else here? Remote peer tries to
  * tell us something about DLCI. Just report what we have received and
  * return back received values as required by TS 07.10 spec.
  *
  * Caller must hold s->session_mtx; m0 is consumed.
  */
 
 static int
 ng_btsocket_rfcomm_receive_rls(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	struct rfcomm_mcc_rls	*rls = (struct rfcomm_mcc_rls *)(mcc + 1);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC RLS, dlci=%d, status=%#x, cr=%d, length=%d, session state=%d, " \
 "flags=%#x, mtu=%d, len=%d\n",
 		__func__, RFCOMM_DLCI(rls->address), rls->status,
 		RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore responses */
 
 		return (0);
 	}
 
 	/* Lowest status bit flags an error; the remaining bits describe it */
 	if (rls->status & 0x1) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got RLS dlci=%d, error=%#x\n", __func__, RFCOMM_DLCI(rls->address),
 			rls->status >> 1);
 	}
 
 	/* Return the received values back as a response */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RLS);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_rls */
 
 /*
  * Receive RFCOMM PN MCC command
  *
  * Handles three situations:
  *  - PN request for an existing DLC: adopt the parameters and answer;
  *  - PN response for an existing DLC: if the DLC is configuring, adopt the
  *    parameters and proceed with SABM;
  *  - PN request for a dlci we do not have: treat it as an incoming
  *    connection and look for a listening socket, answering DM if there is
  *    none.
  *
  * Caller must hold s->session_mtx. m0 is consumed on every path (reused for
  * the reply or freed). Returns EINVAL for dlci 0, otherwise 0 or the error
  * from sending the reply/SABM/DM.
  */
 
 static int
 ng_btsocket_rfcomm_receive_pn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr		*hdr = mtod(m0, struct rfcomm_mcc_hdr*);
 	struct rfcomm_mcc_pn		*pn = (struct rfcomm_mcc_pn *)(hdr+1);
 	ng_btsocket_rfcomm_pcb_t	*pcb = NULL;
 	int				 error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC PN, dlci=%d, cr=%d, length=%d, flow_control=%#x, priority=%d, " \
 "ack_timer=%d, mtu=%d, max_retrans=%d, credits=%d, session state=%d, " \
 "flags=%#x, session mtu=%d, len=%d\n",
 		__func__, pn->dlci, RFCOMM_CR(hdr->type),
 		RFCOMM_MCC_LENGTH(hdr->length), pn->flow_control, pn->priority,
 		pn->ack_timer, le16toh(pn->mtu), pn->max_retrans, pn->credits,
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	/* PN makes no sense for the multiplexor channel */
 	if (pn->dlci == 0) {
 		NG_BTSOCKET_RFCOMM_ERR("%s: Zero dlci in MCC PN\n", __func__);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/* Check if we have this dlci */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, pn->dlci);
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (RFCOMM_CR(hdr->type)) {
 			/*
 			 * PN Request. pn->mtu is passed as received;
 			 * ng_btsocket_rfcomm_set_pn() converts it with
 			 * le16toh().
 			 */
 			ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control,
 				pn->credits, pn->mtu);
 
 			/* Answer with our flow control mode and credits */
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 				pn->flow_control = 0xe0;
 				pn->credits = RFCOMM_DEFAULT_CREDITS;
 			} else {
 				pn->flow_control = 0;
 				pn->credits = 0;
 			}
 
 			/* Reuse the received mbuf for the response */
 			hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN);
 			error = ng_btsocket_rfcomm_send_uih(s,
 					RFCOMM_MKADDRESS(INITIATOR(s), 0),
 					0, 0, m0);
 		} else {
 			/* PN Response - proceed with SABM. Timeout still set */
 			if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONFIGURING) {
 				ng_btsocket_rfcomm_set_pn(pcb, 0,
 					pn->flow_control, pn->credits, pn->mtu);
 
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING;
 				error = ng_btsocket_rfcomm_send_command(s,
 						RFCOMM_FRAME_SABM, pn->dlci);
 			} else
 				NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got PN response for dlci=%d in invalid state=%d\n",
 					__func__, pn->dlci, pcb->state);
 
 			/* pn points into m0, so free only after its last use */
 			NG_FREE_M(m0);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else if (RFCOMM_CR(hdr->type)) {
 		/* PN request to non-existing dlci - incoming connection */
 		pcb = ng_btsocket_rfcomm_connect_ind(s,
 				RFCOMM_SRVCHANNEL(pn->dlci));
 		if (pcb != NULL) {
 			mtx_lock(&pcb->pcb_mtx);
 
 			pcb->dlci = pn->dlci;
 
 			ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control,
 				pn->credits, pn->mtu);
 
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 				pn->flow_control = 0xe0;
 				pn->credits = RFCOMM_DEFAULT_CREDITS;
 			} else {
 				pn->flow_control = 0;
 				pn->credits = 0;
 			}
 
 			hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN);
 			error = ng_btsocket_rfcomm_send_uih(s,
 					RFCOMM_MKADDRESS(INITIATOR(s), 0),
 					0, 0, m0);
 
 			if (error == 0) {
 				/* Response queued: arm timer, wait for SABM */
 				ng_btsocket_rfcomm_timeout(pcb);
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING;
 				soisconnecting(pcb->so);
 			} else
 				ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 			mtx_unlock(&pcb->pcb_mtx);
 		} else {
 			/* Nobody is listen()ing on this channel */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DM, pn->dlci);
 			NG_FREE_M(m0);
 		}
 	} else
 		NG_FREE_M(m0); /* XXX ignore response to non-existing dlci */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_pn */
 
 /*
  * Set PN parameters for dlci. Caller must hold pcb->pcb_mtx.
  * 
  * From Bluetooth spec.
  * 
  * "... The CL1 - CL4 field is completely redefined. (In TS07.10 this defines 
  *  the convergence layer to use, which is not applicable to RFCOMM. In RFCOMM,
  *  in Bluetooth versions up to 1.0B, this field was forced to 0).
  *
  *  In the PN request sent prior to a DLC establishment, this field must contain
  *  the value 15 (0xF), indicating support of credit based flow control in the 
  *  sender. See Table 5.3 below. If the PN response contains any other value 
  *  than 14 (0xE) in this field, it is inferred that the peer RFCOMM entity is 
  *  not supporting the credit based flow control feature. (This is only possible
  *  if the peer RFCOMM implementation is only conforming to Bluetooth version 
  *  1.0B.) If a PN request is sent on an already open DLC, then this field must
  *  contain the value zero; it is not possible to set initial credits  more 
  *  than once per DLC activation. A responding implementation must set this 
  *  field in the PN response to 14 (0xE), if (and only if) the value in the PN 
  *  request was 15..."
  */
 
 static void
 ng_btsocket_rfcomm_set_pn(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr,
 		u_int8_t flow_control, u_int8_t credits, u_int16_t mtu)
 {
 	/*
 	 * Value of the flow control field that announces credit based flow
 	 * control: 0xf0 in a PN request (cr != 0), 0xe0 in a PN response.
 	 */
 	u_int8_t	cfc_marker = cr? 0xf0 : 0xe0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* mtu is taken in little-endian (wire) byte order */
 	pcb->mtu = le16toh(mtu);
 
 	if (flow_control != cfc_marker) {
 		/* Peer does not use credit based flow control */
 		pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_CFC;
 		pcb->tx_cred = 0;
 	} else {
 		/* Credit based flow control: take the initial credits */
 		pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_CFC;
 		pcb->tx_cred = credits;
 	}
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: cr=%d, dlci=%d, state=%d, flags=%#x, mtu=%d, rx_cred=%d, tx_cred=%d\n",
 		__func__, cr, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		pcb->rx_cred, pcb->tx_cred);
 } /* ng_btsocket_rfcomm_set_pn */
 
 /*
  * Send RFCOMM SABM/DISC/UA/DM frames. Caller must hold s->session_mtx
  *
  * Builds a command frame with no payload for the given dlci and puts it on
  * the session output queue. Returns 0 on success or ENOBUFS when no mbuf is
  * available. Any other frame type is a programming error and panics.
  */
 
 static int
 ng_btsocket_rfcomm_send_command(ng_btsocket_rfcomm_session_p s,
 		u_int8_t type, u_int8_t dlci)
 {
 	struct rfcomm_cmd_hdr	*cmd = NULL;
 	struct mbuf		*frame = NULL;
 	int			 cr;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending command type %#x, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, type, s->state, s->flags, s->mtu, dlci);
 
 	/* SABM/DISC carry our initiator bit as C/R, UA/DM carry its inverse */
 	if (type == RFCOMM_FRAME_SABM || type == RFCOMM_FRAME_DISC)
 		cr = INITIATOR(s);
 	else if (type == RFCOMM_FRAME_UA || type == RFCOMM_FRAME_DM)
 		cr = !INITIATOR(s);
 	else {
 		panic("%s: Invalid frame type=%#x\n", __func__, type);
 		return (EINVAL);
 		/* NOT REACHED */
 	}
 
 	MGETHDR(frame, M_NOWAIT, MT_DATA);
 	if (frame == NULL)
 		return (ENOBUFS);
 
 	frame->m_pkthdr.len = frame->m_len = sizeof(*cmd);
 
 	/* Fill the header; FCS is computed over the finished header */
 	cmd = mtod(frame, struct rfcomm_cmd_hdr *);
 	cmd->address = RFCOMM_MKADDRESS(cr, dlci);
 	cmd->control = RFCOMM_MKCONTROL(type, 1);
 	cmd->length = RFCOMM_MKLEN8(0);
 	cmd->fcs = ng_btsocket_rfcomm_fcs3((u_int8_t *) cmd);
 
 	NG_BT_MBUFQ_ENQUEUE(&s->outq, frame);
 
 	return (0);
 } /* ng_btsocket_rfcomm_send_command */
 
 /*
  * Send RFCOMM UIH frame. Caller must hold s->session_mtx
  *
  * s       - session whose output queue receives the frame
  * address - complete address byte of the frame
  * pf      - P/F bit; when set, one extra byte with "credits" follows the
  *           length field
  * credits - credit count to send (used only when pf is set)
  * data    - payload (may be NULL). Ownership always passes to this
  *           function: it becomes part of the queued frame or is freed
  *           on error.
  *
  * Returns 0 on success or ENOBUFS if an mbuf could not be allocated.
  */
 
 static int
 ng_btsocket_rfcomm_send_uih(ng_btsocket_rfcomm_session_p s, u_int8_t address,
 		u_int8_t pf, u_int8_t credits, struct mbuf *data)
 {
 	struct rfcomm_frame_hdr	*hdr = NULL;
 	struct mbuf		*m = NULL, *mcrc = NULL;
 	u_int16_t		 length;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* mbuf for the frame header */
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		NG_FREE_M(data);
 		return (ENOBUFS);
 	}
 	m->m_pkthdr.len = m->m_len = sizeof(*hdr);
 
 	/* mbuf for the one byte FCS trailer */
 	MGET(mcrc, M_NOWAIT, MT_DATA);
 	if (mcrc == NULL) {
 		NG_FREE_M(m); /* do not leak the header mbuf */
 		NG_FREE_M(data);
 		return (ENOBUFS);
 	}
 	mcrc->m_len = 1;
 
 	/* Fill UIH frame header */
 	hdr = mtod(m, struct rfcomm_frame_hdr *);
 	hdr->address = address;
 	hdr->control = RFCOMM_MKCONTROL(RFCOMM_FRAME_UIH, pf);
 
 	/* Calculate FCS (done before the length field is written) */
 	mcrc->m_data[0] = ng_btsocket_rfcomm_fcs2((u_int8_t *) hdr);
 
 	/* Put length back */
 	length = (data != NULL)? data->m_pkthdr.len : 0;
 	if (length > 127) {
 		/* Two byte length field: header grows by one byte */
 		u_int16_t	l = htole16(RFCOMM_MKLEN16(length));
 
 		bcopy(&l, &hdr->length, sizeof(l));
 		m->m_pkthdr.len ++;
 		m->m_len ++;
 	} else
 		hdr->length = RFCOMM_MKLEN8(length);
 
 	/* Credits byte goes right after the length field */
 	if (pf) {
 		m->m_data[m->m_len] = credits;
 		m->m_pkthdr.len ++;
 		m->m_len ++;
 	}
 
 	/* Add payload */
 	if (data != NULL) {
 		m_cat(m, data);
 		m->m_pkthdr.len += length;
 	}
 
 	/* Put FCS back */
 	m_cat(m, mcrc);
 	m->m_pkthdr.len ++;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending UIH state=%d, flags=%#x, address=%d, length=%d, pf=%d, " \
 "credits=%d, len=%d\n",
 		__func__, s->state, s->flags, address, length, pf, credits,
 		m->m_pkthdr.len);
 
 	NG_BT_MBUFQ_ENQUEUE(&s->outq, m);
 
 	return (0);
 } /* ng_btsocket_rfcomm_send_uih */
 
 /*
  * Send MSC request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Reports the local modem signals (pcb->lmodem) for the DLC to the peer.
  * Returns ENOBUFS if no mbuf is available, otherwise the result of
  * ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_msc(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	struct mbuf		*req = NULL;
 	struct rfcomm_mcc_hdr	*mcc = NULL;
 	struct rfcomm_mcc_msc	*msc = NULL;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	MGETHDR(req, M_NOWAIT, MT_DATA);
 	if (req == NULL)
 		return (ENOBUFS);
 
 	/* MCC header immediately followed by the MSC payload */
 	mcc = mtod(req, struct rfcomm_mcc_hdr *);
 	msc = (struct rfcomm_mcc_msc *)(mcc + 1);
 	req->m_len = sizeof(*mcc) + sizeof(*msc);
 	req->m_pkthdr.len = req->m_len;
 
 	mcc->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_MSC);
 	mcc->length = RFCOMM_MKLEN8(sizeof(*msc));
 
 	msc->address = RFCOMM_MKADDRESS(1, pcb->dlci);
 	msc->modem = pcb->lmodem;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending MSC dlci=%d, state=%d, flags=%#x, address=%d, modem=%#x\n",
 		__func__, pcb->dlci, pcb->state, pcb->flags, msc->address,
 		msc->modem);
 
 	/* MCC messages travel on the multiplexor channel (dlci 0) */
 	return (ng_btsocket_rfcomm_send_uih(pcb->session,
 			RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, req));
 } /* ng_btsocket_rfcomm_send_msc */
 
 /*
  * Send PN request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Proposes the DLC parameters (mtu, priority, flow control mode and initial
  * credits) to the peer. Returns ENOBUFS if no mbuf is available, otherwise
  * the result of ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_pn(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	struct mbuf		*req = NULL;
 	struct rfcomm_mcc_hdr	*mcc = NULL;
 	struct rfcomm_mcc_pn	*pn = NULL;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	MGETHDR(req, M_NOWAIT, MT_DATA);
 	if (req == NULL)
 		return (ENOBUFS);
 
 	/* MCC header immediately followed by the PN payload */
 	mcc = mtod(req, struct rfcomm_mcc_hdr *);
 	pn = (struct rfcomm_mcc_pn *)(mcc + 1);
 	req->m_len = sizeof(*mcc) + sizeof(*pn);
 	req->m_pkthdr.len = req->m_len;
 
 	mcc->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_PN);
 	mcc->length = RFCOMM_MKLEN8(sizeof(*pn));
 
 	pn->dlci = pcb->dlci;
 
 	/*
 	 * Set default DLCI priority as described in GSM 07.10
 	 * (ETSI TS 101 369) clause 5.6 page 42
 	 */
 
 	if (pcb->dlci < 56)
 		pn->priority = ((pcb->dlci >> 3) << 3) + 7;
 	else
 		pn->priority = 61;
 
 	pn->ack_timer = 0;
 	pn->mtu = htole16(pcb->mtu);
 	pn->max_retrans = 0;
 
 	/* Advertise credit based flow control only if the DLC uses it */
 	if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)) {
 		pn->flow_control = 0;
 		pn->credits = 0;
 	} else {
 		pn->flow_control = 0xf0;
 		pn->credits = pcb->rx_cred;
 	}
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending PN dlci=%d, state=%d, flags=%#x, mtu=%d, flow_control=%#x, " \
 "credits=%d\n",	__func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		pn->flow_control, pn->credits);
 
 	/* MCC messages travel on the multiplexor channel (dlci 0) */
 	return (ng_btsocket_rfcomm_send_uih(pcb->session,
 			RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, req));
 } /* ng_btsocket_rfcomm_send_pn */
 
 /*
  * Calculate and send credits based on available space in receive buffer
  *
  * One credit is granted per mtu-sized chunk of free space in the socket
  * receive buffer, limited so that the total outstanding credits (rx_cred)
  * never exceed RFCOMM_MAX_CREDITS. Nothing is sent when no credit can be
  * granted. On success pcb->rx_cred is increased by the credits sent.
  *
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx.
  * Returns 0 or the error from ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_credits(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	int		error = 0;
 	long		grant, room;
 	u_int8_t	credits;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending more credits, dlci=%d, state=%d, flags=%#x, mtu=%d, " \
 "space=%ld, tx_cred=%d, rx_cred=%d\n",
 		__func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		sbspace(&pcb->so->so_rcv), pcb->tx_cred, pcb->rx_cred);
 
 	/*
 	 * pcb->mtu comes from the peer's PN message and may be zero, so it
 	 * must not be used as a divisor unchecked. The quotient is kept in a
 	 * long and clamped before it is narrowed to the 8 bit credit field;
 	 * narrowing first could turn a large value into a small or zero one.
 	 */
 	grant = (pcb->mtu > 0)? sbspace(&pcb->so->so_rcv) / pcb->mtu : 0;
 	room = RFCOMM_MAX_CREDITS - pcb->rx_cred;
 	if (grant > room)
 		grant = room;
 
 	if (grant > 0) {
 		/* 0 < grant <= RFCOMM_MAX_CREDITS - rx_cred */
 		credits = grant;
 
 		error = ng_btsocket_rfcomm_send_uih(
 				pcb->session,
 				RFCOMM_MKADDRESS(INITIATOR(pcb->session),
 					pcb->dlci), 1, credits, NULL);
 		if (error == 0) {
 			pcb->rx_cred += credits;
 
 			NG_BTSOCKET_RFCOMM_INFO(
 "%s: Gave remote side %d more credits, dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",	__func__, credits, pcb->dlci, pcb->state,
 				pcb->flags, pcb->rx_cred, pcb->tx_cred);
 		} else
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not send credits, error=%d, dlci=%d, state=%d, flags=%#x, " \
 "mtu=%d, space=%ld, tx_cred=%d, rx_cred=%d\n",
 				__func__, error, pcb->dlci, pcb->state,
 				pcb->flags, pcb->mtu, sbspace(&pcb->so->so_rcv),
 				pcb->tx_cred, pcb->rx_cred);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_send_credits */
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM DLCs
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Send data from socket send buffer
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Queues at most "limit" UIH frames of up to pcb->mtu bytes each. The
  * limit is further reduced to the available tx credits (credit based flow
  * control) or to RFCOMM_MAX_CREDITS, and is zero while the remote modem
  * signals assert flow control. Returns 0 or the first error met.
  */
 
 static int
 ng_btsocket_rfcomm_pcb_send(ng_btsocket_rfcomm_pcb_p pcb, int limit)
 {
 	struct mbuf	*chunk = NULL;
 	int		 sent, length, error;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Work out how many frames we are allowed to send right now */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)
 		limit = min(limit, pcb->tx_cred);
 	else if (pcb->rmodem & RFCOMM_MODEM_FC)
 		limit = 0;
 	else
 		limit = min(limit, RFCOMM_MAX_CREDITS); /* XXX ??? */
 
 	if (limit == 0) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Could not send - remote flow control asserted, dlci=%d, flags=%#x, " \
 "rmodem=%#x, tx_cred=%d\n",
 			__func__, pcb->dlci, pcb->flags, pcb->rmodem,
 			pcb->tx_cred);
 
 		return (0);
 	}
 
 	error = 0;
 	sent = 0;
 
 	while (sent < limit) {
 		/* Nothing left in the send buffer? */
 		length = min(pcb->mtu, sbavail(&pcb->so->so_snd));
 		if (length == 0)
 			break;
 
 		/* Get the chunk from the socket's send buffer */
 		chunk = ng_btsocket_rfcomm_prepare_packet(&pcb->so->so_snd,
 				length);
 		if (chunk == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 
 		sbdrop(&pcb->so->so_snd, length);
 
 		error = ng_btsocket_rfcomm_send_uih(pcb->session,
 				RFCOMM_MKADDRESS(INITIATOR(pcb->session),
 					pcb->dlci), 0, 0, chunk);
 		if (error != 0)
 			break;
 
 		/* Only frames that were actually queued are counted */
 		sent ++;
 	}
 
 	/* Every queued frame used up one credit */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)
 		pcb->tx_cred -= sent;
 
 	if (error == 0 && sent > 0) {
 		pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_SENDING;
 		sowwakeup(pcb->so);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_pcb_send */
 
 /*
  * Unlink and disconnect DLC: stop its timer, detach it from the session,
  * mark it closed and report "error" on the socket. This function does not
  * return a value. If the DLC was the last one on a session we initiated,
  * the session itself is shut down as well.
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  */
 
 static void
 ng_btsocket_rfcomm_pcb_kill(ng_btsocket_rfcomm_pcb_p pcb, int error)
 {
 	/* Remember the session: pcb->session is cleared below */
 	ng_btsocket_rfcomm_session_p	s = pcb->session;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Killing DLC, so=%p, dlci=%d, state=%d, flags=%#x, error=%d\n",
 		__func__, pcb->so, pcb->dlci, pcb->state, pcb->flags, error);
 
 	if (pcb->session == NULL)
 		panic("%s: DLC without session, pcb=%p, state=%d, flags=%#x\n",
 			__func__, pcb, pcb->state, pcb->flags);
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Cancel a pending DLC timeout, if any */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 	/* Detach DLC from the session. Does not matter which state DLC in */
 	LIST_REMOVE(pcb, session_next);
 	pcb->session = NULL;
 
 	/* Change DLC state and wakeup all sleepers */
 	pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED;
 	pcb->so->so_error = error;
 	soisdisconnected(pcb->so);
 	wakeup(&pcb->state);
 
 	/* Check if we have any DLCs left on the session */
 	if (LIST_EMPTY(&s->dlcs) && INITIATOR(s)) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Disconnecting session, state=%d, flags=%#x, mtu=%d\n",
 			__func__, s->state, s->flags, s->mtu);
 
 		switch (s->state) {
 		case NG_BTSOCKET_RFCOMM_SESSION_CLOSED:
 		case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 			/*
 			 * Do not have to do anything here. We can get here
 			 * when L2CAP connection was terminated or we have
 			 * received DISC on multiplexor channel
 			 */
 			break;
 
 		case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 			/* Send DISC on multiplexor channel */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DISC, 0);
 			if (error == 0) {
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING;
 				break;
 			}
 			/* Could not send DISC - close the session right away */
 			/* FALL THROUGH */
 
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			break;
 
 /*		case NG_BTSOCKET_RFCOMM_SESSION_LISTENING: */
 		default:
 			panic("%s: Invalid session state=%d, flags=%#x\n",
 				__func__, s->state, s->flags);
 			break;
 		}
 
 		/* Wake the RFCOMM task after the session state change */
 		ng_btsocket_rfcomm_task_wakeup();
 	}
 } /* ng_btsocket_rfcomm_pcb_kill */
 
 /*
  * Look for given dlci for given RFCOMM session. Caller must hold s->session_mtx
  *
  * Returns the first DLC on the session whose dlci matches, or NULL when the
  * session has no such DLC. The returned pcb is not locked.
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_pcb_by_dlci(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	LIST_FOREACH(dlc, &s->dlcs, session_next) {
 		if (dlc->dlci == dlci)
 			return (dlc);
 	}
 
 	return (NULL);
 } /* ng_btsocket_rfcomm_pcb_by_dlci */
 
 /*
  * Look for socket that listens on given src address and given channel.
  *
  * A listener bound to exactly "src" is preferred. If there is none, the
  * last listener found that is bound to NG_HCI_BDADDR_ANY is returned.
  * Returns NULL when nobody listens on the channel. The global sockets
  * mutex is taken and dropped inside, so the returned pcb is not locked.
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_pcb_listener(bdaddr_p src, int channel)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL, pcb1 = NULL;
 
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	LIST_FOREACH(pcb, &ng_btsocket_rfcomm_sockets, next) {
 		/* Skip sockets on other channels and sockets not listening */
-		if (pcb->channel != channel ||
-		    !(pcb->so->so_options & SO_ACCEPTCONN))
+		if (pcb->channel != channel || !SOLISTENING(pcb->so))
 			continue;
 
 		/* Exact address match: stop here, pcb stays non-NULL */
 		if (bcmp(&pcb->src, src, sizeof(*src)) == 0)
 			break;
 
 		/* Wildcard listener: remember it as the fallback */
 		if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 			pcb1 = pcb;
 	}
 
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	/* pcb is NULL here unless the loop ended on an exact match */
 	return ((pcb != NULL)? pcb : pcb1);
 } /* ng_btsocket_rfcomm_pcb_listener */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Misc. functions 
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Arm the DLC timer. The caller MUST hold pcb_mtx. Arming a timer that
  * is already armed is a bug and panics.
  */
 
 static void
 ng_btsocket_rfcomm_timeout(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) != 0)
 		panic("%s: Duplicated socket timeout?!\n", __func__);
 
 	/* Mark the timer as pending and forget any earlier expiry */
 	pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMO;
 	pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT;
 
 	callout_reset(&pcb->timo, ng_btsocket_rfcomm_timo * hz,
 	    ng_btsocket_rfcomm_process_timeout, pcb);
 } /* ng_btsocket_rfcomm_timeout */
 
 /*
  * Disarm the DLC timer. The caller MUST hold pcb_mtx. Disarming a timer
  * that is not armed is a bug and panics.
  */
 
 static void
 ng_btsocket_rfcomm_untimeout(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO) == 0)
 		panic("%s: No socket timeout?!\n", __func__);
 
 	callout_stop(&pcb->timo);
 
 	/* The timer is now neither pending nor expired */
 	pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMO;
 	pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT;
 } /* ng_btsocket_rfcomm_untimeout */
 
 /*
  * DLC timer expiry handler; "xpcb" is the pcb whose timer fired. Runs with
  * pcb_mtx held (asserted below). Records the expiry in the pcb flags, moves
  * a DLC that was still configuring or connecting to the disconnecting
  * state, and wakes up the RFCOMM task.
  */
 
 static void
 ng_btsocket_rfcomm_process_timeout(void *xpcb)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = xpcb;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Timeout, so=%p, dlci=%d, state=%d, flags=%#x\n",
 		__func__, pcb->so, pcb->dlci, pcb->state, pcb->flags);
 
 	/* Timer is no longer pending, but it did expire */
 	pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMO;
 	pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT;
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 	case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 		/* State stays as it is */
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 		/* Setup did not finish in time: start tearing down */
 		pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 		break;
 
 	default:
 		panic(
 "%s: DLC timeout in invalid state, dlci=%d, state=%d, flags=%#x\n",
 			__func__, pcb->dlci, pcb->state, pcb->flags);
 		break;
 	}
 
 	ng_btsocket_rfcomm_task_wakeup();
 } /* ng_btsocket_rfcomm_process_timeout */
 
 /*
  * Build a new mbuf chain holding a copy of the first "length" bytes queued
  * in socket buffer "sb". The data is copied with bcopy(); the socket buffer
  * itself is not modified here. Record boundaries are ignored: when one
  * record's chain ends, copying continues with the next record (m_nextpkt).
  *
  * Returns the new chain (m_pkthdr.len set to "length"), or NULL if an mbuf
  * could not be allocated. Panics if "length" is negative or if the socket
  * buffer holds fewer than "length" bytes.
  */
 
 static struct mbuf *
 ng_btsocket_rfcomm_prepare_packet(struct sockbuf *sb, int length)
 {
 	struct mbuf	*top = NULL, *m = NULL, *n = NULL, *nextpkt = NULL;
 	int		 mlen, noff, len;
 
 	MGETHDR(top, M_NOWAIT, MT_DATA);
 	if (top == NULL)
 		return (NULL);
 
 	top->m_pkthdr.len = length;
 	top->m_len = 0;
 	mlen = MHLEN;
 
 	m = top;
 	n = sb->sb_mb;
 	/*
 	 * The socket buffer may be empty, so do not dereference a NULL head.
 	 * A buffer that is too short is reported by the checks after the loop.
 	 */
 	nextpkt = (n != NULL)? n->m_nextpkt : NULL;
 	noff = 0;
 
 	/* m/mlen: destination mbuf and its capacity; n/noff: source cursor */
 	while (length > 0 && n != NULL) {
 		/* Copy as much as fits in m and is left in n, up to length */
 		len = min(mlen - m->m_len, n->m_len - noff);
 		if (len > length)
 			len = length;
 
 		bcopy(mtod(n, caddr_t)+noff, mtod(m, caddr_t)+m->m_len, len);
 		m->m_len += len;
 		noff += len;
 		length -= len;
 
 		/* Destination mbuf is full and more data is wanted: extend */
 		if (length > 0 && m->m_len == mlen) {
 			MGET(m->m_next, M_NOWAIT, MT_DATA);
 			if (m->m_next == NULL) {
 				NG_FREE_M(top);
 				return (NULL);
 			}
 
 			m = m->m_next;
 			m->m_len = 0;
 			mlen = MLEN;
 		}
 
 		/* Source mbuf exhausted: next mbuf, or first of next record */
 		if (noff == n->m_len) {
 			noff = 0;
 			n = n->m_next;
 
 			if (n == NULL)
 				n = nextpkt;
 
 			nextpkt = (n != NULL)? n->m_nextpkt : NULL;
 		}
 	}
 
 	if (length < 0)
 		panic("%s: length=%d\n", __func__, length);
 	if (length > 0 && n == NULL)
 		panic("%s: bogus length=%d, n=%p\n", __func__, length, n);
 
 	return (top);
 } /* ng_btsocket_rfcomm_prepare_packet */
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
index da8f22befc79..068b1890f27f 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
@@ -1,1981 +1,1981 @@
 /*
  * ng_btsocket_sco.c
  */
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2001-2002 Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $Id: ng_btsocket_sco.c,v 1.2 2005/10/31 18:08:51 max Exp $
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/domain.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/filedesc.h>
 #include <sys/ioccom.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/bluetooth/include/ng_bluetooth.h>
 #include <netgraph/bluetooth/include/ng_hci.h>
 #include <netgraph/bluetooth/include/ng_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket.h>
 #include <netgraph/bluetooth/include/ng_btsocket_sco.h>
 
 /*
  * MALLOC define. M_NETGRAPH_BTSOCKET_SCO is a dedicated malloc type when
  * NG_SEPARATE_MALLOC is defined, otherwise it is an alias for M_NETGRAPH.
  */
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_SCO, "netgraph_btsocks_sco",
 		"Netgraph Bluetooth SCO sockets");
 #else
 #define M_NETGRAPH_BTSOCKET_SCO M_NETGRAPH
 #endif /* NG_SEPARATE_MALLOC */
 
 /* Netgraph node methods; installed in the type descriptor below */
 static ng_constructor_t	ng_btsocket_sco_node_constructor;
 static ng_rcvmsg_t	ng_btsocket_sco_node_rcvmsg;
 static ng_shutdown_t	ng_btsocket_sco_node_shutdown;
 static ng_newhook_t	ng_btsocket_sco_node_newhook;
 static ng_connect_t	ng_btsocket_sco_node_connect;
 static ng_rcvdata_t	ng_btsocket_sco_node_rcvdata;
 static ng_disconnect_t	ng_btsocket_sco_node_disconnect;
 
 /*
  * Deferred work: input queue processing and routing table cleanup.
  * NOTE(review): presumably the handlers of ng_btsocket_sco_queue_task and
  * ng_btsocket_sco_rt_task; the task initialization is not in this part of
  * the file.
  */
 static void		ng_btsocket_sco_input   (void *, int);
 static void		ng_btsocket_sco_rtclean (void *, int);
 
 /* Netgraph type descriptor */
 static struct ng_type	typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_BTSOCKET_SCO_NODE_TYPE,
 	.constructor =	ng_btsocket_sco_node_constructor,
 	.rcvmsg =	ng_btsocket_sco_node_rcvmsg,
 	.shutdown =	ng_btsocket_sco_node_shutdown,
 	.newhook =	ng_btsocket_sco_node_newhook,
 	.connect =	ng_btsocket_sco_node_connect,
 	.rcvdata =	ng_btsocket_sco_node_rcvdata,
 	.disconnect =	ng_btsocket_sco_node_disconnect,
 };
 
 /*
  * Globals.
  *
  * ng_btsocket_sco_queue holds items received from Netgraph until
  * ng_btsocket_sco_input() drains it; it is accessed under
  * ng_btsocket_sco_queue_mtx. The socket list ng_btsocket_sco_sockets is
  * walked under ng_btsocket_sco_sockets_mtx, and the routing table
  * ng_btsocket_sco_rt is modified under ng_btsocket_sco_rt_mtx.
  * ng_btsocket_sco_lasttime and ng_btsocket_sco_curpps are the rate-limit
  * state handed to ppsratecheck() by the debug macros.
  */
 static u_int32_t				ng_btsocket_sco_debug_level;
 static node_p					ng_btsocket_sco_node;
 static struct ng_bt_itemq			ng_btsocket_sco_queue;
 static struct mtx				ng_btsocket_sco_queue_mtx;
 static struct task				ng_btsocket_sco_queue_task;
 static struct mtx				ng_btsocket_sco_sockets_mtx;
 static LIST_HEAD(, ng_btsocket_sco_pcb)		ng_btsocket_sco_sockets;
 static LIST_HEAD(, ng_btsocket_sco_rtentry)	ng_btsocket_sco_rt;
 static struct mtx				ng_btsocket_sco_rt_mtx;
 static struct task				ng_btsocket_sco_rt_task;
 static struct timeval				ng_btsocket_sco_lasttime;
 static int					ng_btsocket_sco_curpps;
 
 /*
  * Sysctl tree: a "seq" node under _net_bluetooth_sco_sockets with a
  * read-write debug level and read-only input queue statistics (current
  * length, maximum length and number of drops).
  */
 SYSCTL_DECL(_net_bluetooth_sco_sockets);
 static SYSCTL_NODE(_net_bluetooth_sco_sockets, OID_AUTO, seq,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Bluetooth SEQPACKET SCO sockets family");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, debug_level,
 	CTLFLAG_RW,
 	&ng_btsocket_sco_debug_level, NG_BTSOCKET_WARN_LEVEL,
 	"Bluetooth SEQPACKET SCO sockets debug level");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_len,
 	CTLFLAG_RD,
 	&ng_btsocket_sco_queue.len, 0,
 	"Bluetooth SEQPACKET SCO sockets input queue length");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_maxlen,
 	CTLFLAG_RD,
 	&ng_btsocket_sco_queue.maxlen, 0,
 	"Bluetooth SEQPACKET SCO sockets input queue max. length");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_drops,
 	CTLFLAG_RD,
 	&ng_btsocket_sco_queue.drops, 0,
 	"Bluetooth SEQPACKET SCO sockets input queue drops");
 
 /*
  * Debug.
  *
  * Each macro expands to "if (...) printf", so the argument list written at
  * the call site becomes the printf() argument list. The message is printed
  * only when ng_btsocket_sco_debug_level is at least the macro's level and
  * ppsratecheck() allows it; all four levels share the same rate-limit
  * state.
  *
  * NOTE(review): the expansion is an unbraced "if". Used as the body of an
  * if/else without braces, the following "else" would bind to the macro's
  * "if". Always wrap such uses in braces.
  */
 #define NG_BTSOCKET_SCO_INFO \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_INFO_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_SCO_WARN \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_WARN_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_SCO_ERR \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_ERR_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_SCO_ALERT \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 /*
  * Netgraph message processing routines. Each takes the received message
  * and the routing entry of the hook it arrived on.
  */
 
 static int ng_btsocket_sco_process_lp_con_cfm
 	(struct ng_mesg *, ng_btsocket_sco_rtentry_p);
 static int ng_btsocket_sco_process_lp_con_ind
 	(struct ng_mesg *, ng_btsocket_sco_rtentry_p);
 static int ng_btsocket_sco_process_lp_discon_ind
 	(struct ng_mesg *, ng_btsocket_sco_rtentry_p);
 
 /*
  * Send LP messages to the lower layer
  */
 
 static int  ng_btsocket_sco_send_lp_con_req
 	(ng_btsocket_sco_pcb_p);
 static int  ng_btsocket_sco_send_lp_con_rsp
 	(ng_btsocket_sco_rtentry_p, bdaddr_p, int);
 static int  ng_btsocket_sco_send_lp_discon_req
 	(ng_btsocket_sco_pcb_p);
 
 static int ng_btsocket_sco_send2
 	(ng_btsocket_sco_pcb_p);
 
 /*
  * Timeout processing routines
  */
 
 static void ng_btsocket_sco_timeout         (ng_btsocket_sco_pcb_p);
 static void ng_btsocket_sco_untimeout       (ng_btsocket_sco_pcb_p);
 static void ng_btsocket_sco_process_timeout (void *);
 
 /*
  * Socket (pcb) lookup helpers. The callers in this file treat a non-NULL
  * result as a pcb whose pcb_mtx is held and unlock it when done.
  */
 
 static ng_btsocket_sco_pcb_p	ng_btsocket_sco_pcb_by_addr(bdaddr_p);
 static ng_btsocket_sco_pcb_p	ng_btsocket_sco_pcb_by_handle(bdaddr_p, int);
 static ng_btsocket_sco_pcb_p	ng_btsocket_sco_pcb_by_addrs(bdaddr_p, bdaddr_p);
 
 /* Schedule the input queue task on taskqueue_swi */
 #define ng_btsocket_sco_wakeup_input_task() \
 	taskqueue_enqueue(taskqueue_swi, &ng_btsocket_sco_queue_task)
 
 /* Schedule the routing table cleanup task on taskqueue_swi */
 #define ng_btsocket_sco_wakeup_route_task() \
 	taskqueue_enqueue(taskqueue_swi, &ng_btsocket_sco_rt_task)
 
 /*****************************************************************************
  *****************************************************************************
  **                        Netgraph node interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Netgraph node constructor. Nodes of this type may not be created on
  * request, so this always fails with EINVAL.
  */
 
 static int
 ng_btsocket_sco_node_constructor(node_p node)
 {
 	return (EINVAL);
 } /* ng_btsocket_sco_node_constructor */
 
 /*
  * Local shutdown processing: release the old node, then create and name a
  * fresh one in ng_btsocket_sco_node. On failure ng_btsocket_sco_node is
  * left NULL and the error is returned.
  */
 
 static int
 ng_btsocket_sco_node_shutdown(node_p node)
 {
 	int	error;
 
 	/* Let the old node go */
 	NG_NODE_UNREF(node);
 
 	/* Create new node */
 	error = ng_make_node_common(&typestruct, &ng_btsocket_sco_node);
 	if (error == 0) {
 		error = ng_name_node(ng_btsocket_sco_node,
 					NG_BTSOCKET_SCO_NODE_TYPE);
 		if (error == 0)
 			return (0);
 
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, error);
 
 		/* Give up the node that could not be named */
 		NG_NODE_UNREF(ng_btsocket_sco_node);
 	} else {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, error);
 	}
 
 	/* Either way there is no usable node now */
 	ng_btsocket_sco_node = NULL;
 
 	return (error);
 } /* ng_btsocket_sco_node_shutdown */
 
 /*
  * New hook request. Any hook, whatever its name, may be connected to the
  * node, so this always succeeds.
  */
 
 static int
 ng_btsocket_sco_node_newhook(node_p node, hook_p hook, char const *name)
 {
 	return (0);
 } /* ng_btsocket_sco_node_newhook */
 
 /*
  * Hook connected. Always accepts: clears the hook's private pointer (no
  * routing entry yet) and takes an extra reference on the hook.
  */
 
 static int
 ng_btsocket_sco_node_connect(hook_p hook)
 {
 	NG_HOOK_SET_PRIVATE(hook, NULL);
 	NG_HOOK_REF(hook); /* Keep extra reference to the hook */
 
 	/* Forced queueing on both ends of the hook is compiled out */
 #if 0
 	NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook));
 	NG_HOOK_FORCE_QUEUE(hook);
 #endif
 
 	return (0);
 } /* ng_btsocket_sco_node_connect */
 
 /*
  * Hook disconnection.
  *
  * A hook with private information is in the routing table, so the routing
  * table cleanup task is scheduled and its result returned. A hook without
  * private information was connected but never got a "hook_info" message;
  * it was never added to the routing table and it is safe to just drop the
  * extra reference here.
  */
 
 static int
 ng_btsocket_sco_node_disconnect(hook_p hook)
 {
 	if (NG_HOOK_PRIVATE(hook) == NULL) {
 		NG_HOOK_UNREF(hook); /* Remove extra reference */
 
 		return (0);
 	}
 
 	/* Hook is in the routing table: leave it to the cleanup task */
 	return (ng_btsocket_sco_wakeup_route_task());
 } /* ng_btsocket_sco_node_disconnect */
 
 /*
  * Process incoming messages.
  *
  * Only messages carrying NGM_HCI_COOKIE are accepted; anything else is
  * freed and EINVAL is returned. Accepted items are put on the input queue
  * (with a reference to the hook, if there is one) and the input task is
  * woken up. If the queue is full the item is dropped with ENOBUFS.
  */
 
 static int
 ng_btsocket_sco_node_rcvmsg(node_p node, item_p item, hook_p hook)
 {
 	struct ng_mesg	*msg = NGI_MSG(item); /* item still has message */
 	int		 error = 0;
 
 	/* Not an HCI message: refuse it */
 	if (msg == NULL || msg->header.typecookie != NGM_HCI_COOKIE) {
 		NG_FREE_ITEM(item);
 
 		return (EINVAL);
 	}
 
 	mtx_lock(&ng_btsocket_sco_queue_mtx);
 
 	if (NG_BT_ITEMQ_FULL(&ng_btsocket_sco_queue)) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Input queue is full (msg)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_sco_queue);
 		NG_FREE_ITEM(item);
 		error = ENOBUFS;
 	} else {
 		/* The queued item keeps its own reference to the hook */
 		if (hook != NULL) {
 			NG_HOOK_REF(hook);
 			NGI_SET_HOOK(item, hook);
 		}
 
 		NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_sco_queue, item);
 		error = ng_btsocket_sco_wakeup_input_task();
 	}
 
 	mtx_unlock(&ng_btsocket_sco_queue_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_node_rcvmsg */
 
 /*
  * Receive data on a hook. The item is put on the input queue together with
  * a reference to the hook and the input task is woken up. If the queue is
  * full the item is dropped and ENOBUFS is returned.
  */
 
 static int
 ng_btsocket_sco_node_rcvdata(hook_p hook, item_p item)
 {
 	int	error;
 
 	mtx_lock(&ng_btsocket_sco_queue_mtx);
 
 	if (NG_BT_ITEMQ_FULL(&ng_btsocket_sco_queue)) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Input queue is full (data)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_sco_queue);
 		NG_FREE_ITEM(item);
 
 		mtx_unlock(&ng_btsocket_sco_queue_mtx);
 
 		return (ENOBUFS);
 	}
 
 	/* The queued item keeps its own reference to the hook */
 	NG_HOOK_REF(hook);
 	NGI_SET_HOOK(item, hook);
 
 	NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_sco_queue, item);
 	error = ng_btsocket_sco_wakeup_input_task();
 
 	mtx_unlock(&ng_btsocket_sco_queue_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_node_rcvdata */
 
 /*
  * Process LP_ConnectCfm event from the lower layer protocol.
  *
  * Finds the connecting socket by local (rt->src) and remote (ep->bdaddr)
  * address, stops its timer and either marks it open (status == 0) or
  * closes it with ECONNREFUSED. Returns EMSGSIZE for a message of the wrong
  * size, ENOENT if no socket in the connecting state matches, 0 otherwise.
  */
 
 static int
 ng_btsocket_sco_process_lp_con_cfm(struct ng_mesg *msg,
 		ng_btsocket_sco_rtentry_p rt)
 {
 	ng_hci_lp_con_cfm_ep	*ep = NULL;
 	ng_btsocket_sco_pcb_t	*pcb = NULL;
 	int			 error = 0;
 
 	if (msg->header.arglen != sizeof(*ep))
 		return (EMSGSIZE);
 
 	ep = (ng_hci_lp_con_cfm_ep *)(msg->data);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* Look for the socket with these source and destination addresses */
 	pcb = ng_btsocket_sco_pcb_by_addrs(&rt->src, &ep->bdaddr);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 		return (ENOENT);
 	}
 
 	/* pcb is locked */
 
 	NG_BTSOCKET_SCO_INFO(
 "%s: Got LP_ConnectCfm response, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, status=%d, handle=%d, state=%d\n",
 		__func__,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		ep->status, ep->con_handle, pcb->state);
 
 	/* Only a socket that is still connecting expects this event */
 	if (pcb->state != NG_BTSOCKET_SCO_CONNECTING) {
 		error = ENOENT;
 		goto out;
 	}
 
 	ng_btsocket_sco_untimeout(pcb);
 
 	if (ep->status != 0) {
 		/*
 		 * We have failed to open connection, so disconnect the socket
 		 */
 
 		pcb->so->so_error = ECONNREFUSED; /* XXX convert status ??? */
 		pcb->state = NG_BTSOCKET_SCO_CLOSED;
 		soisdisconnected(pcb->so);
 	} else {
 		/*
 		 * Connection is open. Update connection handle and
 		 * socket state
 		 */
 
 		pcb->con_handle = ep->con_handle;
 		pcb->state = NG_BTSOCKET_SCO_OPEN;
 		soisconnected(pcb->so);
 	}
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_process_lp_con_cfm */
 
 /*
  * Process LP_ConnectInd indicator. Find socket that listens on address.
  * Find exact or closest match.
  *
  * If a listening socket is found, a new socket is created from it with
  * sonewconn() and set up for the incoming connection. In every case an
  * LP_ConnectRsp carrying the resulting status is sent back to the lower
  * layer. Returns EMSGSIZE for a message of the wrong size, otherwise the
  * result of sending the response.
  */
 
 static int
 ng_btsocket_sco_process_lp_con_ind(struct ng_mesg *msg,
 		ng_btsocket_sco_rtentry_p rt)
 {
 	ng_hci_lp_con_ind_ep	*ep = NULL;
 	ng_btsocket_sco_pcb_t	*pcb = NULL, *pcb1 = NULL;
 	int			 error = 0;
 	u_int16_t		 status = 0;
 
 	if (msg->header.arglen != sizeof(*ep))
 		return (EMSGSIZE);
 
 	ep = (ng_hci_lp_con_ind_ep *)(msg->data);
 
 	NG_BTSOCKET_SCO_INFO(
 "%s: Got LP_ConnectInd indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x\n",
 		__func__,
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		ep->bdaddr.b[5], ep->bdaddr.b[4], ep->bdaddr.b[3],
 		ep->bdaddr.b[2], ep->bdaddr.b[1], ep->bdaddr.b[0]);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* pcb is the listening socket; pcb1 will be the new connection */
 	pcb = ng_btsocket_sco_pcb_by_addr(&rt->src);
 	if (pcb != NULL) {
 		struct socket *so1;
 
 		/* pcb is locked */
 
 		CURVNET_SET(pcb->so->so_vnet);
 		so1 = sonewconn(pcb->so, 0);
 		CURVNET_RESTORE();
 
 		if (so1 == NULL) {
 			status = 0x0d; /* Rejected due to limited resources */
 			goto respond;
 		}
 
 		/*
 		 * If we got here then we have created new socket. So complete
 		 * connection. If we were listening on specific address then
 		 * copy source address from listening socket, otherwise copy
 		 * source address from hook's routing information.
 		 */
 
 		pcb1 = so2sco_pcb(so1);
 		KASSERT((pcb1 != NULL),
 ("%s: pcb1 == NULL\n", __func__));
 
  		mtx_lock(&pcb1->pcb_mtx);
 
 		if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)) != 0)
 			bcopy(&pcb->src, &pcb1->src, sizeof(pcb1->src));
 		else
 			bcopy(&rt->src, &pcb1->src, sizeof(pcb1->src));
 
 		/* The new socket is the accepting side, not a client */
 		pcb1->flags &= ~NG_BTSOCKET_SCO_CLIENT;
 
 		bcopy(&ep->bdaddr, &pcb1->dst, sizeof(pcb1->dst));
 		pcb1->rt = rt;
 	} else
 		/* Nobody listens on requested BDADDR */
 		status = 0x1f; /* Unspecified Error */
 
 respond:
 	/*
 	 * Always answer the lower layer. pcb1 is non-NULL (and locked) only
 	 * if a new socket was created above.
 	 */
 	error = ng_btsocket_sco_send_lp_con_rsp(rt, &ep->bdaddr, status);
 	if (pcb1 != NULL) {
 		if (error != 0) {
 			/* Could not respond: fail the new socket */
 			pcb1->so->so_error = error;
 			pcb1->state = NG_BTSOCKET_SCO_CLOSED;
 			soisdisconnected(pcb1->so);
 		} else {
 			/* Response sent: start the connect timer */
 			pcb1->state = NG_BTSOCKET_SCO_CONNECTING;
 			soisconnecting(pcb1->so);
 
 			ng_btsocket_sco_timeout(pcb1);
 		}
 
 		mtx_unlock(&pcb1->pcb_mtx);
 	}
 
 	if (pcb != NULL)
 		mtx_unlock(&pcb->pcb_mtx);
 
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_process_lp_con_ind */
 
 /*
  * Process LP_DisconnectInd indicator.
  *
  * Finds the socket by local address and connection handle and, if there is
  * one, stops any pending timer and marks the socket closed. Returns
  * EMSGSIZE for a message of the wrong size and 0 otherwise, whether or not
  * a socket was found.
  */
 
 static int
 ng_btsocket_sco_process_lp_discon_ind(struct ng_mesg *msg,
 		ng_btsocket_sco_rtentry_p rt)
 {
 	ng_hci_lp_discon_ind_ep	*ep = NULL;
 	ng_btsocket_sco_pcb_t	*pcb = NULL;
 
 	/* Check message */
 	if (msg->header.arglen != sizeof(*ep))
 		return (EMSGSIZE);
 
 	ep = (ng_hci_lp_discon_ind_ep *)(msg->data);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* Look for the socket with given connection handle */
 	pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, ep->con_handle);
 	if (pcb != NULL) {
 		/*
 		 * pcb is locked. Disconnect the socket. If there was any
 		 * pending request we can not do anything here anyway.
 		 */
 
 		NG_BTSOCKET_SCO_INFO(
 "%s: Got LP_DisconnectInd indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, handle=%d, state=%d\n",
 			__func__,
 			pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 			pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 			pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 			pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 			pcb->con_handle, pcb->state);
 
 		if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 			ng_btsocket_sco_untimeout(pcb);
 
 		pcb->state = NG_BTSOCKET_SCO_CLOSED;
 		soisdisconnected(pcb->so);
 
 		mtx_unlock(&pcb->pcb_mtx);
 	}
 
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_sco_process_lp_discon_ind */
 
 /*
  * Send LP_ConnectReq request for an SCO link to pcb->dst through the hook
  * of the pcb's routing entry. The caller must hold pcb_mtx. Returns
  * ENETDOWN if there is no usable hook, ENOMEM if the message could not be
  * allocated, otherwise the result of sending the message.
  */
 
 static int
 ng_btsocket_sco_send_lp_con_req(ng_btsocket_sco_pcb_p pcb)
 {
 	struct ng_mesg		*req = NULL;
 	ng_hci_lp_con_req_ep	*args = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* No route at all */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 
 	/* Route without a usable hook */
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(req, NGM_HCI_COOKIE, NGM_HCI_LP_CON_REQ,
 		sizeof(*args), M_NOWAIT);
 	if (req == NULL)
 		return (ENOMEM);
 
 	args = (ng_hci_lp_con_req_ep *)(req->data);
 	args->link_type = NG_HCI_LINK_SCO;
 	bcopy(&pcb->dst, &args->bdaddr, sizeof(args->bdaddr));
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_sco_node, req, pcb->rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_sco_send_lp_con_req */
 
 /*
  * Send LP_ConnectRsp response with the given status for an SCO link from
  * "dst" through the hook of routing entry "rt". Returns ENETDOWN if there
  * is no usable hook, ENOMEM if the message could not be allocated,
  * otherwise the result of sending the message.
  */
 
 static int
 ng_btsocket_sco_send_lp_con_rsp(ng_btsocket_sco_rtentry_p rt, bdaddr_p dst, int status)
 {
 	struct ng_mesg		*rsp = NULL;
 	ng_hci_lp_con_rsp_ep	*args = NULL;
 	int			 error = 0;
 
 	/* No route at all */
 	if (rt == NULL)
 		return (ENETDOWN);
 
 	/* Route without a usable hook */
 	if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(rsp, NGM_HCI_COOKIE, NGM_HCI_LP_CON_RSP,
 		sizeof(*args), M_NOWAIT);
 	if (rsp == NULL)
 		return (ENOMEM);
 
 	args = (ng_hci_lp_con_rsp_ep *)(rsp->data);
 	args->status = status;
 	args->link_type = NG_HCI_LINK_SCO;
 	bcopy(dst, &args->bdaddr, sizeof(args->bdaddr));
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_sco_node, rsp, rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_sco_send_lp_con_rsp */
 
 /*
  * Send LP_DisconReq request for the pcb's connection handle through the
  * hook of the pcb's routing entry. The caller must hold pcb_mtx. Returns
  * ENETDOWN if there is no usable hook, ENOMEM if the message could not be
  * allocated, otherwise the result of sending the message.
  */
 
 static int
 ng_btsocket_sco_send_lp_discon_req(ng_btsocket_sco_pcb_p pcb)
 {
 	struct ng_mesg		*req = NULL;
 	ng_hci_lp_discon_req_ep	*args = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* No route at all */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 
 	/* Route without a usable hook */
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(req, NGM_HCI_COOKIE, NGM_HCI_LP_DISCON_REQ,
 		sizeof(*args), M_NOWAIT);
 	if (req == NULL)
 		return (ENOMEM);
 
 	args = (ng_hci_lp_discon_req_ep *)(req->data);
 	args->con_handle = pcb->con_handle;
 	args->reason = 0x13; /* User Ended Connection */
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_sco_node, req, pcb->rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_sco_send_lp_discon_req */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Socket interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * SCO sockets data input routine.
  *
  * Validates an SCO data packet received on "hook", strips the SCO header,
  * looks up the socket by source address and connection handle and, if the
  * socket is open and has room, appends the payload to its receive buffer
  * as one record and wakes up readers. The mbuf is consumed in all cases:
  * it is either handed to the socket buffer or freed.
  */
 
 static void
 ng_btsocket_sco_data_input(struct mbuf *m, hook_p hook)
 {
 	ng_hci_scodata_pkt_t		*hdr = NULL;
 	ng_btsocket_sco_pcb_t		*pcb = NULL;
 	ng_btsocket_sco_rtentry_t	*rt = NULL;
 	u_int16_t			 con_handle;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Invalid source hook for SCO data packet\n", __func__);
 		goto drop;
 	}
 
 	/* The routing entry attached to the hook tells us the source bdaddr */
 	rt = (ng_btsocket_sco_rtentry_t *) NG_HOOK_PRIVATE(hook);
 	if (rt == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not find out source bdaddr for SCO data packet\n", __func__);
 		goto drop;
 	}
 
 	/* Make sure we can access header */
 	if (m->m_pkthdr.len < sizeof(*hdr)) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: SCO data packet too small, len=%d\n", __func__, m->m_pkthdr.len);
 		goto drop;
 	}
 
 	if (m->m_len < sizeof(*hdr)) { 
 		m = m_pullup(m, sizeof(*hdr));
 		/* With m == NULL the free at "drop" is a no-op */
 		if (m == NULL)
 			goto drop;
 	}
 
 	/*
 	 * Strip SCO packet header and verify packet length.
 	 * NOTE(review): hdr is still read after m_adj(); this relies on
 	 * m_adj() leaving the trimmed bytes in place - confirm.
 	 */
 	hdr = mtod(m, ng_hci_scodata_pkt_t *);
 	m_adj(m, sizeof(*hdr));
 
 	if (hdr->length != m->m_pkthdr.len) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Bad SCO data packet length, len=%d, length=%d\n",
 			__func__, m->m_pkthdr.len, hdr->length);
 		goto drop;
 	}
 
 	/*
 	 * Now process packet
 	 */
 
 	con_handle = NG_HCI_CON_HANDLE(le16toh(hdr->con_handle));
 
 	NG_BTSOCKET_SCO_INFO(
 "%s: Received SCO data packet: src bdaddr=%x:%x:%x:%x:%x:%x, handle=%d, " \
 "length=%d\n",	__func__,
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		con_handle, hdr->length);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* Find socket */
 	pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, con_handle);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 		goto drop;
 	}
 
 	/* pcb is locked */
 
 	if (pcb->state != NG_BTSOCKET_SCO_OPEN) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: No connected socket found, src bdaddr=%x:%x:%x:%x:%x:%x, state=%d\n",
 			__func__,
 			rt->src.b[5], rt->src.b[4], rt->src.b[3],
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			pcb->state);
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 		goto drop;
 	}
 
 	/* Check if we have enough space in socket receive queue */
 	if (m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Not enough space in socket receive queue. Dropping SCO data packet, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, len=%d, space=%ld\n",
 			__func__,
 			rt->src.b[5], rt->src.b[4], rt->src.b[3],
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			m->m_pkthdr.len,
 			sbspace(&pcb->so->so_rcv));
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 		goto drop;
 	}
 
 	/* Append packet to the socket receive queue and wakeup */
 	sbappendrecord(&pcb->so->so_rcv, m);
 	/* The socket buffer has the mbuf now; do not free it below */
 	m = NULL;
 
 	sorwakeup(pcb->so);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 drop:
 	NG_FREE_M(m); /* checks for m != NULL */
 } /* ng_btsocket_sco_data_input */
 
 /*
  * SCO sockets default message input routine.
  *
  * Handles the messages that are not LP events:
  *  - NGM_HCI_NODE_UP: create (on first use) or refresh the routing entry
  *    attached to the hook with the address and packet limits reported in
  *    the message;
  *  - NGM_HCI_SYNC_CON_QUEUE: account for completed packets, drop them from
  *    the socket send queue and try to send more.
  *
  * The message is always consumed (freed) here, including when the hook is
  * missing or no longer valid.
  */
 
 static void
 ng_btsocket_sco_default_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_sco_rtentry_t	*rt = NULL;
 
 	/*
 	 * The message must be freed on every path, so do not simply return
 	 * when there is no usable hook.
 	 */
 	if (hook == NULL || NG_HOOK_NOT_VALID(hook))
 		goto drop;
 
 	rt = (ng_btsocket_sco_rtentry_t *) NG_HOOK_PRIVATE(hook);
 
 	switch (msg->header.cmd) {
 	case NGM_HCI_NODE_UP: {
 		ng_hci_node_up_ep	*ep = NULL;
 
 		if (msg->header.arglen != sizeof(*ep))
 			break;
 
 		/* Ignore a node that reports the wildcard address */
 		ep = (ng_hci_node_up_ep *)(msg->data);
 		if (bcmp(&ep->bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 			break;
 
 		if (rt == NULL) {
 			/* First time on this hook: create the routing entry */
 			rt = malloc(sizeof(*rt),
 				M_NETGRAPH_BTSOCKET_SCO, M_NOWAIT|M_ZERO);
 			if (rt == NULL)
 				break;
 
 			NG_HOOK_SET_PRIVATE(hook, rt);
 
 			mtx_lock(&ng_btsocket_sco_rt_mtx);
 
 			LIST_INSERT_HEAD(&ng_btsocket_sco_rt, rt, next);
 		} else
 			mtx_lock(&ng_btsocket_sco_rt_mtx);
 
 		/* A reported packet size of 0 is replaced with 60 */
 		bcopy(&ep->bdaddr, &rt->src, sizeof(rt->src));
 		rt->pkt_size = (ep->pkt_size == 0)? 60 : ep->pkt_size;
 		rt->num_pkts = ep->num_pkts;
 		rt->hook = hook;
 
 		mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 		NG_BTSOCKET_SCO_INFO(
 "%s: Updating hook \"%s\", src bdaddr=%x:%x:%x:%x:%x:%x, pkt_size=%d, " \
 "num_pkts=%d\n",	__func__, NG_HOOK_NAME(hook), 
 			rt->src.b[5], rt->src.b[4], rt->src.b[3], 
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			rt->pkt_size, rt->num_pkts);
 	} break;
 
 	case NGM_HCI_SYNC_CON_QUEUE: {
 		ng_hci_sync_con_queue_ep	*ep = NULL;
 		ng_btsocket_sco_pcb_t		*pcb = NULL;
 
 		if (rt == NULL || msg->header.arglen != sizeof(*ep))
 			break;
 
 		ep = (ng_hci_sync_con_queue_ep *)(msg->data);
 
 		/* Fewer packets are outstanding now; never go below zero */
 		rt->pending -= ep->completed;
 		if (rt->pending < 0) {
 			NG_BTSOCKET_SCO_WARN(
 "%s: Pending packet counter is out of sync! bdaddr=%x:%x:%x:%x:%x:%x, " \
 "handle=%d, pending=%d, completed=%d\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				ep->con_handle, rt->pending,
 				ep->completed);
 
 			rt->pending = 0;
 		}
 
 		mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 		/* Find socket */
 		pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, ep->con_handle);
 		if (pcb == NULL) {
 			mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 			break;
 		}
 
 		/* pcb is locked */
 
 		/* Check state */
 		if (pcb->state == NG_BTSOCKET_SCO_OPEN) {
 			/* Remove timeout */
 			ng_btsocket_sco_untimeout(pcb);
 			
 			/* Drop completed packets from the send queue */
 			for (; ep->completed > 0; ep->completed --)
 				sbdroprecord(&pcb->so->so_snd);
 
 			/* Send more if we have any */
 			if (sbavail(&pcb->so->so_snd) > 0)
 				if (ng_btsocket_sco_send2(pcb) == 0)
 					ng_btsocket_sco_timeout(pcb);
 
 			/* Wake up writers */
 			sowwakeup(pcb->so);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 	} break;
 
 	default:
 		NG_BTSOCKET_SCO_WARN(
 "%s: Unknown message, cmd=%d\n", __func__, msg->header.cmd);
 		break;
 	}
 drop:
 	NG_FREE_MSG(msg); /* Checks for msg != NULL */
 } /* ng_btsocket_sco_default_msg_input */
 
 /*
  * SCO sockets LP message input routine. Dispatches an LP event received on
  * "hook" to its handler, passing the routing entry attached to the hook.
  * The results of the handlers are ignored. The message is freed in all
  * cases.
  */
 
 static void
 ng_btsocket_sco_lp_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_sco_rtentry_p	 rt = NULL;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Invalid source hook for LP message\n", __func__);
 		goto drop;
 	}
 
 	rt = (ng_btsocket_sco_rtentry_p) NG_HOOK_PRIVATE(hook);
 	if (rt == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not find out source bdaddr for LP message\n", __func__);
 		goto drop;
 	}
 
 	if (msg->header.cmd == NGM_HCI_LP_CON_CFM) {
 		/* Connection Confirmation Event */
 		ng_btsocket_sco_process_lp_con_cfm(msg, rt);
 	} else if (msg->header.cmd == NGM_HCI_LP_CON_IND) {
 		/* Connection Indication Event */
 		ng_btsocket_sco_process_lp_con_ind(msg, rt);
 	} else if (msg->header.cmd == NGM_HCI_LP_DISCON_IND) {
 		/* Disconnection Indication Event */
 		ng_btsocket_sco_process_lp_discon_ind(msg, rt);
 	} else {
 		/* XXX FIXME add other LP messages */
 		NG_BTSOCKET_SCO_WARN(
 "%s: Unknown LP message, cmd=%d\n", __func__, msg->header.cmd);
 	}
 drop:
 	NG_FREE_MSG(msg);
 } /* ng_btsocket_sco_lp_msg_input */
 
 /*
  * SCO sockets input routine. Drains the input queue: every item is taken
  * off the queue under the queue mutex and then, unless it arrived on a
  * hook that is no longer valid, handed to the data or message input
  * routine. The hook reference held by the item and the item itself are
  * released afterwards.
  */
 
 static void
 ng_btsocket_sco_input(void *context, int pending)
 {
 	item_p	item = NULL;
 	hook_p	hook = NULL;
 	int	stale;
 
 	for (;;) {
 		mtx_lock(&ng_btsocket_sco_queue_mtx);
 		NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_sco_queue, item);
 		mtx_unlock(&ng_btsocket_sco_queue_mtx);
 
 		/* Queue is empty: done */
 		if (item == NULL)
 			break;
 
 		NGI_GET_HOOK(item, hook);
 
 		/* Items from a hook that went away are just discarded */
 		stale = (hook != NULL && NG_HOOK_NOT_VALID(hook));
 
 		if (!stale) {
 			switch(item->el_flags & NGQF_TYPE) {
 			case NGQF_DATA: {
 				struct mbuf     *m = NULL;
 
 				NGI_GET_M(item, m);
 				ng_btsocket_sco_data_input(m, hook);
 				} break;
 
 			case NGQF_MESG: {
 				struct ng_mesg  *msg = NULL;
 
 				NGI_GET_MSG(item, msg);
 
 				/* XXX FIXME add other LP messages */
 				if (msg->header.cmd == NGM_HCI_LP_CON_CFM ||
 				    msg->header.cmd == NGM_HCI_LP_CON_IND ||
 				    msg->header.cmd == NGM_HCI_LP_DISCON_IND)
 					ng_btsocket_sco_lp_msg_input(msg, hook);
 				else
 					ng_btsocket_sco_default_msg_input(msg,
 					    hook);
 				} break;
 
 			default:
 				KASSERT(0,
 ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE)));
 				break;
 			}
 		}
 
 		if (hook != NULL)
 			NG_HOOK_UNREF(hook);
 
 		NG_FREE_ITEM(item);
 	}
 } /* ng_btsocket_sco_input */
 
 /*
  * Route cleanup task. Gets scheduled when hook is disconnected. Here we 
  * will find all sockets that use "invalid" hook and disconnect them.
  */
 
 static void
 ng_btsocket_sco_rtclean(void *context, int pending)
 {
 	ng_btsocket_sco_pcb_p		pcb = NULL, pcb_next = NULL;
 	ng_btsocket_sco_rtentry_p	rt = NULL;
 
 	/*
 	 * First disconnect all sockets that use "invalid" hook
 	 */
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	for(pcb = LIST_FIRST(&ng_btsocket_sco_sockets); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		pcb_next = LIST_NEXT(pcb, next);
 
 		if (pcb->rt != NULL &&
 		    pcb->rt->hook != NULL && NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 			if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 				ng_btsocket_sco_untimeout(pcb);
 
 			pcb->rt = NULL;
 			pcb->so->so_error = ENETDOWN;
 			pcb->state = NG_BTSOCKET_SCO_CLOSED;
 			soisdisconnected(pcb->so);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	/*
 	 * Now cleanup routing table
 	 */
 
 	mtx_lock(&ng_btsocket_sco_rt_mtx);
 
 	for (rt = LIST_FIRST(&ng_btsocket_sco_rt); rt != NULL; ) {
 		ng_btsocket_sco_rtentry_p	rt_next = LIST_NEXT(rt, next);
 
 		if (rt->hook != NULL && NG_HOOK_NOT_VALID(rt->hook)) {
 			LIST_REMOVE(rt, next);
 
 			NG_HOOK_SET_PRIVATE(rt->hook, NULL);
 			NG_HOOK_UNREF(rt->hook); /* Remove extra reference */
 
 			bzero(rt, sizeof(*rt));
 			free(rt, M_NETGRAPH_BTSOCKET_SCO);
 		}
 
 		rt = rt_next;
 	}
 
 	mtx_unlock(&ng_btsocket_sco_rt_mtx);
 } /* ng_btsocket_sco_rtclean */
 
 /*
  * Initialize everything.
  *
  * Registers the Netgraph node type, creates and names the SCO sockets
  * node, then sets up the input queue, the list of sockets and the routing
  * table together with their mutexes and tasks. Does nothing for
  * non-default vnets. On any failure an alert is logged and the function
  * returns early with ng_btsocket_sco_node left NULL.
  */
 
 void
 ng_btsocket_sco_init(void)
 {
 	int	error = 0;
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	ng_btsocket_sco_node = NULL;
 	ng_btsocket_sco_debug_level = NG_BTSOCKET_WARN_LEVEL;
 
 	/* Register Netgraph node type */
 	error = ng_newtype(&typestruct);
 	if (error != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not register Netgraph node type, error=%d\n", __func__, error);
 
                 return;
 	}
 
 	/*
 	 * Create Netgraph node
 	 *
 	 * NOTE(review): nothing on the failure paths below undoes the
 	 * ng_newtype() registration above - confirm this is intended.
 	 */
 	error = ng_make_node_common(&typestruct, &ng_btsocket_sco_node);
 	if (error != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, error);
 
 		ng_btsocket_sco_node = NULL;
 
 		return;
 	}
 
 	/* Give the node a name; drop the node again if that fails */
 	error = ng_name_node(ng_btsocket_sco_node, NG_BTSOCKET_SCO_NODE_TYPE);
 	if (error != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, error);
 
 		NG_NODE_UNREF(ng_btsocket_sco_node);
 		ng_btsocket_sco_node = NULL;
 
 		return;
 	}
 
 	/*
 	 * Create input queue (300 is presumably the maximum queue
 	 * length - TODO confirm against NG_BT_ITEMQ_INIT)
 	 */
 	NG_BT_ITEMQ_INIT(&ng_btsocket_sco_queue, 300);
 	mtx_init(&ng_btsocket_sco_queue_mtx,
 		"btsocks_sco_queue_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_sco_queue_task, 0,
 		ng_btsocket_sco_input, NULL);
 
 	/* Create list of sockets */
 	LIST_INIT(&ng_btsocket_sco_sockets);
 	mtx_init(&ng_btsocket_sco_sockets_mtx,
 		"btsocks_sco_sockets_mtx", NULL, MTX_DEF);
 
 	/* Routing table and the task that cleans it up */
 	LIST_INIT(&ng_btsocket_sco_rt);
 	mtx_init(&ng_btsocket_sco_rt_mtx,
 		"btsocks_sco_rt_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_sco_rt_task, 0,
 		ng_btsocket_sco_rtclean, NULL);
 } /* ng_btsocket_sco_init */
 
 /*
  * Abort connection on socket: mark the socket with ECONNABORTED and run
  * the regular disconnect. The result of the disconnect is ignored.
  */
 
 void
 ng_btsocket_sco_abort(struct socket *so)
 {
 	/* Record the reason before the connection is torn down */
 	so->so_error = ECONNABORTED;
 	(void) ng_btsocket_sco_disconnect(so);
 } /* ng_btsocket_sco_abort */
 
 /*
  * Close socket: simply disconnect it, ignoring whatever the disconnect
  * reports.
  */
 
 void
 ng_btsocket_sco_close(struct socket *so)
 {
 	(void) ng_btsocket_sco_disconnect(so);
 } /* ng_btsocket_sco_close */
 
 /*
  * Accept connection on socket. The socket is expected to be connected
  * already, so all that is left to do is to hand back the peer address.
  * Returns EINVAL when the SCO sockets node does not exist, otherwise
  * whatever ng_btsocket_sco_peeraddr() returns.
  */
 
 int
 ng_btsocket_sco_accept(struct socket *so, struct sockaddr **nam)
 {
 	if (ng_btsocket_sco_node != NULL)
 		return (ng_btsocket_sco_peeraddr(so, nam));
 
 	return (EINVAL);
 } /* ng_btsocket_sco_accept */
 
 /*
  * Create and attach new socket.
  *
  * Allocates a zeroed PCB, links it with the socket, initializes its
  * callout and mutex and inserts it into the global list of SCO sockets.
  *
  * Returns 0 on success, EPROTONOSUPPORT if the SCO sockets node does not
  * exist, ESOCKTNOSUPPORT if the socket is not SOCK_SEQPACKET, EISCONN if
  * the socket already has a PCB, ENOMEM if the PCB cannot be allocated, or
  * the error returned by soreserve().
  */
 
 int
 ng_btsocket_sco_attach(struct socket *so, int proto, struct thread *td)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	int			error;
 
 	/* Check socket and protocol */
 	if (ng_btsocket_sco_node == NULL) 
 		return (EPROTONOSUPPORT);
 	if (so->so_type != SOCK_SEQPACKET)
 		return (ESOCKTNOSUPPORT);
 
 #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */
 	if (proto != 0) 
 		if (proto != BLUETOOTH_PROTO_SCO)
 			return (EPROTONOSUPPORT);
 #endif /* XXX */
 
 	/* A PCB is already attached to this socket */
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Reserve send and receive space if it is not reserved yet */
 	if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) {
 		error = soreserve(so, NG_BTSOCKET_SCO_SENDSPACE,
 					NG_BTSOCKET_SCO_RECVSPACE);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Allocate the PCB (zero-filled, allocation does not wait) */
         pcb = malloc(sizeof(*pcb),
 		M_NETGRAPH_BTSOCKET_SCO, M_NOWAIT | M_ZERO);
         if (pcb == NULL)
                 return (ENOMEM);
 
 	/* Link the PCB and the socket */
 	so->so_pcb = (caddr_t) pcb;
 	pcb->so = so;
 	pcb->state = NG_BTSOCKET_SCO_CLOSED;
 
 	callout_init(&pcb->timo, 1);
 
 	/*
 	 * Mark PCB mutex as DUPOK to prevent "duplicated lock of
 	 * the same type" message. When accepting new SCO connection 
 	 * ng_btsocket_sco_process_lp_con_ind() holds both PCB mutexes 
 	 * for "old" (accepting) PCB and "new" (created) PCB.
 	 */
 
 	mtx_init(&pcb->pcb_mtx, "btsocks_sco_pcb_mtx", NULL,
 		MTX_DEF|MTX_DUPOK);
 
 	/*
 	 * Add the PCB to the list
 	 *
 	 * XXX FIXME VERY IMPORTANT!
 	 *
 	 * This is totally FUBAR. We could get here in two cases:
 	 *
 	 * 1) When user calls socket()
 	 * 2) When we need to accept new incoming connection and call
 	 *    sonewconn()
 	 *
 	 * In the first case we must acquire ng_btsocket_sco_sockets_mtx.
 	 * In the second case we hold ng_btsocket_sco_sockets_mtx already.
 	 * So we now need to distinguish between these cases. From reading
 	 * /sys/kern/uipc_socket2.c we can find out that sonewconn() calls
 	 * pru_attach with proto == 0 and td == NULL. For now use this fact
 	 * to figure out if we were called from socket() or from sonewconn().
 	 */
 
 	/* td != NULL: take the list mutex here; otherwise it must be held */
 	if (td != NULL)
 		mtx_lock(&ng_btsocket_sco_sockets_mtx);
 	else
 		mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_INSERT_HEAD(&ng_btsocket_sco_sockets, pcb, next);
 
 	/* Release the list mutex only if it was taken above */
 	if (td != NULL)
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
         return (0);
 } /* ng_btsocket_sco_attach */
 
 /*
  * Bind socket.
  *
  * Assigns the given local address to the socket. A specific (non-ANY)
  * bdaddr may be bound only if no socket on the list already uses it.
  *
  * Returns 0 on success, EINVAL if the SCO sockets node is missing, the
  * address is NULL or has the wrong length, or the socket has no PCB,
  * EAFNOSUPPORT for a non-Bluetooth address and EADDRINUSE if another
  * socket already owns the address. "td" is not used.
  */
 
 int
 ng_btsocket_sco_bind(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	struct sockaddr_sco	*sa = (struct sockaddr_sco *) nam;
 	ng_btsocket_sco_pcb_t	*other = NULL, *self = NULL;
 	int			 taken, error = 0;
 
 	if (ng_btsocket_sco_node == NULL)
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->sco_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->sco_len != sizeof(*sa))
 		return (EINVAL);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/*
 	 * The ANY address can be shared; for everything else look for an
 	 * exact bdaddr match among the existing sockets.
 	 */
 
 	if (bcmp(&sa->sco_bdaddr, NG_HCI_BDADDR_ANY, sizeof(sa->sco_bdaddr)) != 0) {
 		LIST_FOREACH(other, &ng_btsocket_sco_sockets, next) {
 			mtx_lock(&other->pcb_mtx);
 			taken = (bcmp(&other->src, &sa->sco_bdaddr,
 			    sizeof(bdaddr_t)) == 0);
 			mtx_unlock(&other->pcb_mtx);
 
 			if (taken) {
 				error = EADDRINUSE;
 				goto out;
 			}
 		}
 	}
 
 	self = so2sco_pcb(so);
 	if (self == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/* Address is available - assign it to the socket */
 	mtx_lock(&self->pcb_mtx);
 	bcopy(&sa->sco_bdaddr, &self->src, sizeof(self->src));
 	mtx_unlock(&self->pcb_mtx);
 out:
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_bind */
 
 /*
  * Connect socket.
  *
  * Stores the destination address, picks a routing entry for the socket
  * and sends an LP connect request. On success the socket is moved to the
  * CONNECTING state and the socket timeout is started.
  *
  * Returns 0 on success, EINVAL if the socket has no PCB, the SCO sockets
  * node is missing, the address is NULL or has the wrong length, or the
  * destination equals the socket's source address, EAFNOSUPPORT for a
  * non-Bluetooth address, EDESTADDRREQ if the destination is ANY,
  * EINPROGRESS if a connect is already in progress, EHOSTUNREACH if no
  * routing entry matches, or the error returned by
  * ng_btsocket_sco_send_lp_con_req(). "td" is not used.
  */
 
 int
 ng_btsocket_sco_connect(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_sco_pcb_t		*pcb = so2sco_pcb(so);
 	struct sockaddr_sco		*sa = (struct sockaddr_sco *) nam;
 	ng_btsocket_sco_rtentry_t	*rt = NULL;
 	int				 have_src, error = 0;
 
 	/* Check socket */
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->sco_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->sco_len != sizeof(*sa))
 		return (EINVAL);
 	if (bcmp(&sa->sco_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		return (EDESTADDRREQ);
 
 	/*
 	 * Routing. Socket should be bound to some source address. The source
 	 * address can be ANY. Destination address must be set and it must not
 	 * be ANY. If source address is ANY then find first rtentry that has
 	 * src != dst.
 	 */
 
 	/* Routing table mutex is taken before the PCB mutex */
 	mtx_lock(&ng_btsocket_sco_rt_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_SCO_CONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 		return (EINPROGRESS);
 	}
 
 	/* Connecting to our own source address is not allowed */
 	if (bcmp(&sa->sco_bdaddr, &pcb->src, sizeof(pcb->src)) == 0) {
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 		return (EINVAL);
 	}
 
 	/* Save destination address */
 	bcopy(&sa->sco_bdaddr, &pcb->dst, sizeof(pcb->dst));
 
 	/* have_src is non-zero when the socket is bound to a non-ANY address */
 	pcb->rt = NULL;
 	have_src = bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src));
 
 	LIST_FOREACH(rt, &ng_btsocket_sco_rt, next) {
 		/* Skip entries without a usable hook */
 		if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 			continue;
 
 		/* Match src and dst */
 		if (have_src) {
 			if (bcmp(&pcb->src, &rt->src, sizeof(rt->src)) == 0)
 				break;
 		} else {
 			if (bcmp(&pcb->dst, &rt->src, sizeof(rt->src)) != 0)
 				break;
 		}
 	}
 
 	/* rt is NULL here if the loop ran to the end without a match */
 	if (rt != NULL) {
 		pcb->rt = rt;
 
 		/* Unbound socket inherits the source address of the route */
 		if (!have_src)
 			bcopy(&rt->src, &pcb->src, sizeof(pcb->src));
 	} else
 		error = EHOSTUNREACH;
 
 	/*
 	 * Send LP_Connect request 
 	 */
 
 	if (error == 0) {	
 		error = ng_btsocket_sco_send_lp_con_req(pcb);
 		if (error == 0) {
 			pcb->flags |= NG_BTSOCKET_SCO_CLIENT;
 			pcb->state = NG_BTSOCKET_SCO_CONNECTING;
 			soisconnecting(pcb->so);
 
 			ng_btsocket_sco_timeout(pcb);
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_connect */
 
 /*
  * Process ioctl's calls on socket. None of the arguments is examined:
  * every request is rejected with EINVAL.
  */
 
 int
 ng_btsocket_sco_control(struct socket *so, u_long cmd, caddr_t data,
 		struct ifnet *ifp, struct thread *td)
 {
 	int	error = EINVAL;
 
 	return (error);
 } /* ng_btsocket_sco_control */
 
 /*
  * Process getsockopt/setsockopt system calls
  */
 
 int
 ng_btsocket_sco_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
         int			error, tmp;
 
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 	if (pcb == NULL)
 		return (EINVAL);
 
 	if (sopt->sopt_level != SOL_SCO)
 		return (0);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		if (pcb->state != NG_BTSOCKET_SCO_OPEN) {
 			error = ENOTCONN;
 			break;
 		}
 		
 		switch (sopt->sopt_name) {
 		case SO_SCO_MTU:
 			tmp = pcb->rt->pkt_size;
 			error = sooptcopyout(sopt, &tmp, sizeof(tmp));
 			break;
 
 		case SO_SCO_CONNINFO:
 			tmp = pcb->con_handle;
 			error = sooptcopyout(sopt, &tmp, sizeof(tmp));
 			break;
 
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		error = ENOPROTOOPT;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_ctloutput */
 
 /*
  * Detach and destroy socket.
  *
  * Stops a pending timeout, sends an LP disconnect request if the socket
  * is still open, removes the PCB from the list of sockets and frees it.
  * Does nothing (besides the assertion) when the SCO sockets node does not
  * exist.
  */
 
 void
 ng_btsocket_sco_detach(struct socket *so)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 
 	KASSERT(pcb != NULL, ("ng_btsocket_sco_detach: pcb == NULL"));
 
 	if (ng_btsocket_sco_node == NULL) 
 		return;
 
 	/* List mutex first, then the PCB mutex */
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	/*
 	 * NOTE(review): ng_btsocket_sco_untimeout() uses callout_stop();
 	 * confirm that ng_btsocket_sco_process_timeout() cannot still be
 	 * running on this PCB when it is freed below.
 	 */
 	if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 		ng_btsocket_sco_untimeout(pcb);
 
 	/* Tell the other side only if the connection is actually up */
 	if (pcb->state == NG_BTSOCKET_SCO_OPEN)
 		ng_btsocket_sco_send_lp_discon_req(pcb);
 
 	pcb->state = NG_BTSOCKET_SCO_CLOSED;
 
 	LIST_REMOVE(pcb, next);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	/* The PCB is off the list now: destroy its mutex, wipe and free it */
 	mtx_destroy(&pcb->pcb_mtx);
 	bzero(pcb, sizeof(*pcb));
 	free(pcb, M_NETGRAPH_BTSOCKET_SCO);
 
 	/* Finally mark the socket disconnected and drop the stale pointer */
 	soisdisconnected(so);
 	so->so_pcb = NULL;
 } /* ng_btsocket_sco_detach */
 
 /*
  * Disconnect socket
  */
 
 int
 ng_btsocket_sco_disconnect(struct socket *so)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_SCO_DISCONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 
 		return (EINPROGRESS);
 	}
 
 	if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 		ng_btsocket_sco_untimeout(pcb);
 
 	if (pcb->state == NG_BTSOCKET_SCO_OPEN) {
 		ng_btsocket_sco_send_lp_discon_req(pcb);
 
 		pcb->state = NG_BTSOCKET_SCO_DISCONNECTING;
 		soisdisconnecting(so);
 
 		ng_btsocket_sco_timeout(pcb);
 	} else {
 		pcb->state = NG_BTSOCKET_SCO_CLOSED;
 		soisdisconnected(so);
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (0);
 } /* ng_btsocket_sco_disconnect */
 
 /*
  * Listen on socket.
  *
  * Returns 0 on success, EINVAL if the socket has no PCB or the SCO
  * sockets node is missing, or the error reported by
  * solisten_proto_check(). "td" is not used.
  */
 
 int
 ng_btsocket_sco_listen(struct socket *so, int backlog, struct thread *td)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	int			error;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL)
 		return (EINVAL);
 
 	/* Socket lock is taken before the PCB mutex, released in reverse */
 	SOCK_LOCK(so);
 	mtx_lock(&pcb->pcb_mtx);
 
 	error = solisten_proto_check(so);
 	if (error != 0)
 		goto out;
 #if 0
 	if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) {
 		error = EDESTADDRREQ;
 		goto out;
 	}
 #endif
 	solisten_proto(so, backlog);
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 	SOCK_UNLOCK(so);
 
 	return (error);
 } /* ng_btsocket_sco_listen */
 
 /*
  * Get peer address
  */
 
 int
 ng_btsocket_sco_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	struct sockaddr_sco	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 	bcopy(&pcb->dst, &sa.sco_bdaddr, sizeof(sa.sco_bdaddr));
 	mtx_unlock(&pcb->pcb_mtx);
 
 	sa.sco_len = sizeof(sa);
 	sa.sco_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_sco_peeraddr */
 
 /*
  * Send data to socket.
  *
  * Appends the packet to the socket send queue and, unless a timeout is
  * already pending, starts transmission via ng_btsocket_sco_send2().
  * Both "m" (unless it was queued) and "control" are freed before return.
  *
  * Returns 0 on success, ENETDOWN if the SCO sockets node or a valid route
  * is missing, EINVAL if there is no PCB, no data or control data was
  * supplied, ENOTCONN if the socket is not open, EMSGSIZE if the packet is
  * larger than the route's packet size, or the error returned by
  * ng_btsocket_sco_send2(). "flags", "nam" and "td" are not used.
  */
 
 int
 ng_btsocket_sco_send(struct socket *so, int flags, struct mbuf *m,
 		struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	ng_btsocket_sco_pcb_t	*pcb = so2sco_pcb(so);
 	int			 error = 0;
 
 	if (ng_btsocket_sco_node == NULL) {
 		error = ENETDOWN;
 		goto drop;
 	}
 
 	/* Check socket and input */
 	if (pcb == NULL || m == NULL || control != NULL) {
 		error = EINVAL;
 		goto drop;
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Make sure socket is connected */
 	if (pcb->state != NG_BTSOCKET_SCO_OPEN) {
 		mtx_unlock(&pcb->pcb_mtx); 
 		error = ENOTCONN;
 		goto drop;
 	}
 
 	/* Check route */
 	if (pcb->rt == NULL ||
 	    pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 		mtx_unlock(&pcb->pcb_mtx);
 		error = ENETDOWN;
 		goto drop;
 	}
 
 	/* Check packet size */
 	if (m->m_pkthdr.len > pcb->rt->pkt_size) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Packet too big, len=%d, pkt_size=%d\n",
 			__func__, m->m_pkthdr.len, pcb->rt->pkt_size);
 
 		mtx_unlock(&pcb->pcb_mtx);
 		error = EMSGSIZE;
 		goto drop;
 	}
 
 	/*
 	 * First put packet on socket send queue. Then check if we have
 	 * pending timeout. If we do not have timeout then we must send
 	 * packet and schedule timeout. Otherwise do nothing and wait for
 	 * NGM_HCI_SYNC_CON_QUEUE message.
 	 */
 
 	/* The send queue owns the mbuf now; clear m so it is not freed below */
 	sbappendrecord(&pcb->so->so_snd, m);
 	m = NULL;
 
 	if (!(pcb->flags & NG_BTSOCKET_SCO_TIMO)) {
 		error = ng_btsocket_sco_send2(pcb);
 		if (error == 0)
 			ng_btsocket_sco_timeout(pcb);
 		else
 			/*
 			 * NOTE(review): sbdroprecord() presumably removes
 			 * the record at the head of the queue, which is the
 			 * one appended above only if the queue was empty
 			 * before - confirm.
 			 */
 			sbdroprecord(&pcb->so->so_snd); /* XXX */
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m); /* checks for != NULL */
 	NG_FREE_M(control);
 
 	return (error);
 } /* ng_btsocket_sco_send */
 
 /*
  * Send first packet in the socket queue to the SCO layer.
  *
  * Caller must hold the PCB mutex. Keeps sending while the route has
  * fewer than num_pkts packets pending and the send queue has data. Each
  * packet sent is a copy with an SCO data packet header prepended; the
  * queued original stays on the send queue.
  *
  * Returns 0 if at least one packet is pending on the route when the
  * function exits, otherwise the last error (ENOBUFS or the error from
  * NG_SEND_DATA_ONLY(), 0 if nothing was attempted).
  *
  * Assumes pcb->rt != NULL - it is dereferenced without a check, so
  * callers have to guarantee it.
  */
 
 static int
 ng_btsocket_sco_send2(ng_btsocket_sco_pcb_p pcb)
 {
 	struct  mbuf		*m = NULL;
 	ng_hci_scodata_pkt_t	*hdr = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	while (pcb->rt->pending < pcb->rt->num_pkts &&
 	       sbavail(&pcb->so->so_snd) > 0) {
 		/*
 		 * Get a copy of the first packet on send queue
 		 *
 		 * NOTE(review): every iteration copies so_snd.sb_mb and
 		 * nothing in this loop advances to a later record -
 		 * confirm that re-sending the head record is intended.
 		 */
 		m = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT);
 		if (m == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 
 		/* Create SCO packet header */
 		M_PREPEND(m, sizeof(*hdr), M_NOWAIT);
 		if (m != NULL)
 			if (m->m_len < sizeof(*hdr))
 				m = m_pullup(m, sizeof(*hdr));
 
 		/* Either M_PREPEND() or m_pullup() failed */
 		if (m == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 
 		/* Fill in the header; length excludes the header itself */
 		hdr = mtod(m, ng_hci_scodata_pkt_t *);
 		hdr->type = NG_HCI_SCO_DATA_PKT;
 		hdr->con_handle = htole16(NG_HCI_MK_CON_HANDLE(pcb->con_handle, 0, 0));
 		hdr->length = m->m_pkthdr.len - sizeof(*hdr);
 
 		/* Send packet */
 		NG_SEND_DATA_ONLY(error, pcb->rt->hook, m);
 		if (error != 0)
 			break;
 
 		/* One more packet outstanding on this route */
 		pcb->rt->pending ++;
 	}
 
 	/* An error is reported only if nothing at all is pending */
 	return ((pcb->rt->pending > 0)? 0 : error);
 } /* ng_btsocket_sco_send2 */
 
 /*
  * Get socket address
  */
 
 int
 ng_btsocket_sco_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	struct sockaddr_sco	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 	bcopy(&pcb->src, &sa.sco_bdaddr, sizeof(sa.sco_bdaddr));
 	mtx_unlock(&pcb->pcb_mtx);
 
 	sa.sco_len = sizeof(sa);
 	sa.sco_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_sco_sockaddr */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Misc. functions
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Look for the socket that listens on given bdaddr.
  * Returns exact or close match (if any). A close match is a listening
  * socket bound to the ANY address; if there are several, the last one on
  * the list is used. Returns NULL if nothing matches.
  * Caller must hold ng_btsocket_sco_sockets_mtx.
  * Returns with locked pcb.
  */
 
 static ng_btsocket_sco_pcb_p
 ng_btsocket_sco_pcb_by_addr(bdaddr_p bdaddr)
 {
 	ng_btsocket_sco_pcb_p	p = NULL, p1 = NULL;
 
 	mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(p, &ng_btsocket_sco_sockets, next) {
 		mtx_lock(&p->pcb_mtx);
 
 		/* Only listening sockets are candidates */
-		if (p->so == NULL || !(p->so->so_options & SO_ACCEPTCONN)) {
+		if (p->so == NULL || !SOLISTENING(p->so)) {
 			mtx_unlock(&p->pcb_mtx);
 			continue;
 		}
 
 		/* Exact match wins immediately */
 		if (bcmp(&p->src, bdaddr, sizeof(p->src)) == 0)
 			return (p); /* return with locked pcb */
 
 		/* Remember a listener bound to ANY as the fallback */
 		if (bcmp(&p->src, NG_HCI_BDADDR_ANY, sizeof(p->src)) == 0)
 			p1 = p;
 
 		mtx_unlock(&p->pcb_mtx);
 	}
 
 	/* The fallback was unlocked inside the loop, lock it again */
 	if (p1 != NULL)
 		mtx_lock(&p1->pcb_mtx);
 
 	return (p1);
 } /* ng_btsocket_sco_pcb_by_addr */
 
 /*
  * Look for the socket that is assigned to given source address and
  * connection handle. Caller must hold ng_btsocket_sco_sockets_mtx.
  * Returns the matching pcb with its mutex locked, or NULL if there is no
  * such socket.
  */
 
 static ng_btsocket_sco_pcb_p
 ng_btsocket_sco_pcb_by_handle(bdaddr_p src, int con_handle)
 {
 	ng_btsocket_sco_pcb_p	cur = NULL;
 
 	mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(cur, &ng_btsocket_sco_sockets, next) {
 		mtx_lock(&cur->pcb_mtx);
 
 		/* Not the one: let go of it and try the next socket */
 		if (cur->con_handle != con_handle ||
 		    bcmp(src, &cur->src, sizeof(cur->src)) != 0) {
 			mtx_unlock(&cur->pcb_mtx);
 			continue;
 		}
 
 		return (cur); /* return with locked pcb */
 	}
 
 	return (NULL);
 } /* ng_btsocket_sco_pcb_by_handle */
 
 /*
  * Look for the socket in CONNECTING state with given source and
  * destination addresses. Caller must hold ng_btsocket_sco_sockets_mtx.
  * Returns the matching pcb with its mutex locked, or NULL if there is no
  * such socket.
  */
 
 static ng_btsocket_sco_pcb_p
 ng_btsocket_sco_pcb_by_addrs(bdaddr_p src, bdaddr_p dst)
 {
 	ng_btsocket_sco_pcb_p	cur = NULL;
 
 	mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(cur, &ng_btsocket_sco_sockets, next) {
 		mtx_lock(&cur->pcb_mtx);
 
 		if (cur->state == NG_BTSOCKET_SCO_CONNECTING &&
 		    bcmp(src, &cur->src, sizeof(cur->src)) == 0 &&
 		    bcmp(dst, &cur->dst, sizeof(cur->dst)) == 0)
 			break; /* leave the loop with locked pcb */
 
 		mtx_unlock(&cur->pcb_mtx);
 	}
 
 	/* cur is NULL here when the whole list was walked without a match */
 	return (cur);
 } /* ng_btsocket_sco_pcb_by_addrs */
 
 /*
  * Set timeout on socket. Caller must hold the PCB mutex. The timeout
  * must not be set already; ng_btsocket_sco_process_timeout() runs with
  * the pcb as its argument when the timeout fires.
  */
 
 static void
 ng_btsocket_sco_timeout(ng_btsocket_sco_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (pcb->flags & NG_BTSOCKET_SCO_TIMO) {
 		/* Setting the timeout twice is a programming error */
 		KASSERT(0,
 ("%s: Duplicated socket timeout?!\n", __func__));
 		return;
 	}
 
 	pcb->flags |= NG_BTSOCKET_SCO_TIMO;
 	callout_reset(&pcb->timo, bluetooth_sco_rtx_timeout(),
 	    ng_btsocket_sco_process_timeout, pcb);
 } /* ng_btsocket_sco_timeout */
 
 /*
  * Unset timeout on socket. Caller must hold the PCB mutex and the
  * timeout must be set.
  */
 
 static void
 ng_btsocket_sco_untimeout(ng_btsocket_sco_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (!(pcb->flags & NG_BTSOCKET_SCO_TIMO)) {
 		/* Nothing to cancel - the caller got its bookkeeping wrong */
 		KASSERT(0,
 ("%s: No socket timeout?!\n", __func__));
 		return;
 	}
 
 	callout_stop(&pcb->timo);
 	pcb->flags &= ~NG_BTSOCKET_SCO_TIMO;
 } /* ng_btsocket_sco_untimeout */
 
 /*
  * Process timeout on socket.
  *
  * Callout handler set up by ng_btsocket_sco_timeout(); "xpcb" is the pcb
  * the timeout belongs to. Clears the timeout flag, sets ETIMEDOUT on the
  * socket and then acts according to the state of the socket.
  */
 
 static void
 ng_btsocket_sco_process_timeout(void *xpcb)
 {
 	ng_btsocket_sco_pcb_p	 sco = (ng_btsocket_sco_pcb_p) xpcb;
 
 	mtx_lock(&sco->pcb_mtx);
 
 	sco->flags &= ~NG_BTSOCKET_SCO_TIMO;
 	sco->so->so_error = ETIMEDOUT;
 
 	switch (sco->state) {
 	case NG_BTSOCKET_SCO_CONNECTING:
 		/* Connect timeout - close the socket */
 	case NG_BTSOCKET_SCO_DISCONNECTING:
 		/* Disconnect timeout - disconnect the socket anyway */
 		sco->state = NG_BTSOCKET_SCO_CLOSED;
 		soisdisconnected(sco->so);
 		break;
 
 	case NG_BTSOCKET_SCO_OPEN:
 		/* Send timeout - did not get NGM_HCI_SYNC_CON_QUEUE */
 		sbdroprecord(&sco->so->so_snd);
 		sowwakeup(sco->so);
 		/* XXX FIXME what to do with pcb->rt->pending??? */
 		break;
 
 	default:
 		NG_BTSOCKET_SCO_ERR(
 "%s: Invalid socket state=%d\n", __func__, sco->state);
 		break;
 	}
 
 	mtx_unlock(&sco->pcb_mtx);
 } /* ng_btsocket_sco_process_timeout */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index e71a11bdef05..1d685b43697f 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1,4084 +1,4083 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2007-2008,2010
  *	Swinburne University of Technology, Melbourne, Australia.
  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
  * Copyright (c) 2010 The FreeBSD Foundation
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed at the Centre for Advanced Internet
  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
  * James Healy and David Hayes, made possible in part by a grant from the Cisco
  * University Research Program Fund at Community Foundation Silicon Valley.
  *
  * Portions of this software were developed at the Centre for Advanced
  * Internet Architectures, Swinburne University of Technology, Melbourne,
  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/arb.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
 #include <sys/hhook.h>
 #endif
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/protosw.h>
 #include <sys/qmath.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/stats.h>
 
 #include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #define TCPSTATES		/* for logging */
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
 #include <netinet/tcp_fastopen.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #include <netinet/tcp_syncache.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 #include <netinet/udp.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 const int tcprexmtthresh = 3;
 
 VNET_DEFINE(int, tcp_log_in_vain) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_log_in_vain), 0,
     "Log all incoming TCP segments to closed ports");
 
 VNET_DEFINE(int, blackhole) = 0;
 #define	V_blackhole		VNET(blackhole)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(blackhole), 0,
     "Do not send RST on segments to closed ports");
 
 VNET_DEFINE(int, tcp_delack_enabled) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_delack_enabled), 0,
     "Delay ACK to try and piggyback it onto a data packet");
 
 VNET_DEFINE(int, drop_synfin) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(drop_synfin), 0,
     "Drop TCP packets with SYN+FIN set");
 
 VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_prr_conservative), 0,
     "Do conservative Proportional Rate Reduction");
 
 VNET_DEFINE(int, tcp_do_prr) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_prr), 1,
     "Enable Proportional Rate Reduction per RFC 6937");
 
 VNET_DEFINE(int, tcp_do_lrd) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_lrd, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_lrd), 1,
     "Perform Lost Retransmission Detection");
 
 VNET_DEFINE(int, tcp_do_newcwv) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_newcwv), 0,
     "Enable New Congestion Window Validation per RFC7661");
 
 VNET_DEFINE(int, tcp_do_rfc3042) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3042), 0,
     "Enable RFC 3042 (Limited Transmit)");
 
 VNET_DEFINE(int, tcp_do_rfc3390) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3390), 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
 VNET_DEFINE(int, tcp_initcwnd_segments) = 10;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, initcwnd_segments,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_initcwnd_segments), 0,
     "Slow-start flight size (initial congestion window) in number of segments");
 
 VNET_DEFINE(int, tcp_do_rfc3465) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3465), 0,
     "Enable RFC 3465 (Appropriate Byte Counting)");
 
 VNET_DEFINE(int, tcp_abc_l_var) = 2;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_abc_l_var), 2,
     "Cap the max cwnd increment during slow-start to this number of segments");
 
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "TCP ECN");
 
 VNET_DEFINE(int, tcp_do_ecn) = 2;
 SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_ecn), 0,
     "TCP ECN support");
 
 VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
 SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_ecn_maxretries), 0,
     "Max retries before giving up on ECN");
 
 VNET_DEFINE(int, tcp_insecure_syn) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_insecure_syn), 0,
     "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets");
 
 VNET_DEFINE(int, tcp_insecure_rst) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_insecure_rst), 0,
     "Follow RFC793 instead of RFC5961 criteria for accepting RST packets");
 
 VNET_DEFINE(int, tcp_recvspace) = 1024*64;
 #define	V_tcp_recvspace	VNET(tcp_recvspace)
 SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size");
 
 VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_autorcvbuf), 0,
     "Enable automatic receive buffer sizing");
 
 VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_autorcvbuf_max), 0,
     "Max size of automatic receive buffer");
 
 VNET_DEFINE(struct inpcbhead, tcb);
 #define	tcb6	tcb  /* for KAME src sync over BSD*'s */
 VNET_DEFINE(struct inpcbinfo, tcbinfo);
 
 /*
  * TCP statistics are stored in an array of counter(9)s, which size matches
  * size of struct tcpstat.  TCP running connection count is a regular array.
  */
 VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat,
     tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
 VNET_DEFINE(counter_u64_t, tcps_states[TCP_NSTATES]);
 SYSCTL_COUNTER_U64_ARRAY(_net_inet_tcp, TCPCTL_STATES, states, CTLFLAG_RD |
     CTLFLAG_VNET, &VNET_NAME(tcps_states)[0], TCP_NSTATES,
     "TCP connection counts by TCP state");
 
 static void
 tcp_vnet_init(const void *unused)
 {
 
 	COUNTER_ARRAY_ALLOC(V_tcps_states, TCP_NSTATES, M_WAITOK);
 	VNET_PCPUSTAT_ALLOC(tcpstat, M_WAITOK);
 }
 VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     tcp_vnet_init, NULL);
 
 #ifdef VIMAGE
 static void
 tcp_vnet_uninit(const void *unused)
 {
 
 	COUNTER_ARRAY_FREE(V_tcps_states, TCP_NSTATES);
 	VNET_PCPUSTAT_FREE(tcpstat);
 }
 VNET_SYSUNINIT(tcp_vnet_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     tcp_vnet_uninit, NULL);
 #endif /* VIMAGE */
 
 /*
  * Kernel module interface for updating tcpstat.  The first argument is an index
  * into tcpstat treated as an array.
  */
 void
 kmod_tcpstat_add(int statnum, int val)
 {
 
 	counter_u64_add(VNET(tcpstat)[statnum], val);
 }
 
 #ifdef TCP_HHOOK
 /*
  * Wrapper for the TCP established input helper hook.
  */
 void
 hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
 {
 	struct tcp_hhook_data hhook_data;
 
 	if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) {
 		hhook_data.tp = tp;
 		hhook_data.th = th;
 		hhook_data.to = to;
 
 		hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data,
 		    tp->osd);
 	}
 }
 #endif
 
 /*
  * CC wrapper hook functions
  */
 void
 cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
     uint16_t type)
 {
 #ifdef STATS
 	int32_t gput;
 #endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tp->ccv->nsegs = nsegs;
 	tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
 	if ((!V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd)) ||
 	    (V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd) &&
 	     (tp->snd_cwnd < (tcp_compute_pipe(tp) * 2))))
 		tp->ccv->flags |= CCF_CWND_LIMITED;
 	else
 		tp->ccv->flags &= ~CCF_CWND_LIMITED;
 
 	if (type == CC_ACK) {
 #ifdef STATS
 		stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
 		    ((int32_t)tp->snd_cwnd) - tp->snd_wnd);
 		if (!IN_RECOVERY(tp->t_flags))
 			stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN,
 			   tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs));
 		if ((tp->t_flags & TF_GPUTINPROG) &&
 		    SEQ_GEQ(th->th_ack, tp->gput_ack)) {
 			/*
 			 * Compute goodput in bits per millisecond.
 			 */
 			gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) /
 			    max(1, tcp_ts_getticks() - tp->gput_ts);
 			stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
 			    gput);
 			/*
 			 * XXXLAS: This is a temporary hack, and should be
 			 * chained off VOI_TCP_GPUT when stats(9) grows an API
 			 * to deal with chained VOIs.
 			 */
 			if (tp->t_stats_gput_prev > 0)
 				stats_voi_update_abs_s32(tp->t_stats,
 				    VOI_TCP_GPUT_ND,
 				    ((gput - tp->t_stats_gput_prev) * 100) /
 				    tp->t_stats_gput_prev);
 			tp->t_flags &= ~TF_GPUTINPROG;
 			tp->t_stats_gput_prev = gput;
 		}
 #endif /* STATS */
 		if (tp->snd_cwnd > tp->snd_ssthresh) {
 			tp->t_bytes_acked += tp->ccv->bytes_this_ack;
 			if (tp->t_bytes_acked >= tp->snd_cwnd) {
 				tp->t_bytes_acked -= tp->snd_cwnd;
 				tp->ccv->flags |= CCF_ABC_SENTAWND;
 			}
 		} else {
 				tp->ccv->flags &= ~CCF_ABC_SENTAWND;
 				tp->t_bytes_acked = 0;
 		}
 	}
 
 	if (CC_ALGO(tp)->ack_received != NULL) {
 		/* XXXLAS: Find a way to live without this */
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->ack_received(tp->ccv, type);
 	}
 #ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
 #endif
 }
 
 void
 cc_conn_init(struct tcpcb *tp)
 {
 	struct hc_metrics_lite metrics;
 	struct inpcb *inp = tp->t_inpcb;
 	u_int maxseg;
 	int rtt;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tcp_hc_get(&inp->inp_inc, &metrics);
 	maxseg = tcp_maxseg(tp);
 
 	if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
 		tp->t_srtt = rtt;
 		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
 		TCPSTAT_INC(tcps_usedrtt);
 		if (metrics.rmx_rttvar) {
 			tp->t_rttvar = metrics.rmx_rttvar;
 			TCPSTAT_INC(tcps_usedrttvar);
 		} else {
 			/* default variation is +- 1 rtt */
 			tp->t_rttvar =
 			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
 		}
 		TCPT_RANGESET(tp->t_rxtcur,
 		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
 		    tp->t_rttmin, TCPTV_REXMTMAX);
 	}
 	if (metrics.rmx_ssthresh) {
 		/*
 		 * There's some sort of gateway or interface
 		 * buffer limit on the path.  Use this to set
 		 * the slow start threshold, but set the
 		 * threshold to no less than 2*mss.
 		 */
 		tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh);
 		TCPSTAT_INC(tcps_usedssthresh);
 	}
 
 	/*
 	 * Set the initial slow-start flight size.
 	 *
 	 * If a SYN or SYN/ACK was lost and retransmitted, we have to
 	 * reduce the initial CWND to one segment as congestion is likely
 	 * requiring us to be cautious.
 	 */
 	if (tp->snd_cwnd == 1)
 		tp->snd_cwnd = maxseg;		/* SYN(-ACK) lost */
 	else
 		tp->snd_cwnd = tcp_compute_initwnd(maxseg);
 
 	if (CC_ALGO(tp)->conn_init != NULL)
 		CC_ALGO(tp)->conn_init(tp->ccv);
 }
 
 void inline
 cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 #ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
 #endif
 
 	switch(type) {
 	case CC_NDUPACK:
 		if (!IN_FASTRECOVERY(tp->t_flags)) {
 			tp->snd_recover = tp->snd_max;
 			if (tp->t_flags2 & TF2_ECN_PERMIT)
 				tp->t_flags2 |= TF2_ECN_SND_CWR;
 		}
 		break;
 	case CC_ECN:
 		if (!IN_CONGRECOVERY(tp->t_flags) ||
 		    /*
 		     * Allow ECN reaction on ACK to CWR, if
 		     * that data segment was also CE marked.
 		     */
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_CONGRECOVERY(tp->t_flags);
 			TCPSTAT_INC(tcps_ecn_rcwnd);
 			tp->snd_recover = tp->snd_max + 1;
 			if (tp->t_flags2 & TF2_ECN_PERMIT)
 				tp->t_flags2 |= TF2_ECN_SND_CWR;
 		}
 		break;
 	case CC_RTO:
 		tp->t_dupacks = 0;
 		tp->t_bytes_acked = 0;
 		EXIT_RECOVERY(tp->t_flags);
 		if (tp->t_flags2 & TF2_ECN_PERMIT)
 			tp->t_flags2 |= TF2_ECN_SND_CWR;
 		break;
 	case CC_RTO_ERR:
 		TCPSTAT_INC(tcps_sndrexmitbad);
 		/* RTO was unnecessary, so reset everything. */
 		tp->snd_cwnd = tp->snd_cwnd_prev;
 		tp->snd_ssthresh = tp->snd_ssthresh_prev;
 		tp->snd_recover = tp->snd_recover_prev;
 		if (tp->t_flags & TF_WASFRECOVERY)
 			ENTER_FASTRECOVERY(tp->t_flags);
 		if (tp->t_flags & TF_WASCRECOVERY)
 			ENTER_CONGRECOVERY(tp->t_flags);
 		tp->snd_nxt = tp->snd_max;
 		tp->t_flags &= ~TF_PREVVALID;
 		tp->t_badrxtwin = 0;
 		break;
 	}
 
 	if (CC_ALGO(tp)->cong_signal != NULL) {
 		if (th != NULL)
 			tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->cong_signal(tp->ccv, type);
 	}
 }
 
 void inline
 cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* XXXLAS: KASSERT that we're in recovery? */
 
 	if (CC_ALGO(tp)->post_recovery != NULL) {
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->post_recovery(tp->ccv);
 	}
 	/* XXXLAS: EXIT_RECOVERY ? */
 	tp->t_bytes_acked = 0;
 	tp->sackhint.delivered_data = 0;
 	tp->sackhint.prr_out = 0;
 }
 
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  * following conditions are met:
  *	- There is no delayed ack timer in progress.
  *	- Our last ack wasn't a 0-sized window. We never want to delay
  *	  the ack that opens up a 0-sized window.
  *	- LRO wasn't used for this segment. We make sure by checking that the
  *	  segment size is not larger than the MSS.
  */
 #define DELAY_ACK(tp, tlen)						\
 	((!tcp_timer_active(tp, TT_DELACK) &&				\
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
 	    (tlen <= tp->t_maxseg) &&					\
 	    (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 void inline
 cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (CC_ALGO(tp)->ecnpkt_handler != NULL) {
 		switch (iptos & IPTOS_ECN_MASK) {
 		case IPTOS_ECN_CE:
 			tp->ccv->flags |= CCF_IPHDR_CE;
 			break;
 		case IPTOS_ECN_ECT0:
 			/* FALLTHROUGH */
 		case IPTOS_ECN_ECT1:
 			/* FALLTHROUGH */
 		case IPTOS_ECN_NOTECT:
 			tp->ccv->flags &= ~CCF_IPHDR_CE;
 			break;
 		}
 
 		if (flags & TH_CWR)
 			tp->ccv->flags |= CCF_TCPHDR_CWR;
 		else
 			tp->ccv->flags &= ~CCF_TCPHDR_CWR;
 
 		CC_ALGO(tp)->ecnpkt_handler(tp->ccv);
 
 		if (tp->ccv->flags & CCF_ACKNOW) {
 			tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 			tp->t_flags |= TF_ACKNOW;
 		}
 	}
 }
 
 void inline
 cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 {
 	cc_ecnpkt_handler_flags(tp, th->th_flags, iptos);
 }
 
 /*
  * TCP input handling is split into multiple parts:
  *   tcp6_input is a thin wrapper around tcp_input for the extended
  *	ip6_protox[] call format in ip6_input
  *   tcp_input handles primary segment validation, inpcb lookup and
  *	SYN processing on listen sockets
  *   tcp_do_segment processes the ACK and text of the segment for
  *	establishing, established and closing connections
  */
 #ifdef INET6
 int
 tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
 {
 	struct mbuf *m;
 	struct in6_ifaddr *ia6;
 	struct ip6_hdr *ip6;
 
 	m = *mp;
 	if (m->m_len < *offp + sizeof(struct tcphdr)) {
 		m = m_pullup(m, *offp + sizeof(struct tcphdr));
 		if (m == NULL) {
 			*mp = m;
 			TCPSTAT_INC(tcps_rcvshort);
 			return (IPPROTO_DONE);
 		}
 	}
 
 	/*
 	 * draft-itojun-ipv6-tcp-to-anycast
 	 * better place to put this in?
 	 */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
 	if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
 			    (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
 		*mp = NULL;
 		return (IPPROTO_DONE);
 	}
 
 	*mp = m;
 	return (tcp_input_with_port(mp, offp, proto, port));
 }
 
 int
 tcp6_input(struct mbuf **mp, int *offp, int proto)
 {
 
 	return(tcp6_input_with_port(mp, offp, proto, 0));
 }
 #endif /* INET6 */
 
 int
 tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
 {
 	struct mbuf *m = *mp;
 	struct tcphdr *th = NULL;
 	struct ip *ip = NULL;
 	struct inpcb *inp = NULL;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
 	u_char *optp = NULL;
 	int off0;
 	int optlen = 0;
 #ifdef INET
 	int len;
 	uint8_t ipttl;
 #endif
 	int tlen = 0, off;
 	int drop_hdrlen;
 	int thflags;
 	int rstreason = 0;	/* For badport_bandlim accounting purposes */
 	int lookupflag;
 	uint8_t iptos;
 	struct m_tag *fwd_tag = NULL;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
 #else
 	const void *ip6 = NULL;
 #endif /* INET6 */
 	struct tcpopt to;		/* options in this segment */
 	char *s = NULL;			/* address and port logging */
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 
 	NET_EPOCH_ASSERT();
 
 #ifdef INET6
 	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
 #endif
 
 	off0 = *offp;
 	m = *mp;
 	*mp = NULL;
 	to.to_flags = 0;
 	TCPSTAT_INC(tcps_rcvtotal);
 
 #ifdef INET6
 	if (isipv6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)((caddr_t)ip6 + off0);
 		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
 		if (port)
 			goto skip6_csum;
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in6_cksum_pseudo(ip6, tlen,
 				    IPPROTO_TCP, m->m_pkthdr.csum_data);
 			th->th_sum ^= 0xffff;
 		} else
 			th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
 		if (th->th_sum) {
 			TCPSTAT_INC(tcps_rcvbadsum);
 			goto drop;
 		}
 	skip6_csum:
 		/*
 		 * Be proactive about unspecified IPv6 address in source.
 		 * As we use all-zero to indicate unbounded/unconnected pcb,
 		 * unspecified IPv6 address can be used to confuse us.
 		 *
 		 * Note that packets with unspecified IPv6 destination is
 		 * already dropped in ip6_input.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 			/* XXX stat */
 			goto drop;
 		}
 		iptos = IPV6_TRAFFIC_CLASS(ip6);
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		/*
 		 * Get IP and TCP header together in first mbuf.
 		 * Note: IP leaves IP header in first mbuf.
 		 */
 		if (off0 > sizeof (struct ip)) {
 			ip_stripoptions(m);
 			off0 = sizeof(struct ip);
 		}
 		if (m->m_len < sizeof (struct tcpiphdr)) {
 			if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
 			    == NULL) {
 				TCPSTAT_INC(tcps_rcvshort);
 				return (IPPROTO_DONE);
 			}
 		}
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)((caddr_t)ip + off0);
 		tlen = ntohs(ip->ip_len) - off0;
 
 		iptos = ip->ip_tos;
 		if (port)
 			goto skip_csum;
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in_pseudo(ip->ip_src.s_addr,
 				    ip->ip_dst.s_addr,
 				    htonl(m->m_pkthdr.csum_data + tlen +
 				    IPPROTO_TCP));
 			th->th_sum ^= 0xffff;
 		} else {
 			struct ipovly *ipov = (struct ipovly *)ip;
 
 			/*
 			 * Checksum extended TCP header and data.
 			 */
 			len = off0 + tlen;
 			ipttl = ip->ip_ttl;
 			bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
 			ipov->ih_len = htons(tlen);
 			th->th_sum = in_cksum(m, len);
 			/* Reset length for SDT probes. */
 			ip->ip_len = htons(len);
 			/* Reset TOS bits */
 			ip->ip_tos = iptos;
 			/* Re-initialization for later version check */
 			ip->ip_ttl = ipttl;
 			ip->ip_v = IPVERSION;
 			ip->ip_hl = off0 >> 2;
 		}
 	skip_csum:
 		if (th->th_sum && (port == 0)) {
 			TCPSTAT_INC(tcps_rcvbadsum);
 			goto drop;
 		}
 	}
 #endif /* INET */
 
 	/*
 	 * Check that TCP offset makes sense,
 	 * pull out TCP options and adjust length.		XXX
 	 */
 	off = th->th_off << 2;
 	if (off < sizeof (struct tcphdr) || off > tlen) {
 		TCPSTAT_INC(tcps_rcvbadoff);
 		goto drop;
 	}
 	tlen -= off;	/* tlen is used instead of ti->ti_len */
 	if (off > sizeof (struct tcphdr)) {
 #ifdef INET6
 		if (isipv6) {
 			if (m->m_len < off0 + off) {
 				m = m_pullup(m, off0 + off);
 				if (m == NULL) {
 					TCPSTAT_INC(tcps_rcvshort);
 					return (IPPROTO_DONE);
 				}
 			}
 			ip6 = mtod(m, struct ip6_hdr *);
 			th = (struct tcphdr *)((caddr_t)ip6 + off0);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			if (m->m_len < sizeof(struct ip) + off) {
 				if ((m = m_pullup(m, sizeof (struct ip) + off))
 				    == NULL) {
 					TCPSTAT_INC(tcps_rcvshort);
 					return (IPPROTO_DONE);
 				}
 				ip = mtod(m, struct ip *);
 				th = (struct tcphdr *)((caddr_t)ip + off0);
 			}
 		}
 #endif
 		optlen = off - sizeof (struct tcphdr);
 		optp = (u_char *)(th + 1);
 	}
 	thflags = th->th_flags;
 
 	/*
 	 * Convert TCP protocol specific fields to host format.
 	 */
 	tcp_fields_to_host(th);
 
 	/*
 	 * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
 	 */
 	drop_hdrlen = off0 + off;
 
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
         if (
 #ifdef INET6
 	    (isipv6 && (m->m_flags & M_IP6_NEXTHOP))
 #ifdef INET
 	    || (!isipv6 && (m->m_flags & M_IP_NEXTHOP))
 #endif
 #endif
 #if defined(INET) && !defined(INET6)
 	    (m->m_flags & M_IP_NEXTHOP)
 #endif
 	    )
 		fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 
 	/*
 	 * For initial SYN packets we don't need write lock on matching
 	 * PCB, be it a listening one or a synchronized one.  The packet
 	 * shall not modify its state.
 	 */
 	lookupflag = (thflags & (TH_ACK|TH_SYN)) == TH_SYN ?
 	    INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB;
 findpcb:
 #ifdef INET6
 	if (isipv6 && fwd_tag != NULL) {
 		struct sockaddr_in6 *next_hop6;
 
 		next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in6_pcblookup_mbuf(&V_tcbinfo,
 		    &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport,
 		    lookupflag, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src,
 			    th->th_sport, &next_hop6->sin6_addr,
 			    next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) :
 			    th->th_dport, INPLOOKUP_WILDCARD | lookupflag,
 			    m->m_pkthdr.rcvif);
 		}
 	} else if (isipv6) {
 		inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
 		    th->th_sport, &ip6->ip6_dst, th->th_dport,
 		    INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m);
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	if (fwd_tag != NULL) {
 		struct sockaddr_in *next_hop;
 
 		next_hop = (struct sockaddr_in *)(fwd_tag+1);
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * already got one like this?
 		 */
 		inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport,
 		    ip->ip_dst, th->th_dport, lookupflag, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
 			    th->th_sport, next_hop->sin_addr,
 			    next_hop->sin_port ? ntohs(next_hop->sin_port) :
 			    th->th_dport, INPLOOKUP_WILDCARD | lookupflag,
 			    m->m_pkthdr.rcvif);
 		}
 	} else
 		inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
 		    th->th_sport, ip->ip_dst, th->th_dport,
 		    INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m);
 #endif /* INET */
 
 	/*
 	 * If the INPCB does not exist then all data in the incoming
 	 * segment is discarded and an appropriate RST is sent back.
 	 * XXX MRT Send RST using which routing table?
 	 */
 	if (inp == NULL) {
 		/*
 		 * Log communication attempts to ports that are not
 		 * in use.
 		 */
 		if ((V_tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
 		    V_tcp_log_in_vain == 2) {
 			if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6)))
 				log(LOG_INFO, "%s; %s: Connection attempt "
 				    "to closed port\n", s, __func__);
 		}
 		/*
 		 * When blackholing do not respond with a RST but
 		 * completely ignore the segment and drop it.
 		 */
 		if ((V_blackhole == 1 && (thflags & TH_SYN)) ||
 		    V_blackhole == 2)
 			goto dropunlock;
 
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 	INP_LOCK_ASSERT(inp);
 	/*
 	 * While waiting for inp lock during the lookup, another thread
 	 * can have dropped the inpcb, in which case we need to loop back
 	 * and try to find a new inpcb to deliver to.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		INP_UNLOCK(inp);
 		inp = NULL;
 		goto findpcb;
 	}
 	if ((inp->inp_flowtype == M_HASHTYPE_NONE) &&
 	    (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) &&
-	    ((inp->inp_socket == NULL) ||
-	    (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
+	    ((inp->inp_socket == NULL) || !SOLISTENING(inp->inp_socket))) {
 		inp->inp_flowid = m->m_pkthdr.flowid;
 		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 #ifdef INET6
 	if (isipv6 && IPSEC_ENABLED(ipv6) &&
 	    IPSEC_CHECK_POLICY(ipv6, m, inp) != 0) {
 		goto dropunlock;
 	}
 #ifdef INET
 	else
 #endif
 #endif /* INET6 */
 #ifdef INET
 	if (IPSEC_ENABLED(ipv4) &&
 	    IPSEC_CHECK_POLICY(ipv4, m, inp) != 0) {
 		goto dropunlock;
 	}
 #endif /* INET */
 #endif /* IPSEC */
 
 	/*
 	 * Check the minimum TTL for socket.
 	 */
 	if (inp->inp_ip_minttl != 0) {
 #ifdef INET6
 		if (isipv6) {
 			if (inp->inp_ip_minttl > ip6->ip6_hlim)
 				goto dropunlock;
 		} else
 #endif
 		if (inp->inp_ip_minttl > ip->ip_ttl)
 			goto dropunlock;
 	}
 
 	/*
 	 * A previous connection in TIMEWAIT state is supposed to catch stray
 	 * or duplicate segments arriving late.  If this segment was a
 	 * legitimate new connection attempt, the old INPCB gets removed and
 	 * we can try again to find a listening socket.
 	 */
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		tcp_dooptions(&to, optp, optlen,
 		    (thflags & TH_SYN) ? TO_SYN : 0);
 		/*
 		 * NB: tcp_twcheck unlocks the INP and frees the mbuf.
 		 */
 		if (tcp_twcheck(inp, &to, th, m, tlen))
 			goto findpcb;
 		return (IPPROTO_DONE);
 	}
 	/*
 	 * The TCPCB may no longer exist if the connection is winding
 	 * down or it is in the CLOSED state.  Either way we drop the
 	 * segment and send an appropriate response.
 	 */
 	tp = intotcpcb(inp);
 	if (tp == NULL || tp->t_state == TCPS_CLOSED) {
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 
 	if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) {
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE) {
 		tcp_offload_input(tp, m);
 		m = NULL;	/* consumed by the TOE driver */
 		goto dropunlock;
 	}
 #endif
 
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, m))
 		goto dropunlock;
 #endif
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("%s: so == NULL", __func__));
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG) {
 		ostate = tp->t_state;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
 		} else
 #endif
 			bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
 		tcp_savetcp = *th;
 	}
 #endif /* TCPDEBUG */
 	/*
 	 * When the socket is accepting connections (the INPCB is in LISTEN
 	 * state) we look into the SYN cache if this is a new connection
 	 * attempt or the completion of a previous one.
 	 */
-	KASSERT(tp->t_state == TCPS_LISTEN || !(so->so_options & SO_ACCEPTCONN),
+	KASSERT(tp->t_state == TCPS_LISTEN || !SOLISTENING(so),
 	    ("%s: so accepting but tp %p not listening", __func__, tp));
-	if (tp->t_state == TCPS_LISTEN && (so->so_options & SO_ACCEPTCONN)) {
+	if (tp->t_state == TCPS_LISTEN && SOLISTENING(so)) {
 		struct in_conninfo inc;
 
 		bzero(&inc, sizeof(inc));
 #ifdef INET6
 		if (isipv6) {
 			inc.inc_flags |= INC_ISIPV6;
 			if (inp->inp_inc.inc_flags & INC_IPV6MINMTU)
 				inc.inc_flags |= INC_IPV6MINMTU;
 			inc.inc6_faddr = ip6->ip6_src;
 			inc.inc6_laddr = ip6->ip6_dst;
 		} else
 #endif
 		{
 			inc.inc_faddr = ip->ip_src;
 			inc.inc_laddr = ip->ip_dst;
 		}
 		inc.inc_fport = th->th_sport;
 		inc.inc_lport = th->th_dport;
 		inc.inc_fibnum = so->so_fibnum;
 
 		/*
 		 * Check for an existing connection attempt in syncache if
 		 * the flag is only ACK.  A successful lookup creates a new
 		 * socket appended to the listen queue in SYN_RECEIVED state.
 		 */
 		if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
 			/*
 			 * Parse the TCP options here because
 			 * syncookies need access to the reflected
 			 * timestamp.
 			 */
 			tcp_dooptions(&to, optp, optlen, 0);
 			/*
 			 * NB: syncache_expand() doesn't unlock
 			 * inp and tcpinfo locks.
 			 */
 			rstreason = syncache_expand(&inc, &to, th, &so, m, port);
 			if (rstreason < 0) {
 				/*
 				 * A failing TCP MD5 signature comparison
 				 * must result in the segment being dropped
 				 * and must not produce any response back
 				 * to the sender.
 				 */
 				goto dropunlock;
 			} else if (rstreason == 0) {
 				/*
 				 * No syncache entry or ACK was not
 				 * for our SYN/ACK.  Send a RST.
 				 * NB: syncache did its own logging
 				 * of the failure cause.
 				 */
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 tfo_socket_result:
 			if (so == NULL) {
 				/*
 				 * We completed the 3-way handshake
 				 * but could not allocate a socket
 				 * either due to memory shortage,
 				 * listen queue length limits or
 				 * global socket limits.  Send RST
 				 * or wait and have the remote end
 				 * retransmit the ACK for another
 				 * try.
 				 */
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 					log(LOG_DEBUG, "%s; %s: Listen socket: "
 					    "Socket allocation failed due to "
 					    "limits or memory shortage, %s\n",
 					    s, __func__,
 					    V_tcp_sc_rst_sock_fail ?
 					    "sending RST" : "try again");
 				if (V_tcp_sc_rst_sock_fail) {
 					rstreason = BANDLIM_UNLIMITED;
 					goto dropwithreset;
 				} else
 					goto dropunlock;
 			}
 			/*
 			 * Socket is created in state SYN_RECEIVED.
 			 * Unlock the listen socket, lock the newly
 			 * created socket and update the tp variable.
 			 * If we came here via jump to tfo_socket_result,
 			 * then listening socket is read-locked.
 			 */
 			INP_UNLOCK(inp);	/* listen socket */
 			inp = sotoinpcb(so);
 			/*
 			 * New connection inpcb is already locked by
 			 * syncache_expand().
 			 */
 			INP_WLOCK_ASSERT(inp);
 			tp = intotcpcb(inp);
 			KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
 			    ("%s: ", __func__));
 			/*
 			 * Process the segment and the data it
 			 * contains.  tcp_do_segment() consumes
 			 * the mbuf chain and unlocks the inpcb.
 			 */
 			TCP_PROBE5(receive, NULL, tp, m, tp, th);
 			tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
 			    iptos);
 			return (IPPROTO_DONE);
 		}
 		/*
 		 * Segment flag validation for new connection attempts:
 		 *
 		 * Our (SYN|ACK) response was rejected.
 		 * Check with syncache and remove entry to prevent
 		 * retransmits.
 		 *
 		 * NB: syncache_chkrst does its own logging of failure
 		 * causes.
 		 */
 		if (thflags & TH_RST) {
 			syncache_chkrst(&inc, th, m, port);
 			goto dropunlock;
 		}
 		/*
 		 * We can't do anything without SYN.
 		 */
 		if ((thflags & TH_SYN) == 0) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN is missing, segment ignored\n",
 				    s, __func__);
 			TCPSTAT_INC(tcps_badsyn);
 			goto dropunlock;
 		}
 		/*
 		 * (SYN|ACK) is bogus on a listen socket.
 		 */
 		if (thflags & TH_ACK) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|ACK invalid, segment rejected\n",
 				    s, __func__);
 			syncache_badack(&inc, port);	/* XXX: Not needed! */
 			TCPSTAT_INC(tcps_badsyn);
 			rstreason = BANDLIM_RST_OPENPORT;
 			goto dropwithreset;
 		}
 		/*
 		 * If the drop_synfin option is enabled, drop all
 		 * segments with both the SYN and FIN bits set.
 		 * This prevents e.g. nmap from identifying the
 		 * TCP/IP stack.
 		 * XXX: Poor reasoning.  nmap has other methods
 		 * and is constantly refining its stack detection
 		 * strategies.
 		 * XXX: This is a violation of the TCP specification
 		 * and was used by RFC1644.
 		 */
 		if ((thflags & TH_FIN) && V_drop_synfin) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|FIN segment ignored (based on "
 				    "sysctl setting)\n", s, __func__);
 			TCPSTAT_INC(tcps_badsyn);
 			goto dropunlock;
 		}
 		/*
 		 * Segment's flags are (SYN) or (SYN|FIN).
 		 *
 		 * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
 		 * as they do not affect the state of the TCP FSM.
 		 * The data pointed to by TH_URG and th_urp is ignored.
 		 */
 		KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
 		    ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
 		KASSERT(thflags & (TH_SYN),
 		    ("%s: Listen socket: TH_SYN not set", __func__));
 		INP_RLOCK_ASSERT(inp);
 #ifdef INET6
 		/*
 		 * If deprecated address is forbidden,
 		 * we do not accept SYN to deprecated interface
 		 * address to prevent any new inbound connection from
 		 * getting established.
 		 * When we do not accept SYN, we send a TCP RST,
 		 * with deprecated source address (instead of dropping
 		 * it).  We compromise it as it is much better for peer
 		 * to send a RST, and RST will be the final packet
 		 * for the exchange.
 		 *
 		 * If we do not forbid deprecated addresses, we accept
 		 * the SYN packet.  RFC2462 does not suggest dropping
 		 * SYN in this case.
 		 * If we decipher RFC2462 5.5.4, it says like this:
 		 * 1. use of deprecated addr with existing
 		 *    communication is okay - "SHOULD continue to be
 		 *    used"
 		 * 2. use of it with new communication:
 		 *   (2a) "SHOULD NOT be used if alternate address
 		 *        with sufficient scope is available"
 		 *   (2b) nothing mentioned otherwise.
 		 * Here we fall into (2b) case as we have no choice in
 		 * our source address selection - we must obey the peer.
 		 *
 		 * The wording in RFC2462 is confusing, and there are
 		 * multiple description text for deprecated address
 		 * handling - worse, they are not exactly the same.
 		 * I believe 5.5.4 is the best one, so we follow 5.5.4.
 		 */
 		if (isipv6 && !V_ip6_use_deprecated) {
 			struct in6_ifaddr *ia6;
 
 			ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
 			if (ia6 != NULL &&
 			    (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to deprecated "
 					"IPv6 address rejected\n",
 					s, __func__);
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 		}
 #endif /* INET6 */
 		/*
 		 * Basic sanity checks on incoming SYN requests:
 		 *   Don't respond if the destination is a link layer
 		 *	broadcast according to RFC1122 4.2.3.10, p. 104.
 		 *   If it is from this socket it must be forged.
 		 *   Don't respond if the source or destination is a
 		 *	global or subnet broad- or multicast address.
 		 *   Note that it is quite possible to receive unicast
 		 *	link-layer packets with a broadcast IP address. Use
 		 *	in_broadcast() to find them.
 		 */
 		if (m->m_flags & (M_BCAST|M_MCAST)) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 			    log(LOG_DEBUG, "%s; %s: Listen socket: "
 				"Connection attempt from broad- or multicast "
 				"link layer address ignored\n", s, __func__);
 			goto dropunlock;
 		}
 #ifdef INET6
 		if (isipv6) {
 			if (th->th_dport == th->th_sport &&
 			    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to/from self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 			    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to multicast "
 					"address ignored\n", s, __func__);
 				goto dropunlock;
 			}
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			if (th->th_dport == th->th_sport &&
 			    ip->ip_dst.s_addr == ip->ip_src.s_addr) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 			    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 			    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
 			    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to broad- "
 					"or multicast address ignored\n",
 					s, __func__);
 				goto dropunlock;
 			}
 		}
 #endif
 		/*
 		 * SYN appears to be valid.  Create compressed TCP state
 		 * for syncache.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 			    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 		TCP_PROBE3(debug__input, tp, th, m);
 		tcp_dooptions(&to, optp, optlen, TO_SYN);
 		if ((so = syncache_add(&inc, &to, th, inp, so, m, NULL, NULL,
 		    iptos, port)) != NULL)
 			goto tfo_socket_result;
 
 		/*
 		 * Entry added to syncache and mbuf consumed.
 		 * Only the listen socket is unlocked by syncache_add().
 		 */
 		return (IPPROTO_DONE);
 	} else if (tp->t_state == TCPS_LISTEN) {
 		/*
 		 * When a listen socket is torn down the SO_ACCEPTCONN
 		 * flag is removed first while connections are drained
 		 * from the accept queue in an unlock/lock cycle of the
 		 * ACCEPT_LOCK, opening a race condition that allows a SYN
 		 * attempt to go through unhandled.
 		 */
 		goto dropunlock;
 	}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	if (tp->t_flags & TF_SIGNATURE) {
 		tcp_dooptions(&to, optp, optlen, thflags);
 		if ((to.to_flags & TOF_SIGNATURE) == 0) {
 			TCPSTAT_INC(tcps_sig_err_nosigopt);
 			goto dropunlock;
 		}
 		if (!TCPMD5_ENABLED() ||
 		    TCPMD5_INPUT(m, th, to.to_signature) != 0)
 			goto dropunlock;
 	}
 #endif
 	TCP_PROBE5(receive, NULL, tp, m, tp, th);
 
 	/*
 	 * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
 	 * state.  tcp_do_segment() always consumes the mbuf chain, unlocks
 	 * the inpcb, and unlocks pcbinfo.
 	 *
 	 * XXXGL: in case of a pure SYN arriving on existing connection
 	 * TCP stacks won't need to modify the PCB, they would either drop
 	 * the segment silently, or send a challenge ACK.  However, we try
 	 * to upgrade the lock, because calling convention for stacks is
 	 * write-lock on PCB.  If upgrade fails, drop the SYN.
 	 */
 	if (lookupflag == INPLOOKUP_RLOCKPCB && INP_TRY_UPGRADE(inp) == 0)
 		goto dropunlock;
 
 	tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
 	return (IPPROTO_DONE);
 
 dropwithreset:
 	TCP_PROBE5(receive, NULL, tp, m, tp, th);
 
 	if (inp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_UNLOCK(inp);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	m = NULL;	/* mbuf chain got consumed. */
 	goto drop;
 
 dropunlock:
 	if (m != NULL)
 		TCP_PROBE5(receive, NULL, tp, m, tp, th);
 
 	if (inp != NULL)
 		INP_UNLOCK(inp);
 
 drop:
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	if (m != NULL)
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Automatic sizing of receive socket buffer.  Often the send
  * buffer size is not optimally adjusted to the actual network
  * conditions at hand (delay bandwidth product).  Setting the
  * buffer size too small limits throughput on links with high
  * bandwidth and high delay (eg. trans-continental/oceanic links).
  *
  * On the receive side the socket buffer memory is only rarely
  * used to any significant extent.  This allows us to be much
  * more aggressive in scaling the receive socket buffer.  For
  * the case that the buffer space is actually used to a large
  * extent and we run out of kernel memory we can simply drop
  * the new segments; TCP on the sender will just retransmit it
  * later.  Setting the buffer size too big may only consume too
  * much kernel memory if the application doesn't read() from
  * the socket or packet loss or reordering makes use of the
  * reassembly queue.
  *
  * The criteria to step up the receive buffer one notch are:
  *  1. Application has not set receive buffer size with
  *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
  *  2. the number of bytes received during 1/2 of an sRTT
  *     is at least 3/8 of the current socket buffer size.
  *  3. receive buffer size has not hit maximal automatic size;
  *
  * If all of the criteria are met we increase the socket buffer
  * by a 1/2 (bounded by the max). This allows us to keep ahead
  * of slow-start but also makes it so our peer never gets limited
  * by our rwnd which we then open up causing a burst.
  *
  * This algorithm does two steps per RTT at most and only if
  * we receive a bulk stream w/o packet losses or reorderings.
  * Shrinking the buffer during idle times is not necessary as
  * it doesn't consume any memory when idle.
  *
  * TODO: Only step up if the application is actually serving
  * the buffer to better manage the socket buffer resources.
  */
 /*
  * Decide whether the receive socket buffer should be grown.
  *
  * Returns the proposed new high-water mark for so->so_rcv, or 0 when no
  * resize is suggested.  The function itself never resizes the buffer; it
  * only updates the per-connection measurement state (rfbuf_ts/rfbuf_cnt)
  * and fires the receive__autoresize probe when a measurement interval ends.
  */
 int
 tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int tlen)
 {
 	int newsize;

 	/*
 	 * Measurement interval still running (or autosizing not applicable):
 	 * just account for the bytes of this segment and suggest nothing.
 	 * The timestamp is only sampled once all cheaper checks have passed.
 	 */
 	if (!V_tcp_do_autorcvbuf ||
 	    (so->so_rcv.sb_flags & SB_AUTOSIZE) == 0 ||
 	    tp->t_srtt == 0 || tp->rfbuf_ts == 0 ||
 	    TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) <=
 	    ((tp->t_srtt >> TCP_RTT_SHIFT) / 2)) {
 		tp->rfbuf_cnt += tlen;
 		return (0);
 	}

 	/*
 	 * More than half an sRTT has elapsed.  Step the buffer up by one
 	 * half (capped at the automatic maximum) if more than 3/8 of the
 	 * current size arrived during the interval.
 	 */
 	newsize = 0;
 	if (so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max &&
 	    tp->rfbuf_cnt > ((so->so_rcv.sb_hiwat / 2) / 4 * 3)) {
 		newsize = min(so->so_rcv.sb_hiwat + (so->so_rcv.sb_hiwat / 2),
 		    V_tcp_autorcvbuf_max);
 	}
 	TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize);

 	/* Restart the measurement with the next RTT. */
 	tp->rfbuf_ts = 0;
 	tp->rfbuf_cnt = 0;

 	return (newsize);
 }
 
 /*
  * Entry point that hands the segment to tcp_input_with_port(), passing 0
  * as the trailing port argument, and returns that function's result
  * unchanged.
  */
 int
 tcp_input(struct mbuf **mp, int *offp, int proto)
 {
 	int rv;

 	rv = tcp_input_with_port(mp, offp, proto, 0);
 	return (rv);
 }
 
 /*
  * Issue a receive-side wakeup on 'so' if the tcpcb has TF_WAKESOR set,
  * clearing the flag in the process.  A NULL tcpcb or socket is tolerated
  * and results in no action.  The inpcb lock and the receive sockbuf lock
  * are asserted, so the caller is expected to hold both when a wakeup is
  * pending.
  */
 void
 tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
 {
 	/*
 	 * The tcpcb might be gone if the session entered the TIME_WAIT
 	 * state before coming here, so make sure there is still a
 	 * connection (and a socket) to work with.
 	 */
 	if (tp == NULL || so == NULL)
 		return;

 	INP_LOCK_ASSERT(tp->t_inpcb);

 	/* No wakeup was requested: nothing to do. */
 	if ((tp->t_flags & TF_WAKESOR) == 0)
 		return;

 	/* Consume the request, then wake up readers of the receive buffer. */
 	tp->t_flags &= ~TF_WAKESOR;
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	sorwakeup_locked(so);
 }
 
 void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
 	int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
 	int rstreason, todrop, win, incforsyn = 0;
 	uint32_t tiwin;
 	uint16_t nsegs;
 	char *s;
 	struct in_conninfo *inc;
 	struct mbuf *mfree;
 	struct tcpopt to;
 	int tfo_syn;
 	u_int maxseg;
 
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 	thflags = th->th_flags;
 	inc = &tp->t_inpcb->inp_inc;
 	tp->sackhint.last_sack_ack = 0;
 	sack_changed = 0;
 	nsegs = max(1, m->m_pkthdr.lro_nsegs);
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 	    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 	    __func__));
 
 #ifdef TCPPCAP
 	/* Save segment, if requested. */
 	tcp_pcap_add(th, m, &(tp->t_inpkts));
 #endif
 	TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0,
 	    tlen, NULL, true);
 
 	if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: "
 			    "SYN|FIN segment ignored (based on "
 			    "sysctl setting)\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 		goto drop;
 	}
 
 	/*
 	 * If a segment with the ACK-bit set arrives in the SYN-SENT state
 	 * check SEQ.ACK first.
 	 */
 	if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
 	    (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 
 	/*
 	 * Scale up the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 #ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
 #endif
 
 	/*
 	 * TCP ECN processing.
 	 */
 	if (tp->t_flags2 & TF2_ECN_PERMIT) {
 		if (thflags & TH_CWR) {
 			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
 			tp->t_flags |= TF_ACKNOW;
 		}
 		switch (iptos & IPTOS_ECN_MASK) {
 		case IPTOS_ECN_CE:
 			tp->t_flags2 |= TF2_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_ce);
 			break;
 		case IPTOS_ECN_ECT0:
 			TCPSTAT_INC(tcps_ecn_ect0);
 			break;
 		case IPTOS_ECN_ECT1:
 			TCPSTAT_INC(tcps_ecn_ect1);
 			break;
 		}
 
 		/* Process a packet differently from RFC3168. */
 		cc_ecnpkt_handler(tp, th, iptos);
 
 		/* Congestion experienced. */
 		if (thflags & TH_ECE) {
 			cc_cong_signal(tp, th, CC_ECN);
 		}
 	}
 
 	/*
 	 * Parse options on any incoming segment.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 	    (th->th_off << 2) - sizeof(struct tcphdr),
 	    (thflags & TH_SYN) ? TO_SYN : 0);
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	if ((tp->t_flags & TF_SIGNATURE) != 0 &&
 	    (to.to_flags & TOF_SIGNATURE) == 0) {
 		TCPSTAT_INC(tcps_sig_err_sigopt);
 		/* XXX: should drop? */
 	}
 #endif
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 		else if (tp->t_flags & TF_PREVVALID &&
 			 tp->t_badrxtwin != 0 && SEQ_LT(to.to_tsecr, tp->t_badrxtwin))
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 	}
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		/* Handle parallel SYN for ECN */
 		if (!(thflags & TH_ACK) &&
 		    ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
 		    ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
 			tp->t_flags2 |= TF2_ECN_PERMIT;
 			tp->t_flags2 |= TF2_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_shs);
 		}
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE) &&
 		    !(tp->t_flags & TF_NOOPT)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		} else
 			tp->t_flags &= ~TF_REQ_SCALE;
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if ((to.to_flags & TOF_TS) &&
 		    (tp->t_flags & TF_REQ_TSTMP) &&
 		    !(tp->t_flags & TF_NOOPT)) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = tcp_ts_getticks();
 		} else
 			tp->t_flags &= ~TF_REQ_TSTMP;
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (!(to.to_flags & TOF_SACKPERM) ||
 		    (tp->t_flags & TF_NOOPT)))
 			tp->t_flags &= ~TF_SACK_PERMIT;
 		if (IS_FASTOPEN(tp->t_flags)) {
 			if ((to.to_flags & TOF_FASTOPEN) &&
 			    !(tp->t_flags & TF_NOOPT)) {
 				uint16_t mss;
 
 				if (to.to_flags & TOF_MSS)
 					mss = to.to_mss;
 				else
 					if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 						mss = TCP6_MSS;
 					else
 						mss = TCP_MSS;
 				tcp_fastopen_update_cache(tp, mss,
 				    to.to_tfo_len, to.to_tfo_cookie);
 			} else
 				tcp_fastopen_disable_path(tp);
 		}
 	}
 
 	/*
 	 * If timestamps were negotiated during SYN/ACK and a
 	 * segment without a timestamp is received, silently drop
 	 * the segment, unless it is a RST segment or missing timestamps are
 	 * tolerated.
 	 * See section 3.2 of RFC 7323.
 	 */
 	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
 		if (((thflags & TH_RST) != 0) || V_tcp_tolerate_missing_ts) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 				    "segment processed normally\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 		} else {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 				    "segment silently dropped\n", s, __func__);
 				free(s, M_TCPLOG);
 			}
 			goto drop;
 		}
 	}
 	/*
 	 * If timestamps were not negotiated during SYN/ACK and a
 	 * segment with a timestamp is received, ignore the
 	 * timestamp and process the packet normally.
 	 * See section 3.2 of RFC 7323.
 	 */
 	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "segment processed normally\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window didn't
 	 * change and we're not retransmitting, it's a
 	 * candidate.  If the length is zero and the ack moved
 	 * forward, we're the sender side of the xfer.  Just
 	 * free the data acked & wake any higher level process
 	 * that was blocked waiting for space.  If the length
 	 * is non-zero and the ack didn't move, we're the
 	 * receiver side.  If we're getting packets in-order
 	 * (the reassembly queue is empty), add the data to
 	 * the socket buffer and note that we need a delayed ack.
 	 * Make sure that the hidden state-flags are also off.
 	 * Since we check for TCPS_ESTABLISHED first, it can only
 	 * be TH_NEEDSYN.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    th->th_seq == tp->rcv_nxt &&
 	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
 	    tp->snd_nxt == tp->snd_max &&
 	    tiwin && tiwin == tp->snd_wnd &&
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
 	    SEGQ_EMPTY(tp) &&
 	    ((to.to_flags & TOF_TS) == 0 ||
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
 		/*
 		 * If last ACK falls within this segment's sequence numbers,
 		 * record the timestamp.
 		 * NOTE that the test is modified according to the latest
 		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 		 */
 		if ((to.to_flags & TOF_TS) != 0 &&
 		    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 			tp->ts_recent_age = tcp_ts_getticks();
 			tp->ts_recent = to.to_tsval;
 		}
 
 		if (tlen == 0) {
 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
 			    !IN_RECOVERY(tp->t_flags) &&
 			    (to.to_flags & TOF_SACK) == 0 &&
 			    TAILQ_EMPTY(&tp->snd_holes)) {
 				/*
 				 * This is a pure ack for outstanding data.
 				 */
 				TCPSTAT_INC(tcps_predack);
 
 				/*
 				 * "bad retransmit" recovery without timestamps.
 				 */
 				if ((to.to_flags & TOF_TS) == 0 &&
 				    tp->t_rxtshift == 1 &&
 				    tp->t_flags & TF_PREVVALID &&
 				    (int)(ticks - tp->t_badrxtwin) < 0) {
 					cc_cong_signal(tp, th, CC_RTO_ERR);
 				}
 
 				/*
 				 * Recalculate the transmit timer / rtt.
 				 *
 				 * Some boxes send broken timestamp replies
 				 * during the SYN+ACK phase, ignore
 				 * timestamps of 0 or we could calculate a
 				 * huge RTT and blow up the retransmit timer.
 				 */
 				if ((to.to_flags & TOF_TS) != 0 &&
 				    to.to_tsecr) {
 					uint32_t t;
 
 					t = tcp_ts_getticks() - to.to_tsecr;
 					if (!tp->t_rttlow || tp->t_rttlow > t)
 						tp->t_rttlow = t;
 					tcp_xmit_timer(tp,
 					    TCP_TS_TO_TICKS(t) + 1);
 				} else if (tp->t_rtttime &&
 				    SEQ_GT(th->th_ack, tp->t_rtseq)) {
 					if (!tp->t_rttlow ||
 					    tp->t_rttlow > ticks - tp->t_rtttime)
 						tp->t_rttlow = ticks - tp->t_rtttime;
 					tcp_xmit_timer(tp,
 							ticks - tp->t_rtttime);
 				}
 				acked = BYTES_THIS_ACK(tp, th);
 
 #ifdef TCP_HHOOK
 				/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 				hhook_run_tcp_est_in(tp, th, &to);
 #endif
 
 				TCPSTAT_ADD(tcps_rcvackpack, nsegs);
 				TCPSTAT_ADD(tcps_rcvackbyte, acked);
 				sbdrop(&so->so_snd, acked);
 				if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 				    SEQ_LEQ(th->th_ack, tp->snd_recover))
 					tp->snd_recover = th->th_ack - 1;
 
 				/*
 				 * Let the congestion control algorithm update
 				 * congestion control related information. This
 				 * typically means increasing the congestion
 				 * window.
 				 */
 				cc_ack_received(tp, th, nsegs, CC_ACK);
 
 				tp->snd_una = th->th_ack;
 				/*
 				 * Pull snd_wl2 up to prevent seq wrap relative
 				 * to th_ack.
 				 */
 				tp->snd_wl2 = th->th_ack;
 				tp->t_dupacks = 0;
 				m_freem(m);
 
 				/*
 				 * If all outstanding data are acked, stop
 				 * retransmit timer, otherwise restart timer
 				 * using current (possibly backed-off) value.
 				 * If process is waiting for space,
 				 * wakeup/selwakeup/signal.  If data
 				 * are ready to send, let tcp_output
 				 * decide between more output or persist.
 				 */
 #ifdef TCPDEBUG
 				if (so->so_options & SO_DEBUG)
 					tcp_trace(TA_INPUT, ostate, tp,
 					    (void *)tcp_saveipgen,
 					    &tcp_savetcp, 0);
 #endif
 				TCP_PROBE3(debug__input, tp, th, m);
 				if (tp->snd_una == tp->snd_max)
 					tcp_timer_activate(tp, TT_REXMT, 0);
 				else if (!tcp_timer_active(tp, TT_PERSIST))
 					tcp_timer_activate(tp, TT_REXMT,
 						      tp->t_rxtcur);
 				sowwakeup(so);
 				if (sbavail(&so->so_snd))
 					(void) tp->t_fb->tfb_tcp_output(tp);
 				goto check_delack;
 			}
 		} else if (th->th_ack == tp->snd_una &&
 		    tlen <= sbspace(&so->so_rcv)) {
 			int newsize = 0;	/* automatic sockbuf scaling */
 
 			/*
 			 * This is a pure, in-sequence data packet with
 			 * nothing on the reassembly queue and we have enough
 			 * buffer space to take it.
 			 */
 			/* Clean receiver SACK report if present */
 			if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
 				tcp_clean_sackreport(tp);
 			TCPSTAT_INC(tcps_preddat);
 			tp->rcv_nxt += tlen;
 			if (tlen &&
 			    ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
 			    (tp->t_fbyte_in == 0)) {
 				tp->t_fbyte_in = ticks;
 				if (tp->t_fbyte_in == 0)
 					tp->t_fbyte_in = 1;
 				if (tp->t_fbyte_out && tp->t_fbyte_in)
 					tp->t_flags2 |= TF2_FBYTES_COMPLETE;
 			}
 			/*
 			 * Pull snd_wl1 up to prevent seq wrap relative to
 			 * th_seq.
 			 */
 			tp->snd_wl1 = th->th_seq;
 			/*
 			 * Pull rcv_up up to prevent seq wrap relative to
 			 * rcv_nxt.
 			 */
 			tp->rcv_up = tp->rcv_nxt;
 			TCPSTAT_ADD(tcps_rcvpack, nsegs);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 #ifdef TCPDEBUG
 			if (so->so_options & SO_DEBUG)
 				tcp_trace(TA_INPUT, ostate, tp,
 				    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 			TCP_PROBE3(debug__input, tp, th, m);
 
 			newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
 
 			/* Add data to socket buffer. */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 				m_freem(m);
 			} else {
 				/*
 				 * Set new socket buffer size.
 				 * Give up when limit is reached.
 				 */
 				if (newsize)
 					if (!sbreserve_locked(&so->so_rcv,
 					    newsize, so, NULL))
 						so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			}
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 			if (DELAY_ACK(tp, tlen)) {
 				tp->t_flags |= TF_DELACK;
 			} else {
 				tp->t_flags |= TF_ACKNOW;
 				tp->t_fb->tfb_tcp_output(tp);
 			}
 			goto check_delack;
 		}
 	}
 
 	/*
 	 * Calculate amount of space in receive window,
 	 * and then do TCP input processing.
 	 * Receive window is amount of space in rcv queue,
 	 * but not less than advertised window.
 	 */
 	win = sbspace(&so->so_rcv);
 	if (win < 0)
 		win = 0;
 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
 	switch (tp->t_state) {
 	/*
 	 * If the state is SYN_RECEIVED:
 	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
 	 */
 	case TCPS_SYN_RECEIVED:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
 		if (IS_FASTOPEN(tp->t_flags)) {
 			/*
 			 * When a TFO connection is in SYN_RECEIVED, the
 			 * only valid packets are the initial SYN, a
 			 * retransmit/copy of the initial SYN (possibly with
 			 * a subset of the original data), a valid ACK, a
 			 * FIN, or a RST.
 			 */
 			if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			} else if (thflags & TH_SYN) {
 				/* non-initial SYN is ignored */
 				if ((tcp_timer_active(tp, TT_DELACK) ||
 				     tcp_timer_active(tp, TT_REXMT)))
 					goto drop;
 			} else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
 				goto drop;
 			}
 		}
 		break;
 
 	/*
 	 * If the state is SYN_SENT:
 	 *	if seg contains a RST with valid ACK (SEQ.ACK has already
 	 *	    been verified), then drop the connection.
 	 *	if seg contains a RST without an ACK, drop the seg.
 	 *	if seg does not contain SYN, then drop the seg.
 	 * Otherwise this is an acceptable SYN segment
 	 *	initialize tp->rcv_nxt and tp->irs
 	 *	if seg contains ack then advance tp->snd_una
 	 *	if seg contains an ECE and ECN support is enabled, the stream
 	 *	    is ECN capable.
 	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
 	 *	arrange for segment to be acked (eventually)
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
 		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
 			TCP_PROBE5(connect__refused, NULL, tp,
 			    m, tp, th);
 			tp = tcp_drop(tp, ECONNREFUSED);
 		}
 		if (thflags & TH_RST)
 			goto drop;
 		if (!(thflags & TH_SYN))
 			goto drop;
 
 		tp->irs = th->th_seq;
 		tcp_rcvseqinit(tp);
 		if (thflags & TH_ACK) {
 			int tfo_partial_ack = 0;
 
 			TCPSTAT_INC(tcps_connects);
 			soisconnected(so);
 #ifdef MAC
 			mac_socketpeer_set_from_mbuf(m, so);
 #endif
 			/* Do window scaling on this connection? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			tp->rcv_adv += min(tp->rcv_wnd,
 			    TCP_MAXWIN << tp->rcv_scale);
 			tp->snd_una++;		/* SYN is acked */
 			/*
 			 * If not all the data that was sent in the TFO SYN
 			 * has been acked, resend the remainder right away.
 			 */
 			if (IS_FASTOPEN(tp->t_flags) &&
 			    (tp->snd_una != tp->snd_max)) {
 				tp->snd_nxt = th->th_ack;
 				tfo_partial_ack = 1;
 			}
 			/*
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
 			if (DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial_ack)
 				tcp_timer_activate(tp, TT_DELACK,
 				    tcp_delacktime);
 			else
 				tp->t_flags |= TF_ACKNOW;
 
 			if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
 			    (V_tcp_do_ecn == 1)) {
 				tp->t_flags2 |= TF2_ECN_PERMIT;
 				TCPSTAT_INC(tcps_ecn_shs);
 			}
 
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
 			 * Transitions:
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
 			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
 				tcp_state_change(tp, TCPS_FIN_WAIT_1);
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
 				tcp_state_change(tp, TCPS_ESTABLISHED);
 				TCP_PROBE5(connect__established, NULL, tp,
 				    m, tp, th);
 				cc_conn_init(tp);
 				tcp_timer_activate(tp, TT_KEEP,
 				    TP_KEEPIDLE(tp));
 			}
 		} else {
 			/*
 			 * Received initial SYN in SYN-SENT[*] state =>
 			 * simultaneous open.
 			 * If it succeeds, connection is * half-synchronized.
 			 * Otherwise, do 3-way handshake:
 			 *        SYN-SENT -> SYN-RECEIVED
 			 *        SYN-SENT* -> SYN-RECEIVED*
 			 */
 			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
 
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Advance th->th_seq to correspond to first data byte.
 		 * If data, trim to stay within window,
 		 * dropping FIN if necessary.
 		 */
 		th->th_seq++;
 		if (tlen > tp->rcv_wnd) {
 			todrop = tlen - tp->rcv_wnd;
 			m_adj(m, -todrop);
 			tlen = tp->rcv_wnd;
 			thflags &= ~TH_FIN;
 			TCPSTAT_INC(tcps_rcvpackafterwin);
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		tp->rcv_up = th->th_seq;
 		/*
 		 * Client side of transaction: already sent SYN and data.
 		 * If the remote host used T/TCP to validate the SYN,
 		 * our data will be ACK'd; if so, enter normal data segment
 		 * processing in the middle of step 5, ack processing.
 		 * Otherwise, goto step 6.
 		 */
 		if (thflags & TH_ACK)
 			goto process_ACK;
 
 		goto step6;
 
 	/*
 	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
 	 *      do normal processing.
 	 *
 	 * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later.
 	 */
 	case TCPS_LAST_ACK:
 	case TCPS_CLOSING:
 		break;  /* continue normal processing */
 	}
 
 	/*
 	 * States other than LISTEN or SYN_SENT.
 	 * First check the RST flag and sequence number since reset segments
 	 * are exempt from the timestamp and connection count tests.  This
 	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
 	 * below which allowed reset segments in half the sequence space
 	 * to fall though and be processed (which gives forged reset
 	 * segments with a random sequence number a 50 percent chance of
 	 * killing a connection).
 	 * Then check timestamp, if present.
 	 * Then check the connection count, if present.
 	 * Then check that at least some bytes of segment are within
 	 * receive window.  If segment begins before rcv_nxt,
 	 * drop leading data (and SYN); if nothing left, just ack.
 	 */
 	if (thflags & TH_RST) {
 		/*
 		 * RFC5961 Section 3.2
 		 *
 		 * - RST drops connection only if SEG.SEQ == RCV.NXT.
 		 * - If RST is in window, we send challenge ACK.
 		 *
 		 * Note: to take into account delayed ACKs, we should
 		 *   test against last_ack_sent instead of rcv_nxt.
 		 * Note 2: we handle special case of closed window, not
 		 *   covered by the RFC.
 		 */
 		if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
 		    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
 			KASSERT(tp->t_state != TCPS_SYN_SENT,
 			    ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
 			    __func__, th, tp));
 
 			if (V_tcp_insecure_rst ||
 			    tp->last_ack_sent == th->th_seq) {
 				TCPSTAT_INC(tcps_drops);
 				/* Drop the connection. */
 				switch (tp->t_state) {
 				case TCPS_SYN_RECEIVED:
 					so->so_error = ECONNREFUSED;
 					goto close;
 				case TCPS_ESTABLISHED:
 				case TCPS_FIN_WAIT_1:
 				case TCPS_FIN_WAIT_2:
 				case TCPS_CLOSE_WAIT:
 				case TCPS_CLOSING:
 				case TCPS_LAST_ACK:
 					so->so_error = ECONNRESET;
 				close:
 					/* FALLTHROUGH */
 				default:
 					tp = tcp_close(tp);
 				}
 			} else {
 				TCPSTAT_INC(tcps_badrst);
 				/* Send challenge ACK. */
 				tcp_respond(tp, mtod(m, void *), th, m,
 				    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
 				tp->last_ack_sent = tp->rcv_nxt;
 				m = NULL;
 			}
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC5961 Section 4.2
 	 * Send challenge ACK for any SYN in synchronized state.
 	 */
 	if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
 	    tp->t_state != TCPS_SYN_RECEIVED) {
 		TCPSTAT_INC(tcps_badsyn);
 		if (V_tcp_insecure_syn &&
 		    SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
 			tp = tcp_drop(tp, ECONNRESET);
 			rstreason = BANDLIM_UNLIMITED;
 		} else {
 			/* Send challenge ACK. */
 			tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
 			    tp->snd_nxt, TH_ACK);
 			tp->last_ack_sent = tp->rcv_nxt;
 			m = NULL;
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
 		/* Check to see if ts_recent is over 24 days old.  */
 		if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
 			/*
 			 * Invalidate ts_recent.  If this segment updates
 			 * ts_recent, the age will be reset later and ts_recent
 			 * will get a valid value.  If it does not, setting
 			 * ts_recent to zero will at least satisfy the
 			 * requirement that zero be placed in the timestamp
 			 * echo reply when ts_recent isn't valid.  The
 			 * age isn't reset until we get a valid ts_recent
 			 * because we don't want out-of-order segments to be
 			 * dropped when ts_recent is old.
 			 */
 			tp->ts_recent = 0;
 		} else {
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
 			TCPSTAT_INC(tcps_pawsdrop);
 			if (tlen)
 				goto dropafterack;
 			goto drop;
 		}
 	}
 
 	/*
 	 * In the SYN-RECEIVED state, validate that the packet belongs to
 	 * this connection before trimming the data to fit the receive
 	 * window.  Check the sequence number versus IRS since we know
 	 * the sequence numbers haven't wrapped.  This is a partial fix
 	 * for the "LAND" DoS attack.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 
 	todrop = tp->rcv_nxt - th->th_seq;
 	if (todrop > 0) {
 		if (thflags & TH_SYN) {
 			thflags &= ~TH_SYN;
 			th->th_seq++;
 			if (th->th_urp > 1)
 				th->th_urp--;
 			else
 				thflags &= ~TH_URG;
 			todrop--;
 		}
 		/*
 		 * Following if statement from Stevens, vol. 2, p. 960.
 		 */
 		if (todrop > tlen
 		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
 			/*
 			 * Any valid FIN must be to the left of the window.
 			 * At this point the FIN must be a duplicate or out
 			 * of sequence; drop it.
 			 */
 			thflags &= ~TH_FIN;
 
 			/*
 			 * Send an ACK to resynchronize and drop any data.
 			 * But keep on processing for RST or ACK.
 			 */
 			tp->t_flags |= TF_ACKNOW;
 			todrop = tlen;
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
 		} else {
 			TCPSTAT_INC(tcps_rcvpartduppack);
 			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
 		}
 		/*
 		 * DSACK - add SACK block for dropped range
 		 */
 		if ((todrop > 0) && (tp->t_flags & TF_SACK_PERMIT)) {
 			tcp_update_sack_list(tp, th->th_seq,
 			    th->th_seq + todrop);
 			/*
 			 * ACK now, as the next in-sequence segment
 			 * will clear the DSACK block again
 			 */
 			tp->t_flags |= TF_ACKNOW;
 		}
 		drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
 		if (th->th_urp > todrop)
 			th->th_urp -= todrop;
 		else {
 			thflags &= ~TH_URG;
 			th->th_urp = 0;
 		}
 	}
 
 	/*
 	 * If new data are received on a connection after the
 	 * user processes are gone, then RST the other end.
 	 */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
 			    "after socket was closed, "
 			    "sending RST and removing tcpcb\n",
 			    s, __func__, tcpstates[tp->t_state], tlen);
 			free(s, M_TCPLOG);
 		}
 		tp = tcp_close(tp);
 		TCPSTAT_INC(tcps_rcvafterclose);
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * If segment ends after window, drop trailing data
 	 * (and PUSH and FIN); if nothing left, just ACK.
 	 */
 	todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
 	if (todrop > 0) {
 		TCPSTAT_INC(tcps_rcvpackafterwin);
 		if (todrop >= tlen) {
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
 			/*
 			 * If window is closed can only take segments at
 			 * window edge, and have to drop data and PUSH from
 			 * incoming segments.  Continue processing, but
 			 * remember to ack.  Otherwise, drop segment
 			 * and ack.
 			 */
 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
 				tp->t_flags |= TF_ACKNOW;
 				TCPSTAT_INC(tcps_rcvwinprobe);
 			} else
 				goto dropafterack;
 		} else
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		m_adj(m, -todrop);
 		tlen -= todrop;
 		thflags &= ~(TH_PUSH|TH_FIN);
 	}
 
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record its timestamp.
 	 * NOTE:
 	 * 1) That the test incorporates suggestions from the latest
 	 *    proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 * 2) That updating only on newer timestamps interferes with
 	 *    our earlier PAWS tests, so this check should be solely
 	 *    predicated on the sequence space of this segment.
 	 * 3) That we modify the segment boundary check to be
 	 *        Last.ACK.Sent <= SEG.SEQ + SEG.Len
 	 *    instead of RFC1323's
 	 *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
 	 *    This modified check allows us to overcome RFC1323's
 	 *    limitations as described in Stevens TCP/IP Illustrated
 	 *    Vol. 2 p.869. In such cases, we can still calculate the
 	 *    RTT correctly when RCV.NXT == Last.ACK.Sent.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
 		((thflags & (TH_SYN|TH_FIN)) != 0))) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to.to_tsval;
 	}
 
 	/*
 	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
 	 * flag is on (half-synchronized state), then queue data for
 	 * later processing; else drop segment and return.
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
 		    (tp->t_flags & TF_NEEDSYN)) {
 			if (tp->t_state == TCPS_SYN_RECEIVED &&
 			    IS_FASTOPEN(tp->t_flags)) {
 				tp->snd_wnd = tiwin;
 				cc_conn_init(tp);
 			}
 			goto step6;
 		} else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
 	}
 
 	/*
 	 * Ack processing.
 	 */
 	switch (tp->t_state) {
 	/*
 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
 	 * ESTABLISHED state and continue processing.
 	 * The ACK was checked above.
 	 */
 	case TCPS_SYN_RECEIVED:
 
 		TCPSTAT_INC(tcps_connects);
 		soisconnected(so);
 		/* Do window scaling? */
 		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 			tp->rcv_scale = tp->request_r_scale;
 		}
 		tp->snd_wnd = tiwin;
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
 		tp->t_starttime = ticks;
 		if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
 			tcp_fastopen_decrement_counter(tp->t_tfo_pending);
 			tp->t_tfo_pending = NULL;
 		}
 		if (tp->t_flags & TF_NEEDFIN) {
 			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
 			tcp_state_change(tp, TCPS_ESTABLISHED);
 			TCP_PROBE5(accept__established, NULL, tp,
 			    m, tp, th);
 			/*
 			 * TFO connections call cc_conn_init() during SYN
 			 * processing.  Calling it again here for such
 			 * connections is not harmless as it would undo the
 			 * snd_cwnd reduction that occurs when a TFO SYN|ACK
 			 * is retransmitted.
 			 */
 			if (!IS_FASTOPEN(tp->t_flags))
 				cc_conn_init(tp);
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
 		 * Account for the ACK of our SYN prior to
 		 * regular ACK processing below, except for
 		 * simultaneous SYN, which is handled later.
 		 */
 		if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN))
 			incforsyn = 1;
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
 		 * later; if not, do so now to pass queued data to user.
 		 */
 		if (tlen == 0 && (thflags & TH_FIN) == 0) {
 			(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
 			    (struct mbuf *)0);
 			tcp_handle_wakeup(tp, so);
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		/* FALLTHROUGH */
 
 	/*
 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
 	 * ACKs.  If the ack is in the range
 	 *	tp->snd_una < th->th_ack <= tp->snd_max
 	 * then advance tp->snd_una to th->th_ack and drop
 	 * data from the retransmission queue.  If this ACK reflects
 	 * more up to date window information we update our window information.
 	 */
 	case TCPS_ESTABLISHED:
 	case TCPS_FIN_WAIT_1:
 	case TCPS_FIN_WAIT_2:
 	case TCPS_CLOSE_WAIT:
 	case TCPS_CLOSING:
 	case TCPS_LAST_ACK:
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			TCPSTAT_INC(tcps_rcvacktoomuch);
 			goto dropafterack;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to.to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes))) {
 			if (((sack_changed = tcp_sack_doack(tp, &to, th->th_ack)) != 0) &&
 			    (tp->t_flags & TF_LRD)) {
 				tcp_sack_lost_retransmission(tp, th);
 			}
 		} else
 			/*
 			 * Reset the value so that previous (valid) value
 			 * from the last ack with SACK doesn't get used.
 			 */
 			tp->sackhint.sacked_bytes = 0;
 
 #ifdef TCP_HHOOK
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, &to);
 #endif
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
 			maxseg = tcp_maxseg(tp);
 			if (tlen == 0 &&
 			    (tiwin == tp->snd_wnd ||
 			    (tp->t_flags & TF_SACK_PERMIT))) {
 				/*
 				 * If this is the first time we've seen a
 				 * FIN from the remote, this is not a
 				 * duplicate and it needs to be processed
 				 * normally.  This happens during a
 				 * simultaneous close.
 				 */
 				if ((thflags & TH_FIN) &&
 				    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
 					tp->t_dupacks = 0;
 					break;
 				}
 				TCPSTAT_INC(tcps_rcvdupack);
 				/*
 				 * If we have outstanding data (other than
 				 * a window probe), this is a completely
 				 * duplicate ack (ie, window info didn't
 				 * change and FIN isn't set),
 				 * the ack is the biggest we've
 				 * seen and we've seen exactly our rexmt
 				 * threshold of them, assume a packet
 				 * has been dropped and retransmit it.
 				 * Kludge snd_nxt & the congestion
 				 * window so we send only this one
 				 * packet.
 				 *
 				 * We know we're losing at the current
 				 * window size so do congestion avoidance
 				 * (set ssthresh to half the current window
 				 * and pull our congestion window back to
 				 * the new ssthresh).
 				 *
 				 * Dup acks mean that packets have left the
 				 * network (they're now cached at the receiver)
 				 * so bump cwnd by the amount in the receiver
 				 * to keep a constant cwnd packets in the
 				 * network.
 				 *
 				 * When using TCP ECN, notify the peer that
 				 * we reduced the cwnd.
 				 */
 				/*
 				 * Following 2 kinds of acks should not affect
 				 * dupack counting:
 				 * 1) Old acks
 				 * 2) Acks with SACK but without any new SACK
 				 * information in them. These could result from
 				 * any anomaly in the network like a switch
 				 * duplicating packets or a possible DoS attack.
 				 */
 				if (th->th_ack != tp->snd_una ||
 				    ((tp->t_flags & TF_SACK_PERMIT) &&
 				    (to.to_flags & TOF_SACK) &&
 				    !sack_changed))
 					break;
 				else if (!tcp_timer_active(tp, TT_REXMT))
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				     IN_FASTRECOVERY(tp->t_flags)) {
 					cc_ack_received(tp, th, nsegs,
 					    CC_DUPACK);
 					if (V_tcp_do_prr &&
 					    IN_FASTRECOVERY(tp->t_flags) &&
 					    (tp->t_flags & TF_SACK_PERMIT)) {
 						tcp_do_prr_ack(tp, th, &to);
 					} else if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    (to.to_flags & TOF_SACK) &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						int awnd;
 
 						/*
 						 * Compute the amount of data in flight first.
 						 * We can inject new data into the pipe iff
 						 * we have less than 1/2 the original window's
 						 * worth of data in flight.
 						 */
 						if (V_tcp_do_newsack)
 							awnd = tcp_compute_pipe(tp);
 						else
 							awnd = (tp->snd_nxt - tp->snd_fack) +
 								tp->sackhint.sack_bytes_rexmit;
 
 						if (awnd < tp->snd_ssthresh) {
 							tp->snd_cwnd += maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
 								tp->snd_cwnd = tp->snd_ssthresh;
 						}
 					} else
 						tp->snd_cwnd += maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh ||
 					    (tp->t_flags & TF_SACK_PERMIT &&
 					     V_tcp_do_newsack &&
 					     tp->sackhint.sacked_bytes >
 					     (tcprexmtthresh - 1) * maxseg)) {
 enter_recovery:
 					/*
 					 * Above is the RFC6675 trigger condition of
 					 * more than (dupthresh-1)*maxseg sacked data.
 					 * If the count of holes in the
 					 * scoreboard is >= dupthresh, we could
 					 * also enter loss recovery, but don't
 					 * have that value readily available.
 					 */
 					tp->t_dupacks = tcprexmtthresh;
 					tcp_seq onxt = tp->snd_nxt;
 
 					/*
 					 * If we're doing sack, or prr, check
 					 * to see if we're already in sack
 					 * recovery. If we're not doing sack,
 					 * check to see if we're in newreno
 					 * recovery.
 					 */
 					if (V_tcp_do_prr ||
 					    (tp->t_flags & TF_SACK_PERMIT)) {
 						if (IN_FASTRECOVERY(tp->t_flags)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					} else {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
 					/* Congestion signal before ack. */
 					cc_cong_signal(tp, th, CC_NDUPACK);
 					cc_ack_received(tp, th, nsegs,
 					    CC_DUPACK);
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
 					if (V_tcp_do_prr) {
 						/*
 						 * snd_ssthresh is already updated by
 						 * cc_cong_signal.
 						 */
 						tp->sackhint.prr_delivered =
 						    tp->sackhint.sacked_bytes;
 						tp->sackhint.recover_fs = max(1,
 						    tp->snd_nxt - tp->snd_una);
 					}
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    (to.to_flags & TOF_SACK)) {
 						TCPSTAT_INC(
 						    tcps_sack_recovery_episode);
 						tp->snd_recover = tp->snd_nxt;
 						tp->snd_cwnd = maxseg;
 						(void) tp->t_fb->tfb_tcp_output(tp);
 						if (SEQ_GT(th->th_ack, tp->snd_una))
 							goto resume_partialack;
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
 					tp->snd_cwnd = maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					KASSERT(tp->snd_limited <= 2,
 					    ("%s: tp->snd_limited too big",
 					    __func__));
 					tp->snd_cwnd = tp->snd_ssthresh +
 					     maxseg *
 					     (tp->t_dupacks - tp->snd_limited);
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
 					goto drop;
 				} else if (V_tcp_do_rfc3042) {
 					/*
 					 * Process first and second duplicate
 					 * ACKs. Each indicates a segment
 					 * leaving the network, creating room
 					 * for more. Make sure we can send a
 					 * packet on reception of each duplicate
 					 * ACK by increasing snd_cwnd by one
 					 * segment. Restore the original
 					 * snd_cwnd after packet transmission.
 					 */
 					cc_ack_received(tp, th, nsegs,
 					    CC_DUPACK);
 					uint32_t oldcwnd = tp->snd_cwnd;
 					tcp_seq oldsndmax = tp->snd_max;
 					u_int sent;
 					int avail;
 
 					KASSERT(tp->t_dupacks == 1 ||
 					    tp->t_dupacks == 2,
 					    ("%s: dupacks not 1 or 2",
 					    __func__));
 					if (tp->t_dupacks == 1)
 						tp->snd_limited = 0;
 					tp->snd_cwnd =
 					    (tp->snd_nxt - tp->snd_una) +
 					    (tp->t_dupacks - tp->snd_limited) *
 					    maxseg;
 					/*
 					 * Only call tcp_output when there
 					 * is new data available to be sent.
 					 * Otherwise we would send pure ACKs.
 					 */
 					SOCKBUF_LOCK(&so->so_snd);
 					avail = sbavail(&so->so_snd) -
 					    (tp->snd_nxt - tp->snd_una);
 					SOCKBUF_UNLOCK(&so->so_snd);
 					if (avail > 0)
 						(void) tp->t_fb->tfb_tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
 					if (sent > maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
 						    tp->snd_limited == 0) ||
 						   (sent == maxseg + 1 &&
 						    tp->t_flags & TF_SENTFIN),
 						    ("%s: sent too much",
 						    __func__));
 						tp->snd_limited = 2;
 					} else if (sent > 0)
 						++tp->snd_limited;
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
 			}
 			break;
 		} else {
 			/*
 			 * This ack is advancing the left edge, reset the
 			 * counter.
 			 */
 			tp->t_dupacks = 0;
 			/*
 			 * If this ack also has new SACK info, increment the
 			 * counter as per rfc6675. The variable
 			 * sack_changed tracks all changes to the SACK
 			 * scoreboard, including when partial ACKs without
 			 * SACK options are received, and clear the scoreboard
 			 * from the left side. Such partial ACKs should not be
 			 * counted as dupacks here.
 			 */
 			if ((tp->t_flags & TF_SACK_PERMIT) &&
 			    (to.to_flags & TOF_SACK) &&
 			    sack_changed) {
 				tp->t_dupacks++;
 				/* limit overhead by setting maxseg last */
 				if (!IN_FASTRECOVERY(tp->t_flags) &&
 				    (tp->sackhint.sacked_bytes >
 				    ((tcprexmtthresh - 1) *
 				    (maxseg = tcp_maxseg(tp))))) {
 					goto enter_recovery;
 				}
 			}
 		}
 
 resume_partialack:
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
 		    ("%s: th_ack <= snd_una", __func__));
 
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
 		if (IN_FASTRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (tp->t_flags & TF_SACK_PERMIT)
 					if (V_tcp_do_prr && to.to_flags & TOF_SACK) {
 						tcp_timer_activate(tp, TT_REXMT, 0);
 						tp->t_rtttime = 0;
 						tcp_do_prr_ack(tp, th, &to);
 						tp->t_flags |= TF_ACKNOW;
 						(void) tcp_output(tp);
 					} else
 						tcp_sack_partialack(tp, th);
 				else
 					tcp_newreno_partial_ack(tp, th);
 			} else
 				cc_post_recovery(tp, th);
 		} else if (IN_CONGRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (V_tcp_do_prr) {
 					tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th);
 					tp->snd_fack = th->th_ack;
 					tcp_do_prr_ack(tp, th, &to);
 					(void) tcp_output(tp);
 				}
 			} else
 				cc_post_recovery(tp, th);
 		}
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.
 		 */
 		if (tp->t_flags & TF_NEEDSYN) {
 			/*
 			 * T/TCP: Connection was half-synchronized, and our
 			 * SYN has been ACK'd (so connection is now fully
 			 * synchronized).  Go to non-starred state,
 			 * increment snd_una for ACK of SYN, and check if
 			 * we can do window scaling.
 			 */
 			tp->t_flags &= ~TF_NEEDSYN;
 			tp->snd_una++;
 			/* Do window scaling? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 				/* Send window already scaled. */
 			}
 		}
 
 process_ACK:
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Adjust for the SYN bit in sequence space,
 		 * but don't account for it in cwnd calculations.
 		 * This is for the SYN_RECEIVED, non-simultaneous
 		 * SYN case. SYN_SENT and simultaneous SYN are
 		 * treated elsewhere.
 		 */
 		if (incforsyn)
 			tp->snd_una++;
 		acked = BYTES_THIS_ACK(tp, th);
 		KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
 		    "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,
 		    tp->snd_una, th->th_ack, tp, m));
 		TCPSTAT_ADD(tcps_rcvackpack, nsegs);
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 
 		/*
 		 * If we just performed our first retransmit, and the ACK
 		 * arrives within our recovery window, then it was a mistake
 		 * to do the retransmit in the first place.  Recover our
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
 		if (tp->t_rxtshift == 1 &&
 		    tp->t_flags & TF_PREVVALID &&
 		    tp->t_badrxtwin &&
 		    SEQ_LT(to.to_tsecr, tp->t_badrxtwin))
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 
 		/*
 		 * If we have a timestamp reply, update smoothed
 		 * round trip time.  If no timestamp is present but
 		 * transmit timer is running and timed sequence
 		 * number was acked, update smoothed round trip time.
 		 * Since we now have an rtt measurement, cancel the
 		 * timer backoff (cf., Phil Karn's retransmit alg.).
 		 * Recompute the initial retransmit timer.
 		 *
 		 * Some boxes send broken timestamp replies
 		 * during the SYN+ACK phase, ignore
 		 * timestamps of 0 or we could calculate a
 		 * huge RTT and blow up the retransmit timer.
 		 */
 		if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
 			uint32_t t;
 
 			t = tcp_ts_getticks() - to.to_tsecr;
 			if (!tp->t_rttlow || tp->t_rttlow > t)
 				tp->t_rttlow = t;
 			tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
 			if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
 				tp->t_rttlow = ticks - tp->t_rtttime;
 			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
 		}
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
 		 * timer and remember to restart (more output or persist).
 		 * If there is more data to be acked, restart retransmit
 		 * timer, using current (possibly backed-off) value.
 		 */
 		if (th->th_ack == tp->snd_max) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			needoutput = 1;
 		} else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
 		 *    skip rest of ACK processing.
 		 */
 		if (acked == 0)
 			goto step6;
 
 		/*
 		 * Let the congestion control algorithm update congestion
 		 * control related information. This typically means increasing
 		 * the congestion window.
 		 */
 		cc_ack_received(tp, th, nsegs, CC_ACK);
 
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > sbavail(&so->so_snd)) {
 			if (tp->snd_wnd >= sbavail(&so->so_snd))
 				tp->snd_wnd -= sbavail(&so->so_snd);
 			else
 				tp->snd_wnd = 0;
 			mfree = sbcut_locked(&so->so_snd,
 			    (int)sbavail(&so->so_snd));
 			ourfinisacked = 1;
 		} else {
 			mfree = sbcut_locked(&so->so_snd, acked);
 			if (tp->snd_wnd >= (uint32_t) acked)
 				tp->snd_wnd -= acked;
 			else
 				tp->snd_wnd = 0;
 			ourfinisacked = 0;
 		}
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
 		m_freem(mfree);
 		/* Detect una wraparound. */
 		if (!IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 		/* XXXLAS: Can this be moved up into cc_post_recovery? */
 		if (IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_RECOVERY(tp->t_flags);
 		}
 		tp->snd_una = th->th_ack;
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))
 				tp->snd_recover = tp->snd_una;
 		}
 		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
 			tp->snd_nxt = tp->snd_una;
 
 		switch (tp->t_state) {
 		/*
 		 * In FIN_WAIT_1 STATE in addition to the processing
 		 * for the ESTABLISHED state if our FIN is now acknowledged
 		 * then enter FIN_WAIT_2.
 		 */
 		case TCPS_FIN_WAIT_1:
 			if (ourfinisacked) {
 				/*
 				 * If we can't receive any more
 				 * data, then closing user can proceed.
 				 * Starting the timer is contrary to the
 				 * specification, but if we don't get a FIN
 				 * we'll hang forever.
 				 *
 				 * XXXjl:
 				 * we should release the tp also, and use a
 				 * compressed state.
 				 */
 				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 					soisdisconnected(so);
 					tcp_timer_activate(tp, TT_2MSL,
 					    (tcp_fast_finwait2_recycle ?
 					    tcp_finwait2_timeout :
 					    TP_MAXIDLE(tp)));
 				}
 				tcp_state_change(tp, TCPS_FIN_WAIT_2);
 			}
 			break;
 
 		/*
 		 * In CLOSING STATE in addition to the processing for
 		 * the ESTABLISHED state if the ACK acknowledges our FIN
 		 * then enter the TIME-WAIT state, otherwise ignore
 		 * the segment.
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
 				tcp_twstart(tp);
 				m_freem(m);
 				return;
 			}
 			break;
 
 		/*
 		 * In LAST_ACK, we may still be waiting for data to drain
 		 * and/or to be acked, as well as for the ack of our FIN.
 		 * If our FIN is now acknowledged, delete the TCB,
 		 * enter the closed state and return.
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
 				tp = tcp_close(tp);
 				goto drop;
 			}
 			break;
 		}
 	}
 
 step6:
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Update window information.
 	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
 	 */
 	if ((thflags & TH_ACK) &&
 	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tlen == 0 &&
 		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
 			TCPSTAT_INC(tcps_rcvwinupd);
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 		needoutput = 1;
 	}
 
 	/*
 	 * Process segments with URG.
 	 */
 	if ((thflags & TH_URG) && th->th_urp &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		/*
 		 * This is a kludge, but if we receive and accept
 		 * random urgent pointers, we'll crash in
 		 * soreceive.  It's hard to imagine someone
 		 * actually wanting to send this much urgent data.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
 			SOCKBUF_UNLOCK(&so->so_rcv);	/* XXX */
 			goto dodata;			/* XXX */
 		}
 		/*
 		 * If this segment advances the known urgent pointer,
 		 * then mark the data stream.  This should not happen
 		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
 		 * a FIN has been received from the remote side.
 		 * In these states we ignore the URG.
 		 *
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section as the original
 		 * spec states (in one of two places).
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
 			so->so_oobmark = sbavail(&so->so_rcv) +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SBS_RCVATMARK;
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		/*
 		 * Remove out of band data so doesn't get presented to user.
 		 * This can happen independent of advancing the URG pointer,
 		 * but if two URG's are pending at once, some out-of-band
 		 * data may creep in... ick.
 		 */
 		if (th->th_urp <= (uint32_t)tlen &&
 		    !(so->so_options & SO_OOBINLINE)) {
 			/* hdr drop is delayed */
 			tcp_pulloutofband(so, th, m, drop_hdrlen);
 		}
 	} else {
 		/*
 		 * If no out of band data is expected,
 		 * pull receive urgent pointer along
 		 * with the receive window.
 		 */
 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
 			tp->rcv_up = tp->rcv_nxt;
 	}
 dodata:							/* XXX */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Process the segment text, merging it into the TCP sequencing queue,
 	 * and arranging for acknowledgment of receipt if necessary.
 	 * This process logically involves adjusting tp->rcv_wnd as data
 	 * is presented to the user (this happens in tcp_usrreq.c,
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
 	tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
 		   IS_FASTOPEN(tp->t_flags));
 	if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
 		tcp_seq save_rnxt  = tp->rcv_nxt;
 		int     save_tlen  = tlen;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		/*
 		 * Insert segment which includes th into TCP reassembly queue
 		 * with control block tp.  Set thflags to whether reassembly now
 		 * includes a segment with FIN.  This handles the common case
 		 * inline (segment is the next to be received on an established
 		 * connection, and the queue is empty), avoiding linkage into
 		 * and removal from the queue and repetition of various
 		 * conversions.
 		 * Set DELACK for segments received in order, but ack
 		 * immediately when segments are out of order (so
 		 * fast retransmit can work).
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    SEGQ_EMPTY(tp) &&
 		    (TCPS_HAVEESTABLISHED(tp->t_state) ||
 		     tfo_syn)) {
 			if (DELAY_ACK(tp, tlen) || tfo_syn)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt += tlen;
 			if (tlen &&
 			    ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
 			    (tp->t_fbyte_in == 0)) {
 				tp->t_fbyte_in = ticks;
 				if (tp->t_fbyte_in == 0)
 					tp->t_fbyte_in = 1;
 				if (tp->t_fbyte_out && tp->t_fbyte_in)
 					tp->t_flags2 |= TF2_FBYTES_COMPLETE;
 			}
 			thflags = th->th_flags & TH_FIN;
 			TCPSTAT_INC(tcps_rcvpack);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			tp->t_flags |= TF_WAKESOR;
 		} else {
 			/*
 			 * XXX: Due to the header drop above "th" is
 			 * theoretically invalid by now.  Fortunately
 			 * m_adj() doesn't actually free any mbufs
 			 * when trimming from the head.
 			 */
 			tcp_seq temp = save_start;
 
 			thflags = tcp_reass(tp, th, &temp, &tlen, m);
 			tp->t_flags |= TF_ACKNOW;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (save_tlen > 0) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
 			if ((tlen == 0) && (SEQ_LT(save_start, save_rnxt))) {
 				/*
 				 * DSACK actually handled in the fastpath
 				 * above.
 				 */
 				tcp_update_sack_list(tp, save_start,
 				    save_start + save_tlen);
 			} else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
 				if ((tp->rcv_numsacks >= 1) &&
 				    (tp->sackblks[0].end == save_start)) {
 					/*
 					 * Partial overlap, recorded at todrop
 					 * above.
 					 */
 					tcp_update_sack_list(tp,
 					    tp->sackblks[0].start,
 					    tp->sackblks[0].end);
 				} else {
 					tcp_update_dsack_list(tp, save_start,
 					    save_start + save_tlen);
 				}
 			} else if (tlen >= save_tlen) {
 				/* Update of sackblks. */
 				tcp_update_dsack_list(tp, save_start,
 				    save_start + save_tlen);
 			} else if (tlen > 0) {
 				tcp_update_dsack_list(tp, save_start,
 				    save_start + tlen);
 			}
 		}
 		tcp_handle_wakeup(tp, so);
 #if 0
 		/*
 		 * Note the amount of data that peer has sent into
 		 * our window, in order to estimate the sender's
 		 * buffer size.
 		 * XXX: Unused.
 		 */
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
 			len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
 		else
 			len = so->so_rcv.sb_hiwat;
 #endif
 	} else {
 		m_freem(m);
 		thflags &= ~TH_FIN;
 	}
 
 	/*
 	 * If FIN is received ACK the FIN and let the user know
 	 * that the connection is closing.
 	 */
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			/* The socket upcall is handled by socantrcvmore. */
 			socantrcvmore(so);
 			/*
 			 * If connection is half-synchronized
 			 * (ie NEEDSYN flag on) then delay ACK,
 			 * so it may be piggybacked when SYN is sent.
 			 * Otherwise, since we received a FIN then no
 			 * more input can be expected, send ACK now.
 			 */
 			if (tp->t_flags & TF_NEEDSYN)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt++;
 		}
 		switch (tp->t_state) {
 		/*
 		 * In SYN_RECEIVED and ESTABLISHED STATES
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
 			tcp_state_change(tp, TCPS_CLOSE_WAIT);
 			break;
 
 		/*
 		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
 			tcp_state_change(tp, TCPS_CLOSING);
 			break;
 
 		/*
 		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
 		 * starting the time-wait timer, turning off the other
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
 			tcp_twstart(tp);
 			return;
 		}
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, m);
 
 	/*
 	 * Return any desired output.
 	 */
 	if (needoutput || (tp->t_flags & TF_ACKNOW))
 		(void) tp->t_fb->tfb_tcp_output(tp);
 
 check_delack:
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return;
 
 dropafterack:
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies
 	 * sequence space, where the ACK reflects our state.
 	 *
 	 * We can now skip the test for the RST flag since all
 	 * paths to this code happen after packets containing
 	 * RST have been dropped.
 	 *
 	 * In the SYN-RECEIVED state, don't send an ACK unless the
 	 * segment we received passes the SYN-RECEIVED ACK test.
 	 * If it fails send a RST.  This breaks the loop in the
 	 * "LAND" DoS attack, and also prevents an ACK storm
 	 * between two listening ports that have been sent forged
 	 * SYN segments, each with the source address of the other.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
 	    (SEQ_GT(tp->snd_una, th->th_ack) ||
 	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, m);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 	return;
 
 dropwithreset:
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_WUNLOCK(tp->t_inpcb);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	return;
 
 drop:
 	/*
 	 * Drop space held by incoming segment and return.
 	 */
 #ifdef TCPDEBUG
 	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, m);
 	if (tp != NULL) {
 		INP_WUNLOCK(tp->t_inpcb);
 	}
 	m_freem(m);
 }
 
 /*
  * Issue RST and make ACK acceptable to originator of segment.
  * The mbuf must still include the original packet header.
  * tp may be NULL.
  *
  * The mbuf chain m is always disposed of here: it is either handed to
  * tcp_respond() or freed at the drop label.  No reset is generated when
  * the incoming segment itself carries RST, when the mbuf is flagged as
  * broadcast/multicast, when one of the address checks below matches, or
  * when badport_bandlim(rstreason) returns a negative value.
  *
  * tlen (presumably the payload length of the segment -- confirm against
  * the callers) is only used when the segment carried no ACK; it is then
  * extended by one for each of SYN and FIN before being added to th_seq.
  */
 void
 tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
     int tlen, int rstreason)
 {
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 
 	/* With a control block, its inpcb lock is asserted to be held. */
 	if (tp != NULL) {
 		INP_LOCK_ASSERT(tp->t_inpcb);
 	}
 
 	/* Don't bother if destination was broadcast/multicast. */
 	/* A segment that is itself a RST is never answered either. */
 	if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
 		goto drop;
 #ifdef INET6
 	/* The version field is read through struct ip for both families. */
 	if (mtod(m, struct ip *)->ip_v == 6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
 			goto drop;
 		/* IPv6 anycast check is done at tcp6_input() */
 	}
 #endif
 	/* With both families compiled in, the IPv4 block is the else arm. */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		ip = mtod(m, struct ip *);
 		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 		    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 		    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
 		    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
 			goto drop;
 	}
 #endif
 
 	/* Perform bandwidth limiting. */
 	if (badport_bandlim(rstreason) < 0)
 		goto drop;
 
 	/* tcp_respond consumes the mbuf chain. */
 	if (th->th_flags & TH_ACK) {
 		/*
 		 * The peer sent an ACK: answer with a bare RST built from
 		 * its th_ack (presumably used as our sequence number --
 		 * confirm against tcp_respond()'s parameter order).
 		 */
 		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
 		    th->th_ack, TH_RST);
 	} else {
 		/*
 		 * No ACK from the peer: answer with RST|ACK covering the
 		 * whole segment; SYN and FIN each count as one extra unit
 		 * on top of tlen.
 		 */
 		if (th->th_flags & TH_SYN)
 			tlen++;
 		if (th->th_flags & TH_FIN)
 			tlen++;
 		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
 		    (tcp_seq)0, TH_RST|TH_ACK);
 	}
 	return;
 drop:
 	m_freem(m);
 }
 
 /*
  * Parse the TCP option bytes at cp (cnt bytes in total) into *to.
  *
  * to->to_flags is cleared first and then gains one TOF_* bit for every
  * option that is recognized and accepted.  flags is tested for TO_SYN:
  * MSS, window scale, SACK-permitted and fast open are only accepted when
  * it is set, SACK blocks only when it is clear.  Options with a length
  * that does not fit their kind are ignored.  Parsing stops at TCPOPT_EOL
  * or at the first option whose length octet is missing or inconsistent
  * with the number of bytes left.
  *
  * The signature, SACK block and fast open cookie are not copied; the
  * corresponding fields of *to point into the buffer at cp.
  */
 void
 tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
 {
 	int kind, len;
 
 	to->to_flags = 0;
 	for (; cnt > 0; cnt -= len, cp += len) {
 		kind = cp[0];
 		if (kind == TCPOPT_EOL)
 			break;
 		if (kind == TCPOPT_NOP) {
 			len = 1;
 		} else {
 			/* Every other kind is followed by a length octet. */
 			if (cnt < 2)
 				break;
 			len = cp[1];
 			if (len < 2 || len > cnt)
 				break;
 		}
 
 		/*
 		 * Leaving the switch with "break" falls through to the
 		 * loop's advance step, so it is used both after accepting
 		 * an option and to skip one that is not acceptable.
 		 */
 		switch (kind) {
 		case TCPOPT_MAXSEG:
 			if (len != TCPOLEN_MAXSEG || !(flags & TO_SYN))
 				break;
 			to->to_flags |= TOF_MSS;
 			bcopy((char *)cp + 2,
 			    (char *)&to->to_mss, sizeof(to->to_mss));
 			to->to_mss = ntohs(to->to_mss);
 			break;
 
 		case TCPOPT_WINDOW:
 			if (len != TCPOLEN_WINDOW || !(flags & TO_SYN))
 				break;
 			to->to_flags |= TOF_SCALE;
 			to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			if (len != TCPOLEN_TIMESTAMP)
 				break;
 			to->to_flags |= TOF_TS;
 			/* The value and echo reply follow back to back. */
 			bcopy((char *)cp + 2,
 			    (char *)&to->to_tsval, sizeof(to->to_tsval));
 			to->to_tsval = ntohl(to->to_tsval);
 			bcopy((char *)cp + 6,
 			    (char *)&to->to_tsecr, sizeof(to->to_tsecr));
 			to->to_tsecr = ntohl(to->to_tsecr);
 			break;
 
 		case TCPOPT_SIGNATURE:
 			/*
 			 * A peer that put TCP_SIGNATURE into its initial
 			 * SYN must be answered accordingly, so remember
 			 * that the option was seen; the syncache code
 			 * relies on this to build the right response.
 			 */
 			if (len != TCPOLEN_SIGNATURE)
 				break;
 			to->to_flags |= TOF_SIGNATURE;
 			to->to_signature = cp + 2;
 			break;
 
 		case TCPOPT_SACK_PERMITTED:
 			if (len != TCPOLEN_SACK_PERMITTED ||
 			    !(flags & TO_SYN) || !V_tcp_do_sack)
 				break;
 			to->to_flags |= TOF_SACKPERM;
 			break;
 
 		case TCPOPT_SACK:
 			/* Need a whole, non-zero number of SACK blocks. */
 			if (len <= 2 || (len - 2) % TCPOLEN_SACK != 0)
 				break;
 			if (flags & TO_SYN)
 				break;
 			to->to_flags |= TOF_SACK;
 			to->to_nsacks = (len - 2) / TCPOLEN_SACK;
 			to->to_sacks = cp + 2;
 			TCPSTAT_INC(tcps_sack_rcv_blocks);
 			break;
 
 		case TCPOPT_FAST_OPEN:
 			/*
 			 * The cookie length is not validated here; that is
 			 * left to the server side cookie checking code or
 			 * the client side cookie cache update code.
 			 */
 			if (!(flags & TO_SYN))
 				break;
 			if (!(V_tcp_fastopen_client_enable ||
 			    V_tcp_fastopen_server_enable))
 				break;
 			to->to_flags |= TOF_FASTOPEN;
 			to->to_tfo_len = len - 2;
 			if (to->to_tfo_len)
 				to->to_tfo_cookie = cp + 2;
 			else
 				to->to_tfo_cookie = NULL;
 			break;
 
 		default:
 			/* Unknown kinds (and NOP) are stepped over. */
 			break;
 		}
 	}
 }
 
 /*
  * Remove the out of band byte from a segment so that it does not show
  * up in the user's data queue.  The byte is stored in the connection's
  * t_iobc and TCPOOB_HAVEDATA is raised.  Sequencing is unaffected: only
  * the mbuf lengths shrink by one, the segment length used for sequence
  * accounting is not touched here.
  *
  * off is the number of bytes in front of the TCP payload in the chain m.
  * Panics if the byte addressed by th_urp is not inside the chain.
  */
 void
 tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
     int off)
 {
 	int skip;
 
 	/* Offset of the urgent byte from the start of the chain. */
 	skip = off + th->th_urp - 1;
 
 	while (skip >= 0) {
 		if (skip >= m->m_len) {
 			/* Not in this mbuf; step to the next one. */
 			skip -= m->m_len;
 			m = m->m_next;
 			if (m == NULL)
 				break;
 			continue;
 		}
 
 		/* Found it: save the byte and close the gap behind it. */
 		{
 			char *oob = mtod(m, caddr_t) + skip;
 			struct tcpcb *tp = sototcpcb(so);
 
 			INP_WLOCK_ASSERT(tp->t_inpcb);
 
 			tp->t_iobc = *oob;
 			tp->t_oobflags |= TCPOOB_HAVEDATA;
 			bcopy(oob + 1, oob, (unsigned)(m->m_len - skip - 1));
 			m->m_len--;
 			if (m->m_flags & M_PKTHDR)
 				m->m_pkthdr.len--;
 		}
 		return;
 	}
 	panic("tcp_pulloutofband");
 }
 
 /*
  * Collect new round-trip time estimate
  * and update averages and current timeout.
  *
  * rtt is the new sample; it appears to be counted in ticks (it is scaled
  * with hz for the statistics below) and to be origin 1 (the smoothing
  * code subtracts one) -- confirm against the callers.
  *
  * Besides t_srtt, t_rttvar and t_rttbest this updates the retransmit
  * timeout t_rxtcur and resets t_rtttime, t_rxtshift and t_softerror
  * to zero.  The inpcb write lock is asserted.
  */
 void
 tcp_xmit_timer(struct tcpcb *tp, int rtt)
 {
 	int delta;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	TCPSTAT_INC(tcps_rttupdated);
 	tp->t_rttupdated++;
 #ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT,
 	    imax(0, rtt * 1000 / hz));
 #endif
 	/*
 	 * Smooth into the existing estimate only if there is one and the
 	 * retransmit backoff has not exceeded TCP_RTT_INVALIDATE; otherwise
 	 * start over from this sample (else branch).
 	 */
 	if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) {
 		/*
 		 * srtt is stored as fixed point with 5 bits after the
 		 * binary point (i.e., scaled by 32).  The following magic
 		 * is equivalent to the smoothing algorithm in rfc793 with
 		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
 		 * point).  Adjust rtt to origin 0.
 		 *
 		 * NOTE(review): this comment used to say "scaled by 8",
 		 * which contradicts 5 fractional bits (2^5 = 32); confirm
 		 * against the definition of TCP_RTT_SHIFT.
 		 */
 		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
 			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
 
 		/* Never let the smoothed rtt drop to zero or below. */
 		if ((tp->t_srtt += delta) <= 0)
 			tp->t_srtt = 1;
 
 		/*
 		 * We accumulate a smoothed rtt variance (actually, a
 		 * smoothed mean difference), then set the retransmit
 		 * timer to smoothed rtt + 4 times the smoothed variance.
 		 * rttvar is stored as fixed point with 4 bits after the
 		 * binary point (scaled by 16).  The following is
 		 * equivalent to rfc793 smoothing with an alpha of .75
 		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
 		 * rfc793's wired-in beta.
 		 */
 		if (delta < 0)
 			delta = -delta;
 		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
 		if ((tp->t_rttvar += delta) <= 0)
 			tp->t_rttvar = 1;
 		/* Track the smallest srtt + rttvar seen so far. */
 		if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
 		    tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	} else {
 		/*
 		 * No rtt measurement yet - use the unsmoothed rtt.
 		 * Set the variance to half the rtt (so our first
 		 * retransmit happens at 3*rtt).
 		 */
 		tp->t_srtt = rtt << TCP_RTT_SHIFT;
 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
 		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	}
 	/* The measurement is consumed and any backoff is cancelled. */
 	tp->t_rtttime = 0;
 	tp->t_rxtshift = 0;
 
 	/*
 	 * the retransmit should happen at rtt + 4 * rttvar.
 	 * Because of the way we do the smoothing, srtt and rttvar
 	 * will each average +1/2 tick of bias.  When we compute
 	 * the retransmit timer, we want 1/2 tick of rounding and
 	 * 1 extra tick because of +-1/2 tick uncertainty in the
 	 * firing of the timer.  The bias will give us exactly the
 	 * 1.5 tick we need.  But, because the bias is
 	 * statistical, we have to test that we don't drop below
 	 * the minimum feasible timer (which is 2 ticks).
 	 */
 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
 		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
 
 	/*
 	 * We received an ack for a packet that wasn't retransmitted;
 	 * it is probably safe to discard any error indications we've
 	 * received recently.  This isn't quite right, but close enough
 	 * for now (a route might have failed after we sent a segment,
 	 * and the return path might not be symmetrical).
 	 */
 	tp->t_softerror = 0;
 }
 
 /*
  * Determine a reasonable value for maxseg size.
  * If the route is known, check route for mtu.
  * If none, use an mss that can be handled on the outgoing interface
  * without forcing IP to fragment.  If no route is found, route has no mtu,
  * or the destination isn't local, use a default, hopefully conservative
  * size (usually 512 or the default IP max size, but no more than the mtu
  * of the interface), as we can't discover anything about intervening
  * gateways or networks.  We also initialize the congestion/slow start
  * window to be a single segment if the destination isn't local.
  * While looking at the routing entry, we also initialize other path-dependent
  * parameters from pre-set or cached values in the routing entry.
  *
  * NOTE that resulting t_maxseg doesn't include space for TCP options or
  * IP options, e.g. IPSEC data, since length of this data may vary, and
  * thus it is calculated for every segment separately in tcp_output().
  *
  * NOTE that this routine is only called when we process an incoming
  * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
  * settings are handled in tcp_mssopt().
  *
  * Parameters:
  *   offer     - MSS offered by the peer; 0 means the SYN carried no MSS
  *               option, -1 means no SYN has been received yet.
  *   mtuoffer  - -1 when unused; otherwise an MTU from which the offer is
  *               derived by subtracting the minimum protocol header size
  *               (offer must then be -1).
  *   metricptr - if not NULL, receives a copy of the host cache metrics,
  *               or is zeroed when the function returns early.
  *   cap       - handed through to tcp_maxmtu()/tcp_maxmtu6().
  *
  * The result is stored in tp->t_maxseg; nothing is returned.
  */
 void
 tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
     struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
 {
 	int mss = 0;
 	uint32_t maxmtu = 0;
 	struct inpcb *inp = tp->t_inpcb;
 	struct hc_metrics_lite metrics;
 #ifdef INET6
 	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
 	size_t min_protoh = isipv6 ?
 			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
 			    sizeof (struct tcpiphdr);
 #else
 	 size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* A non-zero t_port adds the UDP tunneling overhead to the headers. */
 	if (tp->t_port)
 		min_protoh += V_tcp_udp_tunneling_overhead;
 	if (mtuoffer != -1) {
 		KASSERT(offer == -1, ("%s: conflict", __func__));
 		offer = mtuoffer - min_protoh;
 	}
 
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
 		maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
 		tp->t_maxseg = V_tcp_v6mssdflt;
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
 		tp->t_maxseg = V_tcp_mssdflt;
 	}
 #endif
 
 	/*
 	 * No route to sender, stay with default mss and return.
 	 */
 	if (maxmtu == 0) {
 		/*
 		 * In case we return early we need to initialize metrics
 		 * to a defined state as tcp_hc_get() would do for us
 		 * if there was no cache hit.
 		 */
 		if (metricptr != NULL)
 			bzero(metricptr, sizeof(struct hc_metrics_lite));
 		return;
 	}
 
 	/* What have we got? */
 	switch (offer) {
 		case 0:
 			/*
 			 * Offer == 0 means that there was no MSS on the SYN
 			 * segment, in this case we use tcp_mssdflt as
 			 * already assigned to t_maxseg above.
 			 */
 			offer = tp->t_maxseg;
 			break;
 
 		case -1:
 			/*
 			 * Offer == -1 means that we didn't receive SYN yet.
 			 */
 			/* FALLTHROUGH */
 
 		default:
 			/*
 			 * Prevent DoS attack with too small MSS. Round up
 			 * to at least minmss.
 			 *
 			 * NOTE(review): for offer == -1 the outcome depends
 			 * on whether max() compares signed or unsigned
 			 * values -- confirm against its definition.
 			 */
 			offer = max(offer, V_tcp_minmss);
 	}
 
 	/*
 	 * rmx information is now retrieved from tcp_hostcache.
 	 */
 	tcp_hc_get(&inp->inp_inc, &metrics);
 	if (metricptr != NULL)
 		bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));
 
 	/*
 	 * If there's a discovered mtu in tcp hostcache, use it.
 	 * Else, use the link mtu.
 	 */
 	if (metrics.rmx_mtu)
 		mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
 	else {
 #ifdef INET6
 		if (isipv6) {
 			mss = maxmtu - min_protoh;
 			/*
 			 * Without path MTU discovery, cap the mss at the
 			 * default for peers that are not local.
 			 */
 			if (!V_path_mtu_discovery &&
 			    !in6_localaddr(&inp->in6p_faddr))
 				mss = min(mss, V_tcp_v6mssdflt);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			mss = maxmtu - min_protoh;
 			/* Same rule as for IPv6 above. */
 			if (!V_path_mtu_discovery &&
 			    !in_localaddr(inp->inp_faddr))
 				mss = min(mss, V_tcp_mssdflt);
 		}
 #endif
 		/*
 		 * XXX - The above conditional (mss = maxmtu - min_protoh)
 		 * probably violates the TCP spec.
 		 * The problem is that, since we don't know the
 		 * other end's MSS, we are supposed to use a conservative
 		 * default.  But, if we do that, then MTU discovery will
 		 * never actually take place, because the conservative
 		 * default is much less than the MTUs typically seen
 		 * on the Internet today.  For the moment, we'll sweep
 		 * this under the carpet.
 		 *
 		 * The conservative default might not actually be a problem
 		 * if the only case this occurs is when sending an initial
 		 * SYN with options and data to a host we've never talked
 		 * to before.  Then, they will reply with an MSS value which
 		 * will get recorded and the new parameters should get
 		 * recomputed.  For Further Study.
 		 */
 	}
 	/* Never exceed what the peer offered (or the default/minimum). */
 	mss = min(mss, offer);
 
 	/*
 	 * Sanity check: make sure that maxseg will be large
 	 * enough to allow some data on segments even if all
 	 * the option space is used (40 bytes).  Otherwise
 	 * funny things may happen in tcp_output.
 	 *
 	 * XXXGL: shouldn't we reserve space for IP/IPv6 options?
 	 */
 	mss = max(mss, 64);
 
 	tp->t_maxseg = mss;
 }
 
 void
 tcp_mss(struct tcpcb *tp, int offer)
 {
 	int mss;
 	uint32_t bufsize;
 	struct inpcb *inp;
 	struct socket *so;
 	struct hc_metrics_lite metrics;
 	struct tcp_ifcap cap;
 
 	KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
 
 	bzero(&cap, sizeof(cap));
 	tcp_mss_update(tp, offer, -1, &metrics, &cap);
 
 	mss = tp->t_maxseg;
 	inp = tp->t_inpcb;
 
 	/*
 	 * If there's a pipesize, change the socket buffer to that size,
 	 * don't change if sb_hiwat is different than default (then it
 	 * has been changed on purpose with setsockopt).
 	 * Make the socket buffers an integral number of mss units;
 	 * if the mss is larger than the socket buffer, decrease the mss.
 	 */
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe)
 		bufsize = metrics.rmx_sendpipe;
 	else
 		bufsize = so->so_snd.sb_hiwat;
 	if (bufsize < mss)
 		mss = bufsize;
 	else {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_snd.sb_hiwat)
 			(void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_snd);
 	/*
 	 * Sanity check: make sure that maxseg will be large
 	 * enough to allow some data on segments even if the
 	 * all the option space is used (40bytes).  Otherwise
 	 * funny things may happen in tcp_output.
 	 *
 	 * XXXGL: shouldn't we reserve space for IP/IPv6 options?
 	 */
 	tp->t_maxseg = max(mss, 64);
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
 		bufsize = metrics.rmx_recvpipe;
 	else
 		bufsize = so->so_rcv.sb_hiwat;
 	if (bufsize > mss) {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_rcv.sb_hiwat)
 			(void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/* Check the interface for TSO capabilities. */
 	if (cap.ifcap & CSUM_TSO) {
 		tp->t_flags |= TF_TSO;
 		tp->t_tsomax = cap.tsomax;
 		tp->t_tsomaxsegcount = cap.tsomaxsegcount;
 		tp->t_tsomaxsegsize = cap.tsomaxsegsize;
 	}
 }
 
 /*
  * Compute the value for the MSS option of an outgoing SYN.
  *
  * Starts from the per-family default mss.  If an interface/route MTU or
  * a host cache MTU is known, the result is that MTU (the smaller one
  * when both are known) minus the minimum IP + TCP header size.
  */
 int
 tcp_mssopt(struct in_conninfo *inc)
 {
 	size_t hdrlen;
 	uint32_t path_mtu = 0;
 	uint32_t cached_mtu = 0;
 	int mss = 0;
 
 	KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
 
 #ifdef INET6
 	if (inc->inc_flags & INC_ISIPV6) {
 		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		mss = V_tcp_v6mssdflt;
 		path_mtu = tcp_maxmtu6(inc, NULL);
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		hdrlen = sizeof(struct tcpiphdr);
 		mss = V_tcp_mssdflt;
 		path_mtu = tcp_maxmtu(inc, NULL);
 	}
 #endif
 #if defined(INET6) || defined(INET)
 	cached_mtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
 #endif
 
 	/* Nothing known about the path: stay with the default. */
 	if (path_mtu == 0 && cached_mtu == 0)
 		return (mss);
 
 	if (path_mtu != 0 && cached_mtu != 0)
 		mss = min(path_mtu, cached_mtu) - hdrlen;
 	else
 		mss = max(path_mtu, cached_mtu) - hdrlen;	/* the known one */
 
 	return (mss);
 }
 
 /*
  * Adjust snd_cwnd in response to an ACK while in fast or congestion
  * recovery, using Proportional Rate Reduction.
  *
  * The amount newly reported as delivered (sackhint.delivered_data) is
  * added to sackhint.prr_delivered, a send quota snd_cnt is derived from
  * it, and snd_cwnd is set so that roughly snd_cnt further segments may
  * be sent.  Outside of both recovery states snd_cwnd is left untouched.
  *
  * th and to are not referenced in this function body.
  */
 void
 tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
 {
 	int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
 	int maxseg = tcp_maxseg(tp);
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Compute the amount of data that this ACK is indicating
 	 * (del_data) and an estimate of how many bytes are in the
 	 * network.
 	 */
 	del_data = tp->sackhint.delivered_data;
 	if (V_tcp_do_newsack)
 		pipe = tcp_compute_pipe(tp);
 	else
 		pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit;
 	tp->sackhint.prr_delivered += del_data;
 	/*
 	 * Proportional Rate Reduction
 	 */
 	if (pipe >= tp->snd_ssthresh) {
 		/*
 		 * Still at or above ssthresh: send in proportion to what
 		 * has been delivered, scaled by ssthresh / recover_fs.
 		 * recover_fs is latched on first use from the data
 		 * outstanding at that moment (at least 1, so the division
 		 * in howmany() is safe).
 		 */
 		if (tp->sackhint.recover_fs == 0)
 			tp->sackhint.recover_fs =
 			    imax(1, tp->snd_nxt - tp->snd_una);
 		snd_cnt = howmany((long)tp->sackhint.prr_delivered *
 			    tp->snd_ssthresh, tp->sackhint.recover_fs) -
 			    tp->sackhint.prr_out;
 	} else {
 		/*
 		 * Below ssthresh: grow back towards it, bounded by limit.
 		 * The conservative variant allows only what was delivered
 		 * but not yet sent; the other variant allows at least
 		 * del_data and adds one extra segment.
 		 */
 		if (V_tcp_do_prr_conservative)
 			limit = tp->sackhint.prr_delivered -
 			    tp->sackhint.prr_out;
 		else
 			limit = imax(tp->sackhint.prr_delivered -
 				    tp->sackhint.prr_out, del_data) +
 				    maxseg;
 		snd_cnt = imin((tp->snd_ssthresh - pipe), limit);
 	}
 	/* Convert the byte quota to whole segments, never negative. */
 	snd_cnt = imax(snd_cnt, 0) / maxseg;
 	/*
 	 * Send snd_cnt new data into the network in response to this ack.
 	 * If there is going to be a SACK retransmission, adjust snd_cwnd
 	 * accordingly.
 	 */
 	if (IN_FASTRECOVERY(tp->t_flags)) {
 		tp->snd_cwnd = imax(maxseg, tp->snd_nxt - tp->snd_recover +
 			tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg));
 	} else if (IN_CONGRECOVERY(tp->t_flags))
 		tp->snd_cwnd = imax(maxseg, pipe - del_data +
 				    (snd_cnt * maxseg));
 }
 
 /*
  * When a partial ack arrives, force the retransmission of the
  * next unacknowledged segment.  Do not clear tp->t_dupacks.
  * By setting snd_nxt to th_ack, this forces retransmission timer to
  * be started again.
  *
  * snd_cwnd is changed only temporarily around the output call; on
  * return it holds the previous window deflated by the amount this ack
  * covers, plus one segment.  snd_nxt is restored if it was further
  * ahead before the call.
  */
 void
 tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
 {
 	/* Saved so that both can be put back after the forced output. */
 	tcp_seq onxt = tp->snd_nxt;
 	uint32_t ocwnd = tp->snd_cwnd;
 	u_int maxseg = tcp_maxseg(tp);
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* A delta of 0 presumably stops the rexmt timer -- TODO confirm. */
 	tcp_timer_activate(tp, TT_REXMT, 0);
 	tp->t_rtttime = 0;
 	tp->snd_nxt = th->th_ack;
 	/*
 	 * Set snd_cwnd to one segment beyond acknowledged offset.
 	 * (tp->snd_una has not yet been updated when this function is called.)
 	 */
 	tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	/* Undo the temporary window and do not move snd_nxt backwards. */
 	tp->snd_cwnd = ocwnd;
 	if (SEQ_GT(onxt, tp->snd_nxt))
 		tp->snd_nxt = onxt;
 	/*
 	 * Partial window deflation.  Relies on fact that tp->snd_una
 	 * not updated yet.
 	 */
 	if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th))
 		tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
 	else
 		tp->snd_cwnd = 0;
 	tp->snd_cwnd += maxseg;
 }
 
 /*
  * Estimate the number of bytes in flight: everything between snd_una
  * and snd_max, plus the bytes retransmitted because of SACK, minus the
  * bytes the peer has reported as SACKed.
  */
 int
 tcp_compute_pipe(struct tcpcb *tp)
 {
 	int pipe;
 
 	pipe = tp->snd_max - tp->snd_una +
 	    tp->sackhint.sack_bytes_rexmit -
 	    tp->sackhint.sacked_bytes;
 
 	return (pipe);
 }
 
 /*
  * Calculate the Initial Window, also used as Restart Window, for a
  * connection with the given maximum segment size.
  *
  * Three policies are tried in this order:
  *  - a user specified initial flight size in segments
  *    (V_tcp_initcwnd_segments; RFC6928 increases it to ten segments),
  *  - the slightly more aggressive values of RFC3390,
  *  - the default conservative values of RFC5681 Section 3.1.
  */
 uint32_t
 tcp_compute_initwnd(uint32_t maxseg)
 {
 	if (V_tcp_initcwnd_segments) {
 		return min(V_tcp_initcwnd_segments * maxseg,
 		    max(2 * maxseg, V_tcp_initcwnd_segments * 1460));
 	}
 
 	if (V_tcp_do_rfc3390)
 		return min(4 * maxseg, max(2 * maxseg, 4380));
 
 	/* Per RFC5681 Section 3.1 */
 	if (maxseg <= 1095)
 		return (4 * maxseg);
 	if (maxseg <= 2190)
 		return (3 * maxseg);
 	return (2 * maxseg);
 }
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index de22310d241a..a1531ea8d2f3 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1,4096 +1,4096 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_kern_tls.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/arb.h>
 #include <sys/callout.h>
 #include <sys/eventhandler.h>
 #ifdef TCP_HHOOK
 #include <sys/hhook.h>
 #endif
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
 #include <sys/khelp.h>
 #endif
 #ifdef KERN_TLS
 #include <sys/ktls.h>
 #endif
 #include <sys/qmath.h>
 #include <sys/stats.h>
 #include <sys/sysctl.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/refcount.h>
 #include <sys/mbuf.h>
 #ifdef INET6
 #include <sys/domain.h>
 #endif
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/random.h>
 
 #include <vm/uma.h>
 
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_hpts.h>
 #include <netinet/cc/cc.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #include <netinet/tcp_fastopen.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef INET6
 #include <netinet6/ip6protosw.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 #include <crypto/siphash/siphash.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Per-VNET default MSS values.  They are read and validated (must not be
  * below TCP_MINMSS) by the mssdflt/v6mssdflt sysctl handlers in this file.
  */
 VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
 #ifdef INET6
 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
 #endif
 
 #ifdef NETFLIX_EXP_DETECTION
 /*
  * Sack attack detection thresholds and such.
  *
  * Percentages are stored in tenths of a percent (10.1 percent is 101).
  * Each SYSCTL_INT() below repeats the variable's initial value as its
  * value argument so the two cannot be misread as different defaults.
  */
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Sack Attack detection thresholds");
 int32_t tcp_force_detection = 0;
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
     CTLFLAG_RW,
     &tcp_force_detection, 0,
     "Do we force detection even if the INP has it off?");
 int32_t tcp_sack_to_ack_thresh = 700;	/* 70 % */
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
     CTLFLAG_RW,
     &tcp_sack_to_ack_thresh, 700,
     "Percentage of sacks to acks we must see above (10.1 percent is 101)?");
 int32_t tcp_sack_to_move_thresh = 600;	/* 60 % */
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
     CTLFLAG_RW,
     &tcp_sack_to_move_thresh, 600,
     "Percentage of sack moves we must see above (10.1 percent is 101)");
 int32_t tcp_restoral_thresh = 650;	/* 65 % (sack:2:ack -5%) */
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
     CTLFLAG_RW,
     &tcp_restoral_thresh, 650,	/* was 550; now matches the initializer */
     "Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)");
 int32_t tcp_sad_decay_val = 800;
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
     CTLFLAG_RW,
     &tcp_sad_decay_val, 800,
     "The decay percentage (10.1 percent equals 101 )");
 int32_t tcp_map_minimum = 500;
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
     CTLFLAG_RW,
     &tcp_map_minimum, 500,
     "Number of Map enteries before we start detection");
 int32_t tcp_attack_on_turns_on_logging = 0;
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
     CTLFLAG_RW,
     &tcp_attack_on_turns_on_logging, 0,
    "When we have a positive hit on attack, do we turn on logging?");
 int32_t tcp_sad_pacing_interval = 2000;
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
     CTLFLAG_RW,
     &tcp_sad_pacing_interval, 2000,
     "What is the minimum pacing interval for a classified attacker?");
 
 int32_t tcp_sad_low_pps = 100;
 SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
     CTLFLAG_RW,
     &tcp_sad_low_pps, 100,
     "What is the input pps that below which we do not decay?");
 #endif
 /*
  * ACK-war prevention tunables: the length of the accounting window in
  * milliseconds and the number of ACKs allowed within it.  They are only
  * exported here; nothing in this part of the file consumes them.
  */
 uint32_t tcp_ack_war_time_window = 1000;
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_timewindow,
     CTLFLAG_RW,
     &tcp_ack_war_time_window, 1000,
    "If the tcp_stack does ack-war prevention how many milliseconds are in its time window?");
 uint32_t tcp_ack_war_cnt = 5;
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_cnt,
     CTLFLAG_RW,
     &tcp_ack_war_cnt, 5,
    "If the tcp_stack does ack-war prevention how many acks can be sent in its time window?");
 
 /*
  * Read/write lock taken in this file around walks of t_functions and
  * around reads and updates of tcp_func_set_ptr.
  */
 struct rwlock tcp_function_lock;
 
 static int
 sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	new = V_tcp_mssdflt;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr) {
 		if (new < TCP_MINMSS)
 			error = EINVAL;
 		else
 			V_tcp_mssdflt = new;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(tcp_mssdflt), 0, &sysctl_net_inet_tcp_mss_check, "I",
     "Default TCP Maximum Segment Size");
 
 #ifdef INET6
 /*
  * Sysctl handler for net.inet.tcp.v6mssdflt.  Same contract as the IPv4
  * handler: a written value below TCP_MINMSS is rejected with EINVAL.
  */
 static int
 sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS)
 {
 	int mss, rv;
 
 	mss = V_tcp_v6mssdflt;
 	rv = sysctl_handle_int(oidp, &mss, 0, req);
 	/* Read-only request or copy failure: nothing to store. */
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 	if (mss < TCP_MINMSS)
 		return (EINVAL);
 	V_tcp_v6mssdflt = mss;
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(tcp_v6mssdflt), 0, &sysctl_net_inet_tcp_mss_v6_check, "I",
    "Default TCP Maximum Segment Size for IPv6");
 #endif /* INET6 */
 
 /*
  * Minimum MSS we accept and use. This prevents DoS attacks where
  * we are forced to a ridiculous low MSS like 20 and send hundreds
  * of packets instead of one. The effect scales with the available
  * bandwidth and quickly saturates the CPU and network interface
  * with packet generation and sending. Set to zero to disable MINMSS
  * checking. This setting prevents us from sending too small packets.
  */
 VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW,
      &VNET_NAME(tcp_minmss), 0,
     "Minimum TCP Maximum Segment Size");
 
 /* RFC1323 (high performance TCP) extensions; enabled by default. */
 VNET_DEFINE(int, tcp_do_rfc1323) = 1;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc1323), 0,
     "Enable rfc1323 (high performance TCP) extensions");
 
 /* Whether segments lacking TCP timestamps are tolerated; off by default. */
 VNET_DEFINE(int, tcp_tolerate_missing_ts) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tolerate_missing_ts, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_tolerate_missing_ts), 0,
     "Tolerate missing TCP timestamps");
 
 /* Timestamp offsets per connection (1, default) or per host pair (0). */
 VNET_DEFINE(int, tcp_ts_offset_per_conn) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, ts_offset_per_conn, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_ts_offset_per_conn), 0,
     "Initialize TCP timestamps per connection instead of per host pair");
 
 /*
  * How many connections are pacing.  shadow_num_connections is the copy
  * exported read-only through the pacing_count sysctl.
  */
 static volatile uint32_t number_of_tcp_connections_pacing = 0;
 static uint32_t shadow_num_connections = 0;
 
 /*
  * Limit on the number of paced connections (-1 = no limit, 0 = no pacing).
  * The SYSCTL_INT() value argument repeats the initializer; it previously
  * read 1000 while the variable itself starts at 10000.
  */
 static int tcp_pacing_limit = 10000;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pacing_limit, CTLFLAG_RW,
     &tcp_pacing_limit, 10000,
     "If the TCP stack does pacing, is there a limit (-1 = no, 0 = no pacing N = number of connections)");
 
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pacing_count, CTLFLAG_RD,
     &shadow_num_connections, 0, "Number of TCP connections being paced");
 
 /* Log errors caused by incoming TCP segments when non-zero. */
 static int	tcp_log_debug = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
     &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
 
 /* PCB hash table size; a read-only tunable (CTLFLAG_RDTUN). */
 static int	tcp_tcbhashsize;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
 
 /* Gate for the tcp_drain routine; enabled by default. */
 static int	do_tcpdrain = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
     "Enable tcp_drain routine for extra help when low on mbufs");
 
 /* Read-only view of the per-VNET PCB count kept in tcbinfo. */
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
 
 /* Whether certain ICMP unreachables may abort connections in SYN_SENT. */
 VNET_DEFINE_STATIC(int, icmp_may_rst) = 1;
 #define	V_icmp_may_rst			VNET(icmp_may_rst)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(icmp_may_rst), 0,
     "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 /* Seconds between reseeds of the ISN secret; initial value is 0. */
 VNET_DEFINE_STATIC(int, tcp_isn_reseed_interval) = 0;
 #define	V_tcp_isn_reseed_interval	VNET(tcp_isn_reseed_interval)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_isn_reseed_interval), 0,
     "Seconds between reseeding of ISN secret");
 
 /* Read-only tunable selecting soreceive_stream for TCP sockets. */
 static int	tcp_soreceive_stream;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
     &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets");
 
 /* Per-VNET UMA zone; presumably backs SACK hole allocations (see name). */
 VNET_DEFINE(uma_zone_t, sack_hole_zone);
 #define	V_sack_hole_zone		VNET(sack_hole_zone)
 VNET_DEFINE(uint32_t, tcp_map_entries_limit) = 0;	/* unlimited */
 static int
 sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	uint32_t new;
 
 	new = V_tcp_map_entries_limit;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr) {
 		/* only allow "0" and value > minimum */
 		if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
 			error = EINVAL;
 		else
 			V_tcp_map_entries_limit = new;
 	}
 	return (error);
 }
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, map_limit,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     &VNET_NAME(tcp_map_entries_limit), 0,
     &sysctl_net_inet_tcp_map_limit_check, "IU",
     "Total sendmap entries limit");
 
 /* Limit on sendmap split entries; 0 means no limit. */
 VNET_DEFINE(uint32_t, tcp_map_split_limit) = 0;	/* unlimited */
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
      &VNET_NAME(tcp_map_split_limit), 0,
     "Total sendmap split entries limit");
 
 #ifdef TCP_HHOOK
 /* Per-VNET helper-hook heads, one slot per hook id up to HHOOK_TCP_LAST. */
 VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
 #endif
 
 /* Per-VNET secret sized as a SipHash key; used for timestamp offsets (per its name). */
 #define TS_OFFSET_SECRET_LENGTH SIPHASH_KEY_LENGTH
 VNET_DEFINE_STATIC(u_char, ts_offset_secret[TS_OFFSET_SECRET_LENGTH]);
 #define	V_ts_offset_secret	VNET(ts_offset_secret)
 
 /* Forward declarations of file-local helpers. */
 static int	tcp_default_fb_init(struct tcpcb *tp);
 static void	tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged);
 static int	tcp_default_handoff_ok(struct tcpcb *tp);
 static struct inpcb *tcp_notify(struct inpcb *, int);
 static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
 static void tcp_mtudisc(struct inpcb *, int);
 static char *	tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
 		    void *ip4hdr, const void *ip6hdr);
 
 /*
  * The built-in "freebsd" stack.  tcp_switch_back_to_default() falls back
  * to it when the user-selected default cannot take a connection.
  */
 static struct tcp_function_block tcp_def_funcblk = {
 	.tfb_tcp_block_name = "freebsd",
 	.tfb_tcp_output = tcp_output,
 	.tfb_tcp_do_segment = tcp_do_segment,
 	.tfb_tcp_ctloutput = tcp_default_ctloutput,
 	.tfb_tcp_handoff_ok = tcp_default_handoff_ok,
 	.tfb_tcp_fb_init = tcp_default_fb_init,
 	.tfb_tcp_fb_fini = tcp_default_fb_fini,
 };
 
 /* Recorded length of t_functions; cross-checked under INVARIANTS. */
 static int tcp_fb_cnt = 0;
 /* List of registered stack names/aliases (struct tcp_function entries). */
 struct tcp_funchead t_functions;
 /* Currently selected default stack; updated by the functions_default sysctl. */
 static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
 
 /*
  * Look up a registered function block by name (fs->function_set_name).
  * Returns the matching block, or NULL when no entry carries that name.
  * No reference is taken and no lock is acquired here; the callers in this
  * file hold tcp_function_lock.
  */
 static struct tcp_function_block *
 find_tcp_functions_locked(struct tcp_function_set *fs)
 {
 	struct tcp_function *entry;
 
 	TAILQ_FOREACH(entry, &t_functions, tf_next) {
 		if (strcmp(entry->tf_name, fs->function_set_name) == 0)
 			return (entry->tf_fb);
 	}
 	return (NULL);
 }
 
 /*
  * Check whether blk is registered in t_functions.  Returns blk if some
  * entry points at it (storing that entry through s when s is non-NULL),
  * otherwise NULL; *s is left untouched on failure.  No lock is acquired
  * here; the callers in this file hold tcp_function_lock.
  */
 static struct tcp_function_block *
 find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
 {
 	struct tcp_function *entry;
 
 	TAILQ_FOREACH(entry, &t_functions, tf_next) {
 		if (entry->tf_fb != blk)
 			continue;
 		if (s != NULL)
 			*s = entry;
 		return (blk);
 	}
 	return (NULL);
 }
 
 struct tcp_function_block *
 find_and_ref_tcp_functions(struct tcp_function_set *fs)
 {
 	struct tcp_function_block *blk;
 
 	rw_rlock(&tcp_function_lock);
 	blk = find_tcp_functions_locked(fs);
 	if (blk)
 		refcount_acquire(&blk->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	return(blk);
 }
 
 /*
  * Verify that blk is still registered and, if so, take a reference on
  * it.  Returns blk on success and NULL if it is not in t_functions.
  */
 struct tcp_function_block *
 find_and_ref_tcp_fb(struct tcp_function_block *blk)
 {
 	struct tcp_function_block *found;
 
 	rw_rlock(&tcp_function_lock);
 	found = find_tcp_fb_locked(blk, NULL);
 	if (found != NULL)
 		refcount_acquire(&found->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	return (found);
 }
 
 /*
  * Return the currently selected default function block with a reference
  * held.  tcp_func_set_ptr is dereferenced unconditionally, so this never
  * returns NULL.
  */
 static struct tcp_function_block *
 find_and_ref_tcp_default_fb(void)
 {
 	struct tcp_function_block *dflt;
 
 	rw_rlock(&tcp_function_lock);
 	dflt = tcp_func_set_ptr;
 	refcount_acquire(&dflt->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	return (dflt);
 }
 
 /*
  * Move tp off its current (non-built-in) stack.  The user-selected
  * default stack is tried first; if that is the stack being left, if it
  * declines the connection, or if its init hook fails, the built-in
  * tcp_def_funcblk is used instead.  Panics if the built-in stack cannot
  * be found, declines, or fails to initialize.
  *
  * A tfb_tcp_handoff_ok hook returning non-zero is treated as a refusal.
  */
 void
 tcp_switch_back_to_default(struct tcpcb *tp)
 {
 	struct tcp_function_block *tfb;
 
 	KASSERT(tp->t_fb != &tcp_def_funcblk,
 	    ("%s: called by the built-in default stack", __func__));
 
 	/*
 	 * Release the old stack. This function will either find a new one
 	 * or panic.
 	 */
 	if (tp->t_fb->tfb_tcp_fb_fini != NULL)
 		(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
 	refcount_release(&tp->t_fb->tfb_refcnt);
 
 	/*
 	 * Now, we'll find a new function block to use.
 	 * Start by trying the current user-selected
 	 * default, unless this stack is the user-selected
 	 * default.
 	 */
 	tfb = find_and_ref_tcp_default_fb();
 	/* tp->t_fb still names the stack being left; don't re-select it. */
 	if (tfb == tp->t_fb) {
 		refcount_release(&tfb->tfb_refcnt);
 		tfb = NULL;
 	}
 	/* Does the stack accept this connection? */
 	if (tfb != NULL && tfb->tfb_tcp_handoff_ok != NULL &&
 	    (*tfb->tfb_tcp_handoff_ok)(tp)) {
 		refcount_release(&tfb->tfb_refcnt);
 		tfb = NULL;
 	}
 	/* Try to use that stack. */
 	if (tfb != NULL) {
 		/* Initialize the new stack. If it succeeds, we are done. */
 		tp->t_fb = tfb;
 		if (tp->t_fb->tfb_tcp_fb_init == NULL ||
 		    (*tp->t_fb->tfb_tcp_fb_init)(tp) == 0)
 			return;
 
 		/*
 		 * Initialization failed. Release the reference count on
 		 * the stack.
 		 */
 		refcount_release(&tfb->tfb_refcnt);
 	}
 
 	/*
 	 * If that wasn't feasible, use the built-in default
 	 * stack which is not allowed to reject anyone.
 	 */
 	tfb = find_and_ref_tcp_fb(&tcp_def_funcblk);
 	if (tfb == NULL) {
 		/* there always should be a default */
 		panic("Can't refer to tcp_def_funcblk");
 	}
 	if (tfb->tfb_tcp_handoff_ok != NULL) {
 		if ((*tfb->tfb_tcp_handoff_ok) (tp)) {
 			/* The default stack cannot say no */
 			panic("Default stack rejects a new session?");
 		}
 	}
 	tp->t_fb = tfb;
 	if (tp->t_fb->tfb_tcp_fb_init != NULL &&
 	    (*tp->t_fb->tfb_tcp_fb_init)(tp)) {
 		/* The default stack cannot fail */
 		panic("Default stack initialization failed");
 	}
 }
 
 /*
  * Input hook for TCP-over-UDP: strips the UDP header from the packet in
  * m (whose UDP header starts at offset off) and feeds the result to
  * tcp_input_with_port() or tcp6_input_with_port(), passing along the UDP
  * source port.  The mbuf is freed here on any error.  The inp, sa and ctx
  * arguments are not used.
  */
 static void
 tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
     const struct sockaddr *sa, void *ctx)
 {
 	struct ip *iph;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct udphdr *uh;
 	struct tcphdr *th;
 	int thlen;
 	uint16_t port;
 
 	TCPSTAT_INC(tcps_tunneled_pkts);
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		/* Can't handle one that is not a pkt hdr */
 		TCPSTAT_INC(tcps_tunneled_errs);
 		goto out;
 	}
 	/* First make sure a minimal TCP header is contiguous in the mbuf. */
 	thlen = sizeof(struct tcphdr);
 	if (m->m_len < off + sizeof(struct udphdr) + thlen &&
 	    (m =  m_pullup(m, off + sizeof(struct udphdr) + thlen)) == NULL) {
 		TCPSTAT_INC(tcps_tunneled_errs);
 		goto out;
 	}
 	iph = mtod(m, struct ip *);
 	uh = (struct udphdr *)((caddr_t)iph + off);
 	th = (struct tcphdr *)(uh + 1);
 	/* Now pull up the full header, options included (th_off is in 32-bit words). */
 	thlen = th->th_off << 2;
 	if (m->m_len < off + sizeof(struct udphdr) + thlen) {
 		m =  m_pullup(m, off + sizeof(struct udphdr) + thlen);
 		if (m == NULL) {
 			TCPSTAT_INC(tcps_tunneled_errs);
 			goto out;
 		} else {
 			/* The pullup may have moved the data; recompute pointers. */
 			iph = mtod(m, struct ip *);
 			uh = (struct udphdr *)((caddr_t)iph + off);
 			th = (struct tcphdr *)(uh + 1);
 		}
 	}
 	/* Remember the tunnel source port (kept as found in the header). */
 	m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
 	/* Slide the TCP header and what follows down over the UDP header. */
 	bcopy(th, uh, m->m_len - off);
 	m->m_len -= sizeof(struct udphdr);
 	m->m_pkthdr.len -= sizeof(struct udphdr);
 	/*
 	 * We use the same algorithm for
 	 * both UDP and TCP for c-sum. So
 	 * the code in tcp_input will skip
 	 * the checksum. So we do nothing
 	 * with the flag (m->m_pkthdr.csum_flags).
 	 */
 	/* Shrink the IP-level length by the removed UDP header, then dispatch. */
 	switch (iph->ip_v) {
 #ifdef INET
 	case IPVERSION:
 		iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
 		tcp_input_with_port(&m, &off, IPPROTO_TCP, port);
 		break;
 #endif
 #ifdef INET6
 	case IPV6_VERSION >> 4:
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
 		tcp6_input_with_port(&m, &off, IPPROTO_TCP, port);
 		break;
 #endif
 	default:
 		/* Unknown IP version: drop. */
 		goto out;
 		break;
 	}
 	return;
 out:
 	/*
 	 * m may be NULL here when m_pullup() failed above; m_freem() is
 	 * relied upon to accept that.
 	 */
 	m_freem(m);
 }
 
 static int
 sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
 {
 	int error=ENOENT;
 	struct tcp_function_set fs;
 	struct tcp_function_block *blk;
 
 	memset(&fs, 0, sizeof(fs));
 	rw_rlock(&tcp_function_lock);
 	blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
 	if (blk) {
 		/* Found him */
 		strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
 		fs.pcbcnt = blk->tfb_refcnt;
 	}
 	rw_runlock(&tcp_function_lock);
 	error = sysctl_handle_string(oidp, fs.function_set_name,
 				     sizeof(fs.function_set_name), req);
 
 	/* Check for error or no change */
 	if (error != 0 || req->newptr == NULL)
 		return(error);
 
 	rw_wlock(&tcp_function_lock);
 	blk = find_tcp_functions_locked(&fs);
 	if ((blk == NULL) ||
 	    (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) {
 		error = ENOENT;
 		goto done;
 	}
 	tcp_func_set_ptr = blk;
 done:
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
     NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
     "Set/get the default TCP functions");
 
 static int
 sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
 {
 	int error, cnt, linesz;
 	struct tcp_function *f;
 	char *buffer, *cp;
 	size_t bufsz, outsz;
 	bool alias;
 
 	cnt = 0;
 	rw_rlock(&tcp_function_lock);
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		cnt++;
 	}
 	rw_runlock(&tcp_function_lock);
 
 	bufsz = (cnt+2) * ((TCP_FUNCTION_NAME_LEN_MAX * 2) + 13) + 1;
 	buffer = malloc(bufsz, M_TEMP, M_WAITOK);
 
 	error = 0;
 	cp = buffer;
 
 	linesz = snprintf(cp, bufsz, "\n%-32s%c %-32s %s\n", "Stack", 'D',
 	    "Alias", "PCB count");
 	cp += linesz;
 	bufsz -= linesz;
 	outsz = linesz;
 
 	rw_rlock(&tcp_function_lock);
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		alias = (f->tf_name != f->tf_fb->tfb_tcp_block_name);
 		linesz = snprintf(cp, bufsz, "%-32s%c %-32s %u\n",
 		    f->tf_fb->tfb_tcp_block_name,
 		    (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
 		    alias ? f->tf_name : "-",
 		    f->tf_fb->tfb_refcnt);
 		if (linesz >= bufsz) {
 			error = EOVERFLOW;
 			break;
 		}
 		cp += linesz;
 		bufsz -= linesz;
 		outsz += linesz;
 	}
 	rw_runlock(&tcp_function_lock);
 	if (error == 0)
 		error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
 	free(buffer, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
     NULL, 0, sysctl_net_inet_list_available, "A",
     "list available TCP Function sets");
 
 /* Per-VNET UDP port used for TCP-over-UDP; 0 is treated as "not set". */
 VNET_DEFINE(int, tcp_udp_tunneling_port) = TCP_TUNNELING_PORT_DEFAULT;
 
 /*
  * Kernel-owned UDP sockets that receive tunneled TCP, one per address
  * family.  NULL means the tunnel for that family is not running.
  */
 #ifdef INET
 VNET_DEFINE(struct socket *, udp4_tun_socket) = NULL;
 #define	V_udp4_tun_socket	VNET(udp4_tun_socket)
 #endif
 #ifdef INET6
 VNET_DEFINE(struct socket *, udp6_tun_socket) = NULL;
 #define	V_udp6_tun_socket	VNET(udp6_tun_socket)
 #endif
 
 /*
  * Close whichever TCP-over-UDP tunnel sockets exist and clear the
  * pointers.  Safe to call when neither socket has been created.
  */
 static void
 tcp_over_udp_stop(void)
 {
 	/*
 	 * This function assumes the sysctl caller holds the inpcb info
 	 * lock for writing.
 	 * NOTE(review): no lock is taken by the sysctl handler visible in
 	 * this file -- confirm whether this requirement still holds.
 	 */
 #ifdef INET
 	if (V_udp4_tun_socket != NULL) {
 		soclose(V_udp4_tun_socket);
 		V_udp4_tun_socket = NULL;
 	}
 #endif
 #ifdef INET6
 	if (V_udp6_tun_socket != NULL) {
 		soclose(V_udp6_tun_socket);
 		V_udp6_tun_socket = NULL;
 	}
 #endif
 }
 
 /*
  * Bring up TCP-over-UDP tunneling on V_tcp_udp_tunneling_port: for each
  * compiled-in address family create a UDP socket, install the tunneling
  * input/ctlinput hooks and bind it to the port.
  *
  * Returns 0 on success, EINVAL if no port is configured, EALREADY if a
  * tunnel socket already exists, or the error from socreate(),
  * udp_set_kernel_tunneling() or sobind().  On any such failure every
  * socket created so far is closed again via tcp_over_udp_stop().
  */
 static int
 tcp_over_udp_start(void)
 {
 	uint16_t port;
 	int ret;
 #ifdef INET
 	struct sockaddr_in sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 #endif
 
 	/*
 	 * The sysctl caller is expected to hold the inpcb info lock for
 	 * writing.
 	 * NOTE(review): the handler visible in this file takes no lock --
 	 * confirm.
 	 */
 	port = V_tcp_udp_tunneling_port;
 	if (ntohs(port) == 0) {
 		/* Must have a port set */
 		return (EINVAL);
 	}
 
 	/* Already running -- must stop first */
 #ifdef INET
 	if (V_udp4_tun_socket != NULL)
 		return (EALREADY);
 #endif
 #ifdef INET6
 	if (V_udp6_tun_socket != NULL)
 		return (EALREADY);
 #endif
 
 #ifdef INET
 	ret = socreate(PF_INET, &V_udp4_tun_socket, SOCK_DGRAM, IPPROTO_UDP,
 	    curthread->td_ucred, curthread);
 	if (ret != 0) {
 		tcp_over_udp_stop();
 		return (ret);
 	}
 	/* Call the special UDP hook. */
 	ret = udp_set_kernel_tunneling(V_udp4_tun_socket,
 	    tcp_recv_udp_tunneled_packet, tcp_ctlinput_viaudp, NULL);
 	if (ret != 0) {
 		tcp_over_udp_stop();
 		return (ret);
 	}
 	/* Ok, we have a socket, bind it to the port. */
 	bzero(&sin, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_port = htons(port);
 	ret = sobind(V_udp4_tun_socket, (struct sockaddr *)&sin, curthread);
 	if (ret != 0) {
 		tcp_over_udp_stop();
 		return (ret);
 	}
 #endif
 #ifdef INET6
 	ret = socreate(PF_INET6, &V_udp6_tun_socket, SOCK_DGRAM, IPPROTO_UDP,
 	    curthread->td_ucred, curthread);
 	if (ret != 0) {
 		tcp_over_udp_stop();
 		return (ret);
 	}
 	/* Call the special UDP hook. */
 	ret = udp_set_kernel_tunneling(V_udp6_tun_socket,
 	    tcp_recv_udp_tunneled_packet, tcp6_ctlinput_viaudp, NULL);
 	if (ret != 0) {
 		tcp_over_udp_stop();
 		return (ret);
 	}
 	/* Ok, we have a socket, bind it to the port. */
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_port = htons(port);
 	ret = sobind(V_udp6_tun_socket, (struct sockaddr *)&sin6, curthread);
 	if (ret != 0) {
 		tcp_over_udp_stop();
 		return (ret);
 	}
 #endif
 	return (0);
 }
 
 /*
  * Sysctl handler for net.inet.tcp.udp_tunneling_port.  A written value
  * must lie within [TCP_TUNNELING_PORT_MIN, TCP_TUNNELING_PORT_MAX]
  * (EINVAL otherwise).  The new port is stored, any tunnel running on the
  * old port is stopped, and a tunnel is started on the new port unless it
  * is 0.  An error from tcp_over_udp_start() is returned to the caller;
  * the stored port is not rolled back in that case.
  */
 static int
 sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t cur, want;
 	int rv;
 
 	cur = V_tcp_udp_tunneling_port;
 	want = cur;
 	rv = sysctl_handle_int(oidp, &want, 0, req);
 	/* Read-only request or copy failure: nothing to change. */
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 	if (want < TCP_TUNNELING_PORT_MIN || want > TCP_TUNNELING_PORT_MAX)
 		return (EINVAL);
 
 	V_tcp_udp_tunneling_port = want;
 	if (cur != 0)
 		tcp_over_udp_stop();
 	if (want != 0)
 		rv = tcp_over_udp_start();
 	return (rv);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     &VNET_NAME(tcp_udp_tunneling_port),
     0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU",
     "Tunneling port for tcp over udp");
 
 VNET_DEFINE(int, tcp_udp_tunneling_overhead) = TCP_TUNNELING_OVERHEAD_DEFAULT;
 
 static int
 sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	new = V_tcp_udp_tunneling_overhead;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr) {
 		if ((new < TCP_TUNNELING_OVERHEAD_MIN) ||
 		    (new > TCP_TUNNELING_OVERHEAD_MAX))
 			error = EINVAL;
 		else
 			V_tcp_udp_tunneling_overhead = new;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     &VNET_NAME(tcp_udp_tunneling_overhead),
     0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU",
     "MSS reduction when using tcp over udp");
 
 /*
  * Exports one (struct tcp_function_info) for each alias/name.
  *
  * Read-only: a write attempt returns EINVAL.  When the caller only asks
  * for the required size (req->oldptr == NULL), room for one entry more
  * than the current count is reported.
  */
 static int
 sysctl_net_inet_list_func_info(SYSCTL_HANDLER_ARGS)
 {
 	int cnt, error;
 	struct tcp_function *f;
 	struct tcp_function_info tfi;
 
 	/*
 	 * We don't allow writes.
 	 */
 	if (req->newptr != NULL)
 		return (EINVAL);
 
 	/*
 	 * Wire the old buffer so we can directly copy the functions to
 	 * user space without dropping the lock.
 	 */
 	if (req->oldptr != NULL) {
 		error = sysctl_wire_old_buffer(req, 0);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Walk the list and copy out matching entries. If INVARIANTS
 	 * is compiled in, also walk the list to verify the length of
 	 * the list matches what we have recorded.
 	 */
 	rw_rlock(&tcp_function_lock);
 
 	cnt = 0;
 #ifndef INVARIANTS
 	/* Size-only query: the recorded count suffices, skip the walk. */
 	if (req->oldptr == NULL) {
 		cnt = tcp_fb_cnt;
 		goto skip_loop;
 	}
 #endif
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 #ifdef INVARIANTS
 		cnt++;
 #endif
 		if (req->oldptr != NULL) {
 			/* Zero first so padding and unused bytes are not leaked. */
 			bzero(&tfi, sizeof(tfi));
 			tfi.tfi_refcnt = f->tf_fb->tfb_refcnt;
 			tfi.tfi_id = f->tf_fb->tfb_id;
 			(void)strlcpy(tfi.tfi_alias, f->tf_name,
 			    sizeof(tfi.tfi_alias));
 			(void)strlcpy(tfi.tfi_name,
 			    f->tf_fb->tfb_tcp_block_name, sizeof(tfi.tfi_name));
 			error = SYSCTL_OUT(req, &tfi, sizeof(tfi));
 			/*
 			 * Don't stop on error, as that is the
 			 * mechanism we use to accumulate length
 			 * information if the buffer was too short.
 			 */
 		}
 	}
 	KASSERT(cnt == tcp_fb_cnt,
 	    ("%s: cnt (%d) != tcp_fb_cnt (%d)", __func__, cnt, tcp_fb_cnt));
 #ifndef INVARIANTS
 skip_loop:
 #endif
 	rw_runlock(&tcp_function_lock);
 	/* Size-only query: report space for one entry beyond the count. */
 	if (req->oldptr == NULL)
 		error = SYSCTL_OUT(req, NULL,
 		    (cnt + 1) * sizeof(struct tcp_function_info));
 
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, function_info,
 	    CTLTYPE_OPAQUE | CTLFLAG_SKIP | CTLFLAG_RD | CTLFLAG_MPSAFE,
 	    NULL, 0, sysctl_net_inet_list_func_info, "S,tcp_function_info",
 	    "List TCP function block name-to-ID mappings");
 
 /*
  * Handoff check (tfb_tcp_handoff_ok) of the built-in stack.
  *
  * The built-in stack accepts every connection, so this always returns 0;
  * tcp_switch_back_to_default() panics if it ever returned anything else.
  */
 static int
 tcp_default_handoff_ok(struct tcpcb *tp)
 {
 	/* tp is deliberately not examined: all comers are accepted. */
 	return (0);
 }
 
 /*
  * tfb_tcp_fb_init() function for the default stack.
  *
  * This handles making sure we have appropriate timers set if you are
  * transitioning a socket that has some amount of setup done.
  *
  * The init() function from the default can *never* return non-zero i.e.
  * it is required to always succeed since it is the stack of last resort!
  */
 static int
 tcp_default_fb_init(struct tcpcb *tp)
 {
 
 	struct socket *so;
 
 	/* The caller must hold the inpcb write lock. */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	KASSERT(tp->t_state >= 0 && tp->t_state < TCPS_TIME_WAIT,
 	    ("%s: connection %p in unexpected state %d", __func__, tp,
 	    tp->t_state));
 
 	/*
 	 * Nothing to do for ESTABLISHED or LISTEN states. And, we don't
 	 * know what to do for unexpected states (which includes TIME_WAIT).
 	 *
 	 * NOTE(review): the test below skips states up to and including
 	 * TCPS_LISTEN, which does not obviously cover ESTABLISHED; the
 	 * sentence above may be stale -- confirm.
 	 */
 	if (tp->t_state <= TCPS_LISTEN || tp->t_state >= TCPS_TIME_WAIT)
 		return (0);
 
 	/*
 	 * Make sure some kind of transmission timer is set if there is
 	 * outstanding data.
 	 */
 	so = tp->t_inpcb->inp_socket;
 	/*
 	 * A timer is needed when the handshake is still in progress, data is
 	 * queued in the send buffer, or sent data is unacknowledged -- and
 	 * neither the retransmit nor the persist timer is already running.
 	 */
 	if ((!TCPS_HAVEESTABLISHED(tp->t_state) || sbavail(&so->so_snd) ||
 	    tp->snd_una != tp->snd_max) && !(tcp_timer_active(tp, TT_REXMT) ||
 	    tcp_timer_active(tp, TT_PERSIST))) {
 		/*
 		 * If the session has established and it looks like it should
 		 * be in the persist state, set the persist timer. Otherwise,
 		 * set the retransmit timer.
 		 */
 		if (TCPS_HAVEESTABLISHED(tp->t_state) && tp->snd_wnd == 0 &&
 		    (int32_t)(tp->snd_nxt - tp->snd_una) <
 		    (int32_t)sbavail(&so->so_snd))
 			tcp_setpersist(tp);
 		else
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 	}
 
 	/* All non-embryonic sessions get a keepalive timer. */
 	if (!tcp_timer_active(tp, TT_KEEP))
 		tcp_timer_activate(tp, TT_KEEP,
 		    TCPS_HAVEESTABLISHED(tp->t_state) ? TP_KEEPIDLE(tp) :
 		    TP_KEEPINIT(tp));
 
 	/*
 	 * Make sure critical variables are initialized
 	 * if transitioning while in Recovery.
 	 */
 	if IN_FASTRECOVERY(tp->t_flags) {
 		/* Seed recover_fs with the bytes past snd_una, at least 1. */
 		if (tp->sackhint.recover_fs == 0)
 			tp->sackhint.recover_fs = max(1,
 			    tp->snd_nxt - tp->snd_una);
 	}
 
 	return (0);
 }
 
 /*
  * Teardown hook (tfb_tcp_fb_fini) of the built-in stack.
  *
  * Intended as the place to adjust state before another stack takes over
  * the connection.  At present it only asserts that the inpcb write lock
  * is held; tcb_is_purged is ignored.
  */
 static void
 tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged)
 {
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 }
 
 /*
  * Target size of TCP PCB hash tables. Must be a power of two.
  *
  * Note that this can be overridden by the kernel environment
  * variable net.inet.tcp.tcbhashsize
  *
  * The fallback of 0 applies only when the build did not define
  * TCBHASHSIZE itself.
  */
 #ifndef TCBHASHSIZE
 #define TCBHASHSIZE	0
 #endif
 
 /*
  * XXX
  * Callouts should be moved into struct tcp directly.  They are currently
  * separate because the tcpcb structure is exported to userland for sysctl
  * parsing purposes, which do not know about callouts.
  *
  * This wrapper bundles a tcpcb with the companion state that goes with
  * it.  Member order determines the layout; do not reorder.
  */
 struct tcpcb_mem {
 	struct	tcpcb		tcb;	/* the control block itself */
 	struct	tcp_timer	tt;	/* timer state kept outside the tcpcb */
 	struct	cc_var		ccv;	/* presumably congestion-control state */
 #ifdef TCP_HHOOK
 	struct	osd		osd;	/* only present with TCP_HHOOK */
 #endif
 };
 
 /* Per-VNET UMA zone; capped at maxsockets by tcp_zone_change(). */
 VNET_DEFINE_STATIC(uma_zone_t, tcpcb_zone);
 #define	V_tcpcb_zone			VNET(tcpcb_zone)
 
 /* malloc(9) types declared by this file. */
 MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
 MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
 
 /* Mutex behind the ISN_LOCK*() macros; presumably guards ISN state. */
 static struct mtx isn_mtx;
 
 #define	ISN_LOCK_INIT()	mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
 #define	ISN_LOCK()	mtx_lock(&isn_mtx)
 #define	ISN_UNLOCK()	mtx_unlock(&isn_mtx)
 
 /*
  * TCP initialization.
  */
 
 /*
  * Re-apply the current maxsockets value as the item limit of the inpcb
  * and tcpcb zones, then let the TIME_WAIT code adjust its own zone.  The
  * tag argument is not used.
  */
 static void
 tcp_zone_change(void *tag)
 {
 	uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	tcp_tw_zone_change();
 }
 
 /*
  * Zone init callback for TCP inpcbs: sets up the per-inpcb lock on the
  * item at mem.  size and flags are unused.  Always returns 0.
  */
 static int
 tcp_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp;
 
 	inp = mem;
 	INP_LOCK_INIT(inp, "inp", "tcpinp");
 	return (0);
 }
 
 /*
  * Take a value and get the next power of 2 that doesn't overflow.
  * Used to size the tcp_inpcb hash buckets.
  *
  * The result is 1 << fls(size): the smallest power of two strictly
  * greater than size (an exact power of two is therefore doubled).  If
  * that value would not fit in a positive int, the largest power of two
  * representable in an int is returned instead.  A size <= 0 yields 1.
  *
  * The shift count is clamped before shifting, rather than detecting the
  * overflow afterwards, because shifting a signed 1 into or past the sign
  * bit is undefined behaviour.
  */
 static int
 maketcp_hashsize(int size)
 {
 	/* Largest n for which (1 << n) is still a positive int. */
 	const int maxshift = (int)(sizeof(int) * 8) - 2;
 	int shift;
 
 	/*
 	 * fls(0) is 0, giving 1.  For a negative size fls() reports the
 	 * sign bit, which would make the shift count equal to the width
 	 * of int; treat it like 0 instead.
 	 */
 	if (size <= 0)
 		return (1);
 
 	shift = fls(size);
 	/* catch overflow, and just go one power of 2 smaller */
 	if (shift > maxshift)
 		shift = maxshift;
 	return (1 << shift);
 }
 
 /*
  * Next id to hand out as tfb_id; advanced with atomic_fetchadd_int() each
  * time a function block is registered.
  */
 static volatile int next_tcp_stack_id = 1;
 
 /*
  * Register a TCP function block with the name provided in the names
  * array.  (Note that this function does NOT automatically register
  * blk->tfb_tcp_block_name as a stack name.  Therefore, you should
  * explicitly include blk->tfb_tcp_block_name in the list of names if
  * you wish to register the stack with that name.)
  *
  * Either all name registrations will succeed or all will fail.  If
  * a name registration fails, the function will update the num_names
  * argument to point to the array index of the name that encountered
  * the failure.
  *
  * Arguments:
  *   blk        function block; must provide tfb_tcp_output,
  *              tfb_tcp_do_segment, tfb_tcp_ctloutput and a non-empty
  *              tfb_tcp_block_name, must define either all or none of the
  *              four timer hooks, and must not be marked
  *              TCP_FUNC_BEING_REMOVED.
  *   wait       malloc(9) wait flag used for each list entry.
  *   names      array of *num_names names to register.
  *   num_names  in: number of names; out (on failure only): 0 if blk was
  *              rejected, otherwise the index of the failing name.
  *
  * Returns 0 on success, or an error code on failure: EINVAL for an
  * unacceptable blk, ENOMEM if an entry could not be allocated, EALREADY
  * if a name is already registered.
  */
 int
 register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
     const char *names[], int *num_names)
 {
 	struct tcp_function *n;
 	struct tcp_function_set fs;
 	int error, i;
 
 	KASSERT(names != NULL, ("%s: Called with NULL name list", __func__));
 	/* Check the pointer itself before looking at the count behind it. */
 	KASSERT(num_names != NULL && *num_names > 0,
 	    ("%s: Called with 0-length name list", __func__));
 	KASSERT(rw_initialized(&tcp_function_lock),
 	    ("%s: called too early", __func__));
 
 	if ((blk->tfb_tcp_output == NULL) ||
 	    (blk->tfb_tcp_do_segment == NULL) ||
 	    (blk->tfb_tcp_ctloutput == NULL) ||
 	    (strlen(blk->tfb_tcp_block_name) == 0)) {
 		/*
 		 * These functions are required and you
 		 * need a name.
 		 */
 		*num_names = 0;
 		return (EINVAL);
 	}
 	if (blk->tfb_tcp_timer_stop_all ||
 	    blk->tfb_tcp_timer_activate ||
 	    blk->tfb_tcp_timer_active ||
 	    blk->tfb_tcp_timer_stop) {
 		/*
 		 * If you define one timer function you
 		 * must have them all.
 		 */
 		if ((blk->tfb_tcp_timer_stop_all == NULL) ||
 		    (blk->tfb_tcp_timer_activate == NULL) ||
 		    (blk->tfb_tcp_timer_active == NULL) ||
 		    (blk->tfb_tcp_timer_stop == NULL)) {
 			*num_names = 0;
 			return (EINVAL);
 		}
 	}
 
 	if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
 		*num_names = 0;
 		return (EINVAL);
 	}
 
 	refcount_init(&blk->tfb_refcnt, 0);
 	blk->tfb_id = atomic_fetchadd_int(&next_tcp_stack_id, 1);
 	for (i = 0; i < *num_names; i++) {
 		/* Allocate before taking the lock; 'wait' may allow sleeping. */
 		n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
 		if (n == NULL) {
 			error = ENOMEM;
 			goto cleanup;
 		}
 		n->tf_fb = blk;
 
 		(void)strlcpy(fs.function_set_name, names[i],
 		    sizeof(fs.function_set_name));
 		rw_wlock(&tcp_function_lock);
 		if (find_tcp_functions_locked(&fs) != NULL) {
 			/* Duplicate name space not allowed */
 			rw_wunlock(&tcp_function_lock);
 			free(n, M_TCPFUNCTIONS);
 			error = EALREADY;
 			goto cleanup;
 		}
 		(void)strlcpy(n->tf_name, names[i], sizeof(n->tf_name));
 		TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
 		tcp_fb_cnt++;
 		rw_wunlock(&tcp_function_lock);
 	}
 	return (0);
 
 cleanup:
 	/*
 	 * Deregister the names we just added. Because registration failed
 	 * for names[i], we don't need to deregister that name.
 	 */
 	*num_names = i;
 	rw_wlock(&tcp_function_lock);
 	while (--i >= 0) {
 		TAILQ_FOREACH(n, &t_functions, tf_next) {
 			if (!strncmp(n->tf_name, names[i],
 			    TCP_FUNCTION_NAME_LEN_MAX)) {
 				TAILQ_REMOVE(&t_functions, n, tf_next);
 				tcp_fb_cnt--;
 				n->tf_fb = NULL;
 				free(n, M_TCPFUNCTIONS);
 				break;
 			}
 		}
 	}
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 /*
  * Register a TCP function block under a single name.
  *
  * A NULL name selects the block's own tfb_tcp_block_name.  This is a
  * one-element wrapper around register_tcp_functions_as_names().
  *
  * Returns 0 on success, or an error code on failure.
  */
 int
 register_tcp_functions_as_name(struct tcp_function_block *blk, const char *name,
     int wait)
 {
 	const char *single[1];
 	int count;
 
 	/* Fall back to the block's own name when the caller gave none. */
 	single[0] = (name != NULL) ? name : blk->tfb_tcp_block_name;
 	count = 1;
 	return (register_tcp_functions_as_names(blk, wait, single, &count));
 }
 
 /*
  * Register a TCP function block using the name defined in
  * blk->tfb_tcp_block_name.
  *
  * Returns 0 on success, or an error code on failure.
  */
 int
 register_tcp_functions(struct tcp_function_block *blk, int wait)
 {
 
 	return (register_tcp_functions_as_name(blk, NULL, wait));
 }
 
 /*
  * Deregister all names associated with a function block. This
  * functionally removes the function block from use within the system.
  *
  * When called with a true quiesce argument, mark the function block
  * as being removed so no more stacks will use it and determine
  * whether the removal would succeed.
  *
  * When called with a false quiesce argument, actually attempt the
  * removal.
  *
  * When called with a force argument, attempt to switch all TCBs to
  * use the default stack instead of returning EBUSY.
  *
  * Returns 0 on success (or if the removal would succeed), or an error
  * code on failure: EPERM for the built-in default block, EBUSY if blk is
  * the currently selected default or still has TCBs attached.
  *
  * Note that TCP_FUNC_BEING_REMOVED is set on blk before the reference
  * check and is not cleared again when EBUSY is returned for attached TCBs.
  */
 int
 deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
     bool force)
 {
 	struct tcp_function *f;
 
 	if (blk == &tcp_def_funcblk) {
 		/* You can't un-register the default */
 		return (EPERM);
 	}
 	rw_wlock(&tcp_function_lock);
 	if (blk == tcp_func_set_ptr) {
 		/* You can't free the current default */
 		rw_wunlock(&tcp_function_lock);
 		return (EBUSY);
 	}
 	/* Mark the block so no more stacks can use it. */
 	blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
 	/*
 	 * If TCBs are still attached to the stack, attempt to switch them
 	 * to the default stack.
 	 */
 	if (force && blk->tfb_refcnt) {
 		struct inpcb *inp;
 		struct tcpcb *tp;
 		VNET_ITERATOR_DECL(vnet_iter);
 
 		/*
 		 * The function lock is dropped while walking the inpcb
 		 * lists and re-taken afterwards.
 		 */
 		rw_wunlock(&tcp_function_lock);
 
 		VNET_LIST_RLOCK();
 		VNET_FOREACH(vnet_iter) {
 			CURVNET_SET(vnet_iter);
 			INP_INFO_WLOCK(&V_tcbinfo);
 			CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
 				INP_WLOCK(inp);
 				/* Skip inpcbs flagged as being in time-wait. */
 				if (inp->inp_flags & INP_TIMEWAIT) {
 					INP_WUNLOCK(inp);
 					continue;
 				}
 				tp = intotcpcb(inp);
 				/* Only connections using this block matter. */
 				if (tp == NULL || tp->t_fb != blk) {
 					INP_WUNLOCK(inp);
 					continue;
 				}
 				tcp_switch_back_to_default(tp);
 				INP_WUNLOCK(inp);
 			}
 			INP_INFO_WUNLOCK(&V_tcbinfo);
 			CURVNET_RESTORE();
 		}
 		VNET_LIST_RUNLOCK();
 
 		rw_wlock(&tcp_function_lock);
 	}
 	if (blk->tfb_refcnt) {
 		/* TCBs still attached. */
 		rw_wunlock(&tcp_function_lock);
 		return (EBUSY);
 	}
 	if (quiesce) {
 		/* Skip removal. */
 		rw_wunlock(&tcp_function_lock);
 		return (0);
 	}
 	/* Remove any function names that map to this function block. */
 	while (find_tcp_fb_locked(blk, &f) != NULL) {
 		TAILQ_REMOVE(&t_functions, f, tf_next);
 		tcp_fb_cnt--;
 		f->tf_fb = NULL;
 		free(f, M_TCPFUNCTIONS);
 	}
 	rw_wunlock(&tcp_function_lock);
 	return (0);
 }
 
 /*
  * TCP initialization.
  *
  * The first part runs for every vnet: it registers the helper hook heads
  * (TCP_HHOOK), chooses the PCB hash size, initialises V_tcbinfo, creates
  * the tcpcb and SACK hole zones and calls the time-wait, syncache,
  * tcp_hc and fastopen init routines.
  *
  * Everything after the IS_DEFAULT_VNET() check runs only for the default
  * vnet: global timer defaults, the function block list and its lock,
  * registration of the default stack, header size limits, the ISN lock,
  * event handlers and the LRO/compression counters.
  */
 void
 tcp_init(void)
 {
 	const char *tcbhash_tuneable;
 	int hashsize;
 
 	tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
 
 #ifdef TCP_HHOOK
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
 	    &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
 	    &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 #endif
 #ifdef STATS
 	if (tcp_stats_init())
 		printf("%s: WARNING: unable to initialise TCP stats\n",
 		    __func__);
 #endif
 	/* Compile-time default, possibly overridden by the loader tunable. */
 	hashsize = TCBHASHSIZE;
 	TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
 	if (hashsize == 0) {
 		/*
 		 * Auto tune the hash size based on maxsockets.
 		 * A perfect hash would have a 1:1 mapping
 		 * (hashsize = maxsockets) however it's been
 		 * suggested that O(2) average is better.
 		 */
 		hashsize = maketcp_hashsize(maxsockets / 4);
 		/*
 		 * Our historical default is 512,
 		 * do not autotune lower than this.
 		 */
 		if (hashsize < 512)
 			hashsize = 512;
 		if (bootverbose && IS_DEFAULT_VNET(curvnet))
 			printf("%s: %s auto tuned to %d\n", __func__,
 			    tcbhash_tuneable, hashsize);
 	}
 	/*
 	 * We require a hashsize to be a power of two.
 	 * Previously if it was not a power of two we would just reset it
 	 * back to 512, which could be a nasty surprise if you did not notice
 	 * the error message.
 	 * Instead what we do is clip it to the closest power of two lower
 	 * than the specified hash value.
 	 *
 	 * NOTE(review): maketcp_hashsize() computes 1 << fls(size), which
 	 * for a value that is not a power of two is the next HIGHER power
 	 * of two, not a lower one -- confirm which is intended.
 	 */
 	if (!powerof2(hashsize)) {
 		int oldhashsize = hashsize;
 
 		hashsize = maketcp_hashsize(hashsize);
 		/* prevent absurdly low value */
 		if (hashsize < 16)
 			hashsize = 16;
 		printf("%s: WARNING: TCB hash size not a power of 2, "
 		    "clipped from %d to %d.\n", __func__, oldhashsize,
 		    hashsize);
 	}
 	in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
 	    "tcp_inpcb", tcp_inpcb_init, IPI_HASHFIELDS_4TUPLE);
 
 	/*
 	 * These have to be type stable for the benefit of the timers.
 	 */
 	V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached");
 
 	tcp_tw_init();
 	syncache_init();
 	tcp_hc_init();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
 	V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 
 	tcp_fastopen_init();
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	tcp_reass_global_init();
 
 	/* XXX virtualize those below? */
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
 	tcp_keepidle = TCPTV_KEEP_IDLE;
 	tcp_keepintvl = TCPTV_KEEPINTVL;
 	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
 	tcp_msl = TCPTV_MSL;
 	/* The retransmit timer values are never allowed to drop below 1. */
 	tcp_rexmit_initial = TCPTV_RTOBASE;
 	if (tcp_rexmit_initial < 1)
 		tcp_rexmit_initial = 1;
 	tcp_rexmit_min = TCPTV_MIN;
 	if (tcp_rexmit_min < 1)
 		tcp_rexmit_min = 1;
 	tcp_persmin = TCPTV_PERSMIN;
 	tcp_persmax = TCPTV_PERSMAX;
 	tcp_rexmit_slop = TCPTV_CPU_VAR;
 	tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
 	tcp_tcbhashsize = hashsize;
 
 	/* Setup the tcp function block list */
 	TAILQ_INIT(&t_functions);
 	rw_init(&tcp_function_lock, "tcp_func_lock");
 	/* NOTE(review): the registration result is not checked here. */
 	register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
 #ifdef TCP_BLACKBOX
 	/* Initialize the TCP logging data. */
 	tcp_log_init();
 #endif
 	arc4rand(&V_ts_offset_secret, sizeof(V_ts_offset_secret), 0);
 
 	/* Optionally switch the receive path to soreceive_stream(). */
 	if (tcp_soreceive_stream) {
 #ifdef INET
 		tcp_usrreqs.pru_soreceive = soreceive_stream;
 #endif
 #ifdef INET6
 		tcp6_usrreqs.pru_soreceive = soreceive_stream;
 #endif /* INET6 */
 	}
 
 	/*
 	 * Make sure max_protohdr covers the smallest TCP/IP header and that
 	 * link header plus that header fit into an mbuf header (MHLEN).
 	 */
 #ifdef INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
 #else /* INET6 */
 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
 #endif /* INET6 */
 	if (max_protohdr < TCP_MINPROTOHDR)
 		max_protohdr = TCP_MINPROTOHDR;
 	if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
 		panic("tcp_init");
 #undef TCP_MINPROTOHDR
 
 	ISN_LOCK_INIT();
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
 		EVENTHANDLER_PRI_ANY);
 
 	tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK);
 	tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK);
 	tcp_inp_lro_compressed = counter_u64_alloc(M_WAITOK);
 	tcp_inp_lro_locks_taken = counter_u64_alloc(M_WAITOK);
 	tcp_extra_mbuf = counter_u64_alloc(M_WAITOK);
 	tcp_would_have_but = counter_u64_alloc(M_WAITOK);
 	tcp_comp_total = counter_u64_alloc(M_WAITOK);
 	tcp_uncomp_total = counter_u64_alloc(M_WAITOK);
 #ifdef TCPPCAP
 	tcp_pcap_init();
 #endif
 }
 
 #ifdef VIMAGE
 /*
  * Per-vnet TCP teardown, run through VNET_SYSUNINIT.
  *
  * Waits until the vnet's inpcb count has dropped to zero, then destroys
  * the per-vnet state created by tcp_init(): tcp_hc, syncache, time-wait,
  * the pcbinfo, the SACK hole and tcpcb zones, fastopen state and (with
  * TCP_HHOOK) the two helper hook heads.
  */
 static void
 tcp_destroy(void *unused __unused)
 {
 	int n;
 #ifdef TCP_HHOOK
 	int error;
 #endif
 
 	/*
 	 * All our processes are gone, all our sockets should be cleaned
 	 * up, which means, we should be past the tcp_discardcb() calls.
 	 * Sleep to let all tcpcb timers really disappear and cleanup.
 	 */
 	for (;;) {
 		INP_LIST_RLOCK(&V_tcbinfo);
 		n = V_tcbinfo.ipi_count;
 		INP_LIST_RUNLOCK(&V_tcbinfo);
 		if (n == 0)
 			break;
 		/* Poll again after a tenth of a second. */
 		pause("tcpdes", hz / 10);
 	}
 	tcp_hc_destroy();
 	syncache_destroy();
 	tcp_tw_destroy();
 	in_pcbinfo_destroy(&V_tcbinfo);
 	/* tcp_discardcb() clears the sack_holes up. */
 	uma_zdestroy(V_sack_hole_zone);
 	uma_zdestroy(V_tcpcb_zone);
 
 	/*
 	 * Cannot free the zone until all tcpcbs are released as we attach
 	 * the allocations to them.
 	 */
 	tcp_fastopen_destroy();
 
 #ifdef TCP_HHOOK
 	/* Deregistration failures are reported but otherwise ignored. */
 	error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister helper hook "
 		    "type=%d, id=%d: error %d returned\n", __func__,
 		    HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error);
 	}
 	error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister helper hook "
 		    "type=%d, id=%d: error %d returned\n", __func__,
 		    HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error);
 	}
 #endif
 }
 VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL);
 #endif
 
 /*
  * shutdown_pre_sync event handler registered by tcp_init().
  * Intentionally empty: there is currently nothing to do at shutdown.
  */
 void
 tcp_fini(void *xtp)
 {
 
 }
 
 /*
  * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb.
  * tcp_template used to store this data in mbufs, but we now recopy it out
  * of the tcpcb each time to conserve mbufs.
  *
  * Arguments:
  *   inp      connection's inpcb (write lock asserted); supplies the
  *            addresses, ports, flow label, TOS and TTL.
  *   port     when non-zero, the IP header's protocol / next-header field
  *            is set to IPPROTO_UDP instead of IPPROTO_TCP.  The value
  *            itself is not stored in any header here.
  *            NOTE(review): presumably the TCP-over-UDP tunneling port --
  *            confirm against callers.
  *   ip_ptr   buffer receiving the IPv4 or IPv6 header, selected by
  *            INP_IPV6 in inp->inp_vflag.
  *   tcp_ptr  buffer receiving the TCP header.
  *
  * For IPv6 only the flow-info and version bits of the first word are
  * rewritten; the remaining bits already in the buffer are preserved.
  * ip6_plen is set to sizeof(struct tcphdr) regardless of port.
  */
 void
 tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
 {
 	struct tcphdr *th = (struct tcphdr *)tcp_ptr;
 
 	INP_WLOCK_ASSERT(inp);
 
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		struct ip6_hdr *ip6;
 
 		ip6 = (struct ip6_hdr *)ip_ptr;
 		ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
 			(inp->inp_flow & IPV6_FLOWINFO_MASK);
 		ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
 			(IPV6_VERSION & IPV6_VERSION_MASK);
 		if (port == 0)
 			ip6->ip6_nxt = IPPROTO_TCP;
 		else
 			ip6->ip6_nxt = IPPROTO_UDP;
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_src = inp->in6p_laddr;
 		ip6->ip6_dst = inp->in6p_faddr;
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		struct ip *ip;
 
 		ip = (struct ip *)ip_ptr;
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = 5;		/* 5 32-bit words: no IP options */
 		ip->ip_tos = inp->inp_ip_tos;
 		ip->ip_len = 0;
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_ttl = inp->inp_ip_ttl;
 		ip->ip_sum = 0;
 		if (port == 0)
 			ip->ip_p = IPPROTO_TCP;
 		else
 			ip->ip_p = IPPROTO_UDP;
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst = inp->inp_faddr;
 	}
 #endif /* INET */
 	/* Skeletal TCP header: ports from the inpcb, everything else zero. */
 	th->th_sport = inp->inp_lport;
 	th->th_dport = inp->inp_fport;
 	th->th_seq = 0;
 	th->th_ack = 0;
 	th->th_x2 = 0;
 	th->th_off = 5;		/* 5 32-bit words: no TCP options */
 	th->th_flags = 0;
 	th->th_win = 0;
 	th->th_urp = 0;
 	th->th_sum = 0;		/* in_pseudo() is called later for ipv4 */
 }
 
 /*
  * Build a header template for sending TCP packets on a connection.
  *
  * A struct tcptemp is allocated with malloc(9) (M_TEMP, M_NOWAIT) and its
  * IP and TCP parts are filled in by tcpip_fillheaders() with port 0.  The
  * only use for this function is in keepalives, which use tcp_respond.
  *
  * Returns the template, or NULL if the allocation failed.
  */
 struct tcptemp *
 tcpip_maketemplate(struct inpcb *inp)
 {
 	struct tcptemp *tmpl;
 
 	tmpl = malloc(sizeof(*tmpl), M_TEMP, M_NOWAIT);
 	if (tmpl != NULL) {
 		tcpip_fillheaders(inp, 0, (void *)&tmpl->tt_ipgen,
 		    (void *)&tmpl->tt_t);
 	}
 	return (tmpl);
 }
 
 /*
  * Send a single message to the TCP at address specified by
  * the given TCP/IP header.  If m == NULL, then we make a copy
  * of the tcpiphdr at th and send directly to the addressed host.
  * This is used to force keep alive messages out using the TCP
  * template for a connection.  If flags are given then we send
  * a message back to the TCP which originated the segment th,
  * and discard the mbuf containing it and any other attached mbufs.
  *
  * In any case the ack and sequence number of the transmitted
  * segment are as specified by the parameters.
  *
  * NOTE: If m != NULL, then th must point to *inside* the mbuf.
  *
  * At least one of tp and m must be non-NULL, and the caller must be in
  * the network epoch (both asserted below).  When tp is non-NULL its inpcb
  * must be locked.  A non-NULL m is always consumed: it is either reused
  * for the reply or freed.  Allocation failures and TCP-MD5 signing
  * failures cause the reply to be silently dropped.
  */
 void
 tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
     tcp_seq ack, tcp_seq seq, int flags)
 {
 	struct tcpopt to;
 	struct inpcb *inp;
 	struct ip *ip;
 	struct mbuf *optm;
 	struct udphdr *uh = NULL;
 	struct tcphdr *nth;
 	u_char *optp;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
 	int optlen, tlen, win, ulen;
 	bool incl_opts;
 	uint16_t port;
 
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 	NET_EPOCH_ASSERT();
 
 #ifdef INET6
 	/* The version nibble of the supplied header selects the family. */
 	isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4);
 	ip6 = ipgen;
 #endif /* INET6 */
 	ip = ipgen;
 
 	if (tp != NULL) {
 		inp = tp->t_inpcb;
 		KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
 		INP_LOCK_ASSERT(inp);
 	} else
 		inp = NULL;
 
 	/*
 	 * Work out the UDP port for the reply: taken from the received
 	 * packet's header if it arrived over UDP, from the tcpcb when there
 	 * is no packet, and 0 (plain TCP) otherwise.
 	 */
 	if (m != NULL) {
 #ifdef INET6
 		if (isipv6 && ip6 && (ip6->ip6_nxt == IPPROTO_UDP))
 			port = m->m_pkthdr.tcp_tun_port;
 		else
 #endif
 		if (ip && (ip->ip_p == IPPROTO_UDP))
 			port = m->m_pkthdr.tcp_tun_port;
 		else
 			port = 0;
 	} else
 		port = tp->t_port;
 
 	incl_opts = false;
 	win = 0;
 	if (tp != NULL) {
 		/* A RST advertises a zero window; anything else the free space. */
 		if (!(flags & TH_RST)) {
 			win = sbspace(&inp->inp_socket->so_rcv);
 			if (win > TCP_MAXWIN << tp->rcv_scale)
 				win = TCP_MAXWIN << tp->rcv_scale;
 		}
 		if ((tp->t_flags & TF_NOOPT) == 0)
 			incl_opts = true;
 	}
 	if (m == NULL) {
 		/*
 		 * No packet to reply to: build a new mbuf from the supplied
 		 * headers.  Addresses and ports are used as given (not
 		 * swapped) and the flags are forced to TH_ACK.
 		 */
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return;
 		m->m_data += max_linkhdr;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(m, caddr_t),
 			      sizeof(struct ip6_hdr));
 			ip6 = mtod(m, struct ip6_hdr *);
 			nth = (struct tcphdr *)(ip6 + 1);
 			if (port) {
 				/* Insert a UDP header */
 				uh = (struct udphdr *)nth;
 				uh->uh_sport = htons(V_tcp_udp_tunneling_port);
 				uh->uh_dport = port;
 				nth = (struct tcphdr *)(uh + 1);
 			}
 		} else
 #endif /* INET6 */
 		{
 			bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
 			ip = mtod(m, struct ip *);
 			nth = (struct tcphdr *)(ip + 1);
 			if (port) {
 				/* Insert a UDP header */
 				uh = (struct udphdr *)nth;
 				uh->uh_sport = htons(V_tcp_udp_tunneling_port);
 				uh->uh_dport = port;
 				nth = (struct tcphdr *)(uh + 1);
 			}
 		}
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		flags = TH_ACK;
 	} else if ((!M_WRITABLE(m)) || (port != 0)) {
 		struct mbuf *n;
 
 		/* Can't reuse 'm', allocate a new mbuf. */
 		n = m_gethdr(M_NOWAIT, MT_DATA);
 		if (n == NULL) {
 			m_freem(m);
 			return;
 		}
 
 		if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
 			m_freem(m);
 			m_freem(n);
 			return;
 		}
 
 		n->m_data += max_linkhdr;
 		/* m_len is set later */
 		/* xchg() stays defined until the #undef in the final branch. */
 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(n, caddr_t),
 			      sizeof(struct ip6_hdr));
 			ip6 = mtod(n, struct ip6_hdr *);
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
 			nth = (struct tcphdr *)(ip6 + 1);
 			if (port) {
 				/* Insert a UDP header */
 				uh = (struct udphdr *)nth;
 				uh->uh_sport = htons(V_tcp_udp_tunneling_port);
 				uh->uh_dport = port;
 				nth = (struct tcphdr *)(uh + 1);
 			}
 		} else
 #endif /* INET6 */
 		{
 			bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip));
 			ip = mtod(n, struct ip *);
 			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
 			nth = (struct tcphdr *)(ip + 1);
 			if (port) {
 				/* Insert a UDP header */
 				uh = (struct udphdr *)nth;
 				uh->uh_sport = htons(V_tcp_udp_tunneling_port);
 				uh->uh_dport = port;
 				nth = (struct tcphdr *)(uh + 1);
 			}
 		}
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		xchg(nth->th_dport, nth->th_sport, uint16_t);
 		th = nth;
 		/* The original packet is no longer needed. */
 		m_freem(m);
 		m = n;
 	} else {
 		/*
 		 *  reuse the mbuf.
 		 * XXX MRT We inherit the FIB, which is lucky.
 		 */
 		m_freem(m->m_next);
 		m->m_next = NULL;
 		m->m_data = (caddr_t)ipgen;
 		/* m_len is set later */
 #ifdef INET6
 		if (isipv6) {
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		if (th != nth) {
 			/*
 			 * this is usually a case when an extension header
 			 * exists between the IPv6 header and the
 			 * TCP header.
 			 */
 			nth->th_sport = th->th_sport;
 			nth->th_dport = th->th_dport;
 		}
 		xchg(nth->th_dport, nth->th_sport, uint16_t);
 #undef xchg
 	}
 	/* tlen: IP header + optional UDP header + fixed TCP header. */
 	tlen = 0;
 #ifdef INET6
 	if (isipv6)
 		tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 		tlen = sizeof (struct tcpiphdr);
 #endif
 	if (port)
 		tlen += sizeof (struct udphdr);
 #ifdef INVARIANTS
 	m->m_len = 0;
 	KASSERT(M_TRAILINGSPACE(m) >= tlen,
 	    ("Not enough trailing space for message (m=%p, need=%d, have=%ld)",
 	    m, tlen, (long)M_TRAILINGSPACE(m)));
 #endif
 	m->m_len = tlen;
 	to.to_flags = 0;
 	if (incl_opts) {
 		/* Make sure we have room. */
 		if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) {
 			/* Options go into a second mbuf, or are dropped. */
 			m->m_next = m_get(M_NOWAIT, MT_DATA);
 			if (m->m_next) {
 				optp = mtod(m->m_next, u_char *);
 				optm = m->m_next;
 			} else
 				incl_opts = false;
 		} else {
 			optp = (u_char *) (nth + 1);
 			optm = m;
 		}
 	}
 	/* incl_opts is only ever true when tp != NULL (set above). */
 	if (incl_opts) {
 		/* Timestamps. */
 		if (tp->t_flags & TF_RCVD_TSTMP) {
 			to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
 			to.to_tsecr = tp->ts_recent;
 			to.to_flags |= TOF_TS;
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		/* TCP-MD5 (RFC2385). */
 		if (tp->t_flags & TF_SIGNATURE)
 			to.to_flags |= TOF_SIGNATURE;
 #endif
 		/* Add the options. */
 		tlen += optlen = tcp_addoptions(&to, optp);
 
 		/* Update m_len in the correct mbuf. */
 		optm->m_len += optlen;
 	} else
 		optlen = 0;
 #ifdef INET6
 	if (isipv6) {
 		/* uh is non-NULL exactly when a UDP header was inserted. */
 		if (uh) {
 			ulen = tlen - sizeof(struct ip6_hdr);
 			uh->uh_ulen = htons(ulen);
 		}
 		ip6->ip6_flow = 0;
 		ip6->ip6_vfc = IPV6_VERSION;
 		if (port)
 			ip6->ip6_nxt = IPPROTO_UDP;
 		else
 			ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_plen = htons(tlen - sizeof(*ip6));
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		if (uh) {
 			ulen = tlen - sizeof(struct ip);
 			uh->uh_ulen = htons(ulen);
 		}
 		ip->ip_len = htons(tlen);
 		ip->ip_ttl = V_ip_defttl;
 		if (port) {
 			ip->ip_p = IPPROTO_UDP;
 		} else {
 			ip->ip_p = IPPROTO_TCP;
 		}
 		if (V_path_mtu_discovery)
 			ip->ip_off |= htons(IP_DF);
 	}
 #endif
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 	if (inp != NULL) {
 		/*
 		 * Packet is associated with a socket, so allow the
 		 * label of the response to reflect the socket label.
 		 */
 		INP_LOCK_ASSERT(inp);
 		mac_inpcb_create_mbuf(inp, m);
 	} else {
 		/*
 		 * Packet is not associated with a socket, so possibly
 		 * update the label in place.
 		 */
 		mac_netinet_tcp_reply(m);
 	}
 #endif
 	nth->th_seq = htonl(seq);
 	nth->th_ack = htonl(ack);
 	nth->th_x2 = 0;
 	/* Data offset is expressed in 32-bit words. */
 	nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
 	nth->th_flags = flags;
 	if (tp != NULL)
 		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
 	else
 		nth->th_win = htons((u_short)win);
 	nth->th_urp = 0;
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	if (to.to_flags & TOF_SIGNATURE) {
 		/* A segment that must be signed but cannot be is dropped. */
 		if (!TCPMD5_ENABLED() ||
 		    TCPMD5_OUTPUT(m, nth, to.to_signature) != 0) {
 			m_freem(m);
 			return;
 		}
 	}
 #endif
 
 	/*
 	 * Store the pseudo-header checksum and request completion of the
 	 * UDP or TCP checksum through the mbuf's csum_flags/csum_data.
 	 */
 #ifdef INET6
 	if (isipv6) {
 		if (port) {
 			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			uh->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
 			nth->th_sum = 0;
 		} else {
 			m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			nth->th_sum = in6_cksum_pseudo(ip6,
 			    tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
 		}
 		ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
 		    NULL, NULL);
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		if (port) {
 			uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 			    htons(ulen + IPPROTO_UDP));
 			m->m_pkthdr.csum_flags = CSUM_UDP;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			nth->th_sum = 0;
 		} else {
 			m->m_pkthdr.csum_flags = CSUM_TCP;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 			    htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
 		}
 	}
 #endif /* INET */
 #ifdef TCPDEBUG
 	if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
 	TCP_PROBE3(debug__output, tp, th, m);
 	if (flags & TH_RST)
 		TCP_PROBE5(accept__refused, NULL, NULL, m, tp, nth);
 
 	/* Hand the packet to the IP layer; the result is ignored. */
 #ifdef INET6
 	if (isipv6) {
 		TCP_PROBE5(send, NULL, tp, ip6, tp, nth);
 		(void)ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
 	}
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		TCP_PROBE5(send, NULL, tp, ip, tp, nth);
 		(void)ip_output(m, NULL, NULL, 0, NULL, inp);
 	}
 #endif
 }
 
 /*
  * Create a new TCP control block, making an
  * empty reassembly queue and hooking it to the argument
  * protocol control block.  The `inp' parameter must have
  * come from the zone allocator set up in tcp_init().
  */
 struct tcpcb *
 tcp_newtcpcb(struct inpcb *inp)
 {
 	struct tcpcb_mem *tm;
 	struct tcpcb *tp;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 
 	tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO);
 	if (tm == NULL)
 		return (NULL);
 	tp = &tm->tcb;
 
 	/* Initialise cc_var struct for this tcpcb. */
 	tp->ccv = &tm->ccv;
 	tp->ccv->type = IPPROTO_TCP;
 	tp->ccv->ccvc.tcp = tp;
 	rw_rlock(&tcp_function_lock);
 	tp->t_fb = tcp_func_set_ptr;
 	refcount_acquire(&tp->t_fb->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	/*
 	 * Use the current system default CC algorithm.
 	 */
 	CC_LIST_RLOCK();
 	KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
 	CC_ALGO(tp) = CC_DEFAULT();
 	CC_LIST_RUNLOCK();
 	/*
 	 * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
 	 * is called.
 	 */
 	in_pcbref(inp);	/* Reference for tcpcb */
 	tp->t_inpcb = inp;
 
 	if (CC_ALGO(tp)->cb_init != NULL)
 		if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
 			if (tp->t_fb->tfb_tcp_fb_fini)
 				(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 			in_pcbrele_wlocked(inp);
 			refcount_release(&tp->t_fb->tfb_refcnt);
 			uma_zfree(V_tcpcb_zone, tm);
 			return (NULL);
 		}
 
 #ifdef TCP_HHOOK
 	tp->osd = &tm->osd;
 	if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 		in_pcbrele_wlocked(inp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		uma_zfree(V_tcpcb_zone, tm);
 		return (NULL);
 	}
 #endif
 
 #ifdef VIMAGE
 	tp->t_vnet = inp->inp_vnet;
 #endif
 	tp->t_timers = &tm->tt;
 	TAILQ_INIT(&tp->t_segq);
 	tp->t_maxseg =
 #ifdef INET6
 		isipv6 ? V_tcp_v6mssdflt :
 #endif /* INET6 */
 		V_tcp_mssdflt;
 
 	/* Set up our timeouts. */
 	callout_init(&tp->t_timers->tt_rexmt, 1);
 	callout_init(&tp->t_timers->tt_persist, 1);
 	callout_init(&tp->t_timers->tt_keep, 1);
 	callout_init(&tp->t_timers->tt_2msl, 1);
 	callout_init(&tp->t_timers->tt_delack, 1);
 
 	if (V_tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (V_tcp_do_sack)
 		tp->t_flags |= TF_SACK_PERMIT;
 	TAILQ_INIT(&tp->snd_holes);
 
 	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
 	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
 	 * reasonable initial retransmit time.
 	 */
 	tp->t_srtt = TCPTV_SRTTBASE;
 	tp->t_rttvar = ((tcp_rexmit_initial - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = tcp_rexmit_min;
 	tp->t_rxtcur = tcp_rexmit_initial;
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->t_rcvtime = ticks;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 	inp->inp_ppcb = tp;
 #ifdef TCPPCAP
 	/*
 	 * Init the TCP PCAP queues.
 	 */
 	tcp_pcap_tcpcb_init(tp);
 #endif
 #ifdef TCP_BLACKBOX
 	/* Initialize the per-TCPCB log data. */
 	tcp_log_tcpcbinit(tp);
 #endif
 	tp->t_pacing_rate = -1;
 	if (tp->t_fb->tfb_tcp_fb_init) {
 		if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) {
 			refcount_release(&tp->t_fb->tfb_refcnt);
 			in_pcbrele_wlocked(inp);
 			uma_zfree(V_tcpcb_zone, tm);
 			return (NULL);
 		}
 	}
 #ifdef STATS
 	if (V_tcp_perconn_stats_enable == 1)
 		tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
 #endif
 	if (V_tcp_do_lrd)
 		tp->t_flags |= TF_LRD;
 	return (tp);		/* XXX */
 }
 
 /*
  * Detach every TCP connection from a congestion control algorithm that
  * is about to be unloaded, falling back to NewReno.
  *
  * All vnets are walked.  For each connection still using unload_algo the
  * algorithm's cb_destroy hook (if any) is run, the per-connection CC data
  * pointer is cleared and the algorithm is replaced by newreno_cc_algo, so
  * that no dangling pointers remain once the module is gone.
  *
  * A non-zero return would tell the CC framework that the unload is
  * unsafe; this implementation cannot fail and always returns 0.
  */
 int
 tcp_ccalgounload(struct cc_algo *unload_algo)
 {
 	struct cc_algo *old_algo;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		/*
 		 * Connections that are part way through initialisation with
 		 * the departing algorithm hold INP_INFO_WLOCK, so taking it
 		 * here means the list below is stable before we walk it.
 		 */
 		INP_INFO_WLOCK(&V_tcbinfo);
 		CK_LIST_FOREACH(inp, &V_tcb, inp_list) {
 			INP_WLOCK(inp);
 			/* Important to skip tcptw structs. */
 			if (inp->inp_flags & INP_TIMEWAIT) {
 				INP_WUNLOCK(inp);
 				continue;
 			}
 			tp = intotcpcb(inp);
 			if (tp == NULL || CC_ALGO(tp) != unload_algo) {
 				INP_WUNLOCK(inp);
 				continue;
 			}
 			/*
 			 * With INP_WLOCK held the connection cannot be
 			 * running inside the CC module, so switching the
 			 * algorithm now is safe.
 			 */
 			old_algo = CC_ALGO(tp);
 			if (old_algo->cb_destroy != NULL)
 				old_algo->cb_destroy(tp->ccv);
 			CC_DATA(tp) = NULL;
 			/*
 			 * NewReno may allocate on demand for some stateful
 			 * configuration but never fails when that
 			 * allocation does, which makes it a safe fallback.
 			 */
 			CC_ALGO(tp) = &newreno_cc_algo;
 			INP_WUNLOCK(inp);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 
 	return (0);
 }
 
 /*
  * Drop a TCP connection and report an error on its socket.
  *
  * If a SYN has been received on the connection, it is moved to CLOSED
  * and the stack's output routine is run so that a RST reaches the peer.
  * For ETIMEDOUT a pending soft error, if any, is reported instead.
  * Returns whatever tcp_close() returns.
  */
 struct tcpcb *
 tcp_drop(struct tcpcb *tp, int errno)
 {
 	struct socket *so;
 
 	so = tp->t_inpcb->inp_socket;
 
 	NET_EPOCH_ASSERT();
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
 		TCPSTAT_INC(tcps_conndrops);
 	} else {
 		tcp_state_change(tp, TCPS_CLOSED);
 		(void) tp->t_fb->tfb_tcp_output(tp);
 		TCPSTAT_INC(tcps_drops);
 	}
 
 	/* A recorded soft error is more specific than a bare timeout. */
 	if (errno == ETIMEDOUT && tp->t_softerror != 0)
 		errno = tp->t_softerror;
 	so->so_error = errno;
 
 	return (tcp_close(tp));
 }
 
 /*
  * Tear down the TCP control block attached to an inpcb.
  *
  * Stops all timers, releases per-connection resources (reassembly queue,
  * SACK holes, and the optional offload, pcap, hhook and stats state),
  * lets the congestion control module destroy its data and detaches the
  * tcpcb from the inpcb.  If no timer drain is outstanding
  * (tt_draincnt == 0) the tcpcb is freed here, after its metrics have been
  * passed to tcp_hc_update(); otherwise the final free is left to the
  * pending discard callback(s).
  *
  * The inpcb write lock must be held by the caller.
  */
 void
 tcp_discardcb(struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 	int released __unused;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Make sure that all of our timers are stopped before we delete the
 	 * PCB.
 	 *
 	 * If stopping a timer fails, we schedule a discard function in same
 	 * callout, and the last discard function called will take care of
 	 * deleting the tcpcb.
 	 */
 	tp->t_timers->tt_draincnt = 0;
 	tcp_timer_stop(tp, TT_REXMT);
 	tcp_timer_stop(tp, TT_PERSIST);
 	tcp_timer_stop(tp, TT_KEEP);
 	tcp_timer_stop(tp, TT_2MSL);
 	tcp_timer_stop(tp, TT_DELACK);
 	if (tp->t_fb->tfb_tcp_timer_stop_all) {
 		/*
 		 * Call the stop-all function of the methods,
 		 * this function should call the tcp_timer_stop()
 		 * method with each of the function specific timeouts.
 		 * That stop will be called via the tfb_tcp_timer_stop()
 		 * which should use the async drain function of the
 		 * callout system (see tcp_var.h).
 		 */
 		tp->t_fb->tfb_tcp_timer_stop_all(tp);
 	}
 
 	/* free the reassembly queue, if any */
 	tcp_reass_flush(tp);
 
 #ifdef TCP_OFFLOAD
 	/* Disconnect offload device, if any. */
 	if (tp->t_flags & TF_TOE)
 		tcp_offload_detach(tp);
 #endif
 
 	tcp_free_sackholes(tp);
 
 #ifdef TCPPCAP
 	/* Free the TCP PCAP queues. */
 	tcp_pcap_drain(&(tp->t_inpkts));
 	tcp_pcap_drain(&(tp->t_outpkts));
 #endif
 
 	/* Allow the CC algorithm to clean up after itself. */
 	if (CC_ALGO(tp)->cb_destroy != NULL)
 		CC_ALGO(tp)->cb_destroy(tp->ccv);
 	CC_DATA(tp) = NULL;
 
 #ifdef TCP_HHOOK
 	khelp_destroy_osd(tp->osd);
 #endif
 #ifdef STATS
 	stats_blob_destroy(tp->t_stats);
 #endif
 
 	/* Detach the tcpcb from its CC algorithm and from the inpcb. */
 	CC_ALGO(tp) = NULL;
 	inp->inp_ppcb = NULL;
 	if (tp->t_timers->tt_draincnt == 0) {
 		/* We own the last reference on tcpcb, let's free it. */
 #ifdef TCP_BLACKBOX
 		tcp_log_tcpcbfini(tp);
 #endif
 		TCPSTATES_DEC(tp->t_state);
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 
 		/*
 		 * If we got enough samples through the srtt filter,
 		 * save the rtt and rttvar in the routing entry.
 		 * 'Enough' is arbitrarily defined as 4 rtt samples.
 		 * 4 samples is enough for the srtt filter to converge
 		 * to within enough % of the correct value; fewer samples
 		 * and we could save a bogus rtt. The danger is not high
 		 * as tcp quickly recovers from everything.
 		 * XXX: Works very well but needs some more statistics!
 		 *
 		 * XXXRRS: Updating must be after the stack fini() since
 		 * that may be converting some internal representation of
 		 * say srtt etc into the general one used by other stacks.
 		 * Lets also at least protect against the so being NULL
 		 * as RW stated below.
 		 */
 		if ((tp->t_rttupdated >= 4) && (so != NULL)) {
 			struct hc_metrics_lite metrics;
 			uint32_t ssthresh;
 
 			bzero(&metrics, sizeof(metrics));
 			/*
 			 * Update the ssthresh always when the conditions below
 			 * are satisfied. This gives us better new start value
 			 * for the congestion avoidance for new connections.
 			 * ssthresh is only set if packet loss occurred on a session.
 			 *
 			 * XXXRW: 'so' may be NULL here, and/or socket buffer may be
 			 * being torn down.  Ideally this code would not use 'so'.
 			 */
 			ssthresh = tp->snd_ssthresh;
 			if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
 				/*
 				 * convert the limit from user data bytes to
 				 * packets then to packet data bytes.
 				 */
 				/* Round to the nearest segment, at least two. */
 				ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
 				if (ssthresh < 2)
 					ssthresh = 2;
 				/* Scale by segment size plus the IP + TCP header size. */
 				ssthresh *= (tp->t_maxseg +
 #ifdef INET6
 					     (isipv6 ? sizeof (struct ip6_hdr) +
 					      sizeof (struct tcphdr) :
 #endif
 					      sizeof (struct tcpiphdr)
 #ifdef INET6
 						     )
 #endif
 					);
 			} else
 				ssthresh = 0;
 			metrics.rmx_ssthresh = ssthresh;
 
 			metrics.rmx_rtt = tp->t_srtt;
 			metrics.rmx_rttvar = tp->t_rttvar;
 			metrics.rmx_cwnd = tp->snd_cwnd;
 			metrics.rmx_sendpipe = 0;
 			metrics.rmx_recvpipe = 0;
 
 			tcp_hc_update(&inp->inp_inc, &metrics);
 		}
 		/*
 		 * Drop the reference on the function block, free the tcpcb
 		 * and release a reference on the inpcb.  The inpcb itself
 		 * is expected to survive that release (asserted below).
 		 */
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		uma_zfree(V_tcpcb_zone, tp);
 		released = in_pcbrele_wlocked(inp);
 		KASSERT(!released, ("%s: inp %p should not have been released "
 			"here", __func__, inp));
 	}
 }
 
 /*
  * Drop one pending-timer reference (tt_draincnt) on a tcpcb.  When the
  * count reaches zero this caller owns the last reference and finishes the
  * teardown: logging and stack finalization, release of the function block
  * reference, freeing the tcpcb and releasing the inpcb reference.
  *
  * `ptp' is the tcpcb, passed as an opaque pointer.
  */
 void
 tcp_timer_discard(void *ptp)
 {
 	struct epoch_tracker et;
 	struct tcpcb *tp = ptp;
 	struct inpcb *inp;
 
 	CURVNET_SET(tp->t_vnet);
 	NET_EPOCH_ENTER(et);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
 	    __func__, tp));
 	INP_WLOCK(inp);
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
 	    ("%s: tcpcb has to be stopped here", __func__));
 
 	if (--tp->t_timers->tt_draincnt == 0) {
 		/* Last reference on this tcpcb: free it. */
 #ifdef TCP_BLACKBOX
 		tcp_log_tcpcbfini(tp);
 #endif
 		TCPSTATES_DEC(tp->t_state);
 		if (tp->t_fb->tfb_tcp_fb_fini != NULL)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		uma_zfree(V_tcpcb_zone, tp);
 		/*
 		 * When in_pcbrele_wlocked() returns true the inpcb must
 		 * not be unlocked below.
 		 */
 		if (in_pcbrele_wlocked(inp))
 			inp = NULL;
 	}
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 }
 
 /*
  * Close a TCP control block: mark the inpcb as dropped, move the
  * connection to CLOSED and disconnect the socket.
  *
  * If the inpcb holds the socket reference (INP_SOCKREF), that reference is
  * given up: the inpcb is unlocked, the socket is passed to sofree() and
  * NULL is returned.  Otherwise tp is returned with the inpcb still locked.
  */
 struct tcpcb *
 tcp_close(struct tcpcb *tp)
 {
 	struct inpcb *inp;
 	struct socket *so;
 
 	inp = tp->t_inpcb;
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_state == TCPS_LISTEN)
 		tcp_offload_listen_stop(tp);
 #endif
 	/*
 	 * Give back the TFO pending counter.  This covers TFO listen
 	 * sockets as well as passively-created TFO sockets going from
 	 * SYN_RECEIVED to CLOSED.
 	 */
 	if (tp->t_tfo_pending != NULL) {
 		tcp_fastopen_decrement_counter(tp->t_tfo_pending);
 		tp->t_tfo_pending = NULL;
 	}
 
 	in_pcbdrop(inp);
 	TCPSTAT_INC(tcps_closed);
 	if (tp->t_state != TCPS_CLOSED)
 		tcp_state_change(tp, TCPS_CLOSED);
 
 	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
 	so = inp->inp_socket;
 	soisdisconnected(so);
 
 	/* Without a protocol-held socket reference we are done. */
 	if ((inp->inp_flags & INP_SOCKREF) == 0)
 		return (tp);
 
 	KASSERT(so->so_state & SS_PROTOREF,
 	    ("tcp_close: !SS_PROTOREF"));
 	inp->inp_flags &= ~INP_SOCKREF;
 	INP_WUNLOCK(inp);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_PROTOREF;
 	sofree(so);
 	return (NULL);
 }
 
 /*
  * Release reclaimable per-connection memory in every vnet.
  *
  * For each non-TIMEWAIT connection the reassembly queue is flushed and
  * tcp_clean_sackreport() is called; log and packet-capture queues are
  * drained as well when the corresponding options are compiled in.  Does
  * nothing unless do_tcpdrain is set.
  *
  * XXX: The "Net/3" implementation doesn't imply that the TCP reassembly
  * queue should be flushed, but in a situation where we're really low on
  * mbufs, this is potentially useful.
  */
 void
 tcp_drain(void)
 {
 	struct inpcb *inpb;
 	struct tcpcb *tcpb;
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	if (!do_tcpdrain)
 		return;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		INP_INFO_WLOCK(&V_tcbinfo);
 		CK_LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
 			INP_WLOCK(inpb);
 			/* TIMEWAIT entries carry no tcpcb to drain. */
 			if ((inpb->inp_flags & INP_TIMEWAIT) != 0)
 				tcpb = NULL;
 			else
 				tcpb = intotcpcb(inpb);
 			if (tcpb != NULL) {
 				tcp_reass_flush(tcpb);
 				tcp_clean_sackreport(tcpb);
 #ifdef TCP_BLACKBOX
 				tcp_log_drain(tcpb);
 #endif
 #ifdef TCPPCAP
 				if (tcp_pcap_aggressive_free) {
 					/* Free the TCP PCAP queues. */
 					tcp_pcap_drain(&(tcpb->t_inpkts));
 					tcp_pcap_drain(&(tcpb->t_outpkts));
 				}
 #endif
 			}
 			INP_WUNLOCK(inpb);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Notify a tcp user of an asynchronous error;
  * store error as soft error, but wake up user
  * (for now, won't do anything until can select for soft error).
  *
  * Do not wake up user since there currently is no mechanism for
  * reporting soft errors (yet - a kqueue filter may be added).
  */
 static struct inpcb *
 tcp_notify(struct inpcb *inp, int error)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
 	    (inp->inp_flags & INP_DROPPED))
 		return (inp);
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
 
 	/*
 	 * Ignore some errors if we are hooked up.
 	 * If connection hasn't completed, has retransmitted several times,
 	 * and receives a second error, give up now.  This is better
 	 * than waiting a long time to establish a connection that
 	 * can never complete.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN)) {
 		if (inp->inp_route.ro_nh) {
 			NH_FREE(inp->inp_route.ro_nh);
 			inp->inp_route.ro_nh = (struct nhop_object *)NULL;
 		}
 		return (inp);
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror) {
 		tp = tcp_drop(tp, error);
 		if (tp != NULL)
 			return (inp);
 		else
 			return (NULL);
 	} else {
 		tp->t_softerror = error;
 		return (inp);
 	}
 #if 0
 	wakeup( &so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 #endif
 }
 
 static int
 tcp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct epoch_tracker et;
 	struct inpcb *inp;
 	struct xinpgen xig;
 	int error;
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	if (req->oldptr == NULL) {
 		int n;
 
 		n = V_tcbinfo.ipi_count +
 		    counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
 		return (0);
 	}
 
 	if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (error);
 
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = V_tcbinfo.ipi_count +
 	    counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
 	xig.xig_gen = V_tcbinfo.ipi_gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	error = syncache_pcblist(req);
 	if (error)
 		return (error);
 
 	NET_EPOCH_ENTER(et);
 	for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead);
 	    inp != NULL;
 	    inp = CK_LIST_NEXT(inp, inp_list)) {
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= xig.xig_gen) {
 			int crerr;
 
 			/*
 			 * XXX: This use of cr_cansee(), introduced with
 			 * TCP state changes, is not quite right, but for
 			 * now, better than nothing.
 			 */
 			if (inp->inp_flags & INP_TIMEWAIT) {
 				if (intotw(inp) != NULL)
 					crerr = cr_cansee(req->td->td_ucred,
 					    intotw(inp)->tw_cred);
 				else
 					crerr = EINVAL;	/* Skip this inp. */
 			} else
 				crerr = cr_canseeinpcb(req->td->td_ucred, inp);
 			if (crerr == 0) {
 				struct xtcpcb xt;
 
 				tcp_inptoxtp(inp, &xt);
 				INP_RUNLOCK(inp);
 				error = SYSCTL_OUT(req, &xt, sizeof xt);
 				if (error)
 					break;
 				else
 					continue;
 			}
 		}
 		INP_RUNLOCK(inp);
 	}
 	NET_EPOCH_EXIT(et);
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		xig.xig_gen = V_tcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_tcbinfo.ipi_count +
 		    counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 
 	return (error);
 }
 
 /*
  * Register tcp_pcblist() as the handler of the read-only "pcblist" node
  * (TCPCTL_PCBLIST) under _net_inet_tcp; the data is opaque, described by
  * the "S,xtcpcb" format string.
  */
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
     NULL, 0, tcp_pcblist, "S,xtcpcb",
     "List of active TCP connections");
 
 #ifdef INET
 /*
  * Sysctl handler returning the credential of one TCP/IPv4 connection.
  *
  * The request supplies two struct sockaddr_in identifying the connection.
  * The caller needs PRIV_NETINET_GETCRED and must be allowed to see the
  * inpcb; on success a struct xucred is copied out.  ENOENT is returned
  * when no matching inpcb exists or it has no socket.
  */
 static int
 tcp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct sockaddr_in addrs[2];
 	struct epoch_tracker et;
 	struct xucred xuc;
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error != 0)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error != 0)
 		return (error);
 
 	NET_EPOCH_ENTER(et);
 	inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
 	    addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
 	NET_EPOCH_EXIT(et);
 	if (inp == NULL)
 		return (ENOENT);
 
 	/* The lookup returned the inpcb read-locked. */
 	if (inp->inp_socket == NULL)
 		error = ENOENT;
 	else
 		error = cr_canseeinpcb(req->td->td_ucred, inp);
 	if (error == 0)
 		cru2x(inp->inp_cred, &xuc);
 	INP_RUNLOCK(inp);
 
 	if (error != 0)
 		return (error);
 	return (SYSCTL_OUT(req, &xuc, sizeof(struct xucred)));
 }
 
 /*
  * Register tcp_getcred() as the "getcred" node under _net_inet_tcp.  The
  * node is read-write because the handler takes the address pair as input
  * (SYSCTL_IN) and returns a struct xucred ("S,xucred") as output.
  */
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred,
     CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_NEEDGIANT,
     0, 0, tcp_getcred, "S,xucred",
     "Get the xucred of a TCP connection");
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Sysctl handler returning the credential of one TCP/IPv6 connection.
  *
  * The request supplies two struct sockaddr_in6 identifying the
  * connection.  The caller needs PRIV_NETINET_GETCRED and must be allowed
  * to see the inpcb; on success a struct xucred is copied out.  When both
  * addresses are IPv4-mapped and INET is compiled in, the IPv4 lookup is
  * used instead; if only addrs[0] is mapped, or INET is not available,
  * EINVAL is returned.  ENOENT is returned when no matching inpcb exists
  * or it has no socket.
  */
 static int
 tcp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct epoch_tracker et;
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct inpcb *inp;
 	int error;
 #ifdef INET
 	int mapped = 0;
 #endif
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	/* Embed the scope zone into both addresses before the lookup. */
 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
 #ifdef INET
 		if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
 			mapped = 1;
 		else
 #endif
 			return (EINVAL);
 	}
 
 	NET_EPOCH_ENTER(et);
 #ifdef INET
 	if (mapped == 1)
 		/* The IPv4 address is the last four bytes of the mapped one. */
 		inp = in_pcblookup(&V_tcbinfo,
 			*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
 			addrs[1].sin6_port,
 			*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
 			addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL);
 	else
 #endif
 		inp = in6_pcblookup(&V_tcbinfo,
 			&addrs[1].sin6_addr, addrs[1].sin6_port,
 			&addrs[0].sin6_addr, addrs[0].sin6_port,
 			INPLOOKUP_RLOCKPCB, NULL);
 	NET_EPOCH_EXIT(et);
 	if (inp != NULL) {
 		/* The lookup returned the inpcb read-locked. */
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	/* xuc is only filled in, and only copied out, when error == 0. */
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 /*
  * Register tcp6_getcred() as the "getcred" node under _net_inet6_tcp6.
  * The node is read-write because the handler takes the address pair as
  * input (SYSCTL_IN) and returns a struct xucred ("S,xucred") as output.
  */
 SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
     CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_NEEDGIANT,
     0, 0, tcp6_getcred, "S,xucred",
     "Get the xucred of a TCP6 connection");
 #endif /* INET6 */
 
 #ifdef INET
 /* Path MTU to try next when a fragmentation-needed message is received. */
 static inline int
 tcp_next_pmtu(const struct icmp *icp, const struct ip *ip)
 {
 	int mtu = ntohs(icp->icmp_nextmtu);
 
 	/* If no alternative MTU was proposed, try the next smaller one. */
 	if (!mtu)
 		mtu = ip_next_mtu(ntohs(ip->ip_len), 1);
 	if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr))
 		mtu = V_tcp_minmss + sizeof(struct tcpiphdr);
 
 	return (mtu);
 }
 
 /*
  * Common handler for ICMP-derived control input on TCP/IPv4.
  *
  *   cmd   PRC_* command code.
  *   sa    sockaddr_in of the destination the error refers to; anything
  *         that is not AF_INET, or is INADDR_ANY, is ignored.
  *   vip   IP header of the offending datagram as embedded in the ICMP
  *         message, or NULL.
  *   port  value compared against tp->t_port; connections with a
  *         different t_port are left alone.
  *
  * Without an embedded header (and always for PRC_HOSTDEAD) every PCB for
  * the address is notified.  Otherwise the matching connection is looked
  * up and, if the quoted sequence number lies in [snd_una, snd_max),
  * either the path MTU is lowered (PRC_MSGSIZE) or the selected notify
  * routine is run.  When no PCB matches, the event is handed to
  * syncache_unreach().
  */
 static void
 tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
 {
 	struct ip *ip = vip;
 	struct tcphdr *th;
 	struct in_addr faddr;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct icmp *icp;
 	struct in_conninfo inc;
 	tcp_seq icmp_tcp_seq;
 	int mtu;
 
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
 	/* Pick the notify routine (default tcp_notify) from the command. */
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL ||
 		cmd == PRC_TIMXCEED_INTRANS) && ip)
 		notify = tcp_drop_syn_sent;
 
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
 	 * excellent DoS attack on machines with many connections.
 	 */
 	else if (cmd == PRC_HOSTDEAD)
 		ip = NULL;
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
 
 	if (ip == NULL) {
 		in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
 		return;
 	}
 
 	/* ip is the icmp_ip member of an enclosing struct icmp. */
 	icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
 	/* The quoted TCP header follows the quoted IP header. */
 	th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 	inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
 	    th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
 		/* signal EHOSTDOWN, as it flushes the cached route */
 		inp = (*notify)(inp, EHOSTDOWN);
 		goto out;
 	}
 	icmp_tcp_seq = th->th_seq;
 	if (inp != NULL)  {
 		if (!(inp->inp_flags & INP_TIMEWAIT) &&
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
 #ifdef TCP_OFFLOAD
 			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
 				/*
 				 * MTU discovery for offloaded connections.  Let
 				 * the TOE driver verify seq# and process it.
 				 */
 				mtu = tcp_next_pmtu(icp, ip);
 				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
 				goto out;
 			}
 #endif
 			/* Ignore errors that do not match this connection's port. */
 			if (tp->t_port != port) {
 				goto out;
 			}
 			/* Act only if the quoted sequence number is outstanding. */
 			if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
 					 * MTU discovery: we got a needfrag and
 					 * will potentially try a lower MTU.
 					 */
 					mtu = tcp_next_pmtu(icp, ip);
 
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
 					 */
 					if (mtu < tp->t_maxseg +
 					    sizeof(struct tcpiphdr)) {
 						bzero(&inc, sizeof(inc));
 						inc.inc_faddr = faddr;
 						inc.inc_fibnum =
 						    inp->inp_inc.inc_fibnum;
 						tcp_hc_updatemtu(&inc, mtu);
 						tcp_mtudisc(inp, mtu);
 					}
 				} else
 					/* notify may return NULL; see "out". */
 					inp = (*notify)(inp,
 					    inetctlerrmap[cmd]);
 			}
 		}
 	} else {
 		/* No PCB matched: hand the event to the syncache. */
 		bzero(&inc, sizeof(inc));
 		inc.inc_fport = th->th_dport;
 		inc.inc_lport = th->th_sport;
 		inc.inc_faddr = faddr;
 		inc.inc_laddr = ip->ip_src;
 		syncache_unreach(&inc, icmp_tcp_seq, port);
 	}
 out:
 	/* inp is NULL if nothing was found or the notify routine returned NULL. */
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 }
 
 void
 tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	tcp_ctlinput_with_port(cmd, sa, vip, htons(0));
 }
 
 /*
  * Control-input entry point for ICMP errors about TCP tunneled over UDP.
  *
  * vip points at the IP header quoted in the ICMP message; that header is
  * followed by the UDP tunnel header and then the TCP header.  After
  * checking that enough data was quoted and that the UDP source port is
  * the configured tunneling port, the leading part of the TCP header is
  * moved up over the UDP header, the outer IP length is reduced
  * accordingly, and the common handler is called with the UDP destination
  * port.  `unused' is ignored.
  *
  * Only the first offsetof(struct tcphdr, th_ack) bytes of the TCP header
  * (ports and sequence number) are relocated.  That is exactly what the
  * length check guarantees to be present and all that
  * tcp_ctlinput_with_port() reads from the header, so nothing beyond the
  * validated data is read or overwritten.
  */
 void
 tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused)
 {
 	struct ip *outer_ip, *inner_ip;
 	struct icmp *icmp;
 	struct udphdr *udp;
 	struct tcphdr *th, ttemp;
 	size_t th_need;
 	int i_hlen, o_len;
 	uint16_t port;
 
 	/* Bytes of TCP header required: th_sport, th_dport and th_seq. */
 	th_need = offsetof(struct tcphdr, th_ack);
 
 	inner_ip = (struct ip *)vip;
 	icmp = (struct icmp *)((caddr_t)inner_ip -
 	    (sizeof(struct icmp) - sizeof(struct ip)));
 	outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip));
 	i_hlen = inner_ip->ip_hl << 2;
 	o_len = ntohs(outer_ip->ip_len);
 	/* The literal 8 is presumably the ICMP header length. */
 	if (o_len <
 	    (sizeof(struct ip) + 8 + i_hlen + sizeof(struct udphdr) + th_need)) {
 		/* Not enough data present */
 		return;
 	}
 	udp = (struct udphdr *)(((caddr_t)inner_ip) + i_hlen);
 	if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) {
 		return;
 	}
 	port = udp->uh_dport;
 	th = (struct tcphdr *)(udp + 1);
 	/*
 	 * Strip the inner UDP header by copying the validated part of the
 	 * TCP header on top of it.  The bounce buffer keeps this correct
 	 * even if source and destination were to overlap.
 	 */
 	memcpy(&ttemp, th, th_need);
 	memcpy(udp, &ttemp, th_need);
 	/* Now adjust down the size of the outer IP header */
 	o_len -= sizeof(struct udphdr);
 	outer_ip->ip_len = htons(o_len);
 	/* Now call in to the normal handling code */
 	tcp_ctlinput_with_port(cmd, sa, vip, port);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Path MTU to use after an ICMPv6 message carrying an MTU field.
  *
  * If no alternative MTU was proposed, or the proposed MTU is below
  * IPV6_MMTU, IPV6_MMTU - 8 is returned instead.
  */
 static inline int
 tcp6_next_pmtu(const struct icmp6_hdr *icmp6)
 {
 	const int offered = ntohl(icmp6->icmp6_mtu);
 
 	/* XXXNP: what is the adjustment for? */
 	return (offered < IPV6_MMTU ? IPV6_MMTU - 8 : offered);
 }
 
 /*
  * Common handler for ICMPv6-derived control input on TCP/IPv6.
  *
  *   cmd   PRC_* command code.
  *   sa    must be an AF_INET6 sockaddr of sizeof(struct sockaddr_in6);
  *         anything else is ignored.
  *   d     struct ip6ctlparam describing the ICMPv6 message, or NULL.
  *   port  value compared against tp->t_port; connections with a
  *         different t_port are left alone.
  *
  * Without an embedded IPv6 header (and always for PRC_HOSTDEAD) the event
  * is passed to in6_pcbnotify().  Otherwise the ports and sequence number
  * are copied out of the mbuf at ip6c_off, the matching connection is
  * looked up and, if the quoted sequence number lies in
  * [snd_una, snd_max), either the path MTU is lowered (PRC_MSGSIZE) or the
  * selected notify routine is run.  When no PCB matches, the event is
  * handed to syncache_unreach().
  */
 static void
 tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
 {
 	struct in6_addr *dst;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct ip6_hdr *ip6;
 	struct mbuf *m;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct icmp6_hdr *icmp6;
 	struct ip6ctlparam *ip6cp = NULL;
 	const struct sockaddr_in6 *sa6_src = NULL;
 	struct in_conninfo inc;
 	/* Leading four bytes of a TCP header: source and destination port. */
 	struct tcp_ports {
 		uint16_t th_sport;
 		uint16_t th_dport;
 	} t_ports;
 	tcp_seq icmp_tcp_seq;
 	unsigned int mtu;
 	unsigned int off;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
 		ip6cp = (struct ip6ctlparam *)d;
 		icmp6 = ip6cp->ip6c_icmp6;
 		m = ip6cp->ip6c_m;
 		ip6 = ip6cp->ip6c_ip6;
 		off = ip6cp->ip6c_off;
 		sa6_src = ip6cp->ip6c_src;
 		dst = ip6cp->ip6c_finaldst;
 	} else {
 		/* icmp6 stays unset here; it is only used when ip6 != NULL. */
 		m = NULL;
 		ip6 = NULL;
 		off = 0;	/* fool gcc */
 		sa6_src = &sa6_any;
 		dst = NULL;
 	}
 
 	/* Pick the notify routine (default tcp_notify) from the command. */
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL ||
 		cmd == PRC_TIMXCEED_INTRANS) && ip6 != NULL)
 		notify = tcp_drop_syn_sent;
 
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
 	 * excellent DoS attack on machines with many connections.
 	 */
 	else if (cmd == PRC_HOSTDEAD)
 		ip6 = NULL;
 	else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)
 		return;
 
 	if (ip6 == NULL) {
 		in6_pcbnotify(&V_tcbinfo, sa, 0,
 			      (const struct sockaddr *)sa6_src,
 			      0, cmd, NULL, notify);
 		return;
 	}
 
 	/* Check if we can safely get the ports from the tcp hdr */
 	if (m == NULL ||
 	    (m->m_pkthdr.len <
 		(int32_t) (off + sizeof(struct tcp_ports)))) {
 		return;
 	}
 	bzero(&t_ports, sizeof(struct tcp_ports));
 	m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports);
 	inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport,
 	    &ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
 		/* signal EHOSTDOWN, as it flushes the cached route */
 		inp = (*notify)(inp, EHOSTDOWN);
 		goto out;
 	}
 	/* The sequence number follows the ports; make sure it is there. */
 	off += sizeof(struct tcp_ports);
 	if (m->m_pkthdr.len < (int32_t) (off + sizeof(tcp_seq))) {
 		goto out;
 	}
 	m_copydata(m, off, sizeof(tcp_seq), (caddr_t)&icmp_tcp_seq);
 	if (inp != NULL)  {
 		if (!(inp->inp_flags & INP_TIMEWAIT) &&
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
 #ifdef TCP_OFFLOAD
 			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
 				/* MTU discovery for offloaded connections. */
 				mtu = tcp6_next_pmtu(icmp6);
 				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
 				goto out;
 			}
 #endif
 			/* Ignore errors that do not match this connection's port. */
 			if (tp->t_port != port) {
 				goto out;
 			}
 			/* Act only if the quoted sequence number is outstanding. */
 			if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
 					 * MTU discovery:
 					 * If we got a needfrag set the MTU
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
 					mtu = tcp6_next_pmtu(icmp6);
 
 					bzero(&inc, sizeof(inc));
 					inc.inc_fibnum = M_GETFIB(m);
 					inc.inc_flags |= INC_ISIPV6;
 					inc.inc6_faddr = *dst;
 					if (in6_setscope(&inc.inc6_faddr,
 						m->m_pkthdr.rcvif, NULL))
 						goto out;
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
 					 */
 					if (mtu < tp->t_maxseg +
 					    sizeof (struct tcphdr) +
 					    sizeof (struct ip6_hdr)) {
 						tcp_hc_updatemtu(&inc, mtu);
 						tcp_mtudisc(inp, mtu);
 						ICMP6STAT_INC(icp6s_pmtuchg);
 					}
 				} else
 					/* notify may return NULL; see "out". */
 					inp = (*notify)(inp,
 					    inet6ctlerrmap[cmd]);
 			}
 		}
 	} else {
 		/* No PCB matched: hand the event to the syncache. */
 		bzero(&inc, sizeof(inc));
 		inc.inc_fibnum = M_GETFIB(m);
 		inc.inc_flags |= INC_ISIPV6;
 		inc.inc_fport = t_ports.th_dport;
 		inc.inc_lport = t_ports.th_sport;
 		inc.inc6_faddr = *dst;
 		inc.inc6_laddr = ip6->ip6_src;
 		syncache_unreach(&inc, icmp_tcp_seq, port);
 	}
 out:
 	/* inp is NULL if nothing was found or the notify routine returned NULL. */
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 }
 
 void
 tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 	tcp6_ctlinput_with_port(cmd, sa, d, htons(0));
 }
 
 /*
  * Control-input entry point for ICMPv6 errors about TCP tunneled over
  * UDP.
  *
  * d is a struct ip6ctlparam.  The UDP header at ip6c_off is made
  * contiguous, checked against the configured tunneling port, and trimmed
  * from the mbuf so that the TCP header takes its place; the common
  * handler is then called with the UDP destination port.  `unused' is
  * ignored.
  */
 void
 tcp6_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *d, void *unused)
 {
 	struct ip6ctlparam *ip6cp;
 	struct mbuf *m;
 	struct udphdr *udp;
 	uint16_t port;
 
 	ip6cp = (struct ip6ctlparam *)d;
 	/*
 	 * NOTE(review): m_pulldown() is understood to free the mbuf chain
 	 * when it fails - confirm that the caller does not touch ip6c_m
 	 * again after this function returns on that path.
 	 */
 	m = m_pulldown(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(struct udphdr), NULL);
 	if (m == NULL) {
 		return;
 	}
 	udp = mtod(m, struct udphdr *);
 	if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) {
 		return;
 	}
 	port = udp->uh_dport;
 	/* Trim the UDP header from the front of the mbuf that holds it. */
 	m_adj(m, sizeof(struct udphdr));
 	/*
 	 * If that mbuf does not carry the packet header, fix up the packet
 	 * length recorded in the head of the chain by hand.
 	 */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		ip6cp->ip6c_m->m_pkthdr.len -= sizeof(struct udphdr);
 	}
 	/* Now call in to the normal handling code */
 	tcp6_ctlinput_with_port(cmd, sa, d, port);
 }
 
 #endif /* INET6 */
 
 /*
  * Keyed 32-bit hash of a connection's ports and addresses.
  *
  * SipHash-2-4 is run with `key' over the foreign port, the local port and
  * then the foreign and local address of the connection's address family.
  * The two 32-bit halves of the result are folded together with XOR.
  * `len' is the key length and must be at least SIPHASH_KEY_LENGTH.
  */
 static uint32_t
 tcp_keyed_hash(struct in_conninfo *inc, u_char *key, u_int len)
 {
 	uint32_t digest[2];
 	SIPHASH_CTX sip;
 
 	KASSERT(len >= SIPHASH_KEY_LENGTH,
 	    ("%s: keylen %u too short ", __func__, len));
 
 	SipHash24_Init(&sip);
 	SipHash_SetKey(&sip, (uint8_t *)key);
 
 	/* Ports first ... */
 	SipHash_Update(&sip, &inc->inc_fport, sizeof(uint16_t));
 	SipHash_Update(&sip, &inc->inc_lport, sizeof(uint16_t));
 
 	/* ... then the address pair of whichever family this is. */
 	switch (inc->inc_flags & INC_ISIPV6) {
 #ifdef INET
 	case 0:
 		SipHash_Update(&sip, &inc->inc_faddr, sizeof(struct in_addr));
 		SipHash_Update(&sip, &inc->inc_laddr, sizeof(struct in_addr));
 		break;
 #endif
 #ifdef INET6
 	case INC_ISIPV6:
 		SipHash_Update(&sip, &inc->inc6_faddr, sizeof(struct in6_addr));
 		SipHash_Update(&sip, &inc->inc6_laddr, sizeof(struct in6_addr));
 		break;
 #endif
 	}
 	SipHash_Final((uint8_t *)digest, &sip);
 
 	return (digest[0] ^ digest[1]);
 }
 
 uint32_t
 tcp_new_ts_offset(struct in_conninfo *inc)
 {
 	struct in_conninfo inc_store, *local_inc;
 
 	if (!V_tcp_ts_offset_per_conn) {
 		memcpy(&inc_store, inc, sizeof(struct in_conninfo));
 		inc_store.inc_lport = 0;
 		inc_store.inc_fport = 0;
 		local_inc = &inc_store;
 	} else {
 		local_inc = inc;
 	}
 	return (tcp_keyed_hash(local_inc, V_ts_offset_secret,
 	    sizeof(V_ts_offset_secret)));
 }
 
 /*
  * Following is where TCP initial sequence number generation occurs.
  *
  * There are two places where we must use initial sequence numbers:
  * 1.  In SYN-ACK packets.
  * 2.  In SYN packets.
  *
  * All ISNs for SYN-ACK packets are generated by the syncache.  See
  * tcp_syncache.c for details.
  *
  * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
  * depends on this property.  In addition, these ISNs should be
  * unguessable so as to prevent connection hijacking.  To satisfy
  * the requirements of this situation, the algorithm outlined in
  * RFC 1948 is used, with only small modifications.
  *
  * Implementation details:
  *
  * Time is based off the system timer, and is corrected so that it
  * increases by one megabyte per second.  This allows for proper
  * recycling on high speed LANs while still leaving over an hour
  * before rollover.
  *
  * As reading the *exact* system time is too expensive to be done
  * whenever setting up a TCP connection, we increment the time
  * offset in two ways.  First, a small random positive increment
  * is added to isn_offset for each connection that is set up.
  * Second, the function tcp_isn_tick fires once per clock tick
  * and increments isn_offset as necessary so that sequence numbers
  * are incremented at approximately ISN_BYTES_PER_SECOND.  The
  * random positive increments serve only to ensure that the same
  * exact sequence number is never sent out twice (as could otherwise
  * happen when a port is recycled in less than the system tick
  * interval.)
  *
  * net.inet.tcp.isn_reseed_interval controls the number of seconds
  * between seeding of isn_secret.  This is normally set to zero,
  * as reseeding should not be necessary.
  *
  * Locking of the global variables isn_secret, isn_last, isn_last_reseed,
  * isn_offset, and isn_offset_old is performed using the ISN lock.  In
  * general, this means holding an exclusive (write) lock.
  */
 
 /* Rate at which tcp_new_isn() advances isn_offset with the tick counter. */
 #define ISN_BYTES_PER_SECOND 1048576
 /* Fixed amount added to isn_offset on every tcp_new_isn() call. */
 #define ISN_STATIC_INCREMENT 4096
 /* Mask applied to arc4random() for the per-call random increment. */
 #define ISN_RANDOM_INCREMENT (4096 - 1)
 /* Size of isn_secret; it is used as the key of tcp_keyed_hash(). */
 #define ISN_SECRET_LENGTH    SIPHASH_KEY_LENGTH
 
 /* Per-vnet ISN state; tcp_new_isn() accesses all of it under the ISN lock. */
 /* Hash key; filled by arc4rand() on first use and on reseed. */
 VNET_DEFINE_STATIC(u_char, isn_secret[ISN_SECRET_LENGTH]);
 /* Value of ticks when isn_offset was last advanced by elapsed time. */
 VNET_DEFINE_STATIC(int, isn_last);
 /* Value of ticks at the last (re)seed; 0 means not seeded yet. */
 VNET_DEFINE_STATIC(int, isn_last_reseed);
 /* Running offset added to the hash to form the ISN. */
 VNET_DEFINE_STATIC(u_int32_t, isn_offset);
 /* isn_offset as recorded at the last time-based advance. */
 VNET_DEFINE_STATIC(u_int32_t, isn_offset_old);
 
 #define	V_isn_secret			VNET(isn_secret)
 #define	V_isn_last			VNET(isn_last)
 #define	V_isn_last_reseed		VNET(isn_last_reseed)
 #define	V_isn_offset			VNET(isn_offset)
 #define	V_isn_offset_old		VNET(isn_offset_old)
 
 tcp_seq
 tcp_new_isn(struct in_conninfo *inc)
 {
 	tcp_seq new_isn;
 	u_int32_t projected_offset;
 
 	ISN_LOCK();
 	/* Seed if this is the first use, reseed if requested. */
 	if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) &&
 	     (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz)
 		< (u_int)ticks))) {
 		arc4rand(&V_isn_secret, sizeof(V_isn_secret), 0);
 		V_isn_last_reseed = ticks;
 	}
 
 	/* Compute the hash and return the ISN. */
 	new_isn = (tcp_seq)tcp_keyed_hash(inc, V_isn_secret,
 	    sizeof(V_isn_secret));
 	V_isn_offset += ISN_STATIC_INCREMENT +
 		(arc4random() & ISN_RANDOM_INCREMENT);
 	if (ticks != V_isn_last) {
 		projected_offset = V_isn_offset_old +
 		    ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last);
 		if (SEQ_GT(projected_offset, V_isn_offset))
 			V_isn_offset = projected_offset;
 		V_isn_offset_old = V_isn_offset;
 		V_isn_last = ticks;
 	}
 	new_isn += V_isn_offset;
 	ISN_UNLOCK();
 	return (new_isn);
 }
 
 /*
  * Notify routine selected for specific ICMP unreachable messages when the
  * icmp_may_rst sysctl is enabled: a connection that is still in SYN-SENT
  * is dropped with the given error.  Connections in any other state, and
  * inpcbs already in TIMEWAIT or dropped, are left untouched.
  *
  * Returns inp, or NULL if tcp_drop() returned NULL.
  */
 struct inpcb *
 tcp_drop_syn_sent(struct inpcb *inp, int errno)
 {
 	struct tcpcb *tp;
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0)
 		return (inp);
 
 	tp = intotcpcb(inp);
 	if (tp->t_state != TCPS_SYN_SENT)
 		return (inp);
 
 	if (IS_FASTOPEN(tp->t_flags))
 		tcp_fastopen_disable_path(tp);
 
 	return (tcp_drop(tp, errno) != NULL ? inp : NULL);
 }
 
 /*
  * Notify-routine adapter around tcp_mtudisc(), used when a `need
  * fragmentation' ICMP is received.  tcp_mtudisc() updates our idea of the
  * MSS and nudges TCP to send something, since we know the packet we just
  * sent was dropped.  That duplicates some code in the tcp_mss() function
  * in tcp_input.c.
  *
  * No MTU offer is passed along (-1); `error' is ignored and inp is always
  * returned.
  */
 static struct inpcb *
 tcp_mtudisc_notify(struct inpcb *inp, int error)
 {
 	const int no_offer = -1;
 
 	tcp_mtudisc(inp, no_offer);
 	return (inp);
 }
 
 /*
  * React to a path MTU change on a connection.
  *
  * The MSS is recomputed through tcp_mss_update() using `mtuoffer' and
  * capped at the send buffer's high-water mark.  Transmission state is
  * then rewound to snd_una, the SACK holes are freed, the stack's optional
  * tfb_tcp_mtu_chg() hook is run and output is invoked.  inpcbs in
  * TIMEWAIT or already dropped are ignored.  The inpcb write lock must be
  * held.
  */
 static void
 tcp_mtudisc(struct inpcb *inp, int mtuoffer)
 {
 	struct socket *so;
 	struct tcpcb *tp;
 
 	INP_WLOCK_ASSERT(inp);
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0)
 		return;
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
 
 	tcp_mss_update(tp, -1, mtuoffer, NULL, NULL);
 
 	/* If the mss is larger than the socket buffer, decrease the mss. */
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	if (tp->t_maxseg > so->so_snd.sb_hiwat)
 		tp->t_maxseg = so->so_snd.sb_hiwat;
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	TCPSTAT_INC(tcps_mturesent);
 
 	/* Rewind so that everything outstanding is sent again. */
 	tp->t_rtttime = 0;
 	tp->snd_nxt = tp->snd_una;
 	tcp_free_sackholes(tp);
 	tp->snd_recover = tp->snd_max;
 	if ((tp->t_flags & TF_SACK_PERMIT) != 0)
 		EXIT_FASTRECOVERY(tp->t_flags);
 
 	/*
 	 * Conceptually the snd_nxt setting and the freeing of sack holes
 	 * above should be done by the default stack's own
 	 * tfb_tcp_mtu_chg().
 	 */
 	if (tp->t_fb->tfb_tcp_mtu_chg != NULL)
 		tp->t_fb->tfb_tcp_mtu_chg(tp);
 
 	tp->t_fb->tfb_tcp_output(tp);
 }
 
 #ifdef INET
 /*
  * Return the MTU of the route towards the foreign IPv4 address stored
  * in inc.  The result is 0 when that address is INADDR_ANY or when the
  * FIB lookup yields no nexthop.  If cap is non-NULL and the outgoing
  * interface has IFCAP_TSO4 enabled together with CSUM_TSO hardware
  * assist, the TSO limits of that interface are reported through cap.
  */
 uint32_t
 tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct nhop_object *nh;
 	struct ifnet *ifp;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
 
 	/* Without a foreign address there is no route to consult. */
 	if (inc->inc_faddr.s_addr == INADDR_ANY)
 		return (0);
 
 	nh = fib4_lookup(inc->inc_fibnum, inc->inc_faddr, 0, NHR_NONE, 0);
 	if (nh == NULL)
 		return (0);
 	ifp = nh->nh_ifp;
 
 	/* Report additional interface capabilities. */
 	if (cap != NULL && (ifp->if_capenable & IFCAP_TSO4) != 0 &&
 	    (ifp->if_hwassist & CSUM_TSO) != 0) {
 		cap->ifcap |= CSUM_TSO;
 		cap->tsomax = ifp->if_hw_tsomax;
 		cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 		cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 	}
 
 	return (nh->nh_mtu);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Return the MTU of the route towards the foreign IPv6 address stored
  * in inc.  IPV6_MMTU is returned right away when INC_IPV6MINMTU is set.
  * The result is 0 when the foreign address is unspecified or when the
  * FIB lookup yields no nexthop.  If cap is non-NULL and the outgoing
  * interface has IFCAP_TSO6 enabled together with CSUM_TSO hardware
  * assist, the TSO limits of that interface are reported through cap.
  */
 uint32_t
 tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct nhop_object *nh;
 	struct in6_addr dst6;
 	struct ifnet *ifp;
 	uint32_t scopeid;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
 
 	if ((inc->inc_flags & INC_IPV6MINMTU) != 0)
 		return (IPV6_MMTU);
 
 	/* Without a foreign address there is no route to consult. */
 	if (IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr))
 		return (0);
 
 	/* The lookup takes the address and its scope id separately. */
 	in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid);
 	nh = fib6_lookup(inc->inc_fibnum, &dst6, scopeid, NHR_NONE, 0);
 	if (nh == NULL)
 		return (0);
 	ifp = nh->nh_ifp;
 
 	/* Report additional interface capabilities. */
 	if (cap != NULL && (ifp->if_capenable & IFCAP_TSO6) != 0 &&
 	    (ifp->if_hwassist & CSUM_TSO) != 0) {
 		cap->ifcap |= CSUM_TSO;
 		cap->tsomax = ifp->if_hw_tsomax;
 		cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 		cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 	}
 
 	return (nh->nh_mtu);
 }
 #endif /* INET6 */
 
 /*
  * Effective sender MSS (SMSS as defined by RFC5681) of a connection in
  * its current state: t_maxseg less an estimate of the TCP option bytes,
  * SACK blocks included, that a segment would carry.
  */
 u_int
 tcp_maxseg(const struct tcpcb *tp)
 {
 	u_int optlen;
 
 	/* With TF_NOOPT set nothing is subtracted from t_maxseg. */
 	if ((tp->t_flags & TF_NOOPT) != 0)
 		return (tp->t_maxseg);
 
 	/*
 	 * What follows is a condensed form of the accounting done by
 	 * tcp_addoptions(): there is no proper loop and most of the
 	 * padding is hardcoded.  Padding may come out wrong in some edge
 	 * cases, which is harmless because the result of tcp_maxseg() is
 	 * only used in cwnd and ssthresh estimations.
 	 */
 	if (!TCPS_HAVEESTABLISHED(tp->t_state)) {
 		/* Not established yet: count the options being requested. */
 		optlen = (tp->t_flags & TF_REQ_TSTMP) != 0 ?
 		    TCPOLEN_TSTAMP_APPA : PADTCPOLEN(TCPOLEN_MAXSEG);
 		if ((tp->t_flags & TF_REQ_SCALE) != 0)
 			optlen += PADTCPOLEN(TCPOLEN_WINDOW);
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if ((tp->t_flags & TF_SIGNATURE) != 0)
 			optlen += PADTCPOLEN(TCPOLEN_SIGNATURE);
 #endif
 		if ((tp->t_flags & TF_SACK_PERMIT) != 0)
 			optlen += PADTCPOLEN(TCPOLEN_SACK_PERMITTED);
 	} else {
 		/* Established: timestamps, signature and pending SACK blocks. */
 		optlen = (tp->t_flags & TF_RCVD_TSTMP) != 0 ?
 		    TCPOLEN_TSTAMP_APPA : 0;
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if ((tp->t_flags & TF_SIGNATURE) != 0)
 			optlen += PADTCPOLEN(TCPOLEN_SIGNATURE);
 #endif
 		if ((tp->t_flags & TF_SACK_PERMIT) != 0 &&
 		    tp->rcv_numsacks > 0) {
 			optlen += TCPOLEN_SACKHDR +
 			    tp->rcv_numsacks * TCPOLEN_SACK;
 			optlen = PADTCPOLEN(optlen);
 		}
 	}
 #undef PAD
 	/* The estimate never exceeds the maximum option space. */
 	optlen = min(optlen, TCP_MAXOLEN);
 	return (tp->t_maxseg - optlen);
 }
 
 
 /*
  * Like the effective MSS computation, but only the options that would
  * be sent with every segment are subtracted from t_maxseg; SACK blocks
  * are left out.
  */
 u_int
 tcp_fixed_maxseg(const struct tcpcb *tp)
 {
 	int optlen;
 
 	/* With TF_NOOPT set nothing is subtracted from t_maxseg. */
 	if ((tp->t_flags & TF_NOOPT) != 0)
 		return (tp->t_maxseg);
 
 	/*
 	 * A condensed form of the accounting done by tcp_addoptions(),
 	 * with no proper loop and most of the padding hardcoded.  Only
 	 * the fixed options that would be sent every time are counted,
 	 * i.e. SACK is not considered.  This matters to cc modules that
 	 * need to figure out what the modulo of the cwnd should be.
 	 */
 #define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
 	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
 		optlen = 0;
 		if ((tp->t_flags & TF_RCVD_TSTMP) != 0)
 			optlen = TCPOLEN_TSTAMP_APPA;
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if ((tp->t_flags & TF_SIGNATURE) != 0)
 			optlen += PAD(TCPOLEN_SIGNATURE);
 #endif
 	} else {
 		optlen = PAD(TCPOLEN_MAXSEG);
 		if ((tp->t_flags & TF_REQ_TSTMP) != 0)
 			optlen = TCPOLEN_TSTAMP_APPA;
 		if ((tp->t_flags & TF_REQ_SCALE) != 0)
 			optlen += PAD(TCPOLEN_WINDOW);
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if ((tp->t_flags & TF_SIGNATURE) != 0)
 			optlen += PAD(TCPOLEN_SIGNATURE);
 #endif
 		if ((tp->t_flags & TF_SACK_PERMIT) != 0)
 			optlen += PAD(TCPOLEN_SACK_PERMITTED);
 	}
 #undef PAD
 	/* The estimate never exceeds the maximum option space. */
 	optlen = min(optlen, TCP_MAXOLEN);
 	return (tp->t_maxseg - optlen);
 }
 
 
 
 /*
  * Handler of the write-only "drop" sysctl registered below.
  *
  * The new value must hold two struct sockaddr_storage entries: the
  * foreign address followed by the local one.  The matching TCP inpcb is
  * looked up and its connection is torn down.
  *
  * Returns EINVAL when an old value is requested, when the address
  * family is not handled, or when a sockaddr length is wrong; EPERM when
  * no new value is supplied; ENOMEM when the new value is too short;
  * ESRCH when no inpcb matches; otherwise 0 or an error from SYSCTL_IN()
  * or sa6_embedscope().
  */
 static int
 sysctl_drop(SYSCTL_HANDLER_ARGS)
 {
 	/* addrs[0] is a foreign socket, addrs[1] is a local one. */
 	struct sockaddr_storage addrs[2];
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct tcptw *tw;
 	struct sockaddr_in *fin, *lin;
 	struct epoch_tracker et;
 #ifdef INET6
 	struct sockaddr_in6 *fin6, *lin6;
 #endif
 	int error;
 
 	inp = NULL;
 	fin = lin = NULL;
 #ifdef INET6
 	fin6 = lin6 = NULL;
 #endif
 	error = 0;
 
 	/* This node cannot be read, only written. */
 	if (req->oldptr != NULL || req->oldlen != 0)
 		return (EINVAL);
 	if (req->newptr == NULL)
 		return (EPERM);
 	if (req->newlen < sizeof(addrs))
 		return (ENOMEM);
 	error = SYSCTL_IN(req, &addrs, sizeof(addrs));
 	if (error)
 		return (error);
 
 	/* Validate both addresses; the family of addrs[0] decides the type. */
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		fin6 = (struct sockaddr_in6 *)&addrs[0];
 		lin6 = (struct sockaddr_in6 *)&addrs[1];
 		if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
 		    lin6->sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 		if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
 			/* Both ends must be v4-mapped, or the request is bad. */
 			if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
 				return (EINVAL);
 			/*
 			 * Convert both entries in place to sockaddr_in.
 			 * NOTE(review): presumably this also rewrites the
 			 * family to AF_INET so that the lookup switch below
 			 * takes the IPv4 case - confirm.
 			 */
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
 			fin = (struct sockaddr_in *)&addrs[0];
 			lin = (struct sockaddr_in *)&addrs[1];
 			break;
 		}
 		error = sa6_embedscope(fin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		error = sa6_embedscope(lin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		fin = (struct sockaddr_in *)&addrs[0];
 		lin = (struct sockaddr_in *)&addrs[1];
 		if (fin->sin_len != sizeof(struct sockaddr_in) ||
 		    lin->sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 	/* The lookup and the teardown both run inside the network epoch. */
 	NET_EPOCH_ENTER(et);
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
 		    fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
 		    INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
 		    lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 	}
 	/*
 	 * A pcb found with INPLOOKUP_WLOCKPCB is unlocked explicitly on
 	 * every branch below, except after tcp_twclose() and after a
 	 * tcp_drop() that returned NULL.
 	 * NOTE(review): presumably those two calls release the pcb
 	 * themselves - confirm.
 	 */
 	if (inp != NULL) {
 		if (inp->inp_flags & INP_TIMEWAIT) {
 			/*
 			 * XXXRW: There currently exists a state where an
 			 * inpcb is present, but its timewait state has been
 			 * discarded.  For now, don't allow dropping of this
 			 * type of inpcb.
 			 */
 			tw = intotw(inp);
 			if (tw != NULL)
 				tcp_twclose(tw, 0);
 			else
 				INP_WUNLOCK(inp);
-		} else if (!(inp->inp_flags & INP_DROPPED) &&
-			   !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
+		} else if ((inp->inp_flags & INP_DROPPED) == 0 &&
+		    !SOLISTENING(inp->inp_socket)) {
 			/* Neither dropped nor listening: abort the connection. */
 			tp = intotcpcb(inp);
 			tp = tcp_drop(tp, ECONNABORTED);
 			if (tp != NULL)
 				INP_WUNLOCK(inp);
 		} else
 			INP_WUNLOCK(inp);
 	} else
 		error = ESRCH;
 	NET_EPOCH_EXIT(et);
 	return (error);
 }
 
 /* Write-only, hidden (CTLFLAG_SKIP) node served by sysctl_drop(). */
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
     CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP |
     CTLFLAG_NEEDGIANT, NULL, 0, sysctl_drop, "",
     "Drop TCP connection");
 
 #ifdef KERN_TLS
 /*
  * Handler shared by the two write-only TLS mode sysctls registered
  * below.
  *
  * The new value must hold two struct sockaddr_storage entries: the
  * foreign address followed by the local one.  The matching TCP inpcb is
  * looked up and ktls_set_tx_mode() is called on its socket; arg2 == 0
  * requests TCP_TLS_MODE_SW, any other arg2 requests TCP_TLS_MODE_IFNET.
  *
  * Returns EINVAL when an old value is requested, when the address
  * family is not handled, or when a sockaddr length is wrong; EPERM when
  * no new value is supplied; ENOMEM when the new value is too short;
  * ESRCH when no inpcb matches; ECONNRESET when the pcb is flagged
  * INP_TIMEWAIT or INP_DROPPED or has no socket; otherwise the result of
  * ktls_set_tx_mode(), or an error from SYSCTL_IN() or sa6_embedscope().
  */
 static int
 sysctl_switch_tls(SYSCTL_HANDLER_ARGS)
 {
 	/* addrs[0] is a foreign socket, addrs[1] is a local one. */
 	struct sockaddr_storage addrs[2];
 	struct inpcb *inp;
 	struct sockaddr_in *fin, *lin;
 	struct epoch_tracker et;
 #ifdef INET6
 	struct sockaddr_in6 *fin6, *lin6;
 #endif
 	int error;
 
 	inp = NULL;
 	fin = lin = NULL;
 #ifdef INET6
 	fin6 = lin6 = NULL;
 #endif
 	error = 0;
 
 	/* This node cannot be read, only written. */
 	if (req->oldptr != NULL || req->oldlen != 0)
 		return (EINVAL);
 	if (req->newptr == NULL)
 		return (EPERM);
 	if (req->newlen < sizeof(addrs))
 		return (ENOMEM);
 	error = SYSCTL_IN(req, &addrs, sizeof(addrs));
 	if (error)
 		return (error);
 
 	/* Validate both addresses; the family of addrs[0] decides the type. */
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		fin6 = (struct sockaddr_in6 *)&addrs[0];
 		lin6 = (struct sockaddr_in6 *)&addrs[1];
 		if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
 		    lin6->sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 		if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
 			/* Both ends must be v4-mapped, or the request is bad. */
 			if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
 				return (EINVAL);
 			/*
 			 * Convert both entries in place to sockaddr_in.
 			 * NOTE(review): presumably this also rewrites the
 			 * family to AF_INET so that the lookup switch below
 			 * takes the IPv4 case - confirm.
 			 */
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
 			fin = (struct sockaddr_in *)&addrs[0];
 			lin = (struct sockaddr_in *)&addrs[1];
 			break;
 		}
 		error = sa6_embedscope(fin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		error = sa6_embedscope(lin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		fin = (struct sockaddr_in *)&addrs[0];
 		lin = (struct sockaddr_in *)&addrs[1];
 		if (fin->sin_len != sizeof(struct sockaddr_in) ||
 		    lin->sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 	/* Only the lookup itself runs inside the network epoch. */
 	NET_EPOCH_ENTER(et);
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
 		    fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
 		    INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
 		    lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 	}
 	NET_EPOCH_EXIT(et);
 	if (inp != NULL) {
 		if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0 ||
 		    inp->inp_socket == NULL) {
 			error = ECONNRESET;
 			INP_WUNLOCK(inp);
 		} else {
 			struct socket *so;
 
 			/*
 			 * Take a socket reference for the duration of the
 			 * mode switch; it is dropped again with sorele()
 			 * once the pcb has been unlocked.
 			 */
 			so = inp->inp_socket;
 			soref(so);
 			error = ktls_set_tx_mode(so,
 			    arg2 == 0 ? TCP_TLS_MODE_SW : TCP_TLS_MODE_IFNET);
 			INP_WUNLOCK(inp);
 			/* sorele() is entered with the socket lock held. */
 			SOCK_LOCK(so);
 			sorele(so);
 		}
 	} else
 		error = ESRCH;
 	return (error);
 }
 
 /* arg2 == 0: sysctl_switch_tls() requests TCP_TLS_MODE_SW. */
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_sw_tls,
     CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP |
     CTLFLAG_NEEDGIANT, NULL, 0, sysctl_switch_tls, "",
     "Switch TCP connection to SW TLS");
 /* arg2 == 1: sysctl_switch_tls() requests TCP_TLS_MODE_IFNET. */
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_ifnet_tls,
     CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP |
     CTLFLAG_NEEDGIANT, NULL, 1, sysctl_switch_tls, "",
     "Switch TCP connection to ifnet TLS");
 #endif
 
 /*
  * Generate a standardized TCP log line for use throughout the
  * tcp subsystem.  Memory allocation is done with M_NOWAIT to
  * allow use in the interrupt context.
  *
  * NB: The caller MUST free(s, M_TCPLOG) the returned string.
  * NB: The function may return NULL if memory allocation failed.
  *
  * Due to header inclusion and ordering limitations the struct ip
  * and ip6_hdr pointers have to be passed as void pointers.
  */
 char *
 tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* Produce a log line only while V_tcp_log_in_vain is non-zero. */
 	if (V_tcp_log_in_vain != 0)
 		return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 
 	return (NULL);
 }
 
 char *
 tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* Produce a log line only while tcp_log_debug is non-zero. */
 	if (tcp_log_debug != 0)
 		return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 
 	return (NULL);
 }
 
 /*
  * Build the log line for tcp_log_vain() and tcp_log_addrs().
  *
  * The addresses and ports come from the first usable source, tried in
  * this order: inc when it describes an IPv4 connection, inc as IPv6,
  * the ip6hdr/th pair, the ip4hdr/th pair.  When th is non-NULL the TCP
  * flags are appended.
  *
  * Returns a string allocated from M_TCPLOG with M_NOWAIT, or NULL when
  * the allocation fails or no address source is usable.
  */
 static char *
 tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 	char *s, *sp;
 	size_t size;
 	struct ip *ip;
 #ifdef INET6
 	const struct ip6_hdr *ip6;
 
 	ip6 = (const struct ip6_hdr *)ip6hdr;
 #endif /* INET6 */
 	ip = (struct ip *)ip4hdr;
 
 	/*
 	 * The log line looks like this:
 	 * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
 	 */
 	/* Budget: the fixed template, the flag names and two addresses. */
 	size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
 	    sizeof(PRINT_TH_FLAGS) + 1 +
 #ifdef INET6
 	    2 * INET6_ADDRSTRLEN;
 #else
 	    2 * INET_ADDRSTRLEN;
 #endif /* INET6 */
 
 	/* M_ZERO makes s an empty string, so strcat() below is safe. */
 	s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
 	if (s == NULL)
 		return (NULL);
 
 	strcat(s, "TCP: [");
 	sp = s + strlen(s);
 
 	/* sp is moved to the end of the string after every piece written. */
 	if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) {
 		inet_ntoa_r(inc->inc_faddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		inet_ntoa_r(inc->inc_laddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 #ifdef INET6
 	} else if (inc) {
 		ip6_sprintf(sp, &inc->inc6_faddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &inc->inc6_laddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 	} else if (ip6 && th) {
 		ip6_sprintf(sp, &ip6->ip6_src);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &ip6->ip6_dst);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET6 */
 #ifdef INET
 	} else if (ip && th) {
 		inet_ntoa_r(ip->ip_src, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		inet_ntoa_r(ip->ip_dst, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET */
 	} else {
 		/* No usable address source: release the buffer and give up. */
 		free(s, M_TCPLOG);
 		return (NULL);
 	}
 	sp = s + strlen(s);
 	if (th)
 		sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
 	/* The last byte must still be the NUL left there by M_ZERO. */
 	if (*(s + size - 1) != '\0')
 		panic("%s: string too long", __func__);
 	return (s);
 }
 
 /*
  * A subroutine which makes it easy to track TCP state changes with DTrace.
  * This function shouldn't be called for t_state initializations that don't
  * correspond to actual TCP state transitions.
  */
 void
 tcp_state_change(struct tcpcb *tp, int newstate)
 {
 #if defined(KDTRACE_HOOKS)
 	int pstate = tp->t_state;
 #endif
 
 	TCPSTATES_DEC(tp->t_state);
 	TCPSTATES_INC(newstate);
 	tp->t_state = newstate;
 	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate);
 }
 
 /*
  * Create an external-format (``xtcpcb'') structure using the information in
  * the kernel-format tcpcb structure pointed to by tp.  This is done to
  * reduce the spew of irrelevant information over this interface, to isolate
  * user code from changes in the kernel structure, and potentially to provide
  * information-hiding if we decide that some of this information should be
  * hidden from users.
  */
 void
 tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
 {
 	struct tcpcb *tp = intotcpcb(inp);
 	struct tcptw *tw = intotw(inp);
 	sbintime_t now;
 
 	/* Start from an all-zero record; fields not set below stay zero. */
 	bzero(xt, sizeof(*xt));
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		/* Only the state and the encapsulation port are exported. */
 		xt->t_state = TCPS_TIME_WAIT;
 		xt->xt_encaps_port = tw->t_port;
 	} else {
 		xt->t_state = tp->t_state;
 		xt->t_logstate = tp->t_logstate;
 		xt->t_flags = tp->t_flags;
 		xt->t_sndzerowin = tp->t_sndzerowin;
 		xt->t_sndrexmitpack = tp->t_sndrexmitpack;
 		xt->t_rcvoopack = tp->t_rcvoopack;
 		xt->t_rcv_wnd = tp->rcv_wnd;
 		xt->t_snd_wnd = tp->snd_wnd;
 		xt->t_snd_cwnd = tp->snd_cwnd;
 		xt->t_snd_ssthresh = tp->snd_ssthresh;
 		xt->t_maxseg = tp->t_maxseg;
 		/*
 		 * Bit 0 reports TF2_ECN_PERMIT and bit 1 TF2_ACE_PERMIT.
 		 * The two flags are tested separately: written as a single
 		 * "a ? 1 : 0 + b ? 2 : 0" expression, the second test ends
 		 * up inside the else arm of the first, so the ACE bit would
 		 * be lost whenever the ECN bit is set.
 		 */
 		xt->xt_ecn = 0;
 		if ((tp->t_flags2 & TF2_ECN_PERMIT) != 0)
 			xt->xt_ecn |= 1;
 		if ((tp->t_flags2 & TF2_ACE_PERMIT) != 0)
 			xt->xt_ecn |= 2;
 
 		/*
 		 * Export every timer as the time left until it fires, in
 		 * milliseconds, or as 0 when the callout is not active.
 		 */
 		now = getsbinuptime();
 #define	COPYTIMER(ttt)	do {						\
 		if (callout_active(&tp->t_timers->ttt))			\
 			xt->ttt = (tp->t_timers->ttt.c_time - now) /	\
 			    SBT_1MS;					\
 		else							\
 			xt->ttt = 0;					\
 } while (0)
 		COPYTIMER(tt_delack);
 		COPYTIMER(tt_rexmt);
 		COPYTIMER(tt_persist);
 		COPYTIMER(tt_keep);
 		COPYTIMER(tt_2msl);
 #undef COPYTIMER
 		/* Ticks elapsed since t_rcvtime, scaled by 1000 / hz. */
 		xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz;
 
 		xt->xt_encaps_port = tp->t_port;
 		bcopy(tp->t_fb->tfb_tcp_block_name, xt->xt_stack,
 		    TCP_FUNCTION_NAME_LEN_MAX);
 		bcopy(CC_ALGO(tp)->name, xt->xt_cc,
 		    TCP_CA_NAME_MAX);
 #ifdef TCP_BLACKBOX
 		(void)tcp_log_get_id(tp, xt->xt_logid);
 #endif
 	}
 
 	xt->xt_len = sizeof(struct xtcpcb);
 	in_pcbtoxinpcb(inp, &xt->xt_inp);
 	/* Without a socket, the protocol is filled in by hand. */
 	if (inp->inp_socket == NULL)
 		xt->xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
 }
 
 /*
  * Record an end status for tp.  Each status value is stored at most
  * once, in the first empty slot of t_end_info_bytes, and is remembered
  * through its bit in t_end_info_status.  Invalid requests are ignored.
  */
 void
 tcp_log_end_status(struct tcpcb *tp, uint8_t status)
 {
 	uint32_t mask, slot;
 
 	/* Invalid: no tcpcb, or a status outside 1..TCP_EI_STATUS_MAX_VALUE. */
 	if (tp == NULL || status > TCP_EI_STATUS_MAX_VALUE || status == 0)
 		return;
 
 	/* Should this be a KASSERT? */
 	if (status > (sizeof(uint32_t) * 8))
 		return;
 
 	mask = 1U << (status - 1);
 	if ((tp->t_end_info_status & mask) != 0)
 		return;		/* already logged */
 
 	/* Nothing is recorded when every slot is already taken. */
 	for (slot = 0; slot < TCP_END_BYTE_INFO; slot++) {
 		if (tp->t_end_info_bytes[slot] != TCP_EI_EMPTY_SLOT)
 			continue;
 		tp->t_end_info_bytes[slot] = status;
 		tp->t_end_info_status |= mask;
 		break;
 	}
 }
 
 /*
  * Ask for permission to pace one more connection.  Returns 1 and counts
  * the connection when tcp_pacing_limit is -1 or is still above the
  * number of pacing connections; returns 0 otherwise.
  */
 int
 tcp_can_enable_pacing(void)
 {
 
 	/* A limit other than -1 that has been reached refuses the request. */
 	if (tcp_pacing_limit != -1 &&
 	    tcp_pacing_limit <= number_of_tcp_connections_pacing)
 		return (0);
 
 	atomic_fetchadd_int(&number_of_tcp_connections_pacing, 1);
 	shadow_num_connections = number_of_tcp_connections_pacing;
 	return (1);
 }
 
 /* Set once the "invalid decrement" warning below has been printed. */
 static uint8_t tcp_pacing_warning = 0;
 
 /*
  * Give back one pacing slot.  A decrement that finds the count already
  * at zero is reported: with a pacing limit in force the limit is set to
  * 0, otherwise a warning is printed the first time only.
  */
 void
 tcp_decrement_paced_conn(void)
 {
 	uint32_t prev;
 
 	/* atomic_fetchadd_int() hands back the count before the decrement. */
 	prev = atomic_fetchadd_int(&number_of_tcp_connections_pacing, -1);
 	shadow_num_connections = number_of_tcp_connections_pacing;
 	KASSERT(prev != 0, ("tcp_paced_connection_exits -1 would cause wrap?"));
 	if (prev != 0)
 		return;
 
 	if (tcp_pacing_limit != -1) {
 		printf("Warning all pacing is now disabled, count decrements invalidly!\n");
 		tcp_pacing_limit = 0;
 		return;
 	}
 	if (tcp_pacing_warning == 0) {
 		printf("Warning pacing count is invalid, invalid decrement\n");
 		tcp_pacing_warning = 1;
 	}
 }
diff --git a/sys/rpc/svc_vc.c b/sys/rpc/svc_vc.c
index de1baa1417b1..234feba5c8bd 100644
--- a/sys/rpc/svc_vc.c
+++ b/sys/rpc/svc_vc.c
@@ -1,1130 +1,1130 @@
 /*	$NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2009, Sun Microsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without 
  * modification, are permitted provided that the following conditions are met:
  * - Redistributions of source code must retain the above copyright notice, 
  *   this list of conditions and the following disclaimer.
  * - Redistributions in binary form must reproduce the above copyright notice, 
  *   this list of conditions and the following disclaimer in the documentation 
  *   and/or other materials provided with the distribution.
  * - Neither the name of Sun Microsystems, Inc. nor the names of its 
  *   contributors may be used to endorse or promote products derived 
  *   from this software without specific prior written permission.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro";
 static char *sccsid = "@(#)svc_tcp.c	2.2 88/08/01 4.0 RPCSRC";
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * svc_vc.c, Server side for Connection Oriented based RPC. 
  *
  * Actually implements two flavors of transporter -
  * a tcp rendezvouser (a listener and connection establisher)
  * and a record/tcp stream.
  */
 
 #include "opt_kern_tls.h"
 
 #include <sys/param.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/ktls.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/systm.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <netinet/tcp.h>
 
 #include <rpc/rpc.h>
 #include <rpc/rpcsec_tls.h>
 
 #include <rpc/krpc.h>
 #include <rpc/rpc_com.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Forward declarations of the file-local transport methods and helpers.
  * They are needed ahead of the xp_ops tables below, which refer to the
  * methods by name.
  */
 static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
     struct sockaddr **, struct mbuf **);
 static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
 static void svc_vc_rendezvous_destroy(SVCXPRT *);
 static bool_t svc_vc_null(void);
 static void svc_vc_destroy(SVCXPRT *);
 static enum xprt_stat svc_vc_stat(SVCXPRT *);
 static bool_t svc_vc_ack(SVCXPRT *, uint32_t *);
 static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
     struct sockaddr **, struct mbuf **);
 static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
     struct sockaddr *, struct mbuf *, uint32_t *seq);
 static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
 static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
     void *in);
 static void svc_vc_backchannel_destroy(SVCXPRT *);
 static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
 static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
     struct sockaddr **, struct mbuf **);
 static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
     struct sockaddr *, struct mbuf *, uint32_t *);
 static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
     void *in);
 static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
     struct sockaddr *raddr);
 static int svc_vc_accept(struct socket *head, struct socket **sop);
 static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
 static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
 
 /*
  * Methods of a listening (rendezvous) transport, installed by
  * svc_vc_create().  xp_reply is wired to svc_vc_null() through a cast
  * and no xp_ack method is supplied.
  */
 static struct xp_ops svc_vc_rendezvous_ops = {
 	.xp_recv =	svc_vc_rendezvous_recv,
 	.xp_stat =	svc_vc_rendezvous_stat,
 	.xp_reply =	(bool_t (*)(SVCXPRT *, struct rpc_msg *,
 		struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null,
 	.xp_destroy =	svc_vc_rendezvous_destroy,
 	.xp_control =	svc_vc_rendezvous_control
 };
 
 /*
  * Methods of a connected stream transport, installed by
  * svc_vc_create_conn().
  */
 static struct xp_ops svc_vc_ops = {
 	.xp_recv =	svc_vc_recv,
 	.xp_stat =	svc_vc_stat,
 	.xp_ack =	svc_vc_ack,
 	.xp_reply =	svc_vc_reply,
 	.xp_destroy =	svc_vc_destroy,
 	.xp_control =	svc_vc_control
 };
 
 /*
  * Methods of a backchannel transport, installed by
  * svc_vc_create_backchannel().  No xp_ack method is supplied.
  */
 static struct xp_ops svc_vc_backchannel_ops = {
 	.xp_recv =	svc_vc_backchannel_recv,
 	.xp_stat =	svc_vc_backchannel_stat,
 	.xp_reply =	svc_vc_backchannel_reply,
 	.xp_destroy =	svc_vc_backchannel_destroy,
 	.xp_control =	svc_vc_backchannel_control
 };
 
 /*
  * Usage:
  *	xprt = svc_vc_create(pool, so, sendsize, recvsize);
  *
  * Creates, registers, and returns a (rpc) tcp based transporter.
  * Once *xprt is initialized, it is registered as a transporter
  * see (svc.h, xprt_register).  This routine returns
  * a NULL if a problem occurred.
  *
  * The socket passed in is expected to be bound but not yet connected;
  * it is put into the listening state here and the returned transport is
  * a rendezvous transport.  A socket that is already flagged
  * SS_ISCONNECTED or SS_ISDISCONNECTED is instead handed to
  * svc_vc_create_conn() together with its peer address.
  *
  * The sendsize and recvsize arguments are not referenced by this
  * function.
  */
 SVCXPRT *
 svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
     size_t recvsize)
 {
 	SVCXPRT *xprt;
 	struct sockaddr* sa;
 	int error;
 
 	SOCK_LOCK(so);
 	if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
 		/* Not a listener: build a stream transport for the peer. */
 		SOCK_UNLOCK(so);
 		CURVNET_SET(so->so_vnet);
 		error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
 		CURVNET_RESTORE();
 		if (error)
 			return (NULL);
 		xprt = svc_vc_create_conn(pool, so, sa);
 		/* The address is copied by svc_vc_create_conn(). */
 		free(sa, M_SONAME);
 		return (xprt);
 	}
 	SOCK_UNLOCK(so);
 
 	xprt = svc_xprt_alloc();
 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_pool = pool;
 	xprt->xp_socket = so;
 	xprt->xp_p1 = NULL;
 	xprt->xp_p2 = NULL;
 	xprt->xp_ops = &svc_vc_rendezvous_ops;
 
 	/* Record the local address of the socket in the transport. */
 	CURVNET_SET(so->so_vnet);
 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
 	CURVNET_RESTORE();
 	if (error) {
 		goto cleanup_svc_vc_create;
 	}
 
 	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
 	free(sa, M_SONAME);
 
 	xprt_register(xprt);
 
 	/* The result of solisten() is not checked. */
 	solisten(so, -1, curthread);
 
 	/* Install the upcall that reacts to events on the listen socket. */
 	SOLISTEN_LOCK(so);
 	xprt->xp_upcallset = 1;
 	solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
 	SOLISTEN_UNLOCK(so);
 
 	return (xprt);
 
 cleanup_svc_vc_create:
 	/* Undo the allocation; the socket itself is left to the caller. */
 	sx_destroy(&xprt->xp_lock);
 	svc_xprt_free(xprt);
 
 	return (NULL);
 }
 
 /*
  * Create a new transport for a socket obtained via soaccept().
  *
  * SO_KEEPALIVE is enabled on the socket, and TCP_NODELAY as well when
  * the protocol is IPPROTO_TCP.  raddr is copied into the transport as
  * the remote address and the socket's own address as the local one.
  * The transport is registered, a receive upcall is installed and the
  * transport is marked active.
  *
  * Returns the new transport, or NULL when a socket option cannot be set
  * or the local address cannot be obtained.
  */
 SVCXPRT *
 svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
 {
 	SVCXPRT *xprt;
 	struct cf_conn *cd;
 	struct sockaddr* sa = NULL;
 	struct sockopt opt;
 	int one = 1;
 	int error;
 
 	bzero(&opt, sizeof(struct sockopt));
 	opt.sopt_dir = SOPT_SET;
 	opt.sopt_level = SOL_SOCKET;
 	opt.sopt_name = SO_KEEPALIVE;
 	opt.sopt_val = &one;
 	opt.sopt_valsize = sizeof(one);
 	error = sosetopt(so, &opt);
 	if (error) {
 		return (NULL);
 	}
 
 	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
 		bzero(&opt, sizeof(struct sockopt));
 		opt.sopt_dir = SOPT_SET;
 		opt.sopt_level = IPPROTO_TCP;
 		opt.sopt_name = TCP_NODELAY;
 		opt.sopt_val = &one;
 		opt.sopt_valsize = sizeof(one);
 		error = sosetopt(so, &opt);
 		if (error) {
 			return (NULL);
 		}
 	}
 
 	/* Per-connection stream state, hung off xp_p1 below. */
 	cd = mem_alloc(sizeof(*cd));
 	cd->strm_stat = XPRT_IDLE;
 
 	xprt = svc_xprt_alloc();
 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_pool = pool;
 	xprt->xp_socket = so;
 	xprt->xp_p1 = cd;
 	xprt->xp_p2 = NULL;
 	xprt->xp_ops = &svc_vc_ops;
 
 	/*
 	 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
 	 * has a 5 minute timer, server has a 6 minute timer.
 	 */
 	xprt->xp_idletimeout = 6 * 60;
 
 	memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
 
 	CURVNET_SET(so->so_vnet);
 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
 	CURVNET_RESTORE();
 	if (error)
 		goto cleanup_svc_vc_create;
 
 	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
 	free(sa, M_SONAME);
 
 	xprt_register(xprt);
 
 	/* Install the receive upcall under the receive buffer lock. */
 	SOCKBUF_LOCK(&so->so_rcv);
 	xprt->xp_upcallset = 1;
 	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/*
 	 * Throw the transport into the active list in case it already
 	 * has some data buffered.
 	 */
 	sx_xlock(&xprt->xp_lock);
 	xprt_active(xprt);
 	sx_xunlock(&xprt->xp_lock);
 
 	return (xprt);
 cleanup_svc_vc_create:
 	/* Release what was allocated here; the socket is left alone. */
 	sx_destroy(&xprt->xp_lock);
 	svc_xprt_free(xprt);
 	mem_free(cd, sizeof(*cd));
 
 	return (NULL);
 }
 
 /*
  * Create a new transport for a backchannel on a clnt_vc socket.  The
  * transport starts out without a socket (xp_socket is NULL) and with an
  * idle connection state in xp_p1.
  */
 SVCXPRT *
 svc_vc_create_backchannel(SVCPOOL *pool)
 {
 	struct cf_conn *cd;
 	SVCXPRT *xprt;
 
 	/* Connection state first, then the transport that refers to it. */
 	cd = mem_alloc(sizeof(*cd));
 	cd->strm_stat = XPRT_IDLE;
 
 	xprt = svc_xprt_alloc();
 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_ops = &svc_vc_backchannel_ops;
 	xprt->xp_pool = pool;
 	xprt->xp_socket = NULL;
 	xprt->xp_p1 = cd;
 	xprt->xp_p2 = NULL;
 
 	return (xprt);
 }
 
 /*
  * This does all of the accept except the final call to soaccept. The
  * caller will call soaccept after dropping its locks (soaccept may
  * call malloc).
  *
  * On success 0 is returned and the dequeued socket is stored in *sop.
  * EINVAL is returned when head is not a listening socket; other errors
  * come from the MAC check or from solisten_dequeue().
  */
 int
 svc_vc_accept(struct socket *head, struct socket **sop)
 {
 	struct socket *so;
 	int error = 0;
 	short nbio;
 
 	/* XXXGL: shouldn't that be an assertion? */
-	if ((head->so_options & SO_ACCEPTCONN) == 0) {
+	if (!SOLISTENING(head)) {
 		error = EINVAL;
 		goto done;
 	}
 #ifdef MAC
 	error = mac_socket_check_accept(curthread->td_ucred, head);
 	if (error != 0)
 		goto done;
 #endif
 	/*
 	 * XXXGL: we want non-blocking semantics.  The socket could be a
 	 * socket created by kernel as well as socket shared with userland,
 	 * so we can't be sure about presence of SS_NBIO.  We also shall not
 	 * toggle it on the socket, since that may surprise userland.  So we
 	 * set SS_NBIO only temporarily.
 	 */
 	/*
 	 * NOTE(review): no SOLISTEN_UNLOCK() is visible in this function;
 	 * presumably solisten_dequeue() releases the listen lock - confirm.
 	 */
 	SOLISTEN_LOCK(head);
 	nbio = head->so_state & SS_NBIO;
 	head->so_state |= SS_NBIO;
 	error = solisten_dequeue(head, &so, 0);
 	/*
 	 * NOTE(review): nbio only ever holds the SS_NBIO bit, so
 	 * (nbio & ~SS_NBIO) is always 0 and this statement clears every
 	 * bit of so_state rather than just restoring SS_NBIO - confirm
 	 * that this is intended.
 	 */
 	head->so_state &= (nbio & ~SS_NBIO);
 	if (error)
 		goto done;
 
 	/* The new socket inherits the listener's original SS_NBIO bit. */
 	so->so_state |= nbio;
 	*sop = so;
 
 	/* connection has been removed from the listen queue */
 	KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
 done:
 	return (error);
 }
 
 /*
  * Receive method of a rendezvous transport: accept one pending
  * connection, if there is one, and wrap it in a new stream transport.
  * msg, addrp and mp are not used.  FALSE is returned on every path.
  */
 /*ARGSUSED*/
 static bool_t
 svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr **addrp, struct mbuf **mp)
 {
 	struct socket *so = NULL;
 	struct sockaddr *sa = NULL;
 	int error;
 	SVCXPRT *new_xprt;
 
 	/*
 	 * The socket upcall calls xprt_active() which will eventually
 	 * cause the server to call us here. We attempt to accept a
 	 * connection from the socket and turn it into a new
 	 * transport. If the accept fails, we have drained all pending
 	 * connections so we call xprt_inactive().
 	 */
 	sx_xlock(&xprt->xp_lock);
 
 	error = svc_vc_accept(xprt->xp_socket, &so);
 
 	if (error == EWOULDBLOCK) {
 		/*
 		 * We must re-test for new connections after taking
 		 * the lock to protect us in the case where a new
 		 * connection arrives after our call to accept fails
 		 * with EWOULDBLOCK.
 		 */
 		SOLISTEN_LOCK(xprt->xp_socket);
 		if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
 			xprt_inactive_self(xprt);
 		SOLISTEN_UNLOCK(xprt->xp_socket);
 		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
 	if (error) {
 		/*
 		 * Any other accept error: remove the upcall and mark the
 		 * transport inactive.
 		 * NOTE(review): the upcall is removed with soupcall_clear()
 		 * here although it is installed with solisten_upcall_set()
 		 * in svc_vc_create() - confirm that the two match.
 		 */
 		SOLISTEN_LOCK(xprt->xp_socket);
 		if (xprt->xp_upcallset) {
 			xprt->xp_upcallset = 0;
 			soupcall_clear(xprt->xp_socket, SO_RCV);
 		}
 		SOLISTEN_UNLOCK(xprt->xp_socket);
 		xprt_inactive_self(xprt);
 		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
 	/* soaccept() is called without the transport lock held. */
 	sx_xunlock(&xprt->xp_lock);
 
 	sa = NULL;
 	error = soaccept(so, &sa);
 
 	if (error) {
 		/*
 		 * XXX not sure if I need to call sofree or soclose here.
 		 */
 		if (sa)
 			free(sa, M_SONAME);
 		return (FALSE);
 	}
 
 	/*
 	 * svc_vc_create_conn will call xprt_register - we don't need
 	 * to do anything with the new connection except dereference it.
 	 */
 	new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa);
 	if (!new_xprt) {
 		soclose(so);
 	} else {
 		SVC_RELEASE(new_xprt);
 	}
 
 	free(sa, M_SONAME);
 
 	return (FALSE); /* there is never an rpc msg to be processed */
 }
 
 /*
  * Status method of a rendezvous transport.  The transport argument is
  * not examined; the answer is always XPRT_IDLE.
  */
 /*ARGSUSED*/
 static enum xprt_stat
 svc_vc_rendezvous_stat(SVCXPRT *xprt)
 {
 	const enum xprt_stat st = XPRT_IDLE;
 
 	return (st);
 }
 
 /*
  * Teardown shared by the socket-backed transports: dispose of the
  * socket, if there is one, free the netid string and free the
  * transport itself.
  */
 static void
 svc_vc_destroy_common(SVCXPRT *xprt)
 {
 	enum clnt_stat stat;
 	uint32_t reterr;
 	struct socket *so;
 
 	so = xprt->xp_socket;
 	if (so != NULL) {
 		if ((xprt->xp_tls & (RPCTLS_FLAGS_HANDSHAKE |
 		    RPCTLS_FLAGS_HANDSHFAIL)) == 0) {
 			/* No TLS handshake was involved: plain close. */
 			(void)soclose(so);
 		} else {
 			if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
 				/*
 				 * If the upcall fails, the socket has
 				 * probably been closed via the rpctlssd
 				 * daemon having crashed or been
 				 * restarted, so just ignore returned stat.
 				 */
 				stat = rpctls_srv_disconnect(xprt->xp_sslsec,
 				    xprt->xp_sslusec, xprt->xp_sslrefno,
 				    &reterr);
 			}
 			/* Must sorele() to get rid of reference. */
 			CURVNET_SET(so->so_vnet);
 			SOCK_LOCK(so);
 			sorele(so);
 			CURVNET_RESTORE();
 		}
 	}
 
 	/* The netid string was allocated with room for its terminator. */
 	if (xprt->xp_netid != NULL)
 		(void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
 	svc_xprt_free(xprt);
 }
 
 /*
  * Destroy method for a rendezvous transport.  With the listen-socket lock
  * held, clear the xp_upcallset flag and remove the listen upcall if the
  * flag was set, then run the common teardown.
  */
 static void
 svc_vc_rendezvous_destroy(SVCXPRT *xprt)
 {
 	struct socket *lso = xprt->xp_socket;
 
 	SOLISTEN_LOCK(lso);
 	if (xprt->xp_upcallset != 0) {
 		xprt->xp_upcallset = 0;
 		solisten_upcall_set(lso, NULL, NULL);
 	}
 	SOLISTEN_UNLOCK(lso);
 
 	svc_vc_destroy_common(xprt);
 }
 
 /*
  * Destroy method for a connected stream transport.
  *
  * Clears the receive upcall (under the receive sockbuf lock), releases the
  * client handle stored in xp_p2 if there is one, runs the common teardown,
  * and finally frees the per-connection state (xp_p1) together with any
  * mbuf chains still held in mreq / mpending.  The xp_p1 pointer is read
  * before the transport is freed by svc_vc_destroy_common().
  */
 static void
 svc_vc_destroy(SVCXPRT *xprt)
 {
 	struct cf_conn *conn = (struct cf_conn *)xprt->xp_p1;
 	CLIENT *client = (CLIENT *)xprt->xp_p2;
 	struct socket *so = xprt->xp_socket;
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (xprt->xp_upcallset != 0) {
 		xprt->xp_upcallset = 0;
 		if (so->so_rcv.sb_upcall != NULL)
 			soupcall_clear(so, SO_RCV);
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	if (client != NULL)
 		CLNT_RELEASE(client);
 
 	svc_vc_destroy_common(xprt);
 
 	if (conn->mreq != NULL)
 		m_freem(conn->mreq);
 	if (conn->mpending != NULL)
 		m_freem(conn->mpending);
 	mem_free(conn, sizeof(*conn));
 }
 
 /*
  * Destroy method for a backchannel transport.  Frees the transport, then
  * every request packet still queued on cd->mreq (linked via m_nextpkt),
  * then the per-connection state itself.  No socket is touched here.
  */
 static void
 svc_vc_backchannel_destroy(SVCXPRT *xprt)
 {
 	struct cf_conn *conn = (struct cf_conn *)xprt->xp_p1;
 	struct mbuf *pkt, *next;
 
 	svc_xprt_free(xprt);
 	for (pkt = conn->mreq; pkt != NULL; pkt = next) {
 		/* Grab the link before the packet is freed. */
 		next = pkt->m_nextpkt;
 		m_freem(pkt);
 	}
 	mem_free(conn, sizeof(*conn));
 }
 
 /*
  * Control method for a connected stream transport.  No requests are
  * supported: every call returns FALSE and the arguments are ignored.
  */
 /*ARGSUSED*/
 static bool_t
 svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
 {
 
 	(void)xprt;
 	(void)rq;
 	(void)in;
 	return (FALSE);
 }
 
 /*
  * Control method for a rendezvous transport.  No requests are supported:
  * every call returns FALSE and the arguments are ignored.
  */
 static bool_t
 svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
 {
 
 	(void)xprt;
 	(void)rq;
 	(void)in;
 	return (FALSE);
 }
 
 /*
  * Control method for a backchannel transport.  No requests are supported:
  * every call returns FALSE and the arguments are ignored.
  */
 static bool_t
 svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
 {
 
 	(void)xprt;
 	(void)rq;
 	(void)in;
 	return (FALSE);
 }
 
 /*
  * Status method for a connected stream transport.
  *
  * Returns XPRT_DIED once the stream has been marked dead, XPRT_MOREREQS
  * when a complete record is already parsed (mreq set, nothing outstanding,
  * end-of-record seen) or the socket is readable, and XPRT_IDLE otherwise.
  */
 static enum xprt_stat
 svc_vc_stat(SVCXPRT *xprt)
 {
 	struct cf_conn *conn = (struct cf_conn *)xprt->xp_p1;
 	int record_ready;
 
 	if (conn->strm_stat == XPRT_DIED)
 		return (XPRT_DIED);
 
 	record_ready = (conn->mreq != NULL && conn->resid == 0 && conn->eor);
 
 	/* The socket is only consulted when no parsed record is waiting. */
 	if (record_ready || soreadable(xprt->xp_socket))
 		return (XPRT_MOREREQS);
 
 	return (XPRT_IDLE);
 }
 
 /*
  * Report an acknowledgement count for this transport: the xp_snt_cnt
  * counter (loaded with acquire semantics) minus sbused() of the socket's
  * send buffer -- presumably the bytes still queued there.  The result is
  * stored in *ack with 32-bit wrap-around; always returns TRUE.
  */
 static bool_t
 svc_vc_ack(SVCXPRT *xprt, uint32_t *ack)
 {
 	uint32_t sent, queued;
 
 	sent = atomic_load_acq_32(&xprt->xp_snt_cnt);
 	queued = sbused(&xprt->xp_socket->so_snd);
 	*ack = sent - queued;
 	return (TRUE);
 }
 
 /*
  * Status method for a backchannel transport: XPRT_MOREREQS while at least
  * one request is queued on cd->mreq, XPRT_IDLE otherwise.
  */
 static enum xprt_stat
 svc_vc_backchannel_stat(SVCXPRT *xprt)
 {
 	const struct cf_conn *conn = (const struct cf_conn *)xprt->xp_p1;
 
 	return (conn->mreq != NULL ? XPRT_MOREREQS : XPRT_IDLE);
 }
 
 /*
  * If we have an mbuf chain in cd->mpending, try to parse a record from it,
  * leaving the result in cd->mreq. If we don't have a complete record, leave
  * the partial result in cd->mreq and try to read more from the socket.
  *
  * Returns FALSE when a record marker is expected but fewer than
  * sizeof(uint32_t) bytes are buffered; in that case the receive low-water
  * mark is set to the number of marker bytes still missing.  Returns TRUE
  * in every other case, after updating the low-water mark to reflect
  * whether more buffered data remains.
  *
  * Callers in this file invoke it with xprt->xp_lock held.
  */
 static int
 svc_vc_process_pending(SVCXPRT *xprt)
 {
 	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
 	struct socket *so = xprt->xp_socket;
 	struct mbuf *m;
 
 	/*
 	 * If cd->resid is non-zero, we have part of the
 	 * record already, otherwise we are expecting a record
 	 * marker.
 	 */
 	if (!cd->resid && cd->mpending) {
 		/*
 		 * See if there is enough data buffered to
 		 * make up a record marker. Make sure we can
 		 * handle the case where the record marker is
 		 * split across more than one mbuf.
 		 */
 		size_t n = 0;
 		uint32_t header;
 
 		/* Count buffered bytes, stopping once a full marker is seen. */
 		m = cd->mpending;
 		while (n < sizeof(uint32_t) && m) {
 			n += m->m_len;
 			m = m->m_next;
 		}
 		if (n < sizeof(uint32_t)) {
 			/* Ask to be woken only when the rest of the marker is in. */
 			so->so_rcv.sb_lowat = sizeof(uint32_t) - n;
 			return (FALSE);
 		}
 		m_copydata(cd->mpending, 0, sizeof(header),
 		    (char *)&header);
 		header = ntohl(header);
 		/*
 		 * The top bit of the marker is kept as cd->eor; the low
 		 * 31 bits become cd->resid, the byte count still needed.
 		 */
 		cd->eor = (header & 0x80000000) != 0;
 		cd->resid = header & 0x7fffffff;
 		/* Strip the marker from the front of the pending chain. */
 		m_adj(cd->mpending, sizeof(uint32_t));
 	}
 
 	/*
 	 * Start pulling off mbufs from cd->mpending
 	 * until we either have a complete record or
 	 * we run out of data. We use m_split to pull
 	 * data - it will pull as much as possible and
 	 * split the last mbuf if necessary.
 	 */
 	while (cd->mpending && cd->resid) {
 		m = cd->mpending;
 		/*
 		 * A single mbuf no longer than resid is taken whole;
 		 * anything else goes through m_split(), whose result
 		 * becomes the new pending chain.
 		 */
 		if (cd->mpending->m_next
 		    || cd->mpending->m_len > cd->resid)
 			cd->mpending = m_split(cd->mpending,
 			    cd->resid, M_WAITOK);
 		else
 			cd->mpending = NULL;
 		/* Append what was taken to the record being assembled. */
 		if (cd->mreq)
 			m_last(cd->mreq)->m_next = m;
 		else
 			cd->mreq = m;
 		/* Account for every byte just moved into cd->mreq. */
 		while (m) {
 			cd->resid -= m->m_len;
 			m = m->m_next;
 		}
 	}
 
 	/*
 	 * Block receive upcalls if we have more data pending,
 	 * otherwise report our need.
 	 */
 	if (cd->mpending)
 		so->so_rcv.sb_lowat = INT_MAX;
 	else
 		so->so_rcv.sb_lowat =
 		    imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2));
 	return (TRUE);
 }
 
 /*
  * Receive method for a connected stream transport.
  *
  * Holding xprt->xp_lock, loops between parsing records out of
  * cd->mpending and refilling cd->mpending from the socket with a
  * non-blocking soreceive().
  *
  * When a complete record is available:
  *  - if xp_p2 is set and the record's second 32-bit word is REPLY, the
  *    record is handed to clnt_bck_svccall() and the loop continues;
  *  - otherwise the record is decoded with xdr_callmsg() into *msg,
  *    *addrp is set to NULL, *mp receives the result of xdrmbuf_getall()
  *    (presumably the undecoded remainder of the request), and TRUE is
  *    returned.
  *
  * Returns FALSE when no request is delivered: reception is disabled
  * (xp_dontrcv), the socket would block, a TLS non-application record was
  * passed to the daemon, decoding failed, or the connection hit an error
  * or EOF (in the last two cases cd->strm_stat becomes XPRT_DIED).
  *
  * xp_lock is always released before returning.
  */
 static bool_t
 svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr **addrp, struct mbuf **mp)
 {
 	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
 	struct uio uio;
 	struct mbuf *m, *ctrl;
 	struct socket* so = xprt->xp_socket;
 	XDR xdrs;
 	int error, rcvflag;
 	uint32_t reterr, xid_plus_direction[2];
 	struct cmsghdr *cmsg;
 	struct tls_get_record tgr;
 	enum clnt_stat ret;
 
 	/*
 	 * Serialise access to the socket and our own record parsing
 	 * state.
 	 */
 	sx_xlock(&xprt->xp_lock);
 
 	for (;;) {
 		/* If we have no request ready, check pending queue. */
 		while (cd->mpending &&
 		    (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) {
 			if (!svc_vc_process_pending(xprt))
 				break;
 		}
 
 		/* Process and return complete request in cd->mreq. */
 		if (cd->mreq != NULL && cd->resid == 0 && cd->eor) {
 
 			/*
 			 * Now, check for a backchannel reply.
 			 * The XID is in the first uint32_t of the reply
 			 * and the message direction is the second one.
 			 */
 			if ((cd->mreq->m_len >= sizeof(xid_plus_direction) ||
 			    m_length(cd->mreq, NULL) >=
 			    sizeof(xid_plus_direction)) &&
 			    xprt->xp_p2 != NULL) {
 				m_copydata(cd->mreq, 0,
 				    sizeof(xid_plus_direction),
 				    (char *)xid_plus_direction);
 				xid_plus_direction[0] =
 				    ntohl(xid_plus_direction[0]);
 				xid_plus_direction[1] =
 				    ntohl(xid_plus_direction[1]);
 				/* Check message direction. */
 				if (xid_plus_direction[1] == REPLY) {
 					/*
 					 * The record is passed on and no
 					 * longer referenced here; go look
 					 * for the next one.
 					 */
 					clnt_bck_svccall(xprt->xp_p2,
 					    cd->mreq,
 					    xid_plus_direction[0]);
 					cd->mreq = NULL;
 					continue;
 				}
 			}
 
 			/* The XDR stream now references the record's mbufs. */
 			xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
 			cd->mreq = NULL;
 
 			/* Check for next request in a pending queue. */
 			svc_vc_process_pending(xprt);
 			if (cd->mreq == NULL || cd->resid != 0) {
 				/*
 				 * No further record is ready; go inactive
 				 * unless the socket has more to read.
 				 */
 				SOCKBUF_LOCK(&so->so_rcv);
 				if (!soreadable(so))
 					xprt_inactive_self(xprt);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 			}
 
 			/* Decoding runs without the transport lock held. */
 			sx_xunlock(&xprt->xp_lock);
 
 			if (! xdr_callmsg(&xdrs, msg)) {
 				XDR_DESTROY(&xdrs);
 				return (FALSE);
 			}
 
 			*addrp = NULL;
 			*mp = xdrmbuf_getall(&xdrs);
 			XDR_DESTROY(&xdrs);
 
 			return (TRUE);
 		}
 
 		/*
 		 * If receiving is disabled so that a TLS handshake can be
 		 * done by the rpctlssd daemon, return FALSE here.
 		 */
 		rcvflag = MSG_DONTWAIT;
 		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0)
 			rcvflag |= MSG_TLSAPPDATA;
 tryagain:
 		if (xprt->xp_dontrcv) {
 			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
 		/*
 		 * The socket upcall calls xprt_active() which will eventually
 		 * cause the server to call us here. We attempt to
 		 * read as much as possible from the socket and put
 		 * the result in cd->mpending. If the read fails,
 		 * we have drained both cd->mpending and the socket so
 		 * we can call xprt_inactive().
 		 */
 		uio.uio_resid = 1000000000;
 		uio.uio_td = curthread;
 		ctrl = m = NULL;
 		error = soreceive(so, NULL, &uio, &m, &ctrl, &rcvflag);
 
 		if (error == EWOULDBLOCK) {
 			/*
 			 * We must re-test for readability after
 			 * taking the lock to protect us in the case
 			 * where a new packet arrives on the socket
 			 * after our call to soreceive fails with
 			 * EWOULDBLOCK.
 			 */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (!soreadable(so))
 				xprt_inactive_self(xprt);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
 		/*
 		 * A return of ENXIO indicates that there is a
 		 * non-application data record at the head of the
 		 * socket's receive queue, for TLS connections.
 		 * This record needs to be handled in userland
 		 * via an SSL_read() call, so do an upcall to the daemon.
 		 */
 		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0 &&
 		    error == ENXIO) {
 			/* Disable reception. */
 			xprt->xp_dontrcv = TRUE;
 			/* The upcall is made without the transport lock held. */
 			sx_xunlock(&xprt->xp_lock);
 			ret = rpctls_srv_handlerecord(xprt->xp_sslsec,
 			    xprt->xp_sslusec, xprt->xp_sslrefno,
 			    &reterr);
 			sx_xlock(&xprt->xp_lock);
 			xprt->xp_dontrcv = FALSE;
 			if (ret != RPC_SUCCESS || reterr != RPCTLSERR_OK) {
 				/*
 				 * All we can do is soreceive() it and
 				 * then toss it.
 				 */
 				rcvflag = MSG_DONTWAIT;
 				goto tryagain;
 			}
 			sx_xunlock(&xprt->xp_lock);
 			xprt_active(xprt);   /* Harmless if already active. */
 			return (FALSE);
 		}
 
 		if (error) {
 			/*
 			 * Any other error: remove the receive upcall and
 			 * mark the stream dead.
 			 */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (xprt->xp_upcallset) {
 				xprt->xp_upcallset = 0;
 				soupcall_clear(so, SO_RCV);
 			}
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			xprt_inactive_self(xprt);
 			cd->strm_stat = XPRT_DIED;
 			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
 		if (!m) {
 			/*
 			 * EOF - the other end has closed the socket.
 			 */
 			xprt_inactive_self(xprt);
 			cd->strm_stat = XPRT_DIED;
 			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
 		/* Process any record header(s). */
 		if (ctrl != NULL) {
 			cmsg = mtod(ctrl, struct cmsghdr *);
 			if (cmsg->cmsg_type == TLS_GET_RECORD &&
 			    cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) {
 				memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
 				/*
 				 * This should have been handled by
 				 * the rpctls_srv_handlerecord()
 				 * upcall.  If not, all we can do is
 				 * toss it away.
 				 */
 				if (tgr.tls_type != TLS_RLTYPE_APP) {
 					m_freem(m);
 					m_free(ctrl);
 					rcvflag = MSG_DONTWAIT | MSG_TLSAPPDATA;
 					goto tryagain;
 				}
 			}
 			m_free(ctrl);
 		}
 
 		/* Queue the new data behind anything already pending. */
 		if (cd->mpending)
 			m_last(cd->mpending)->m_next = m;
 		else
 			cd->mpending = m;
 	}
 }
 
 /*
  * Receive method for a backchannel transport.
  *
  * Takes xp_lock, and, if a client handle is attached (xp_p2), also its
  * ct_lock, to dequeue the first request packet from cd->mreq.  With no
  * handle, or an empty queue (in which case the transport is also marked
  * inactive), FALSE is returned.  Otherwise the packet is decoded with
  * xdr_callmsg() after both locks are dropped: on success *addrp is set
  * to NULL, *mp receives the result of xdrmbuf_getall(), and TRUE is
  * returned; on decode failure FALSE is returned.
  */
 static bool_t
 svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr **addrp, struct mbuf **mp)
 {
 	struct cf_conn *conn = (struct cf_conn *) xprt->xp_p1;
 	struct ct_data *ct;
 	struct mbuf *req;
 	XDR xdrs;
 
 	sx_xlock(&xprt->xp_lock);
 	ct = (struct ct_data *)xprt->xp_p2;
 	if (ct == NULL) {
 		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
 	mtx_lock(&ct->ct_lock);
 	req = conn->mreq;
 	if (req != NULL)
 		conn->mreq = req->m_nextpkt;
 	else
 		xprt_inactive_self(xprt);
 	mtx_unlock(&ct->ct_lock);
 	sx_xunlock(&xprt->xp_lock);
 
 	if (req == NULL)
 		return (FALSE);
 
 	xdrmbuf_create(&xdrs, req, XDR_DECODE);
 	if (xdr_callmsg(&xdrs, msg)) {
 		*addrp = NULL;
 		*mp = xdrmbuf_getall(&xdrs);
 		XDR_DESTROY(&xdrs);
 		return (TRUE);
 	}
 	XDR_DESTROY(&xdrs);
 	return (FALSE);
 }
 
 /*
  * Reply method for a connected stream transport.
  *
  * Encodes *msg into a fresh mbuf chain, appends the caller's result chain
  * m when the reply is MSG_ACCEPTED/SUCCESS, prepends a 4-byte record
  * marker (top bit set, low bits holding the payload length) and transmits
  * the record with sosend().  For transports with RPCTLS_FLAGS_HANDSHAKE
  * set, the chain is first copied into ext_pgs mbufs.
  *
  * xp_snd_cnt is advanced by the record length before the send and rolled
  * back if the send fails; xp_snt_cnt is advanced only after a successful
  * send.  If seq is non-NULL it receives xp_snd_cnt after a successful
  * send.
  *
  * Returns TRUE only when the reply was both encoded and accepted by
  * sosend(); FALSE if encoding or transmission failed.
  */
 static bool_t
 svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
 {
 	XDR xdrs;
 	struct mbuf *mrep;
 	bool_t stat = TRUE;
 	int error, len, maxextsiz;
 #ifdef KERN_TLS
 	u_int maxlen;
 #endif
 
 	/*
 	 * Leave space for record mark.
 	 */
 	mrep = m_gethdr(M_WAITOK, MT_DATA);
 	mrep->m_data += sizeof(uint32_t);
 
 	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
 
 	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
 	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
 		if (!xdr_replymsg(&xdrs, msg))
 			stat = FALSE;
 		else
 			xdrmbuf_append(&xdrs, m);
 	} else {
 		stat = xdr_replymsg(&xdrs, msg);
 	}
 
 	if (stat) {
 		m_fixhdr(mrep);
 
 		/*
 		 * Prepend a record marker containing the reply length.
 		 */
 		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
 		len = mrep->m_pkthdr.len;
 		*mtod(mrep, uint32_t *) =
 			htonl(0x80000000 | (len - sizeof(uint32_t)));
 
 		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
 		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
 			/*
 			 * Copy the mbuf chain to a chain of
 			 * ext_pgs mbuf(s) as required by KERN_TLS.
 			 */
 			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
 #ifdef KERN_TLS
 			if (rpctls_getinfo(&maxlen, false, false))
 				maxextsiz = min(maxextsiz, maxlen);
 #endif
 			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
 		}
 		atomic_add_32(&xprt->xp_snd_cnt, len);
 		/*
 		 * sosend consumes mrep.
 		 */
 		error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
 		    0, curthread);
 		if (!error) {
 			atomic_add_rel_32(&xprt->xp_snt_cnt, len);
 			if (seq)
 				*seq = xprt->xp_snd_cnt;
 		} else {
 			/*
 			 * Nothing was sent: undo the optimistic count and
 			 * report the failure instead of leaving stat TRUE.
 			 */
 			atomic_subtract_32(&xprt->xp_snd_cnt, len);
 			stat = FALSE;
 		}
 	} else {
 		m_freem(mrep);
 	}
 
 	XDR_DESTROY(&xdrs);
 
 	return (stat);
 }
 
 /*
  * Reply method for a backchannel transport.
  *
  * Encodes *msg into a fresh mbuf chain, appends the caller's result chain
  * m when the reply is MSG_ACCEPTED/SUCCESS, prepends a 4-byte record
  * marker and, holding xp_lock, sends the record on the socket of the
  * client handle stored in xp_p2.  For transports with
  * RPCTLS_FLAGS_HANDSHAKE set, the chain is first copied into ext_pgs
  * mbufs.
  *
  * If no client handle is attached the reply cannot be sent: the chain is
  * freed here (it would otherwise leak, since only sosend() consumes it)
  * and the call fails as for EPIPE.
  *
  * Returns TRUE only when the reply was both encoded and accepted by
  * sosend(); FALSE otherwise.  seq is not used.
  */
 static bool_t
 svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
 {
 	struct ct_data *ct;
 	XDR xdrs;
 	struct mbuf *mrep;
 	bool_t stat = TRUE;
 	int error, maxextsiz;
 #ifdef KERN_TLS
 	u_int maxlen;
 #endif
 
 	/*
 	 * Leave space for record mark.
 	 */
 	mrep = m_gethdr(M_WAITOK, MT_DATA);
 	mrep->m_data += sizeof(uint32_t);
 
 	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
 
 	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
 	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
 		if (!xdr_replymsg(&xdrs, msg))
 			stat = FALSE;
 		else
 			xdrmbuf_append(&xdrs, m);
 	} else {
 		stat = xdr_replymsg(&xdrs, msg);
 	}
 
 	if (stat) {
 		m_fixhdr(mrep);
 
 		/*
 		 * Prepend a record marker containing the reply length.
 		 */
 		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
 		*mtod(mrep, uint32_t *) =
 			htonl(0x80000000 | (mrep->m_pkthdr.len
 				- sizeof(uint32_t)));
 
 		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
 		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
 			/*
 			 * Copy the mbuf chain to a chain of
 			 * ext_pgs mbuf(s) as required by KERN_TLS.
 			 */
 			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
 #ifdef KERN_TLS
 			if (rpctls_getinfo(&maxlen, false, false))
 				maxextsiz = min(maxextsiz, maxlen);
 #endif
 			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
 		}
 		sx_xlock(&xprt->xp_lock);
 		ct = (struct ct_data *)xprt->xp_p2;
 		if (ct != NULL) {
 			/* sosend consumes mrep, on success and on failure. */
 			error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
 			    0, curthread);
 		} else {
 			/*
 			 * No client handle to send on.  The chain was never
 			 * handed to sosend(), so release it here.
 			 */
 			m_freem(mrep);
 			error = EPIPE;
 		}
 		sx_xunlock(&xprt->xp_lock);
 		/* Report a failed transmit instead of leaving stat TRUE. */
 		if (error != 0)
 			stat = FALSE;
 	} else {
 		m_freem(mrep);
 	}
 
 	XDR_DESTROY(&xdrs);
 
 	return (stat);
 }
 
 /*
  * Placeholder method that takes no arguments and always returns FALSE.
  * Declared with an explicit (void) parameter list so the definition is a
  * proper prototype rather than an old-style non-prototype definition.
  */
 static bool_t
 svc_vc_null(void)
 {
 
 	return (FALSE);
 }
 
 /*
  * Receive upcall for a connected stream transport.  arg is the SVCXPRT;
  * the transport is marked active when its own socket (xp_socket, not the
  * so argument) is readable.  Always returns SU_OK.
  */
 static int
 svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
 {
 	SVCXPRT *xprt = arg;
 
 	if (!soreadable(xprt->xp_socket))
 		return (SU_OK);
 
 	xprt_active(xprt);
 	return (SU_OK);
 }
 
 /*
  * Listen upcall for a rendezvous transport.  arg is the SVCXPRT; the
  * transport is marked active when the listening socket's sol_comp queue
  * is non-empty.  Always returns SU_OK.
  */
 static int
 svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
 {
 	SVCXPRT *xprt = arg;
 
 	if (TAILQ_EMPTY(&head->sol_comp))
 		return (SU_OK);
 
 	xprt_active(xprt);
 	return (SU_OK);
 }
 
 #if 0
 /*
  * Get the effective UID of the sending process. Used by rpcbind, keyserv
  * and rpc.yppasswdd on AF_LOCAL.  Currently compiled out.
  *
  * Returns -1 when the remote address is not AF_LOCAL; otherwise returns
  * the result of getpeereid(), storing the UID in *uid when that result
  * is 0.
  */
 int
 __rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
 	struct sockaddr *peer = (struct sockaddr *)transp->xp_rtaddr;
 	uid_t peer_uid;
 	gid_t peer_gid;
 	int rv;
 
 	if (peer->sa_family != AF_LOCAL)
 		return (-1);
 
 	rv = getpeereid(transp->xp_fd, &peer_uid, &peer_gid);
 	if (rv == 0)
 		*uid = peer_uid;
 	return (rv);
 }
 #endif